From 009b1c42aa6266385f2c37e227516b24077e6dd7 Mon Sep 17 00:00:00 2001
From: Ed Schouten
Date: Tue, 2 Jun 2009 17:52:33 +0000
Subject: Import LLVM, at r72732.

---
 lib/Analysis/AliasAnalysis.cpp | 248 +
 lib/Analysis/AliasAnalysisCounter.cpp | 173 +
 lib/Analysis/AliasAnalysisEvaluator.cpp | 246 +
 lib/Analysis/AliasDebugger.cpp | 123 +
 lib/Analysis/AliasSetTracker.cpp | 608 +
 lib/Analysis/Analysis.cpp | 44 +
 lib/Analysis/BasicAliasAnalysis.cpp | 838 ++
 lib/Analysis/CFGPrinter.cpp | 221 +
 lib/Analysis/CMakeLists.txt | 34 +
 lib/Analysis/CaptureTracking.cpp | 112 +
 lib/Analysis/ConstantFolding.cpp | 829 ++
 lib/Analysis/DbgInfoPrinter.cpp | 167 +
 lib/Analysis/DebugInfo.cpp | 1079 ++
 lib/Analysis/IPA/Andersens.cpp | 2878 +++++
 lib/Analysis/IPA/CMakeLists.txt | 7 +
 lib/Analysis/IPA/CallGraph.cpp | 314 +
 lib/Analysis/IPA/CallGraphSCCPass.cpp | 207 +
 lib/Analysis/IPA/FindUsedTypes.cpp | 104 +
 lib/Analysis/IPA/GlobalsModRef.cpp | 567 +
 lib/Analysis/IPA/Makefile | 14 +
 lib/Analysis/IVUsers.cpp | 391 +
 lib/Analysis/InstCount.cpp | 86 +
 lib/Analysis/Interval.cpp | 57 +
 lib/Analysis/IntervalPartition.cpp | 114 +
 lib/Analysis/LibCallAliasAnalysis.cpp | 141 +
 lib/Analysis/LibCallSemantics.cpp | 65 +
 lib/Analysis/LiveValues.cpp | 191 +
 lib/Analysis/LoopInfo.cpp | 50 +
 lib/Analysis/LoopPass.cpp | 340 +
 lib/Analysis/LoopVR.cpp | 291 +
 lib/Analysis/Makefile | 16 +
 lib/Analysis/MemoryDependenceAnalysis.cpp | 1142 ++
 lib/Analysis/PostDominators.cpp | 94 +
 lib/Analysis/ProfileInfo.cpp | 100 +
 lib/Analysis/ProfileInfoLoader.cpp | 277 +
 lib/Analysis/ProfileInfoLoaderPass.cpp | 92 +
 lib/Analysis/ScalarEvolution.cpp | 3824 ++++++
 lib/Analysis/ScalarEvolutionExpander.cpp | 646 +
 lib/Analysis/SparsePropagation.cpp | 331 +
 lib/Analysis/Trace.cpp | 50 +
 lib/Analysis/ValueTracking.cpp | 1079 ++
 lib/Archive/Archive.cpp | 266 +
 lib/Archive/ArchiveInternals.h | 85 +
 lib/Archive/ArchiveReader.cpp | 627 +
 lib/Archive/ArchiveWriter.cpp | 482 +
 lib/Archive/CMakeLists.txt | 5 +
 lib/Archive/Makefile | 17 +
 lib/AsmParser/CMakeLists.txt | 6 +
 lib/AsmParser/LLLexer.cpp | 835 ++
 lib/AsmParser/LLLexer.h | 84 +
 lib/AsmParser/LLParser.cpp | 3279 +++++
 lib/AsmParser/LLParser.h | 276 +
 lib/AsmParser/LLToken.h | 130 +
 lib/AsmParser/Makefile | 14 +
 lib/AsmParser/Parser.cpp | 87 +
 lib/Bitcode/Makefile | 14 +
 lib/Bitcode/Reader/BitReader.cpp | 51 +
 lib/Bitcode/Reader/BitcodeReader.cpp | 2126 +++
 lib/Bitcode/Reader/BitcodeReader.h | 214 +
 lib/Bitcode/Reader/CMakeLists.txt | 7 +
 lib/Bitcode/Reader/Deserialize.cpp | 454 +
 lib/Bitcode/Reader/DeserializeAPFloat.cpp | 24 +
 lib/Bitcode/Reader/DeserializeAPInt.cpp | 33 +
 lib/Bitcode/Reader/Makefile | 15 +
 lib/Bitcode/Writer/BitWriter.cpp | 58 +
 lib/Bitcode/Writer/BitcodeWriter.cpp | 1449 +++
 lib/Bitcode/Writer/BitcodeWriterPass.cpp | 56 +
 lib/Bitcode/Writer/CMakeLists.txt | 9 +
 lib/Bitcode/Writer/Makefile | 15 +
 lib/Bitcode/Writer/Serialize.cpp | 118 +
 lib/Bitcode/Writer/SerializeAPFloat.cpp | 21 +
 lib/Bitcode/Writer/SerializeAPInt.cpp | 31 +
 lib/Bitcode/Writer/ValueEnumerator.cpp | 347 +
 lib/Bitcode/Writer/ValueEnumerator.h | 127 +
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1724 +++
 lib/CodeGen/AsmPrinter/CMakeLists.txt | 10 +
 lib/CodeGen/AsmPrinter/DIE.cpp | 518 +
 lib/CodeGen/AsmPrinter/DIE.h | 549 +
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2610 ++++
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 561 +
 lib/CodeGen/AsmPrinter/DwarfException.cpp | 706 +
 lib/CodeGen/AsmPrinter/DwarfException.h | 178 +
 lib/CodeGen/AsmPrinter/DwarfLabel.cpp | 35 +
 lib/CodeGen/AsmPrinter/DwarfLabel.h | 56 +
 lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 235 +
 lib/CodeGen/AsmPrinter/DwarfPrinter.h | 153 +
 lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 129 +
 lib/CodeGen/AsmPrinter/Makefile | 15 +
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 160 +
 lib/CodeGen/BranchFolding.cpp | 1204 ++
 lib/CodeGen/CMakeLists.txt | 62 +
 lib/CodeGen/CodePlacementOpt.cpp | 358 +
 lib/CodeGen/DeadMachineInstructionElim.cpp | 161 +
 lib/CodeGen/DwarfEHPrepare.cpp | 397 +
 lib/CodeGen/ELFWriter.cpp | 575 +
 lib/CodeGen/ELFWriter.h | 230 +
 lib/CodeGen/GCMetadata.cpp | 212 +
 lib/CodeGen/GCMetadataPrinter.cpp | 30 +
 lib/CodeGen/GCStrategy.cpp | 392 +
 lib/CodeGen/IfConversion.cpp | 1229 ++
 lib/CodeGen/IntrinsicLowering.cpp | 892 ++
 lib/CodeGen/LLVMTargetMachine.cpp | 289 +
 lib/CodeGen/LatencyPriorityQueue.cpp | 114 +
 lib/CodeGen/LiveInterval.cpp | 853 ++
 lib/CodeGen/LiveIntervalAnalysis.cpp | 2298 ++++
 lib/CodeGen/LiveStackAnalysis.cpp | 66 +
 lib/CodeGen/LiveVariables.cpp | 695 +
 lib/CodeGen/LowerSubregs.cpp | 292 +
 lib/CodeGen/MachOWriter.cpp | 976 ++
 lib/CodeGen/MachOWriter.h | 629 +
 lib/CodeGen/MachineBasicBlock.cpp | 372 +
 lib/CodeGen/MachineDominators.cpp | 53 +
 lib/CodeGen/MachineFunction.cpp | 598 +
 lib/CodeGen/MachineInstr.cpp | 1105 ++
 lib/CodeGen/MachineLICM.cpp | 406 +
 lib/CodeGen/MachineLoopInfo.cpp | 40 +
 lib/CodeGen/MachineModuleInfo.cpp | 368 +
 lib/CodeGen/MachinePassRegistry.cpp | 41 +
 lib/CodeGen/MachineRegisterInfo.cpp | 125 +
 lib/CodeGen/MachineSink.cpp | 257 +
 lib/CodeGen/MachineVerifier.cpp | 690 +
 lib/CodeGen/Makefile | 22 +
 lib/CodeGen/OcamlGC.cpp | 38 +
 lib/CodeGen/PBQP.cpp | 1395 ++
 lib/CodeGen/PBQP.h | 284 +
 lib/CodeGen/PHIElimination.cpp | 431 +
 lib/CodeGen/Passes.cpp | 54 +
 lib/CodeGen/PostRASchedulerList.cpp | 941 ++
 lib/CodeGen/PreAllocSplitting.cpp | 1485 +++
 lib/CodeGen/PrologEpilogInserter.cpp | 679 +
 lib/CodeGen/PrologEpilogInserter.h | 167 +
 lib/CodeGen/PseudoSourceValue.cpp | 92 +
 lib/CodeGen/README.txt | 208 +
 lib/CodeGen/RegAllocBigBlock.cpp | 892 ++
 lib/CodeGen/RegAllocLinearScan.cpp | 1535 +++
 lib/CodeGen/RegAllocLocal.cpp | 1068 ++
 lib/CodeGen/RegAllocPBQP.cpp | 871 ++
 lib/CodeGen/RegAllocSimple.cpp | 257 +
 lib/CodeGen/RegisterCoalescer.cpp | 41 +
 lib/CodeGen/RegisterScavenging.cpp | 480 +
 lib/CodeGen/ScheduleDAG.cpp | 572 +
 lib/CodeGen/ScheduleDAGEmit.cpp | 71 +
 lib/CodeGen/ScheduleDAGInstrs.cpp | 468 +
 lib/CodeGen/ScheduleDAGInstrs.h | 184 +
 lib/CodeGen/ScheduleDAGPrinter.cpp | 97 +
 lib/CodeGen/SelectionDAG/CMakeLists.txt | 22 +
 lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 148 +
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6203 +++++++++
 lib/CodeGen/SelectionDAG/FastISel.cpp | 1033 ++
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3091 +++++
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 1388 ++
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2382 ++++
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1074 +
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 736 ++
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 453 +
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 335 +
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2151 +++
 lib/CodeGen/SelectionDAG/Makefile | 15 +
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 635 +
 lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 268 +
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 1533 +++
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 294 +
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 179 +
 .../SelectionDAG/ScheduleDAGSDNodesEmit.cpp | 668 +
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5743 +++++++++
 lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 6052 +++++++++
 lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 558 +
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1347 ++
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 416 +
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2592 ++++
 lib/CodeGen/ShadowStackGC.cpp | 439 +
 lib/CodeGen/ShrinkWrapping.cpp | 1141 ++
 lib/CodeGen/SimpleRegisterCoalescing.cpp | 2827 ++++
 lib/CodeGen/SimpleRegisterCoalescing.h | 313 +
 lib/CodeGen/Spiller.cpp | 229 +
 lib/CodeGen/Spiller.h | 37 +
 lib/CodeGen/StackProtector.cpp | 224 +
 lib/CodeGen/StackSlotColoring.cpp | 733 ++
 lib/CodeGen/StrongPHIElimination.cpp | 1053 ++
 lib/CodeGen/TargetInstrInfoImpl.cpp | 194 +
 lib/CodeGen/TwoAddressInstructionPass.cpp | 997 ++
 lib/CodeGen/UnreachableBlockElim.cpp | 199 +
 lib/CodeGen/VirtRegMap.cpp | 269 +
 lib/CodeGen/VirtRegMap.h | 495 +
 lib/CodeGen/VirtRegRewriter.cpp | 2225 ++++
 lib/CodeGen/VirtRegRewriter.h | 56 +
 lib/CompilerDriver/Action.cpp | 78 +
 lib/CompilerDriver/CMakeLists.txt | 10 +
 lib/CompilerDriver/CompilationGraph.cpp | 536 +
 lib/CompilerDriver/Makefile | 19 +
 lib/CompilerDriver/Plugin.cpp | 73 +
 lib/CompilerDriver/Tool.cpp | 74 +
 lib/Debugger/CMakeLists.txt | 10 +
 lib/Debugger/Debugger.cpp | 230 +
 lib/Debugger/Makefile | 16 +
 lib/Debugger/ProgramInfo.cpp | 377 +
 lib/Debugger/README.txt | 7 +
 lib/Debugger/RuntimeInfo.cpp | 69 +
 lib/Debugger/SourceFile.cpp | 82 +
 lib/Debugger/SourceLanguage-CFamily.cpp | 28 +
 lib/Debugger/SourceLanguage-CPlusPlus.cpp | 27 +
 lib/Debugger/SourceLanguage-Unknown.cpp | 138 +
 lib/Debugger/SourceLanguage.cpp | 54 +
 lib/ExecutionEngine/CMakeLists.txt | 4 +
 lib/ExecutionEngine/ExecutionEngine.cpp | 1010 ++
 lib/ExecutionEngine/ExecutionEngineBindings.cpp | 206 +
 lib/ExecutionEngine/Interpreter/CMakeLists.txt | 5 +
 lib/ExecutionEngine/Interpreter/Execution.cpp | 1382 ++
 .../Interpreter/ExternalFunctions.cpp | 542 +
 lib/ExecutionEngine/Interpreter/Interpreter.cpp | 104 +
 lib/ExecutionEngine/Interpreter/Interpreter.h | 241 +
 lib/ExecutionEngine/Interpreter/Makefile | 12 +
 lib/ExecutionEngine/JIT/CMakeLists.txt | 11 +
 lib/ExecutionEngine/JIT/Intercept.cpp | 148 +
 lib/ExecutionEngine/JIT/JIT.cpp | 708 +
 lib/ExecutionEngine/JIT/JIT.h | 176 +
 lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 1056 ++
 lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 87 +
 lib/ExecutionEngine/JIT/JITEmitter.cpp | 1615 +++
 lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 541 +
 lib/ExecutionEngine/JIT/Makefile | 37 +
 lib/ExecutionEngine/JIT/TargetSelect.cpp | 83 +
 lib/ExecutionEngine/Makefile | 13 +
 lib/Linker/CMakeLists.txt | 6 +
 lib/Linker/LinkArchives.cpp | 201 +
 lib/Linker/LinkItems.cpp | 238 +
 lib/Linker/LinkModules.cpp | 1328 ++
 lib/Linker/Linker.cpp | 178 +
 lib/Linker/Makefile | 15 +
 lib/Makefile | 15 +
 lib/Support/APFloat.cpp | 2950 +++++
 lib/Support/APInt.cpp | 2816 ++++
 lib/Support/APSInt.cpp | 23 +
 lib/Support/Allocator.cpp | 141 +
 lib/Support/Annotation.cpp | 115 +
 lib/Support/CMakeLists.txt | 31 +
 lib/Support/CommandLine.cpp | 1184 ++
 lib/Support/ConstantRange.cpp | 472 +
 lib/Support/Debug.cpp | 77 +
 lib/Support/Dwarf.cpp | 589 +
 lib/Support/FileUtilities.cpp | 263 +
 lib/Support/FoldingSet.cpp | 378 +
 lib/Support/GraphWriter.cpp | 89 +
 lib/Support/IsInf.cpp | 49 +
 lib/Support/IsNAN.cpp | 33 +
 lib/Support/Makefile | 17 +
 lib/Support/ManagedStatic.cpp | 91 +
 lib/Support/MemoryBuffer.cpp | 279 +
 lib/Support/PluginLoader.cpp | 43 +
 lib/Support/PrettyStackTrace.cpp | 108 +
 lib/Support/SlowOperationInformer.cpp | 66 +
 lib/Support/SmallPtrSet.cpp | 223 +
 lib/Support/Statistic.cpp | 126 +
 lib/Support/Streams.cpp | 30 +
 lib/Support/StringExtras.cpp | 114 +
 lib/Support/StringMap.cpp | 234 +
 lib/Support/StringPool.cpp | 35 +
 lib/Support/SystemUtils.cpp | 58 +
 lib/Support/Timer.cpp | 387 +
 lib/Support/Triple.cpp | 187 +
 lib/Support/raw_ostream.cpp | 376 +
 lib/System/Alarm.cpp | 33 +
 lib/System/Atomic.cpp | 53 +
 lib/System/CMakeLists.txt | 19 +
 lib/System/Disassembler.cpp | 79 +
 lib/System/DynamicLibrary.cpp | 165 +
 lib/System/Host.cpp | 24 +
 lib/System/IncludeFile.cpp | 20 +
 lib/System/LICENSE.TXT | 6 +
 lib/System/Makefile | 19 +
 lib/System/Memory.cpp | 62 +
 lib/System/Mutex.cpp | 160 +
 lib/System/Path.cpp | 287 +
 lib/System/Process.cpp | 33 +
 lib/System/Program.cpp | 33 +
 lib/System/README.txt | 43 +
 lib/System/Signals.cpp | 34 +
 lib/System/TimeValue.cpp | 58 +
 lib/System/Unix/Alarm.inc | 72 +
 lib/System/Unix/Host.inc | 58 +
 lib/System/Unix/Memory.inc | 150 +
 lib/System/Unix/Mutex.inc | 49 +
 lib/System/Unix/Path.inc | 876 ++
 lib/System/Unix/Process.inc | 237 +
 lib/System/Unix/Program.inc | 287 +
 lib/System/Unix/README.txt | 16 +
 lib/System/Unix/Signals.inc | 230 +
 lib/System/Unix/TimeValue.inc | 56 +
 lib/System/Unix/Unix.h | 104 +
 lib/System/Win32/Alarm.inc | 43 +
 lib/System/Win32/DynamicLibrary.inc | 219 +
 lib/System/Win32/Host.inc | 23 +
 lib/System/Win32/Memory.inc | 72 +
 lib/System/Win32/Mutex.inc | 58 +
 lib/System/Win32/Path.inc | 825 ++
 lib/System/Win32/Process.inc | 150 +
 lib/System/Win32/Program.inc | 316 +
 lib/System/Win32/Signals.inc | 270 +
 lib/System/Win32/TimeValue.inc | 51 +
 lib/System/Win32/Win32.h | 57 +
 lib/Target/ARM/ARM.h | 121 +
 lib/Target/ARM/ARM.td | 136 +
 lib/Target/ARM/ARMAddressingModes.h | 394 +
 lib/Target/ARM/ARMBuildAttrs.h | 64 +
 lib/Target/ARM/ARMCallingConv.td | 87 +
 lib/Target/ARM/ARMCodeEmitter.cpp | 1411 ++
 lib/Target/ARM/ARMConstantIslandPass.cpp | 1285 ++
 lib/Target/ARM/ARMConstantPoolValue.cpp | 100 +
 lib/Target/ARM/ARMConstantPoolValue.h | 92 +
 lib/Target/ARM/ARMFrameInfo.h | 32 +
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 911 ++
 lib/Target/ARM/ARMISelLowering.cpp | 2346 ++++
 lib/Target/ARM/ARMISelLowering.h | 184 +
 lib/Target/ARM/ARMInstrFormats.td | 868 ++
 lib/Target/ARM/ARMInstrInfo.cpp | 1025 ++
 lib/Target/ARM/ARMInstrInfo.h | 258 +
 lib/Target/ARM/ARMInstrInfo.td | 1390 ++
 lib/Target/ARM/ARMInstrThumb.td | 562 +
 lib/Target/ARM/ARMInstrThumb2.td | 12 +
 lib/Target/ARM/ARMInstrVFP.td | 398 +
 lib/Target/ARM/ARMJITInfo.cpp | 298 +
 lib/Target/ARM/ARMJITInfo.h | 178 +
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 778 ++
 lib/Target/ARM/ARMMachineFunctionInfo.h | 238 +
 lib/Target/ARM/ARMRegisterInfo.cpp | 1528 +++
 lib/Target/ARM/ARMRegisterInfo.h | 102 +
 lib/Target/ARM/ARMRegisterInfo.td | 221 +
 lib/Target/ARM/ARMRelocations.h | 56 +
 lib/Target/ARM/ARMSubtarget.cpp | 84 +
 lib/Target/ARM/ARMSubtarget.h | 122 +
 lib/Target/ARM/ARMTargetAsmInfo.cpp | 291 +
 lib/Target/ARM/ARMTargetAsmInfo.h | 64 +
 lib/Target/ARM/ARMTargetMachine.cpp | 242 +
 lib/Target/ARM/ARMTargetMachine.h | 104 +
 lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 1117 ++
 lib/Target/ARM/AsmPrinter/CMakeLists.txt | 9 +
 lib/Target/ARM/AsmPrinter/Makefile | 15 +
 lib/Target/ARM/CMakeLists.txt | 27 +
 lib/Target/ARM/Makefile | 23 +
 lib/Target/ARM/README-Thumb.txt | 228 +
 lib/Target/ARM/README.txt | 554 +
 lib/Target/Alpha/Alpha.h | 51 +
 lib/Target/Alpha/Alpha.td | 66 +
 lib/Target/Alpha/AlphaBranchSelector.cpp | 67 +
 lib/Target/Alpha/AlphaCodeEmitter.cpp | 242 +
 lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 553 +
 lib/Target/Alpha/AlphaISelLowering.cpp | 798 ++
 lib/Target/Alpha/AlphaISelLowering.h | 114 +
 lib/Target/Alpha/AlphaInstrFormats.td | 268 +
 lib/Target/Alpha/AlphaInstrInfo.cpp | 450 +
 lib/Target/Alpha/AlphaInstrInfo.h | 97 +
 lib/Target/Alpha/AlphaInstrInfo.td | 1137 ++
 lib/Target/Alpha/AlphaJITInfo.cpp | 307 +
 lib/Target/Alpha/AlphaJITInfo.h | 47 +
 lib/Target/Alpha/AlphaLLRP.cpp | 158 +
 lib/Target/Alpha/AlphaRegisterInfo.cpp | 335 +
 lib/Target/Alpha/AlphaRegisterInfo.h | 67 +
 lib/Target/Alpha/AlphaRegisterInfo.td | 171 +
 lib/Target/Alpha/AlphaRelocations.h | 31 +
 lib/Target/Alpha/AlphaSchedule.td | 84 +
 lib/Target/Alpha/AlphaSubtarget.cpp | 25 +
 lib/Target/Alpha/AlphaSubtarget.h | 47 +
 lib/Target/Alpha/AlphaTargetAsmInfo.cpp | 31 +
 lib/Target/Alpha/AlphaTargetAsmInfo.h | 32 +
 lib/Target/Alpha/AlphaTargetMachine.cpp | 126 +
 lib/Target/Alpha/AlphaTargetMachine.h | 82 +
 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 305 +
 lib/Target/Alpha/AsmPrinter/CMakeLists.txt | 9 +
 lib/Target/Alpha/AsmPrinter/Makefile | 15 +
 lib/Target/Alpha/CMakeLists.txt | 25 +
 lib/Target/Alpha/Makefile | 22 +
 lib/Target/Alpha/README.txt | 42 +
 lib/Target/CBackend/CBackend.cpp | 3601 ++++++
 lib/Target/CBackend/CMakeLists.txt | 3 +
 lib/Target/CBackend/CTargetMachine.h | 43 +
 lib/Target/CBackend/Makefile | 14 +
 lib/Target/CMakeLists.txt | 17 +
 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt | 12 +
 lib/Target/CellSPU/AsmPrinter/Makefile | 17 +
 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 623 +
 lib/Target/CellSPU/CMakeLists.txt | 24 +
 lib/Target/CellSPU/CellSDKIntrinsics.td | 448 +
 lib/Target/CellSPU/Makefile | 22 +
 lib/Target/CellSPU/README.txt | 90 +
 lib/Target/CellSPU/SPU.h | 102 +
 lib/Target/CellSPU/SPU.td | 66 +
 lib/Target/CellSPU/SPU128InstrInfo.td | 41 +
 lib/Target/CellSPU/SPU64InstrInfo.td | 394 +
 lib/Target/CellSPU/SPUCallingConv.td | 115 +
 lib/Target/CellSPU/SPUFrameInfo.cpp | 29 +
 lib/Target/CellSPU/SPUFrameInfo.h | 79 +
 lib/Target/CellSPU/SPUHazardRecognizers.cpp | 138 +
 lib/Target/CellSPU/SPUHazardRecognizers.h | 41 +
 lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1244 ++
 lib/Target/CellSPU/SPUISelLowering.cpp | 2980 +++++
 lib/Target/CellSPU/SPUISelLowering.h | 154 +
 lib/Target/CellSPU/SPUInstrBuilder.h | 43 +
 lib/Target/CellSPU/SPUInstrFormats.td | 298 +
 lib/Target/CellSPU/SPUInstrInfo.cpp | 693 +
 lib/Target/CellSPU/SPUInstrInfo.h | 114 +
 lib/Target/CellSPU/SPUInstrInfo.td | 4614 +++++++
 lib/Target/CellSPU/SPUMachineFunction.h | 43 +
 lib/Target/CellSPU/SPUMathInstr.td | 97 +
 lib/Target/CellSPU/SPUNodes.td | 156 +
 lib/Target/CellSPU/SPUOperands.td | 655 +
 lib/Target/CellSPU/SPURegisterInfo.cpp | 614 +
 lib/Target/CellSPU/SPURegisterInfo.h | 101 +
 lib/Target/CellSPU/SPURegisterInfo.td | 429 +
 lib/Target/CellSPU/SPURegisterNames.h | 18 +
 lib/Target/CellSPU/SPUSchedule.td | 57 +
 lib/Target/CellSPU/SPUSubtarget.cpp | 40 +
 lib/Target/CellSPU/SPUSubtarget.h | 95 +
 lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 74 +
 lib/Target/CellSPU/SPUTargetAsmInfo.h | 51 +
 lib/Target/CellSPU/SPUTargetMachine.cpp | 98 +
 lib/Target/CellSPU/SPUTargetMachine.h | 95 +
 lib/Target/CppBackend/CMakeLists.txt | 3 +
 lib/Target/CppBackend/CPPBackend.cpp | 2007 +++
 lib/Target/CppBackend/CPPTargetMachine.h | 44 +
 lib/Target/CppBackend/Makefile | 14 +
 lib/Target/DarwinTargetAsmInfo.cpp | 169 +
 lib/Target/ELFTargetAsmInfo.cpp | 227 +
 lib/Target/IA64/AsmPrinter/CMakeLists.txt | 12 +
 lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp | 376 +
 lib/Target/IA64/AsmPrinter/Makefile | 17 +
 lib/Target/IA64/CMakeLists.txt | 20 +
 lib/Target/IA64/IA64.h | 58 +
 lib/Target/IA64/IA64.td | 39 +
 lib/Target/IA64/IA64Bundling.cpp | 118 +
 lib/Target/IA64/IA64ISelDAGToDAG.cpp | 575 +
 lib/Target/IA64/IA64ISelLowering.cpp | 622 +
 lib/Target/IA64/IA64ISelLowering.h | 76 +
 lib/Target/IA64/IA64InstrBuilder.h | 40 +
 lib/Target/IA64/IA64InstrFormats.td | 80 +
 lib/Target/IA64/IA64InstrInfo.cpp | 193 +
 lib/Target/IA64/IA64InstrInfo.h | 70 +
 lib/Target/IA64/IA64InstrInfo.td | 751 ++
 lib/Target/IA64/IA64MachineFunctionInfo.h | 34 +
 lib/Target/IA64/IA64RegisterInfo.cpp | 319 +
 lib/Target/IA64/IA64RegisterInfo.h | 63 +
 lib/Target/IA64/IA64RegisterInfo.td | 509 +
 lib/Target/IA64/IA64Subtarget.cpp | 18 +
 lib/Target/IA64/IA64Subtarget.h | 28 +
 lib/Target/IA64/IA64TargetAsmInfo.cpp | 44 +
 lib/Target/IA64/IA64TargetAsmInfo.h | 33 +
 lib/Target/IA64/IA64TargetMachine.cpp | 94 +
 lib/Target/IA64/IA64TargetMachine.h | 64 +
 lib/Target/IA64/Makefile | 20 +
 lib/Target/IA64/README | 48 +
 lib/Target/MSIL/CMakeLists.txt | 3 +
 lib/Target/MSIL/MSILWriter.cpp | 1680 +++
 lib/Target/MSIL/MSILWriter.h | 255 +
 lib/Target/MSIL/Makefile | 14 +
 lib/Target/MSIL/README.TXT | 26 +
 lib/Target/MSP430/CMakeLists.txt | 23 +
 lib/Target/MSP430/MSP430.h | 40 +
 lib/Target/MSP430/MSP430.td | 60 +
 lib/Target/MSP430/MSP430AsmPrinter.cpp | 267 +
 lib/Target/MSP430/MSP430CallingConv.td | 37 +
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 194 +
 lib/Target/MSP430/MSP430ISelLowering.cpp | 670 +
 lib/Target/MSP430/MSP430ISelLowering.h | 103 +
 lib/Target/MSP430/MSP430InstrFormats.td | 67 +
 lib/Target/MSP430/MSP430InstrInfo.cpp | 177 +
 lib/Target/MSP430/MSP430InstrInfo.h | 84 +
 lib/Target/MSP430/MSP430InstrInfo.td | 901 ++
 lib/Target/MSP430/MSP430MachineFunctionInfo.h | 39 +
 lib/Target/MSP430/MSP430RegisterInfo.cpp | 355 +
 lib/Target/MSP430/MSP430RegisterInfo.h | 70 +
 lib/Target/MSP430/MSP430RegisterInfo.td | 122 +
 lib/Target/MSP430/MSP430Subtarget.cpp | 27 +
 lib/Target/MSP430/MSP430Subtarget.h | 41 +
 lib/Target/MSP430/MSP430TargetAsmInfo.cpp | 22 +
 lib/Target/MSP430/MSP430TargetAsmInfo.h | 31 +
 lib/Target/MSP430/MSP430TargetMachine.cpp | 76 +
 lib/Target/MSP430/MSP430TargetMachine.h | 68 +
 lib/Target/MSP430/Makefile | 21 +
 lib/Target/MSP430/README.txt | 42 +
 lib/Target/Makefile | 20 +
 lib/Target/Mips/AsmPrinter/CMakeLists.txt | 12 +
 lib/Target/Mips/AsmPrinter/Makefile | 17 +
 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 580 +
 lib/Target/Mips/CMakeLists.txt | 22 +
 lib/Target/Mips/Makefile | 23 +
 lib/Target/Mips/Mips.h | 41 +
 lib/Target/Mips/Mips.td | 88 +
 lib/Target/Mips/MipsCallingConv.td | 86 +
 lib/Target/Mips/MipsDelaySlotFiller.cpp | 77 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp | 392 +
 lib/Target/Mips/MipsISelLowering.cpp | 1254 ++
 lib/Target/Mips/MipsISelLowering.h | 130 +
 lib/Target/Mips/MipsInstrFPU.td | 304 +
 lib/Target/Mips/MipsInstrFormats.td | 182 +
 lib/Target/Mips/MipsInstrInfo.cpp | 623 +
 lib/Target/Mips/MipsInstrInfo.h | 223 +
 lib/Target/Mips/MipsInstrInfo.td | 707 +
 lib/Target/Mips/MipsMachineFunction.h | 131 +
 lib/Target/Mips/MipsRegisterInfo.cpp | 535 +
 lib/Target/Mips/MipsRegisterInfo.h | 78 +
 lib/Target/Mips/MipsRegisterInfo.td | 252 +
 lib/Target/Mips/MipsSchedule.td | 63 +
 lib/Target/Mips/MipsSubtarget.cpp | 77 +
 lib/Target/Mips/MipsSubtarget.h | 139 +
 lib/Target/Mips/MipsTargetAsmInfo.cpp | 98 +
 lib/Target/Mips/MipsTargetAsmInfo.h | 51 +
 lib/Target/Mips/MipsTargetMachine.cpp | 133 +
 lib/Target/Mips/MipsTargetMachine.h | 80 +
 lib/Target/PIC16/CMakeLists.txt | 24 +
 lib/Target/PIC16/Makefile | 21 +
 lib/Target/PIC16/PIC16.h | 345 +
 lib/Target/PIC16/PIC16.td | 40 +
 lib/Target/PIC16/PIC16AsmPrinter.cpp | 404 +
 lib/Target/PIC16/PIC16AsmPrinter.h | 70 +
 lib/Target/PIC16/PIC16DebugInfo.cpp | 270 +
 lib/Target/PIC16/PIC16DebugInfo.h | 114 +
 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 59 +
 lib/Target/PIC16/PIC16ISelDAGToDAG.h | 60 +
 lib/Target/PIC16/PIC16ISelLowering.cpp | 1756 +++
 lib/Target/PIC16/PIC16ISelLowering.h | 227 +
 lib/Target/PIC16/PIC16InstrFormats.td | 117 +
 lib/Target/PIC16/PIC16InstrInfo.cpp | 186 +
 lib/Target/PIC16/PIC16InstrInfo.h | 70 +
 lib/Target/PIC16/PIC16InstrInfo.td | 522 +
 lib/Target/PIC16/PIC16MemSelOpt.cpp | 169 +
 lib/Target/PIC16/PIC16RegisterInfo.cpp | 91 +
 lib/Target/PIC16/PIC16RegisterInfo.h | 68 +
 lib/Target/PIC16/PIC16RegisterInfo.td | 33 +
 lib/Target/PIC16/PIC16Subtarget.cpp | 27 +
 lib/Target/PIC16/PIC16Subtarget.h | 45 +
 lib/Target/PIC16/PIC16TargetAsmInfo.cpp | 264 +
 lib/Target/PIC16/PIC16TargetAsmInfo.h | 79 +
 lib/Target/PIC16/PIC16TargetMachine.cpp | 79 +
 lib/Target/PIC16/PIC16TargetMachine.h | 76 +
 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt | 9 +
 lib/Target/PowerPC/AsmPrinter/Makefile | 15 +
 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 1204 ++
 lib/Target/PowerPC/CMakeLists.txt | 28 +
 lib/Target/PowerPC/Makefile | 22 +
 lib/Target/PowerPC/PPC.h | 49 +
 lib/Target/PowerPC/PPC.td | 114 +
 lib/Target/PowerPC/PPCBranchSelector.cpp | 174 +
 lib/Target/PowerPC/PPCCallingConv.td | 66 +
 lib/Target/PowerPC/PPCCodeEmitter.cpp | 266 +
 lib/Target/PowerPC/PPCFrameInfo.h | 93 +
 lib/Target/PowerPC/PPCHazardRecognizers.cpp | 304 +
 lib/Target/PowerPC/PPCHazardRecognizers.h | 73 +
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1170 ++
 lib/Target/PowerPC/PPCISelLowering.cpp | 4878 +++++++
 lib/Target/PowerPC/PPCISelLowering.h | 394 +
 lib/Target/PowerPC/PPCInstr64Bit.td | 723 ++
 lib/Target/PowerPC/PPCInstrAltivec.td | 668 +
 lib/Target/PowerPC/PPCInstrBuilder.h | 43 +
 lib/Target/PowerPC/PPCInstrFormats.td | 875 ++
 lib/Target/PowerPC/PPCInstrInfo.cpp | 818 ++
 lib/Target/PowerPC/PPCInstrInfo.h | 168 +
 lib/Target/PowerPC/PPCInstrInfo.td | 1475 +++
 lib/Target/PowerPC/PPCJITInfo.cpp | 437 +
 lib/Target/PowerPC/PPCJITInfo.h | 48 +
 lib/Target/PowerPC/PPCMachOWriterInfo.cpp | 151 +
 lib/Target/PowerPC/PPCMachOWriterInfo.h | 55 +
 lib/Target/PowerPC/PPCMachineFunctionInfo.h | 104 +
 lib/Target/PowerPC/PPCPerfectShuffle.h | 6586 ++++++++++
 lib/Target/PowerPC/PPCPredicates.cpp | 30 +
 lib/Target/PowerPC/PPCPredicates.h | 39 +
 lib/Target/PowerPC/PPCRegisterInfo.cpp | 1446 +++
 lib/Target/PowerPC/PPCRegisterInfo.h | 95 +
 lib/Target/PowerPC/PPCRegisterInfo.td | 360 +
 lib/Target/PowerPC/PPCRelocations.h | 56 +
 lib/Target/PowerPC/PPCSchedule.td | 508 +
 lib/Target/PowerPC/PPCScheduleG3.td | 63 +
 lib/Target/PowerPC/PPCScheduleG4.td | 73 +
 lib/Target/PowerPC/PPCScheduleG4Plus.td | 76 +
 lib/Target/PowerPC/PPCScheduleG5.td | 83 +
 lib/Target/PowerPC/PPCSubtarget.cpp | 152 +
 lib/Target/PowerPC/PPCSubtarget.h | 160 +
 lib/Target/PowerPC/PPCTargetAsmInfo.cpp | 161 +
 lib/Target/PowerPC/PPCTargetAsmInfo.h | 62 +
 lib/Target/PowerPC/PPCTargetMachine.cpp | 250 +
 lib/Target/PowerPC/PPCTargetMachine.h | 120 +
 lib/Target/PowerPC/README.txt | 799 ++
 lib/Target/PowerPC/README_ALTIVEC.txt | 211 +
 lib/Target/README.txt | 1679 +++
 lib/Target/Sparc/AsmPrinter/CMakeLists.txt | 9 +
 lib/Target/Sparc/AsmPrinter/Makefile | 15 +
 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 355 +
 lib/Target/Sparc/CMakeLists.txt | 23 +
 lib/Target/Sparc/DelaySlotFiller.cpp | 76 +
 lib/Target/Sparc/FPMover.cpp | 139 +
 lib/Target/Sparc/Makefile | 22 +
 lib/Target/Sparc/README.txt | 58 +
 lib/Target/Sparc/Sparc.h | 119 +
 lib/Target/Sparc/Sparc.td | 76 +
 lib/Target/Sparc/SparcCallingConv.td | 32 +
 lib/Target/Sparc/SparcISelDAGToDAG.cpp | 215 +
 lib/Target/Sparc/SparcISelLowering.cpp | 1049 ++
 lib/Target/Sparc/SparcISelLowering.h | 79 +
 lib/Target/Sparc/SparcInstrFormats.td | 114 +
 lib/Target/Sparc/SparcInstrInfo.cpp | 277 +
 lib/Target/Sparc/SparcInstrInfo.h | 114 +
 lib/Target/Sparc/SparcInstrInfo.td | 769 ++
 lib/Target/Sparc/SparcRegisterInfo.cpp | 196 +
 lib/Target/Sparc/SparcRegisterInfo.h | 67 +
 lib/Target/Sparc/SparcRegisterInfo.td | 158 +
 lib/Target/Sparc/SparcSubtarget.cpp | 43 +
 lib/Target/Sparc/SparcSubtarget.h | 43 +
 lib/Target/Sparc/SparcTargetAsmInfo.cpp | 50 +
 lib/Target/Sparc/SparcTargetAsmInfo.h | 33 +
 lib/Target/Sparc/SparcTargetMachine.cpp | 94 +
 lib/Target/Sparc/SparcTargetMachine.h | 63 +
 lib/Target/SubtargetFeature.cpp | 364 +
 lib/Target/Target.cpp | 94 +
 lib/Target/TargetAsmInfo.cpp | 461 +
 lib/Target/TargetData.cpp | 603 +
 lib/Target/TargetFrameInfo.cpp | 19 +
 lib/Target/TargetInstrInfo.cpp | 50 +
 lib/Target/TargetIntrinsicInfo.cpp | 22 +
 lib/Target/TargetMachOWriterInfo.cpp | 25 +
 lib/Target/TargetMachine.cpp | 229 +
 lib/Target/TargetMachineRegistry.cpp | 78 +
 lib/Target/TargetRegisterInfo.cpp | 144 +
 lib/Target/TargetSubtarget.cpp | 22 +
 lib/Target/X86/AsmPrinter/CMakeLists.txt | 11 +
 lib/Target/X86/AsmPrinter/Makefile | 15 +
 lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp | 1075 ++
 lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h | 164 +
 lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 50 +
 lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp | 609 +
 lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h | 152 +
 lib/Target/X86/CMakeLists.txt | 29 +
 lib/Target/X86/Makefile | 23 +
 lib/Target/X86/README-FPStack.txt | 85 +
 lib/Target/X86/README-MMX.txt | 71 +
 lib/Target/X86/README-SSE.txt | 918 ++
 lib/Target/X86/README-UNIMPLEMENTED.txt | 14 +
 lib/Target/X86/README-X86-64.txt | 251 +
 lib/Target/X86/README.txt | 1899 +++
 lib/Target/X86/X86.h | 84 +
 lib/Target/X86/X86.td | 184 +
 lib/Target/X86/X86COFF.h | 95 +
 lib/Target/X86/X86CallingConv.td | 360 +
 lib/Target/X86/X86CodeEmitter.cpp | 811 ++
 lib/Target/X86/X86CompilationCallback_Win64.asm | 67 +
 lib/Target/X86/X86ELFWriterInfo.cpp | 18 +
 lib/Target/X86/X86ELFWriterInfo.h | 29 +
 lib/Target/X86/X86FastISel.cpp | 1549 +++
 lib/Target/X86/X86FloatingPoint.cpp | 1187 ++
 lib/Target/X86/X86FloatingPointRegKill.cpp | 139 +
 lib/Target/X86/X86ISelDAGToDAG.cpp | 1716 +++
 lib/Target/X86/X86ISelLowering.cpp | 8794 +++++++++++++
 lib/Target/X86/X86ISelLowering.h | 705 +
 lib/Target/X86/X86Instr64bit.td | 1937 +++
 lib/Target/X86/X86InstrBuilder.h | 168 +
 lib/Target/X86/X86InstrFPStack.td | 597 +
 lib/Target/X86/X86InstrFormats.td | 285 +
 lib/Target/X86/X86InstrInfo.cpp | 3227 +++++
 lib/Target/X86/X86InstrInfo.h | 461 +
 lib/Target/X86/X86InstrInfo.td | 3961 ++++++
 lib/Target/X86/X86InstrMMX.td | 694 +
 lib/Target/X86/X86InstrSSE.td | 3643 ++++++
 lib/Target/X86/X86JITInfo.cpp | 560 +
 lib/Target/X86/X86JITInfo.h | 84 +
 lib/Target/X86/X86MachineFunctionInfo.h | 112 +
 lib/Target/X86/X86RegisterInfo.cpp | 1280 ++
 lib/Target/X86/X86RegisterInfo.h | 163 +
 lib/Target/X86/X86RegisterInfo.td | 762 ++
 lib/Target/X86/X86Relocations.h | 42 +
 lib/Target/X86/X86Subtarget.cpp | 446 +
 lib/Target/X86/X86Subtarget.h | 224 +
 lib/Target/X86/X86TargetAsmInfo.cpp | 461 +
 lib/Target/X86/X86TargetAsmInfo.h | 75 +
 lib/Target/X86/X86TargetMachine.cpp | 317 +
 lib/Target/X86/X86TargetMachine.h | 124 +
 lib/Target/XCore/CMakeLists.txt | 23 +
 lib/Target/XCore/Makefile | 21 +
 lib/Target/XCore/README.txt | 8 +
 lib/Target/XCore/XCore.h | 42 +
 lib/Target/XCore/XCore.td | 62 +
 lib/Target/XCore/XCoreAsmPrinter.cpp | 472 +
 lib/Target/XCore/XCoreCallingConv.td | 33 +
 lib/Target/XCore/XCoreFrameInfo.cpp | 27 +
 lib/Target/XCore/XCoreFrameInfo.h | 34 +
 lib/Target/XCore/XCoreISelDAGToDAG.cpp | 230 +
 lib/Target/XCore/XCoreISelLowering.cpp | 934 ++
 lib/Target/XCore/XCoreISelLowering.h | 123 +
 lib/Target/XCore/XCoreInstrFormats.td | 120 +
 lib/Target/XCore/XCoreInstrInfo.cpp | 524 +
 lib/Target/XCore/XCoreInstrInfo.h | 110 +
 lib/Target/XCore/XCoreInstrInfo.td | 991 ++
 lib/Target/XCore/XCoreMachineFunctionInfo.h | 69 +
 lib/Target/XCore/XCoreRegisterInfo.cpp | 598 +
 lib/Target/XCore/XCoreRegisterInfo.h | 94 +
 lib/Target/XCore/XCoreRegisterInfo.td | 91 +
 lib/Target/XCore/XCoreSubtarget.cpp | 28 +
 lib/Target/XCore/XCoreSubtarget.h | 46 +
 lib/Target/XCore/XCoreTargetAsmInfo.cpp | 201 +
 lib/Target/XCore/XCoreTargetAsmInfo.h | 45 +
 lib/Target/XCore/XCoreTargetMachine.cpp | 71 +
 lib/Target/XCore/XCoreTargetMachine.h | 63 +
 lib/Transforms/Hello/CMakeLists.txt | 3 +
 lib/Transforms/Hello/Hello.cpp | 67 +
 lib/Transforms/Hello/Makefile | 16 +
 lib/Transforms/IPO/ArgumentPromotion.cpp | 863 ++
 lib/Transforms/IPO/CMakeLists.txt | 25 +
 lib/Transforms/IPO/ConstantMerge.cpp | 114 +
 lib/Transforms/IPO/DeadArgumentElimination.cpp | 944 ++
 lib/Transforms/IPO/DeadTypeElimination.cpp | 107 +
 lib/Transforms/IPO/ExtractGV.cpp | 173 +
 lib/Transforms/IPO/FunctionAttrs.cpp | 347 +
 lib/Transforms/IPO/GlobalDCE.cpp | 227 +
 lib/Transforms/IPO/GlobalOpt.cpp | 2485 ++++
 lib/Transforms/IPO/IPConstantPropagation.cpp | 277 +
 lib/Transforms/IPO/IPO.cpp | 75 +
 lib/Transforms/IPO/IndMemRemoval.cpp | 89 +
 lib/Transforms/IPO/InlineAlways.cpp | 75 +
 lib/Transforms/IPO/InlineSimple.cpp | 106 +
 lib/Transforms/IPO/Inliner.cpp | 278 +
 lib/Transforms/IPO/Internalize.cpp | 184 +
 lib/Transforms/IPO/LoopExtractor.cpp | 261 +
 lib/Transforms/IPO/LowerSetJmp.cpp | 536 +
 lib/Transforms/IPO/Makefile | 15 +
 lib/Transforms/IPO/MergeFunctions.cpp | 377 +
 lib/Transforms/IPO/PartialSpecialization.cpp | 191 +
 lib/Transforms/IPO/PruneEH.cpp | 255 +
 lib/Transforms/IPO/RaiseAllocations.cpp | 251 +
 lib/Transforms/IPO/StripDeadPrototypes.cpp | 72 +
 lib/Transforms/IPO/StripSymbols.cpp | 415 +
 lib/Transforms/IPO/StructRetPromotion.cpp | 351 +
 lib/Transforms/Instrumentation/BlockProfiling.cpp | 126 +
 lib/Transforms/Instrumentation/CMakeLists.txt | 6 +
 lib/Transforms/Instrumentation/EdgeProfiling.cpp | 101 +
 lib/Transforms/Instrumentation/Makefile | 15 +
 lib/Transforms/Instrumentation/ProfilingUtils.cpp | 120 +
 lib/Transforms/Instrumentation/ProfilingUtils.h | 31 +
 lib/Transforms/Instrumentation/RSProfiling.cpp | 653 +
 lib/Transforms/Instrumentation/RSProfiling.h | 31 +
 lib/Transforms/Makefile | 20 +
 lib/Transforms/Scalar/ADCE.cpp | 98 +
 lib/Transforms/Scalar/BasicBlockPlacement.cpp | 148 +
 lib/Transforms/Scalar/CMakeLists.txt | 33 +
 lib/Transforms/Scalar/CodeGenPrepare.cpp | 873 ++
 lib/Transforms/Scalar/CondPropagate.cpp | 295 +
 lib/Transforms/Scalar/ConstantProp.cpp | 90 +
 lib/Transforms/Scalar/DCE.cpp | 133 +
 lib/Transforms/Scalar/DeadStoreElimination.cpp | 461 +
 lib/Transforms/Scalar/GVN.cpp | 1738 +++
 lib/Transforms/Scalar/GVNPRE.cpp | 1885 +++
 lib/Transforms/Scalar/IndVarSimplify.cpp | 880 ++
 lib/Transforms/Scalar/InstructionCombining.cpp | 12919 +++++++++++++++++++
 lib/Transforms/Scalar/JumpThreading.cpp | 954 +
 lib/Transforms/Scalar/LICM.cpp | 885 ++
 lib/Transforms/Scalar/LoopDeletion.cpp | 280 +
 lib/Transforms/Scalar/LoopIndexSplit.cpp | 1237 ++
 lib/Transforms/Scalar/LoopRotation.cpp | 572 +
 lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2605 ++++
 lib/Transforms/Scalar/LoopUnroll.cpp | 183 +
 lib/Transforms/Scalar/LoopUnswitch.cpp | 1098 ++
 lib/Transforms/Scalar/Makefile | 15 +
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 741 ++
 lib/Transforms/Scalar/PredicateSimplifier.cpp | 2725 ++++
 lib/Transforms/Scalar/Reassociate.cpp | 896 ++
 lib/Transforms/Scalar/Reg2Mem.cpp | 125 +
 lib/Transforms/Scalar/SCCP.cpp | 1855 +++
 lib/Transforms/Scalar/Scalar.cpp | 111 +
 lib/Transforms/Scalar/ScalarReplAggregates.cpp | 1820 +++
 lib/Transforms/Scalar/SimplifyCFGPass.cpp | 232 +
 lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 159 +
 lib/Transforms/Scalar/SimplifyLibCalls.cpp | 2429 ++++
 lib/Transforms/Scalar/TailDuplication.cpp | 365 +
 lib/Transforms/Scalar/TailRecursionElimination.cpp | 479 +
 lib/Transforms/Utils/AddrModeMatcher.cpp | 594 +
 lib/Transforms/Utils/BasicBlockUtils.cpp | 622 +
 lib/Transforms/Utils/BasicInliner.cpp | 181 +
 lib/Transforms/Utils/BreakCriticalEdges.cpp | 282 +
 lib/Transforms/Utils/CMakeLists.txt | 27 +
 lib/Transforms/Utils/CloneFunction.cpp | 533 +
 lib/Transforms/Utils/CloneLoop.cpp | 152 +
 lib/Transforms/Utils/CloneModule.cpp | 126 +
 lib/Transforms/Utils/CloneTrace.cpp | 119 +
 lib/Transforms/Utils/CodeExtractor.cpp | 746 ++
 lib/Transforms/Utils/DemoteRegToStack.cpp | 144 +
 lib/Transforms/Utils/InlineCost.cpp | 315 +
 lib/Transforms/Utils/InlineFunction.cpp | 656 +
 lib/Transforms/Utils/InstructionNamer.cpp | 63 +
 lib/Transforms/Utils/LCSSA.cpp | 276 +
 lib/Transforms/Utils/Local.cpp | 338 +
 lib/Transforms/Utils/LoopSimplify.cpp | 600 +
 lib/Transforms/Utils/LowerAllocations.cpp | 177 +
 lib/Transforms/Utils/LowerInvoke.cpp | 614 +
 lib/Transforms/Utils/LowerSwitch.cpp | 323 +
 lib/Transforms/Utils/Makefile | 15 +
 lib/Transforms/Utils/Mem2Reg.cpp | 92 +
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 1003 ++
 lib/Transforms/Utils/SimplifyCFG.cpp | 2213 ++++
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 139 +
 lib/Transforms/Utils/UnrollLoop.cpp | 369 +
 lib/Transforms/Utils/ValueMapper.cpp | 143 +
 lib/VMCore/AsmWriter.cpp | 1880 +++
 lib/VMCore/Attributes.cpp | 310 +
 lib/VMCore/AutoUpgrade.cpp | 430 +
 lib/VMCore/BasicBlock.cpp | 274 +
 lib/VMCore/CMakeLists.txt | 30 +
 lib/VMCore/ConstantFold.cpp | 1681 +++
 lib/VMCore/ConstantFold.h | 60 +
 lib/VMCore/Constants.cpp | 2832 ++++
 lib/VMCore/Core.cpp | 1450 +++
 lib/VMCore/Dominators.cpp | 287 +
 lib/VMCore/Function.cpp | 367 +
 lib/VMCore/Globals.cpp | 273 +
 lib/VMCore/InlineAsm.cpp | 231 +
 lib/VMCore/Instruction.cpp | 387 +
 lib/VMCore/Instructions.cpp | 2963 +++++
 lib/VMCore/IntrinsicInst.cpp | 77 +
 lib/VMCore/LeakDetector.cpp | 131 +
 lib/VMCore/Makefile | 33 +
 lib/VMCore/Mangler.cpp | 196 +
 lib/VMCore/Module.cpp | 381 +
 lib/VMCore/ModuleProvider.cpp | 26 +
 lib/VMCore/Pass.cpp | 323 +
 lib/VMCore/PassManager.cpp | 1710 +++
 lib/VMCore/PrintModulePass.cpp | 99 +
 lib/VMCore/SymbolTableListTraitsImpl.h | 118 +
 lib/VMCore/Type.cpp | 1457 +++
 lib/VMCore/TypeSymbolTable.cpp | 165 +
 lib/VMCore/Use.cpp | 233 +
 lib/VMCore/Value.cpp | 581 +
 lib/VMCore/ValueSymbolTable.cpp | 137 +
 lib/VMCore/ValueTypes.cpp | 185 +
 lib/VMCore/Verifier.cpp | 1770 +++
 828 files changed, 372534 insertions(+)
 create mode 100644 lib/Analysis/AliasAnalysis.cpp
 create mode 100644 lib/Analysis/AliasAnalysisCounter.cpp
 create mode 100644 lib/Analysis/AliasAnalysisEvaluator.cpp
 create mode 100644 lib/Analysis/AliasDebugger.cpp
 create mode 100644 lib/Analysis/AliasSetTracker.cpp
 create mode 100644 lib/Analysis/Analysis.cpp
 create mode 100644 lib/Analysis/BasicAliasAnalysis.cpp
 create mode 100644 lib/Analysis/CFGPrinter.cpp
 create mode 100644 lib/Analysis/CMakeLists.txt
 create mode 100644 lib/Analysis/CaptureTracking.cpp
 create mode 100644 lib/Analysis/ConstantFolding.cpp
 create mode 100644 lib/Analysis/DbgInfoPrinter.cpp
 create mode 100644 lib/Analysis/DebugInfo.cpp
 create mode 100644 lib/Analysis/IPA/Andersens.cpp
 create mode 100644 lib/Analysis/IPA/CMakeLists.txt
 create mode 100644 lib/Analysis/IPA/CallGraph.cpp
 create mode 100644 lib/Analysis/IPA/CallGraphSCCPass.cpp
 create mode 100644 lib/Analysis/IPA/FindUsedTypes.cpp
 create mode 100644 lib/Analysis/IPA/GlobalsModRef.cpp
 create mode 100644 lib/Analysis/IPA/Makefile
 create mode 100644 lib/Analysis/IVUsers.cpp
 create mode 100644 lib/Analysis/InstCount.cpp
 create mode 100644 lib/Analysis/Interval.cpp
 create mode 100644 lib/Analysis/IntervalPartition.cpp
 create mode 100644 lib/Analysis/LibCallAliasAnalysis.cpp
 create mode 100644 lib/Analysis/LibCallSemantics.cpp
 create mode 100644 lib/Analysis/LiveValues.cpp
 create mode 100644 lib/Analysis/LoopInfo.cpp
 create mode 100644 lib/Analysis/LoopPass.cpp
 create mode 100644 lib/Analysis/LoopVR.cpp
 create mode 100644 lib/Analysis/Makefile
 create mode 100644 lib/Analysis/MemoryDependenceAnalysis.cpp
 create mode 100644 lib/Analysis/PostDominators.cpp
 create mode 100644 lib/Analysis/ProfileInfo.cpp
 create mode 100644 lib/Analysis/ProfileInfoLoader.cpp
 create mode 100644 lib/Analysis/ProfileInfoLoaderPass.cpp
 create mode 100644 lib/Analysis/ScalarEvolution.cpp
 create mode 100644 lib/Analysis/ScalarEvolutionExpander.cpp
 create mode 100644 lib/Analysis/SparsePropagation.cpp
 create mode 100644 lib/Analysis/Trace.cpp
 create mode 100644 lib/Analysis/ValueTracking.cpp
 create mode 100644 lib/Archive/Archive.cpp
 create mode 100644 lib/Archive/ArchiveInternals.h
 create mode 100644 lib/Archive/ArchiveReader.cpp
 create mode 100644 lib/Archive/ArchiveWriter.cpp
 create mode 100644 lib/Archive/CMakeLists.txt
 create mode 100644 lib/Archive/Makefile
 create mode 100644 lib/AsmParser/CMakeLists.txt
 create mode 100644 lib/AsmParser/LLLexer.cpp
 create mode 100644 lib/AsmParser/LLLexer.h
 create mode 100644 lib/AsmParser/LLParser.cpp
 create mode 100644 lib/AsmParser/LLParser.h
 create mode 100644 lib/AsmParser/LLToken.h
 create mode 100644 lib/AsmParser/Makefile
 create mode 100644 lib/AsmParser/Parser.cpp
 create mode 100644 lib/Bitcode/Makefile
 create mode 100644 lib/Bitcode/Reader/BitReader.cpp
 create mode 100644 lib/Bitcode/Reader/BitcodeReader.cpp
 create mode 100644 lib/Bitcode/Reader/BitcodeReader.h
 create mode 100644 lib/Bitcode/Reader/CMakeLists.txt
 create mode 100644 lib/Bitcode/Reader/Deserialize.cpp
 create mode 100644 lib/Bitcode/Reader/DeserializeAPFloat.cpp
 create mode 100644 lib/Bitcode/Reader/DeserializeAPInt.cpp
 create mode 100644 lib/Bitcode/Reader/Makefile
 create mode 100644 lib/Bitcode/Writer/BitWriter.cpp
 create mode 100644 lib/Bitcode/Writer/BitcodeWriter.cpp
 create mode 100644 lib/Bitcode/Writer/BitcodeWriterPass.cpp
 create mode 100644 lib/Bitcode/Writer/CMakeLists.txt
 create mode 100644 lib/Bitcode/Writer/Makefile
 create mode 100644 lib/Bitcode/Writer/Serialize.cpp
 create mode 100644 lib/Bitcode/Writer/SerializeAPFloat.cpp
 create mode 100644 lib/Bitcode/Writer/SerializeAPInt.cpp
 create mode 100644 lib/Bitcode/Writer/ValueEnumerator.cpp
 create mode 100644 lib/Bitcode/Writer/ValueEnumerator.h
 create mode 100644 lib/CodeGen/AsmPrinter/AsmPrinter.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/CodeGen/AsmPrinter/DIE.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DIE.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfDebug.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfDebug.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfException.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfException.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfLabel.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfLabel.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfPrinter.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfWriter.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/Makefile
 create mode 100644 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
 create mode 100644 lib/CodeGen/BranchFolding.cpp
 create mode 100644 lib/CodeGen/CMakeLists.txt
 create mode 100644 lib/CodeGen/CodePlacementOpt.cpp
 create mode 100644 lib/CodeGen/DeadMachineInstructionElim.cpp
 create mode 100644 lib/CodeGen/DwarfEHPrepare.cpp
 create mode 100644 lib/CodeGen/ELFWriter.cpp
 create mode 100644 lib/CodeGen/ELFWriter.h
 create mode 100644 lib/CodeGen/GCMetadata.cpp
 create mode 100644 lib/CodeGen/GCMetadataPrinter.cpp
 create mode 100644 lib/CodeGen/GCStrategy.cpp
 create mode 100644 lib/CodeGen/IfConversion.cpp
 create mode 100644 lib/CodeGen/IntrinsicLowering.cpp
 create mode 100644 lib/CodeGen/LLVMTargetMachine.cpp
 create mode 100644 lib/CodeGen/LatencyPriorityQueue.cpp
 create mode 100644 lib/CodeGen/LiveInterval.cpp
 create mode 100644 lib/CodeGen/LiveIntervalAnalysis.cpp
 create mode 100644 lib/CodeGen/LiveStackAnalysis.cpp
 create mode 100644 lib/CodeGen/LiveVariables.cpp
 create mode 100644 lib/CodeGen/LowerSubregs.cpp
 create mode 100644 lib/CodeGen/MachOWriter.cpp
 create mode 100644 lib/CodeGen/MachOWriter.h
 create mode 100644 lib/CodeGen/MachineBasicBlock.cpp
 create mode 100644 lib/CodeGen/MachineDominators.cpp
 create mode 100644 lib/CodeGen/MachineFunction.cpp
 create mode 100644 lib/CodeGen/MachineInstr.cpp
 create mode 100644 lib/CodeGen/MachineLICM.cpp
 create mode 100644 lib/CodeGen/MachineLoopInfo.cpp
 create mode 100644 lib/CodeGen/MachineModuleInfo.cpp
 create mode 100644 lib/CodeGen/MachinePassRegistry.cpp
 create mode 100644 lib/CodeGen/MachineRegisterInfo.cpp
 create mode 100644 lib/CodeGen/MachineSink.cpp
 create mode 100644 lib/CodeGen/MachineVerifier.cpp
 create mode 100644 lib/CodeGen/Makefile
 create mode 100644 lib/CodeGen/OcamlGC.cpp
 create mode 100644 lib/CodeGen/PBQP.cpp
 create mode 100644 lib/CodeGen/PBQP.h
 create mode 100644 lib/CodeGen/PHIElimination.cpp
 create mode 100644 lib/CodeGen/Passes.cpp
 create mode 100644 lib/CodeGen/PostRASchedulerList.cpp
 create mode 100644 lib/CodeGen/PreAllocSplitting.cpp
 create mode 100644 lib/CodeGen/PrologEpilogInserter.cpp
 create mode 100644 lib/CodeGen/PrologEpilogInserter.h
 create mode 100644 lib/CodeGen/PseudoSourceValue.cpp
 create mode 100644 lib/CodeGen/README.txt
 create mode 100644 lib/CodeGen/RegAllocBigBlock.cpp
 create mode 100644 lib/CodeGen/RegAllocLinearScan.cpp
 create mode 100644 lib/CodeGen/RegAllocLocal.cpp
 create mode 100644 lib/CodeGen/RegAllocPBQP.cpp
 create mode 100644 lib/CodeGen/RegAllocSimple.cpp
 create mode 100644 lib/CodeGen/RegisterCoalescer.cpp
 create mode 100644 lib/CodeGen/RegisterScavenging.cpp
 create mode 100644 lib/CodeGen/ScheduleDAG.cpp
 create mode 100644 lib/CodeGen/ScheduleDAGEmit.cpp
 create mode 100644 lib/CodeGen/ScheduleDAGInstrs.cpp
 create mode 100644 lib/CodeGen/ScheduleDAGInstrs.h
 create mode 100644 lib/CodeGen/ScheduleDAGPrinter.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/CMakeLists.txt
 create mode 100644 lib/CodeGen/SelectionDAG/CallingConvLower.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/DAGCombiner.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/FastISel.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeTypes.h
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/Makefile
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
 create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAG.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/TargetLowering.cpp
 create mode 100644 lib/CodeGen/ShadowStackGC.cpp
 create mode 100644 lib/CodeGen/ShrinkWrapping.cpp
 create mode 100644 lib/CodeGen/SimpleRegisterCoalescing.cpp
 create mode 100644 lib/CodeGen/SimpleRegisterCoalescing.h
 create mode 100644 lib/CodeGen/Spiller.cpp
 create mode 100644 lib/CodeGen/Spiller.h
 create mode 100644 lib/CodeGen/StackProtector.cpp
 create mode 100644 lib/CodeGen/StackSlotColoring.cpp
 create mode 100644 lib/CodeGen/StrongPHIElimination.cpp
 create mode 100644 lib/CodeGen/TargetInstrInfoImpl.cpp
 create mode 100644 lib/CodeGen/TwoAddressInstructionPass.cpp
 create mode 100644 lib/CodeGen/UnreachableBlockElim.cpp
 create mode 100644 lib/CodeGen/VirtRegMap.cpp
 create mode 100644 lib/CodeGen/VirtRegMap.h
 create mode 100644 lib/CodeGen/VirtRegRewriter.cpp
 create mode 100644 lib/CodeGen/VirtRegRewriter.h
 create mode 100644 lib/CompilerDriver/Action.cpp
 create mode 100644 lib/CompilerDriver/CMakeLists.txt
 create mode 100644 lib/CompilerDriver/CompilationGraph.cpp
 create mode 100644 lib/CompilerDriver/Makefile
 create mode 100644 lib/CompilerDriver/Plugin.cpp
 create mode 100644 lib/CompilerDriver/Tool.cpp
 create mode 100644 lib/Debugger/CMakeLists.txt
 create mode 100644 lib/Debugger/Debugger.cpp
 create mode 100644 lib/Debugger/Makefile
 create mode 100644 lib/Debugger/ProgramInfo.cpp
 create mode 100644 lib/Debugger/README.txt
 create mode 100644 lib/Debugger/RuntimeInfo.cpp
 create mode 100644 lib/Debugger/SourceFile.cpp
 create mode 100644 lib/Debugger/SourceLanguage-CFamily.cpp
 create mode 100644 lib/Debugger/SourceLanguage-CPlusPlus.cpp
 create mode 100644 lib/Debugger/SourceLanguage-Unknown.cpp
 create mode 100644 lib/Debugger/SourceLanguage.cpp
 create mode 100644 lib/ExecutionEngine/CMakeLists.txt
 create mode 100644 lib/ExecutionEngine/ExecutionEngine.cpp
 create mode 100644 lib/ExecutionEngine/ExecutionEngineBindings.cpp
 create mode 100644 lib/ExecutionEngine/Interpreter/CMakeLists.txt
 create mode 100644 lib/ExecutionEngine/Interpreter/Execution.cpp
 create mode 100644 lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
 create mode 100644 lib/ExecutionEngine/Interpreter/Interpreter.cpp
 create mode 100644 lib/ExecutionEngine/Interpreter/Interpreter.h
 create mode 100644 lib/ExecutionEngine/Interpreter/Makefile
 create mode 100644 lib/ExecutionEngine/JIT/CMakeLists.txt
 create mode 100644 lib/ExecutionEngine/JIT/Intercept.cpp
 create mode 100644 lib/ExecutionEngine/JIT/JIT.cpp
 create mode 100644 lib/ExecutionEngine/JIT/JIT.h
 create mode 100644 lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
 create mode 100644 lib/ExecutionEngine/JIT/JITDwarfEmitter.h
 create mode 100644 lib/ExecutionEngine/JIT/JITEmitter.cpp
 create mode 100644 lib/ExecutionEngine/JIT/JITMemoryManager.cpp
 create mode 100644 lib/ExecutionEngine/JIT/Makefile
 create mode 100644 lib/ExecutionEngine/JIT/TargetSelect.cpp
 create mode 100644 lib/ExecutionEngine/Makefile
 create mode 100644 lib/Linker/CMakeLists.txt
 create mode 100644 lib/Linker/LinkArchives.cpp
 create mode 100644 lib/Linker/LinkItems.cpp
 create mode 100644 lib/Linker/LinkModules.cpp
 create mode 100644 lib/Linker/Linker.cpp
 create mode 100644 lib/Linker/Makefile
 create mode 100644 lib/Makefile
 create mode 100644 lib/Support/APFloat.cpp
 create mode 100644 lib/Support/APInt.cpp
 create mode 100644 lib/Support/APSInt.cpp
 create mode 100644 lib/Support/Allocator.cpp
 create mode 100644 lib/Support/Annotation.cpp
 create mode 100644 lib/Support/CMakeLists.txt
 create mode 100644 lib/Support/CommandLine.cpp
 create mode 100644 lib/Support/ConstantRange.cpp
 create mode 100644 lib/Support/Debug.cpp
 create mode 100644 lib/Support/Dwarf.cpp
 create mode 100644 lib/Support/FileUtilities.cpp
 create mode 100644 lib/Support/FoldingSet.cpp
 create mode 100644 lib/Support/GraphWriter.cpp
 create mode 100644 lib/Support/IsInf.cpp
 create mode 100644 lib/Support/IsNAN.cpp
 create mode 100644 lib/Support/Makefile
 create mode 100644 lib/Support/ManagedStatic.cpp
 create mode 100644 lib/Support/MemoryBuffer.cpp
 create mode 100644 lib/Support/PluginLoader.cpp
 create mode 100644 lib/Support/PrettyStackTrace.cpp
 create mode 100644 lib/Support/SlowOperationInformer.cpp
 create mode 100644 lib/Support/SmallPtrSet.cpp
 create mode 100644 lib/Support/Statistic.cpp
 create mode 100644 lib/Support/Streams.cpp
 create mode 100644 lib/Support/StringExtras.cpp
 create mode 100644 lib/Support/StringMap.cpp
 create mode 100644 lib/Support/StringPool.cpp
 create mode 100644 lib/Support/SystemUtils.cpp
 create mode 100644 lib/Support/Timer.cpp
 create mode 100644 lib/Support/Triple.cpp
 create mode 100644 lib/Support/raw_ostream.cpp
 create mode 100644 lib/System/Alarm.cpp
 create mode 100644 lib/System/Atomic.cpp
 create mode 100644 lib/System/CMakeLists.txt
 create mode 100644 lib/System/Disassembler.cpp
 create mode 100644 lib/System/DynamicLibrary.cpp
 create mode 100644 lib/System/Host.cpp
 create mode 100644 lib/System/IncludeFile.cpp
 create mode 100644 lib/System/LICENSE.TXT
 create mode 100644 lib/System/Makefile
 create mode 100644 lib/System/Memory.cpp
 create mode 100644 lib/System/Mutex.cpp
 create mode 100644 lib/System/Path.cpp
 create mode 100644 lib/System/Process.cpp
 create mode 100644 lib/System/Program.cpp
 create mode 100644 lib/System/README.txt
 create mode 100644 lib/System/Signals.cpp
 create mode 100644 lib/System/TimeValue.cpp
 create mode 100644 lib/System/Unix/Alarm.inc
 create mode 100644 lib/System/Unix/Host.inc
 create mode 100644 lib/System/Unix/Memory.inc
 create mode 100644 lib/System/Unix/Mutex.inc
 create mode 100644 lib/System/Unix/Path.inc
 create mode 100644 lib/System/Unix/Process.inc
 create mode 100644 lib/System/Unix/Program.inc
 create mode 100644 lib/System/Unix/README.txt
 create mode 100644 lib/System/Unix/Signals.inc
 create mode 100644 lib/System/Unix/TimeValue.inc
 create mode 100644 lib/System/Unix/Unix.h
 create mode 100644 lib/System/Win32/Alarm.inc
 create mode 100644 lib/System/Win32/DynamicLibrary.inc
 create mode 100644 lib/System/Win32/Host.inc
 create mode 100644 lib/System/Win32/Memory.inc
 create mode 100644 lib/System/Win32/Mutex.inc
 create mode 100644 lib/System/Win32/Path.inc
 create mode 100644 lib/System/Win32/Process.inc
 create mode 100644 lib/System/Win32/Program.inc
 create mode 100644 lib/System/Win32/Signals.inc
 create mode 100644 lib/System/Win32/TimeValue.inc
 create mode 100644 lib/System/Win32/Win32.h
 create mode 100644 lib/Target/ARM/ARM.h
 create mode 100644 lib/Target/ARM/ARM.td
 create mode 100644 lib/Target/ARM/ARMAddressingModes.h
 create mode 100644 lib/Target/ARM/ARMBuildAttrs.h
 create mode 100644 lib/Target/ARM/ARMCallingConv.td
 create mode 100644 lib/Target/ARM/ARMCodeEmitter.cpp
 create mode 100644 lib/Target/ARM/ARMConstantIslandPass.cpp
 create mode 100644 lib/Target/ARM/ARMConstantPoolValue.cpp
 create mode 100644 lib/Target/ARM/ARMConstantPoolValue.h
 create mode 100644 lib/Target/ARM/ARMFrameInfo.h
 create mode 100644 lib/Target/ARM/ARMISelDAGToDAG.cpp
 create mode 100644 lib/Target/ARM/ARMISelLowering.cpp
 create mode 100644 lib/Target/ARM/ARMISelLowering.h
 create mode 100644 lib/Target/ARM/ARMInstrFormats.td
 create mode 100644 lib/Target/ARM/ARMInstrInfo.cpp
 create mode 100644 lib/Target/ARM/ARMInstrInfo.h
 create mode 100644 lib/Target/ARM/ARMInstrInfo.td
 create mode 100644 lib/Target/ARM/ARMInstrThumb.td
 create mode 100644 lib/Target/ARM/ARMInstrThumb2.td
 create mode 100644 lib/Target/ARM/ARMInstrVFP.td
 create mode 100644 lib/Target/ARM/ARMJITInfo.cpp
 create mode 100644 lib/Target/ARM/ARMJITInfo.h
 create mode 100644 lib/Target/ARM/ARMLoadStoreOptimizer.cpp
 create mode 100644 lib/Target/ARM/ARMMachineFunctionInfo.h
 create mode 100644 lib/Target/ARM/ARMRegisterInfo.cpp
 create mode 100644 lib/Target/ARM/ARMRegisterInfo.h
 create mode 100644 lib/Target/ARM/ARMRegisterInfo.td
 create mode 100644 lib/Target/ARM/ARMRelocations.h
 create mode 100644 lib/Target/ARM/ARMSubtarget.cpp
 create mode 100644 lib/Target/ARM/ARMSubtarget.h
 create mode 100644 lib/Target/ARM/ARMTargetAsmInfo.cpp
 create mode 100644 lib/Target/ARM/ARMTargetAsmInfo.h
 create mode 100644 lib/Target/ARM/ARMTargetMachine.cpp
 create mode 100644 lib/Target/ARM/ARMTargetMachine.h
 create mode 100644 lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
 create mode 100644 lib/Target/ARM/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/Target/ARM/AsmPrinter/Makefile
 create mode 100644 lib/Target/ARM/CMakeLists.txt
 create mode 100644 lib/Target/ARM/Makefile
 create mode 100644 lib/Target/ARM/README-Thumb.txt
 create mode 100644 lib/Target/ARM/README.txt
 create mode 100644 lib/Target/Alpha/Alpha.h
 create mode 100644 lib/Target/Alpha/Alpha.td
 create mode 100644 lib/Target/Alpha/AlphaBranchSelector.cpp
 create mode 100644 lib/Target/Alpha/AlphaCodeEmitter.cpp
 create mode 100644 lib/Target/Alpha/AlphaISelDAGToDAG.cpp
 create mode 100644 lib/Target/Alpha/AlphaISelLowering.cpp
 create mode 100644 lib/Target/Alpha/AlphaISelLowering.h
 create mode 100644 lib/Target/Alpha/AlphaInstrFormats.td
 create mode 100644 lib/Target/Alpha/AlphaInstrInfo.cpp
 create mode 100644 lib/Target/Alpha/AlphaInstrInfo.h
 create mode 100644 lib/Target/Alpha/AlphaInstrInfo.td
 create mode 100644 lib/Target/Alpha/AlphaJITInfo.cpp
 create mode 100644 lib/Target/Alpha/AlphaJITInfo.h
 create mode 100644 lib/Target/Alpha/AlphaLLRP.cpp
 create mode 100644 lib/Target/Alpha/AlphaRegisterInfo.cpp
 create mode 100644 lib/Target/Alpha/AlphaRegisterInfo.h
 create mode 100644 lib/Target/Alpha/AlphaRegisterInfo.td
 create mode 100644 lib/Target/Alpha/AlphaRelocations.h
 create mode 100644 lib/Target/Alpha/AlphaSchedule.td
 create mode 100644 lib/Target/Alpha/AlphaSubtarget.cpp
 create mode 100644 lib/Target/Alpha/AlphaSubtarget.h
 create mode 100644 lib/Target/Alpha/AlphaTargetAsmInfo.cpp
 create mode 100644 lib/Target/Alpha/AlphaTargetAsmInfo.h
 create mode 100644 lib/Target/Alpha/AlphaTargetMachine.cpp
 create mode 100644 lib/Target/Alpha/AlphaTargetMachine.h
 create mode 100644 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
 create mode 100644 lib/Target/Alpha/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/Target/Alpha/AsmPrinter/Makefile
 create mode 100644 lib/Target/Alpha/CMakeLists.txt
 create mode 100644 lib/Target/Alpha/Makefile
 create mode 100644 lib/Target/Alpha/README.txt
 create mode 100644 lib/Target/CBackend/CBackend.cpp
 create mode 100644 lib/Target/CBackend/CMakeLists.txt
 create mode 100644 lib/Target/CBackend/CTargetMachine.h
 create mode 100644 lib/Target/CBackend/Makefile
 create mode 100644 lib/Target/CMakeLists.txt
 create mode 100644 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/Target/CellSPU/AsmPrinter/Makefile
 create mode 100644 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
 create mode 100644 lib/Target/CellSPU/CMakeLists.txt
 create mode 100644 lib/Target/CellSPU/CellSDKIntrinsics.td
 create mode 100644 lib/Target/CellSPU/Makefile
 create mode 100644 lib/Target/CellSPU/README.txt
 create mode 100644 lib/Target/CellSPU/SPU.h
 create mode 100644 lib/Target/CellSPU/SPU.td
 create mode 100644 lib/Target/CellSPU/SPU128InstrInfo.td
 create mode 100644 lib/Target/CellSPU/SPU64InstrInfo.td
 create mode 100644 lib/Target/CellSPU/SPUCallingConv.td
 create mode 100644 lib/Target/CellSPU/SPUFrameInfo.cpp
 create mode 100644 lib/Target/CellSPU/SPUFrameInfo.h
 create mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.cpp
 create mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.h
 create mode 100644 lib/Target/CellSPU/SPUISelDAGToDAG.cpp
 create mode 100644 lib/Target/CellSPU/SPUISelLowering.cpp
 create mode 100644 lib/Target/CellSPU/SPUISelLowering.h
 create mode 100644 lib/Target/CellSPU/SPUInstrBuilder.h
 create mode 100644 lib/Target/CellSPU/SPUInstrFormats.td
 create mode 100644 lib/Target/CellSPU/SPUInstrInfo.cpp
 create mode 100644 lib/Target/CellSPU/SPUInstrInfo.h
 create mode 100644 lib/Target/CellSPU/SPUInstrInfo.td
 create mode 100644 lib/Target/CellSPU/SPUMachineFunction.h
 create mode 100644 lib/Target/CellSPU/SPUMathInstr.td
 create mode 100644 lib/Target/CellSPU/SPUNodes.td
 create mode 100644 lib/Target/CellSPU/SPUOperands.td
 create mode 100644 lib/Target/CellSPU/SPURegisterInfo.cpp
 create mode 100644 lib/Target/CellSPU/SPURegisterInfo.h
 create mode 100644 lib/Target/CellSPU/SPURegisterInfo.td
 create mode 100644 lib/Target/CellSPU/SPURegisterNames.h
 create mode 100644 lib/Target/CellSPU/SPUSchedule.td
 create mode 100644 lib/Target/CellSPU/SPUSubtarget.cpp
 create mode 100644 lib/Target/CellSPU/SPUSubtarget.h
 create mode 100644 lib/Target/CellSPU/SPUTargetAsmInfo.cpp
 create mode 100644 lib/Target/CellSPU/SPUTargetAsmInfo.h
 create mode 100644 lib/Target/CellSPU/SPUTargetMachine.cpp
 create mode 100644 lib/Target/CellSPU/SPUTargetMachine.h
 create mode 100644 lib/Target/CppBackend/CMakeLists.txt
 create mode 100644 lib/Target/CppBackend/CPPBackend.cpp
 create mode 100644 lib/Target/CppBackend/CPPTargetMachine.h
 create mode 100644 lib/Target/CppBackend/Makefile
 create mode 100644 lib/Target/DarwinTargetAsmInfo.cpp
 create mode 100644 lib/Target/ELFTargetAsmInfo.cpp
 create mode 100644 lib/Target/IA64/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
 create mode 100644 lib/Target/IA64/AsmPrinter/Makefile
 create mode 100644 lib/Target/IA64/CMakeLists.txt
 create mode 100644 lib/Target/IA64/IA64.h
 create mode 100644 lib/Target/IA64/IA64.td
 create mode 100644 lib/Target/IA64/IA64Bundling.cpp
 create mode 100644 lib/Target/IA64/IA64ISelDAGToDAG.cpp
 create mode 100644 lib/Target/IA64/IA64ISelLowering.cpp
 create mode 100644 lib/Target/IA64/IA64ISelLowering.h
 create mode 100644 lib/Target/IA64/IA64InstrBuilder.h
 create mode 100644 lib/Target/IA64/IA64InstrFormats.td
 create mode 100644 lib/Target/IA64/IA64InstrInfo.cpp
 create mode 100644 lib/Target/IA64/IA64InstrInfo.h
 create mode 100644 lib/Target/IA64/IA64InstrInfo.td
 create mode 100644 lib/Target/IA64/IA64MachineFunctionInfo.h
 create mode 100644 lib/Target/IA64/IA64RegisterInfo.cpp
 create mode 100644 lib/Target/IA64/IA64RegisterInfo.h
 create mode 100644 lib/Target/IA64/IA64RegisterInfo.td
 create mode 100644 lib/Target/IA64/IA64Subtarget.cpp
 create mode 100644 lib/Target/IA64/IA64Subtarget.h
 create mode 100644 lib/Target/IA64/IA64TargetAsmInfo.cpp
 create mode 100644 lib/Target/IA64/IA64TargetAsmInfo.h
 create mode 100644 lib/Target/IA64/IA64TargetMachine.cpp
 create mode 100644 lib/Target/IA64/IA64TargetMachine.h
 create mode 100644 lib/Target/IA64/Makefile
 create mode 100644 lib/Target/IA64/README
 create mode 100644 lib/Target/MSIL/CMakeLists.txt
 create mode 100644 lib/Target/MSIL/MSILWriter.cpp
 create mode 100644 lib/Target/MSIL/MSILWriter.h
 create mode 100644 lib/Target/MSIL/Makefile
 create mode 100644 lib/Target/MSIL/README.TXT
 create mode 100644 lib/Target/MSP430/CMakeLists.txt
 create mode 100644 lib/Target/MSP430/MSP430.h
 create mode 100644 lib/Target/MSP430/MSP430.td
 create mode 100644 lib/Target/MSP430/MSP430AsmPrinter.cpp
 create mode 100644 lib/Target/MSP430/MSP430CallingConv.td
 create mode 100644 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
 create mode 100644 lib/Target/MSP430/MSP430ISelLowering.cpp
 create mode 100644 lib/Target/MSP430/MSP430ISelLowering.h
 create mode 100644 lib/Target/MSP430/MSP430InstrFormats.td
 create mode 100644 lib/Target/MSP430/MSP430InstrInfo.cpp
 create mode 100644 lib/Target/MSP430/MSP430InstrInfo.h
 create mode 100644 lib/Target/MSP430/MSP430InstrInfo.td
 create mode 100644 lib/Target/MSP430/MSP430MachineFunctionInfo.h
 create mode 100644 lib/Target/MSP430/MSP430RegisterInfo.cpp
 create mode 100644 lib/Target/MSP430/MSP430RegisterInfo.h
 create mode 100644 lib/Target/MSP430/MSP430RegisterInfo.td
 create mode 100644 lib/Target/MSP430/MSP430Subtarget.cpp
 create mode 100644 lib/Target/MSP430/MSP430Subtarget.h
 create mode 100644 lib/Target/MSP430/MSP430TargetAsmInfo.cpp
 create mode 100644 lib/Target/MSP430/MSP430TargetAsmInfo.h
 create mode 100644 lib/Target/MSP430/MSP430TargetMachine.cpp
 create mode 100644 lib/Target/MSP430/MSP430TargetMachine.h
 create mode 100644 lib/Target/MSP430/Makefile
 create mode 100644 lib/Target/MSP430/README.txt
 create mode 100644 lib/Target/Makefile
 create mode 100644 lib/Target/Mips/AsmPrinter/CMakeLists.txt
 create mode 100644 lib/Target/Mips/AsmPrinter/Makefile
create mode 100644 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp create mode 100644 lib/Target/Mips/CMakeLists.txt create mode 100644 lib/Target/Mips/Makefile create mode 100644 lib/Target/Mips/Mips.h create mode 100644 lib/Target/Mips/Mips.td create mode 100644 lib/Target/Mips/MipsCallingConv.td create mode 100644 lib/Target/Mips/MipsDelaySlotFiller.cpp create mode 100644 lib/Target/Mips/MipsISelDAGToDAG.cpp create mode 100644 lib/Target/Mips/MipsISelLowering.cpp create mode 100644 lib/Target/Mips/MipsISelLowering.h create mode 100644 lib/Target/Mips/MipsInstrFPU.td create mode 100644 lib/Target/Mips/MipsInstrFormats.td create mode 100644 lib/Target/Mips/MipsInstrInfo.cpp create mode 100644 lib/Target/Mips/MipsInstrInfo.h create mode 100644 lib/Target/Mips/MipsInstrInfo.td create mode 100644 lib/Target/Mips/MipsMachineFunction.h create mode 100644 lib/Target/Mips/MipsRegisterInfo.cpp create mode 100644 lib/Target/Mips/MipsRegisterInfo.h create mode 100644 lib/Target/Mips/MipsRegisterInfo.td create mode 100644 lib/Target/Mips/MipsSchedule.td create mode 100644 lib/Target/Mips/MipsSubtarget.cpp create mode 100644 lib/Target/Mips/MipsSubtarget.h create mode 100644 lib/Target/Mips/MipsTargetAsmInfo.cpp create mode 100644 lib/Target/Mips/MipsTargetAsmInfo.h create mode 100644 lib/Target/Mips/MipsTargetMachine.cpp create mode 100644 lib/Target/Mips/MipsTargetMachine.h create mode 100644 lib/Target/PIC16/CMakeLists.txt create mode 100644 lib/Target/PIC16/Makefile create mode 100644 lib/Target/PIC16/PIC16.h create mode 100644 lib/Target/PIC16/PIC16.td create mode 100644 lib/Target/PIC16/PIC16AsmPrinter.cpp create mode 100644 lib/Target/PIC16/PIC16AsmPrinter.h create mode 100644 lib/Target/PIC16/PIC16DebugInfo.cpp create mode 100644 lib/Target/PIC16/PIC16DebugInfo.h create mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp create mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.h create mode 100644 lib/Target/PIC16/PIC16ISelLowering.cpp create mode 100644 lib/Target/PIC16/PIC16ISelLowering.h create mode 100644 lib/Target/PIC16/PIC16InstrFormats.td create mode 100644 lib/Target/PIC16/PIC16InstrInfo.cpp create mode 100644 lib/Target/PIC16/PIC16InstrInfo.h create mode 100644 lib/Target/PIC16/PIC16InstrInfo.td create mode 100644 lib/Target/PIC16/PIC16MemSelOpt.cpp create mode 100644 lib/Target/PIC16/PIC16RegisterInfo.cpp create mode 100644 lib/Target/PIC16/PIC16RegisterInfo.h create mode 100644 lib/Target/PIC16/PIC16RegisterInfo.td create mode 100644 lib/Target/PIC16/PIC16Subtarget.cpp create mode 100644 lib/Target/PIC16/PIC16Subtarget.h create mode 100644 lib/Target/PIC16/PIC16TargetAsmInfo.cpp create mode 100644 lib/Target/PIC16/PIC16TargetAsmInfo.h create mode 100644 lib/Target/PIC16/PIC16TargetMachine.cpp create mode 100644 lib/Target/PIC16/PIC16TargetMachine.h create mode 100644 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/PowerPC/AsmPrinter/Makefile create mode 100644 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp create mode 100644 lib/Target/PowerPC/CMakeLists.txt create mode 100644 lib/Target/PowerPC/Makefile create mode 100644 lib/Target/PowerPC/PPC.h create mode 100644 lib/Target/PowerPC/PPC.td create mode 100644 lib/Target/PowerPC/PPCBranchSelector.cpp create mode 100644 lib/Target/PowerPC/PPCCallingConv.td create mode 100644 lib/Target/PowerPC/PPCCodeEmitter.cpp create mode 100644 lib/Target/PowerPC/PPCFrameInfo.h create mode 100644 lib/Target/PowerPC/PPCHazardRecognizers.cpp create mode 100644 lib/Target/PowerPC/PPCHazardRecognizers.h create mode 100644 
lib/Target/PowerPC/PPCISelDAGToDAG.cpp create mode 100644 lib/Target/PowerPC/PPCISelLowering.cpp create mode 100644 lib/Target/PowerPC/PPCISelLowering.h create mode 100644 lib/Target/PowerPC/PPCInstr64Bit.td create mode 100644 lib/Target/PowerPC/PPCInstrAltivec.td create mode 100644 lib/Target/PowerPC/PPCInstrBuilder.h create mode 100644 lib/Target/PowerPC/PPCInstrFormats.td create mode 100644 lib/Target/PowerPC/PPCInstrInfo.cpp create mode 100644 lib/Target/PowerPC/PPCInstrInfo.h create mode 100644 lib/Target/PowerPC/PPCInstrInfo.td create mode 100644 lib/Target/PowerPC/PPCJITInfo.cpp create mode 100644 lib/Target/PowerPC/PPCJITInfo.h create mode 100644 lib/Target/PowerPC/PPCMachOWriterInfo.cpp create mode 100644 lib/Target/PowerPC/PPCMachOWriterInfo.h create mode 100644 lib/Target/PowerPC/PPCMachineFunctionInfo.h create mode 100644 lib/Target/PowerPC/PPCPerfectShuffle.h create mode 100644 lib/Target/PowerPC/PPCPredicates.cpp create mode 100644 lib/Target/PowerPC/PPCPredicates.h create mode 100644 lib/Target/PowerPC/PPCRegisterInfo.cpp create mode 100644 lib/Target/PowerPC/PPCRegisterInfo.h create mode 100644 lib/Target/PowerPC/PPCRegisterInfo.td create mode 100644 lib/Target/PowerPC/PPCRelocations.h create mode 100644 lib/Target/PowerPC/PPCSchedule.td create mode 100644 lib/Target/PowerPC/PPCScheduleG3.td create mode 100644 lib/Target/PowerPC/PPCScheduleG4.td create mode 100644 lib/Target/PowerPC/PPCScheduleG4Plus.td create mode 100644 lib/Target/PowerPC/PPCScheduleG5.td create mode 100644 lib/Target/PowerPC/PPCSubtarget.cpp create mode 100644 lib/Target/PowerPC/PPCSubtarget.h create mode 100644 lib/Target/PowerPC/PPCTargetAsmInfo.cpp create mode 100644 lib/Target/PowerPC/PPCTargetAsmInfo.h create mode 100644 lib/Target/PowerPC/PPCTargetMachine.cpp create mode 100644 lib/Target/PowerPC/PPCTargetMachine.h create mode 100644 lib/Target/PowerPC/README.txt create mode 100644 lib/Target/PowerPC/README_ALTIVEC.txt create mode 100644 lib/Target/README.txt create mode 100644 lib/Target/Sparc/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/Sparc/AsmPrinter/Makefile create mode 100644 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp create mode 100644 lib/Target/Sparc/CMakeLists.txt create mode 100644 lib/Target/Sparc/DelaySlotFiller.cpp create mode 100644 lib/Target/Sparc/FPMover.cpp create mode 100644 lib/Target/Sparc/Makefile create mode 100644 lib/Target/Sparc/README.txt create mode 100644 lib/Target/Sparc/Sparc.h create mode 100644 lib/Target/Sparc/Sparc.td create mode 100644 lib/Target/Sparc/SparcCallingConv.td create mode 100644 lib/Target/Sparc/SparcISelDAGToDAG.cpp create mode 100644 lib/Target/Sparc/SparcISelLowering.cpp create mode 100644 lib/Target/Sparc/SparcISelLowering.h create mode 100644 lib/Target/Sparc/SparcInstrFormats.td create mode 100644 lib/Target/Sparc/SparcInstrInfo.cpp create mode 100644 lib/Target/Sparc/SparcInstrInfo.h create mode 100644 lib/Target/Sparc/SparcInstrInfo.td create mode 100644 lib/Target/Sparc/SparcRegisterInfo.cpp create mode 100644 lib/Target/Sparc/SparcRegisterInfo.h create mode 100644 lib/Target/Sparc/SparcRegisterInfo.td create mode 100644 lib/Target/Sparc/SparcSubtarget.cpp create mode 100644 lib/Target/Sparc/SparcSubtarget.h create mode 100644 lib/Target/Sparc/SparcTargetAsmInfo.cpp create mode 100644 lib/Target/Sparc/SparcTargetAsmInfo.h create mode 100644 lib/Target/Sparc/SparcTargetMachine.cpp create mode 100644 lib/Target/Sparc/SparcTargetMachine.h create mode 100644 lib/Target/SubtargetFeature.cpp create mode 100644 
lib/Target/Target.cpp create mode 100644 lib/Target/TargetAsmInfo.cpp create mode 100644 lib/Target/TargetData.cpp create mode 100644 lib/Target/TargetFrameInfo.cpp create mode 100644 lib/Target/TargetInstrInfo.cpp create mode 100644 lib/Target/TargetIntrinsicInfo.cpp create mode 100644 lib/Target/TargetMachOWriterInfo.cpp create mode 100644 lib/Target/TargetMachine.cpp create mode 100644 lib/Target/TargetMachineRegistry.cpp create mode 100644 lib/Target/TargetRegisterInfo.cpp create mode 100644 lib/Target/TargetSubtarget.cpp create mode 100644 lib/Target/X86/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/X86/AsmPrinter/Makefile create mode 100644 lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h create mode 100644 lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h create mode 100644 lib/Target/X86/CMakeLists.txt create mode 100644 lib/Target/X86/Makefile create mode 100644 lib/Target/X86/README-FPStack.txt create mode 100644 lib/Target/X86/README-MMX.txt create mode 100644 lib/Target/X86/README-SSE.txt create mode 100644 lib/Target/X86/README-UNIMPLEMENTED.txt create mode 100644 lib/Target/X86/README-X86-64.txt create mode 100644 lib/Target/X86/README.txt create mode 100644 lib/Target/X86/X86.h create mode 100644 lib/Target/X86/X86.td create mode 100644 lib/Target/X86/X86COFF.h create mode 100644 lib/Target/X86/X86CallingConv.td create mode 100644 lib/Target/X86/X86CodeEmitter.cpp create mode 100644 lib/Target/X86/X86CompilationCallback_Win64.asm create mode 100644 lib/Target/X86/X86ELFWriterInfo.cpp create mode 100644 lib/Target/X86/X86ELFWriterInfo.h create mode 100644 lib/Target/X86/X86FastISel.cpp create mode 100644 lib/Target/X86/X86FloatingPoint.cpp create mode 100644 lib/Target/X86/X86FloatingPointRegKill.cpp create mode 100644 lib/Target/X86/X86ISelDAGToDAG.cpp create mode 100644 lib/Target/X86/X86ISelLowering.cpp create mode 100644 lib/Target/X86/X86ISelLowering.h create mode 100644 lib/Target/X86/X86Instr64bit.td create mode 100644 lib/Target/X86/X86InstrBuilder.h create mode 100644 lib/Target/X86/X86InstrFPStack.td create mode 100644 lib/Target/X86/X86InstrFormats.td create mode 100644 lib/Target/X86/X86InstrInfo.cpp create mode 100644 lib/Target/X86/X86InstrInfo.h create mode 100644 lib/Target/X86/X86InstrInfo.td create mode 100644 lib/Target/X86/X86InstrMMX.td create mode 100644 lib/Target/X86/X86InstrSSE.td create mode 100644 lib/Target/X86/X86JITInfo.cpp create mode 100644 lib/Target/X86/X86JITInfo.h create mode 100644 lib/Target/X86/X86MachineFunctionInfo.h create mode 100644 lib/Target/X86/X86RegisterInfo.cpp create mode 100644 lib/Target/X86/X86RegisterInfo.h create mode 100644 lib/Target/X86/X86RegisterInfo.td create mode 100644 lib/Target/X86/X86Relocations.h create mode 100644 lib/Target/X86/X86Subtarget.cpp create mode 100644 lib/Target/X86/X86Subtarget.h create mode 100644 lib/Target/X86/X86TargetAsmInfo.cpp create mode 100644 lib/Target/X86/X86TargetAsmInfo.h create mode 100644 lib/Target/X86/X86TargetMachine.cpp create mode 100644 lib/Target/X86/X86TargetMachine.h create mode 100644 lib/Target/XCore/CMakeLists.txt create mode 100644 lib/Target/XCore/Makefile create mode 100644 lib/Target/XCore/README.txt create mode 100644 lib/Target/XCore/XCore.h create mode 100644 lib/Target/XCore/XCore.td create mode 100644 lib/Target/XCore/XCoreAsmPrinter.cpp create mode 100644 
lib/Target/XCore/XCoreCallingConv.td create mode 100644 lib/Target/XCore/XCoreFrameInfo.cpp create mode 100644 lib/Target/XCore/XCoreFrameInfo.h create mode 100644 lib/Target/XCore/XCoreISelDAGToDAG.cpp create mode 100644 lib/Target/XCore/XCoreISelLowering.cpp create mode 100644 lib/Target/XCore/XCoreISelLowering.h create mode 100644 lib/Target/XCore/XCoreInstrFormats.td create mode 100644 lib/Target/XCore/XCoreInstrInfo.cpp create mode 100644 lib/Target/XCore/XCoreInstrInfo.h create mode 100644 lib/Target/XCore/XCoreInstrInfo.td create mode 100644 lib/Target/XCore/XCoreMachineFunctionInfo.h create mode 100644 lib/Target/XCore/XCoreRegisterInfo.cpp create mode 100644 lib/Target/XCore/XCoreRegisterInfo.h create mode 100644 lib/Target/XCore/XCoreRegisterInfo.td create mode 100644 lib/Target/XCore/XCoreSubtarget.cpp create mode 100644 lib/Target/XCore/XCoreSubtarget.h create mode 100644 lib/Target/XCore/XCoreTargetAsmInfo.cpp create mode 100644 lib/Target/XCore/XCoreTargetAsmInfo.h create mode 100644 lib/Target/XCore/XCoreTargetMachine.cpp create mode 100644 lib/Target/XCore/XCoreTargetMachine.h create mode 100644 lib/Transforms/Hello/CMakeLists.txt create mode 100644 lib/Transforms/Hello/Hello.cpp create mode 100644 lib/Transforms/Hello/Makefile create mode 100644 lib/Transforms/IPO/ArgumentPromotion.cpp create mode 100644 lib/Transforms/IPO/CMakeLists.txt create mode 100644 lib/Transforms/IPO/ConstantMerge.cpp create mode 100644 lib/Transforms/IPO/DeadArgumentElimination.cpp create mode 100644 lib/Transforms/IPO/DeadTypeElimination.cpp create mode 100644 lib/Transforms/IPO/ExtractGV.cpp create mode 100644 lib/Transforms/IPO/FunctionAttrs.cpp create mode 100644 lib/Transforms/IPO/GlobalDCE.cpp create mode 100644 lib/Transforms/IPO/GlobalOpt.cpp create mode 100644 lib/Transforms/IPO/IPConstantPropagation.cpp create mode 100644 lib/Transforms/IPO/IPO.cpp create mode 100644 lib/Transforms/IPO/IndMemRemoval.cpp create mode 100644 lib/Transforms/IPO/InlineAlways.cpp create mode 100644 lib/Transforms/IPO/InlineSimple.cpp create mode 100644 lib/Transforms/IPO/Inliner.cpp create mode 100644 lib/Transforms/IPO/Internalize.cpp create mode 100644 lib/Transforms/IPO/LoopExtractor.cpp create mode 100644 lib/Transforms/IPO/LowerSetJmp.cpp create mode 100644 lib/Transforms/IPO/Makefile create mode 100644 lib/Transforms/IPO/MergeFunctions.cpp create mode 100644 lib/Transforms/IPO/PartialSpecialization.cpp create mode 100644 lib/Transforms/IPO/PruneEH.cpp create mode 100644 lib/Transforms/IPO/RaiseAllocations.cpp create mode 100644 lib/Transforms/IPO/StripDeadPrototypes.cpp create mode 100644 lib/Transforms/IPO/StripSymbols.cpp create mode 100644 lib/Transforms/IPO/StructRetPromotion.cpp create mode 100644 lib/Transforms/Instrumentation/BlockProfiling.cpp create mode 100644 lib/Transforms/Instrumentation/CMakeLists.txt create mode 100644 lib/Transforms/Instrumentation/EdgeProfiling.cpp create mode 100644 lib/Transforms/Instrumentation/Makefile create mode 100644 lib/Transforms/Instrumentation/ProfilingUtils.cpp create mode 100644 lib/Transforms/Instrumentation/ProfilingUtils.h create mode 100644 lib/Transforms/Instrumentation/RSProfiling.cpp create mode 100644 lib/Transforms/Instrumentation/RSProfiling.h create mode 100644 lib/Transforms/Makefile create mode 100644 lib/Transforms/Scalar/ADCE.cpp create mode 100644 lib/Transforms/Scalar/BasicBlockPlacement.cpp create mode 100644 lib/Transforms/Scalar/CMakeLists.txt create mode 100644 lib/Transforms/Scalar/CodeGenPrepare.cpp create mode 100644 
lib/Transforms/Scalar/CondPropagate.cpp create mode 100644 lib/Transforms/Scalar/ConstantProp.cpp create mode 100644 lib/Transforms/Scalar/DCE.cpp create mode 100644 lib/Transforms/Scalar/DeadStoreElimination.cpp create mode 100644 lib/Transforms/Scalar/GVN.cpp create mode 100644 lib/Transforms/Scalar/GVNPRE.cpp create mode 100644 lib/Transforms/Scalar/IndVarSimplify.cpp create mode 100644 lib/Transforms/Scalar/InstructionCombining.cpp create mode 100644 lib/Transforms/Scalar/JumpThreading.cpp create mode 100644 lib/Transforms/Scalar/LICM.cpp create mode 100644 lib/Transforms/Scalar/LoopDeletion.cpp create mode 100644 lib/Transforms/Scalar/LoopIndexSplit.cpp create mode 100644 lib/Transforms/Scalar/LoopRotation.cpp create mode 100644 lib/Transforms/Scalar/LoopStrengthReduce.cpp create mode 100644 lib/Transforms/Scalar/LoopUnroll.cpp create mode 100644 lib/Transforms/Scalar/LoopUnswitch.cpp create mode 100644 lib/Transforms/Scalar/Makefile create mode 100644 lib/Transforms/Scalar/MemCpyOptimizer.cpp create mode 100644 lib/Transforms/Scalar/PredicateSimplifier.cpp create mode 100644 lib/Transforms/Scalar/Reassociate.cpp create mode 100644 lib/Transforms/Scalar/Reg2Mem.cpp create mode 100644 lib/Transforms/Scalar/SCCP.cpp create mode 100644 lib/Transforms/Scalar/Scalar.cpp create mode 100644 lib/Transforms/Scalar/ScalarReplAggregates.cpp create mode 100644 lib/Transforms/Scalar/SimplifyCFGPass.cpp create mode 100644 lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp create mode 100644 lib/Transforms/Scalar/SimplifyLibCalls.cpp create mode 100644 lib/Transforms/Scalar/TailDuplication.cpp create mode 100644 lib/Transforms/Scalar/TailRecursionElimination.cpp create mode 100644 lib/Transforms/Utils/AddrModeMatcher.cpp create mode 100644 lib/Transforms/Utils/BasicBlockUtils.cpp create mode 100644 lib/Transforms/Utils/BasicInliner.cpp create mode 100644 lib/Transforms/Utils/BreakCriticalEdges.cpp create mode 100644 lib/Transforms/Utils/CMakeLists.txt create mode 100644 lib/Transforms/Utils/CloneFunction.cpp create mode 100644 lib/Transforms/Utils/CloneLoop.cpp create mode 100644 lib/Transforms/Utils/CloneModule.cpp create mode 100644 lib/Transforms/Utils/CloneTrace.cpp create mode 100644 lib/Transforms/Utils/CodeExtractor.cpp create mode 100644 lib/Transforms/Utils/DemoteRegToStack.cpp create mode 100644 lib/Transforms/Utils/InlineCost.cpp create mode 100644 lib/Transforms/Utils/InlineFunction.cpp create mode 100644 lib/Transforms/Utils/InstructionNamer.cpp create mode 100644 lib/Transforms/Utils/LCSSA.cpp create mode 100644 lib/Transforms/Utils/Local.cpp create mode 100644 lib/Transforms/Utils/LoopSimplify.cpp create mode 100644 lib/Transforms/Utils/LowerAllocations.cpp create mode 100644 lib/Transforms/Utils/LowerInvoke.cpp create mode 100644 lib/Transforms/Utils/LowerSwitch.cpp create mode 100644 lib/Transforms/Utils/Makefile create mode 100644 lib/Transforms/Utils/Mem2Reg.cpp create mode 100644 lib/Transforms/Utils/PromoteMemoryToRegister.cpp create mode 100644 lib/Transforms/Utils/SimplifyCFG.cpp create mode 100644 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp create mode 100644 lib/Transforms/Utils/UnrollLoop.cpp create mode 100644 lib/Transforms/Utils/ValueMapper.cpp create mode 100644 lib/VMCore/AsmWriter.cpp create mode 100644 lib/VMCore/Attributes.cpp create mode 100644 lib/VMCore/AutoUpgrade.cpp create mode 100644 lib/VMCore/BasicBlock.cpp create mode 100644 lib/VMCore/CMakeLists.txt create mode 100644 lib/VMCore/ConstantFold.cpp create mode 100644 lib/VMCore/ConstantFold.h create mode 
100644 lib/VMCore/Constants.cpp create mode 100644 lib/VMCore/Core.cpp create mode 100644 lib/VMCore/Dominators.cpp create mode 100644 lib/VMCore/Function.cpp create mode 100644 lib/VMCore/Globals.cpp create mode 100644 lib/VMCore/InlineAsm.cpp create mode 100644 lib/VMCore/Instruction.cpp create mode 100644 lib/VMCore/Instructions.cpp create mode 100644 lib/VMCore/IntrinsicInst.cpp create mode 100644 lib/VMCore/LeakDetector.cpp create mode 100644 lib/VMCore/Makefile create mode 100644 lib/VMCore/Mangler.cpp create mode 100644 lib/VMCore/Module.cpp create mode 100644 lib/VMCore/ModuleProvider.cpp create mode 100644 lib/VMCore/Pass.cpp create mode 100644 lib/VMCore/PassManager.cpp create mode 100644 lib/VMCore/PrintModulePass.cpp create mode 100644 lib/VMCore/SymbolTableListTraitsImpl.h create mode 100644 lib/VMCore/Type.cpp create mode 100644 lib/VMCore/TypeSymbolTable.cpp create mode 100644 lib/VMCore/Use.cpp create mode 100644 lib/VMCore/Value.cpp create mode 100644 lib/VMCore/ValueSymbolTable.cpp create mode 100644 lib/VMCore/ValueTypes.cpp create mode 100644 lib/VMCore/Verifier.cpp (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp new file mode 100644 index 000000000000..c5523ec4634d --- /dev/null +++ b/lib/Analysis/AliasAnalysis.cpp @@ -0,0 +1,248 @@ +//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic AliasAnalysis interface which is used as the +// common interface used by all clients and implementations of alias analysis. +// +// This file also implements the default version of the AliasAnalysis interface +// that is to be used when no other implementation is specified. This does some +// simple tests that detect obvious cases: two different global pointers cannot +// alias, a global cannot alias a malloc, two different mallocs cannot alias, +// etc. +// +// This alias analysis implementation really isn't very good for anything, but +// it is very fast, and makes a nice clean default implementation. Because it +// handles lots of little corner cases, other, more complex, alias analysis +// implementations may choose to rely on this pass to resolve these simple and +// easy cases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +// Register the AliasAnalysis interface, providing a nice name to refer to. 
+static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis");
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Value *V1, unsigned V1Size,
+                     const Value *V2, unsigned V2Size) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->alias(V1, V1Size, V2, V2Size);
+}
+
+void AliasAnalysis::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->getMustAliases(P, RetVals);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->pointsToConstantMemory(P);
+}
+
+bool AliasAnalysis::hasNoModRefInfoForCalls() const {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->hasNoModRefInfoForCalls();
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->copyValue(From, To);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
+  // FIXME: we can do better.
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->getModRefInfo(CS1, CS2);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
+  return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()),
+               P, Size) ? Ref : NoModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
+  // If the stored address cannot alias the pointer in question, then the
+  // pointer cannot be modified by the store.
+  if (!alias(S->getOperand(1),
+             TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+    return NoModRef;
+
+  // If the pointer is a pointer to constant memory, then it could not have
+  // been modified by this store.
+  return pointsToConstantMemory(P) ? NoModRef : Mod;
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(CallSite CS,
+                                 std::vector<PointerAccessInfo> *Info) {
+  if (CS.doesNotAccessMemory())
+    // Can't do better than this.
+    return DoesNotAccessMemory;
+  ModRefBehavior MRB = getModRefBehavior(CS.getCalledFunction(), Info);
+  if (MRB != DoesNotAccessMemory && CS.onlyReadsMemory())
+    return OnlyReadsMemory;
+  return MRB;
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(Function *F,
+                                 std::vector<PointerAccessInfo> *Info) {
+  if (F) {
+    if (F->doesNotAccessMemory())
+      // Can't do better than this.
+      return DoesNotAccessMemory;
+    if (F->onlyReadsMemory())
+      return OnlyReadsMemory;
+    if (unsigned id = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+    }
+  }
+  return UnknownModRefBehavior;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+  ModRefResult Mask = ModRef;
+  ModRefBehavior MRB = getModRefBehavior(CS);
+  if (MRB == DoesNotAccessMemory)
+    return NoModRef;
+  else if (MRB == OnlyReadsMemory)
+    Mask = Ref;
+  else if (MRB == AliasAnalysis::AccessesArguments) {
+    bool doesAlias = false;
+    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+         AI != AE; ++AI)
+      if (alias(*AI, ~0U, P, Size) != NoAlias) {
+        doesAlias = true;
+        break;
+      }
+
+    if (!doesAlias)
+      return NoModRef;
+  }
+
+  if (!AA) return Mask;
+
+  // If P points to a constant memory location, the call definitely could not
+  // modify the memory location.
+  if ((Mask & Mod) && AA->pointsToConstantMemory(P))
+    Mask = ModRefResult(Mask & ~Mod);
+
+  return ModRefResult(Mask & AA->getModRefInfo(CS, P, Size));
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize
+/// the AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+  TD = &P->getAnalysis<TargetData>();
+  AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that
+// TargetData is required by the pass.
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetData>();       // All AA's need TargetData.
+  AU.addRequired<AliasAnalysis>();    // All AA's chain
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the value pointed to by Ptr.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+                                        const Value *Ptr, unsigned Size) {
+  return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the
+/// execution of the specified instructions to modify the value pointed to by
+/// Ptr. The instructions to consider are all of the instructions in the
+/// range of [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+                                              const Instruction &I2,
+                                              const Value *Ptr,
+                                              unsigned Size) {
+  assert(I1.getParent() == I2.getParent() &&
+         "Instructions not in same basic block!");
+  BasicBlock::iterator I = const_cast<Instruction*>(&I1);
+  BasicBlock::iterator E = const_cast<Instruction*>(&I2);
+  ++E;  // Convert from inclusive to exclusive range.
+
+  for (; I != E; ++I)  // Check every instruction in range
+    if (getModRefInfo(I, const_cast<Value*>(Ptr), Size) & Mod)
+      return true;
+  return false;
+}
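The chaining scheme above is what makes alias analyses interchangeable: a client only names the AliasAnalysis group, and the pass manager wires in whichever implementation is selected (BasicAliasAnalysis by default). A minimal sketch of such a client against this era's API follows; the pass name, the counter, and the use of ~0U ("access size unknown") are illustrative, not part of this import:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    namespace {
      // Hypothetical client: counts provably disjoint pairs of pointer
      // arguments -- the same kind of query loop aa-eval runs below.
      struct AAClientSketch : public FunctionPass {
        static char ID;
        unsigned NumNoAlias;
        AAClientSketch() : FunctionPass(&ID), NumNoAlias(0) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<AliasAnalysis>();  // join the analysis group
          AU.setPreservesAll();             // we only ask questions
        }

        virtual bool runOnFunction(Function &F) {
          AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
          for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
               I != E; ++I)
            for (Function::arg_iterator J = F.arg_begin(); J != I; ++J)
              if (isa<PointerType>(I->getType()) &&
                  isa<PointerType>(J->getType()) &&
                  AA.alias(I, ~0U, J, ~0U) == AliasAnalysis::NoAlias)
                ++NumNoAlias;
          return false;  // analysis only, the IR is unchanged
        }
      };
    }
    char AAClientSketch::ID = 0;
    static RegisterPass<AAClientSketch>
    P("aa-client-sketch", "Illustrative AA client", false, true);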
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+  if (isa<CallInst>(V) || isa<InvokeInst>(V))
+    return CallSite(const_cast<Instruction*>(cast<Instruction>(V)))
+      .paramHasAttr(0, Attribute::NoAlias);
+  return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+///    Global Variables and Functions
+///    Allocas and Mallocs
+///    ByVal and NoAlias Arguments
+///    NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+  if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isNoAliasCall(V))
+    return true;
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasNoAliasAttr() || A->hasByValAttr();
+  return false;
+}
+
+// Because of the way .a files work, we must force the BasicAA implementation
+// to be pulled in if the AliasAnalysis classes are pulled in. Otherwise we
+// run the risk of AliasAnalysis being used, but the default implementation
+// not being linked into the tool that uses it.
+DEFINING_FILE_FOR(AliasAnalysis)
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 000000000000..4362d7d301a8
--- /dev/null
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias
+// queries are being made and how the alias analysis implementation being
+// used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden);
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+  class VISIBILITY_HIDDEN AliasAnalysisCounter
+      : public ModulePass, public AliasAnalysis {
+    unsigned No, May, Must;
+    unsigned NoMR, JustRef, JustMod, MR;
+    const char *Name;
+    Module *M;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    AliasAnalysisCounter() : ModulePass(&ID) {
+      No = May = Must = 0;
+      NoMR = JustRef = JustMod = MR = 0;
+    }
+
+    void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+      cerr << " " << Val << " " << Desc << " responses ("
+           << Val*100/Sum << "%)\n";
+    }
+    ~AliasAnalysisCounter() {
+      unsigned AASum = No+May+Must;
+      unsigned MRSum = NoMR+JustRef+JustMod+MR;
+      if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ cerr << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted: " << Name << "\n" + << " " << AASum << " Total Alias Queries Performed\n"; + if (AASum) { + printLine("no alias", No, AASum); + printLine("may alias", May, AASum); + printLine("must alias", Must, AASum); + cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; + } + + cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + if (MRSum) { + printLine("no mod/ref", NoMR, MRSum); + printLine("ref", JustRef, MRSum); + printLine("mod", JustMod, MRSum); + printLine("mod/ref", MR, MRSum); + cerr << " Mod/Ref Analysis Counter Summary: " <M = &M; + InitializeAliasAnalysis(this); + Name = dynamic_cast(&getAnalysis())->getPassName(); + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired(); + AU.setPreservesAll(); + } + + // FIXME: We could count these too... + bool pointsToConstantMemory(const Value *P) { + return getAnalysis().pointsToConstantMemory(P); + } + bool doesNotAccessMemory(CallSite CS) { + return getAnalysis().doesNotAccessMemory(CS); + } + bool doesNotAccessMemory(Function *F) { + return getAnalysis().doesNotAccessMemory(F); + } + bool onlyReadsMemory(CallSite CS) { + return getAnalysis().onlyReadsMemory(CS); + } + bool onlyReadsMemory(Function *F) { + return getAnalysis().onlyReadsMemory(F); + } + + + // Forwarding functions: just delegate to a real AA implementation, counting + // the number of responses... + AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); + ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + }; +} + +char AliasAnalysisCounter::ID = 0; +static RegisterPass +X("count-aa", "Count Alias Analysis Query Responses", false, true); +static RegisterAnalysisGroup Y(X); + +ModulePass *llvm::createAliasAnalysisCounterPass() { + return new AliasAnalysisCounter(); +} + +AliasAnalysis::AliasResult +AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + AliasResult R = getAnalysis().alias(V1, V1Size, V2, V2Size); + + const char *AliasString; + switch (R) { + default: assert(0 && "Unknown alias type!"); + case NoAlias: No++; AliasString = "No alias"; break; + case MayAlias: May++; AliasString = "May alias"; break; + case MustAlias: Must++; AliasString = "Must alias"; break; + } + + if (PrintAll || (PrintAllFailures && R == MayAlias)) { + cerr << AliasString << ":\t"; + cerr << "[" << V1Size << "B] "; + WriteAsOperand(*cerr.stream(), V1, true, M); + cerr << ", "; + cerr << "[" << V2Size << "B] "; + WriteAsOperand(*cerr.stream(), V2, true, M); + cerr << "\n"; + } + + return R; +} + +AliasAnalysis::ModRefResult +AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { + ModRefResult R = getAnalysis().getModRefInfo(CS, P, Size); + + const char *MRString; + switch (R) { + default: assert(0 && "Unknown mod/ref type!"); + case NoModRef: NoMR++; MRString = "NoModRef"; break; + case Ref: JustRef++; MRString = "JustRef"; break; + case Mod: JustMod++; MRString = "JustMod"; break; + case ModRef: MR++; MRString = "ModRef"; break; + } + + if (PrintAll || (PrintAllFailures && R == ModRef)) { + cerr << MRString << ": Ptr: "; + cerr << "[" << Size << "B] "; + WriteAsOperand(*cerr.stream(), P, true, M); + cerr << "\t<->" << 
*CS.getInstruction(); + } + return R; +} diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp new file mode 100644 index 000000000000..07820e350681 --- /dev/null +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -0,0 +1,246 @@ +//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple N^2 alias analysis accuracy evaluator. +// Basically, for each function in the program, it simply queries to see how the +// alias analysis implementation answers alias queries between each pair of +// pointers in the function. +// +// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +// Spadini, and Wojciech Stryjewski. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +#include +#include +using namespace llvm; + +static cl::opt PrintAll("print-all-alias-modref-info", cl::ReallyHidden); + +static cl::opt PrintNoAlias("print-no-aliases", cl::ReallyHidden); +static cl::opt PrintMayAlias("print-may-aliases", cl::ReallyHidden); +static cl::opt PrintMustAlias("print-must-aliases", cl::ReallyHidden); + +static cl::opt PrintNoModRef("print-no-modref", cl::ReallyHidden); +static cl::opt PrintMod("print-mod", cl::ReallyHidden); +static cl::opt PrintRef("print-ref", cl::ReallyHidden); +static cl::opt PrintModRef("print-modref", cl::ReallyHidden); + +namespace { + class VISIBILITY_HIDDEN AAEval : public FunctionPass { + unsigned NoAlias, MayAlias, MustAlias; + unsigned NoModRef, Mod, Ref, ModRef; + + public: + static char ID; // Pass identification, replacement for typeid + AAEval() : FunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesAll(); + } + + bool doInitialization(Module &M) { + NoAlias = MayAlias = MustAlias = 0; + NoModRef = Mod = Ref = ModRef = 0; + + if (PrintAll) { + PrintNoAlias = PrintMayAlias = PrintMustAlias = true; + PrintNoModRef = PrintMod = PrintRef = PrintModRef = true; + } + return false; + } + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; +} + +char AAEval::ID = 0; +static RegisterPass +X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true); + +FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } + +static void PrintResults(const char *Msg, bool P, const Value *V1, const Value *V2, + const Module *M) { + if (P) { + std::stringstream s1, s2; + WriteAsOperand(s1, V1, true, M); + WriteAsOperand(s2, V2, true, M); + std::string o1(s1.str()), o2(s2.str()); + if (o2 < o1) + std::swap(o1, o2); + cerr << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, + Module *M) { + if (P) { + cerr << " " << Msg << ": Ptr: "; + 
WriteAsOperand(*cerr.stream(), Ptr, true, M); + cerr << "\t<->" << *I; + } +} + +bool AAEval::runOnFunction(Function &F) { + AliasAnalysis &AA = getAnalysis(); + + const TargetData &TD = AA.getTargetData(); + + std::set Pointers; + std::set CallSites; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + if (isa(I->getType())) // Add all pointer arguments + Pointers.insert(I); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (isa(I->getType())) // Add all pointer instructions + Pointers.insert(&*I); + Instruction &Inst = *I; + User::op_iterator OI = Inst.op_begin(); + CallSite CS = CallSite::get(&Inst); + if (CS.getInstruction() && + isa(CS.getCalledValue())) + ++OI; // Skip actual functions for direct function calls. + for (; OI != Inst.op_end(); ++OI) + if (isa((*OI)->getType()) && !isa(*OI)) + Pointers.insert(*OI); + + if (CS.getInstruction()) CallSites.insert(CS); + } + + if (PrintNoAlias || PrintMayAlias || PrintMustAlias || + PrintNoModRef || PrintMod || PrintRef || PrintModRef) + cerr << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; + + // iterate over the worklist, and run the full (n^2)/2 disambiguations + for (std::set::iterator I1 = Pointers.begin(), E = Pointers.end(); + I1 != E; ++I1) { + unsigned I1Size = 0; + const Type *I1ElTy = cast((*I1)->getType())->getElementType(); + if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy); + + for (std::set::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + unsigned I2Size = 0; + const Type *I2ElTy =cast((*I2)->getType())->getElementType(); + if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy); + + switch (AA.alias(*I1, I1Size, *I2, I2Size)) { + case AliasAnalysis::NoAlias: + PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); + ++MayAlias; break; + case AliasAnalysis::MustAlias: + PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); + ++MustAlias; break; + default: + cerr << "Unknown alias query result!\n"; + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls and values + for (std::set::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + Instruction *I = C->getInstruction(); + + for (std::set::iterator V = Pointers.begin(), Ve = Pointers.end(); + V != Ve; ++V) { + unsigned Size = 0; + const Type *ElTy = cast((*V)->getType())->getElementType(); + if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy); + + switch (AA.getModRefInfo(*C, *V, Size)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults(" Mod", PrintMod, I, *V, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults(" Ref", PrintRef, I, *V, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent()); + ++ModRef; break; + default: + cerr << "Unknown alias query result!\n"; + } + } + } + + return false; +} + +static void PrintPercent(unsigned Num, unsigned Sum) { + cerr << "(" << Num*100ULL/Sum << "." 
+ << ((Num*1000ULL/Sum) % 10) << "%)\n"; +} + +bool AAEval::doFinalization(Module &M) { + unsigned AliasSum = NoAlias + MayAlias + MustAlias; + cerr << "===== Alias Analysis Evaluator Report =====\n"; + if (AliasSum == 0) { + cerr << " Alias Analysis Evaluator Summary: No pointers!\n"; + } else { + cerr << " " << AliasSum << " Total Alias Queries Performed\n"; + cerr << " " << NoAlias << " no alias responses "; + PrintPercent(NoAlias, AliasSum); + cerr << " " << MayAlias << " may alias responses "; + PrintPercent(MayAlias, AliasSum); + cerr << " " << MustAlias << " must alias responses "; + PrintPercent(MustAlias, AliasSum); + cerr << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; + } + + // Display the summary for mod/ref analysis + unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; + if (ModRefSum == 0) { + cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + } else { + cerr << " " << ModRefSum << " Total ModRef Queries Performed\n"; + cerr << " " << NoModRef << " no mod/ref responses "; + PrintPercent(NoModRef, ModRefSum); + cerr << " " << Mod << " mod responses "; + PrintPercent(Mod, ModRefSum); + cerr << " " << Ref << " ref responses "; + PrintPercent(Ref, ModRefSum); + cerr << " " << ModRef << " mod & ref responses "; + PrintPercent(ModRef, ModRefSum); + cerr << " Alias Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + } + + return false; +} diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp new file mode 100644 index 000000000000..1e82621e0202 --- /dev/null +++ b/lib/Analysis/AliasDebugger.cpp @@ -0,0 +1,123 @@ +//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass checks alias analysis users to ensure that if they +// create a new value, they do not query AA without informing it of the value. +// It acts as a shim over any other AA pass you want. +// +// Yes keeping track of every value in the program is expensive, but this is +// a debugging pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Support/Compiler.h" +#include +using namespace llvm; + +namespace { + + class VISIBILITY_HIDDEN AliasDebugger + : public ModulePass, public AliasAnalysis { + + //What we do is simple. Keep track of every value the AA could + //know about, and verify that queries are one of those. 
+ //A query to a value that didn't exist when the AA was created + //means someone forgot to update the AA when creating new values + + std::set Vals; + + public: + static char ID; // Class identification, replacement for typeinfo + AliasDebugger() : ModulePass(&ID) {} + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + + for(Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + Vals.insert(&*I); + + for(Module::iterator I = M.begin(), + E = M.end(); I != E; ++I){ + Vals.insert(&*I); + if(!I->isDeclaration()) { + for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) + Vals.insert(&*AI); + for (Function::const_iterator FI = I->begin(), FE = I->end(); + FI != FE; ++FI) + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) + Vals.insert(&*BI); + } + + } + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before"); + assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); + } + + ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) { + assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::getModRefInfo(CS, P, Size); + } + + ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + + void getMustAliases(Value *P, std::vector &RetVals) { + assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::getMustAliases(P, RetVals); + } + + bool pointsToConstantMemory(const Value *P) { + assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::pointsToConstantMemory(P); + } + + virtual void deleteValue(Value *V) { + assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); + AliasAnalysis::deleteValue(V); + } + virtual void copyValue(Value *From, Value *To) { + Vals.insert(To); + AliasAnalysis::copyValue(From, To); + } + + }; +} + +char AliasDebugger::ID = 0; +static RegisterPass +X("debug-aa", "AA use debugger", false, true); +static RegisterAnalysisGroup Y(X); + +Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } + diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp new file mode 100644 index 000000000000..18c2b66505f6 --- /dev/null +++ b/lib/Analysis/AliasSetTracker.cpp @@ -0,0 +1,608 @@ +//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AliasSetTracker and AliasSet classes. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/Streams.h" +using namespace llvm; + +/// mergeSetIn - Merge the specified alias set into this alias set. +/// +void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { + assert(!AS.Forward && "Alias set is already forwarding!"); + assert(!Forward && "This set is a forwarding set!!"); + + // Update the alias and access types of this set... + AccessTy |= AS.AccessTy; + AliasTy |= AS.AliasTy; + + if (AliasTy == MustAlias) { + // Check that these two merged sets really are must aliases. Since both + // used to be must-alias sets, we can just check any pointer from each set + // for aliasing. + AliasAnalysis &AA = AST.getAliasAnalysis(); + PointerRec *L = getSomePointer(); + PointerRec *R = AS.getSomePointer(); + + // If the pointers are not a must-alias pair, this set becomes a may alias. + if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize()) + != AliasAnalysis::MustAlias) + AliasTy = MayAlias; + } + + if (CallSites.empty()) { // Merge call sites... + if (!AS.CallSites.empty()) + std::swap(CallSites, AS.CallSites); + } else if (!AS.CallSites.empty()) { + CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end()); + AS.CallSites.clear(); + } + + AS.Forward = this; // Forward across AS now... + addRef(); // AS is now pointing to us... + + // Merge the list of constituent pointers... + if (AS.PtrList) { + *PtrListEnd = AS.PtrList; + AS.PtrList->setPrevInList(PtrListEnd); + PtrListEnd = AS.PtrListEnd; + + AS.PtrList = 0; + AS.PtrListEnd = &AS.PtrList; + assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + } +} + +void AliasSetTracker::removeAliasSet(AliasSet *AS) { + if (AliasSet *Fwd = AS->Forward) { + Fwd->dropRef(*this); + AS->Forward = 0; + } + AliasSets.erase(AS); +} + +void AliasSet::removeFromTracker(AliasSetTracker &AST) { + assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!"); + AST.removeAliasSet(this); +} + +void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, + unsigned Size, bool KnownMustAlias) { + assert(!Entry.hasAliasSet() && "Entry already in set!"); + + // Check to see if we have to downgrade to _may_ alias. + if (isMustAlias() && !KnownMustAlias) + if (PointerRec *P = getSomePointer()) { + AliasAnalysis &AA = AST.getAliasAnalysis(); + AliasAnalysis::AliasResult Result = + AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size); + if (Result == AliasAnalysis::MayAlias) + AliasTy = MayAlias; + else // First entry of must alias must have maximum size! + P->updateSize(Size); + assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!"); + } + + Entry.setAliasSet(this); + Entry.updateSize(Size); + + // Add it to the end of the list... + assert(*PtrListEnd == 0 && "End of list is not null?"); + *PtrListEnd = &Entry; + PtrListEnd = Entry.setPrevInList(PtrListEnd); + assert(*PtrListEnd == 0 && "End of list is not null?"); + addRef(); // Entry points to alias set... 
+} + +void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { + CallSites.push_back(CS); + + AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); + if (Behavior == AliasAnalysis::DoesNotAccessMemory) + return; + else if (Behavior == AliasAnalysis::OnlyReadsMemory) { + AliasTy = MayAlias; + AccessTy |= Refs; + return; + } + + // FIXME: This should use mod/ref information to make this not suck so bad + AliasTy = MayAlias; + AccessTy = ModRef; +} + +/// aliasesPointer - Return true if the specified pointer "may" (or must) +/// alias one of the members in the set. +/// +bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size, + AliasAnalysis &AA) const { + if (AliasTy == MustAlias) { + assert(CallSites.empty() && "Illegal must alias set!"); + + // If this is a set of MustAliases, only check to see if the pointer aliases + // SOME value in the set... + PointerRec *SomePtr = getSomePointer(); + assert(SomePtr && "Empty must-alias set??"); + return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size); + } + + // If this is a may-alias set, we have to check all of the pointers in the set + // to be sure it doesn't alias the set... + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.alias(Ptr, Size, I.getPointer(), I.getSize())) + return true; + + // Check the call sites list and invoke list... + if (!CallSites.empty()) { + if (AA.hasNoModRefInfoForCalls()) + return true; + + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) + if (AA.getModRefInfo(CallSites[i], const_cast(Ptr), Size) + != AliasAnalysis::NoModRef) + return true; + } + + return false; +} + +bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { + if (AA.doesNotAccessMemory(CS)) + return false; + + if (AA.hasNoModRefInfoForCalls()) + return true; + + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) + if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef || + AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef) + return true; + + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != + AliasAnalysis::NoModRef) + return true; + + return false; +} + +void AliasSetTracker::clear() { + // Delete all the PointerRec entries. + for (DenseMap::iterator I = PointerMap.begin(), + E = PointerMap.end(); I != E; ++I) + I->second->eraseFromList(); + + PointerMap.clear(); + + // The alias sets should all be clear now. + AliasSets.clear(); +} + + +/// findAliasSetForPointer - Given a pointer, find the one alias set to put the +/// instruction referring to the pointer into. If there are multiple alias sets +/// that may alias the pointer, merge them together and return the unified set. +/// +AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, + unsigned Size) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) { + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + } + + return FoundSet; +} + +/// containsPointer - Return true if the specified location is represented by +/// this alias set, false otherwise. This does not modify the AST object or +/// alias sets. 
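The add/merge machinery above is normally driven wholesale: a client feeds every instruction of a function to the tracker and reads back the resulting partition. A small sketch of that pattern, assuming an AliasAnalysis AA and a Function F are in scope (it mirrors the AliasSetPrinter pass at the bottom of this file):

    // Partition all memory references in F into alias sets.
    AliasSetTracker Tracker(AA);   // AA: any chained AliasAnalysis
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
      Tracker.add(&*I);            // dispatches on the instruction kind
    Tracker.print(cerr);           // dumps each (possibly merged) alias set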
+bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) + return true; + return false; +} + + + +AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesCallSite(CS, AA)) { + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else if (!I->Forward) { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + } + + return FoundSet; +} + + + + +/// getAliasSetForPointer - Return the alias set that the specified pointer +/// lives in. +AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size, + bool *New) { + AliasSet::PointerRec &Entry = getEntryFor(Pointer); + + // Check to see if the pointer is already known... + if (Entry.hasAliasSet()) { + Entry.updateSize(Size); + // Return the set! + return *Entry.getAliasSet(*this)->getForwardedTarget(*this); + } else if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) { + // Add it to the alias set it aliases... + AS->addPointer(*this, Entry, Size); + return *AS; + } else { + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer... + AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size); + return AliasSets.back(); + } +} + +bool AliasSetTracker::add(Value *Ptr, unsigned Size) { + bool NewPtr; + addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(LoadInst *LI) { + bool NewPtr; + AliasSet &AS = addPointer(LI->getOperand(0), + AA.getTargetData().getTypeStoreSize(LI->getType()), + AliasSet::Refs, NewPtr); + if (LI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(StoreInst *SI) { + bool NewPtr; + Value *Val = SI->getOperand(0); + AliasSet &AS = addPointer(SI->getOperand(1), + AA.getTargetData().getTypeStoreSize(Val->getType()), + AliasSet::Mods, NewPtr); + if (SI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(FreeInst *FI) { + bool NewPtr; + addPointer(FI->getOperand(0), ~0, AliasSet::Mods, NewPtr); + return NewPtr; +} + +bool AliasSetTracker::add(VAArgInst *VAAI) { + bool NewPtr; + addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(CallSite CS) { + if (isa(CS.getInstruction())) + return true; // Ignore DbgInfo Intrinsics. + if (AA.doesNotAccessMemory(CS)) + return true; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (!AS) { + AliasSets.push_back(new AliasSet()); + AS = &AliasSets.back(); + AS->addCallSite(CS, AA); + return true; + } else { + AS->addCallSite(CS, AA); + return false; + } +} + +bool AliasSetTracker::add(Instruction *I) { + // Dispatch to one of the other add methods... 
+ if (LoadInst *LI = dyn_cast(I)) + return add(LI); + else if (StoreInst *SI = dyn_cast(I)) + return add(SI); + else if (CallInst *CI = dyn_cast(I)) + return add(CI); + else if (InvokeInst *II = dyn_cast(I)) + return add(II); + else if (FreeInst *FI = dyn_cast(I)) + return add(FI); + else if (VAArgInst *VAAI = dyn_cast(I)) + return add(VAAI); + return true; +} + +void AliasSetTracker::add(BasicBlock &BB) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) + add(I); +} + +void AliasSetTracker::add(const AliasSetTracker &AST) { + assert(&AA == &AST.AA && + "Merging AliasSetTracker objects with different Alias Analyses!"); + + // Loop over all of the alias sets in AST, adding the pointers contained + // therein into the current alias sets. This can cause alias sets to be + // merged together in the current AST. + for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) + if (!I->Forward) { // Ignore forwarding alias sets + AliasSet &AS = const_cast(*I); + + // If there are any call sites in the alias set, add them to this AST. + for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) + add(AS.CallSites[i]); + + // Loop over all of the pointers in this alias set... + AliasSet::iterator I = AS.begin(), E = AS.end(); + bool X; + for (; I != E; ++I) { + AliasSet &NewAS = addPointer(I.getPointer(), I.getSize(), + (AliasSet::AccessType)AS.AccessTy, X); + if (AS.isVolatile()) NewAS.setVolatile(); + } + } +} + +/// remove - Remove the specified (potentially non-empty) alias set from the +/// tracker. +void AliasSetTracker::remove(AliasSet &AS) { + // Drop all call sites. + AS.CallSites.clear(); + + // Clear the alias set. + unsigned NumRefs = 0; + while (!AS.empty()) { + AliasSet::PointerRec *P = AS.PtrList; + + Value *ValToRemove = P->getValue(); + + // Unlink and delete entry from the list of values. + P->eraseFromList(); + + // Remember how many references need to be dropped. + ++NumRefs; + + // Finally, remove the entry. + PointerMap.erase(ValToRemove); + } + + // Stop using the alias set, removing it. + AS.RefCount -= NumRefs; + if (AS.RefCount == 0) + AS.removeFromTracker(*this); +} + +bool AliasSetTracker::remove(Value *Ptr, unsigned Size) { + AliasSet *AS = findAliasSetForPointer(Ptr, Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(LoadInst *LI) { + unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType()); + AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(StoreInst *SI) { + unsigned Size = + AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType()); + AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(FreeInst *FI) { + AliasSet *AS = findAliasSetForPointer(FI->getOperand(0), ~0); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(VAArgInst *VAAI) { + AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(CallSite CS) { + if (AA.doesNotAccessMemory(CS)) + return false; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(Instruction *I) { + // Dispatch to one of the other remove methods... 
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return remove(LI);
+  else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return remove(SI);
+  else if (CallInst *CI = dyn_cast<CallInst>(I))
+    return remove(CI);
+  else if (FreeInst *FI = dyn_cast<FreeInst>(I))
+    return remove(FI);
+  else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+    return remove(VAAI);
+  return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely.  It should be used when an instruction is deleted
+// from the program to update the AST.  If you don't use this, you would have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+  // Notify the alias analysis implementation that this value is gone.
+  AA.deleteValue(PtrVal);
+
+  // If this is a call instruction, remove the callsite from the appropriate
+  // AliasSet.
+  CallSite CS = CallSite::get(PtrVal);
+  if (CS.getInstruction())
+    if (!AA.doesNotAccessMemory(CS))
+      if (AliasSet *AS = findAliasSetForCallSite(CS))
+        AS->removeCallSite(CS);
+
+  // First, look up the PointerRec for this pointer.
+  DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(PtrVal);
+  if (I == PointerMap.end()) return;  // Noop
+
+  // If we found one, remove the pointer from the alias set it is in.
+  AliasSet::PointerRec *PtrValEnt = I->second;
+  AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+  // Unlink and delete from the list of values.
+  PtrValEnt->eraseFromList();
+
+  // Stop using the alias set.
+  AS->dropRef(*this);
+
+  PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value.  Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+  // Notify the alias analysis implementation that this value is copied.
+  AA.copyValue(From, To);
+
+  // First, look up the PointerRec for this pointer.
+  DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(From);
+  if (I == PointerMap.end())
+    return;  // Noop
+  assert(I->second->hasAliasSet() && "Dead entry?");
+
+  AliasSet::PointerRec &Entry = getEntryFor(To);
+  if (Entry.hasAliasSet()) return;    // Already in the tracker!
+
+  // Add it to the alias set it aliases...
+  I = PointerMap.find(From);
+  AliasSet *AS = I->second->getAliasSet(*this);
+  AS->addPointer(*this, Entry, I->second->getSize(), true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(std::ostream &OS) const {
+  OS << " AliasSet[" << (void*)this << "," << RefCount << "] ";
+  OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+  switch (AccessTy) {
+  case NoModRef: OS << "No access "; break;
+  case Refs    : OS << "Ref       "; break;
+  case Mods    : OS << "Mod       "; break;
+  case ModRef  : OS << "Mod/Ref   "; break;
+  default: assert(0 && "Bad value for AccessTy!");
+  }
+  if (isVolatile()) OS << "[volatile] ";
+  if (Forward)
+    OS << " forwarding to " << (void*)Forward;
+
+
+  if (!empty()) {
+    OS << "Pointers: ";
+    for (iterator I = begin(), E = end(); I != E; ++I) {
+      if (I != begin()) OS << ", ";
+      WriteAsOperand(OS << "(", I.getPointer());
+      OS << ", " << I.getSize() << ")";
+    }
+  }
+  if (!CallSites.empty()) {
+    OS << "\n " << CallSites.size() << " Call Sites: ";
+    for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+      if (i) OS << ", ";
+      WriteAsOperand(OS, CallSites[i].getCalledValue());
+    }
+  }
+  OS << "\n";
+}
+
+void AliasSetTracker::print(std::ostream &OS) const {
+  OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+     << PointerMap.size() << " pointer values.\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    I->print(OS);
+  OS << "\n";
+}
+
+void AliasSet::dump() const { print(cerr); }
+void AliasSetTracker::dump() const { print(cerr); }
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+  class VISIBILITY_HIDDEN AliasSetPrinter : public FunctionPass {
+    AliasSetTracker *Tracker;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    AliasSetPrinter() : FunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<AliasAnalysis>();
+    }
+
+    virtual bool runOnFunction(Function &F) {
+      Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+        Tracker->add(&*I);
+      Tracker->print(cerr);
+      delete Tracker;
+      return false;
+    }
+  };
+}
+
+char AliasSetPrinter::ID = 0;
+static RegisterPass<AliasSetPrinter>
+X("print-alias-sets", "Alias Set Printer", false, true);
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..493c6e88b3f8
--- /dev/null
+++ b/lib/Analysis/Analysis.cpp
@@ -0,0 +1,44 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm/Analysis/Verifier.h"
+#include <cstring>
+#include <string>
+
+using namespace llvm;
+
+int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+                     char **OutMessages) {
+  std::string Messages;
+
+  int Result = verifyModule(*unwrap(M),
+                            static_cast<VerifierFailureAction>(Action),
+                            OutMessages ? &Messages : 0);
+
+  if (OutMessages)
+    *OutMessages = strdup(Messages.c_str());
+
+  return Result;
+}
+
+int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+  return verifyFunction(*unwrap<Function>(Fn),
+                        static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+  Function *F = unwrap<Function>(Fn);
+  F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+  Function *F = unwrap<Function>(Fn);
+  F->viewCFGOnly();
+}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..d0620456399b
--- /dev/null
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,838 @@
+//===- BasicAliasAnalysis.cpp - Local Alias Analysis Impl -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply implements a few identities (two different globals cannot alias,
+// etc), but otherwise does no analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+static const User *isGEP(const Value *V) {
+  if (isa<GetElementPtrInst>(V) ||
+      (isa<ConstantExpr>(V) &&
+       cast<ConstantExpr>(V)->getOpcode() == Instruction::GetElementPtr))
+    return cast<User>(V);
+  return 0;
+}
+
+static const Value *GetGEPOperands(const Value *V,
+                                   SmallVector<Value*, 16> &GEPOps) {
+  assert(GEPOps.empty() && "Expect empty list to populate!");
+  GEPOps.insert(GEPOps.end(), cast<User>(V)->op_begin()+1,
+                cast<User>(V)->op_end());
+
+  // Accumulate all of the chained indexes into the operand array
+  V = cast<User>(V)->getOperand(0);
+
+  while (const User *G = isGEP(V)) {
+    if (!isa<Constant>(GEPOps[0]) || isa<GlobalValue>(GEPOps[0]) ||
+        !cast<Constant>(GEPOps[0])->isNullValue())
+      break;  // Don't handle folding arbitrary pointer offsets yet...
+    GEPOps.erase(GEPOps.begin());   // Drop the zero index
+    GEPOps.insert(GEPOps.begin(), G->op_begin()+1, G->op_end());
+    V = G->getOperand(0);
+  }
+  return V;
+}
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+static bool isKnownNonNull(const Value *V) {
+  // Alloca never returns null, malloc might.
+  if (isa<AllocaInst>(V)) return true;
+
+  // A byval argument is never null.
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasByValAttr();
+
+  // Global values are not null unless extern weak.
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+    return !GV->hasExternalWeakLinkage();
+  return false;
+}
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+  // If this is a local allocation, check to see if it escapes.
+  if (isa<AllocaInst>(V) || isNoAliasCall(V))
+    return !PointerMayBeCaptured(V, false);
+
+  // If this is an argument that corresponds to a byval or noalias argument,
+  // then it has not escaped before entering the function.  Check if it escapes
+  // inside the function.
+  if (const Argument *A = dyn_cast<Argument>(V))
+    if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+      // Don't bother analyzing arguments already known not to escape.
+      if (A->hasNoCaptureAttr())
+        return true;
+      return !PointerMayBeCaptured(V, false);
+    }
+  return false;
+}
+
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, unsigned Size,
+                                const TargetData &TD) {
+  const Type *AccessTy;
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    AccessTy = GV->getType()->getElementType();
+  } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+    if (!AI->isArrayAllocation())
+      AccessTy = AI->getType()->getElementType();
+    else
+      return false;
+  } else if (const Argument *A = dyn_cast<Argument>(V)) {
+    if (A->hasByValAttr())
+      AccessTy = cast<PointerType>(A->getType())->getElementType();
+    else
+      return false;
+  } else {
+    return false;
+  }
+
+  if (AccessTy->isSized())
+    return TD.getTypeAllocSize(AccessTy) < Size;
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// NoAA Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// NoAA - This class implements the -no-aa pass, which always returns "I
+  /// don't know" for alias queries.  NoAA is unlike other alias analysis
+  /// implementations, in that it does not chain to a previous analysis.  As
+  /// such it doesn't follow many of the rules that other alias analyses must.
+  ///
+  struct VISIBILITY_HIDDEN NoAA : public ImmutablePass, public AliasAnalysis {
+    static char ID; // Class identification, replacement for typeinfo
+    NoAA() : ImmutablePass(&ID) {}
+    explicit NoAA(void *PID) : ImmutablePass(PID) { }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+    }
+
+    virtual void initializePass() {
+      TD = &getAnalysis<TargetData>();
+    }
+
+    virtual AliasResult alias(const Value *V1, unsigned V1Size,
+                              const Value *V2, unsigned V2Size) {
+      return MayAlias;
+    }
+
+    virtual void getArgumentAccesses(Function *F, CallSite CS,
+                                     std::vector<PointerAccessInfo> &Info) {
+      assert(0 && "This method may not be called on this function!");
+    }
+
+    virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
+    virtual bool pointsToConstantMemory(const Value *P) { return false; }
+    virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+      return ModRef;
+    }
+    virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+      return ModRef;
+    }
+    virtual bool hasNoModRefInfoForCalls() const { return true; }
+
+    virtual void deleteValue(Value *V) {}
+    virtual void copyValue(Value *From, Value *To) {}
+  };
+}  // End of anonymous namespace
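Because NoAA here (and BasicAA below) register into the AliasAnalysis analysis group, a client pass normally requests the group rather than a particular implementation; the pass manager then supplies whichever member is active. A minimal client sketch under that assumption (the pass name is hypothetical, not part of this import):

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Analysis/AliasAnalysis.h"

    using namespace llvm;

    namespace {
      // Hypothetical client: queries whichever AliasAnalysis implementation
      // was selected (BasicAA by default, NoAA under -no-aa).
      struct AliasQueryExample : public FunctionPass {
        static char ID;
        AliasQueryExample() : FunctionPass(&ID) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<AliasAnalysis>();  // request the group, not an impl
          AU.setPreservesAll();
        }

        virtual bool runOnFunction(Function &F) {
          AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
          // ... call AA.alias(P1, Size1, P2, Size2) on pointers of interest ...
          return false;
        }
      };
    }

    char AliasQueryExample::ID = 0;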
+
+// Register this pass...
+char NoAA::ID = 0;
+static RegisterPass<NoAA>
+U("no-aa", "No Alias Analysis (always returns 'may' alias)", true, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> V(U);
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
+
+//===----------------------------------------------------------------------===//
+// BasicAA Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// BasicAliasAnalysis - This is the default alias analysis implementation.
+  /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
+  /// derives from the NoAA class.
+  struct VISIBILITY_HIDDEN BasicAliasAnalysis : public NoAA {
+    static char ID; // Class identification, replacement for typeinfo
+    BasicAliasAnalysis() : NoAA(&ID) {}
+    AliasResult alias(const Value *V1, unsigned V1Size,
+                      const Value *V2, unsigned V2Size);
+
+    ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+    ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+
+    /// hasNoModRefInfoForCalls - We can provide mod/ref information against
+    /// non-escaping allocations.
+    virtual bool hasNoModRefInfoForCalls() const { return false; }
+
+    /// pointsToConstantMemory - Chase pointers until we find a (constant
+    /// global) or not.
+    bool pointsToConstantMemory(const Value *P);
+
+  private:
+    // CheckGEPInstructions - Check two GEP instructions with known
+    // must-aliasing base pointers.  This checks to see if the index expressions
+    // preclude the pointers from aliasing...
+    AliasResult
+    CheckGEPInstructions(const Type* BasePtr1Ty,
+                         Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1Size,
+                         const Type *BasePtr2Ty,
+                         Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2Size);
+  };
+}  // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+static RegisterPass<BasicAliasAnalysis>
+X("basicaa", "Basic Alias Analysis (default AA impl)", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis, true> Y(X);
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+  return new BasicAliasAnalysis();
+}
+
+
+/// pointsToConstantMemory - Chase pointers until we find a (constant
+/// global) or not.
+bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
+  if (const GlobalVariable *GV =
+        dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
+    return GV->isConstant();
+  return false;
+}
+
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.  Since we only look at local properties of this
+// function, we really can't say much about this query.  We do, however, use
+// simple "address taken" analysis on local objects.
+//
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+  if (!isa<Constant>(P)) {
+    const Value *Object = P->getUnderlyingObject();
+
+    // If this is a tail call and P points to a stack location, we know that
+    // the tail call cannot access or modify the local stack.
+    // We cannot exclude byval arguments here; these belong to the caller of
+    // the current function not to the current function, and a tail callee
+    // may reference them.
+    if (isa<AllocaInst>(Object))
+      if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+        if (CI->isTailCall())
+          return NoModRef;
+
+    // If the pointer is to a locally allocated object that does not escape,
+    // then the call can not mod/ref the pointer unless the call takes the
+    // argument without capturing it.
+    if (isNonEscapingLocalObject(Object) && CS.getInstruction() != Object) {
+      bool passedAsArg = false;
+      // TODO: Eventually only check 'nocapture' arguments.
+      for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+           CI != CE; ++CI)
+        if (isa<PointerType>((*CI)->getType()) &&
+            alias(cast<Value>(CI), ~0U, P, ~0U) != NoAlias)
+          passedAsArg = true;
+
+      if (!passedAsArg)
+        return NoModRef;
+    }
+  }
+
+  // The AliasAnalysis base class has some smarts, lets use them.
+  return AliasAnalysis::getModRefInfo(CS, P, Size);
+}
+
+
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
+  // If CS1 or CS2 are readnone, they don't interact.
+  ModRefBehavior CS1B = AliasAnalysis::getModRefBehavior(CS1);
+  if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+  ModRefBehavior CS2B = AliasAnalysis::getModRefBehavior(CS2);
+  if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+  // If they both only read from memory, just return ref.
+  if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+    return Ref;
+
+  // Otherwise, fall back to NoAA (mod+ref).
+  return NoAA::getModRefInfo(CS1, CS2);
+}
+
+
+// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such
+// as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
+                          const Value *V2, unsigned V2Size) {
+  // Strip off any constant expression casts if they exist
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V1))
+    if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
+      V1 = CE->getOperand(0);
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V2))
+    if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
+      V2 = CE->getOperand(0);
+
+  // Are we checking for alias of the same value?
+  if (V1 == V2) return MustAlias;
+
+  if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+    return NoAlias;  // Scalars cannot alias each other
+
+  // Strip off cast instructions.  Since V1 and V2 are pointers, they must be
+  // pointer<->pointer bitcasts.
+  if (const BitCastInst *I = dyn_cast<BitCastInst>(V1))
+    return alias(I->getOperand(0), V1Size, V2, V2Size);
+  if (const BitCastInst *I = dyn_cast<BitCastInst>(V2))
+    return alias(V1, V1Size, I->getOperand(0), V2Size);
+
+  // Figure out what objects these things are pointing to if we can.
+  const Value *O1 = V1->getUnderlyingObject();
+  const Value *O2 = V2->getUnderlyingObject();
+
+  if (O1 != O2) {
+    // If V1/V2 point to two different objects we know that we have no alias.
+    if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+      return NoAlias;
+
+    // Arguments can't alias with local allocations or noalias calls.
+    if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
+        (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
+      return NoAlias;
+
+    // Most objects can't alias null.
+    if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
+        (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+      return NoAlias;
+  }
+
+  // If the size of one access is larger than the entire object on the other
+  // side, then we know such behavior is undefined and can assume no alias.
+  const TargetData &TD = getTargetData();
+  if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) ||
+      (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD)))
+    return NoAlias;
+
+  // If one pointer is the result of a call/invoke and the other is a
+  // non-escaping local object, then we know the object couldn't escape to a
+  // point where the call could return it.
+  if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
+      isNonEscapingLocalObject(O2) && O1 != O2)
+    return NoAlias;
+  if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
+      isNonEscapingLocalObject(O1) && O1 != O2)
+    return NoAlias;
+
+  // If we have two gep instructions with must-alias'ing base pointers, figure
+  // out if the indexes to the GEP tell us anything about the derived pointer.
+  // Note that we also handle chains of getelementptr instructions as well as
+  // constant expression getelementptrs here.
+  //
+  if (isGEP(V1) && isGEP(V2)) {
+    const User *GEP1 = cast<User>(V1);
+    const User *GEP2 = cast<User>(V2);
+
+    // If V1 and V2 are identical GEPs, just recurse down on both of them.
+    // This allows us to analyze things like:
+    //   P = gep A, 0, i, 1
+    //   Q = gep B, 0, i, 1
+    // by just analyzing A and B.  This is even safe for variable indices.
+    if (GEP1->getType() == GEP2->getType() &&
+        GEP1->getNumOperands() == GEP2->getNumOperands() &&
+        GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() &&
+        // All operands are the same, ignoring the base.
+        std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1))
+      return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size);
+
+
+    // Drill down into the first non-gep value, to test for must-aliasing of
+    // the base pointers.
+    while (isGEP(GEP1->getOperand(0)) &&
+           GEP1->getOperand(1) ==
+           Constant::getNullValue(GEP1->getOperand(1)->getType()))
+      GEP1 = cast<User>(GEP1->getOperand(0));
+    const Value *BasePtr1 = GEP1->getOperand(0);
+
+    while (isGEP(GEP2->getOperand(0)) &&
+           GEP2->getOperand(1) ==
+           Constant::getNullValue(GEP2->getOperand(1)->getType()))
+      GEP2 = cast<User>(GEP2->getOperand(0));
+    const Value *BasePtr2 = GEP2->getOperand(0);
+
+    // Do the base pointers alias?
+    AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U);
+    if (BaseAlias == NoAlias) return NoAlias;
+    if (BaseAlias == MustAlias) {
+      // If the base pointers alias each other exactly, check to see if we can
+      // figure out anything about the resultant pointers, to try to prove
+      // non-aliasing.
+
+      // Collect all of the chained GEP operands together into one simple place
+      SmallVector<Value*, 16> GEP1Ops, GEP2Ops;
+      BasePtr1 = GetGEPOperands(V1, GEP1Ops);
+      BasePtr2 = GetGEPOperands(V2, GEP2Ops);
+
+      // If GetGEPOperands were able to fold to the same must-aliased pointer,
+      // do the comparison.
+      if (BasePtr1 == BasePtr2) {
+        AliasResult GAlias =
+          CheckGEPInstructions(BasePtr1->getType(),
+                               &GEP1Ops[0], GEP1Ops.size(), V1Size,
+                               BasePtr2->getType(),
+                               &GEP2Ops[0], GEP2Ops.size(), V2Size);
+        if (GAlias != MayAlias)
+          return GAlias;
+      }
+    }
+  }
+
+  // Check to see if these two pointers are related by a getelementptr
+  // instruction.  If one pointer is a GEP with a non-zero index of the other
+  // pointer, we know they cannot alias.
+  //
+  if (isGEP(V2)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+  }
+
+  if (V1Size != ~0U && V2Size != ~0U)
+    if (isGEP(V1)) {
+      SmallVector<Value*, 16> GEPOperands;
+      const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
+
+      AliasResult R = alias(BasePtr, V1Size, V2, V2Size);
+      if (R == MustAlias) {
+        // If there is at least one non-zero constant index, we know they cannot
+        // alias.
+        bool ConstantFound = false;
+        bool AllZerosFound = true;
+        for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
+          if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
+            if (!C->isNullValue()) {
+              ConstantFound = true;
+              AllZerosFound = false;
+              break;
+            }
+          } else {
+            AllZerosFound = false;
+          }
+
+        // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must-aliases
+        // the ptr, the end result is a must alias also.
+        if (AllZerosFound)
+          return MustAlias;
+
+        if (ConstantFound) {
+          if (V2Size <= 1 && V1Size <= 1)  // Just pointer check?
+            return NoAlias;
+
+          // Otherwise we have to check to see that the distance is more than
+          // the size of the argument... build an index vector that is equal to
+          // the arguments provided, except substitute 0's for any variable
+          // indexes we find...
+          if (cast<PointerType>(
+                BasePtr->getType())->getElementType()->isSized()) {
+            for (unsigned i = 0; i != GEPOperands.size(); ++i)
+              if (!isa<ConstantInt>(GEPOperands[i]))
+                GEPOperands[i] =
+                  Constant::getNullValue(GEPOperands[i]->getType());
+            int64_t Offset =
+              getTargetData().getIndexedOffset(BasePtr->getType(),
+                                               &GEPOperands[0],
+                                               GEPOperands.size());
+
+            if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
+              return NoAlias;
+          }
+        }
+      }
+    }
+
+  return MayAlias;
+}
+
+// This function is used to determine if the indices of two GEP instructions are
+// equal. V1 and V2 are the indices.
+static bool IndexOperandsEqual(Value *V1, Value *V2) {
+  if (V1->getType() == V2->getType())
+    return V1 == V2;
+  if (Constant *C1 = dyn_cast<Constant>(V1))
+    if (Constant *C2 = dyn_cast<Constant>(V2)) {
+      // Sign extend the constants to long types, if necessary
+      if (C1->getType() != Type::Int64Ty)
+        C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
+      if (C2->getType() != Type::Int64Ty)
+        C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+      return C1 == C2;
+    }
+  return false;
+}
+
+/// CheckGEPInstructions - Check two GEP instructions with known must-aliasing
+/// base pointers.  This checks to see if the index expressions preclude the
+/// pointers from aliasing...
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::CheckGEPInstructions(
+  const Type* BasePtr1Ty, Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1S,
+  const Type *BasePtr2Ty, Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2S) {
+  // We currently can't handle the case when the base pointers have different
+  // primitive types.  Since this is uncommon anyway, we are happy being
+  // extremely conservative.
+  if (BasePtr1Ty != BasePtr2Ty)
+    return MayAlias;
+
+  const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
+
+  // Find the (possibly empty) initial sequence of equal values... which are not
+  // necessarily constants.
+  unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
+  unsigned MinOperands = std::min(NumGEP1Operands, NumGEP2Operands);
+  unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
+  unsigned UnequalOper = 0;
+  while (UnequalOper != MinOperands &&
+         IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
+    // Advance through the type as we go...
+    ++UnequalOper;
+    if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
+      BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[UnequalOper-1]);
+    else {
+      // If all operands equal each other, then the derived pointers must
+      // alias each other...
+      BasePtr1Ty = 0;
+      assert(UnequalOper == NumGEP1Operands && UnequalOper == NumGEP2Operands &&
+             "Ran out of type nesting, but not out of operands?");
+      return MustAlias;
+    }
+  }
+
+  // If we have seen all constant operands, and run out of indexes on one of the
+  // getelementptrs, check to see if the tail of the leftover one is all zeros.
+  // If so, return mustalias.
+  if (UnequalOper == MinOperands) {
+    if (NumGEP1Ops < NumGEP2Ops) {
+      std::swap(GEP1Ops, GEP2Ops);
+      std::swap(NumGEP1Ops, NumGEP2Ops);
+    }
+
+    bool AllAreZeros = true;
+    for (unsigned i = UnequalOper; i != MaxOperands; ++i)
+      if (!isa<Constant>(GEP1Ops[i]) ||
+          !cast<Constant>(GEP1Ops[i])->isNullValue()) {
+        AllAreZeros = false;
+        break;
+      }
+    if (AllAreZeros) return MustAlias;
+  }
+
+
+  // So now we know that the indexes derived from the base pointers,
+  // which are known to alias, are different.  We can still determine a
+  // no-alias result if there are differing constant pairs in the index
+  // chain.  For example:
+  //        A[i][0] != A[j][1] iff (&A[0][1]-&A[0][0] >= std::max(G1S, G2S))
+  //
+  // We have to be careful here about array accesses.  In particular, consider:
+  //        A[1][0] vs A[0][i]
+  // In this case, we don't *know* that the array will be accessed in bounds:
+  // the index could even be negative.  Because of this, we have to
+  // conservatively *give up* and return may alias.  We disregard differing
+  // array subscripts that are followed by a variable index without going
+  // through a struct.
+  //
+  unsigned SizeMax = std::max(G1S, G2S);
+  if (SizeMax == ~0U) return MayAlias; // Avoid frivolous work.
+
+  // Scan for the first operand that is constant and unequal in the
+  // two getelementptrs...
+  unsigned FirstConstantOper = UnequalOper;
+  for (; FirstConstantOper != MinOperands; ++FirstConstantOper) {
+    const Value *G1Oper = GEP1Ops[FirstConstantOper];
+    const Value *G2Oper = GEP2Ops[FirstConstantOper];
+
+    if (G1Oper != G2Oper)   // Found non-equal constant indexes...
+      if (Constant *G1OC = dyn_cast<ConstantInt>(const_cast<Value*>(G1Oper)))
+        if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
+          if (G1OC->getType() != G2OC->getType()) {
+            // Sign extend both operands to long.
+            if (G1OC->getType() != Type::Int64Ty)
+              G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty);
+            if (G2OC->getType() != Type::Int64Ty)
+              G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty);
+            GEP1Ops[FirstConstantOper] = G1OC;
+            GEP2Ops[FirstConstantOper] = G2OC;
+          }
+
+          if (G1OC != G2OC) {
+            // Handle the "be careful" case above: if this is an array/vector
+            // subscript, scan for a subsequent variable array index.
+            if (const SequentialType *STy =
+                  dyn_cast<SequentialType>(BasePtr1Ty)) {
+              const Type *NextTy = STy;
+              bool isBadCase = false;
+
+              for (unsigned Idx = FirstConstantOper;
+                   Idx != MinOperands && isa<SequentialType>(NextTy); ++Idx) {
+                const Value *V1 = GEP1Ops[Idx], *V2 = GEP2Ops[Idx];
+                if (!isa<Constant>(V1) || !isa<Constant>(V2)) {
+                  isBadCase = true;
+                  break;
+                }
+                // If the array is indexed beyond the bounds of the static type
+                // at this level, it will also fall into the "be careful" case.
+                // It would theoretically be possible to analyze these cases,
+                // but for now just be conservatively correct.
+                if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+                  if (cast<ConstantInt>(G1OC)->getZExtValue() >=
+                        ATy->getNumElements() ||
+                      cast<ConstantInt>(G2OC)->getZExtValue() >=
+                        ATy->getNumElements()) {
+                    isBadCase = true;
+                    break;
+                  }
+                if (const VectorType *VTy = dyn_cast<VectorType>(STy))
+                  if (cast<ConstantInt>(G1OC)->getZExtValue() >=
+                        VTy->getNumElements() ||
+                      cast<ConstantInt>(G2OC)->getZExtValue() >=
+                        VTy->getNumElements()) {
+                    isBadCase = true;
+                    break;
+                  }
+                STy = cast<SequentialType>(NextTy);
+                NextTy = cast<SequentialType>(NextTy)->getElementType();
+              }
+
+              if (isBadCase) G1OC = 0;
+            }
+
+            // Make sure they are comparable (ie, not constant expressions), and
+            // make sure the GEP with the smaller leading constant is GEP1.
+            if (G1OC) {
+              Constant *Compare = ConstantExpr::getICmp(ICmpInst::ICMP_SGT,
+                                                        G1OC, G2OC);
+              if (ConstantInt *CV = dyn_cast<ConstantInt>(Compare)) {
+                if (CV->getZExtValue()) {  // If they are comparable and G2 > G1
+                  std::swap(GEP1Ops, GEP2Ops);  // Make GEP1 < GEP2
+                  std::swap(NumGEP1Ops, NumGEP2Ops);
+                }
+                break;
+              }
+            }
+          }
+        }
+    BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->getTypeAtIndex(G1Oper);
+  }
+
+  // No shared constant operands, and we ran out of common operands.  At this
+  // point, the GEP instructions have run through all of their operands, and we
+  // haven't found evidence that there are any deltas between the GEP's.
+  // However, one GEP may have more operands than the other.  If this is the
+  // case, there may still be hope.  Check this now.
+  if (FirstConstantOper == MinOperands) {
+    // Make GEP1Ops be the longer one if there is a longer one.
+    if (NumGEP1Ops < NumGEP2Ops) {
+      std::swap(GEP1Ops, GEP2Ops);
+      std::swap(NumGEP1Ops, NumGEP2Ops);
+    }
+
+    // Is there anything to check?
+    if (NumGEP1Ops > MinOperands) {
+      for (unsigned i = FirstConstantOper; i != MaxOperands; ++i)
+        if (isa<ConstantInt>(GEP1Ops[i]) &&
+            !cast<ConstantInt>(GEP1Ops[i])->isZero()) {
+          // Yup, there's a constant in the tail.  Set all variables to
+          // constants in the GEP instruction to make it suitable for
+          // TargetData::getIndexedOffset.
+          for (i = 0; i != MaxOperands; ++i)
+            if (!isa<ConstantInt>(GEP1Ops[i]))
+              GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType());
+          // Okay, now get the offset.  This is the relative offset for the full
+          // instruction.
+          const TargetData &TD = getTargetData();
+          int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
+                                                NumGEP1Ops);
+
+          // Now check without any constants at the end.
+          int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
+                                                MinOperands);
+
+          // Make sure we compare the absolute difference.
+          if (Offset1 > Offset2)
+            std::swap(Offset1, Offset2);
+
+          // If the tail provided a big enough offset, return noalias!
+          if ((uint64_t)(Offset2-Offset1) >= SizeMax)
+            return NoAlias;
+          // Otherwise break - we don't look for another constant in the tail.
+          break;
+        }
+    }
+
+    // Couldn't find anything useful.
+    return MayAlias;
+  }
+
+  // If there are non-equal constants arguments, then we can figure
+  // out a minimum known delta between the two index expressions... at
+  // this point we know that the first constant index of GEP1 is less
+  // than the first constant index of GEP2.
+
+  // Advance BasePtr[12]Ty over this first differing constant operand.
+  BasePtr2Ty = cast<CompositeType>(BasePtr1Ty)->
+    getTypeAtIndex(GEP2Ops[FirstConstantOper]);
+  BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->
+    getTypeAtIndex(GEP1Ops[FirstConstantOper]);
+
+  // We are going to be using TargetData::getIndexedOffset to determine the
+  // offset that each of the GEP's is reaching.  To do this, we have to convert
+  // all variable references to constant references.  To do this, we convert the
+  // initial sequence of array subscripts into constant zeros to start with.
+  const Type *ZeroIdxTy = GEPPointerTy;
+  for (unsigned i = 0; i != FirstConstantOper; ++i) {
+    if (!isa<StructType>(ZeroIdxTy))
+      GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty);
+
+    if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
+      ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
+  }
+
+  // We know that GEP1Ops[FirstConstantOper] & GEP2Ops[FirstConstantOper] are ok
+
+  // Loop over the rest of the operands...
+  for (unsigned i = FirstConstantOper+1; i != MaxOperands; ++i) {
+    const Value *Op1 = i < NumGEP1Ops ? GEP1Ops[i] : 0;
+    const Value *Op2 = i < NumGEP2Ops ? GEP2Ops[i] : 0;
+    // If they are equal, use a zero index...
+    if (Op1 == Op2 && BasePtr1Ty == BasePtr2Ty) {
+      if (!isa<ConstantInt>(Op1))
+        GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Op1->getType());
+      // Otherwise, just keep the constants we have.
+    } else {
+      if (Op1) {
+        if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+          // If this is an array index, make sure the array element is in range.
+          if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty)) {
+            if (Op1C->getZExtValue() >= AT->getNumElements())
+              return MayAlias;  // Be conservative with out-of-range accesses
+          } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty)) {
+            if (Op1C->getZExtValue() >= VT->getNumElements())
+              return MayAlias;  // Be conservative with out-of-range accesses
+          }
+
+        } else {
+          // GEP1 is known to produce a value less than GEP2.  To be
+          // conservatively correct, we must assume the largest possible
+          // constant is used in this position.  This cannot be the initial
+          // index to the GEP instructions (because we know we have at least one
+          // element before this one with the different constant arguments), so
+          // we know that the current index must be into either a struct or
+          // array.  Because we know it's not constant, this cannot be a
+          // structure index.  Because of this, we can calculate the maximum
+          // value possible.
+          //
+          if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
+            GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1);
+          else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
+            GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1);
+        }
+      }
+
+      if (Op2) {
+        if (const ConstantInt *Op2C = dyn_cast<ConstantInt>(Op2)) {
+          // If this is an array index, make sure the array element is in range.
+          if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr2Ty)) {
+            if (Op2C->getZExtValue() >= AT->getNumElements())
+              return MayAlias;  // Be conservative with out-of-range accesses
+          } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr2Ty)) {
+            if (Op2C->getZExtValue() >= VT->getNumElements())
+              return MayAlias;  // Be conservative with out-of-range accesses
+          }
+        } else {  // Conservatively assume the minimum value for this index
+          GEP2Ops[i] = Constant::getNullValue(Op2->getType());
+        }
+      }
+    }
+
+    if (BasePtr1Ty && Op1) {
+      if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
+        BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[i]);
+      else
+        BasePtr1Ty = 0;
+    }
+
+    if (BasePtr2Ty && Op2) {
+      if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr2Ty))
+        BasePtr2Ty = CT->getTypeAtIndex(GEP2Ops[i]);
+      else
+        BasePtr2Ty = 0;
+    }
+  }
+
+  if (GEPPointerTy->getElementType()->isSized()) {
+    int64_t Offset1 =
+      getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
+    int64_t Offset2 =
+      getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
+    assert(Offset1 != Offset2 &&
+           "There is at least one different constant here!");
+
+    // Make sure we compare the absolute difference.
+    if (Offset1 > Offset2)
+      std::swap(Offset1, Offset2);
+
+    if ((uint64_t)(Offset2-Offset1) >= SizeMax) {
+      //cerr << "Determined that these two GEP's don't alias ["
+      //     << SizeMax << " bytes]: \n" << *GEP1 << *GEP2;
+      return NoAlias;
+    }
+  }
+  return MayAlias;
+}
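The final SizeMax comparison reduces to plain offset arithmetic once every index is constant. A standalone illustration with assumed numbers, mirroring accesses like A[1][0] versus A[0][2] into a hypothetical [10 x [4 x i32]] object:

    #include <cstdint>
    #include <cstdio>

    // Illustrative offset arithmetic for two constant-index GEPs off a
    // must-aliased base, as TargetData::getIndexedOffset would compute it.
    int main() {
      const int64_t RowBytes = 4 * 4;               // [4 x i32] row = 16 bytes
      const int64_t Offset1  = 1 * RowBytes + 0*4;  // A[1][0] -> byte 16
      const int64_t Offset2  = 0 * RowBytes + 2*4;  // A[0][2] -> byte 8
      const uint64_t SizeMax = 4;                   // both accesses load an i32
      // Delta (8) >= SizeMax (4): the accesses cannot overlap -> NoAlias.
      std::printf("delta=%lld noalias=%d\n",
                  (long long)(Offset1 - Offset2),
                  (uint64_t)(Offset1 - Offset2) >= SizeMax);
      return 0;
    }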
+
+// Make sure that anything that uses AliasAnalysis pulls in this file...
+DEFINING_FILE_FOR(BasicAliasAnalysis)
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 000000000000..143220ce3880
--- /dev/null
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,221 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Config/config.h"
+#include <iosfwd>
+#include <sstream>
+#include <fstream>
+using namespace llvm;
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not.  This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
+  static std::string getGraphName(const Function *F) {
+    return "CFG for '" + F->getName() + "' function";
+  }
+
+  static std::string getNodeLabel(const BasicBlock *Node,
+                                  const Function *Graph) {
+    if (CFGOnly && !Node->getName().empty())
+      return Node->getName() + ":";
+
+    std::ostringstream Out;
+    if (CFGOnly) {
+      WriteAsOperand(Out, Node, false);
+      return Out.str();
+    }
+
+    if (Node->getName().empty()) {
+      WriteAsOperand(Out, Node, false);
+      Out << ":";
+    }
+
+    Out << *Node;
+    std::string OutStr = Out.str();
+    if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+    // Process string output to make it nicer...
+    for (unsigned i = 0; i != OutStr.length(); ++i)
+      if (OutStr[i] == '\n') {                 // Left justify
+        OutStr[i] = '\\';
+        OutStr.insert(OutStr.begin()+i+1, 'l');
+      } else if (OutStr[i] == ';') {           // Delete comments!
+        unsigned Idx = OutStr.find('\n', i+1); // Find end of line
+        OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx);
+        --i;
+      }
+
+    return OutStr;
+  }
+
+  static std::string getEdgeSourceLabel(const BasicBlock *Node,
+                                        succ_const_iterator I) {
+    // Label source of conditional branches with "T" or "F"
+    if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator()))
+      if (BI->isConditional())
+        return (I == succ_begin(Node)) ? "T" : "F";
+    return "";
+  }
+};
+}
+
+namespace {
+  struct VISIBILITY_HIDDEN CFGViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGViewer() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      F.viewCFG();
+      return false;
+    }
+
+    void print(std::ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGViewer::ID = 0;
+static RegisterPass<CFGViewer>
+V0("view-cfg", "View CFG of function", false, true);
+
+namespace {
+  struct VISIBILITY_HIDDEN CFGOnlyViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGOnlyViewer() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      CFGOnly = true;
+      F.viewCFG();
+      CFGOnly = false;
+      return false;
+    }
+
+    void print(std::ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGOnlyViewer::ID = 0;
+static RegisterPass<CFGOnlyViewer>
+V1("view-cfg-only",
+   "View CFG of function (with no function bodies)", false, true);
+
+namespace {
+  struct VISIBILITY_HIDDEN CFGPrinter : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGPrinter() : FunctionPass(&ID) {}
+    explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
+
+    virtual bool runOnFunction(Function &F) {
+      std::string Filename = "cfg." + F.getName() + ".dot";
+      cerr << "Writing '" << Filename << "'...";
+      std::ofstream File(Filename.c_str());
+
+      if (File.good())
+        WriteGraph(File, (const Function*)&F);
+      else
+        cerr << " error opening file for writing!";
+      cerr << "\n";
+      return false;
+    }
+
+    void print(std::ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGPrinter::ID = 0;
+static RegisterPass<CFGPrinter>
+P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
+
+namespace {
+  struct VISIBILITY_HIDDEN CFGOnlyPrinter : public CFGPrinter {
+    static char ID; // Pass identification, replacement for typeid
+    CFGOnlyPrinter() : CFGPrinter(&ID) {}
+    virtual bool runOnFunction(Function &F) {
+      bool OldCFGOnly = CFGOnly;
+      CFGOnly = true;
+      CFGPrinter::runOnFunction(F);
+      CFGOnly = OldCFGOnly;
+      return false;
+    }
+    void print(std::ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGOnlyPrinter::ID = 0;
+static RegisterPass<CFGOnlyPrinter>
+P2("dot-cfg-only",
+   "Print CFG of function to 'dot' file (with no function bodies)", false, true);
+
+/// viewCFG - This function is meant for use from the debugger.  You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function.  This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+  ViewGraph(this, "cfg" + getName());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger.  It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label.  If you are only interested in the CFG
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+  CFGOnly = true;
+  viewCFG();
+  CFGOnly = false;
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+  return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+  return new CFGOnlyPrinter();
+}
+
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
new file mode 100644
index 000000000000..093aa69bb19f
--- /dev/null
+++ b/lib/Analysis/CMakeLists.txt
@@ -0,0 +1,34 @@
+add_llvm_library(LLVMAnalysis
+  AliasAnalysis.cpp
+  AliasAnalysisCounter.cpp
+  AliasAnalysisEvaluator.cpp
+  AliasDebugger.cpp
+  AliasSetTracker.cpp
+  Analysis.cpp
+  BasicAliasAnalysis.cpp
+  CaptureTracking.cpp
+  CFGPrinter.cpp
+  ConstantFolding.cpp
+  DbgInfoPrinter.cpp
+  DebugInfo.cpp
+  InstCount.cpp
+  Interval.cpp
+  IntervalPartition.cpp
+  IVUsers.cpp
+  LibCallAliasAnalysis.cpp
+  LibCallSemantics.cpp
+  LiveValues.cpp
+  LoopInfo.cpp
+  LoopPass.cpp
+  LoopVR.cpp
+  MemoryDependenceAnalysis.cpp
+  PostDominators.cpp
+  ProfileInfo.cpp
+  ProfileInfoLoader.cpp
+  ProfileInfoLoaderPass.cpp
+  ScalarEvolution.cpp
+  ScalarEvolutionExpander.cpp
+  SparsePropagation.cpp
+  Trace.cpp
+  ValueTracking.cpp
+  )
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..a19b8e4f94db
--- /dev/null
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,112 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call.  Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global.  Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist).  This routine can
+/// be expensive, so consider caching the results.  The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not.
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
+  assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
+  SmallVector<Use*, 20> Worklist;
+  SmallSet<Use*, 20> Visited;
+
+  for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    Use *U = &UI.getUse();
+    Visited.insert(U);
+    Worklist.push_back(U);
+  }
+
+  while (!Worklist.empty()) {
+    Use *U = Worklist.pop_back_val();
+    Instruction *I = cast<Instruction>(U->getUser());
+    V = U->get();
+
+    switch (I->getOpcode()) {
+    case Instruction::Call:
+    case Instruction::Invoke: {
+      CallSite CS = CallSite::get(I);
+      // Not captured if the callee is readonly, doesn't return a copy through
+      // its return value and doesn't unwind (a readonly function can leak bits
+      // by throwing an exception or not depending on the input value).
+      if (CS.onlyReadsMemory() && CS.doesNotThrow() &&
+          I->getType() == Type::VoidTy)
+        break;
+
+      // Not captured if only passed via 'nocapture' arguments.  Note that
+      // calling a function pointer does not in itself cause the pointer to
+      // be captured.  This is a subtle point considering that (for example)
+      // the callee might return its own address.  It is analogous to saying
+      // that loading a value from a pointer does not cause the pointer to be
+      // captured, even though the loaded value might be the pointer itself
+      // (think of self-referential objects).
+      CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+      for (CallSite::arg_iterator A = B; A != E; ++A)
+        if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+          // The parameter is not marked 'nocapture' - captured.
+          return true;
+      // Only passed via 'nocapture' arguments, or is the called function - not
+      // captured.
+      break;
+    }
+    case Instruction::Free:
+      // Freeing a pointer does not cause it to be captured.
+      break;
+    case Instruction::Load:
+      // Loading from a pointer does not cause it to be captured.
+      break;
+    case Instruction::Ret:
+      if (ReturnCaptures)
+        return true;
+      break;
+    case Instruction::Store:
+      if (V == I->getOperand(0))
+        // Stored the pointer - it may be captured.
+        return true;
+      // Storing to the pointee does not cause the pointer to be captured.
+      break;
+    case Instruction::BitCast:
+    case Instruction::GetElementPtr:
+    case Instruction::PHI:
+    case Instruction::Select:
+      // The original value is not captured via this if the new value isn't.
+      for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+           UI != UE; ++UI) {
+        Use *U = &UI.getUse();
+        if (Visited.insert(U))
+          Worklist.push_back(U);
+      }
+      break;
+    default:
+      // Something else - be conservative and say it is captured.
+      return true;
+    }
+  }
+
+  // All uses examined - not captured.
+  return false;
+}
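For intuition, two C-level situations that the opcode switch above distinguishes; this is illustrative source only, not part of the import:

    #include <cstdio>

    static int *Global;           // a store of p here captures the pointer

    // 'p' is captured: a copy of it outlives the call via Global.
    static void captures(int *p)  { Global = p; }

    // 'p' is not captured: it is only dereferenced; no copy survives the call.
    static void noCapture(int *p) { std::printf("%d\n", *p); }

    int main() {
      int Local = 42;
      noCapture(&Local);  // analogous to a use where PointerMayBeCaptured
                          // would stay false for &Local
      captures(&Local);   // ...and one that would make it return true
      return 0;
    }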
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 000000000000..e5ab3226ce49
--- /dev/null
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,829 @@
+//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions determines the possibility of performing constant
+// folding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant.  Because of
+/// constantexprs, this function is recursive.
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+                                       int64_t &Offset, const TargetData &TD) {
+  // Trivial case, constant is the global.
+  if ((GV = dyn_cast<GlobalValue>(C))) {
+    Offset = 0;
+    return true;
+  }
+
+  // Otherwise, if this isn't a constant expr, bail out.
+  ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+  if (!CE) return false;
+
+  // Look through ptr->int and ptr->ptr casts.
+  if (CE->getOpcode() == Instruction::PtrToInt ||
+      CE->getOpcode() == Instruction::BitCast)
+    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+  if (CE->getOpcode() == Instruction::GetElementPtr) {
+    // Cannot compute this if the element type of the pointer is missing size
+    // info.
+    if (!cast<PointerType>(CE->getOperand(0)->getType())
+          ->getElementType()->isSized())
+      return false;
+
+    // If the base isn't a global+constant, we aren't either.
+    if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+      return false;
+
+    // Otherwise, add any offset that our operands provide.
+    gep_type_iterator GTI = gep_type_begin(CE);
+    for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
+         i != e; ++i, ++GTI) {
+      ConstantInt *CI = dyn_cast<ConstantInt>(*i);
+      if (!CI) return false;  // Index isn't a simple constant?
+      if (CI->getZExtValue() == 0) continue;  // Not adding anything.
+
+      if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+        // N = N + Offset
+        Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
+      } else {
+        const SequentialType *SQT = cast<SequentialType>(*GTI);
+        Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
+      }
+    }
+    return true;
+  }
+
+  return false;
+}
+
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together.  If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+                                           Constant *Op1, const TargetData *TD){
+  // SROA
+
+  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+  // bits.
+
+
+  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+  // constant.  This happens frequently when iterating over a global array.
+  if (Opc == Instruction::Sub && TD) {
+    GlobalValue *GV1, *GV2;
+    int64_t Offs1, Offs2;
+
+    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+          GV1 == GV2) {
+        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+        return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+      }
+  }
+
+  return 0;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
+                                         const Type *ResultTy,
+                                         const TargetData *TD) {
+  Constant *Ptr = Ops[0];
+  if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+    return 0;
+
+  uint64_t BasePtr = 0;
+  if (!Ptr->isNullValue()) {
+    // If this is a inttoptr from a constant int, we can fold this as the base,
+    // otherwise we can't.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+      if (CE->getOpcode() == Instruction::IntToPtr)
+        if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+          BasePtr = Base->getZExtValue();
+
+    if (BasePtr == 0)
+      return 0;
+  }
+
+  // If this is a constant expr gep that is effectively computing an
+  // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+  for (unsigned i = 1; i != NumOps; ++i)
+    if (!isa<ConstantInt>(Ops[i]))
+      return 0;
+
+  uint64_t Offset = TD->getIndexedOffset(Ptr->getType(),
+                                         (Value**)Ops+1, NumOps-1);
+  Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr);
+  return ConstantExpr::getIntToPtr(C, ResultTy);
+}
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// targetdata.  Return 0 if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+                             const TargetData &TD) {
+  // If this is a bitcast from constant vector -> vector, fold it.
+  if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+    if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+      // If the element types match, VMCore can fold it.
+      unsigned NumDstElt = DestVTy->getNumElements();
+      unsigned NumSrcElt = CV->getNumOperands();
+      if (NumDstElt == NumSrcElt)
+        return 0;
+
+      const Type *SrcEltTy = CV->getType()->getElementType();
+      const Type *DstEltTy = DestVTy->getElementType();
+
+      // Otherwise, we're changing the number of elements in a vector, which
+      // requires endianness information to do the right thing.  For example,
+      //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+      // folds to (little endian):
+      //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+      // and to (big endian):
+      //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+      // First thing is first.  We only want to think about integer here, so if
+      // we have something in FP form, recast it as integer.
+      if (DstEltTy->isFloatingPoint()) {
+        // Fold to a vector of integers with same size as our FP type.
+        unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+        const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth),
+                                               NumDstElt);
+        // Recursively handle this integer conversion, if possible.
+        C = FoldBitCast(C, DestIVTy, TD);
+        if (!C) return 0;
+
+        // Finally, VMCore can handle this now that #elts line up.
+        return ConstantExpr::getBitCast(C, DestTy);
+      }
+
+      // Okay, we know the destination is integer, if the input is FP, convert
+      // it to integer first.
+      if (SrcEltTy->isFloatingPoint()) {
+        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+        const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth),
+                                              NumSrcElt);
+        // Ask VMCore to do the conversion now that #elts line up.
+        C = ConstantExpr::getBitCast(C, SrcIVTy);
+        CV = dyn_cast<ConstantVector>(C);
+        if (!CV) return 0;  // If VMCore wasn't able to fold it, bail out.
+      }
+
+      // Now we know that the input and output vectors are both integer vectors
+      // of the same size, and that their #elements is not the same.  Do the
+      // conversion here, which depends on whether the input or output has
+      // more elements.
+      bool isLittleEndian = TD.isLittleEndian();
+
+      SmallVector<Constant*, 32> Result;
+      if (NumDstElt < NumSrcElt) {
+        // Handle: bitcast (<4 x i32> to <2 x i64>)
+        Constant *Zero = Constant::getNullValue(DstEltTy);
+        unsigned Ratio = NumSrcElt/NumDstElt;
+        unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+        unsigned SrcElt = 0;
+        for (unsigned i = 0; i != NumDstElt; ++i) {
+          // Build each element of the result.
+          Constant *Elt = Zero;
+          unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+          for (unsigned j = 0; j != Ratio; ++j) {
+            Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+            if (!Src) return 0;  // Reject constantexpr elements.
+
+            // Zero extend the element to the right size.
+            Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+            // Shift it to the right place, depending on endianness.
+            Src = ConstantExpr::getShl(Src,
+                             ConstantInt::get(Src->getType(), ShiftAmt));
+            ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+            // Mix it in.
+            Elt = ConstantExpr::getOr(Elt, Src);
+          }
+          Result.push_back(Elt);
+        }
+      } else {
+        // Handle: bitcast (<2 x i64> to <4 x i32>)
+        unsigned Ratio = NumDstElt/NumSrcElt;
+        unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+        // Loop over each source value, expanding into multiple results.
+        for (unsigned i = 0; i != NumSrcElt; ++i) {
+          Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+          if (!Src) return 0;  // Reject constantexpr elements.
+
+          unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+          for (unsigned j = 0; j != Ratio; ++j) {
+            // Shift the piece of the value into the right place, depending on
+            // endianness.
+            Constant *Elt = ConstantExpr::getLShr(Src,
+                            ConstantInt::get(Src->getType(), ShiftAmt));
+            ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+            // Truncate and remember this piece.
+            Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+          }
+        }
+      }
+
+      return ConstantVector::get(Result.data(), Result.size());
+    }
+  }
+
+  return 0;
+}
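The little-endian narrowing arithmetic above can be checked on plain integers; the constants here match the <2 x i64> <i64 0, i64 1> example in the comment (sketch only, outside the Constant machinery):

    #include <cstdint>
    #include <cstdio>

    // Expand the i64 element 1 into two i32 pieces, little-endian: shift right
    // by 0 then 32 bits and truncate, as the getLShr/getTrunc loop above does.
    int main() {
      const uint64_t Src = 1;         // second element of <2 x i64> <i64 0, i64 1>
      for (unsigned j = 0; j != 2; ++j) {
        uint32_t Piece = (uint32_t)(Src >> (32 * j));
        std::printf("i32 %u\n", Piece);  // prints 1 then 0
      }
      return 0;
    }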
+  SmallVector<Constant*, 8> Ops;
+  for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+    if (Constant *Op = dyn_cast<Constant>(*i))
+      Ops.push_back(Op);
+    else
+      return 0;  // All operands not constant!
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+    return ConstantFoldCompareInstOperands(CI->getPredicate(),
+                                           Ops.data(), Ops.size(), TD);
+  else
+    return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                    Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData.  If successful, the constant result is
+/// returned, if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+                                               const TargetData *TD) {
+  assert(TD && "ConstantFoldConstantExpression requires a valid TargetData.");
+
+  SmallVector<Constant*, 8> Ops;
+  for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+    Ops.push_back(cast<Constant>(*i));
+
+  if (CE->isCompare())
+    return ConstantFoldCompareInstOperands(CE->getPredicate(),
+                                           Ops.data(), Ops.size(), TD);
+  else
+    return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+                                    Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands.  If successful, the constant result is
+/// returned, if not, null is returned.  Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+                                         Constant* const* Ops, unsigned NumOps,
+                                         const TargetData *TD) {
+  // Handle easy binops first.
+  if (Instruction::isBinaryOp(Opcode)) {
+    if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+        return C;
+
+    return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+  }
+
+  switch (Opcode) {
+  default: return 0;
+  case Instruction::Call:
+    if (Function *F = dyn_cast<Function>(Ops[0]))
+      if (canConstantFoldCallTo(F))
+        return ConstantFoldCall(F, Ops+1, NumOps-1);
+    return 0;
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+  case Instruction::VICmp:
+  case Instruction::VFCmp:
+    assert(0 && "This function is invalid for compares: no predicate specified");
+  case Instruction::PtrToInt:
+    // If the input is an inttoptr, eliminate the pair.  This requires knowing
+    // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+      if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+        Constant *Input = CE->getOperand(0);
+        unsigned InWidth = Input->getType()->getPrimitiveSizeInBits();
+        if (TD->getPointerSizeInBits() < InWidth) {
+          Constant *Mask =
+            ConstantInt::get(APInt::getLowBitsSet(InWidth,
+                                                  TD->getPointerSizeInBits()));
+          Input = ConstantExpr::getAnd(Input, Mask);
+        }
+        // Do a zext or trunc to get to the dest size.
+        return ConstantExpr::getIntegerCast(Input, DestTy, false);
+      }
+    }
+    return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+  case Instruction::IntToPtr:
+    // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+    // the int size is >= the ptr size.  This requires knowing the width of a
+    // pointer, so it can't be done in ConstantExpr::getCast.
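+    // Editor's illustration (not part of the original source), assuming a
+    // target with 64-bit pointers:
+    //    inttoptr (i64 ptrtoint (i32* @G to i64) to i8*)
+    // becomes
+    //    bitcast (i32* @G to i8*)
+    // since no bits can be lost when the integer is at least pointer-sized.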
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+      if (TD &&
+          TD->getPointerSizeInBits() <=
+          CE->getType()->getPrimitiveSizeInBits()) {
+        if (CE->getOpcode() == Instruction::PtrToInt) {
+          Constant *Input = CE->getOperand(0);
+          Constant *C = FoldBitCast(Input, DestTy, *TD);
+          return C ? C : ConstantExpr::getBitCast(Input, DestTy);
+        }
+        // If there's a constant offset added to the integer value before
+        // it is cast back to a pointer, see if the expression can be
+        // converted into a GEP.
+        if (CE->getOpcode() == Instruction::Add)
+          if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0)))
+            if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1)))
+              if (R->getOpcode() == Instruction::PtrToInt)
+                if (GlobalVariable *GV =
+                      dyn_cast<GlobalVariable>(R->getOperand(0))) {
+                  const PointerType *GVTy = cast<PointerType>(GV->getType());
+                  if (const ArrayType *AT =
+                        dyn_cast<ArrayType>(GVTy->getElementType())) {
+                    const Type *ElTy = AT->getElementType();
+                    uint64_t AllocSize = TD->getTypeAllocSize(ElTy);
+                    APInt PSA(L->getValue().getBitWidth(), AllocSize);
+                    if (ElTy == cast<PointerType>(DestTy)->getElementType() &&
+                        L->getValue().urem(PSA) == 0) {
+                      APInt ElemIdx = L->getValue().udiv(PSA);
+                      if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(),
+                                            AT->getNumElements()))) {
+                        Constant *Index[] = {
+                          Constant::getNullValue(CE->getType()),
+                          ConstantInt::get(ElemIdx)
+                        };
+                        return ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
+                      }
+                    }
+                  }
+                }
+      }
+    }
+    return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+    return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+  case Instruction::BitCast:
+    if (TD)
+      if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD))
+        return C;
+    return ConstantExpr::getBitCast(Ops[0], DestTy);
+  case Instruction::Select:
+    return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+  case Instruction::ExtractElement:
+    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+  case Instruction::InsertElement:
+    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+  case Instruction::ShuffleVector:
+    return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+  case Instruction::GetElementPtr:
+    if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+      return C;
+
+    return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+  }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands.  If it fails, it
+/// returns a constant expression of the specified operands.
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+                                                Constant*const * Ops,
+                                                unsigned NumOps,
+                                                const TargetData *TD) {
+  // fold: icmp (inttoptr x), null         -> icmp x, 0
+  // fold: icmp (ptrtoint x), 0            -> icmp x, null
+  // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+  // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+  //
+  // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+  // around to know if bit truncation is happening.
+  if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
+    if (TD && Ops[1]->isNullValue()) {
+      const Type *IntPtrTy = TD->getIntPtrType();
+      if (CE0->getOpcode() == Instruction::IntToPtr) {
+        // Convert the integer value to the right size to ensure we get the
+        // proper extension or truncation.
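+        // Editor's illustration (not part of the original source): with
+        // 32-bit pointers,
+        //    icmp eq (i8* inttoptr (i64 4294967296 to i8*)), null
+        // truncates the i64 to i32 0 first, so the recursive call below
+        // folds the comparison to 'true'.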
+        Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+                                                   IntPtrTy, false);
+        Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
+        return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+      }
+
+      // Only do this transformation if the int is intptrty in size, otherwise
+      // there is a truncation or extension that we aren't modeling.
+      if (CE0->getOpcode() == Instruction::PtrToInt &&
+          CE0->getType() == IntPtrTy) {
+        Constant *C = CE0->getOperand(0);
+        Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
+        // FIXME!
+        return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+      }
+    }
+
+    if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
+      if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+        const Type *IntPtrTy = TD->getIntPtrType();
+
+        if (CE0->getOpcode() == Instruction::IntToPtr) {
+          // Convert the integer value to the right size to ensure we get the
+          // proper extension or truncation.
+          Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+                                                      IntPtrTy, false);
+          Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+                                                      IntPtrTy, false);
+          Constant *NewOps[] = { C0, C1 };
+          return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+        }
+
+        // Only do this transformation if the int is intptrty in size,
+        // otherwise there is a truncation or extension that we aren't
+        // modeling.
+        if ((CE0->getOpcode() == Instruction::PtrToInt &&
+             CE0->getType() == IntPtrTy &&
+             CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) {
+          Constant *NewOps[] = {
+            CE0->getOperand(0), CE1->getOperand(0)
+          };
+          return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+        }
+      }
+    }
+  }
+  return ConstantExpr::getCompare(Predicate, Ops[0], Ops[1]);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by
+/// the constant expression, or null if something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+                                                       ConstantExpr *CE) {
+  if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+    return 0;  // Do not allow stepping over the value!
+
+  // Loop over all of the operands, tracking down which value we are
+  // addressing...
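+  // Editor's illustration (not part of the original source): given
+  //    @S = global { i32, [2 x i32] } { i32 1, [2 x i32] [i32 2, i32 3] }
+  // a load of 'getelementptr ({ i32, [2 x i32] }* @S, i32 0, i32 1, i32 1)'
+  // walks the initializer below and folds to i32 3.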
+  gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+  for (++I; I != E; ++I)
+    if (const StructType *STy = dyn_cast<StructType>(*I)) {
+      ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+      assert(CU->getZExtValue() < STy->getNumElements() &&
+             "Struct index out of range!");
+      unsigned El = (unsigned)CU->getZExtValue();
+      if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+        C = CS->getOperand(El);
+      } else if (isa<ConstantAggregateZero>(C)) {
+        C = Constant::getNullValue(STy->getElementType(El));
+      } else if (isa<UndefValue>(C)) {
+        C = UndefValue::get(STy->getElementType(El));
+      } else {
+        return 0;
+      }
+    } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+      if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+        if (CI->getZExtValue() >= ATy->getNumElements())
+          return 0;
+        if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+          C = CA->getOperand(CI->getZExtValue());
+        else if (isa<ConstantAggregateZero>(C))
+          C = Constant::getNullValue(ATy->getElementType());
+        else if (isa<UndefValue>(C))
+          C = UndefValue::get(ATy->getElementType());
+        else
+          return 0;
+      } else if (const VectorType *PTy = dyn_cast<VectorType>(*I)) {
+        if (CI->getZExtValue() >= PTy->getNumElements())
+          return 0;
+        if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+          C = CP->getOperand(CI->getZExtValue());
+        else if (isa<ConstantAggregateZero>(C))
+          C = Constant::getNullValue(PTy->getElementType());
+        else if (isa<UndefValue>(C))
+          C = UndefValue::get(PTy->getElementType());
+        else
+          return 0;
+      } else {
+        return 0;
+      }
+    } else {
+      return 0;
+    }
+  return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+  switch (F->getIntrinsicID()) {
+  case Intrinsic::sqrt:
+  case Intrinsic::powi:
+  case Intrinsic::bswap:
+  case Intrinsic::ctpop:
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+    return true;
+  default: break;
+  }
+
+  if (!F->hasName()) return false;
+  const char *Str = F->getNameStart();
+  unsigned Len = F->getNameLen();
+
+  // In these cases, the check of the length is required.  We don't want to
+  // return true for a name like "cos\0blah" which strcmp would return equal to
+  // "cos", but has length 8.
+  switch (Str[0]) {
+  default: return false;
+  case 'a':
+    if (Len == 4)
+      return !strcmp(Str, "acos") || !strcmp(Str, "asin") ||
+             !strcmp(Str, "atan");
+    else if (Len == 5)
+      return !strcmp(Str, "atan2");
+    return false;
+  case 'c':
+    if (Len == 3)
+      return !strcmp(Str, "cos");
+    else if (Len == 4)
+      return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") ||
+             !strcmp(Str, "cosh");
+    return false;
+  case 'e':
+    if (Len == 3)
+      return !strcmp(Str, "exp");
+    return false;
+  case 'f':
+    if (Len == 4)
+      return !strcmp(Str, "fabs") || !strcmp(Str, "fmod");
+    else if (Len == 5)
+      return !strcmp(Str, "floor");
+    return false;
+  case 'l':
+    if (Len == 3 && !strcmp(Str, "log"))
+      return true;
+    if (Len == 5 && !strcmp(Str, "log10"))
+      return true;
+    return false;
+  case 'p':
+    if (Len == 3 && !strcmp(Str, "pow"))
+      return true;
+    return false;
+  case 's':
+    if (Len == 3)
+      return !strcmp(Str, "sin");
+    if (Len == 4)
+      return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") ||
+             !strcmp(Str, "sinf");
+    if (Len == 5)
+      return !strcmp(Str, "sqrtf");
+    return false;
+  case 't':
+    if (Len == 3 && !strcmp(Str, "tan"))
+      return true;
+    else if (Len == 4 && !strcmp(Str, "tanh"))
+      return true;
+    return false;
+  }
+}
+
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+                                const Type *Ty) {
+  errno = 0;
+  V = NativeFP(V);
+  if (errno != 0) {
+    errno = 0;
+    return 0;
+  }
+
+  if (Ty == Type::FloatTy)
+    return ConstantFP::get(APFloat((float)V));
+  if (Ty == Type::DoubleTy)
+    return ConstantFP::get(APFloat(V));
+  assert(0 && "Can only constant fold float/double");
+  return 0; // dummy return to suppress warning
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+                                      double V, double W,
+                                      const Type *Ty) {
+  errno = 0;
+  V = NativeFP(V, W);
+  if (errno != 0) {
+    errno = 0;
+    return 0;
+  }
+
+  if (Ty == Type::FloatTy)
+    return ConstantFP::get(APFloat((float)V));
+  if (Ty == Type::DoubleTy)
+    return ConstantFP::get(APFloat(V));
+  assert(0 && "Can only constant fold float/double");
+  return 0; // dummy return to suppress warning
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified
+/// function with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F,
+                       Constant* const* Operands, unsigned NumOperands) {
+  if (!F->hasName()) return 0;
+  const char *Str = F->getNameStart();
+  unsigned Len = F->getNameLen();
+
+  const Type *Ty = F->getReturnType();
+  if (NumOperands == 1) {
+    if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+      if (Ty != Type::FloatTy && Ty != Type::DoubleTy)
+        return 0;
+      // Currently APFloat versions of these functions do not exist, so we use
+      // the host native double versions.  Float versions are not called
+      // directly but for all these it is true (float)(f((double)arg)) ==
+      // f(arg).  Long double not supported yet.
+      double V = Ty==Type::FloatTy ?
+                   (double)Op->getValueAPF().convertToFloat():
+                   Op->getValueAPF().convertToDouble();
+      switch (Str[0]) {
+      case 'a':
+        if (Len == 4 && !strcmp(Str, "acos"))
+          return ConstantFoldFP(acos, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "asin"))
+          return ConstantFoldFP(asin, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "atan"))
+          return ConstantFoldFP(atan, V, Ty);
+        break;
+      case 'c':
+        if (Len == 4 && !strcmp(Str, "ceil"))
+          return ConstantFoldFP(ceil, V, Ty);
+        else if (Len == 3 && !strcmp(Str, "cos"))
+          return ConstantFoldFP(cos, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "cosh"))
+          return ConstantFoldFP(cosh, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "cosf"))
+          return ConstantFoldFP(cos, V, Ty);
+        break;
+      case 'e':
+        if (Len == 3 && !strcmp(Str, "exp"))
+          return ConstantFoldFP(exp, V, Ty);
+        break;
+      case 'f':
+        if (Len == 4 && !strcmp(Str, "fabs"))
+          return ConstantFoldFP(fabs, V, Ty);
+        else if (Len == 5 && !strcmp(Str, "floor"))
+          return ConstantFoldFP(floor, V, Ty);
+        break;
+      case 'l':
+        if (Len == 3 && !strcmp(Str, "log") && V > 0)
+          return ConstantFoldFP(log, V, Ty);
+        else if (Len == 5 && !strcmp(Str, "log10") && V > 0)
+          return ConstantFoldFP(log10, V, Ty);
+        else if (!strcmp(Str, "llvm.sqrt.f32") ||
+                 !strcmp(Str, "llvm.sqrt.f64")) {
+          if (V >= -0.0)
+            return ConstantFoldFP(sqrt, V, Ty);
+          else // Undefined
+            return Constant::getNullValue(Ty);
+        }
+        break;
+      case 's':
+        if (Len == 3 && !strcmp(Str, "sin"))
+          return ConstantFoldFP(sin, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "sinh"))
+          return ConstantFoldFP(sinh, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0)
+          return ConstantFoldFP(sqrt, V, Ty);
+        else if (Len == 5 && !strcmp(Str, "sqrtf") && V >= 0)
+          return ConstantFoldFP(sqrt, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "sinf"))
+          return ConstantFoldFP(sin, V, Ty);
+        break;
+      case 't':
+        if (Len == 3 && !strcmp(Str, "tan"))
+          return ConstantFoldFP(tan, V, Ty);
+        else if (Len == 4 && !strcmp(Str, "tanh"))
+          return ConstantFoldFP(tanh, V, Ty);
+        break;
+      default:
+        break;
+      }
+    } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      if (Len > 11 && !memcmp(Str, "llvm.bswap", 10))
+        return ConstantInt::get(Op->getValue().byteSwap());
+      else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10))
+        return ConstantInt::get(Ty, Op->getValue().countPopulation());
+      else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9))
+        return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+      else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9))
+        return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+    }
+  } else if (NumOperands == 2) {
+    if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+      if (Ty != Type::FloatTy && Ty != Type::DoubleTy)
+        return 0;
+      double Op1V = Ty==Type::FloatTy ?
+                      (double)Op1->getValueAPF().convertToFloat():
+                      Op1->getValueAPF().convertToDouble();
+      if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+        double Op2V = Ty==Type::FloatTy ?
+                        (double)Op2->getValueAPF().convertToFloat():
+                        Op2->getValueAPF().convertToDouble();
+
+        if (Len == 3 && !strcmp(Str, "pow")) {
+          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+        } else if (Len == 4 && !strcmp(Str, "fmod")) {
+          return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+        } else if (Len == 5 && !strcmp(Str, "atan2")) {
+          return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+        }
+      } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+        if (!strcmp(Str, "llvm.powi.f32")) {
+          return ConstantFP::get(APFloat((float)std::pow((float)Op1V,
+                                                 (int)Op2C->getZExtValue())));
+        } else if (!strcmp(Str, "llvm.powi.f64")) {
+          return ConstantFP::get(APFloat((double)std::pow((double)Op1V,
+                                                 (int)Op2C->getZExtValue())));
+        }
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 000000000000..d80d5811e096
--- /dev/null
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,167 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions, and associated debug
+// info:
+//
+//   - source/line/col information
+//   - original variable name
+//   - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+               cl::desc("Print fullpath when printing debug info"),
+               cl::Hidden);
+
+namespace {
+  class VISIBILITY_HIDDEN PrintDbgInfo : public FunctionPass {
+    raw_ostream &Out;
+    void printStopPoint(const DbgStopPointInst *DSI);
+    void printFuncStart(const DbgFuncStartInst *FS);
+    void printVariableDeclaration(const Value *V);
+  public:
+    static char ID; // Pass identification
+    PrintDbgInfo() : FunctionPass(&ID), Out(outs()) {}
+
+    virtual bool runOnFunction(Function &F);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+  char PrintDbgInfo::ID = 0;
+  static RegisterPass<PrintDbgInfo> X("print-dbginfo",
+                                      "Print debug info in human readable form");
+}
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+  std::string DisplayName, File, Directory, Type;
+  unsigned LineNo;
+
+  if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+    return;
+
+  Out << "; ";
+  WriteAsOperand(Out, V, false, 0);
+  Out << " is variable " << DisplayName
+      << " of type " << Type << " declared at ";
+
+  if (PrintDirectory)
+    Out << Directory << "/";
+
+  Out << File << ":" << LineNo << "\n";
+}
+
+void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
+  if (PrintDirectory) {
+    std::string dir;
+    GetConstantStringInfo(DSI->getDirectory(), dir);
+    Out << dir << "/";
+  }
+
+  std::string file;
+  GetConstantStringInfo(DSI->getFileName(), file);
+  Out << file << ":" << DSI->getLine();
+
+  if (unsigned Col = DSI->getColumn())
+    Out << ":" << Col;
+}
+
+void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
+  DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
+  std::string Res1, Res2;
+  Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
+      << " return type: " << Subprogram.getType().getName(Res2)
+      << " at line " << Subprogram.getLineNumber()
+      << "\n\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  Out << "function " << F.getName() << "\n\n";
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *BB = I;
+
+    if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+      // Skip dead blocks.
+      continue;
+
+    const DbgStopPointInst *DSI = findBBStopPoint(BB);
+    Out << BB->getName();
+    Out << ":";
+
+    if (DSI) {
+      Out << "; (";
+      printStopPoint(DSI);
+      Out << ")";
+    }
+
+    Out << "\n";
+
+    // A dbgstoppoint's information is valid until we encounter a new one.
+    const DbgStopPointInst *LastDSP = DSI;
+    bool Printed = DSI != 0;
+    for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+         i != e; ++i) {
+      if (isa<DbgInfoIntrinsic>(i)) {
+        if ((DSI = dyn_cast<DbgStopPointInst>(i))) {
+          if (DSI->getContext() == LastDSP->getContext() &&
+              DSI->getLineValue() == LastDSP->getLineValue() &&
+              DSI->getColumnValue() == LastDSP->getColumnValue())
+            // Don't print the same location twice.
+            continue;
+
+          LastDSP = cast<DbgStopPointInst>(i);
+
+          // Don't print consecutive stoppoints; use a flag to know which one
+          // we printed.
+          Printed = false;
+        } else if (const DbgFuncStartInst *FS = dyn_cast<DbgFuncStartInst>(i)) {
+          printFuncStart(FS);
+        }
+      } else {
+        if (!Printed && LastDSP) {
+          Out << "; ";
+          printStopPoint(LastDSP);
+          Out << "\n";
+          Printed = true;
+        }
+
+        Out << *i;
+        printVariableDeclaration(i);
+
+        if (const User *U = dyn_cast<User>(i)) {
+          for (unsigned i = 0; i < U->getNumOperands(); ++i)
+            printVariableDeclaration(U->getOperand(i));
+        }
+      }
+    }
+  }
+
+  return false;
+}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 000000000000..6bdb64c975cc
--- /dev/null
+++ b/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,1079 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+/// ValidDebugInfo - Return true if V represents a valid debug info value.
+bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
+  if (!V)
+    return false;
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V->stripPointerCasts());
+  if (!GV)
+    return false;
+
+  if (!GV->hasInternalLinkage() && !GV->hasLinkOnceLinkage())
+    return false;
+
+  DIDescriptor DI(GV);
+
+  // Check current version.  Allow Version6 for now.
+  unsigned Version = DI.getVersion();
+  if (Version != LLVMDebugVersion && Version != LLVMDebugVersion6)
+    return false;
+
+  unsigned Tag = DI.getTag();
+  switch (Tag) {
+  case DW_TAG_variable:
+    assert(DIVariable(GV).Verify() && "Invalid DebugInfo value");
+    break;
+  case DW_TAG_compile_unit:
+    assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value");
+    break;
+  case DW_TAG_subprogram:
+    assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value");
+    break;
+  case DW_TAG_lexical_block:
+    // FIXME: This interferes with the quality of generated code during
+    // optimization.
+    if (OptLevel != CodeGenOpt::None)
+      return false;
+    // FALLTHROUGH
+  default:
+    break;
+  }
+
+  return true;
+}
+
+DIDescriptor::DIDescriptor(GlobalVariable *gv, unsigned RequiredTag) {
+  GV = gv;
+
+  // If this is non-null, check to see if the Tag matches.  If not, set to null.
+  if (GV && getTag() != RequiredTag)
+    GV = 0;
+}
+
+const std::string &
+DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
+  if (GV == 0) {
+    Result.clear();
+    return Result;
+  }
+
+  Constant *C = GV->getInitializer();
+  if (C == 0 || Elt >= C->getNumOperands()) {
+    Result.clear();
+    return Result;
+  }
+
+  // Fills in the string if it succeeds.
+  if (!GetConstantStringInfo(C->getOperand(Elt), Result))
+    Result.clear();
+
+  return Result;
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+  if (GV == 0) return 0;
+
+  Constant *C = GV->getInitializer();
+  if (C == 0 || Elt >= C->getNumOperands())
+    return 0;
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(C->getOperand(Elt)))
+    return CI->getZExtValue();
+  return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+  if (GV == 0) return DIDescriptor();
+
+  Constant *C = GV->getInitializer();
+  if (C == 0 || Elt >= C->getNumOperands())
+    return DIDescriptor();
+
+  C = C->getOperand(Elt);
+  return DIDescriptor(dyn_cast<GlobalVariable>(C->stripPointerCasts()));
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+  if (GV == 0) return 0;
+
+  Constant *C = GV->getInitializer();
+  if (C == 0 || Elt >= C->getNumOperands())
+    return 0;
+
+  C = C->getOperand(Elt);
+  return dyn_cast<GlobalVariable>(C->stripPointerCasts());
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+// Needed by DIVariable::getType().
+DIType::DIType(GlobalVariable *gv) : DIDescriptor(gv) {
+  if (!gv) return;
+  unsigned tag = getTag();
+  if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
+      !DICompositeType::isCompositeType(tag))
+    GV = 0;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for
+/// DIDerivedType.
+bool DIType::isDerivedType(unsigned Tag) {
+  switch (Tag) {
+  case dwarf::DW_TAG_typedef:
+  case dwarf::DW_TAG_pointer_type:
+  case dwarf::DW_TAG_reference_type:
+  case dwarf::DW_TAG_const_type:
+  case dwarf::DW_TAG_volatile_type:
+  case dwarf::DW_TAG_restrict_type:
+  case dwarf::DW_TAG_member:
+  case dwarf::DW_TAG_inheritance:
+    return true;
+  default:
+    // FIXME: Even though it doesn't make sense, CompositeTypes are currently
+    // modelled as DerivedTypes; this should return true for them as well.
+    return false;
+  }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIType::isCompositeType(unsigned TAG) {
+  switch (TAG) {
+  case dwarf::DW_TAG_array_type:
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_enumeration_type:
+  case dwarf::DW_TAG_vector_type:
+  case dwarf::DW_TAG_subroutine_type:
+  case dwarf::DW_TAG_class_type:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIVariable::isVariable(unsigned Tag) {
+  switch (Tag) {
+  case dwarf::DW_TAG_auto_variable:
+  case dwarf::DW_TAG_arg_variable:
+  case dwarf::DW_TAG_return_variable:
+    return true;
+  default:
+    return false;
+  }
+}
+
+unsigned DIArray::getNumElements() const {
+  assert(GV && "Invalid DIArray");
+  Constant *C = GV->getInitializer();
+  assert(C && "Invalid DIArray initializer");
+  return C->getNumOperands();
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+  if (isNull())
+    return false;
+  std::string Res;
+  if (getFilename(Res).empty())
+    return false;
+  // It is possible that the directory and producer strings are empty.
+  return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+  if (isNull())
+    return false;
+  if (getContext().isNull())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.isNull() && !CU.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+  if (isNull())
+    return false;
+  if (getContext().isNull())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.isNull() && !CU.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+  if (isNull())
+    return false;
+
+  if (getContext().isNull())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+
+  DICompositeType Ty = getType();
+  if (!Ty.isNull() && !Ty.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+  if (isNull())
+    return false;
+
+  if (getContext().isNull())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.isNull() && !CU.Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  if (!getGlobal())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+  if (isNull())
+    return false;
+
+  if (getContext().isNull())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+  if (getTag() != dwarf::DW_TAG_member)
+    return getSizeInBits();
+  DIType BT = getTypeDerivedFrom();
+  if (BT.getTag() != dwarf::DW_TAG_base_type)
+    return getSizeInBits();
+  return BT.getSizeInBits();
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+  assert(F && "Invalid function");
+  std::string Name;
+  getLinkageName(Name);
+  if (Name.empty())
+    getName(Name);
+  if (!Name.empty() && strcmp(Name.c_str(), F->getNameStart()) == 0)
+    return true;
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Basic Helpers
+//===----------------------------------------------------------------------===//
+
+DIFactory::DIFactory(Module &m)
+  : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0),
+    DeclareFn(0) {
+  EmptyStructPtr = PointerType::getUnqual(StructType::get(NULL, NULL));
+}
+
+/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'.
+/// This is only valid when the descriptor is non-null.
+Constant *DIFactory::getCastToEmpty(DIDescriptor D) {
+  if (D.isNull()) return Constant::getNullValue(EmptyStructPtr);
+  return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr);
+}
+
+Constant *DIFactory::GetTagConstant(unsigned TAG) {
+  assert((TAG & LLVMDebugVersionMask) == 0 &&
+         "Tag too large for debug encoding!");
+  return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion);
+}
+
+Constant *DIFactory::GetStringConstant(const std::string &String) {
+  // Check string cache for previous edition.
+  Constant *&Slot = StringCache[String];
+
+  // Return Constant if previously defined.
+  if (Slot) return Slot;
+
+  const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
+
+  // If the string is empty, use an i8* null instead.
+  if (String.empty())
+    return Slot = ConstantPointerNull::get(DestTy);
+
+  // Construct string as an llvm constant.
+  Constant *ConstStr = ConstantArray::get(String);
+
+  // Otherwise create and return a new string global.
+  GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
+                                             GlobalVariable::InternalLinkage,
+                                             ConstStr, ".str", &M);
+  StrGV->setSection("llvm.metadata");
+  return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
+}
+
+/// GetOrCreateAnchor - Look up an anchor for the specified tag and name.  If
+/// it already exists, return it.  If not, create a new one and return it.
+DIAnchor DIFactory::GetOrCreateAnchor(unsigned TAG, const char *Name) {
+  const Type *EltTy = StructType::get(Type::Int32Ty, Type::Int32Ty, NULL);
+
+  // Otherwise, create the global or return it if already in the module.
+  Constant *C = M.getOrInsertGlobal(Name, EltTy);
+  assert(isa<GlobalVariable>(C) && "Incorrectly typed anchor?");
+  GlobalVariable *GV = cast<GlobalVariable>(C);
+
+  // If it has an initializer, it is already in the module.
+  if (GV->hasInitializer())
+    return SubProgramAnchor = DIAnchor(GV);
+
+  GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+  GV->setSection("llvm.metadata");
+  GV->setConstant(true);
+  M.addTypeName("llvm.dbg.anchor.type", EltTy);
+
+  // Otherwise, set the initializer.
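+  // Editor's note (not part of the original source): the anchor initializer
+  // is an { i32, i32 } pair holding the version-encoded DW_TAG_anchor followed
+  // by the tag being anchored; e.g. llvm.dbg.subprograms is, conceptually,
+  //    { DW_TAG_anchor | LLVMDebugVersion, DW_TAG_subprogram }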
+  Constant *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_anchor),
+    ConstantInt::get(Type::Int32Ty, TAG)
+  };
+
+  GV->setInitializer(ConstantStruct::get(Elts, 2));
+  return DIAnchor(GV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Primary Constructors
+//===----------------------------------------------------------------------===//
+
+/// GetOrCreateCompileUnitAnchor - Return the anchor for compile units,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateCompileUnitAnchor() {
+  // If we already created one, just return it.
+  if (!CompileUnitAnchor.isNull())
+    return CompileUnitAnchor;
+  return CompileUnitAnchor = GetOrCreateAnchor(dwarf::DW_TAG_compile_unit,
+                                               "llvm.dbg.compile_units");
+}
+
+/// GetOrCreateSubprogramAnchor - Return the anchor for subprograms,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateSubprogramAnchor() {
+  // If we already created one, just return it.
+  if (!SubProgramAnchor.isNull())
+    return SubProgramAnchor;
+  return SubProgramAnchor = GetOrCreateAnchor(dwarf::DW_TAG_subprogram,
+                                              "llvm.dbg.subprograms");
+}
+
+/// GetOrCreateGlobalVariableAnchor - Return the anchor for globals,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateGlobalVariableAnchor() {
+  // If we already created one, just return it.
+  if (!GlobalVariableAnchor.isNull())
+    return GlobalVariableAnchor;
+  return GlobalVariableAnchor = GetOrCreateAnchor(dwarf::DW_TAG_variable,
+                                                  "llvm.dbg.global_variables");
+}
+
+/// GetOrCreateArray - Create a descriptor for an array of descriptors.
+/// This implicitly uniques the arrays created.
+DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
+  SmallVector<Constant*, 16> Elts;
+
+  for (unsigned i = 0; i != NumTys; ++i)
+    Elts.push_back(getCastToEmpty(Tys[i]));
+
+  Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr,
+                                                     Elts.size()),
+                                      Elts.data(), Elts.size());
+  // If we already have this array, just return the uniqued version.
+  DIDescriptor &Entry = SimpleConstantCache[Init];
+  if (!Entry.isNull()) return DIArray(Entry.getGV());
+
+  GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+                                          GlobalValue::InternalLinkage,
+                                          Init, "llvm.dbg.array", &M);
+  GV->setSection("llvm.metadata");
+  Entry = DIDescriptor(GV);
+  return DIArray(GV);
+}
+
+/// GetOrCreateSubrange - Create a descriptor for a value range.  This
+/// implicitly uniques the values returned.
+DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+  Constant *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_subrange_type),
+    ConstantInt::get(Type::Int64Ty, Lo),
+    ConstantInt::get(Type::Int64Ty, Hi)
+  };
+
+  Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+  // If we already have this range, just return the uniqued version.
+  DIDescriptor &Entry = SimpleConstantCache[Init];
+  if (!Entry.isNull()) return DISubrange(Entry.getGV());
+
+  M.addTypeName("llvm.dbg.subrange.type", Init->getType());
+
+  GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+                                          GlobalValue::InternalLinkage,
+                                          Init, "llvm.dbg.subrange", &M);
+  GV->setSection("llvm.metadata");
+  Entry = DIDescriptor(GV);
+  return DISubrange(GV);
+}
+
+
+
+/// CreateCompileUnit - Create a new descriptor for the specified compile
+/// unit.  Note that this does not unique compile units within the module.
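+///
+/// A typical frontend call might look like this (editor's sketch with
+/// hypothetical arguments, not part of the original source):
+///   DIFactory DIF(M);
+///   DICompileUnit CU = DIF.CreateCompileUnit(dwarf::DW_LANG_C89, "a.c",
+///                                            "/tmp", "my compiler",
+///                                            /*isMain=*/true,
+///                                            /*isOptimized=*/false, "", 0);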
+DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, + const std::string &Filename, + const std::string &Directory, + const std::string &Producer, + bool isMain, + bool isOptimized, + const char *Flags, + unsigned RunTimeVer) { + Constant *Elts[] = { + GetTagConstant(dwarf::DW_TAG_compile_unit), + getCastToEmpty(GetOrCreateCompileUnitAnchor()), + ConstantInt::get(Type::Int32Ty, LangID), + GetStringConstant(Filename), + GetStringConstant(Directory), + GetStringConstant(Producer), + ConstantInt::get(Type::Int1Ty, isMain), + ConstantInt::get(Type::Int1Ty, isOptimized), + GetStringConstant(Flags), + ConstantInt::get(Type::Int32Ty, RunTimeVer) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.compile_unit.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.compile_unit", &M); + GV->setSection("llvm.metadata"); + return DICompileUnit(GV); +} + +/// CreateEnumerator - Create a single enumerator value. +DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){ + Constant *Elts[] = { + GetTagConstant(dwarf::DW_TAG_enumerator), + GetStringConstant(Name), + ConstantInt::get(Type::Int64Ty, Val) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.enumerator.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.enumerator", &M); + GV->setSection("llvm.metadata"); + return DIEnumerator(GV); +} + + +/// CreateBasicType - Create a basic type like int, float, etc. +DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + unsigned Encoding) { + Constant *Elts[] = { + GetTagConstant(dwarf::DW_TAG_base_type), + getCastToEmpty(Context), + GetStringConstant(Name), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNumber), + ConstantInt::get(Type::Int64Ty, SizeInBits), + ConstantInt::get(Type::Int64Ty, AlignInBits), + ConstantInt::get(Type::Int64Ty, OffsetInBits), + ConstantInt::get(Type::Int32Ty, Flags), + ConstantInt::get(Type::Int32Ty, Encoding) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.basictype.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.basictype", &M); + GV->setSection("llvm.metadata"); + return DIBasicType(GV); +} + +/// CreateDerivedType - Create a derived type like const qualified type, +/// pointer, typedef, etc. 
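+///
+/// For example, a frontend lowering 'typedef int MyInt;' might call (editor's
+/// sketch with hypothetical Unit, CU, and IntTy descriptors, not part of the
+/// original source):
+///   DIDerivedType TD =
+///     DIF.CreateDerivedType(dwarf::DW_TAG_typedef, Unit, "MyInt", CU,
+///                           /*LineNumber=*/1, /*SizeInBits=*/0,
+///                           /*AlignInBits=*/0, /*OffsetInBits=*/0,
+///                           /*Flags=*/0, IntTy);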
+DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, + DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, + DIType DerivedFrom) { + Constant *Elts[] = { + GetTagConstant(Tag), + getCastToEmpty(Context), + GetStringConstant(Name), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNumber), + ConstantInt::get(Type::Int64Ty, SizeInBits), + ConstantInt::get(Type::Int64Ty, AlignInBits), + ConstantInt::get(Type::Int64Ty, OffsetInBits), + ConstantInt::get(Type::Int32Ty, Flags), + getCastToEmpty(DerivedFrom) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.derivedtype.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.derivedtype", &M); + GV->setSection("llvm.metadata"); + return DIDerivedType(GV); +} + +/// CreateCompositeType - Create a composite type like array, struct, etc. +DICompositeType DIFactory::CreateCompositeType(unsigned Tag, + DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, + DIType DerivedFrom, + DIArray Elements, + unsigned RuntimeLang) { + + Constant *Elts[] = { + GetTagConstant(Tag), + getCastToEmpty(Context), + GetStringConstant(Name), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNumber), + ConstantInt::get(Type::Int64Ty, SizeInBits), + ConstantInt::get(Type::Int64Ty, AlignInBits), + ConstantInt::get(Type::Int64Ty, OffsetInBits), + ConstantInt::get(Type::Int32Ty, Flags), + getCastToEmpty(DerivedFrom), + getCastToEmpty(Elements), + ConstantInt::get(Type::Int32Ty, RuntimeLang) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.composite.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.composite", &M); + GV->setSection("llvm.metadata"); + return DICompositeType(GV); +} + + +/// CreateSubprogram - Create a new descriptor for the specified subprogram. +/// See comments in DISubprogram for descriptions of these fields. This +/// method does not unique the generated descriptors. 
+DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, + const std::string &Name, + const std::string &DisplayName, + const std::string &LinkageName, + DICompileUnit CompileUnit, + unsigned LineNo, DIType Type, + bool isLocalToUnit, + bool isDefinition) { + + Constant *Elts[] = { + GetTagConstant(dwarf::DW_TAG_subprogram), + getCastToEmpty(GetOrCreateSubprogramAnchor()), + getCastToEmpty(Context), + GetStringConstant(Name), + GetStringConstant(DisplayName), + GetStringConstant(LinkageName), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNo), + getCastToEmpty(Type), + ConstantInt::get(Type::Int1Ty, isLocalToUnit), + ConstantInt::get(Type::Int1Ty, isDefinition) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.subprogram.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.subprogram", &M); + GV->setSection("llvm.metadata"); + return DISubprogram(GV); +} + +/// CreateGlobalVariable - Create a new descriptor for the specified global. +DIGlobalVariable +DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name, + const std::string &DisplayName, + const std::string &LinkageName, + DICompileUnit CompileUnit, + unsigned LineNo, DIType Type,bool isLocalToUnit, + bool isDefinition, llvm::GlobalVariable *Val) { + Constant *Elts[] = { + GetTagConstant(dwarf::DW_TAG_variable), + getCastToEmpty(GetOrCreateGlobalVariableAnchor()), + getCastToEmpty(Context), + GetStringConstant(Name), + GetStringConstant(DisplayName), + GetStringConstant(LinkageName), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNo), + getCastToEmpty(Type), + ConstantInt::get(Type::Int1Ty, isLocalToUnit), + ConstantInt::get(Type::Int1Ty, isDefinition), + ConstantExpr::getBitCast(Val, EmptyStructPtr) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.global_variable.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.global_variable", &M); + GV->setSection("llvm.metadata"); + return DIGlobalVariable(GV); +} + + +/// CreateVariable - Create a new descriptor for the specified variable. +DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, unsigned LineNo, + DIType Type) { + Constant *Elts[] = { + GetTagConstant(Tag), + getCastToEmpty(Context), + GetStringConstant(Name), + getCastToEmpty(CompileUnit), + ConstantInt::get(Type::Int32Ty, LineNo), + getCastToEmpty(Type) + }; + + Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); + + M.addTypeName("llvm.dbg.variable.type", Init->getType()); + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalValue::InternalLinkage, + Init, "llvm.dbg.variable", &M); + GV->setSection("llvm.metadata"); + return DIVariable(GV); +} + + +/// CreateBlock - This creates a descriptor for a lexical block with the +/// specified parent context. 
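+///
+/// A frontend would typically pair the result with the region intrinsics
+/// below to delimit the block's extent (editor's sketch with hypothetical
+/// Subprogram/EntryBB/ExitBB values, not part of the original source):
+///   DIBlock Blk = DIF.CreateBlock(Subprogram);
+///   DIF.InsertRegionStart(Blk, EntryBB);
+///   ...
+///   DIF.InsertRegionEnd(Blk, ExitBB);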
+DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
+  Constant *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_lexical_block),
+    getCastToEmpty(Context)
+  };
+
+  Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+  M.addTypeName("llvm.dbg.block.type", Init->getType());
+  GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+                                          GlobalValue::InternalLinkage,
+                                          Init, "llvm.dbg.block", &M);
+  GV->setSection("llvm.metadata");
+  return DIBlock(GV);
+}
+
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Routines for inserting code into a function
+//===----------------------------------------------------------------------===//
+
+/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
+/// inserting it at the end of the specified basic block.
+void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
+                                unsigned ColNo, BasicBlock *BB) {
+
+  // Lazily construct llvm.dbg.stoppoint function.
+  if (!StopPointFn)
+    StopPointFn = llvm::Intrinsic::getDeclaration(&M,
+                                              llvm::Intrinsic::dbg_stoppoint);
+
+  // Invoke llvm.dbg.stoppoint.
+  Value *Args[] = {
+    llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo),
+    llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo),
+    getCastToEmpty(CU)
+  };
+  CallInst::Create(StopPointFn, Args, Args+3, "", BB);
+}
+
+/// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to
+/// mark the start of the specified subprogram.
+void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
+  // Lazily construct llvm.dbg.func.start.
+  if (!FuncStartFn)
+    FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
+
+  // Call llvm.dbg.func.start, which also implicitly sets a stoppoint.
+  CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB);
+}
+
+/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
+/// mark the start of a region for the specified scoping descriptor.
+void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
+  // Lazily construct llvm.dbg.region.start function.
+  if (!RegionStartFn)
+    RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
+
+  // Call llvm.dbg.region.start.
+  CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB);
+}
+
+/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
+/// mark the end of a region for the specified scoping descriptor.
+void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
+  // Lazily construct llvm.dbg.region.end function.
+  if (!RegionEndFn)
+    RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
+
+  // Call llvm.dbg.region.end.
+  CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) {
+  // Cast the storage to a {}* for the call to llvm.dbg.declare.
+  Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB);
+
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { Storage, getCastToEmpty(D) };
+  CallInst::Create(DeclareFn, Args, Args+2, "", BB);
+}
+
+namespace llvm {
+  /// findStopPoint - Find the stoppoint corresponding to this instruction,
+  /// that is, the stoppoint that dominates this instruction.
+  const DbgStopPointInst *findStopPoint(const Instruction *Inst) {
+    if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(Inst))
+      return DSI;
+
+    const BasicBlock *BB = Inst->getParent();
+    BasicBlock::const_iterator I = Inst, B;
+    while (BB) {
+      B = BB->begin();
+
+      // A BB consisting only of a terminator can't have a stoppoint.
+      while (I != B) {
+        --I;
+        if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
+          return DSI;
+      }
+
+      // This BB didn't have a stoppoint: if there is only one predecessor,
+      // look for a stoppoint there.  We could use getIDom(), but that would
+      // require dominator info.
+      BB = I->getParent()->getUniquePredecessor();
+      if (BB)
+        I = BB->getTerminator();
+    }
+
+    return 0;
+  }
+
+  /// findBBStopPoint - Find the stoppoint corresponding to the first real
+  /// (non-debug intrinsic) instruction in this Basic Block, and return the
+  /// stoppoint for it.
+  const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB) {
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
+        return DSI;
+
+    // Fall back to looking for a stoppoint in the unique predecessor.  Useful
+    // if this BB contains no stoppoints, but the unique predecessor does.
+    BB = BB->getUniquePredecessor();
+    if (BB)
+      return findStopPoint(BB->getTerminator());
+
+    return 0;
+  }
+
+  Value *findDbgGlobalDeclare(GlobalVariable *V) {
+    const Module *M = V->getParent();
+    const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type");
+    if (!Ty) return 0;
+
+    Ty = PointerType::get(Ty, 0);
+
+    Value *Val = V->stripPointerCasts();
+    for (Value::use_iterator I = Val->use_begin(), E = Val->use_end();
+         I != E; ++I) {
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I)) {
+        if (CE->getOpcode() == Instruction::BitCast) {
+          Value *VV = CE;
+
+          while (VV->hasOneUse())
+            VV = *VV->use_begin();
+
+          if (VV->getType() == Ty)
+            return VV;
+        }
+      }
+    }
+
+    if (Val->getType() == Ty)
+      return Val;
+
+    return 0;
+  }
+
+  /// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+  /// It looks through pointer casts too.
+  const DbgDeclareInst *findDbgDeclare(const Value *V, bool stripCasts) {
+    if (stripCasts) {
+      V = V->stripPointerCasts();
+
+      // Look for the bitcast.
+      for (Value::use_const_iterator I = V->use_begin(), E = V->use_end();
+           I != E; ++I)
+        if (isa<BitCastInst>(I))
+          return findDbgDeclare(*I, false);
+
+      return 0;
+    }
+
+    // Find llvm.dbg.declare among uses of the instruction.
+    for (Value::use_const_iterator I = V->use_begin(), E = V->use_end();
+         I != E; ++I)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
+        return DDI;
+
+    return 0;
+  }
+
+  bool getLocationInfo(const Value *V, std::string &DisplayName,
+                       std::string &Type, unsigned &LineNo, std::string &File,
+                       std::string &Dir) {
+    DICompileUnit Unit;
+    DIType TypeD;
+
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+      Value *DIGV = findDbgGlobalDeclare(GV);
+      if (!DIGV) return false;
+      DIGlobalVariable Var(cast<GlobalVariable>(DIGV));
+
+      Var.getDisplayName(DisplayName);
+      LineNo = Var.getLineNumber();
+      Unit = Var.getCompileUnit();
+      TypeD = Var.getType();
+    } else {
+      const DbgDeclareInst *DDI = findDbgDeclare(V);
+      if (!DDI) return false;
+      DIVariable Var(cast<GlobalVariable>(DDI->getVariable()));
+
+      Var.getName(DisplayName);
+      LineNo = Var.getLineNumber();
+      Unit = Var.getCompileUnit();
+      TypeD = Var.getType();
+    }
+
+    TypeD.getName(Type);
+    Unit.getFilename(File);
+    Unit.getDirectory(Dir);
+    return true;
+  }
+}
+
+/// dump - Print descriptor.
+void DIDescriptor::dump() const {
+  cerr << "[" << dwarf::TagString(getTag()) << "] ";
+  cerr << std::hex << "[GV:" << GV << "]" << std::dec;
+}
+
+/// dump - Print compile unit.
+void DICompileUnit::dump() const {
+  if (getLanguage())
+    cerr << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+
+  std::string Res1, Res2;
+  cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]";
+}
+
+/// dump - Print type.
+void DIType::dump() const {
+  if (isNull()) return;
+
+  std::string Res;
+  if (!getName(Res).empty())
+    cerr << " [" << Res << "] ";
+
+  unsigned Tag = getTag();
+  cerr << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO : Print context
+  getCompileUnit().dump();
+  cerr << " ["
+       << getLineNumber() << ", "
+       << getSizeInBits() << ", "
+       << getAlignInBits() << ", "
+       << getOffsetInBits()
+       << "] ";
+
+  if (isPrivate())
+    cerr << " [private] ";
+  else if (isProtected())
+    cerr << " [protected] ";
+
+  if (isForwardDecl())
+    cerr << " [fwd] ";
+
+  if (isBasicType(Tag))
+    DIBasicType(GV).dump();
+  else if (isDerivedType(Tag))
+    DIDerivedType(GV).dump();
+  else if (isCompositeType(Tag))
+    DICompositeType(GV).dump();
+  else {
+    cerr << "Invalid DIType\n";
+    return;
+  }
+
+  cerr << "\n";
+}
+
+/// dump - Print basic type.
+void DIBasicType::dump() const {
+  cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+}
+
+/// dump - Print derived type.
+void DIDerivedType::dump() const {
+  cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+}
+
+/// dump - Print composite type.
+void DICompositeType::dump() const {
+  DIArray A = getTypeArray();
+  if (A.isNull())
+    return;
+  cerr << " [" << A.getNumElements() << " elements]";
+}
+
+/// dump - Print global.
+void DIGlobal::dump() const {
+  std::string Res;
+  if (!getName(Res).empty())
+    cerr << " [" << Res << "] ";
+
+  unsigned Tag = getTag();
+  cerr << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO : Print context
+  getCompileUnit().dump();
+  cerr << " [" << getLineNumber() << "] ";
+
+  if (isLocalToUnit())
+    cerr << " [local] ";
+
+  if (isDefinition())
+    cerr << " [def] ";
+
+  if (isGlobalVariable(Tag))
+    DIGlobalVariable(GV).dump();
+
+  cerr << "\n";
+}
+
+/// dump - Print subprogram.
+void DISubprogram::dump() const {
+  DIGlobal::dump();
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+  cerr << " ["; getGlobal()->dump(); cerr << "] ";
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+  std::string Res;
+  if (!getName(Res).empty())
+    cerr << " [" << Res << "] ";
+
+  getCompileUnit().dump();
+  cerr << " [" << getLineNumber() << "] ";
+  getType().dump();
+  cerr << "\n";
+}
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
new file mode 100644
index 000000000000..8584d06f7a7b
--- /dev/null
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -0,0 +1,2878 @@
+//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an implementation of Andersen's interprocedural alias
+// analysis.
+//
+// In pointer analysis terms, this is a subset-based, flow-insensitive,
+// field-sensitive, and context-insensitive pointer analysis algorithm.
+//
+// This algorithm is implemented as four stages:
+//   1. Object identification.
+//   2. Inclusion constraint identification.
+//   3. Offline constraint graph optimization.
+//   4. Inclusion constraint solving.
+//
+// The object identification stage identifies all of the memory objects in the
+// program, which includes globals, heap allocated objects, and stack allocated
+// objects.
+//
+// The inclusion constraint identification stage finds all inclusion
+// constraints in the program by scanning it, looking for pointer assignments
+// and other statements that affect the points-to graph.  For a statement like
+// "A = B", this statement is processed to indicate that A can point to
+// anything that B can point to.  Constraints can handle copies, loads,
+// stores, and address taking.
+//
+// The offline constraint graph optimization portion includes offline variable
+// substitution algorithms intended to compute pointer and location
+// equivalences.  Pointer equivalences are those pointers that will have the
+// same points-to sets, and location equivalences are those variables that
+// always appear together in points-to sets.  It also includes an offline
+// cycle detection algorithm that allows cycles to be collapsed sooner
+// during solving.
+//
+// The inclusion constraint solving phase iteratively propagates the inclusion
+// constraints until a fixed point is reached.  This is an O(N^3) algorithm.
+//
+// Function constraints are handled as if they were structs with X fields.
+// Thus, an access to argument X of function Y is an access to node index
+// getNode(Y) + X.  This representation allows handling of indirect calls
+// without any issues.  To wit, an indirect call Y(a,b) is equivalent to
+// *(Y + 1) = a, *(Y + 2) = b.
+// The return node for a function is always located at getNode(F) +
+// CallReturnPos.  The arguments start at getNode(F) + CallFirstArgPos.
+//
+// Future Improvements:
+//   Use of BDDs.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "anders-aa"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include <algorithm>
+#include <set>
+#include <list>
+#include <map>
+#include <stack>
+#include <vector>
+#include <queue>
+
+// Determining the actual set of nodes the universal set can consist of is very
+// expensive because it means propagating around very large sets.  We rely on
+// other analyses being able to determine which nodes can never be pointed to
+// in order to disambiguate further than "points-to anything".
+#define FULL_UNIVERSAL 0
+
+using namespace llvm;
+STATISTIC(NumIters      , "Number of iterations to reach convergence");
+STATISTIC(NumConstraints, "Number of constraints");
+STATISTIC(NumNodes      , "Number of nodes");
+STATISTIC(NumUnified    , "Number of variables unified");
+STATISTIC(NumErased     , "Number of redundant constraints erased");
+
+static const unsigned SelfRep = (unsigned)-1;
+static const unsigned Unvisited = (unsigned)-1;
+// Position of the function return node relative to the function node.
+static const unsigned CallReturnPos = 1;
+// Position of the first argument node of a call relative to the function node.
+
+namespace {
+  struct BitmapKeyInfo {
+    static inline SparseBitVector<> *getEmptyKey() {
+      return reinterpret_cast<SparseBitVector<> *>(-1);
+    }
+    static inline SparseBitVector<> *getTombstoneKey() {
+      return reinterpret_cast<SparseBitVector<> *>(-2);
+    }
+    static unsigned getHashValue(const SparseBitVector<> *bitmap) {
+      return bitmap->getHashValue();
+    }
+    static bool isEqual(const SparseBitVector<> *LHS,
+                        const SparseBitVector<> *RHS) {
+      if (LHS == RHS)
+        return true;
+      else if (LHS == getEmptyKey() || RHS == getEmptyKey()
+               || LHS == getTombstoneKey() || RHS == getTombstoneKey())
+        return false;
+
+      return *LHS == *RHS;
+    }
+
+    static bool isPod() { return true; }
+  };
+
+  class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis,
+                                      private InstVisitor<Andersens> {
+    struct Node;
+
+    /// Constraint - Objects of this structure are used to represent the
+    /// various constraints identified by the algorithm.  The constraints are
+    /// 'copy' for statements like "A = B", 'load' for statements like
+    /// "A = *B", 'store' for statements like "*A = B", and 'addressof' for
+    /// statements like "A = alloca".  The Offset is applied as *(A + K) = B
+    /// for stores, A = *(B + K) for loads, and A = B + K for copies.  It is
+    /// illegal on addressof constraints (because it is statically resolvable
+    /// to A = &C where C = B + K).
+    struct Constraint {
+      enum ConstraintType { Copy, Load, Store, AddressOf } Type;
+      unsigned Dest;
+      unsigned Src;
+      unsigned Offset;
+
+      Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
+        : Type(Ty), Dest(D), Src(S), Offset(O) {
+        assert((Offset == 0 || Ty != AddressOf) &&
+               "Offset is illegal on addressof constraints");
+      }
+
+      bool operator==(const Constraint &RHS) const {
+        return RHS.Type == Type
+            && RHS.Dest == Dest
+            && RHS.Src == Src
+            && RHS.Offset == Offset;
+      }
+
+      bool operator!=(const Constraint &RHS) const {
+        return !(*this == RHS);
+      }
+
+      bool operator<(const Constraint &RHS) const {
+        if (RHS.Type != Type)
+          return RHS.Type < Type;
+        else if (RHS.Dest != Dest)
+          return RHS.Dest < Dest;
+        else if (RHS.Src != Src)
+          return RHS.Src < Src;
+        return RHS.Offset < Offset;
+      }
+    };
+
+    // Information DenseSet requires implemented in order to be able to do
+    // its thing.
+    struct PairKeyInfo {
+      static inline std::pair<unsigned, unsigned> getEmptyKey() {
+        return std::make_pair(~0U, ~0U);
+      }
+      static inline std::pair<unsigned, unsigned> getTombstoneKey() {
+        return std::make_pair(~0U - 1, ~0U - 1);
+      }
+      static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) {
+        return P.first ^ P.second;
+      }
+      static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS,
+                              const std::pair<unsigned, unsigned> &RHS) {
+        return LHS == RHS;
+      }
+    };
+
+    struct ConstraintKeyInfo {
+      static inline Constraint getEmptyKey() {
+        return Constraint(Constraint::Copy, ~0U, ~0U, ~0U);
+      }
+      static inline Constraint getTombstoneKey() {
+        return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1);
+      }
+      static unsigned getHashValue(const Constraint &C) {
+        return C.Src ^ C.Dest ^ C.Type ^ C.Offset;
+      }
+      static bool isEqual(const Constraint &LHS,
+                          const Constraint &RHS) {
+        return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest
+            && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset;
+      }
+    };
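// [Editor's aside -- illustrative sketch, not part of the imported file.]
// The KeyInfo structs above follow the contract LLVM's DenseMap/DenseSet
// expect from their KeyInfo template parameter: two reserved key values
// (empty and tombstone), a hash function, and an equality predicate that is
// safe to call on the reserved keys.  A schematic use, treating the nested
// Constraint and ConstraintKeyInfo types as if they were visible here; the
// count-then-insert idiom mirrors what RewriteConstraints does below:
#if 0  // illustration only
static void deduplicate(const std::vector<Constraint> &In,
                        std::vector<Constraint> &Out) {
  llvm::DenseSet<Constraint, ConstraintKeyInfo> Seen;
  for (unsigned i = 0, e = In.size(); i != e; ++i)
    if (!Seen.count(In[i])) {   // hash + isEqual drive this lookup
      Seen.insert(In[i]);
      Out.push_back(In[i]);
    }
}
#endif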
+
+    // Node class - This class is used to represent a node in the constraint
+    // graph.  Due to various optimizations, it is not always the case that
+    // there is a mapping from a Node to a Value.  In particular, we add
+    // artificial Nodes that represent the set of pointed-to variables shared
+    // for each location equivalent Node.
+    struct Node {
+    private:
+      static unsigned Counter;
+
+    public:
+      Value *Val;
+      SparseBitVector<> *Edges;
+      SparseBitVector<> *PointsTo;
+      SparseBitVector<> *OldPointsTo;
+      std::list<Constraint> Constraints;
+
+      // Pointer and location equivalence labels.
+      unsigned PointerEquivLabel;
+      unsigned LocationEquivLabel;
+      // Predecessor edges, both real and implicit.
+      SparseBitVector<> *PredEdges;
+      SparseBitVector<> *ImplicitPredEdges;
+      // Set of nodes that point to us; only used for location equivalence.
+      SparseBitVector<> *PointedToBy;
+      // Number of incoming edges, used during variable substitution to
+      // early-free the points-to sets.
+      unsigned NumInEdges;
+      // True if our points-to set is in the Set2PEClass map.
+      bool StoredInHash;
+      // True if our node has no indirect constraints (complex or otherwise).
+      bool Direct;
+      // True if the node is address taken, *or* it is part of a group of
+      // nodes that must be kept together.  This is set to true for functions
+      // and their arg nodes, which must be kept at the same position relative
+      // to their base function node.
+      bool AddressTaken;
+
+      // Nodes in cycles (or in equivalence classes) are united together using
+      // a standard union-find representation with path compression.  NodeRep
+      // gives the index into GraphNodes for the representative Node.
+      unsigned NodeRep;
+
+      // Modification timestamp.  Assigned from Counter.
+      // Used for work list prioritization.
+      unsigned Timestamp;
+
+      explicit Node(bool direct = true) :
+        Val(0), Edges(0), PointsTo(0), OldPointsTo(0),
+        PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
+        ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
+        StoredInHash(false), Direct(direct), AddressTaken(false),
+        NodeRep(SelfRep), Timestamp(0) { }
+
+      Node *setValue(Value *V) {
+        assert(Val == 0 && "Value already set for this node!");
+        Val = V;
+        return this;
+      }
+
+      /// getValue - Return the LLVM value corresponding to this node.
+      ///
+      Value *getValue() const { return Val; }
+
+      /// addPointerTo - Add a pointer to the list of pointees of this node,
+      /// returning true if this caused a new pointer to be added, or false if
+      /// we already knew about the points-to relation.
+      bool addPointerTo(unsigned Node) {
+        return PointsTo->test_and_set(Node);
+      }
+
+      /// intersects - Return true if the points-to set of this node intersects
+      /// with the points-to set of the specified node.
+      bool intersects(Node *N) const;
+
+      /// intersectsIgnoring - Return true if the points-to set of this node
+      /// intersects with the points-to set of the specified node on any nodes
+      /// except for the specified node to ignore.
+      bool intersectsIgnoring(Node *N, unsigned) const;
+
+      // Timestamp a node (used for work list prioritization).
+      void Stamp() {
+        Timestamp = Counter++;
+      }
+
+      bool isRep() const {
+        return ((int)NodeRep < 0);
+      }
+    };
+
+    struct WorkListElement {
+      Node *node;
+      unsigned Timestamp;
+      WorkListElement(Node *n, unsigned t) : node(n), Timestamp(t) {}
+
+      // Note that we reverse the sense of the comparison because we
+      // actually want to give low timestamps priority over high ones,
+      // whereas in a priority queue a greater value is normally what is
+      // given high priority.
+      bool operator<(const WorkListElement &that) const {
+        return (this->Timestamp > that.Timestamp);
+      }
+    };
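// [Editor's aside -- illustrative sketch, not part of the imported file.]
// NodeRep above encodes union-find over node indices: SelfRep (cast to a
// negative int) marks a representative, and any other value is the index of
// the node's parent.  The pass's real FindNode/UniteNodes (declared further
// down) add union-by-rank and statistics; this hypothetical find() shows just
// the path compression that keeps repeated lookups near-constant time.
#if 0  // illustration only
static unsigned find(std::vector<unsigned> &Rep, unsigned N) {
  if (Rep[N] == SelfRep)
    return N;                    // N is its own representative.
  unsigned Root = find(Rep, Rep[N]);
  Rep[N] = Root;                 // Path compression: point straight at root.
  return Root;
}
#endif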
+
+    // Priority-queue based work list specialized for Nodes.
+    class WorkList {
+      std::priority_queue<WorkListElement> Q;
+
+    public:
+      void insert(Node *n) {
+        Q.push(WorkListElement(n, n->Timestamp));
+      }
+
+      // We automatically discard non-representative nodes and nodes
+      // that were in the work list twice (we keep a copy of the
+      // timestamp in the work list so we can detect this situation by
+      // comparing against the node's current timestamp).
+      Node *pop() {
+        while (!Q.empty()) {
+          WorkListElement x = Q.top(); Q.pop();
+          Node *INode = x.node;
+
+          if (INode->isRep() &&
+              INode->Timestamp == x.Timestamp) {
+            return x.node;
+          }
+        }
+        return 0;
+      }
+
+      bool empty() {
+        return Q.empty();
+      }
+    };
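// [Editor's aside -- illustrative sketch, not part of the imported file.]
// WorkList implements "lazy deletion": re-stamping a node does not remove its
// old queue entry.  Instead, pop() recognizes stale entries because their
// recorded timestamp no longer matches the node's current one, and silently
// skips them.  Lower timestamps pop first (operator< is reversed above).
#if 0  // illustration only
static void example(WorkList &WL, Node &N) {
  N.Stamp();           // N.Timestamp becomes t0
  WL.insert(&N);       // queue holds (N, t0)
  N.Stamp();           // N.Timestamp becomes t1; (N, t0) is now stale
  WL.insert(&N);       // queue holds (N, t0) and (N, t1)
  Node *P = WL.pop();  // (N, t0) surfaces first, but t0 != N.Timestamp, so it
                       // is discarded; pop() returns N via the (N, t1) entry.
}
#endif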
+
+    /// GraphNodes - This vector is populated as part of the object
+    /// identification stage of the analysis, which populates this vector with
+    /// a node for each memory object and fills in the ValueNodes map.
+    std::vector<Node> GraphNodes;
+
+    /// ValueNodes - This map indicates the Node that a particular Value* is
+    /// represented by.  This contains entries for all pointers.
+    DenseMap<Value*, unsigned> ValueNodes;
+
+    /// ObjectNodes - This map contains entries for each memory object in the
+    /// program: globals, alloca's and mallocs.
+    DenseMap<Value*, unsigned> ObjectNodes;
+
+    /// ReturnNodes - This map contains an entry for each function in the
+    /// program that returns a value.
+    DenseMap<Function*, unsigned> ReturnNodes;
+
+    /// VarargNodes - This map contains the entry used to represent all
+    /// pointers passed through the varargs portion of a function call for a
+    /// particular function.  An entry is not present in this map for
+    /// functions that do not take variable arguments.
+    DenseMap<Function*, unsigned> VarargNodes;
+
+    /// Constraints - This vector contains a list of all of the constraints
+    /// identified by the program.
+    std::vector<Constraint> Constraints;
+
+    // Map from graph node to the maximum K value that is allowed (for
+    // functions, this is equivalent to the number of arguments +
+    // CallFirstArgPos).
+    std::map<unsigned, unsigned> MaxK;
+
+    /// This enum defines the GraphNodes indices that correspond to important
+    /// fixed sets.
+    enum {
+      UniversalSet = 0,
+      NullPtr = 1,
+      NullObject = 2,
+      NumberSpecialNodes
+    };
+    // Stack for Tarjan's.
+    std::stack<unsigned> SCCStack;
+    // Map from Graph Node to DFS number.
+    std::vector<unsigned> Node2DFS;
+    // Map from Graph Node to "deleted from the graph".
+    std::vector<bool> Node2Deleted;
+    // Same as the Node maps, but implemented as std::map because it is faster
+    // to clear.
+    std::map<unsigned, unsigned> Tarjan2DFS;
+    std::map<unsigned, bool> Tarjan2Deleted;
+    // Current DFS number.
+    unsigned DFSNumber;
+
+    // Work lists.
+    WorkList w1, w2;
+    WorkList *CurrWL, *NextWL; // "current" and "next" work lists
+
+    // Offline variable substitution related things.
+
+    // Temporary rep storage, used because we can't collapse SCCs in the
+    // predecessor graph by uniting the variables permanently; we can only do
+    // so for the successor graph.
+    std::vector<unsigned> VSSCCRep;
+    // Mapping from node to whether we have visited it during SCC finding yet.
+    std::vector<bool> Node2Visited;
+    // During variable substitution, we create unknowns to represent the
+    // unknown value that is a dereference of a variable.  These nodes are
+    // known as "ref" nodes (since they represent the value of dereferences).
+    unsigned FirstRefNode;
+    // During HVN, we represent address taken nodes as if they were
+    // unknown (since HVN, unlike HU, does not evaluate unions).
+    unsigned FirstAdrNode;
+    // Current pointer equivalence class number.
+    unsigned PEClass;
+    // Mapping from points-to sets to equivalence classes.
+    typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
+    BitVectorMap Set2PEClass;
+    // Mapping from pointer equivalences to the representative node.  -1 if we
+    // have no representative node for this pointer equivalence class yet.
+    std::vector<int> PEClass2Node;
+    // Mapping from pointer equivalences to representative node.  This
+    // includes pointer equivalent but not location equivalent variables.  -1
+    // if we have no representative node for this pointer equivalence class
+    // yet.
+    std::vector<int> PENLEClass2Node;
+    // Union/Find for HCD.
+    std::vector<unsigned> HCDSCCRep;
+    // HCD's offline-detected cycles; "Statically DeTected".
+    // -1 if not part of such a cycle, otherwise a representative node.
+    std::vector<int> SDT;
+    // Whether to use SDT (UniteNodes can use it during solving, but not
+    // before).
+    bool SDTActive;
+
+  public:
+    static char ID;
+    Andersens() : ModulePass(&ID) {}
+
+    bool runOnModule(Module &M) {
+      InitializeAliasAnalysis(this);
+      IdentifyObjects(M);
+      CollectConstraints(M);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-constraints"
+      DEBUG(PrintConstraints());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+      SolveConstraints();
+      DEBUG(PrintPointsToGraph());
+
+      // Free the constraints list, as we don't need it to respond to alias
+      // requests.
+      std::vector<Constraint>().swap(Constraints);
+      // These are needed for Print() (-analyze in opt):
+      //ObjectNodes.clear();
+      //ReturnNodes.clear();
+      //VarargNodes.clear();
+      return false;
+    }
+
+    void releaseMemory() {
+      // FIXME: Until we have transitively required passes working correctly,
+      // this cannot be enabled!  Otherwise, using -count-aa with the pass
+      // causes memory to be freed too early. :(
+#if 0
+      // The memory objects and ValueNodes data structures are the only ones
+      // that are still live after construction.
+      std::vector<Node>().swap(GraphNodes);
+      ValueNodes.clear();
+#endif
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.setPreservesAll();                         // Does not transform code.
+    }
+
+    //------------------------------------------------
+    // Implement the AliasAnalysis API
+    //
+    AliasResult alias(const Value *V1, unsigned V1Size,
+                      const Value *V2, unsigned V2Size);
+    virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+    virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+    void getMustAliases(Value *P, std::vector<Value*> &RetVals);
+    bool pointsToConstantMemory(const Value *P);
+
+    virtual void deleteValue(Value *V) {
+      ValueNodes.erase(V);
+      getAnalysis<AliasAnalysis>().deleteValue(V);
+    }
+
+    virtual void copyValue(Value *From, Value *To) {
+      ValueNodes[To] = ValueNodes[From];
+      getAnalysis<AliasAnalysis>().copyValue(From, To);
+    }
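// [Editor's aside -- illustrative sketch, not part of the imported file.]
// Note the chaining pattern used by the AliasAnalysis overrides here and in
// the implementations that follow: a member of the AliasAnalysis group only
// answers the queries it can improve on (for Andersens, returning NoAlias
// when points-to sets are disjoint) and otherwise defers to the next analysis
// in the chain by calling the base-class implementation.  Schematically, with
// provablyDisjoint() as a hypothetical stand-in:
#if 0  // illustration only
AliasAnalysis::AliasResult ChainedAA::alias(const Value *V1, unsigned V1Size,
                                            const Value *V2, unsigned V2Size) {
  if (provablyDisjoint(V1, V2))
    return NoAlias;               // This pass has a definite answer.
  return AliasAnalysis::alias(V1, V1Size, V2, V2Size);  // Defer to the chain.
}
#endif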
+
+  private:
+    /// getNode - Return the node corresponding to the specified pointer
+    /// scalar.
+    unsigned getNode(Value *V) {
+      if (Constant *C = dyn_cast<Constant>(V))
+        if (!isa<GlobalValue>(C))
+          return getNodeForConstantPointer(C);
+
+      DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
+      if (I == ValueNodes.end()) {
+#ifndef NDEBUG
+        V->dump();
+#endif
+        assert(0 && "Value does not have a node in the points-to graph!");
+      }
+      return I->second;
+    }
+
+    /// getObject - Return the node corresponding to the memory object for the
+    /// specified global or allocation instruction.
+    unsigned getObject(Value *V) const {
+      DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V);
+      assert(I != ObjectNodes.end() &&
+             "Value does not have an object in the points-to graph!");
+      return I->second;
+    }
+
+    /// getReturnNode - Return the node representing the return value for the
+    /// specified function.
+    unsigned getReturnNode(Function *F) const {
+      DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F);
+      assert(I != ReturnNodes.end() && "Function does not return a value!");
+      return I->second;
+    }
+
+    /// getVarargNode - Return the node representing the variable arguments
+    /// formal for the specified function.
+    unsigned getVarargNode(Function *F) const {
+      DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F);
+      assert(I != VarargNodes.end() && "Function does not take var args!");
+      return I->second;
+    }
+
+    /// getNodeValue - Get the node for the specified LLVM value and set the
+    /// value for it to be the specified value.
+    unsigned getNodeValue(Value &V) {
+      unsigned Index = getNode(&V);
+      GraphNodes[Index].setValue(&V);
+      return Index;
+    }
+
+    unsigned UniteNodes(unsigned First, unsigned Second,
+                        bool UnionByRank = true);
+    unsigned FindNode(unsigned Node);
+    unsigned FindNode(unsigned Node) const;
+
+    void IdentifyObjects(Module &M);
+    void CollectConstraints(Module &M);
+    bool AnalyzeUsesOfFunction(Value *);
+    void CreateConstraintGraph();
+    void OptimizeConstraints();
+    unsigned FindEquivalentNode(unsigned, unsigned);
+    void ClumpAddressTaken();
+    void RewriteConstraints();
+    void HU();
+    void HVN();
+    void HCD();
+    void Search(unsigned Node);
+    void UnitePointerEquivalences();
+    void SolveConstraints();
+    bool QueryNode(unsigned Node);
+    void Condense(unsigned Node);
+    void HUValNum(unsigned Node);
+    void HVNValNum(unsigned Node);
+    unsigned getNodeForConstantPointer(Constant *C);
+    unsigned getNodeForConstantPointerTarget(Constant *C);
+    void AddGlobalInitializerConstraints(unsigned, Constant *C);
+
+    void AddConstraintsForNonInternalLinkage(Function *F);
+    void AddConstraintsForCall(CallSite CS, Function *F);
+    bool AddConstraintsForExternalCall(CallSite CS, Function *F);
+
+    void PrintNode(const Node *N) const;
+    void PrintConstraints() const;
+    void PrintConstraint(const Constraint &) const;
+    void PrintLabels() const;
+    void PrintPointsToGraph() const;
+
+    //===------------------------------------------------------------------===//
+    // Instruction visitation methods for adding constraints
+    //
+    friend class InstVisitor<Andersens>;
+    void visitReturnInst(ReturnInst &RI);
+    void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
+    void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
+    void visitCallSite(CallSite CS);
+    void visitAllocationInst(AllocationInst &AI);
+    void visitLoadInst(LoadInst &LI);
+    void visitStoreInst(StoreInst &SI);
+    void visitGetElementPtrInst(GetElementPtrInst &GEP);
+    void visitPHINode(PHINode &PN);
+    void visitCastInst(CastInst &CI);
+    void visitICmpInst(ICmpInst &ICI) {} // NOOP!
+    void visitFCmpInst(FCmpInst &ICI) {} // NOOP!
+    void visitSelectInst(SelectInst &SI);
+    void visitVAArg(VAArgInst &I);
+    void visitInstruction(Instruction &I);
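// [Editor's aside -- illustrative sketch, not part of the imported file.]
// The visit* methods above are dispatched through llvm::InstVisitor, a CRTP
// base class: visit(F) walks every instruction and statically routes each one
// to the most specific visitXxxInst overload the derived class provides,
// falling back to visitInstruction() for anything unhandled.  The shape of
// the pattern, reduced to its essentials (MiniVisitor is hypothetical):
#if 0  // illustration only
template <typename SubClass>
struct MiniVisitor {
  void dispatch(Instruction &I) {
    switch (I.getOpcode()) {   // InstVisitor switches per opcode like this.
    case Instruction::Load:
      static_cast<SubClass *>(this)->visitLoadInst(cast<LoadInst>(I));
      return;
    default:
      static_cast<SubClass *>(this)->visitInstruction(I);
      return;
    }
  }
  // Defaults, overridable by SubClass without any virtual calls:
  void visitLoadInst(LoadInst &LI) { visitInstruction(LI); }
  void visitInstruction(Instruction &I) {}
};
#endif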
+
+    //===------------------------------------------------------------------===//
+    // Implement the Analyze interface
+    //
+    void print(std::ostream &O, const Module *M) const {
+      PrintPointsToGraph();
+    }
+  };
+}
+
+char Andersens::ID = 0;
+static RegisterPass<Andersens>
+X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+// Initialize Timestamp Counter (static).
+unsigned Andersens::Node::Counter = 0;
+
+ModulePass *llvm::createAndersensPass() { return new Andersens(); }
+
+//===----------------------------------------------------------------------===//
+//                  AliasAnalysis Interface Implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size,
+                                            const Value *V2, unsigned V2Size) {
+  Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))];
+  Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))];
+
+  // Check to see if the two pointers are known to not alias.  They don't alias
+  // if their points-to sets do not intersect.
+  if (!N1->intersectsIgnoring(N2, NullObject))
+    return NoAlias;
+
+  return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+
+AliasAnalysis::ModRefResult
+Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+  // The only useful mod/ref information we can contribute is for calls to
+  // external functions: if we know that memory never escapes from the
+  // program, it cannot be modified by an external call.
+  //
+  // NOTE: This is not really safe, at least not when the entire program is
+  // not available.  The deal is that the external function could call back
+  // into the program and modify stuff.  We ignore this technical niggle for
+  // now.  This is, after all, a "research quality" implementation of
+  // Andersen's analysis.
+  if (Function *F = CS.getCalledFunction())
+    if (F->isDeclaration()) {
+      Node *N1 = &GraphNodes[FindNode(getNode(P))];
+
+      if (N1->PointsTo->empty())
+        return NoModRef;
+#if FULL_UNIVERSAL
+      if (!UniversalSet->PointsTo->test(FindNode(getNode(P))))
+        return NoModRef;  // Universal set does not contain P.
+#else
+      if (!N1->PointsTo->test(UniversalSet))
+        return NoModRef;  // P doesn't point to the universal set.
+#endif
+    }
+
+  return AliasAnalysis::getModRefInfo(CS, P, Size);
+}
+
+AliasAnalysis::ModRefResult
+Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
+  return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+/// getMustAliases - We can provide must alias information if we know that a
+/// pointer can only point to a specific function or the null pointer.
+/// Unfortunately we cannot determine must-alias information for global
+/// variables or any other memory objects because we do not track whether a
+/// pointer points to the beginning of an object or a field of it.
+void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
+  Node *N = &GraphNodes[FindNode(getNode(P))];
+  if (N->PointsTo->count() == 1) {
+    Node *Pointee = &GraphNodes[N->PointsTo->find_first()];
+    // If a function is the only object in the points-to set, then it must be
+    // the destination.  Note that we can't handle global variables here,
+    // because we don't know if the pointer is actually pointing to a field of
+    // the global or to the beginning of it.
+ if (Value *V = Pointee->getValue()) { + if (Function *F = dyn_cast(V)) + RetVals.push_back(F); + } else { + // If the object in the points-to set is the null object, then the null + // pointer is a must alias. + if (Pointee == &GraphNodes[NullObject]) + RetVals.push_back(Constant::getNullValue(P->getType())); + } + } + AliasAnalysis::getMustAliases(P, RetVals); +} + +/// pointsToConstantMemory - If we can determine that this pointer only points +/// to constant memory, return true. In practice, this means that if the +/// pointer can only point to constant globals, functions, or the null pointer, +/// return true. +/// +bool Andersens::pointsToConstantMemory(const Value *P) { + Node *N = &GraphNodes[FindNode(getNode(const_cast(P)))]; + unsigned i; + + for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); + bi != N->PointsTo->end(); + ++bi) { + i = *bi; + Node *Pointee = &GraphNodes[i]; + if (Value *V = Pointee->getValue()) { + if (!isa(V) || (isa(V) && + !cast(V)->isConstant())) + return AliasAnalysis::pointsToConstantMemory(P); + } else { + if (i != NullObject) + return AliasAnalysis::pointsToConstantMemory(P); + } + } + + return true; +} + +//===----------------------------------------------------------------------===// +// Object Identification Phase +//===----------------------------------------------------------------------===// + +/// IdentifyObjects - This stage scans the program, adding an entry to the +/// GraphNodes list for each memory object in the program (global stack or +/// heap), and populates the ValueNodes and ObjectNodes maps for these objects. +/// +void Andersens::IdentifyObjects(Module &M) { + unsigned NumObjects = 0; + + // Object #0 is always the universal set: the object that we don't know + // anything about. + assert(NumObjects == UniversalSet && "Something changed!"); + ++NumObjects; + + // Object #1 always represents the null pointer. + assert(NumObjects == NullPtr && "Something changed!"); + ++NumObjects; + + // Object #2 always represents the null object (the object pointed to by null) + assert(NumObjects == NullObject && "Something changed!"); + ++NumObjects; + + // Add all the globals first. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + ObjectNodes[I] = NumObjects++; + ValueNodes[I] = NumObjects++; + } + + // Add nodes for all of the functions and the instructions inside of them. + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + // The function itself is a memory object. + unsigned First = NumObjects; + ValueNodes[F] = NumObjects++; + if (isa(F->getFunctionType()->getReturnType())) + ReturnNodes[F] = NumObjects++; + if (F->getFunctionType()->isVarArg()) + VarargNodes[F] = NumObjects++; + + + // Add nodes for all of the incoming pointer arguments. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + { + if (isa(I->getType())) + ValueNodes[I] = NumObjects++; + } + MaxK[First] = NumObjects - First; + + // Scan the function body, creating a memory object for each heap/stack + // allocation in the body of the function and a node to represent all + // pointer values defined by instructions and used as operands. + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + // If this is an heap or stack allocation, create a node for the memory + // object. 
+ if (isa(II->getType())) { + ValueNodes[&*II] = NumObjects++; + if (AllocationInst *AI = dyn_cast(&*II)) + ObjectNodes[AI] = NumObjects++; + } + + // Calls to inline asm need to be added as well because the callee isn't + // referenced anywhere else. + if (CallInst *CI = dyn_cast(&*II)) { + Value *Callee = CI->getCalledValue(); + if (isa(Callee)) + ValueNodes[Callee] = NumObjects++; + } + } + } + + // Now that we know how many objects to create, make them all now! + GraphNodes.resize(NumObjects); + NumNodes += NumObjects; +} + +//===----------------------------------------------------------------------===// +// Constraint Identification Phase +//===----------------------------------------------------------------------===// + +/// getNodeForConstantPointer - Return the node corresponding to the constant +/// pointer itself. +unsigned Andersens::getNodeForConstantPointer(Constant *C) { + assert(isa(C->getType()) && "Not a constant pointer!"); + + if (isa(C) || isa(C)) + return NullPtr; + else if (GlobalValue *GV = dyn_cast(C)) + return getNode(GV); + else if (ConstantExpr *CE = dyn_cast(C)) { + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: + return getNodeForConstantPointer(CE->getOperand(0)); + case Instruction::IntToPtr: + return UniversalSet; + case Instruction::BitCast: + return getNodeForConstantPointer(CE->getOperand(0)); + default: + cerr << "Constant Expr not yet handled: " << *CE << "\n"; + assert(0); + } + } else { + assert(0 && "Unknown constant pointer!"); + } + return 0; +} + +/// getNodeForConstantPointerTarget - Return the node POINTED TO by the +/// specified constant pointer. +unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { + assert(isa(C->getType()) && "Not a constant pointer!"); + + if (isa(C)) + return NullObject; + else if (GlobalValue *GV = dyn_cast(C)) + return getObject(GV); + else if (ConstantExpr *CE = dyn_cast(C)) { + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: + return getNodeForConstantPointerTarget(CE->getOperand(0)); + case Instruction::IntToPtr: + return UniversalSet; + case Instruction::BitCast: + return getNodeForConstantPointerTarget(CE->getOperand(0)); + default: + cerr << "Constant Expr not yet handled: " << *CE << "\n"; + assert(0); + } + } else { + assert(0 && "Unknown constant pointer!"); + } + return 0; +} + +/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory +/// object N, which contains values indicated by C. +void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, + Constant *C) { + if (C->getType()->isSingleValueType()) { + if (isa(C->getType())) + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + getNodeForConstantPointer(C))); + } else if (C->isNullValue()) { + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + NullObject)); + return; + } else if (!isa(C)) { + // If this is an array or struct, include constraints for each element. + assert(isa(C) || isa(C)); + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + AddGlobalInitializerConstraints(NodeIndex, + cast(C->getOperand(i))); + } +} + +/// AddConstraintsForNonInternalLinkage - If this function does not have +/// internal linkage, realize that we can't trust anything passed into or +/// returned by this function. 
+void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + if (isa(I->getType())) + // If this is an argument of an externally accessible function, the + // incoming pointer might point to anything. + Constraints.push_back(Constraint(Constraint::Copy, getNode(I), + UniversalSet)); +} + +/// AddConstraintsForCall - If this is a call to a "known" function, add the +/// constraints and return true. If this is a call to an unknown function, +/// return false. +bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { + assert(F->isDeclaration() && "Not an external function!"); + + // These functions don't induce any points-to constraints. + if (F->getName() == "atoi" || F->getName() == "atof" || + F->getName() == "atol" || F->getName() == "atoll" || + F->getName() == "remove" || F->getName() == "unlink" || + F->getName() == "rename" || F->getName() == "memcmp" || + F->getName() == "llvm.memset" || + F->getName() == "strcmp" || F->getName() == "strncmp" || + F->getName() == "execl" || F->getName() == "execlp" || + F->getName() == "execle" || F->getName() == "execv" || + F->getName() == "execvp" || F->getName() == "chmod" || + F->getName() == "puts" || F->getName() == "write" || + F->getName() == "open" || F->getName() == "create" || + F->getName() == "truncate" || F->getName() == "chdir" || + F->getName() == "mkdir" || F->getName() == "rmdir" || + F->getName() == "read" || F->getName() == "pipe" || + F->getName() == "wait" || F->getName() == "time" || + F->getName() == "stat" || F->getName() == "fstat" || + F->getName() == "lstat" || F->getName() == "strtod" || + F->getName() == "strtof" || F->getName() == "strtold" || + F->getName() == "fopen" || F->getName() == "fdopen" || + F->getName() == "freopen" || + F->getName() == "fflush" || F->getName() == "feof" || + F->getName() == "fileno" || F->getName() == "clearerr" || + F->getName() == "rewind" || F->getName() == "ftell" || + F->getName() == "ferror" || F->getName() == "fgetc" || + F->getName() == "fgetc" || F->getName() == "_IO_getc" || + F->getName() == "fwrite" || F->getName() == "fread" || + F->getName() == "fgets" || F->getName() == "ungetc" || + F->getName() == "fputc" || + F->getName() == "fputs" || F->getName() == "putc" || + F->getName() == "ftell" || F->getName() == "rewind" || + F->getName() == "_IO_putc" || F->getName() == "fseek" || + F->getName() == "fgetpos" || F->getName() == "fsetpos" || + F->getName() == "printf" || F->getName() == "fprintf" || + F->getName() == "sprintf" || F->getName() == "vprintf" || + F->getName() == "vfprintf" || F->getName() == "vsprintf" || + F->getName() == "scanf" || F->getName() == "fscanf" || + F->getName() == "sscanf" || F->getName() == "__assert_fail" || + F->getName() == "modf") + return true; + + + // These functions do induce points-to edges. + if (F->getName() == "llvm.memcpy" || + F->getName() == "llvm.memmove" || + F->getName() == "memmove") { + + const FunctionType *FTy = F->getFunctionType(); + if (FTy->getNumParams() > 1 && + isa(FTy->getParamType(0)) && + isa(FTy->getParamType(1))) { + + // *Dest = *Src, which requires an artificial graph node to represent the + // constraint. 
It is broken up into *Dest = temp, temp = *Src + unsigned FirstArg = getNode(CS.getArgument(0)); + unsigned SecondArg = getNode(CS.getArgument(1)); + unsigned TempArg = GraphNodes.size(); + GraphNodes.push_back(Node()); + Constraints.push_back(Constraint(Constraint::Store, + FirstArg, TempArg)); + Constraints.push_back(Constraint(Constraint::Load, + TempArg, SecondArg)); + // In addition, Dest = Src + Constraints.push_back(Constraint(Constraint::Copy, + FirstArg, SecondArg)); + return true; + } + } + + // Result = Arg0 + if (F->getName() == "realloc" || F->getName() == "strchr" || + F->getName() == "strrchr" || F->getName() == "strstr" || + F->getName() == "strtok") { + const FunctionType *FTy = F->getFunctionType(); + if (FTy->getNumParams() > 0 && + isa(FTy->getParamType(0))) { + Constraints.push_back(Constraint(Constraint::Copy, + getNode(CS.getInstruction()), + getNode(CS.getArgument(0)))); + return true; + } + } + + return false; +} + + + +/// AnalyzeUsesOfFunction - Look at all of the users of the specified function. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. +bool Andersens::AnalyzeUsesOfFunction(Value *V) { + + if (!isa(V->getType())) return true; + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (dyn_cast(*UI)) { + return false; + } else if (StoreInst *SI = dyn_cast(*UI)) { + if (V == SI->getOperand(1)) { + return false; + } else if (SI->getOperand(1)) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast(*UI)) { + if (AnalyzeUsesOfFunction(GEP)) return true; + } else if (CallInst *CI = dyn_cast(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) + if (CI->getOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) + if (II->getOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast(*UI)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfFunction(CE)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast(*UI)) { + if (!isa(ICI->getOperand(1))) + return true; // Allow comparison against null. + } else if (dyn_cast(*UI)) { + return false; + } else { + return true; + } + return false; +} + +/// CollectConstraints - This stage scans the program, adding a constraint to +/// the Constraints list for each instruction in the program that induces a +/// constraint, and setting up the initial points-to graph. +/// +void Andersens::CollectConstraints(Module &M) { + // First, the universal set points to itself. + Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, + UniversalSet)); + Constraints.push_back(Constraint(Constraint::Store, UniversalSet, + UniversalSet)); + + // Next, the null pointer points to the null object. + Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); + + // Next, add any constraints on global variables and their initializers. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + // Associate the address of the global object as pointing to the memory for + // the global: &G = + unsigned ObjectIndex = getObject(I); + Node *Object = &GraphNodes[ObjectIndex]; + Object->setValue(I); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), + ObjectIndex)); + + if (I->hasInitializer()) { + AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); + } else { + // If it doesn't have an initializer (i.e. it's defined in another + // translation unit), it points to the universal set. + Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, + UniversalSet)); + } + } + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + // Set up the return value node. + if (isa(F->getFunctionType()->getReturnType())) + GraphNodes[getReturnNode(F)].setValue(F); + if (F->getFunctionType()->isVarArg()) + GraphNodes[getVarargNode(F)].setValue(F); + + // Set up incoming argument nodes. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (isa(I->getType())) + getNodeValue(*I); + + // At some point we should just add constraints for the escaping functions + // at solve time, but this slows down solving. For now, we simply mark + // address taken functions as escaping and treat them as external. + if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F)) + AddConstraintsForNonInternalLinkage(F); + + if (!F->isDeclaration()) { + // Scan the function body, creating a memory object for each heap/stack + // allocation in the body of the function and a node to represent all + // pointer values defined by instructions and used as operands. + visit(F); + } else { + // External functions that return pointers return the universal set. + if (isa(F->getFunctionType()->getReturnType())) + Constraints.push_back(Constraint(Constraint::Copy, + getReturnNode(F), + UniversalSet)); + + // Any pointers that are passed into the function have the universal set + // stored into them. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (isa(I->getType())) { + // Pointers passed into external functions could have anything stored + // through them. + Constraints.push_back(Constraint(Constraint::Store, getNode(I), + UniversalSet)); + // Memory objects passed into external function calls can have the + // universal set point to them. +#if FULL_UNIVERSAL + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(I))); +#else + Constraints.push_back(Constraint(Constraint::Copy, + getNode(I), + UniversalSet)); +#endif + } + + // If this is an external varargs function, it can also store pointers + // into any pointers passed through the varargs section. + if (F->getFunctionType()->isVarArg()) + Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), + UniversalSet)); + } + } + NumConstraints += Constraints.size(); +} + + +void Andersens::visitInstruction(Instruction &I) { +#ifdef NDEBUG + return; // This function is just a big assert. +#endif + if (isa(I)) + return; + // Most instructions don't have any effect on pointer values. + switch (I.getOpcode()) { + case Instruction::Br: + case Instruction::Switch: + case Instruction::Unwind: + case Instruction::Unreachable: + case Instruction::Free: + case Instruction::ICmp: + case Instruction::FCmp: + return; + default: + // Is this something we aren't handling yet? 
+ cerr << "Unknown instruction: " << I; + abort(); + } +} + +void Andersens::visitAllocationInst(AllocationInst &AI) { + unsigned ObjectIndex = getObject(&AI); + GraphNodes[ObjectIndex].setValue(&AI); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), + ObjectIndex)); +} + +void Andersens::visitReturnInst(ReturnInst &RI) { + if (RI.getNumOperands() && isa(RI.getOperand(0)->getType())) + // return V --> + Constraints.push_back(Constraint(Constraint::Copy, + getReturnNode(RI.getParent()->getParent()), + getNode(RI.getOperand(0)))); +} + +void Andersens::visitLoadInst(LoadInst &LI) { + if (isa(LI.getType())) + // P1 = load P2 --> + Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI), + getNode(LI.getOperand(0)))); +} + +void Andersens::visitStoreInst(StoreInst &SI) { + if (isa(SI.getOperand(0)->getType())) + // store P1, P2 --> + Constraints.push_back(Constraint(Constraint::Store, + getNode(SI.getOperand(1)), + getNode(SI.getOperand(0)))); +} + +void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { + // P1 = getelementptr P2, ... --> + Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP), + getNode(GEP.getOperand(0)))); +} + +void Andersens::visitPHINode(PHINode &PN) { + if (isa(PN.getType())) { + unsigned PNN = getNodeValue(PN); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + // P1 = phi P2, P3 --> , , ... + Constraints.push_back(Constraint(Constraint::Copy, PNN, + getNode(PN.getIncomingValue(i)))); + } +} + +void Andersens::visitCastInst(CastInst &CI) { + Value *Op = CI.getOperand(0); + if (isa(CI.getType())) { + if (isa(Op->getType())) { + // P1 = cast P2 --> + Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), + getNode(CI.getOperand(0)))); + } else { + // P1 = cast int --> +#if 0 + Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), + UniversalSet)); +#else + getNodeValue(CI); +#endif + } + } else if (isa(Op->getType())) { + // int = cast P1 --> +#if 0 + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(CI.getOperand(0)))); +#else + getNode(CI.getOperand(0)); +#endif + } +} + +void Andersens::visitSelectInst(SelectInst &SI) { + if (isa(SI.getType())) { + unsigned SIN = getNodeValue(SI); + // P1 = select C, P2, P3 ---> , + Constraints.push_back(Constraint(Constraint::Copy, SIN, + getNode(SI.getOperand(1)))); + Constraints.push_back(Constraint(Constraint::Copy, SIN, + getNode(SI.getOperand(2)))); + } +} + +void Andersens::visitVAArg(VAArgInst &I) { + assert(0 && "vaarg not handled yet!"); +} + +/// AddConstraintsForCall - Add constraints for a call with actual arguments +/// specified by CS to the function specified by F. Note that the types of +/// arguments might not match up in the case where this is an indirect call and +/// the function pointer has been casted. If this is the case, do something +/// reasonable. +void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { + Value *CallValue = CS.getCalledValue(); + bool IsDeref = F == NULL; + + // If this is a call to an external function, try to handle it directly to get + // some taste of context sensitivity. 
+ if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) + return; + + if (isa(CS.getType())) { + unsigned CSN = getNode(CS.getInstruction()); + if (!F || isa(F->getFunctionType()->getReturnType())) { + if (IsDeref) + Constraints.push_back(Constraint(Constraint::Load, CSN, + getNode(CallValue), CallReturnPos)); + else + Constraints.push_back(Constraint(Constraint::Copy, CSN, + getNode(CallValue) + CallReturnPos)); + } else { + // If the function returns a non-pointer value, handle this just like we + // treat a nonpointer cast to pointer. + Constraints.push_back(Constraint(Constraint::Copy, CSN, + UniversalSet)); + } + } else if (F && isa(F->getFunctionType()->getReturnType())) { +#if FULL_UNIVERSAL + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(CallValue) + CallReturnPos)); +#else + Constraints.push_back(Constraint(Constraint::Copy, + getNode(CallValue) + CallReturnPos, + UniversalSet)); +#endif + + + } + + CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); + bool external = !F || F->isDeclaration(); + if (F) { + // Direct Call + Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) + { +#if !FULL_UNIVERSAL + if (external && isa((*ArgI)->getType())) + { + // Add constraint that ArgI can now point to anything due to + // escaping, as can everything it points to. The second portion of + // this should be taken care of by universal = *universal + Constraints.push_back(Constraint(Constraint::Copy, + getNode(*ArgI), + UniversalSet)); + } +#endif + if (isa(AI->getType())) { + if (isa((*ArgI)->getType())) { + // Copy the actual argument into the formal argument. + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + getNode(*ArgI))); + } else { + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + UniversalSet)); + } + } else if (isa((*ArgI)->getType())) { +#if FULL_UNIVERSAL + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(*ArgI))); +#else + Constraints.push_back(Constraint(Constraint::Copy, + getNode(*ArgI), + UniversalSet)); +#endif + } + } + } else { + //Indirect Call + unsigned ArgPos = CallFirstArgPos; + for (; ArgI != ArgE; ++ArgI) { + if (isa((*ArgI)->getType())) { + // Copy the actual argument into the formal argument. + Constraints.push_back(Constraint(Constraint::Store, + getNode(CallValue), + getNode(*ArgI), ArgPos++)); + } else { + Constraints.push_back(Constraint(Constraint::Store, + getNode (CallValue), + UniversalSet, ArgPos++)); + } + } + } + // Copy all pointers passed through the varargs section to the varargs node. + if (F && F->getFunctionType()->isVarArg()) + for (; ArgI != ArgE; ++ArgI) + if (isa((*ArgI)->getType())) + Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), + getNode(*ArgI))); + // If more arguments are passed in than we track, just drop them on the floor. +} + +void Andersens::visitCallSite(CallSite CS) { + if (isa(CS.getType())) + getNodeValue(*CS.getInstruction()); + + if (Function *F = CS.getCalledFunction()) { + AddConstraintsForCall(CS, F); + } else { + AddConstraintsForCall(CS, NULL); + } +} + +//===----------------------------------------------------------------------===// +// Constraint Solving Phase +//===----------------------------------------------------------------------===// + +/// intersects - Return true if the points-to set of this node intersects +/// with the points-to set of the specified node. 
+bool Andersens::Node::intersects(Node *N) const {
+  return PointsTo->intersects(N->PointsTo);
+}
+
+/// intersectsIgnoring - Return true if the points-to set of this node
+/// intersects with the points-to set of the specified node on any nodes
+/// except for the specified node to ignore.
+bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
+  // TODO: If we are only going to call this with the same value for Ignoring,
+  // we should move the special values out of the points-to bitmap.
+  bool WeHadIt = PointsTo->test(Ignoring);
+  bool NHadIt = N->PointsTo->test(Ignoring);
+  bool Result = false;
+  if (WeHadIt)
+    PointsTo->reset(Ignoring);
+  if (NHadIt)
+    N->PointsTo->reset(Ignoring);
+  Result = PointsTo->intersects(N->PointsTo);
+  if (WeHadIt)
+    PointsTo->set(Ignoring);
+  if (NHadIt)
+    N->PointsTo->set(Ignoring);
+  return Result;
+}
+
+void dumpToDOUT(SparseBitVector<> *bitmap) {
+#ifndef NDEBUG
+  dump(*bitmap, DOUT);
+#endif
+}
+
+/// Clump together address taken variables so that the points-to sets use up
+/// less space and can be operated on faster.
+void Andersens::ClumpAddressTaken() {
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-renumber"
+  std::vector<unsigned> Translate;
+  std::vector<Node> NewGraphNodes;
+
+  Translate.resize(GraphNodes.size());
+  unsigned NewPos = 0;
+
+  for (unsigned i = 0; i < Constraints.size(); ++i) {
+    Constraint &C = Constraints[i];
+    if (C.Type == Constraint::AddressOf) {
+      GraphNodes[C.Src].AddressTaken = true;
+    }
+  }
+  for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
+    unsigned Pos = NewPos++;
+    Translate[i] = Pos;
+    NewGraphNodes.push_back(GraphNodes[i]);
+    DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+  }
+
+  // I believe this ends up being faster than making two vectors and splicing
+  // them.
+  for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+    if (GraphNodes[i].AddressTaken) {
+      unsigned Pos = NewPos++;
+      Translate[i] = Pos;
+      NewGraphNodes.push_back(GraphNodes[i]);
+      DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+    }
+  }
+
+  for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+    if (!GraphNodes[i].AddressTaken) {
+      unsigned Pos = NewPos++;
+      Translate[i] = Pos;
+      NewGraphNodes.push_back(GraphNodes[i]);
+      DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+    }
+  }
+
+  for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin();
+       Iter != ValueNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin();
+       Iter != ObjectNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin();
+       Iter != ReturnNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin();
+       Iter != VarargNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (unsigned i = 0; i < Constraints.size(); ++i) {
+    Constraint &C = Constraints[i];
+    C.Src = Translate[C.Src];
+    C.Dest = Translate[C.Dest];
+  }
+
+  GraphNodes.swap(NewGraphNodes);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+}
+
+/// The technique used here is described in "Exploiting Pointer and Location
+/// Equivalence to Optimize Pointer Analysis" (14th International Static
+/// Analysis Symposium (SAS), August 2007).  It is known as the "HVN"
+/// algorithm, and is equivalent to value numbering the collapsed constraint
+/// graph without evaluating unions.
This is used as a pre-pass to HU in order to resolve +/// first order pointer dereferences and speed up/reduce memory usage of HU. +/// Running both is equivalent to HRU without the iteration +/// HVN in more detail: +/// Imagine the set of constraints was simply straight line code with no loops +/// (we eliminate cycles, so there are no loops), such as: +/// E = &D +/// E = &C +/// E = F +/// F = G +/// G = F +/// Applying value numbering to this code tells us: +/// G == F == E +/// +/// For HVN, this is as far as it goes. We assign new value numbers to every +/// "address node", and every "reference node". +/// To get the optimal result for this, we use a DFS + SCC (since all nodes in a +/// cycle must have the same value number since the = operation is really +/// inclusion, not overwrite), and value number nodes we receive points-to sets +/// before we value our own node. +/// The advantage of HU over HVN is that HU considers the inclusion property, so +/// that if you have +/// E = &D +/// E = &C +/// E = F +/// F = G +/// F = &D +/// G = F +/// HU will determine that G == F == E. HVN will not, because it cannot prove +/// that the points to information ends up being the same because they all +/// receive &D from E anyway. + +void Andersens::HVN() { + DOUT << "Beginning HVN\n"; + // Build a predecessor graph. This is like our constraint graph with the + // edges going in the opposite direction, and there are edges for all the + // constraints, instead of just copy constraints. We also build implicit + // edges for constraints are implied but not explicit. I.E for the constraint + // a = &b, we add implicit edges *a = b. This helps us capture more cycles + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { + Constraint &C = Constraints[i]; + if (C.Type == Constraint::AddressOf) { + GraphNodes[C.Src].AddressTaken = true; + GraphNodes[C.Src].Direct = false; + + // Dest = &src edge + unsigned AdrNode = C.Src + FirstAdrNode; + if (!GraphNodes[C.Dest].PredEdges) + GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; + GraphNodes[C.Dest].PredEdges->set(AdrNode); + + // *Dest = src edge + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].ImplicitPredEdges) + GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; + GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); + } else if (C.Type == Constraint::Load) { + if (C.Offset == 0) { + // dest = *src edge + if (!GraphNodes[C.Dest].PredEdges) + GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; + GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); + } else { + GraphNodes[C.Dest].Direct = false; + } + } else if (C.Type == Constraint::Store) { + if (C.Offset == 0) { + // *dest = src edge + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].PredEdges) + GraphNodes[RefNode].PredEdges = new SparseBitVector<>; + GraphNodes[RefNode].PredEdges->set(C.Src); + } + } else { + // Dest = Src edge and *Dest = *Src edge + if (!GraphNodes[C.Dest].PredEdges) + GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; + GraphNodes[C.Dest].PredEdges->set(C.Src); + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].ImplicitPredEdges) + GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; + GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); + } + } + PEClass = 1; + // Do SCC finding first to condense our predecessor graph + DFSNumber = 0; + Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); + Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); + 
Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); + + for (unsigned i = 0; i < FirstRefNode; ++i) { + unsigned Node = VSSCCRep[i]; + if (!Node2Visited[Node]) + HVNValNum(Node); + } + for (BitVectorMap::iterator Iter = Set2PEClass.begin(); + Iter != Set2PEClass.end(); + ++Iter) + delete Iter->first; + Set2PEClass.clear(); + Node2DFS.clear(); + Node2Deleted.clear(); + Node2Visited.clear(); + DOUT << "Finished HVN\n"; + +} + +/// This is the workhorse of HVN value numbering. We combine SCC finding at the +/// same time because it's easy. +void Andersens::HVNValNum(unsigned NodeIndex) { + unsigned MyDFS = DFSNumber++; + Node *N = &GraphNodes[NodeIndex]; + Node2Visited[NodeIndex] = true; + Node2DFS[NodeIndex] = MyDFS; + + // First process all our explicit edges + if (N->PredEdges) + for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); + Iter != N->PredEdges->end(); + ++Iter) { + unsigned j = VSSCCRep[*Iter]; + if (!Node2Deleted[j]) { + if (!Node2Visited[j]) + HVNValNum(j); + if (Node2DFS[NodeIndex] > Node2DFS[j]) + Node2DFS[NodeIndex] = Node2DFS[j]; + } + } + + // Now process all the implicit edges + if (N->ImplicitPredEdges) + for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin(); + Iter != N->ImplicitPredEdges->end(); + ++Iter) { + unsigned j = VSSCCRep[*Iter]; + if (!Node2Deleted[j]) { + if (!Node2Visited[j]) + HVNValNum(j); + if (Node2DFS[NodeIndex] > Node2DFS[j]) + Node2DFS[NodeIndex] = Node2DFS[j]; + } + } + + // See if we found any cycles + if (MyDFS == Node2DFS[NodeIndex]) { + while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { + unsigned CycleNodeIndex = SCCStack.top(); + Node *CycleNode = &GraphNodes[CycleNodeIndex]; + VSSCCRep[CycleNodeIndex] = NodeIndex; + // Unify the nodes + N->Direct &= CycleNode->Direct; + + if (CycleNode->PredEdges) { + if (!N->PredEdges) + N->PredEdges = new SparseBitVector<>; + *(N->PredEdges) |= CycleNode->PredEdges; + delete CycleNode->PredEdges; + CycleNode->PredEdges = NULL; + } + if (CycleNode->ImplicitPredEdges) { + if (!N->ImplicitPredEdges) + N->ImplicitPredEdges = new SparseBitVector<>; + *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges; + delete CycleNode->ImplicitPredEdges; + CycleNode->ImplicitPredEdges = NULL; + } + + SCCStack.pop(); + } + + Node2Deleted[NodeIndex] = true; + + if (!N->Direct) { + GraphNodes[NodeIndex].PointerEquivLabel = PEClass++; + return; + } + + // Collect labels of successor nodes + bool AllSame = true; + unsigned First = ~0; + SparseBitVector<> *Labels = new SparseBitVector<>; + bool Used = false; + + if (N->PredEdges) + for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); + Iter != N->PredEdges->end(); + ++Iter) { + unsigned j = VSSCCRep[*Iter]; + unsigned Label = GraphNodes[j].PointerEquivLabel; + // Ignore labels that are equal to us or non-pointers + if (j == NodeIndex || Label == 0) + continue; + if (First == (unsigned)~0) + First = Label; + else if (First != Label) + AllSame = false; + Labels->set(Label); + } + + // We either have a non-pointer, a copy of an existing node, or a new node. + // Assign the appropriate pointer equivalence label. 
+ if (Labels->empty()) { + GraphNodes[NodeIndex].PointerEquivLabel = 0; + } else if (AllSame) { + GraphNodes[NodeIndex].PointerEquivLabel = First; + } else { + GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels]; + if (GraphNodes[NodeIndex].PointerEquivLabel == 0) { + unsigned EquivClass = PEClass++; + Set2PEClass[Labels] = EquivClass; + GraphNodes[NodeIndex].PointerEquivLabel = EquivClass; + Used = true; + } + } + if (!Used) + delete Labels; + } else { + SCCStack.push(NodeIndex); + } +} + +/// The technique used here is described in "Exploiting Pointer and Location +/// Equivalence to Optimize Pointer Analysis. In the 14th International Static +/// Analysis Symposium (SAS), August 2007." It is known as the "HU" algorithm, +/// and is equivalent to value numbering the collapsed constraint graph +/// including evaluating unions. +void Andersens::HU() { + DOUT << "Beginning HU\n"; + // Build a predecessor graph. This is like our constraint graph with the + // edges going in the opposite direction, and there are edges for all the + // constraints, instead of just copy constraints. We also build implicit + // edges for constraints are implied but not explicit. I.E for the constraint + // a = &b, we add implicit edges *a = b. This helps us capture more cycles + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { + Constraint &C = Constraints[i]; + if (C.Type == Constraint::AddressOf) { + GraphNodes[C.Src].AddressTaken = true; + GraphNodes[C.Src].Direct = false; + + GraphNodes[C.Dest].PointsTo->set(C.Src); + // *Dest = src edge + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].ImplicitPredEdges) + GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; + GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); + GraphNodes[C.Src].PointedToBy->set(C.Dest); + } else if (C.Type == Constraint::Load) { + if (C.Offset == 0) { + // dest = *src edge + if (!GraphNodes[C.Dest].PredEdges) + GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; + GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); + } else { + GraphNodes[C.Dest].Direct = false; + } + } else if (C.Type == Constraint::Store) { + if (C.Offset == 0) { + // *dest = src edge + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].PredEdges) + GraphNodes[RefNode].PredEdges = new SparseBitVector<>; + GraphNodes[RefNode].PredEdges->set(C.Src); + } + } else { + // Dest = Src edge and *Dest = *Src edg + if (!GraphNodes[C.Dest].PredEdges) + GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; + GraphNodes[C.Dest].PredEdges->set(C.Src); + unsigned RefNode = C.Dest + FirstRefNode; + if (!GraphNodes[RefNode].ImplicitPredEdges) + GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; + GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); + } + } + PEClass = 1; + // Do SCC finding first to condense our predecessor graph + DFSNumber = 0; + Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); + Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); + Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); + + for (unsigned i = 0; i < FirstRefNode; ++i) { + if (FindNode(i) == i) { + unsigned Node = VSSCCRep[i]; + if (!Node2Visited[Node]) + Condense(Node); + } + } + + // Reset tables for actual labeling + Node2DFS.clear(); + Node2Visited.clear(); + Node2Deleted.clear(); + // Pre-grow our densemap so that we don't get really bad behavior + Set2PEClass.resize(GraphNodes.size()); + + // Visit the condensed graph and generate pointer equivalence labels. 
+  Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+  for (unsigned i = 0; i < FirstRefNode; ++i) {
+    if (FindNode(i) == i) {
+      unsigned Node = VSSCCRep[i];
+      if (!Node2Visited[Node])
+        HUValNum(Node);
+    }
+  }
+  // PEClass nodes will be deleted when N->PointsTo is deleted in our caller.
+  Set2PEClass.clear();
+  DOUT << "Finished HU\n";
+}
+
+
+/// Implementation of the standard Tarjan SCC algorithm as modified by
+/// Nuutila.
+void Andersens::Condense(unsigned NodeIndex) {
+  unsigned MyDFS = DFSNumber++;
+  Node *N = &GraphNodes[NodeIndex];
+  Node2Visited[NodeIndex] = true;
+  Node2DFS[NodeIndex] = MyDFS;
+
+  // First process all our explicit edges
+  if (N->PredEdges)
+    for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+         Iter != N->PredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          Condense(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // Now process all the implicit edges
+  if (N->ImplicitPredEdges)
+    for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
+         Iter != N->ImplicitPredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          Condense(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // See if we found any cycles
+  if (MyDFS == Node2DFS[NodeIndex]) {
+    while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+      unsigned CycleNodeIndex = SCCStack.top();
+      Node *CycleNode = &GraphNodes[CycleNodeIndex];
+      VSSCCRep[CycleNodeIndex] = NodeIndex;
+      // Unify the nodes
+      N->Direct &= CycleNode->Direct;
+
+      *(N->PointsTo) |= CycleNode->PointsTo;
+      delete CycleNode->PointsTo;
+      CycleNode->PointsTo = NULL;
+      if (CycleNode->PredEdges) {
+        if (!N->PredEdges)
+          N->PredEdges = new SparseBitVector<>;
+        *(N->PredEdges) |= CycleNode->PredEdges;
+        delete CycleNode->PredEdges;
+        CycleNode->PredEdges = NULL;
+      }
+      if (CycleNode->ImplicitPredEdges) {
+        if (!N->ImplicitPredEdges)
+          N->ImplicitPredEdges = new SparseBitVector<>;
+        *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
+        delete CycleNode->ImplicitPredEdges;
+        CycleNode->ImplicitPredEdges = NULL;
+      }
+      SCCStack.pop();
+    }
+
+    Node2Deleted[NodeIndex] = true;
+
+    // Set up number of incoming edges for other nodes
+    if (N->PredEdges)
+      for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+           Iter != N->PredEdges->end();
+           ++Iter)
+        ++GraphNodes[VSSCCRep[*Iter]].NumInEdges;
+  } else {
+    SCCStack.push(NodeIndex);
+  }
+}
+
+void Andersens::HUValNum(unsigned NodeIndex) {
+  Node *N = &GraphNodes[NodeIndex];
+  Node2Visited[NodeIndex] = true;
+
+  // Eliminate dereferences of non-pointers for those non-pointers we have
+  // already identified.  These are ref nodes whose non-ref node:
+  // 1. Has already been visited and determined to point to nothing (and thus,
+  //    a dereference of it must point to nothing)
+  // 2. Is any direct node with no predecessor edges in our graph and with no
+  //    points-to set (since it can't point to anything either, being that it
+  //    receives no points-to sets and has none).
+  if (NodeIndex >= FirstRefNode) {
+    unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)];
+    if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel)
+        || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges
+            && GraphNodes[j].PointsTo->empty())) {
+      return;
+    }
+  }
+  // Process all our explicit edges
+  if (N->PredEdges)
+    for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+         Iter != N->PredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Visited[j])
+        HUValNum(j);
+
+      // If this edge turned out to be the same as us, or got no pointer
+      // equivalence label (and thus points to nothing), just decrement our
+      // incoming edges and continue.
+      if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) {
+        --GraphNodes[j].NumInEdges;
+        continue;
+      }
+
+      *(N->PointsTo) |= GraphNodes[j].PointsTo;
+
+      // If we didn't end up storing this in the hash, and we're done with all
+      // the edges, we don't need the points-to set anymore.
+      --GraphNodes[j].NumInEdges;
+      if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) {
+        delete GraphNodes[j].PointsTo;
+        GraphNodes[j].PointsTo = NULL;
+      }
+    }
+  // If this isn't a direct node, generate a fresh variable.
+  if (!N->Direct) {
+    N->PointsTo->set(FirstRefNode + NodeIndex);
+  }
+
+  // See if we have something equivalent to us; if not, generate a new
+  // equivalence class.
+  if (N->PointsTo->empty()) {
+    delete N->PointsTo;
+    N->PointsTo = NULL;
+  } else {
+    if (N->Direct) {
+      N->PointerEquivLabel = Set2PEClass[N->PointsTo];
+      if (N->PointerEquivLabel == 0) {
+        unsigned EquivClass = PEClass++;
+        N->StoredInHash = true;
+        Set2PEClass[N->PointsTo] = EquivClass;
+        N->PointerEquivLabel = EquivClass;
+      }
+    } else {
+      N->PointerEquivLabel = PEClass++;
+    }
+  }
+}
+
+/// Rewrite our list of constraints so that pointer equivalent nodes are
+/// replaced by their pointer equivalence class representative.
+void Andersens::RewriteConstraints() {
+  std::vector<Constraint> NewConstraints;
+  DenseSet<Constraint, ConstraintKeyInfo> Seen;
+
+  PEClass2Node.clear();
+  PENLEClass2Node.clear();
+
+  // We may have from 1 to GraphNodes + 1 equivalence classes.
+  PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1);
+  PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1);
+
+  // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent
+  // nodes, and rewriting constraints to use the representative nodes.
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    Constraint &C = Constraints[i];
+    unsigned RHSNode = FindNode(C.Src);
+    unsigned LHSNode = FindNode(C.Dest);
+    unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel;
+    unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel;
+
+    // First we try to eliminate constraints for things we can prove don't
+    // point to anything.
+    if (LHSLabel == 0) {
+      DEBUG(PrintNode(&GraphNodes[LHSNode]));
+      DOUT << " is a non-pointer, ignoring constraint.\n";
+      continue;
+    }
+    if (RHSLabel == 0) {
+      DEBUG(PrintNode(&GraphNodes[RHSNode]));
+      DOUT << " is a non-pointer, ignoring constraint.\n";
+      continue;
+    }
+    // This constraint may be useless, and it may become useless as we
+    // translate it.
+    if (C.Src == C.Dest && C.Type == Constraint::Copy)
+      continue;
+
+    C.Src = FindEquivalentNode(RHSNode, RHSLabel);
+    C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel);
+    if ((C.Src == C.Dest && C.Type == Constraint::Copy)
+        || Seen.count(C))
+      continue;
+
+    Seen.insert(C);
+    NewConstraints.push_back(C);
+  }
+  Constraints.swap(NewConstraints);
+  PEClass2Node.clear();
+}
+
+/// See if we have a node that is pointer equivalent to the one being asked
+/// about, and if so, unite them and return the equivalent node.  Otherwise,
+/// return the original node.
+unsigned Andersens::FindEquivalentNode(unsigned NodeIndex,
+                                       unsigned NodeLabel) {
+  if (!GraphNodes[NodeIndex].AddressTaken) {
+    if (PEClass2Node[NodeLabel] != -1) {
+      // We found an existing node with the same pointer label, so unify them.
+      // We specifically request that Union-By-Rank not be used so that
+      // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around.
+      return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false);
+    } else {
+      PEClass2Node[NodeLabel] = NodeIndex;
+      PENLEClass2Node[NodeLabel] = NodeIndex;
+    }
+  } else if (PENLEClass2Node[NodeLabel] == -1) {
+    PENLEClass2Node[NodeLabel] = NodeIndex;
+  }
+
+  return NodeIndex;
+}
+
+void Andersens::PrintLabels() const {
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    if (i < FirstRefNode) {
+      PrintNode(&GraphNodes[i]);
+    } else if (i < FirstAdrNode) {
+      DOUT << "REF(";
+      PrintNode(&GraphNodes[i-FirstRefNode]);
+      DOUT << ")";
+    } else {
+      DOUT << "ADR(";
+      PrintNode(&GraphNodes[i-FirstAdrNode]);
+      DOUT << ")";
+    }
+
+    DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel
+         << " and SCC rep " << VSSCCRep[i]
+         << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
+         << "\n";
+  }
+}
+
+/// The technique used here is described in "The Ant and the
+/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of
+/// Lines of Code.  In Programming Language Design and Implementation
+/// (PLDI), June 2007."  It is known as the "HCD" (Hybrid Cycle
+/// Detection) algorithm.  It is called a hybrid because it performs an
+/// offline analysis and uses its results during the solving (online)
+/// phase.  This is just the offline portion; the results of this
+/// operation are stored in SDT and are later used in SolveConstraints()
+/// and UniteNodes().
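+///
+/// A minimal sketch of what the table encodes (an illustrative reading of
+/// the code below, not wording from the paper): if the offline pass finds
+/// that *x and y sit in one SCC, it records SDT[x] = y, so that during
+/// solving every variable entering x's points-to set can be united with y
+/// immediately instead of waiting for online cycle detection.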
+void Andersens::HCD() {
+  DOUT << "Starting HCD.\n";
+  HCDSCCRep.resize(GraphNodes.size());
+
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    GraphNodes[i].Edges = new SparseBitVector<>;
+    HCDSCCRep[i] = i;
+  }
+
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    Constraint &C = Constraints[i];
+    assert(C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
+    if (C.Type == Constraint::AddressOf) {
+      continue;
+    } else if (C.Type == Constraint::Load) {
+      if (C.Offset == 0)
+        GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode);
+    } else if (C.Type == Constraint::Store) {
+      if (C.Offset == 0)
+        GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src);
+    } else {
+      GraphNodes[C.Dest].Edges->set(C.Src);
+    }
+  }
+
+  Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+  Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+  Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+  SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1);
+
+  DFSNumber = 0;
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    unsigned Node = HCDSCCRep[i];
+    if (!Node2Deleted[Node])
+      Search(Node);
+  }
+
+  for (unsigned i = 0; i < GraphNodes.size(); ++i)
+    if (GraphNodes[i].Edges != NULL) {
+      delete GraphNodes[i].Edges;
+      GraphNodes[i].Edges = NULL;
+    }
+
+  while (!SCCStack.empty())
+    SCCStack.pop();
+
+  Node2DFS.clear();
+  Node2Visited.clear();
+  Node2Deleted.clear();
+  HCDSCCRep.clear();
+  DOUT << "HCD complete.\n";
+}
+
+// Component of HCD:
+// Use Nuutila's variant of Tarjan's algorithm to detect
+// Strongly-Connected Components (SCCs).  For non-trivial SCCs
+// containing ref nodes, insert the appropriate information in SDT.
+void Andersens::Search(unsigned Node) {
+  unsigned MyDFS = DFSNumber++;
+
+  Node2Visited[Node] = true;
+  Node2DFS[Node] = MyDFS;
+
+  for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(),
+                                   End = GraphNodes[Node].Edges->end();
+       Iter != End;
+       ++Iter) {
+    unsigned J = HCDSCCRep[*Iter];
+    assert(GraphNodes[J].isRep() && "Debug check; must be representative");
+    if (!Node2Deleted[J]) {
+      if (!Node2Visited[J])
+        Search(J);
+      if (Node2DFS[Node] > Node2DFS[J])
+        Node2DFS[Node] = Node2DFS[J];
+    }
+  }
+
+  if (MyDFS != Node2DFS[Node]) {
+    SCCStack.push(Node);
+    return;
+  }
+
+  // This node is the root of an SCC, so process it.
+  //
+  // If the SCC is "non-trivial" (not a singleton) and contains a reference
+  // node, we place this SCC into SDT.  We unite the nodes in any case.
+  if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+    SparseBitVector<> SCC;
+
+    SCC.set(Node);
+
+    bool Ref = (Node >= FirstRefNode);
+
+    Node2Deleted[Node] = true;
+
+    do {
+      unsigned P = SCCStack.top(); SCCStack.pop();
+      Ref |= (P >= FirstRefNode);
+      SCC.set(P);
+      HCDSCCRep[P] = Node;
+    } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS);
+
+    if (Ref) {
+      unsigned Rep = SCC.find_first();
+      assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!");
+
+      SparseBitVector<>::iterator i = SCC.begin();
+
+      // Skip over the non-ref nodes
+      while (*i < FirstRefNode)
+        ++i;
+
+      while (i != SCC.end())
+        SDT[(*i++) - FirstRefNode] = Rep;
+    }
+  }
+}
+
+
+/// Optimize the constraints by performing offline variable substitution and
+/// other optimizations.
+void Andersens::OptimizeConstraints() {
+  DOUT << "Beginning constraint optimization\n";
+
+  SDTActive = false;
+
+  // Function related nodes need to stay in the same relative position and
+  // can't be location equivalent.
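+  // Rough illustration (an assumed reading of MaxK, not a guarantee from the
+  // source): a function whose block of consecutive GraphNodes entries covers
+  // its return-value, vararg and argument nodes has MaxK map the block's
+  // first index to the block's size, and the loop below pins every node in
+  // that block so offline substitution never merges or reorders them.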
+  for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin();
+       Iter != MaxK.end();
+       ++Iter) {
+    for (unsigned i = Iter->first;
+         i != Iter->first + Iter->second;
+         ++i) {
+      GraphNodes[i].AddressTaken = true;
+      GraphNodes[i].Direct = false;
+    }
+  }
+
+  ClumpAddressTaken();
+  FirstRefNode = GraphNodes.size();
+  FirstAdrNode = FirstRefNode + GraphNodes.size();
+  GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(),
+                    Node(false));
+  VSSCCRep.resize(GraphNodes.size());
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    VSSCCRep[i] = i;
+  }
+  HVN();
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    Node *N = &GraphNodes[i];
+    delete N->PredEdges;
+    N->PredEdges = NULL;
+    delete N->ImplicitPredEdges;
+    N->ImplicitPredEdges = NULL;
+  }
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-labels"
+  DEBUG(PrintLabels());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+  RewriteConstraints();
+  // Delete the adr nodes.
+  GraphNodes.resize(FirstRefNode * 2);
+
+  // Now perform HU
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    Node *N = &GraphNodes[i];
+    if (FindNode(i) == i) {
+      N->PointsTo = new SparseBitVector<>;
+      N->PointedToBy = new SparseBitVector<>;
+      // Reset our labels
+    }
+    VSSCCRep[i] = i;
+    N->PointerEquivLabel = 0;
+  }
+  HU();
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-labels"
+  DEBUG(PrintLabels());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+  RewriteConstraints();
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    if (FindNode(i) == i) {
+      Node *N = &GraphNodes[i];
+      delete N->PointsTo;
+      N->PointsTo = NULL;
+      delete N->PredEdges;
+      N->PredEdges = NULL;
+      delete N->ImplicitPredEdges;
+      N->ImplicitPredEdges = NULL;
+      delete N->PointedToBy;
+      N->PointedToBy = NULL;
+    }
+  }
+
+  // Perform Hybrid Cycle Detection (HCD).
+  HCD();
+  SDTActive = true;
+
+  // No longer any need for the upper half of GraphNodes (for ref nodes).
+  GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end());
+
+  // HCD complete.
+
+  DOUT << "Finished constraint optimization\n";
+  FirstRefNode = 0;
+  FirstAdrNode = 0;
+}
+
+/// Unite pointer but not location equivalent variables, now that the
+/// constraint graph is built.
+void Andersens::UnitePointerEquivalences() {
+  DOUT << "Uniting remaining pointer equivalences\n";
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
+      unsigned Label = GraphNodes[i].PointerEquivLabel;
+
+      if (Label && PENLEClass2Node[Label] != -1)
+        UniteNodes(i, PENLEClass2Node[Label]);
+    }
+  }
+  DOUT << "Finished remaining pointer equivalences\n";
+  PENLEClass2Node.clear();
+}
+
+/// Create the constraint graph used for solving points-to analysis.
+///
+void Andersens::CreateConstraintGraph() {
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    Constraint &C = Constraints[i];
+    assert(C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
+    if (C.Type == Constraint::AddressOf)
+      GraphNodes[C.Dest].PointsTo->set(C.Src);
+    else if (C.Type == Constraint::Load)
+      GraphNodes[C.Src].Constraints.push_back(C);
+    else if (C.Type == Constraint::Store)
+      GraphNodes[C.Dest].Constraints.push_back(C);
+    else if (C.Offset != 0)
+      GraphNodes[C.Src].Constraints.push_back(C);
+    else
+      GraphNodes[C.Src].Edges->set(C.Dest);
+  }
+}
+
+// Perform DFS and cycle detection.
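+// For instance (a hedged sketch of the mapping above): for the C fragment
+//   p = &x;  q = p;  r = *q;
+// CreateConstraintGraph seeds p's PointsTo with {x} for the AddressOf
+// constraint, records the copy q = p as a plain edge on p's Edges set, and
+// parks the load r = *q on q's Constraints list, to be expanded by the
+// solver once q's points-to set is known.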
+bool Andersens::QueryNode(unsigned Node) {
+  assert(GraphNodes[Node].isRep() && "Querying a non-rep node");
+  unsigned OurDFS = ++DFSNumber;
+  SparseBitVector<> ToErase;
+  SparseBitVector<> NewEdges;
+  Tarjan2DFS[Node] = OurDFS;
+
+  // Changed denotes a change from a recursive call that we will bubble up.
+  // Merged is set if we actually merge a node ourselves.
+  bool Changed = false, Merged = false;
+
+  for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin();
+       bi != GraphNodes[Node].Edges->end();
+       ++bi) {
+    unsigned RepNode = FindNode(*bi);
+    // If this edge points to a non-representative node but we are
+    // already planning to add an edge to its representative, we have no
+    // need for this edge anymore.
+    if (RepNode != *bi && NewEdges.test(RepNode)) {
+      ToErase.set(*bi);
+      continue;
+    }
+
+    // Continue about our DFS.
+    if (!Tarjan2Deleted[RepNode]) {
+      if (Tarjan2DFS[RepNode] == 0) {
+        Changed |= QueryNode(RepNode);
+        // May have been changed by QueryNode
+        RepNode = FindNode(RepNode);
+      }
+      if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node])
+        Tarjan2DFS[Node] = Tarjan2DFS[RepNode];
+    }
+
+    // We may have just discovered that this node is part of a cycle, in
+    // which case we can also erase it.
+    if (RepNode != *bi) {
+      ToErase.set(*bi);
+      NewEdges.set(RepNode);
+    }
+  }
+
+  GraphNodes[Node].Edges->intersectWithComplement(ToErase);
+  GraphNodes[Node].Edges |= NewEdges;
+
+  // If this node is a root of a non-trivial SCC, place it on our
+  // worklist to be processed.
+  if (OurDFS == Tarjan2DFS[Node]) {
+    while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) {
+      Node = UniteNodes(Node, SCCStack.top());
+
+      SCCStack.pop();
+      Merged = true;
+    }
+    Tarjan2Deleted[Node] = true;
+
+    if (Merged)
+      NextWL->insert(&GraphNodes[Node]);
+  } else {
+    SCCStack.push(Node);
+  }
+
+  return Changed | Merged;
+}
+
+/// SolveConstraints - This stage iteratively processes the constraints list
+/// propagating constraints (adding edges to the Nodes in the points-to graph)
+/// until a fixed point is reached.
+///
+/// We use a variant of the technique called "Lazy Cycle Detection", which is
+/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer
+/// Analysis for Millions of Lines of Code.  In Programming Language Design
+/// and Implementation (PLDI), June 2007."
+/// The paper describes performing cycle detection one node at a time, which
+/// can be expensive when there are no cycles, but only long chains of nodes
+/// that it heuristically believes are cycles (because it will DFS from each
+/// node without state from previous nodes).
+/// Instead, we use the heuristic to build a worklist of nodes to check, then
+/// cycle detect them all at the same time to do this more cheaply.  This
+/// catches cycles slightly later than the original technique did, but does
+/// so significantly more cheaply.
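+///
+/// A minimal sketch of the candidate test (mirroring the code further down;
+/// a and b stand for arbitrary node indices):
+///   std::pair<unsigned, unsigned> Edge(a, b);
+///   if (!EdgesChecked.count(Edge) &&
+///       *GraphNodes[b].PointsTo == *GraphNodes[a].PointsTo) {
+///     EdgesChecked.insert(Edge);   // never test this edge again
+///     TarjanWL.push(b);            // batch-check it next iteration
+///   }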
+
+void Andersens::SolveConstraints() {
+  CurrWL = &w1;
+  NextWL = &w2;
+
+  OptimizeConstraints();
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-constraints"
+  DEBUG(PrintConstraints());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    Node *N = &GraphNodes[i];
+    N->PointsTo = new SparseBitVector<>;
+    N->OldPointsTo = new SparseBitVector<>;
+    N->Edges = new SparseBitVector<>;
+  }
+  CreateConstraintGraph();
+  UnitePointerEquivalences();
+  assert(SCCStack.empty() && "SCC Stack should be empty by now!");
+  Node2DFS.clear();
+  Node2Deleted.clear();
+  Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+  Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+  DFSNumber = 0;
+  DenseSet<Constraint, ConstraintKeyInfo> Seen;
+  DenseSet<std::pair<unsigned, unsigned>, PairKeyInfo> EdgesChecked;
+
+  // Order graph and add initial nodes to work list.
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    Node *INode = &GraphNodes[i];
+
+    // Add to work list if it's a representative and can contribute to the
+    // calculation right now.
+    if (INode->isRep() && !INode->PointsTo->empty()
+        && (!INode->Edges->empty() || !INode->Constraints.empty())) {
+      INode->Stamp();
+      CurrWL->insert(INode);
+    }
+  }
+  std::queue<unsigned> TarjanWL;
+#if !FULL_UNIVERSAL
+  // "Rep and special variables" - in order for HCD to maintain conservative
+  // results when !FULL_UNIVERSAL, we need to treat the special variables in
+  // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of
+  // the analysis - it's ok to add edges from the special nodes, but never
+  // *to* the special nodes.
+  std::vector<unsigned> RSV;
+#endif
+  while (!CurrWL->empty()) {
+    DOUT << "Starting iteration #" << ++NumIters << "\n";
+
+    Node* CurrNode;
+    unsigned CurrNodeIndex;
+
+    // Actual cycle checking code.  We cycle check all of the lazy cycle
+    // candidates from the last iteration in one go.
+    if (!TarjanWL.empty()) {
+      DFSNumber = 0;
+
+      Tarjan2DFS.clear();
+      Tarjan2Deleted.clear();
+      while (!TarjanWL.empty()) {
+        unsigned int ToTarjan = TarjanWL.front();
+        TarjanWL.pop();
+        if (!Tarjan2Deleted[ToTarjan]
+            && GraphNodes[ToTarjan].isRep()
+            && Tarjan2DFS[ToTarjan] == 0)
+          QueryNode(ToTarjan);
+      }
+    }
+
+    // Pop the next representative node off the work list and process it.
+    while ((CurrNode = CurrWL->pop()) != NULL) {
+      CurrNodeIndex = CurrNode - &GraphNodes[0];
+      CurrNode->Stamp();
+
+
+      // Figure out the changed points-to bits
+      SparseBitVector<> CurrPointsTo;
+      CurrPointsTo.intersectWithComplement(CurrNode->PointsTo,
+                                           CurrNode->OldPointsTo);
+      if (CurrPointsTo.empty())
+        continue;
+
+      *(CurrNode->OldPointsTo) |= CurrPointsTo;
+
+      // Check the offline-computed equivalencies from HCD.
+      bool SCC = false;
+      unsigned Rep;
+
+      if (SDT[CurrNodeIndex] >= 0) {
+        SCC = true;
+        Rep = FindNode(SDT[CurrNodeIndex]);
+
+#if !FULL_UNIVERSAL
+        RSV.clear();
+#endif
+        for (SparseBitVector<>::iterator bi = CurrPointsTo.begin();
+             bi != CurrPointsTo.end(); ++bi) {
+          unsigned Node = FindNode(*bi);
+#if !FULL_UNIVERSAL
+          if (Node < NumberSpecialNodes) {
+            RSV.push_back(Node);
+            continue;
+          }
+#endif
+          Rep = UniteNodes(Rep, Node);
+        }
+#if !FULL_UNIVERSAL
+        RSV.push_back(Rep);
+#endif
+
+        NextWL->insert(&GraphNodes[Rep]);
+
+        if (!CurrNode->isRep())
+          continue;
+      }
+
+      Seen.clear();
+
+      // Now process the constraints for this node.
+      for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin();
+           li != CurrNode->Constraints.end(); ) {
+        li->Src = FindNode(li->Src);
+        li->Dest = FindNode(li->Dest);
+
+        // Delete redundant constraints
+        if (Seen.count(*li)) {
+          std::list<Constraint>::iterator lk = li; li++;
+
+          CurrNode->Constraints.erase(lk);
+          ++NumErased;
+          continue;
+        }
+        Seen.insert(*li);
+
+        // Src and Dest will be the vars we are going to process.
+        // This may look a bit ugly, but what it does is allow us to process
+        // both store and load constraints with the same code.
+        // Load constraints say that every member of our RHS solution has K
+        // added to it, and that variable gets an edge to LHS.  We also union
+        // RHS+K's solution into the LHS solution.
+        // Store constraints say that every member of our LHS solution has K
+        // added to it, and that variable gets an edge from RHS.  We also
+        // union RHS's solution into the LHS+K solution.
+        unsigned *Src;
+        unsigned *Dest;
+        unsigned K = li->Offset;
+        unsigned CurrMember;
+        if (li->Type == Constraint::Load) {
+          Src = &CurrMember;
+          Dest = &li->Dest;
+        } else if (li->Type == Constraint::Store) {
+          Src = &li->Src;
+          Dest = &CurrMember;
+        } else {
+          // TODO: Handle offsetted copy constraints.
+          li++;
+          continue;
+        }
+
+        // See if we can use Hybrid Cycle Detection (that is, check
+        // if it was a statically detected offline equivalence that
+        // involves pointers; if so, remove the redundant constraints).
+        if (SCC && K == 0) {
+#if FULL_UNIVERSAL
+          CurrMember = Rep;
+
+          if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+            if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+              NextWL->insert(&GraphNodes[*Dest]);
+#else
+          for (unsigned i = 0; i < RSV.size(); ++i) {
+            CurrMember = RSV[i];
+
+            if (*Dest < NumberSpecialNodes)
+              continue;
+            if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+              if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+                NextWL->insert(&GraphNodes[*Dest]);
+          }
+#endif
+          // Since all future elements of the points-to set will be
+          // equivalent to the current ones, the complex constraints
+          // become redundant.
+          //
+          std::list<Constraint>::iterator lk = li; li++;
+#if !FULL_UNIVERSAL
+          // In this case, we can still erase the constraints when the
+          // elements of the points-to sets are referenced by *Dest, but
+          // not when they are referenced by *Src (i.e. for a Load
+          // constraint).  This is because if another special variable is
+          // put into the points-to set later, we still need to add the
+          // new edge from that special variable.
+          if (lk->Type != Constraint::Load)
+#endif
+            GraphNodes[CurrNodeIndex].Constraints.erase(lk);
+        } else {
+          const SparseBitVector<> &Solution = CurrPointsTo;
+
+          for (SparseBitVector<>::iterator bi = Solution.begin();
+               bi != Solution.end();
+               ++bi) {
+            CurrMember = *bi;
+
+            // Need to increment the member by K since that is where we are
+            // supposed to copy to/from.  Note that in positive weight cycles,
+            // which occur in address taking of fields, K can go past
+            // MaxK[CurrMember] elements, even though that is all it could
+            // point to.
+            if (K > 0 && K > MaxK[CurrMember])
+              continue;
+            else
+              CurrMember = FindNode(CurrMember + K);
+
+            // Add an edge to the graph, so we can just do regular
+            // bitmap ior next time.  It may also let us notice a cycle.
+#if !FULL_UNIVERSAL
+            if (*Dest < NumberSpecialNodes)
+              continue;
+#endif
+            if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+              if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+                NextWL->insert(&GraphNodes[*Dest]);
+
+          }
+          li++;
+        }
+      }
+      SparseBitVector<> NewEdges;
+      SparseBitVector<> ToErase;
+
+      // Now all we have left to do is propagate points-to info along the
+      // edges, erasing the redundant edges.
+      for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin();
+           bi != CurrNode->Edges->end();
+           ++bi) {
+
+        unsigned DestVar = *bi;
+        unsigned Rep = FindNode(DestVar);
+
+        // If we ended up with this node as our destination, or we've already
+        // got an edge for the representative, delete the current edge.
+        if (Rep == CurrNodeIndex ||
+            (Rep != DestVar && NewEdges.test(Rep))) {
+          ToErase.set(DestVar);
+          continue;
+        }
+
+        std::pair<unsigned, unsigned> edge(CurrNodeIndex, Rep);
+
+        // This is where we do lazy cycle detection.
+        // If this is a cycle candidate (equal points-to sets and this
+        // particular edge has not been cycle-checked previously), add to the
+        // list to check for cycles on the next iteration.
+        if (!EdgesChecked.count(edge) &&
+            *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) {
+          EdgesChecked.insert(edge);
+          TarjanWL.push(Rep);
+        }
+        // Union the points-to sets into the dest
+#if !FULL_UNIVERSAL
+        if (Rep >= NumberSpecialNodes)
+#endif
+        if (GraphNodes[Rep].PointsTo |= CurrPointsTo) {
+          NextWL->insert(&GraphNodes[Rep]);
+        }
+        // If this edge's destination was collapsed, rewrite the edge.
+        if (Rep != DestVar) {
+          ToErase.set(DestVar);
+          NewEdges.set(Rep);
+        }
+      }
+      CurrNode->Edges->intersectWithComplement(ToErase);
+      CurrNode->Edges |= NewEdges;
+    }
+
+    // Switch to other work list.
+    WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t;
+  }
+
+
+  Node2DFS.clear();
+  Node2Deleted.clear();
+  for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+    Node *N = &GraphNodes[i];
+    delete N->OldPointsTo;
+    delete N->Edges;
+  }
+  SDTActive = false;
+  SDT.clear();
+}
+
+//===----------------------------------------------------------------------===//
+//                               Union-Find
+//===----------------------------------------------------------------------===//
+
+// Unite nodes First and Second, returning the one which is now the
+// representative node.  First and Second are indexes into GraphNodes.
+unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
+                               bool UnionByRank) {
+  assert(First < GraphNodes.size() && Second < GraphNodes.size() &&
+         "Attempting to merge nodes that don't exist");
+
+  Node *FirstNode = &GraphNodes[First];
+  Node *SecondNode = &GraphNodes[Second];
+
+  assert(SecondNode->isRep() && FirstNode->isRep() &&
+         "Trying to unite two non-representative nodes!");
+  if (First == Second)
+    return First;
+
+  if (UnionByRank) {
+    int RankFirst  = (int) FirstNode ->NodeRep;
+    int RankSecond = (int) SecondNode->NodeRep;
+
+    // Rank starts at -1 and gets decremented as it increases.
+    // Translation: higher rank, lower NodeRep value, which is always negative.
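+    // Worked example (derived from the encoding above): a fresh node has
+    // NodeRep == -1, i.e. rank 0; after surviving two equal-rank unions it
+    // holds -3, i.e. rank 2, so comparing the raw values below still picks
+    // the higher-rank node as the surviving representative.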
+    if (RankFirst > RankSecond) {
+      unsigned t = First; First = Second; Second = t;
+      Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp;
+    } else if (RankFirst == RankSecond) {
+      FirstNode->NodeRep = (unsigned) (RankFirst - 1);
+    }
+  }
+
+  SecondNode->NodeRep = First;
+#if !FULL_UNIVERSAL
+  if (First >= NumberSpecialNodes)
+#endif
+  if (FirstNode->PointsTo && SecondNode->PointsTo)
+    FirstNode->PointsTo |= *(SecondNode->PointsTo);
+  if (FirstNode->Edges && SecondNode->Edges)
+    FirstNode->Edges |= *(SecondNode->Edges);
+  if (!SecondNode->Constraints.empty())
+    FirstNode->Constraints.splice(FirstNode->Constraints.begin(),
+                                  SecondNode->Constraints);
+  if (FirstNode->OldPointsTo) {
+    delete FirstNode->OldPointsTo;
+    FirstNode->OldPointsTo = new SparseBitVector<>;
+  }
+
+  // Destroy interesting parts of the merged-from node.
+  delete SecondNode->OldPointsTo;
+  delete SecondNode->Edges;
+  delete SecondNode->PointsTo;
+  SecondNode->Edges = NULL;
+  SecondNode->PointsTo = NULL;
+  SecondNode->OldPointsTo = NULL;
+
+  NumUnified++;
+  DOUT << "Unified Node ";
+  DEBUG(PrintNode(FirstNode));
+  DOUT << " and Node ";
+  DEBUG(PrintNode(SecondNode));
+  DOUT << "\n";
+
+  if (SDTActive)
+    if (SDT[Second] >= 0) {
+      if (SDT[First] < 0)
+        SDT[First] = SDT[Second];
+      else {
+        UniteNodes(FindNode(SDT[First]), FindNode(SDT[Second]));
+        First = FindNode(First);
+      }
+    }
+
+  return First;
+}
+
+// Find the index into GraphNodes of the node representing Node, performing
+// path compression along the way.
+unsigned Andersens::FindNode(unsigned NodeIndex) {
+  assert(NodeIndex < GraphNodes.size()
+         && "Attempting to find a node that can't exist");
+  Node *N = &GraphNodes[NodeIndex];
+  if (N->isRep())
+    return NodeIndex;
+  else
+    return (N->NodeRep = FindNode(N->NodeRep));
+}
+
+// Find the index into GraphNodes of the node representing Node, without
+// performing path compression along the way (for Print).
+unsigned Andersens::FindNode(unsigned NodeIndex) const {
+  assert(NodeIndex < GraphNodes.size()
+         && "Attempting to find a node that can't exist");
+  const Node *N = &GraphNodes[NodeIndex];
+  if (N->isRep())
+    return NodeIndex;
+  else
+    return FindNode(N->NodeRep);
+}
+
+//===----------------------------------------------------------------------===//
+//                               Debugging Output
+//===----------------------------------------------------------------------===//
+
+void Andersens::PrintNode(const Node *N) const {
+  if (N == &GraphNodes[UniversalSet]) {
+    cerr << "<universal>";
+    return;
+  } else if (N == &GraphNodes[NullPtr]) {
+    cerr << "<nullptr>";
+    return;
+  } else if (N == &GraphNodes[NullObject]) {
+    cerr << "<null>";
+    return;
+  }
+  if (!N->getValue()) {
+    cerr << "artificial" << (intptr_t) N;
+    return;
+  }
+
+  assert(N->getValue() != 0 && "Never set node label!");
+  Value *V = N->getValue();
+  if (Function *F = dyn_cast<Function>(V)) {
+    if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
+        N == &GraphNodes[getReturnNode(F)]) {
+      cerr << F->getName() << ":retval";
+      return;
+    } else if (F->getFunctionType()->isVarArg() &&
+               N == &GraphNodes[getVarargNode(F)]) {
+      cerr << F->getName() << ":vararg";
+      return;
+    }
+  }
+
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    cerr << I->getParent()->getParent()->getName() << ":";
+  else if (Argument *Arg = dyn_cast<Argument>(V))
+    cerr << Arg->getParent()->getName() << ":";
+
+  if (V->hasName())
+    cerr << V->getName();
+  else
+    cerr << "(unnamed)";
+
+  if (isa<GlobalValue>(V) || isa<AllocationInst>(V))
+    if (N == &GraphNodes[getObject(V)])
+      cerr << "<mem>";
+}
+void Andersens::PrintConstraint(const Constraint &C) const {
+  if (C.Type == Constraint::Store) {
<< "*"; + if (C.Offset != 0) + cerr << "("; + } + PrintNode(&GraphNodes[C.Dest]); + if (C.Type == Constraint::Store && C.Offset != 0) + cerr << " + " << C.Offset << ")"; + cerr << " = "; + if (C.Type == Constraint::Load) { + cerr << "*"; + if (C.Offset != 0) + cerr << "("; + } + else if (C.Type == Constraint::AddressOf) + cerr << "&"; + PrintNode(&GraphNodes[C.Src]); + if (C.Offset != 0 && C.Type != Constraint::Store) + cerr << " + " << C.Offset; + if (C.Type == Constraint::Load && C.Offset != 0) + cerr << ")"; + cerr << "\n"; +} + +void Andersens::PrintConstraints() const { + cerr << "Constraints:\n"; + + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) + PrintConstraint(Constraints[i]); +} + +void Andersens::PrintPointsToGraph() const { + cerr << "Points-to graph:\n"; + for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { + const Node *N = &GraphNodes[i]; + if (FindNode(i) != i) { + PrintNode(N); + cerr << "\t--> same as "; + PrintNode(&GraphNodes[FindNode(i)]); + cerr << "\n"; + } else { + cerr << "[" << (N->PointsTo->count()) << "] "; + PrintNode(N); + cerr << "\t--> "; + + bool first = true; + for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); + bi != N->PointsTo->end(); + ++bi) { + if (!first) + cerr << ", "; + PrintNode(&GraphNodes[*bi]); + first = false; + } + cerr << "\n"; + } + } +} diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt new file mode 100644 index 000000000000..1ebb0bea36bc --- /dev/null +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMipa + Andersens.cpp + CallGraph.cpp + CallGraphSCCPass.cpp + FindUsedTypes.cpp + GlobalsModRef.cpp + ) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp new file mode 100644 index 000000000000..6dabcdb94bf1 --- /dev/null +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -0,0 +1,314 @@ +//===- CallGraph.cpp - Build a Module's call graph ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CallGraph class and provides the BasicCallGraph +// default implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +#include +using namespace llvm; + +namespace { + +//===----------------------------------------------------------------------===// +// BasicCallGraph class definition +// +class VISIBILITY_HIDDEN BasicCallGraph : public CallGraph, public ModulePass { + // Root is root of the call graph, or the external node if a 'main' function + // couldn't be found. + // + CallGraphNode *Root; + + // ExternalCallingNode - This node has edges to all external functions and + // those internal functions that have their address taken. + CallGraphNode *ExternalCallingNode; + + // CallsExternalNode - This node has edges to it from all functions making + // indirect calls or calling an external function. 
+  CallGraphNode *CallsExternalNode;
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  BasicCallGraph() : ModulePass(&ID), Root(0),
+                     ExternalCallingNode(0), CallsExternalNode(0) {}
+
+  // runOnModule - Compute the call graph for the specified module.
+  virtual bool runOnModule(Module &M) {
+    CallGraph::initialize(M);
+
+    ExternalCallingNode = getOrInsertFunction(0);
+    CallsExternalNode = new CallGraphNode(0);
+    Root = 0;
+
+    // Add every function to the call graph...
+    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+      addToCallGraph(I);
+
+    // If we didn't find a main function, use the external call graph node
+    if (Root == 0) Root = ExternalCallingNode;
+
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  void print(std::ostream *o, const Module *M) const {
+    if (o) print(*o, M);
+  }
+
+  virtual void print(std::ostream &o, const Module *M) const {
+    o << "CallGraph Root is: ";
+    if (Function *F = getRoot()->getFunction())
+      o << F->getName() << "\n";
+    else
+      o << "<<null function: 0x" << getRoot() << ">>\n";
+
+    CallGraph::print(o, M);
+  }
+
+  virtual void releaseMemory() {
+    destroy();
+  }
+
+  /// dump - Print out this call graph.
+  ///
+  inline void dump() const {
+    print(cerr, Mod);
+  }
+
+  CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+  CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+  // getRoot - Return the root of the call graph, which is either main, or if
+  // main cannot be found, the external node.
+  //
+  CallGraphNode *getRoot() { return Root; }
+  const CallGraphNode *getRoot() const { return Root; }
+
+private:
+  //===---------------------------------------------------------------------
+  // Implementation of CallGraph construction
+  //
+
+  // addToCallGraph - Add a function to the call graph, and link the node to
+  // all of the functions that it calls.
+  //
+  void addToCallGraph(Function *F) {
+    CallGraphNode *Node = getOrInsertFunction(F);
+
+    // If this function has external linkage, anything could call it.
+    if (!F->hasLocalLinkage()) {
+      ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+      // Found the entry point?
+      if (F->getName() == "main") {
+        if (Root)   // Found multiple external mains?  Don't pick one.
+          Root = ExternalCallingNode;
+        else
+          Root = Node;          // Found a main, keep track of it!
+      }
+    }
+
+    // Loop over all of the users of the function, looking for non-call uses.
+    for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I)
+      if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
+          || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+        // Not a call, or being used as a parameter rather than as the callee.
+        ExternalCallingNode->addCalledFunction(CallSite(), Node);
+        break;
+      }
+
+    // If this function is not defined in this translation unit, it could call
+    // anything.
+    if (F->isDeclaration() && !F->isIntrinsic())
+      Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+    // Look for calls by this function.
+    for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+      for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+           II != IE; ++II) {
+        CallSite CS = CallSite::get(II);
+        if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(II)) {
+          const Function *Callee = CS.getCalledFunction();
+          if (Callee)
+            Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+          else
+            Node->addCalledFunction(CS, CallsExternalNode);
+        }
+      }
+  }
+
+  //
+  // destroy - Release memory for the call graph
+  virtual void destroy() {
+    /// CallsExternalNode is not in the function map, delete it explicitly.
+    delete CallsExternalNode;
+    CallsExternalNode = 0;
+    CallGraph::destroy();
+  }
+};
+
+} //End anonymous namespace
+
+static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+static RegisterPass<BasicCallGraph>
+Y("basiccg", "Basic CallGraph Construction", false, true);
+static RegisterAnalysisGroup<CallGraph, true> Z(Y);
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+  Mod = &M;
+}
+
+void CallGraph::destroy() {
+  if (!FunctionMap.empty()) {
+    for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+         I != E; ++I)
+      delete I->second;
+    FunctionMap.clear();
+  }
+}
+
+void CallGraph::print(std::ostream &OS, const Module *M) const {
+  for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+    I->second->print(OS);
+}
+
+void CallGraph::dump() const {
+  print(cerr, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it.  Because this removes the function from the module, the call graph node
+// is destroyed.  This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN).  The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+  assert(CGN->CalledFunctions.empty() && "Cannot remove function from call "
+         "graph if it references other functions!");
+  Function *F = CGN->getFunction(); // Get the function for the call graph node
+  delete CGN;                       // Delete the call graph node for this func
+  FunctionMap.erase(F);             // Remove the call graph node from the map
+
+  Mod->getFunctionList().remove(F);
+  return F;
+}
+
+// changeFunction - This method changes the function associated with this
+// CallGraphNode, for use by transformations that need to change the prototype
+// of a Function (thus they must create a new Function and move the old code
+// over).
+void CallGraph::changeFunction(Function *OldF, Function *NewF) {
+  iterator I = FunctionMap.find(OldF);
+  CallGraphNode *&New = FunctionMap[NewF];
+  assert(I != FunctionMap.end() && I->second && !New &&
+         "OldF didn't exist in CG or NewF already does!");
+  New = I->second;
+  New->F = NewF;
+  FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
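+// Typical use (a sketch; CS and Callee are stand-ins): when recording a
+// direct call,
+//   CallerNode->addCalledFunction(CS, CG.getOrInsertFunction(Callee));
+// lazily creates the callee's node on first sight instead of requiring it
+// to have been added beforehand.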
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+  CallGraphNode *&CGN = FunctionMap[F];
+  if (CGN) return CGN;
+
+  assert((!F || F->getParent() == Mod) && "Function not in current module!");
+  return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(std::ostream &OS) const {
+  if (Function *F = getFunction())
+    OS << "Call graph node for function: '" << F->getName() << "'\n";
+  else
+    OS << "Call graph node <<null function>>:\n";
+
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    if (Function *FI = I->second->getFunction())
+      OS << "  Calls function '" << FI->getName() << "'\n";
+    else
+      OS << "  Calls external node\n";
+  OS << "\n";
+}
+
+void CallGraphNode::dump() const { print(cerr); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site.  Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (I->first == CS) {
+      CalledFunctions.erase(I);
+      return;
+    }
+  }
+}
+
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function.  This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+  for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+    if (CalledFunctions[i].second == Callee) {
+      CalledFunctions[i] = CalledFunctions.back();
+      CalledFunctions.pop_back();
+      --i; --e;
+    }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+    CallRecord &CR = *I;
+    if (CR.second == Callee && !CR.first.getInstruction()) {
+      CalledFunctions.erase(I);
+      return;
+    }
+  }
+}
+
+/// replaceCallSite - Make the edge in the node for Old CallSite be for
+/// New CallSite instead.  Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) {
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
+    if (I->first == Old) {
+      I->first = New;
+      return;
+    }
+  }
+}
+
+// Ensure that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..3880d0a10bb6
--- /dev/null
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,207 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph.
+// Because there may be cycles in the call graph, passes of this type operate
+// on the call-graph in SCC order: that is, they process functions bottom-up,
+// except for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+
+public:
+  static char ID;
+  explicit CGPassManager(int Depth)
+    : ModulePass(&ID), PMDataManager(Depth) { }
+
+  /// run - Execute all of the passes scheduled for execution.  Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool runOnModule(Module &M);
+
+  bool doInitialization(CallGraph &CG);
+  bool doFinalization(CallGraph &CG);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  void getAnalysisUsage(AnalysisUsage &Info) const {
+    // CGPassManager walks SCC and it needs CallGraph.
+    Info.addRequired<CallGraph>();
+    Info.setPreservesAll();
+  }
+
+  virtual const char *getPassName() const {
+    return "CallGraph Pass Manager";
+  }
+
+  // Print passes managed by this manager
+  void dumpPassStructure(unsigned Offset) {
+    llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n";
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      Pass *P = getContainedPass(Index);
+      P->dumpPassStructure(Offset + 1);
+      dumpLastUses(P, Offset+1);
+    }
+  }
+
+  Pass *getContainedPass(unsigned N) {
+    assert(N < PassVector.size() && "Pass number out of range!");
+    Pass *FP = static_cast<Pass *>(PassVector[N]);
+    return FP;
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_CallGraphPassManager;
+  }
+};
+
+}
+
+char CGPassManager::ID = 0;
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+  CallGraph &CG = getAnalysis<CallGraph>();
+  bool Changed = doInitialization(CG);
+
+  // Walk the SCCs of the call graph.
+  for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG);
+       I != E; ++I) {
+
+    // Run all passes on the current SCC.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      Pass *P = getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, "");
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      StartPassTimer(P);
+      if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P))
+        Changed |= CGSP->runOnSCC(*I);   // TODO : What if CG is changed ?
+      else {
+        FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
+        assert(FPP && "Invalid CGPassManager member");
+
+        // Run pass P on all functions in the current SCC.
+        std::vector<CallGraphNode*> &SCC = *I;
+        for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+          Function *F = SCC[i]->getFunction();
+          if (F) {
+            dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart());
+            Changed |= FPP->runOnFunction(*F);
+          }
+        }
+      }
+      StopPassTimer(P);
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+      dumpPreservedSet(P);
+
+      verifyPreservedAnalysis(P);
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P, "", ON_CG_MSG);
+    }
+  }
+  Changed |= doFinalization(CG);
+  return Changed;
+}
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+  bool Changed = false;
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P)) {
+      Changed |= CGSP->doInitialization(CG);
+    } else {
+      FPPassManager *FP = dynamic_cast<FPPassManager *>(P);
+      assert(FP && "Invalid CGPassManager member");
+      Changed |= FP->doInitialization(CG.getModule());
+    }
+  }
+  return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+  bool Changed = false;
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P)) {
+      Changed |= CGSP->doFinalization(CG);
+    } else {
+      FPPassManager *FP = dynamic_cast<FPPassManager *>(P);
+      assert(FP && "Invalid CGPassManager member");
+      Changed |= FP->doFinalization(CG.getModule());
+    }
+  }
+  return Changed;
+}
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+                                         PassManagerType PreferredType) {
+  // Find CGPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+    PMS.pop();
+
+  assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+  CGPassManager *CGP = dynamic_cast<CGPassManager *>(PMS.top());
+
+  // Create new Call Graph SCC Pass Manager if it does not exist.
+  if (!CGP) {
+
+    assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Call Graph Pass Manager
+    CGP = new CGPassManager(PMD->getDepth() + 1);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(CGP);
+
+    // [3] Assign manager to manage this new manager.  This may create
+    // and push new managers into PMS
+    Pass *P = dynamic_cast<Pass *>(CGP);
+    TPM->schedulePass(P);
+
+    // [4] Push new manager into PMS
+    PMS.push(CGP);
+  }
+
+  CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph.  If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<CallGraph>();
+  AU.addPreserved<CallGraph>();
+}
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..920ee374555f
--- /dev/null
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,104 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program.  Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+static RegisterPass<FindUsedTypes>
+X("print-used-types", "Find Used Types", false, true);
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(const Type *Ty) {
+  // If ty doesn't already exist in the used types map, add it now, otherwise
+  // return.
+  if (!UsedTypes.insert(Ty).second) return;  // Already contains Ty.
+
+  // Make sure to add any types this type references now.
+  //
+  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+       I != E; ++I)
+    IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+  IncorporateType(V->getType());
+
+  // If this is a constant, it could be using other types...
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (!isa<GlobalValue>(C))
+      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+           OI != OE; ++OI)
+        IncorporateValue(*OI);
+  }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+  UsedTypes.clear();  // reset if run multiple times...
+
+  // Loop over global variables, incorporating their types
+  for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+       I != E; ++I) {
+    IncorporateType(I->getType());
+    if (I->hasInitializer())
+      IncorporateValue(I->getInitializer());
+  }
+
+  for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+    IncorporateType(MI->getType());
+    const Function &F = *MI;
+
+    // Loop over all of the instructions in the function, adding their return
+    // type as well as the types of their operands.
+    //
+    for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+         II != IE; ++II) {
+      const Instruction &I = *II;
+
+      IncorporateType(I.getType());  // Incorporate the type of the instruction
+      for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+           OI != OE; ++OI)
+        IncorporateValue(*OI);  // Insert inst operand types as well
+    }
+  }
+
+  return false;
+}
+
+// Print the types found in the module.  If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(std::ostream &OS, const Module *M) const {
+  raw_os_ostream RO(OS);
+  RO << "Types in use by this module:\n";
+  for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+       E = UsedTypes.end(); I != E; ++I) {
+    RO << "  ";
+    WriteTypeSymbolic(RO, *I, M);
+    RO << '\n';
+  }
+}
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..2e9884aa01b4
--- /dev/null
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,567 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure").  For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+          "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+  /// FunctionRecord - One instance of this structure is stored for every
+  /// function in the program.  Later, the entries for these functions are
+  /// removed if the function is found to call an external function (in which
+  /// case we know nothing about it).
+  struct VISIBILITY_HIDDEN FunctionRecord {
+    /// GlobalInfo - Maintain mod/ref info for all of the globals without
+    /// addresses taken that are read or written (transitively) by this
+    /// function.
+    std::map<GlobalValue*, unsigned> GlobalInfo;
+
+    /// MayReadAnyGlobal - May read global variables, but it is not known
+    /// which.
+    bool MayReadAnyGlobal;
+
+    unsigned getInfoForGlobal(GlobalValue *GV) const {
+      unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+      std::map<GlobalValue*, unsigned>::const_iterator I = GlobalInfo.find(GV);
+      if (I != GlobalInfo.end())
+        Effect |= I->second;
+      return Effect;
+    }
+
+    /// FunctionEffect - Capture whether or not this function reads or writes
+    /// to ANY memory.  If not, we can do a lot of aggressive analysis on it.
+    unsigned FunctionEffect;
+
+    FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+  };
+
+  /// GlobalsModRef - The actual analysis pass.
+  class VISIBILITY_HIDDEN GlobalsModRef
+      : public ModulePass, public AliasAnalysis {
+    /// NonAddressTakenGlobals - The globals that do not have their addresses
+    /// taken.
+    std::set<GlobalValue*> NonAddressTakenGlobals;
+
+    /// IndirectGlobals - The memory pointed to by this global is known to be
+    /// 'owned' by the global.
+    std::set<GlobalValue*> IndirectGlobals;
+
+    /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+    /// indirect global, this map indicates which one.
+    std::map<Instruction*, GlobalValue*> AllocsForIndirectGlobals;
+
+    /// FunctionInfo - For each function, keep track of what globals are
+    /// modified or read.
+  /// GlobalsModRef - The actual analysis pass.
+  class VISIBILITY_HIDDEN GlobalsModRef
+      : public ModulePass, public AliasAnalysis {
+    /// NonAddressTakenGlobals - The globals that do not have their addresses
+    /// taken.
+    std::set<GlobalValue*> NonAddressTakenGlobals;
+
+    /// IndirectGlobals - The memory pointed to by this global is known to be
+    /// 'owned' by the global.
+    std::set<GlobalValue*> IndirectGlobals;
+
+    /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+    /// indirect global, this map indicates which one.
+    std::map<Value*, GlobalValue*> AllocsForIndirectGlobals;
+
+    /// FunctionInfo - For each function, keep track of what globals are
+    /// modified or read.
+    std::map<Function*, FunctionRecord> FunctionInfo;
+
+  public:
+    static char ID;
+    GlobalsModRef() : ModulePass(&ID) {}
+
+    bool runOnModule(Module &M) {
+      InitializeAliasAnalysis(this);                 // set up super class
+      AnalyzeGlobals(M);                             // find non-addr taken globals
+      AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+      return false;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.addRequired<CallGraph>();
+      AU.setPreservesAll();                          // Does not transform code
+    }
+
+    //------------------------------------------------
+    // Implement the AliasAnalysis API
+    //
+    AliasResult alias(const Value *V1, unsigned V1Size,
+                      const Value *V2, unsigned V2Size);
+    ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+    ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+      return AliasAnalysis::getModRefInfo(CS1, CS2);
+    }
+    bool hasNoModRefInfoForCalls() const { return false; }
+
+    /// getModRefBehavior - Return the behavior of the specified function if
+    /// called from the specified call site.  The call site may be null in
+    /// which case the most generic behavior of this function should be
+    /// returned.
+    ModRefBehavior getModRefBehavior(Function *F,
+                                     std::vector<PointerAccessInfo> *Info) {
+      if (FunctionRecord *FR = getFunctionInfo(F)) {
+        if (FR->FunctionEffect == 0)
+          return DoesNotAccessMemory;
+        else if ((FR->FunctionEffect & Mod) == 0)
+          return OnlyReadsMemory;
+      }
+      return AliasAnalysis::getModRefBehavior(F, Info);
+    }
+
+    /// getModRefBehavior - Return the behavior of the specified function if
+    /// called from the specified call site.  The call site may be null in
+    /// which case the most generic behavior of this function should be
+    /// returned.
+    ModRefBehavior getModRefBehavior(CallSite CS,
+                                     std::vector<PointerAccessInfo> *Info) {
+      Function* F = CS.getCalledFunction();
+      if (!F) return AliasAnalysis::getModRefBehavior(CS, Info);
+      if (FunctionRecord *FR = getFunctionInfo(F)) {
+        if (FR->FunctionEffect == 0)
+          return DoesNotAccessMemory;
+        else if ((FR->FunctionEffect & Mod) == 0)
+          return OnlyReadsMemory;
+      }
+      return AliasAnalysis::getModRefBehavior(CS, Info);
+    }
+
+    virtual void deleteValue(Value *V);
+    virtual void copyValue(Value *From, Value *To);
+
+  private:
+    /// getFunctionInfo - Return the function info for the function, or null
+    /// if we don't have anything useful to say about it.
+    FunctionRecord *getFunctionInfo(Function *F) {
+      std::map<Function*, FunctionRecord>::iterator I = FunctionInfo.find(F);
+      if (I != FunctionInfo.end())
+        return &I->second;
+      return 0;
+    }
+
+    void AnalyzeGlobals(Module &M);
+    void AnalyzeCallGraph(CallGraph &CG, Module &M);
+    bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+                              std::vector<Function*> &Writers,
+                              GlobalValue *OkayStoreDest = 0);
+    bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+  };
+}
+
+char GlobalsModRef::ID = 0;
+static RegisterPass<GlobalsModRef>
+X("globalsmodref-aa", "Simple mod/ref analysis for globals", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program.  If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) { + std::vector Readers, Writers; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global. + NonAddressTakenGlobals.insert(I); + ++NumNonAddrTakenFunctions; + } + Readers.clear(); Writers.clear(); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(I); + + for (unsigned i = 0, e = Readers.size(); i != e; ++i) + FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; + + if (!I->isConstant()) // No need to keep track of writers to constants + for (unsigned i = 0, e = Writers.size(); i != e; ++i) + FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (isa(I->getType()->getElementType()) && + AnalyzeIndirectGlobalMemory(I)) + ++NumIndirectGlobalVars; + } + Readers.clear(); Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. +bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, + std::vector &Readers, + std::vector &Writers, + GlobalValue *OkayStoreDest) { + if (!isa(V->getType())) return true; + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (LoadInst *LI = dyn_cast(*UI)) { + Readers.push_back(LI->getParent()->getParent()); + } else if (StoreInst *SI = dyn_cast(*UI)) { + if (V == SI->getOperand(1)) { + Writers.push_back(SI->getParent()->getParent()); + } else if (SI->getOperand(1) != OkayStoreDest) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast(*UI)) { + if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (CallInst *CI = dyn_cast(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) + if (CI->getOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) + if (II->getOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast(*UI)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfPointer(CE, Readers, Writers)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast(*UI)) { + if (!isa(ICI->getOperand(1))) + return true; // Allow comparison against null. + } else if (FreeInst *F = dyn_cast(*UI)) { + Writers.push_back(F->getParent()->getParent()); + } else { + return true; + } + return false; +} + +/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable +/// which holds a pointer type. 
See if the global always points to non-aliased +/// heap memory: that is, all initializers of the globals are allocations, and +/// those allocations have no use other than initialization of the global. +/// Further, all loads out of GV must directly use the memory, not store the +/// pointer somewhere. If this is true, we consider the memory pointed to by +/// GV to be owned by GV and can disambiguate other pointers from it. +bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { + // Keep track of values related to the allocation of the memory, f.e. the + // value produced by the malloc call and any casts. + std::vector AllocRelatedValues; + + // Walk the user list of the global. If we find anything other than a direct + // load or store, bail out. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + if (LoadInst *LI = dyn_cast(*I)) { + // The pointer loaded from the global can only be used in simple ways: + // we allow addressing of it and loading storing to it. We do *not* allow + // storing the loaded pointer somewhere else or passing to a function. + std::vector ReadersWriters; + if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) + return false; // Loaded pointer escapes. + // TODO: Could try some IP mod/ref of the loaded pointer. + } else if (StoreInst *SI = dyn_cast(*I)) { + // Storing the global itself. + if (SI->getOperand(0) == GV) return false; + + // If storing the null pointer, ignore it. + if (isa(SI->getOperand(0))) + continue; + + // Check the value being stored. + Value *Ptr = SI->getOperand(0)->getUnderlyingObject(); + + if (isa(Ptr)) { + // Okay, easy case. + } else if (CallInst *CI = dyn_cast(Ptr)) { + Function *F = CI->getCalledFunction(); + if (!F || !F->isDeclaration()) return false; // Too hard to analyze. + if (F->getName() != "calloc") return false; // Not calloc. + } else { + return false; // Too hard to analyze. + } + + // Analyze all uses of the allocation. If any of them are used in a + // non-simple way (e.g. stored to another global) bail out. + std::vector ReadersWriters; + if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) + return false; // Loaded pointer escapes. + + // Remember that this allocation is related to the indirect global. + AllocRelatedValues.push_back(Ptr); + } else { + // Something complex, bail out. + return false; + } + } + + // Okay, this is an indirect global. Remember all of the allocations for + // this global in AllocsForIndirectGlobals. + while (!AllocRelatedValues.empty()) { + AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; + AllocRelatedValues.pop_back(); + } + IndirectGlobals.insert(GV); + return true; +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator I = scc_begin(&CG), E = scc_end(&CG); I != E; + ++I) { + std::vector &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction()) { + // Calls externally - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). 
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; + + bool KnowNothing = false; + unsigned FunctionEffect = 0; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. + for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FunctionEffect |= Ref; + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FR.MayReadAnyGlobal = true; + } else { + FunctionEffect |= ModRef; + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. + KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { + // Propagate function effect up. + FunctionEffect |= CalleeFR->FunctionEffect; + + // Incorporate callee's effects on globals into our info. + for (std::map::iterator GI = + CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); + GI != E; ++GI) + FR.GlobalInfo[GI->first] |= GI->second; + FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfo map. + if (KnowNothing) { + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (inst_iterator II = inst_begin(SCC[i]->getFunction()), + E = inst_end(SCC[i]->getFunction()); + II != E && FunctionEffect != ModRef; ++II) + if (isa(*II)) { + FunctionEffect |= Ref; + if (cast(*II).isVolatile()) + // Volatile loads may have side-effects, so mark them as writing + // memory (for example, a flag inside the processor). + FunctionEffect |= Mod; + } else if (isa(*II)) { + FunctionEffect |= Mod; + if (cast(*II).isVolatile()) + // Treat volatile stores as reading memory somewhere. + FunctionEffect |= Ref; + } else if (isa(*II) || isa(*II)) { + FunctionEffect |= ModRef; + } + + if ((FunctionEffect & Mod) == 0) + ++NumReadMemFunctions; + if (FunctionEffect == 0) + ++NumNoMemFunctions; + FR.FunctionEffect = FunctionEffect; + + // Finally, now that we know the full effect on this SCC, clone the + // information to each function in the SCC. 
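+    // All members of one SCC are mutually reachable, so they necessarily
+    // share one answer; e.g. for mutually recursive f() and g(), FR was
+    // accumulated once above and is now copied verbatim to every member.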
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i) + FunctionInfo[SCC[i]->getFunction()] = FR; + } +} + + + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasAnalysis::AliasResult +GlobalsModRef::alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + // Get the base object these pointers point to. + Value *UV1 = const_cast(V1->getUnderlyingObject()); + Value *UV2 = const_cast(V2->getUnderlyingObject()); + + // If either of the underlying values is a global, they may be non-addr-taken + // globals, which we can answer queries about. + GlobalValue *GV1 = dyn_cast(UV1); + GlobalValue *GV2 = dyn_cast(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + + // If the the two pointers are derived from two different non-addr-taken + // globals, or if one is and the other isn't, we know these can't alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. + GV1 = GV2 = 0; + if (LoadInst *LI = dyn_cast(UV1)) + if (GlobalVariable *GV = dyn_cast(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV1 = GV; + if (LoadInst *LI = dyn_cast(UV2)) + if (GlobalVariable *GV = dyn_cast(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV2 = GV; + + // These pointers may also be from an allocation for the indirect global. If + // so, also handle them. + if (AllocsForIndirectGlobals.count(UV1)) + GV1 = AllocsForIndirectGlobals[UV1]; + if (AllocsForIndirectGlobals.count(UV2)) + GV2 = AllocsForIndirectGlobals[UV2]; + + // Now that we know whether the two pointers are related to indirect globals, + // use this to disambiguate the pointers. If either pointer is based on an + // indirect global and if they are not both based on the same indirect global, + // they cannot alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); +} + +AliasAnalysis::ModRefResult +GlobalsModRef::getModRefInfo(CallSite CS, Value *P, unsigned Size) { + unsigned Known = ModRef; + + // If we are asking for mod/ref info of a direct call with a pointer to a + // global we are tracking, return information if we have it. + if (GlobalValue *GV = dyn_cast(P->getUnderlyingObject())) + if (GV->hasLocalLinkage()) + if (Function *F = CS.getCalledFunction()) + if (NonAddressTakenGlobals.count(GV)) + if (FunctionRecord *FR = getFunctionInfo(F)) + Known = FR->getInfoForGlobal(GV); + + if (Known == NoModRef) + return NoModRef; // No need to query other mod/ref analyses + return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size)); +} + + +//===----------------------------------------------------------------------===// +// Methods to update the analysis as a result of the client transformation. +// +void GlobalsModRef::deleteValue(Value *V) { + if (GlobalValue *GV = dyn_cast(V)) { + if (NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. 
If so, remove it and remove + // any AllocRelatedValues for it. + if (IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (std::map::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); I != E; ) { + if (I->second == GV) { + AllocsForIndirectGlobals.erase(I++); + } else { + ++I; + } + } + } + } + } + + // Otherwise, if this is an allocation related to an indirect global, remove + // it. + AllocsForIndirectGlobals.erase(V); + + AliasAnalysis::deleteValue(V); +} + +void GlobalsModRef::copyValue(Value *From, Value *To) { + AliasAnalysis::copyValue(From, To); +} diff --git a/lib/Analysis/IPA/Makefile b/lib/Analysis/IPA/Makefile new file mode 100644 index 000000000000..adacb16ea205 --- /dev/null +++ b/lib/Analysis/IPA/Makefile @@ -0,0 +1,14 @@ +##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMipa +BUILD_ARCHIVE = 1 +include $(LEVEL)/Makefile.common + diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp new file mode 100644 index 000000000000..7af91304754d --- /dev/null +++ b/lib/Analysis/IVUsers.cpp @@ -0,0 +1,391 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +char IVUsers::ID = 0; +static RegisterPass +X("iv-users", "Induction Variable Users", false, true); + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// containsAddRecFromDifferentLoop - Determine whether expression S involves a +/// subexpression that is an AddRec from a loop other than L. An outer loop +/// of L is OK, but not an inner loop nor a disjoint loop. +static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { + // This is very common, put it first. + if (isa(S)) + return false; + if (const SCEVCommutativeExpr *AE = dyn_cast(S)) { + for (unsigned int i=0; i< AE->getNumOperands(); i++) + if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) + return true; + return false; + } + if (const SCEVAddRecExpr *AE = dyn_cast(S)) { + if (const Loop *newLoop = AE->getLoop()) { + if (newLoop == L) + return false; + // if newLoop is an outer loop of L, this is OK. 
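+      // E.g. when L is nested inside loop I, an AddRec over I is acceptable
+      // here; an AddRec over a sibling of L, or over a loop nested inside L,
+      // is what must be rejected.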
+ if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop)) + return false; + } + return true; + } + if (const SCEVUDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#if 0 + // SCEVSDivExpr has been backed out temporarily, but will be back; we'll + // need this when it is. + if (const SCEVSDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#endif + if (const SCEVCastExpr *CE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(CE->getOperand(), L); + return false; +} + +/// getSCEVStartAndStride - Compute the start and stride of this expression, +/// returning false if the expression is not a start/stride pair, or true if it +/// is. The stride must be a loop invariant expression, but the start may be +/// a mix of loop invariant and loop variant expressions. The start cannot, +/// however, contain an AddRec from a different loop, unless that loop is an +/// outer loop of the current loop. +static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, + SCEVHandle &Start, SCEVHandle &Stride, + bool &isSigned, + ScalarEvolution *SE, DominatorTree *DT) { + SCEVHandle TheAddRec = Start; // Initialize to zero. + bool isSExt = false; + bool isZExt = false; + + // If the outer level is an AddExpr, the operands are all start values except + // for a nested AddRecExpr. + if (const SCEVAddExpr *AE = dyn_cast(SH)) { + for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) + if (const SCEVAddRecExpr *AddRec = + dyn_cast(AE->getOperand(i))) { + if (AddRec->getLoop() == L) + TheAddRec = SE->getAddExpr(AddRec, TheAddRec); + else + return false; // Nested IV of some sort? + } else { + Start = SE->getAddExpr(Start, AE->getOperand(i)); + } + + } else if (const SCEVZeroExtendExpr *Z = dyn_cast(SH)) { + TheAddRec = Z->getOperand(); + isZExt = true; + } else if (const SCEVSignExtendExpr *S = dyn_cast(SH)) { + TheAddRec = S->getOperand(); + isSExt = true; + } else if (isa(SH)) { + TheAddRec = SH; + } else { + return false; // not analyzable. + } + + const SCEVAddRecExpr *AddRec = dyn_cast(TheAddRec); + if (!AddRec || AddRec->getLoop() != L) return false; + + // Use getSCEVAtScope to attempt to simplify other loops out of + // the picture. + SCEVHandle AddRecStart = AddRec->getStart(); + SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); + if (!isa(BetterAddRecStart)) + AddRecStart = BetterAddRecStart; + + // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other + // than an outer loop of the current loop, reject it. LSR has no concept of + // operating on more than one loop at a time so don't confuse it with such + // expressions. + if (containsAddRecFromDifferentLoop(AddRecStart, L)) + return false; + + if (isSExt || isZExt) + Start = SE->getTruncateExpr(Start, AddRec->getType()); + + Start = SE->getAddExpr(Start, AddRecStart); + + if (!isa(AddRec->getStepRecurrence(*SE))) { + // If stride is an instruction, make sure it dominates the loop preheader. + // Otherwise we could end up with a use before def situation. 
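+    // E.g. for an IV with SCEV {%base,+,%step}<L>, Start is %base and Stride
+    // is %step; a variable %step is only usable if its definition is
+    // available at (dominates) the preheader where it would be materialized.
+    //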
+ BasicBlock *Preheader = L->getLoopPreheader(); + if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT)) + return false; + + DOUT << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"; + } + + Stride = AddRec->getStepRecurrence(*SE); + isSigned = isSExt; + return true; +} + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + Loop *L, LoopInfo *LI, DominatorTree *DT, + Pass *P) { + // If the user is in the loop, use the preinc value. + if (L->contains(User->getParent())) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + if (SE->getTypeSizeInBits(I->getType()) > 64) + return false; + + if (!Processed.insert(I)) + return true; // Instruction already handled. + + // Get the symbolic expression for this instruction. + SCEVHandle ISE = SE->getSCEV(I); + if (isa(ISE)) return false; + + // Get the start and stride for this expression. + Loop *UseLoop = LI->getLoopFor(I->getParent()); + SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); + SCEVHandle Stride = Start; + bool isSigned = false; // Arbitrary initial value - pacifies compiler. + + if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT)) + return false; // Non-reducible symbolic expression, bail out. 
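+
+  // The same instruction can appear as several operands of a single user
+  // (e.g. a PHI node with repeated incoming values), so deduplicate users
+  // with a small set before walking them below.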
+ + SmallPtrSet UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references ouside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DOUT << "FOUND USER in other loop: " << *User + << " OF SCEV: " << *ISE << "\n"; + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || + !AddUsersIfInteresting(User)) { + DOUT << "FOUND USER: " << *User + << " OF SCEV: " << *ISE << "\n"; + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; + if (!StrideUses) { // First occurrence of this stride? + StrideOrder.push_back(Stride); + StrideUses = new IVUsersOfOneStride(Stride); + IVUses.push_back(StrideUses); + IVUsesByStride[Stride] = StrideUses; + } + + // Okay, we found a user that we cannot reduce. Analyze the instruction + // and decide what to do with it. If we are a use inside of the loop, use + // the value before incrementation, otherwise use it after incrementation. + if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { + // The value used will be incremented by the stride more than we are + // expecting, so subtract this off. + SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); + StrideUses->addUser(NewStart, User, I, isSigned); + StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); + DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; + } else { + StrideUses->addUser(Start, User, I, isSigned); + } + } + } + return true; +} + +IVUsers::IVUsers() + : LoopPass(&ID) { +} + +void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { + + L = l; + LI = &getAnalysis(); + DT = &getAnalysis(); + SE = &getAnalysis(); + + // Find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) + AddUsersIfInteresting(I); + + return false; +} + +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace of the given IVStrideUse. +SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { + const Type *UseTy = U.getOperandValToReplace()->getType(); + // Start with zero. + SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + // Create the basic add recurrence. + RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + // Add the offset in a separate step, because it may be loop-variant. 
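+  // In total the replacement is {0,+,Stride}<L> + Offset, plus one more
+  // Stride below when the use is of the post-incremented value.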
+ RetVal = SE->getAddExpr(RetVal, U.getOffset()); + // For uses of post-incremented values, add an extra stride to compute + // the actual replacement value. + if (U.isUseOfPostIncrementedValue()) + RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); + // Evaluate the expression out of the loop, if possible. + if (!L->contains(U.getUser()->getParent())) { + SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); + if (!isa(ExitVal) && ExitVal->isLoopInvariant(L)) + RetVal = ExitVal; + } + // Promote the result to the type of the use. + if (SE->getTypeSizeInBits(RetVal->getType()) != + SE->getTypeSizeInBits(UseTy)) { + if (U.isSigned()) + RetVal = SE->getSignExtendExpr(RetVal, UseTy); + else + RetVal = SE->getZeroExtendExpr(RetVal, UseTy); + } + return RetVal; +} + +void IVUsers::print(raw_ostream &OS, const Module *M) const { + OS << "IV Users for loop "; + WriteAsOperand(OS, L->getHeader(), false); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << " with backedge-taken count " + << *SE->getBackedgeTakenCount(L); + } + OS << ":\n"; + + for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { + std::map::const_iterator SI = + IVUsesByStride.find(StrideOrder[Stride]); + assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); + OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; + + for (ilist::const_iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = "; + OS << *getReplacementExpr(*UI); + if (UI->isUseOfPostIncrementedValue()) + OS << " (post-inc)"; + OS << " in "; + UI->getUser()->print(OS); + } + } +} + +void IVUsers::print(std::ostream &o, const Module *M) const { + raw_os_ostream OS(o); + print(OS, M); +} + +void IVUsers::dump() const { + print(errs()); +} + +void IVUsers::releaseMemory() { + IVUsesByStride.clear(); + StrideOrder.clear(); + Processed.clear(); +} + +void IVStrideUse::deleted() { + // Remove this user from the list. + Parent->Users.erase(this); + // this now dangles! +} diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp new file mode 100644 index 000000000000..2dea7b3ef687 --- /dev/null +++ b/lib/Analysis/InstCount.cpp @@ -0,0 +1,86 @@ +//===-- InstCount.cpp - Collects the count of all instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/Statistic.h"
+#include <ostream>
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+  STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+  class VISIBILITY_HIDDEN InstCount
+      : public FunctionPass, public InstVisitor<InstCount> {
+    friend class InstVisitor<InstCount>;
+
+    void visitFunction  (Function &F) { ++TotalFuncs; }
+    void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+    void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
+
+    void visitInstruction(Instruction &I) {
+      cerr << "Instruction Count does not know about " << I;
+      abort();
+    }
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCount() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+    virtual void print(std::ostream &O, const Module *M) const {}
+
+  };
+}
+
+char InstCount::ID = 0;
+static RegisterPass<InstCount>
+X("instcount", "Counts the various types of Instructions", false, true);
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::run - This is the main Analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+  unsigned StartMemInsts =
+    NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+    NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst;
+  visit(F);
+  unsigned EndMemInsts =
+    NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+    NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst;
+  TotalMemInst += EndMemInsts-StartMemInsts;
+  return false;
+}
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
new file mode 100644
index 000000000000..16b194723071
--- /dev/null
+++ b/lib/Analysis/Interval.cpp
@@ -0,0 +1,57 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
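+// (An interval is a single-entry region of the CFG: every path into any
+// node of the interval enters through the interval's header node.)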
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Interval.h" +#include "llvm/BasicBlock.h" +#include "llvm/Support/CFG.h" +#include + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Interval Implementation +//===----------------------------------------------------------------------===// + +// isLoop - Find out if there is a back edge in this interval... +// +bool Interval::isLoop() const { + // There is a loop in this interval iff one of the predecessors of the header + // node lives in the interval. + for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); + I != E; ++I) { + if (contains(*I)) return true; + } + return false; +} + + +void Interval::print(std::ostream &o) const { + o << "-------------------------------------------------------------\n" + << "Interval Contents:\n"; + + // Print out all of the basic blocks in the interval... + for (std::vector::const_iterator I = Nodes.begin(), + E = Nodes.end(); I != E; ++I) + o << **I << "\n"; + + o << "Interval Predecessors:\n"; + for (std::vector::const_iterator I = Predecessors.begin(), + E = Predecessors.end(); I != E; ++I) + o << **I << "\n"; + + o << "Interval Successors:\n"; + for (std::vector::const_iterator I = Successors.begin(), + E = Successors.end(); I != E; ++I) + o << **I << "\n"; +} diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp new file mode 100644 index 000000000000..cb8a85da552a --- /dev/null +++ b/lib/Analysis/IntervalPartition.cpp @@ -0,0 +1,114 @@ +//===- IntervalPartition.cpp - Interval Partition module code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the IntervalPartition class, which +// calculates and represent the interval partition of a function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IntervalIterator.h" +using namespace llvm; + +char IntervalPartition::ID = 0; +static RegisterPass +X("intervals", "Interval Partition Construction", true, true); + +//===----------------------------------------------------------------------===// +// IntervalPartition Implementation +//===----------------------------------------------------------------------===// + +// releaseMemory - Reset state back to before function was analyzed +void IntervalPartition::releaseMemory() { + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + delete Intervals[i]; + IntervalMap.clear(); + Intervals.clear(); + RootInterval = 0; +} + +void IntervalPartition::print(std::ostream &O, const Module*) const { + for(unsigned i = 0, e = Intervals.size(); i != e; ++i) + Intervals[i]->print(O); +} + +// addIntervalToPartition - Add an interval to the internal list of intervals, +// and then add mappings from all of the basic blocks in the interval to the +// interval itself (in the IntervalMap). 
+// +void IntervalPartition::addIntervalToPartition(Interval *I) { + Intervals.push_back(I); + + // Add mappings for all of the basic blocks in I to the IntervalPartition + for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end(); + It != End; ++It) + IntervalMap.insert(std::make_pair(*It, I)); +} + +// updatePredecessors - Interval generation only sets the successor fields of +// the interval data structures. After interval generation is complete, +// run through all of the intervals and propagate successor info as +// predecessor info. +// +void IntervalPartition::updatePredecessors(Interval *Int) { + BasicBlock *Header = Int->getHeaderNode(); + for (Interval::succ_iterator I = Int->Successors.begin(), + E = Int->Successors.end(); I != E; ++I) + getBlockInterval(*I)->Predecessors.push_back(Header); +} + +// IntervalPartition ctor - Build the first level interval partition for the +// specified function... +// +bool IntervalPartition::runOnFunction(Function &F) { + // Pass false to intervals_begin because we take ownership of it's memory + function_interval_iterator I = intervals_begin(&F, false); + assert(I != intervals_end(&F) && "No intervals in function!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. + for (function_interval_iterator E = intervals_end(&F); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); + return false; +} + + +// IntervalPartition ctor - Build a reduced interval partition from an +// existing interval graph. This takes an additional boolean parameter to +// distinguish it from a copy constructor. Always pass in false for now. +// +IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) + : FunctionPass(&ID) { + assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); + + // Pass false to intervals_begin because we take ownership of it's memory + interval_part_interval_iterator I = intervals_begin(IP, false); + assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. + for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); +} + diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp new file mode 100644 index 000000000000..971e6e7accb4 --- /dev/null +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -0,0 +1,141 @@ +//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LibCallAliasAnalysis class. 
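+// A client describes each library routine to LibCallInfo roughly as: a
+// universal mod/ref behavior, optionally refined by a list of known
+// locations interpreted in "DoesOnly" or "DoesNot" mode; see
+// AnalyzeLibCallDetails below for how the refinement is applied.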
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallAliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +// Register this pass... +char LibCallAliasAnalysis::ID = 0; +static RegisterPass +X("libcall-aa", "LibCall Alias Analysis", false, true); + +// Declare that we implement the AliasAnalysis interface +static RegisterAnalysisGroup Y(X); + +FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { + return new LibCallAliasAnalysis(LCI); +} + +LibCallAliasAnalysis::~LibCallAliasAnalysis() { + delete LCI; +} + +void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired(); + AU.setPreservesAll(); // Does not transform code +} + + + +/// AnalyzeLibCallDetails - Given a call to a function with the specified +/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call +/// vs the specified pointer/size. +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, + CallSite CS, Value *P, + unsigned Size) { + // If we have a function, check to see what kind of mod/ref effects it + // has. Start by including any info globally known about the function. + AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; + if (MRInfo == NoModRef) return MRInfo; + + // If that didn't tell us that the function is 'readnone', check to see + // if we have detailed info and if 'P' is any of the locations we know + // about. + const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; + if (Details == 0) + return MRInfo; + + // If the details array is of the 'DoesNot' kind, we only know something if + // the pointer is a match for one of the locations in 'Details'. If we find a + // match, we can prove some interactions cannot happen. + // + if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { + // Find out if the pointer refers to a known location. + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &Loc = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size); + if (Res != LibCallLocationInfo::Yes) continue; + + // If we find a match against a location that we 'do not' interact with, + // learn this info into MRInfo. + return ModRefResult(MRInfo & ~Details[i].MRInfo); + } + return MRInfo; + } + + // If the details are of the 'DoesOnly' sort, we know something if the pointer + // is a match for one of the locations in 'Details'. Also, if we can prove + // that the pointers is *not* one of the locations in 'Details', we know that + // the call is NoModRef. + assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); + + // Find out if the pointer refers to a known location. + bool NoneMatch = true; + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &Loc = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size); + if (Res == LibCallLocationInfo::No) continue; + + // If we don't know if this pointer points to the location, then we have to + // assume it might alias in some case. 
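+      // (An Unknown answer does not end the scan: a later location may still
+      // match definitively, but NoneMatch must be cleared since we can no
+      // longer prove the pointer misses every described location.)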
+ if (Res == LibCallLocationInfo::Unknown) { + NoneMatch = false; + continue; + } + + // If we know that this pointer definitely is pointing into the location, + // merge in this information. + return ModRefResult(MRInfo & Details[i].MRInfo); + } + + // If we found that the pointer is guaranteed to not match any of the + // locations in our 'DoesOnly' rule, then we know that the pointer must point + // to some other location. Since the libcall doesn't mod/ref any other + // locations, return NoModRef. + if (NoneMatch) + return NoModRef; + + // Otherwise, return any other info gained so far. + return MRInfo; +} + +// getModRefInfo - Check to see if the specified callsite can clobber the +// specified memory object. +// +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { + ModRefResult MRInfo = ModRef; + + // If this is a direct call to a function that LCI knows about, get the + // information about the runtime function. + if (LCI) { + if (Function *F = CS.getCalledFunction()) { + if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { + MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size)); + if (MRInfo == NoModRef) return NoModRef; + } + } + } + + // The AliasAnalysis base class has some smarts, lets use them. + return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, P, Size)); +} diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp new file mode 100644 index 000000000000..29850471f7dc --- /dev/null +++ b/lib/Analysis/LibCallSemantics.cpp @@ -0,0 +1,65 @@ +//===- LibCallSemantics.cpp - Describe library semantics ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements interfaces that can be used to describe language +// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM +// optimizers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Function.h" +using namespace llvm; + +/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This +/// helper does the cast. +static StringMap *getMap(void *Ptr) { + return static_cast *>(Ptr); +} + +LibCallInfo::~LibCallInfo() { + delete getMap(Impl); +} + +const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { + // Get location info on the first call. + if (NumLocations == 0) + NumLocations = getLocationInfo(Locations); + + assert(LocID < NumLocations && "Invalid location ID!"); + return Locations[LocID]; +} + + +/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to +/// the specified function if we have it. If not, return null. +const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const { + StringMap *Map = getMap(Impl); + + /// If this is the first time we are querying for this info, lazily construct + /// the StringMap to index it. + if (Map == 0) { + Impl = Map = new StringMap(); + + const LibCallFunctionInfo *Array = getFunctionInfoArray(); + if (Array == 0) return 0; + + // We now have the array of entries. Populate the StringMap. + for (unsigned i = 0; Array[i].Name; ++i) + (*Map)[Array[i].Name] = Array+i; + } + + // Look up this function in the string map. 
+ const char *ValueName = F->getNameStart(); + StringMap::iterator I = + Map->find(ValueName, ValueName+F->getNameLen()); + return I != Map->end() ? I->second : 0; +} + diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp new file mode 100644 index 000000000000..2bbe98aa5c24 --- /dev/null +++ b/lib/Analysis/LiveValues.cpp @@ -0,0 +1,191 @@ +//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the implementation for the LLVM IR Value liveness +// analysis pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LiveValues.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +using namespace llvm; + +FunctionPass *llvm::createLiveValuesPass() { return new LiveValues(); } + +char LiveValues::ID = 0; +static RegisterPass +X("live-values", "Value Liveness Analysis", false, true); + +LiveValues::LiveValues() : FunctionPass(&ID) {} + +void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +bool LiveValues::runOnFunction(Function &F) { + DT = &getAnalysis(); + LI = &getAnalysis(); + + // This pass' values are computed lazily, so there's nothing to do here. + + return false; +} + +void LiveValues::releaseMemory() { + Memos.clear(); +} + +/// isUsedInBlock - Test if the given value is used in the given block. +/// +bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.Used.count(BB); +} + +/// isLiveThroughBlock - Test if the given value is known to be +/// live-through the given block, meaning that the block is properly +/// dominated by the value's definition, and there exists a block +/// reachable from it that contains a use. This uses a conservative +/// approximation that errs on the side of returning false. +/// +bool LiveValues::isLiveThroughBlock(const Value *V, + const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.LiveThrough.count(BB); +} + +/// isKilledInBlock - Test if the given value is known to be killed in +/// the given block, meaning that the block contains a use of the value, +/// and no blocks reachable from the block contain a use. This uses a +/// conservative approximation that errs on the side of returning false. +/// +bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.Killed.count(BB); +} + +/// getMemo - Retrieve an existing Memo for the given value if one +/// is available, otherwise compute a new one. +/// +LiveValues::Memo &LiveValues::getMemo(const Value *V) { + DenseMap::iterator I = Memos.find(V); + if (I != Memos.end()) + return I->second; + return compute(V); +} + +/// getImmediateDominator - A handy utility for the specific DominatorTree +/// query that we need here. +/// +static const BasicBlock *getImmediateDominator(const BasicBlock *BB, + const DominatorTree *DT) { + DomTreeNode *Node = DT->getNode(const_cast(BB))->getIDom(); + return Node ? Node->getBlock() : 0; +} + +/// compute - Compute a new Memo for the given value. +/// +LiveValues::Memo &LiveValues::compute(const Value *V) { + Memo &M = Memos[V]; + + // Determine the block containing the definition. 
+ const BasicBlock *DefBB; + // Instructions define values with meaningful live ranges. + if (const Instruction *I = dyn_cast(V)) + DefBB = I->getParent(); + // Arguments can be analyzed as values defined in the entry block. + else if (const Argument *A = dyn_cast(V)) + DefBB = &A->getParent()->getEntryBlock(); + // Constants and other things aren't meaningful here, so just + // return having computed an empty Memo so that we don't come + // here again. The assumption here is that client code won't + // be asking about such values very often. + else + return M; + + // Determine if the value is defined inside a loop. This is used + // to track whether the value is ever used outside the loop, so + // it'll be set to null if the value is either not defined in a + // loop or used outside the loop in which it is defined. + const Loop *L = LI->getLoopFor(DefBB); + + // Track whether the value is used anywhere outside of the block + // in which it is defined. + bool LiveOutOfDefBB = false; + + // Examine each use of the value. + for (Value::use_const_iterator I = V->use_begin(), E = V->use_end(); + I != E; ++I) { + const User *U = *I; + const BasicBlock *UseBB = cast(U)->getParent(); + + // Note the block in which this use occurs. + M.Used.insert(UseBB); + + // If the use block doesn't have successors, the value can be + // considered killed. + if (succ_begin(UseBB) == succ_end(UseBB)) + M.Killed.insert(UseBB); + + // Observe whether the value is used outside of the loop in which + // it is defined. Switch to an enclosing loop if necessary. + for (; L; L = L->getParentLoop()) + if (L->contains(UseBB)) + break; + + // Search for live-through blocks. + const BasicBlock *BB; + if (const PHINode *PHI = dyn_cast(U)) { + // For PHI nodes, start the search at the incoming block paired with the + // incoming value, which must be dominated by the definition. + unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo()); + BB = PHI->getIncomingBlock(Num); + + // A PHI-node use means the value is live-out of it's defining block + // even if that block also contains the only use. + LiveOutOfDefBB = true; + } else { + // Otherwise just start the search at the use. + BB = UseBB; + + // Note if the use is outside the defining block. + LiveOutOfDefBB |= UseBB != DefBB; + } + + // Climb the immediate dominator tree from the use to the definition + // and mark all intermediate blocks as live-through. + for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) { + if (BB != UseBB && !M.LiveThrough.insert(BB)) + break; + } + } + + // If the value is defined inside a loop and is not live outside + // the loop, then each exit block of the loop in which the value + // is used is a kill block. + if (L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + const BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (M.Used.count(ExitingBlock)) + M.Killed.insert(ExitingBlock); + } + } + + // If the value was never used outside the the block in which it was + // defined, it's killed in that block. 
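+  // (Typical case: a temporary computed and fully consumed inside one basic
+  // block is killed right in its defining block.)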
+ if (!LiveOutOfDefBB) + M.Killed.insert(DefBB); + + return M; +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp new file mode 100644 index 000000000000..de6480a66d5b --- /dev/null +++ b/lib/Analysis/LoopInfo.cpp @@ -0,0 +1,50 @@ +//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoopInfo class that is used to identify natural loops +// and determine the loop depth of various nodes of the CFG. Note that the +// loops identified may actually be several natural loops that share the same +// header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Streams.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include +#include +using namespace llvm; + +char LoopInfo::ID = 0; +static RegisterPass +X("loops", "Natural Loop Information", true, true); + +//===----------------------------------------------------------------------===// +// Loop implementation +// + +//===----------------------------------------------------------------------===// +// LoopInfo implementation +// +bool LoopInfo::runOnFunction(Function &) { + releaseMemory(); + LI->Calculate(getAnalysis().getBase()); // Update + return false; +} + +void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp new file mode 100644 index 000000000000..08c25f4ceae4 --- /dev/null +++ b/lib/Analysis/LoopPass.cpp @@ -0,0 +1,340 @@ +//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LoopPass and LPPassManager. All loop optimization +// and transformation passes are derived from LoopPass. LPPassManager is +// responsible for managing LoopPasses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopPass.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// LPPassManager +// + +char LPPassManager::ID = 0; +/// LPPassManager manages FPPassManagers and CalLGraphSCCPasses. + +LPPassManager::LPPassManager(int Depth) + : FunctionPass(&ID), PMDataManager(Depth) { + skipThisLoop = false; + redoThisLoop = false; + LI = NULL; + CurrentLoop = NULL; +} + +/// Delete loop from the loop queue and loop hierarchy (LoopInfo). +void LPPassManager::deleteLoopFromQueue(Loop *L) { + + if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop. + // Reparent all of the blocks in this loop. Since BBLoop had a parent, + // they are now all in it. 
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+  if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+    // Reparent all of the blocks in this loop. Since BBLoop had a parent,
+    // they are now all in it.
+    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+         I != E; ++I)
+      if (LI->getLoopFor(*I) == L)    // Don't change blocks in subloops.
+        LI->changeLoopFor(*I, ParentLoop);
+
+    // Remove the loop from its parent loop.
+    for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+         ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        ParentLoop->removeChildLoop(I);
+        break;
+      }
+    }
+
+    // Move all subloops into the parent loop.
+    while (!L->empty())
+      ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+  } else {
+    // Reparent all of the blocks in this loop. Since BBLoop had no parent,
+    // they are no longer in a loop at all.
+
+    for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+      // Don't change blocks in subloops.
+      if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+        LI->removeBlock(L->getBlocks()[i]);
+        --i;
+      }
+    }
+
+    // Remove the loop from the top-level LoopInfo object.
+    for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        LI->removeLoop(I);
+        break;
+      }
+    }
+
+    // Move all of the subloops to the top-level.
+    while (!L->empty())
+      LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+  }
+
+  delete L;
+
+  // If L is current loop then skip rest of the passes and let
+  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+  // and continue applying other passes on CurrentLoop.
+  if (CurrentLoop == L) {
+    skipThisLoop = true;
+    return;
+  }
+
+  for (std::deque<Loop *>::iterator I = LQ.begin(),
+         E = LQ.end(); I != E; ++I) {
+    if (*I == L) {
+      LQ.erase(I);
+      break;
+    }
+  }
+}
+
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+  assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+  // Insert into loop nest
+  if (ParentLoop)
+    ParentLoop->addChildLoop(L);
+  else
+    LI->addTopLevelLoop(L);
+
+  // Insert L into loop queue
+  if (L == CurrentLoop)
+    redoLoop(L);
+  else if (!ParentLoop)
+    // This is top level loop.
+    LQ.push_front(L);
+  else {
+    // Insert L after ParentLoop
+    for (std::deque<Loop *>::iterator I = LQ.begin(),
+           E = LQ.end(); I != E; ++I) {
+      if (*I == ParentLoop) {
+        // deque does not support insert after.
+        ++I;
+        LQ.insert(I, 1, L);
+        break;
+      }
+    }
+  }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+  assert (CurrentLoop == L && "Can redo only CurrentLoop");
+  redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+                                                  BasicBlock *To, Loop *L) {
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    LoopPass *LP = dynamic_cast<LoopPass *>(P);
+    LP->cloneBasicBlockAnalysis(From, To, L);
+  }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { + if (BasicBlock *BB = dyn_cast(V)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; + ++BI) { + Instruction &I = *BI; + deleteSimpleAnalysisValue(&I, L); + } + } + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + LoopPass *LP = dynamic_cast(P); + LP->deleteAnalysisValue(V, L); + } +} + + +// Recurse through all subloops and all loops into LQ. +static void addLoopIntoQueue(Loop *L, std::deque &LQ) { + LQ.push_back(L); + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + addLoopIntoQueue(*I, LQ); +} + +/// Pass Manager itself does not invalidate any analysis info. +void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { + // LPPassManager needs LoopInfo. In the long term LoopInfo class will + // become part of LPPassManager. + Info.addRequired(); + Info.setPreservesAll(); +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the function, and if so, return true. +bool LPPassManager::runOnFunction(Function &F) { + LI = &getAnalysis(); + bool Changed = false; + + // Collect inherited analysis from Module level pass manager. + populateInheritedAnalysis(TPM->activeStack); + + // Populate Loop Queue + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + addLoopIntoQueue(*I, LQ); + + // Initialization + for (std::deque::const_iterator I = LQ.begin(), E = LQ.end(); + I != E; ++I) { + Loop *L = *I; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + LoopPass *LP = dynamic_cast(P); + if (LP) + Changed |= LP->doInitialization(L, *this); + } + } + + // Walk Loops + while (!LQ.empty()) { + + CurrentLoop = LQ.back(); + skipThisLoop = false; + redoThisLoop = false; + + // Run all passes on current SCC + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, ""); + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + LoopPass *LP = dynamic_cast(P); + { + PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader()); + StartPassTimer(P); + assert(LP && "Invalid LPPassManager member"); + Changed |= LP->runOnLoop(CurrentLoop, *this); + StopPassTimer(P); + } + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, ""); + dumpPreservedSet(P); + + verifyPreservedAnalysis(LP); + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, "", ON_LOOP_MSG); + + // If dominator information is available then verify the info if requested. + verifyDomInfo(*LP, F); + + if (skipThisLoop) + // Do not run other passes on this loop. + break; + } + + // Pop the loop from queue after running all passes. 
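The pop that follows closes the worklist protocol used by runOnFunction above: the current loop stays in the queue while its pass pipeline runs, skipThisLoop aborts the remaining passes after a deletion, and redoThisLoop re-queues the loop once the pipeline finishes. A compilable sketch of just that control pattern, with ints standing in for loops and prints standing in for passes:

#include <deque>
#include <iostream>

// Sketch only: process items from the back of a deque; a "pass" may
// request skipping the rest of the pipeline (item deleted) or a redo
// (item changed enough to deserve another full trip).
int main() {
  std::deque<int> Queue;
  for (int i = 1; i <= 3; ++i) Queue.push_back(i);

  while (!Queue.empty()) {
    int Current = Queue.back();
    bool Skip = false, Redo = false;

    // Stand-in for running each contained pass on Current.
    for (int Pass = 0; Pass != 2 && !Skip; ++Pass) {
      std::cout << "pass " << Pass << " on item " << Current << "\n";
      // A real pass would set Skip or Redo here.
    }

    Queue.pop_back();            // pop only after all passes ran...
    if (Redo)
      Queue.push_back(Current);  // ...and optionally revisit the item
  }
}

Leaving the current item in the queue until the pipeline finishes is what makes deleteLoopFromQueue's "let runOnFunction remove L" comment above work: deletion only sets a flag, and the single pop here stays the one place the queue shrinks.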
+ LQ.pop_back(); + + if (redoThisLoop) + LQ.push_back(CurrentLoop); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + LoopPass *LP = dynamic_cast (P); + if (LP) + Changed |= LP->doFinalization(); + } + + return Changed; +} + +/// Print passes managed by this manager +void LPPassManager::dumpPassStructure(unsigned Offset) { + llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + + +//===----------------------------------------------------------------------===// +// LoopPass + +// Check if this pass is suitable for the current LPPassManager, if +// available. This pass P is not suitable for a LPPassManager if P +// is not preserving higher level analysis info used by other +// LPPassManager passes. In such case, pop LPPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. +void LoopPass::preparePassManager(PMStack &PMS) { + + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + LPPassManager *LPPM = dynamic_cast(PMS.top()); + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new LPPassManager. + if (LPPM && !LPPM->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void LoopPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + LPPassManager *LPPM = dynamic_cast(PMS.top()); + + // Create new Loop Pass Manager if it does not exist. + if (!LPPM) { + + assert (!PMS.empty() && "Unable to create Loop Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + LPPM = new LPPassManager(PMD->getDepth() + 1); + LPPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(LPPM); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = dynamic_cast(LPPM); + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(LPPM); + } + + LPPM->add(this); +} diff --git a/lib/Analysis/LoopVR.cpp b/lib/Analysis/LoopVR.cpp new file mode 100644 index 000000000000..0a3d06bed7e8 --- /dev/null +++ b/lib/Analysis/LoopVR.cpp @@ -0,0 +1,291 @@ +//===- LoopVR.cpp - Value Range analysis driven by loop information -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// FIXME: What does this do? 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopvr"
+#include "llvm/Analysis/LoopVR.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char LoopVR::ID = 0;
+static RegisterPass<LoopVR> X("loopvr", "Loop Value Ranges", false, true);
+
+/// getRange - determine the range for a particular SCEV within a given Loop
+ConstantRange LoopVR::getRange(SCEVHandle S, Loop *L, ScalarEvolution &SE) {
+  SCEVHandle T = SE.getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(T))
+    return ConstantRange(cast<IntegerType>(S->getType())->getBitWidth(), true);
+
+  T = SE.getTruncateOrZeroExtend(T, S->getType());
+  return getRange(S, T, SE);
+}
+
+/// getRange - determine the range for a particular SCEV with a given trip count
+ConstantRange LoopVR::getRange(SCEVHandle S, SCEVHandle T, ScalarEvolution &SE){
+
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+    return ConstantRange(C->getValue()->getValue());
+
+  ConstantRange FullSet(cast<IntegerType>(S->getType())->getBitWidth(), true);
+
+  // {x,+,y,+,...z}. We detect overflow by checking the size of the set after
+  // summing the upper and lower.
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    ConstantRange X = getRange(Add->getOperand(0), T, SE);
+    if (X.isFullSet()) return FullSet;
+    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) {
+      ConstantRange Y = getRange(Add->getOperand(i), T, SE);
+      if (Y.isFullSet()) return FullSet;
+
+      APInt Spread_X = X.getSetSize(), Spread_Y = Y.getSetSize();
+      APInt NewLower = X.getLower() + Y.getLower();
+      APInt NewUpper = X.getUpper() + Y.getUpper() - 1;
+      if (NewLower == NewUpper)
+        return FullSet;
+
+      X = ConstantRange(NewLower, NewUpper);
+      if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+        return FullSet; // we've wrapped, therefore, full set.
+    }
+    return X;
+  }
+
+  // {x,*,y,*,...,z}. In order to detect overflow, we use k*bitwidth where
+  // k is the number of terms being multiplied.
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+    ConstantRange X = getRange(Mul->getOperand(0), T, SE);
+    if (X.isFullSet()) return FullSet;
+
+    const IntegerType *Ty = IntegerType::get(X.getBitWidth());
+    const IntegerType *ExTy = IntegerType::get(X.getBitWidth() *
+                                               Mul->getNumOperands());
+    ConstantRange XExt = X.zeroExtend(ExTy->getBitWidth());
+
+    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) {
+      ConstantRange Y = getRange(Mul->getOperand(i), T, SE);
+      if (Y.isFullSet()) return FullSet;
+
+      ConstantRange YExt = Y.zeroExtend(ExTy->getBitWidth());
+      XExt = ConstantRange(XExt.getLower() * YExt.getLower(),
+                           ((XExt.getUpper()-1) * (YExt.getUpper()-1)) + 1);
+    }
+    return XExt.truncate(Ty->getBitWidth());
+  }
+
+  // X smax Y smax ... Z is: range(smax(X_smin, Y_smin, ..., Z_smin),
+  //                               smax(X_smax, Y_smax, ..., Z_smax))
+  // It doesn't matter if one of the SCEVs has FullSet because we're taking
+  // a maximum of the minimums across all of them.
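Before the smax/umax cases below, the SCEVAddExpr overflow test above is worth seeing in miniature: interval addition wraps exactly when the result's set size shrinks below either operand's. A self-contained 8-bit sketch, with a toy Range8 type in place of ConstantRange; full-set inputs are assumed to have been filtered out already, as the code above does:

#include <stdint.h>
#include <cstdio>

// Sketch only: half-open wrapping intervals [Lo, Hi) over 8-bit values.
// A width of 0 stands for the full set of 256 values.
struct Range8 { uint8_t Lo, Hi; };

static uint8_t width(Range8 R) { return (uint8_t)(R.Hi - R.Lo); }

Range8 addRanges(Range8 X, Range8 Y) {
  Range8 Full = {0, 0};
  Range8 R = { (uint8_t)(X.Lo + Y.Lo),
               (uint8_t)(X.Hi + Y.Hi - 1) }; // sum of inclusive maxima, +1
  if (R.Lo == R.Hi)
    return Full;                             // collapsed to "everything"
  if (width(R) < width(X) || width(R) < width(Y))
    return Full;                             // spread shrank: we wrapped
  return R;
}

int main() {
  Range8 A = {10, 20}, B = {5, 8};           // [10,20) + [5,8)
  Range8 C = addRanges(A, B);
  std::printf("[%u, %u)\n", C.Lo, C.Hi);     // prints [15, 27)
}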
+ if (const SCEVSMaxExpr *SMax = dyn_cast(S)) { + ConstantRange X = getRange(SMax->getOperand(0), T, SE); + if (X.isFullSet()) return FullSet; + + APInt smin = X.getSignedMin(), smax = X.getSignedMax(); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) { + ConstantRange Y = getRange(SMax->getOperand(i), T, SE); + smin = APIntOps::smax(smin, Y.getSignedMin()); + smax = APIntOps::smax(smax, Y.getSignedMax()); + } + if (smax + 1 == smin) return FullSet; + return ConstantRange(smin, smax + 1); + } + + // X umax Y umax ... Z is: range(umax(X_umin, Y_umin, ..., Z_umin), + // umax(X_umax, Y_umax, ..., Z_umax)) + // It doesn't matter if one of the SCEVs has FullSet because we're taking + // a maximum of the minimums across all of them. + if (const SCEVUMaxExpr *UMax = dyn_cast(S)) { + ConstantRange X = getRange(UMax->getOperand(0), T, SE); + if (X.isFullSet()) return FullSet; + + APInt umin = X.getUnsignedMin(), umax = X.getUnsignedMax(); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) { + ConstantRange Y = getRange(UMax->getOperand(i), T, SE); + umin = APIntOps::umax(umin, Y.getUnsignedMin()); + umax = APIntOps::umax(umax, Y.getUnsignedMax()); + } + if (umax + 1 == umin) return FullSet; + return ConstantRange(umin, umax + 1); + } + + // L udiv R. Luckily, there's only ever 2 sides to a udiv. + if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { + ConstantRange L = getRange(UDiv->getLHS(), T, SE); + ConstantRange R = getRange(UDiv->getRHS(), T, SE); + if (L.isFullSet() && R.isFullSet()) return FullSet; + + if (R.getUnsignedMax() == 0) { + // RHS must be single-element zero. Return an empty set. + return ConstantRange(R.getBitWidth(), false); + } + + APInt Lower = L.getUnsignedMin().udiv(R.getUnsignedMax()); + + APInt Upper; + + if (R.getUnsignedMin() == 0) { + // Just because it contains zero, doesn't mean it will also contain one. + // Use maximalIntersectWith to get the right behaviour. + ConstantRange NotZero(APInt(L.getBitWidth(), 1), + APInt::getNullValue(L.getBitWidth())); + R = R.maximalIntersectWith(NotZero); + } + + // But, the maximal intersection might still include zero. If it does, then + // we know it also included one. + if (R.contains(APInt::getNullValue(L.getBitWidth()))) + Upper = L.getUnsignedMax(); + else + Upper = L.getUnsignedMax().udiv(R.getUnsignedMin()); + + return ConstantRange(Lower, Upper); + } + + // ConstantRange already implements the cast operators. 
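A small model of the SCEVUDivExpr rule above: unsigned quotients are minimized by dividing the smallest dividend by the largest divisor, and maximized the other way around, after excluding zero from the divisor range. This sketch uses plain 32-bit, non-wrapping inclusive intervals, so it sidesteps the wrapped-range intersection the real code performs with maximalIntersectWith:

#include <stdint.h>
#include <cassert>

// Sketch only: [Min, Max] inclusive, non-wrapping unsigned intervals.
struct URange { uint32_t Min, Max; };

URange udivRange(URange L, URange R) {
  assert(R.Max != 0 && "divisor range is identically zero");
  if (R.Min == 0)
    R.Min = 1;                 // a divisor of zero is impossible at runtime
  URange Q = { L.Min / R.Max,  // smallest dividend over largest divisor
               L.Max / R.Min };// largest dividend over smallest divisor
  return Q;
}

In the wrapped-range setting above, clamping the divisor away from zero is the delicate step; the non-wrapping simplification here makes the monotonicity argument visible.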
+ + if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) { + T = SE.getTruncateOrZeroExtend(T, ZExt->getOperand()->getType()); + ConstantRange X = getRange(ZExt->getOperand(), T, SE); + return X.zeroExtend(cast(ZExt->getType())->getBitWidth()); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) { + T = SE.getTruncateOrZeroExtend(T, SExt->getOperand()->getType()); + ConstantRange X = getRange(SExt->getOperand(), T, SE); + return X.signExtend(cast(SExt->getType())->getBitWidth()); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast(S)) { + T = SE.getTruncateOrZeroExtend(T, Trunc->getOperand()->getType()); + ConstantRange X = getRange(Trunc->getOperand(), T, SE); + if (X.isFullSet()) return FullSet; + return X.truncate(cast(Trunc->getType())->getBitWidth()); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { + const SCEVConstant *Trip = dyn_cast(T); + if (!Trip) return FullSet; + + if (AddRec->isAffine()) { + SCEVHandle StartHandle = AddRec->getStart(); + SCEVHandle StepHandle = AddRec->getOperand(1); + + const SCEVConstant *Step = dyn_cast(StepHandle); + if (!Step) return FullSet; + + uint32_t ExWidth = 2 * Trip->getValue()->getBitWidth(); + APInt TripExt = Trip->getValue()->getValue(); TripExt.zext(ExWidth); + APInt StepExt = Step->getValue()->getValue(); StepExt.zext(ExWidth); + if ((TripExt * StepExt).ugt(APInt::getLowBitsSet(ExWidth, ExWidth >> 1))) + return FullSet; + + SCEVHandle EndHandle = SE.getAddExpr(StartHandle, + SE.getMulExpr(T, StepHandle)); + const SCEVConstant *Start = dyn_cast(StartHandle); + const SCEVConstant *End = dyn_cast(EndHandle); + if (!Start || !End) return FullSet; + + const APInt &StartInt = Start->getValue()->getValue(); + const APInt &EndInt = End->getValue()->getValue(); + const APInt &StepInt = Step->getValue()->getValue(); + + if (StepInt.isNegative()) { + if (EndInt == StartInt + 1) return FullSet; + return ConstantRange(EndInt, StartInt + 1); + } else { + if (StartInt == EndInt + 1) return FullSet; + return ConstantRange(StartInt, EndInt + 1); + } + } + } + + // TODO: non-affine addrec, udiv, SCEVUnknown (narrowed from elsewhere)? 
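The affine SCEVAddRecExpr case above can be restated concretely: the value runs from start to start plus step times trip count, provided that product does not wrap. A 32-bit sketch with a hypothetical affineRange helper, using 64-bit arithmetic as a stand-in for the doubled bit width the code above creates by zero-extending:

#include <stdint.h>
#include <cstdio>

// Sketch only: unsigned, positive-step induction. Returns false when
// the final value cannot be represented, mirroring the FullSet result.
bool affineRange(uint32_t Start, uint32_t Step, uint32_t Trip,
                 uint32_t &Lo, uint32_t &Hi) {
  uint64_t Span = (uint64_t)Step * (uint64_t)Trip;  // double-width product
  if (Span > 0xFFFFFFFFull - Start)
    return false;                   // would wrap: no bounded range exists
  Lo = Start;
  Hi = Start + (uint32_t)Span;      // inclusive final value
  return true;
}

int main() {
  uint32_t Lo, Hi;
  if (affineRange(100, 4, 10, Lo, Hi))   // i = 100; ten steps of +4
    std::printf("[%u, %u]\n", Lo, Hi);   // prints [100, 140]
}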
+ + return FullSet; +} + +bool LoopVR::runOnFunction(Function &F) { Map.clear(); return false; } + +void LoopVR::print(std::ostream &os, const Module *) const { + raw_os_ostream OS(os); + for (std::map::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + OS << *I->first << ": " << *I->second << '\n'; + } +} + +void LoopVR::releaseMemory() { + for (std::map::iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + delete I->second; + } + + Map.clear(); +} + +ConstantRange LoopVR::compute(Value *V) { + if (ConstantInt *CI = dyn_cast(V)) + return ConstantRange(CI->getValue()); + + Instruction *I = dyn_cast(V); + if (!I) + return ConstantRange(cast(V->getType())->getBitWidth(), false); + + LoopInfo &LI = getAnalysis(); + + Loop *L = LI.getLoopFor(I->getParent()); + if (!L || L->isLoopInvariant(I)) + return ConstantRange(cast(V->getType())->getBitWidth(), false); + + ScalarEvolution &SE = getAnalysis(); + + SCEVHandle S = SE.getSCEV(I); + if (isa(S) || isa(S)) + return ConstantRange(cast(V->getType())->getBitWidth(), false); + + return ConstantRange(getRange(S, L, SE)); +} + +ConstantRange LoopVR::get(Value *V) { + std::map::iterator I = Map.find(V); + if (I == Map.end()) { + ConstantRange *CR = new ConstantRange(compute(V)); + Map[V] = CR; + return *CR; + } + + return *I->second; +} + +void LoopVR::remove(Value *V) { + std::map::iterator I = Map.find(V); + if (I != Map.end()) { + delete I->second; + Map.erase(I); + } +} + +void LoopVR::narrow(Value *V, const ConstantRange &CR) { + if (CR.isFullSet()) return; + + std::map::iterator I = Map.find(V); + if (I == Map.end()) + Map[V] = new ConstantRange(CR); + else + Map[V] = new ConstantRange(Map[V]->maximalIntersectWith(CR)); +} diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile new file mode 100644 index 000000000000..4af6d350a645 --- /dev/null +++ b/lib/Analysis/Makefile @@ -0,0 +1,16 @@ +##===- lib/Analysis/Makefile -------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMAnalysis +DIRS = IPA +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp new file mode 100644 index 000000000000..3b2102955f33 --- /dev/null +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -0,0 +1,1142 @@ +//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis that determines, for a given memory +// operation, what preceding memory operations it depends on. It builds on +// alias analysis information, and tries to provide a lazy, caching interface to +// a common kind of alias information query. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+          "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+          "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+          "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+          "Number of block queries that were completely cached");
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+static RegisterPass<MemoryDependenceAnalysis> X("memdep",
+                                  "Memory Dependence Analysis", false, true);
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(&ID), PredCache(0) {
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+  LocalDeps.clear();
+  NonLocalDeps.clear();
+  NonLocalPointerDeps.clear();
+  ReverseLocalDeps.clear();
+  ReverseNonLocalDeps.clear();
+  ReverseNonLocalPtrDeps.clear();
+  PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+  AU.addRequiredTransitive<TargetData>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+  AA = &getAnalysis<AliasAnalysis>();
+  TD = &getAnalysis<TargetData>();
+  if (PredCache == 0)
+    PredCache.reset(new PredIteratorCache());
+  return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
+                                 Instruction *Inst, KeyTy Val) {
+  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+  InstIt = ReverseMap.find(Inst);
+  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+  bool Found = InstIt->second.erase(Val);
+  assert(Found && "Invalid reverse map!"); Found=Found;
+  if (InstIt->second.empty())
+    ReverseMap.erase(InstIt);
+}
+
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
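An aside on the RemoveFromReverseMap helper just defined (the body of getCallSiteDependencyFrom follows it): the pattern is a map from keys to sets in which erasing the last member must also erase the bucket, or stale keys accumulate and the asserts guarding cache coherence start firing. The same shape with std containers, purely illustrative:

#include <cassert>
#include <map>
#include <set>
#include <string>

// Sketch only: a reverse map from a key to the set of values that
// reference it; empty buckets are removed eagerly.
typedef std::map<std::string, std::set<int> > ReverseMap;

void removeFromReverseMap(ReverseMap &M, const std::string &Key, int Val) {
  ReverseMap::iterator It = M.find(Key);
  assert(It != M.end() && "reverse map out of sync");
  bool Erased = It->second.erase(Val) != 0;
  assert(Erased && "value missing from reverse map");
  (void)Erased;
  if (It->second.empty())
    M.erase(It);               // keep the map free of dead buckets
}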
+MemDepResult MemoryDependenceAnalysis:: +getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, BasicBlock *BB) { + // Walk backwards through the block, looking for dependencies + while (ScanIt != BB->begin()) { + Instruction *Inst = --ScanIt; + + // If this inst is a memory op, get the pointer it accessed + Value *Pointer = 0; + uint64_t PointerSize = 0; + if (StoreInst *S = dyn_cast(Inst)) { + Pointer = S->getPointerOperand(); + PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType()); + } else if (VAArgInst *V = dyn_cast(Inst)) { + Pointer = V->getOperand(0); + PointerSize = TD->getTypeStoreSize(V->getType()); + } else if (FreeInst *F = dyn_cast(Inst)) { + Pointer = F->getPointerOperand(); + + // FreeInsts erase the entire structure + PointerSize = ~0ULL; + } else if (isa(Inst) || isa(Inst)) { + // Debug intrinsics don't cause dependences. + if (isa(Inst)) continue; + CallSite InstCS = CallSite::get(Inst); + // If these two calls do not interfere, look past it. + switch (AA->getModRefInfo(CS, InstCS)) { + case AliasAnalysis::NoModRef: + // If the two calls don't interact (e.g. InstCS is readnone) keep + // scanning. + continue; + case AliasAnalysis::Ref: + // If the two calls read the same memory locations and CS is a readonly + // function, then we have two cases: 1) the calls may not interfere with + // each other at all. 2) the calls may produce the same value. In case + // #1 we want to ignore the values, in case #2, we want to return Inst + // as a Def dependence. This allows us to CSE in cases like: + // X = strlen(P); + // memchr(...); + // Y = strlen(P); // Y = X + if (isReadOnlyCall) { + if (CS.getCalledFunction() != 0 && + CS.getCalledFunction() == InstCS.getCalledFunction()) + return MemDepResult::getDef(Inst); + // Ignore unrelated read/read call dependences. + continue; + } + // FALL THROUGH + default: + return MemDepResult::getClobber(Inst); + } + } else { + // Non-memory instruction. + continue; + } + + if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); + } + + // No dependence found. If this is the entry block of the function, it is a + // clobber, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getClobber(ScanIt); +} + +/// getPointerDependencyFrom - Return the instruction on which a memory +/// location depends. If isLoad is true, this routine ignore may-aliases with +/// read-only operations. +MemDepResult MemoryDependenceAnalysis:: +getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB) { + + // Walk backwards through the basic block, looking for dependencies. + while (ScanIt != BB->begin()) { + Instruction *Inst = --ScanIt; + + // Debug intrinsics don't cause dependences. + if (isa(Inst)) continue; + + // Values depend on loads if the pointers are must aliased. This means that + // a load depends on another must aliased load from the same value. + if (LoadInst *LI = dyn_cast(Inst)) { + Value *Pointer = LI->getPointerOperand(); + uint64_t PointerSize = TD->getTypeStoreSize(LI->getType()); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = + AA->alias(Pointer, PointerSize, MemPtr, MemSize); + if (R == AliasAnalysis::NoAlias) + continue; + + // May-alias loads don't depend on each other without a dependence. 
+ if (isLoad && R == AliasAnalysis::MayAlias) + continue; + // Stores depend on may and must aliased loads, loads depend on must-alias + // loads. + return MemDepResult::getDef(Inst); + } + + if (StoreInst *SI = dyn_cast(Inst)) { + // If alias analysis can tell that this store is guaranteed to not modify + // the query pointer, ignore it. Use getModRefInfo to handle cases where + // the query pointer points to constant memory etc. + if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef) + continue; + + // Ok, this store might clobber the query pointer. Check to see if it is + // a must alias: in this case, we want to return this as a def. + Value *Pointer = SI->getPointerOperand(); + uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType()); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = + AA->alias(Pointer, PointerSize, MemPtr, MemSize); + + if (R == AliasAnalysis::NoAlias) + continue; + if (R == AliasAnalysis::MayAlias) + return MemDepResult::getClobber(Inst); + return MemDepResult::getDef(Inst); + } + + // If this is an allocation, and if we know that the accessed pointer is to + // the allocation, return Def. This means that there is no dependence and + // the access can be optimized based on that. For example, a load could + // turn into undef. + if (AllocationInst *AI = dyn_cast(Inst)) { + Value *AccessPtr = MemPtr->getUnderlyingObject(); + + if (AccessPtr == AI || + AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + return MemDepResult::getDef(AI); + continue; + } + + // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. + switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) { + case AliasAnalysis::NoModRef: + // If the call has no effect on the queried pointer, just ignore it. + continue; + case AliasAnalysis::Ref: + // If the call is known to never store to the pointer, and if this is a + // load query, we can safely ignore it (scan past it). + if (isLoad) + continue; + // FALL THROUGH. + default: + // Otherwise, there is a potential dependence. Return a clobber. + return MemDepResult::getClobber(Inst); + } + } + + // No dependence found. If this is the entry block of the function, it is a + // clobber, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getClobber(ScanIt); +} + +/// getDependency - Return the instruction on which a memory operation +/// depends. +MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { + Instruction *ScanPos = QueryInst; + + // Check for a cached result + MemDepResult &LocalCache = LocalDeps[QueryInst]; + + // If the cached entry is non-dirty, just return it. Note that this depends + // on MemDepResult's default constructing to 'dirty'. + if (!LocalCache.isDirty()) + return LocalCache; + + // Otherwise, if we have a dirty entry, we know we can start the scan at that + // instruction, which may save us some work. + if (Instruction *Inst = LocalCache.getInst()) { + ScanPos = Inst; + + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); + } + + BasicBlock *QueryParent = QueryInst->getParent(); + + Value *MemPtr = 0; + uint64_t MemSize = 0; + + // Do the scan. + if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { + // No dependence found. If this is the entry block of the function, it is a + // clobber, otherwise it is non-local. 
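Stepping back from the scan logic for a moment: getDependency, continued below, leans on a cache in which an invalid entry is not simply absent but dirty, carrying the position the previous scan reached so that recomputation can resume there. A toy sketch of that discipline, with ints as instruction ids that double as block indices and an arbitrary stand-in dependence rule (nearest earlier even entry):

#include <map>
#include <vector>

// Sketch only: entries default-construct to dirty; a dirty entry may
// record a resume point so the rescan covers only the stale suffix.
struct CachedDep {
  bool Dirty;
  int  ScanPos;   // resume point when dirty; -1 means "start at query"
  int  Answer;
  CachedDep() : Dirty(true), ScanPos(-1), Answer(-1) {}
};

int queryDep(int QueryIdx, std::map<int, CachedDep> &Cache,
             const std::vector<int> &Block) {
  CachedDep &C = Cache[QueryIdx];      // default-constructs to dirty
  if (!C.Dirty)
    return C.Answer;                   // clean hit: no scanning at all
  int Start = C.ScanPos >= 0 ? C.ScanPos : QueryIdx;
  C.Answer = -1;
  for (int i = Start - 1; i >= 0; --i) // backwards, like the block scans
    if (Block[i] % 2 == 0) { C.Answer = Block[i]; break; }
  C.Dirty = false;
  C.ScanPos = -1;
  return C.Answer;
}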
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock()) + LocalCache = MemDepResult::getNonLocal(); + else + LocalCache = MemDepResult::getClobber(QueryInst); + } else if (StoreInst *SI = dyn_cast(QueryInst)) { + // If this is a volatile store, don't mess around with it. Just return the + // previous instruction as a clobber. + if (SI->isVolatile()) + LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); + else { + MemPtr = SI->getPointerOperand(); + MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType()); + } + } else if (LoadInst *LI = dyn_cast(QueryInst)) { + // If this is a volatile load, don't mess around with it. Just return the + // previous instruction as a clobber. + if (LI->isVolatile()) + LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); + else { + MemPtr = LI->getPointerOperand(); + MemSize = TD->getTypeStoreSize(LI->getType()); + } + } else if (isa(QueryInst) || isa(QueryInst)) { + CallSite QueryCS = CallSite::get(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + } else if (FreeInst *FI = dyn_cast(QueryInst)) { + MemPtr = FI->getPointerOperand(); + // FreeInsts erase the entire structure, not just a field. + MemSize = ~0UL; + } else { + // Non-memory instruction. + LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); + } + + // If we need to do a pointer scan, make it happen. + if (MemPtr) + LocalCache = getPointerDependencyFrom(MemPtr, MemSize, + isa(QueryInst), + ScanPos, QueryParent); + + // Remember the result! + if (Instruction *I = LocalCache.getInst()) + ReverseLocalDeps[I].insert(QueryInst); + + return LocalCache; +} + +#ifndef NDEBUG +/// AssertSorted - This method is used when -debug is specified to verify that +/// cache arrays are properly kept sorted. +static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + int Count = -1) { + if (Count == -1) Count = Cache.size(); + if (Count == 0) return; + + for (unsigned i = 1; i != unsigned(Count); ++i) + assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!"); +} +#endif + +/// getNonLocalCallDependency - Perform a full dependency query for the +/// specified call, returning the set of blocks that the value is +/// potentially live across. The returned set of results will include a +/// "NonLocal" result for all blocks where the value is live across. +/// +/// This method assumes the instruction returns a "NonLocal" dependency +/// within its own block. +/// +/// This returns a reference to an internal data structure that may be +/// invalidated on the next non-local query or when an instruction is +/// removed. Clients must copy this data if they want it around longer than +/// that. +const MemoryDependenceAnalysis::NonLocalDepInfo & +MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { + assert(getDependency(QueryCS.getInstruction()).isNonLocal() && + "getNonLocalCallDependency should only be used on calls with non-local deps!"); + PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()]; + NonLocalDepInfo &Cache = CacheP.first; + + /// DirtyBlocks - This is the set of blocks that need to be recomputed. In + /// the cached case, this can happen due to instructions being deleted etc. In + /// the uncached case, this starts out as the set of predecessors we care + /// about. + SmallVector DirtyBlocks; + + if (!Cache.empty()) { + // Okay, we have a cache entry. 
If we know it is not dirty, just return it + // with no computation. + if (!CacheP.second) { + NumCacheNonLocal++; + return Cache; + } + + // If we already have a partially computed set of results, scan them to + // determine what is dirty, seeding our initial DirtyBlocks worklist. + for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); + I != E; ++I) + if (I->second.isDirty()) + DirtyBlocks.push_back(I->first); + + // Sort the cache so that we can do fast binary search lookups below. + std::sort(Cache.begin(), Cache.end()); + + ++NumCacheDirtyNonLocal; + //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " + // << Cache.size() << " cached: " << *QueryInst; + } else { + // Seed DirtyBlocks with each of the preds of QueryInst's block. + BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); + for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) + DirtyBlocks.push_back(*PI); + NumUncacheNonLocal++; + } + + // isReadonlyCall - If this is a read-only call, we can be more aggressive. + bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); + + SmallPtrSet Visited; + + unsigned NumSortedEntries = Cache.size(); + DEBUG(AssertSorted(Cache)); + + // Iterate while we still have blocks to update. + while (!DirtyBlocks.empty()) { + BasicBlock *DirtyBB = DirtyBlocks.back(); + DirtyBlocks.pop_back(); + + // Already processed this block? + if (!Visited.insert(DirtyBB)) + continue; + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. + DEBUG(AssertSorted(Cache, NumSortedEntries)); + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, + std::make_pair(DirtyBB, MemDepResult())); + if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB) + --Entry; + + MemDepResult *ExistingResult = 0; + if (Entry != Cache.begin()+NumSortedEntries && + Entry->first == DirtyBB) { + // If we already have an entry, and if it isn't already dirty, the block + // is done. + if (!Entry->second.isDirty()) + continue; + + // Otherwise, remember this slot so we can update the value. + ExistingResult = &Entry->second; + } + + // If the dirty entry has a pointer, start scanning from it so we don't have + // to rescan the entire block. + BasicBlock::iterator ScanPos = DirtyBB->end(); + if (ExistingResult) { + if (Instruction *Inst = ExistingResult->getInst()) { + ScanPos = Inst; + // We're removing QueryInst's use of Inst. + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, + QueryCS.getInstruction()); + } + } + + // Find out if this block has a local dependency for QueryInst. + MemDepResult Dep; + + if (ScanPos != DirtyBB->begin()) { + Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); + } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { + // No dependence found. If this is the entry block of the function, it is + // a clobber, otherwise it is non-local. + Dep = MemDepResult::getNonLocal(); + } else { + Dep = MemDepResult::getClobber(ScanPos); + } + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + *ExistingResult = Dep; + else + Cache.push_back(std::make_pair(DirtyBB, Dep)); + + // If the block has a dependency (i.e. it isn't completely transparent to + // the value), remember the association! + if (!Dep.isNonLocal()) { + // Keep the ReverseNonLocalDeps map up to date so we can efficiently + // update this when we remove instructions. 
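The dirty-block loop above searches a cache that is sorted only up to NumSortedEntries, with newly appended entries living past that boundary until a batch re-sort. A compact sketch of that lookup idiom on a sorted-prefix vector, using std::lower_bound with an (id, INT_MIN) probe so the first matching entry is found directly:

#include <algorithm>
#include <climits>
#include <utility>
#include <vector>

// Sketch only: (block id, cached result) pairs; the first NumSorted
// entries are sorted, the tail is append-only until the next sort.
typedef std::pair<int, int> Entry;

Entry *findInCache(std::vector<Entry> &Cache, unsigned NumSorted, int BB) {
  std::vector<Entry>::iterator It =
      std::lower_bound(Cache.begin(), Cache.begin() + NumSorted,
                       std::make_pair(BB, INT_MIN));
  if (It != Cache.begin() + NumSorted && It->first == BB)
    return &*It;               // binary search over the sorted prefix
  for (unsigned i = NumSorted; i != Cache.size(); ++i)
    if (Cache[i].first == BB)
      return &Cache[i];        // linear scan of the unsorted tail
  return 0;
}

Deferring the sort is the point: appends during a query are O(1), and one O(n log n) sort amortizes over many lookups instead of paying an insertion-in-order cost per block.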
+ if (Instruction *Inst = Dep.getInst()) + ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); + } else { + + // If the block *is* completely transparent to the load, we need to check + // the predecessors of this block. Add them to our worklist. + for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) + DirtyBlocks.push_back(*PI); + } + } + + return Cache; +} + +/// getNonLocalPointerDependency - Perform a full dependency query for an +/// access to the specified (non-volatile) memory location, returning the +/// set of instructions that either define or clobber the value. +/// +/// This method assumes the pointer has a "NonLocal" dependency within its +/// own block. +/// +void MemoryDependenceAnalysis:: +getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, + SmallVectorImpl &Result) { + assert(isa(Pointer->getType()) && + "Can't get pointer deps of a non-pointer!"); + Result.clear(); + + // We know that the pointer value is live into FromBB find the def/clobbers + // from presecessors. + const Type *EltTy = cast(Pointer->getType())->getElementType(); + uint64_t PointeeSize = TD->getTypeStoreSize(EltTy); + + // This is the set of blocks we've inspected, and the pointer we consider in + // each block. Because of critical edges, we currently bail out if querying + // a block with multiple different pointers. This can happen during PHI + // translation. + DenseMap Visited; + if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB, + Result, Visited, true)) + return; + Result.clear(); + Result.push_back(std::make_pair(FromBB, + MemDepResult::getClobber(FromBB->begin()))); +} + +/// GetNonLocalInfoForBlock - Compute the memdep value for BB with +/// Pointer/PointeeSize using either cached information in Cache or by doing a +/// lookup (which may use dirty cache info if available). If we do a lookup, +/// add the result to the cache. +MemDepResult MemoryDependenceAnalysis:: +GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, + bool isLoad, BasicBlock *BB, + NonLocalDepInfo *Cache, unsigned NumSortedEntries) { + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, + std::make_pair(BB, MemDepResult())); + if (Entry != Cache->begin() && prior(Entry)->first == BB) + --Entry; + + MemDepResult *ExistingResult = 0; + if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB) + ExistingResult = &Entry->second; + + // If we have a cached entry, and it is non-dirty, use it as the value for + // this dependency. + if (ExistingResult && !ExistingResult->isDirty()) { + ++NumCacheNonLocalPtr; + return *ExistingResult; + } + + // Otherwise, we have to scan for the value. If we have a dirty cache + // entry, start scanning from its position, otherwise we scan from the end + // of the block. + BasicBlock::iterator ScanPos = BB->end(); + if (ExistingResult && ExistingResult->getInst()) { + assert(ExistingResult->getInst()->getParent() == BB && + "Instruction invalidated?"); + ++NumCacheDirtyNonLocalPtr; + ScanPos = ExistingResult->getInst(); + + // Eliminating the dirty entry from 'Cache', so update the reverse info. + ValueIsLoadPair CacheKey(Pointer, isLoad); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + } else { + ++NumUncacheNonLocalPtr; + } + + // Scan the block for the dependency. 
+ MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad, + ScanPos, BB); + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + *ExistingResult = Dep; + else + Cache->push_back(std::make_pair(BB, Dep)); + + // If the block has a dependency (i.e. it isn't completely transparent to + // the value), remember the reverse association because we just added it + // to Cache! + if (Dep.isNonLocal()) + return Dep; + + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently + // update MemDep when we remove instructions. + Instruction *Inst = Dep.getInst(); + assert(Inst && "Didn't depend on anything?"); + ValueIsLoadPair CacheKey(Pointer, isLoad); + ReverseNonLocalPtrDeps[Inst].insert(CacheKey); + return Dep; +} + + +/// getNonLocalPointerDepFromBB - Perform a dependency query based on +/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def +/// results to the results vector and keep track of which blocks are visited in +/// 'Visited'. +/// +/// This has special behavior for the first block queries (when SkipFirstBlock +/// is true). In this special case, it ignores the contents of the specified +/// block and starts returning dependence info for its predecessors. +/// +/// This function returns false on success, or true to indicate that it could +/// not compute dependence information for some reason. This should be treated +/// as a clobber dependence on the first instruction in the predecessor block. +bool MemoryDependenceAnalysis:: +getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, + bool isLoad, BasicBlock *StartBB, + SmallVectorImpl &Result, + DenseMap &Visited, + bool SkipFirstBlock) { + + // Look up the cached info for Pointer. + ValueIsLoadPair CacheKey(Pointer, isLoad); + + std::pair *CacheInfo = + &NonLocalPointerDeps[CacheKey]; + NonLocalDepInfo *Cache = &CacheInfo->second; + + // If we have valid cached information for exactly the block we are + // investigating, just return it with no recomputation. + if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { + // We have a fully cached result for this query then we can just return the + // cached results and populate the visited set. However, we have to verify + // that we don't already have conflicting results for these blocks. Check + // to ensure that if a block in the results set is in the visited set that + // it was for the same pointer query. + if (!Visited.empty()) { + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + DenseMap::iterator VI = Visited.find(I->first); + if (VI == Visited.end() || VI->second == Pointer) continue; + + // We have a pointer mismatch in a block. Just return clobber, saying + // that something was clobbered in this result. We could also do a + // non-fully cached query, but there is little point in doing this. + return true; + } + } + + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + Visited.insert(std::make_pair(I->first, Pointer)); + if (!I->second.isNonLocal()) + Result.push_back(*I); + } + ++NumCacheCompleteNonLocalPtr; + return false; + } + + // Otherwise, either this is a new block, a block with an invalid cache + // pointer or one that we're about to invalidate by putting more info into it + // than its valid cache info. If empty, the result will be valid cache info, + // otherwise it isn't. 
+ if (Cache->empty()) + CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + else + CacheInfo->first = BBSkipFirstBlockPair(); + + SmallVector Worklist; + Worklist.push_back(StartBB); + + // Keep track of the entries that we know are sorted. Previously cached + // entries will all be sorted. The entries we add we only sort on demand (we + // don't insert every element into its sorted position). We know that we + // won't get any reuse from currently inserted values, because we don't + // revisit blocks after we insert info for them. + unsigned NumSortedEntries = Cache->size(); + DEBUG(AssertSorted(*Cache)); + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Skip the first block if we have it. + if (!SkipFirstBlock) { + // Analyze the dependency of *Pointer in FromBB. See if we already have + // been here. + assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); + + // Get the dependency info for Pointer in BB. If we have cached + // information, we will use it, otherwise we compute it. + DEBUG(AssertSorted(*Cache, NumSortedEntries)); + MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad, + BB, Cache, NumSortedEntries); + + // If we got a Def or Clobber, add this to the list of results. + if (!Dep.isNonLocal()) { + Result.push_back(NonLocalDepEntry(BB, Dep)); + continue; + } + } + + // If 'Pointer' is an instruction defined in this block, then we need to do + // phi translation to change it into a value live in the predecessor block. + // If phi translation fails, then we can't continue dependence analysis. + Instruction *PtrInst = dyn_cast(Pointer); + bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB; + + // If no PHI translation is needed, just add all the predecessors of this + // block to scan them as well. + if (!NeedsPHITranslation) { + SkipFirstBlock = false; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + // Verify that we haven't looked at this block yet. + std::pair::iterator, bool> + InsertRes = Visited.insert(std::make_pair(*PI, Pointer)); + if (InsertRes.second) { + // First time we've looked at *PI. + Worklist.push_back(*PI); + continue; + } + + // If we have seen this block before, but it was with a different + // pointer then we have a phi translation failure and we have to treat + // this as a clobber. + if (InsertRes.first->second != Pointer) + goto PredTranslationFailure; + } + continue; + } + + // If we do need to do phi translation, then there are a bunch of different + // cases, because we have to find a Value* live in the predecessor block. We + // know that PtrInst is defined in this block at least. + + // If this is directly a PHI node, just use the incoming values for each + // pred as the phi translated version. + if (PHINode *PtrPHI = dyn_cast(PtrInst)) { + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. 
+ std::pair::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); + + if (!InsertRes.second) { + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtr) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. + goto PredTranslationFailure; + } + + // We may have added values to the cache list before this PHI + // translation. If so, we haven't done anything to ensure that the + // cache remains sorted. Sort it now (if needed) so that recursive + // invocations of getNonLocalPointerDepFromBB that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) + std::sort(Cache->begin(), Cache->end()); + Cache = 0; + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If we have a problem phi translating, fall through to the code below + // to handle the failure condition. + if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, + Result, Visited)) + goto PredTranslationFailure; + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->second; + NumSortedEntries = Cache->size(); + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->first = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + } + + // TODO: BITCAST, GEP. + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa(PtrInst) || isa(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + PredTranslationFailure: + + if (Cache == 0) { + // Refresh the CacheInfo/Cache pointer if it got invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->second; + NumSortedEntries = Cache->size(); + } else if (NumSortedEntries != Cache->size()) { + std::sort(Cache->begin(), Cache->end()); + NumSortedEntries = Cache->size(); + } + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->first = BBSkipFirstBlockPair(); + + // If *nothing* works, mark the pointer as being clobbered by the first + // instruction in this block. + // + // If this is the magic first block, return this as a clobber of the whole + // incoming value. Since we can't phi translate to one of the predecessors, + // we have to bail out. 
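The bail-out below is the last resort of the visited-map discipline used throughout this PHI translation: a block may be entered with exactly one pointer value per query, a repeat visit with the same value is free, and a visit with a different value poisons the query. That three-way outcome in isolation, with strings for blocks and pointer values:

#include <map>
#include <string>

// Sketch only: block name -> pointer value it was analyzed with.
typedef std::map<std::string, std::string> VisitedMap;

enum VisitResult { FirstVisit, SameValue, Conflict };

VisitResult visit(VisitedMap &Visited, const std::string &Block,
                  const std::string &Pointer) {
  std::pair<VisitedMap::iterator, bool> Res =
      Visited.insert(std::make_pair(Block, Pointer));
  if (Res.second)
    return FirstVisit;           // never seen: keep exploring
  if (Res.first->second == Pointer)
    return SameValue;            // already analyzed with this value
  return Conflict;               // different value: abandon the query
}

Representing "one pointer per block" as a single map insert is what keeps the failure check O(log n) per edge; a conflict simply means the cached results for the two pointers cannot share one table.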
+ if (SkipFirstBlock) + return true; + + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { + assert(I != Cache->rend() && "Didn't find current block??"); + if (I->first != BB) + continue; + + assert(I->second.isNonLocal() && + "Should only be here with transparent block"); + I->second = MemDepResult::getClobber(BB->begin()); + ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey); + Result.push_back(*I); + break; + } + } + + // Okay, we're done now. If we added new values to the cache, re-sort it. + switch (Cache->size()-NumSortedEntries) { + case 0: + // done, no new entries. + break; + case 2: { + // Two new entries, insert the last one into place. + NonLocalDepEntry Val = Cache->back(); + Cache->pop_back(); + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->end()-1, Val); + Cache->insert(Entry, Val); + // FALL THROUGH. + } + case 1: + // One new entry, Just insert the new value at the appropriate position. + if (Cache->size() != 1) { + NonLocalDepEntry Val = Cache->back(); + Cache->pop_back(); + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->end(), Val); + Cache->insert(Entry, Val); + } + break; + default: + // Added many values, do a full scale sort. + std::sort(Cache->begin(), Cache->end()); + } + DEBUG(AssertSorted(*Cache)); + return false; +} + +/// RemoveCachedNonLocalPointerDependencies - If P exists in +/// CachedNonLocalPointerInfo, remove it. +void MemoryDependenceAnalysis:: +RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { + CachedNonLocalPointerInfo::iterator It = + NonLocalPointerDeps.find(P); + if (It == NonLocalPointerDeps.end()) return; + + // Remove all of the entries in the BB->val map. This involves removing + // instructions from the reverse map. + NonLocalDepInfo &PInfo = It->second.second; + + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { + Instruction *Target = PInfo[i].second.getInst(); + if (Target == 0) continue; // Ignore non-local dep results. + assert(Target->getParent() == PInfo[i].first); + + // Eliminating the dirty entry from 'Cache', so update the reverse info. + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); + } + + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). + NonLocalPointerDeps.erase(It); +} + + +/// invalidateCachedPointerInfo - This method is used to invalidate cached +/// information about the specified pointer, because it may be too +/// conservative in memdep. This is an optional call that can be used when +/// the client detects an equivalence between the pointer and some other +/// value and replaces the other value with ptr. This can make Ptr available +/// in more places that cached info does not necessarily keep. +void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { + // If Ptr isn't really a pointer, just ignore it. + if (!isa(Ptr->getType())) return; + // Flush store info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); + // Flush load info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); +} + +/// removeInstruction - Remove an instruction from the dependence analysis, +/// updating the dependence of instructions that previously depended on it. +/// This method attempts to keep the cache coherent using the reverse map. +void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { + // Walk through the Non-local dependencies, removing this one as the value + // for any cached queries. 
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); + if (NLDI != NonLocalDeps.end()) { + NonLocalDepInfo &BlockMap = NLDI->second.first; + for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); + DI != DE; ++DI) + if (Instruction *Inst = DI->second.getInst()) + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); + NonLocalDeps.erase(NLDI); + } + + // If we have a cached local dependence query for this instruction, remove it. + // + LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); + if (LocalDepEntry != LocalDeps.end()) { + // Remove us from DepInst's reverse set now that the local dep info is gone. + if (Instruction *Inst = LocalDepEntry->second.getInst()) + RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); + + // Remove this local dependency info. + LocalDeps.erase(LocalDepEntry); + } + + // If we have any cached pointer dependencies on this instruction, remove + // them. If the instruction has non-pointer type, then it can't be a pointer + // base. + + // Remove it from both the load info and the store info. The instruction + // can't be in either of these maps if it is non-pointer. + if (isa(RemInst->getType())) { + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } + + // Loop over all of the things that depend on the instruction we're removing. + // + SmallVector, 8> ReverseDepsToAdd; + + // If we find RemInst as a clobber or Def in any of the maps for other values, + // we need to replace its entry with a dirty version of the instruction after + // it. If RemInst is a terminator, we use a null dirty value. + // + // Using a dirty version of the instruction after RemInst saves having to scan + // the entire block to get to this point. + MemDepResult NewDirtyVal; + if (!RemInst->isTerminator()) + NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseLocalDeps.end()) { + SmallPtrSet &ReverseDeps = ReverseDepIt->second; + // RemInst can't be the terminator if it has local stuff depending on it. + assert(!ReverseDeps.empty() && !isa(RemInst) && + "Nothing can locally depend on a terminator"); + + for (SmallPtrSet::iterator I = ReverseDeps.begin(), + E = ReverseDeps.end(); I != E; ++I) { + Instruction *InstDependingOnRemInst = *I; + assert(InstDependingOnRemInst != RemInst && + "Already removed our local dep info"); + + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; + + // Make sure to remember that new things depend on NewDepInst. + assert(NewDirtyVal.getInst() && "There is no way something else can have " + "a local dep on this if it is a terminator!"); + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + InstDependingOnRemInst)); + } + + ReverseLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating the + // 'ReverseDeps' reference. 
+ while (!ReverseDepsToAdd.empty()) { + ReverseLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseNonLocalDeps.end()) { + SmallPtrSet &Set = ReverseDepIt->second; + for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); + I != E; ++I) { + assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + + PerInstNLInfo &INLD = NonLocalDeps[*I]; + // The information is now dirty! + INLD.second = true; + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { + if (DI->second.getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->second = NewDirtyVal; + + if (Instruction *NextI = NewDirtyVal.getInst()) + ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + } + } + + ReverseNonLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating 'Set' + while (!ReverseDepsToAdd.empty()) { + ReverseNonLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a + // value in the NonLocalPointerDeps info. + ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = + ReverseNonLocalPtrDeps.find(RemInst); + if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { + SmallPtrSet &Set = ReversePtrDepIt->second; + SmallVector,8> ReversePtrDepsToAdd; + + for (SmallPtrSet::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + ValueIsLoadPair P = *I; + assert(P.getPointer() != RemInst && + "Already removed NonLocalPointerDeps info for RemInst"); + + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second; + + // The cache is not valid for any specific block anymore. + NonLocalPointerDeps[P].first = BBSkipFirstBlockPair(); + + // Update any entries for RemInst to use the instruction after it. + for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); + DI != DE; ++DI) { + if (DI->second.getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->second = NewDirtyVal; + + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) + ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); + } + + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its + // subsequent value may invalidate the sortedness. + std::sort(NLPDI.begin(), NLPDI.end()); + } + + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); + + while (!ReversePtrDepsToAdd.empty()) { + ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] + .insert(ReversePtrDepsToAdd.back().second); + ReversePtrDepsToAdd.pop_back(); + } + } + + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); + AA->deleteValue(RemInst); + DEBUG(verifyRemoved(RemInst)); +} +/// verifyRemoved - Verify that the specified instruction does not occur +/// in our internal data structures. 
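removeInstruction's central trick, before verifyRemoved below double-checks the result: every cached dependence on the removed instruction R is rewritten to a dirty marker for the instruction after R, so the next query rescans only the small gap R left behind rather than the whole block. A toy version with integer instruction ids and a negative encoding for the hypothetical dirty marker:

#include <map>
#include <set>

// Sketch only: inst -> cached dep (or dirty marker), plus a reverse
// map recording who depends on each instruction.
typedef std::map<int, int>            DepMap;
typedef std::map<int, std::set<int> > ReverseDeps;

void removeInst(int R, DepMap &Deps, ReverseDeps &Rev) {
  const int DirtyAfterR = -(R + 1);    // encodes "dirty; resume after R"
  ReverseDeps::iterator It = Rev.find(R);
  if (It != Rev.end()) {
    for (std::set<int>::iterator I = It->second.begin(),
           E = It->second.end(); I != E; ++I)
      Deps[*I] = DirtyAfterR;          // dependents become dirty, not wrong
    Rev.erase(It);
  }
  Deps.erase(R);                       // drop R's own cached query
}

The reverse map is what makes this O(number of dependents) instead of a full sweep of every cached query, which is exactly the coherence argument the asserts in verifyRemoved are checking.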
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+       E = LocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    assert(I->second.getInst() != D &&
+           "Inst occurs in data structures");
+  }
+
+  for (CachedNonLocalPointerInfo::const_iterator I = NonLocalPointerDeps.begin(),
+       E = NonLocalPointerDeps.end(); I != E; ++I) {
+    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+    const NonLocalDepInfo &Val = I->second.second;
+    for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+         II != E; ++II)
+      assert(II->second.getInst() != D && "Inst occurs as NLPD value");
+  }
+
+  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+       E = NonLocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    const PerInstNLInfo &INLD = I->second;
+    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+         EE = INLD.first.end(); II != EE; ++II)
+      assert(II->second.getInst() != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+       E = ReverseLocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+         EE = I->second.end(); II != EE; ++II)
+      assert(*II != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+       E = ReverseNonLocalDeps.end();
+       I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+         EE = I->second.end(); II != EE; ++II)
+      assert(*II != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseNonLocalPtrDepTy::const_iterator
+       I = ReverseNonLocalPtrDeps.begin(),
+       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in rev NLPD map");
+
+    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator
+         II = I->second.begin(), E = I->second.end(); II != E; ++II)
+      assert(*II != ValueIsLoadPair(D, false) &&
+             *II != ValueIsLoadPair(D, true) &&
+             "Inst occurs in ReverseNonLocalPtrDeps map");
+  }
+
+}
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
new file mode 100644
index 000000000000..4853c2ac87b7
--- /dev/null
+++ b/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,94 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+char PostDominanceFrontier::ID = 0;
+static RegisterPass<PostDominatorTree>
+F("postdomtree", "Post-Dominator Tree Construction", true, true);
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+  DT->recalculate(F);
+  DEBUG(DT->dump());
+  return false;
+}
+
+PostDominatorTree::~PostDominatorTree()
+{
+  delete DT;
+}
+
+FunctionPass* llvm::createPostDomTree() {
+  return new PostDominatorTree();
+}
+
+//===----------------------------------------------------------------------===//
+//  PostDominanceFrontier Implementation
+//===----------------------------------------------------------------------===//
+
+static RegisterPass<PostDominanceFrontier>
+H("postdomfrontier", "Post-Dominance Frontier Construction", true, true);
+
+const DominanceFrontier::DomSetType &
+PostDominanceFrontier::calculate(const PostDominatorTree &DT,
+                                 const DomTreeNode *Node) {
+  // Loop over CFG successors to calculate DFlocal[Node]
+  BasicBlock *BB = Node->getBlock();
+  DomSetType &S = Frontiers[BB];       // The new set to fill in...
+  if (getRoots().empty()) return S;
+
+  if (BB)
+    for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB);
+         SI != SE; ++SI) {
+      // Does Node immediately dominate this predecessor?
+      DomTreeNode *SINode = DT[*SI];
+      if (SINode && SINode->getIDom() != Node)
+        S.insert(*SI);
+    }
+
+  // At this point, S is DFlocal.  Now we union in DFup's of our children...
+  // Loop through and visit the nodes that Node immediately dominates (Node's
+  // children in the IDomTree)
+  //
+  for (DomTreeNode::const_iterator
+       NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) {
+    DomTreeNode *IDominee = *NI;
+    const DomSetType &ChildDF = calculate(DT, IDominee);
+
+    DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end();
+    for (; CDFI != CDFE; ++CDFI) {
+      if (!DT.properlyDominates(Node, DT[*CDFI]))
+        S.insert(*CDFI);
+    }
+  }
+
+  return S;
+}
+
+FunctionPass* llvm::createPostDomFrontier() {
+  return new PostDominanceFrontier();
+}
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 000000000000..a0965b66da81
--- /dev/null
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,100 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
+char ProfileInfo::ID = 0;
+
+ProfileInfo::~ProfileInfo() {}
+
+unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const {
+  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+  // Are there zero predecessors of this block?
+  if (PI == PE) {
+    // If this is the entry block, look for the Null -> Entry edge.
+    if (BB == &BB->getParent()->getEntryBlock())
+      return getEdgeWeight(0, BB);
+    else
+      return 0;   // Otherwise, this is a dead block.
+  }
+
+  // Otherwise, if there are predecessors, the execution count of this block is
+  // the sum of the edge frequencies from the incoming edges.  Note that if a
+  // predecessor has multiple edges to this block, we don't want to count that
+  // edge weight multiple times.  For this reason, we keep track of the
+  // predecessors we've seen and only count each one once.
+  //
+  // We don't want to create an std::set unless we are dealing with a block
+  // that has a LARGE number of in-edges.  Handle the common case of having
+  // only a few in-edges with special code.
+  //
+  BasicBlock *FirstPred = *PI;
+  unsigned Count = getEdgeWeight(FirstPred, BB);
+  ++PI;
+  if (PI == PE) return Count;   // Quick exit for single predecessor blocks
+
+  BasicBlock *SecondPred = *PI;
+  if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB);
+  ++PI;
+  if (PI == PE) return Count;   // Quick exit for two predecessor blocks
+
+  BasicBlock *ThirdPred = *PI;
+  if (ThirdPred != FirstPred && ThirdPred != SecondPred)
+    Count += getEdgeWeight(ThirdPred, BB);
+  ++PI;
+  if (PI == PE) return Count;   // Quick exit for three predecessor blocks
+
+  std::set<BasicBlock*> ProcessedPreds;
+  ProcessedPreds.insert(FirstPred);
+  ProcessedPreds.insert(SecondPred);
+  ProcessedPreds.insert(ThirdPred);
+  for (; PI != PE; ++PI)
+    if (ProcessedPreds.insert(*PI).second)
+      Count += getEdgeWeight(*PI, BB);
+  return Count;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//  NoProfile ProfileInfo implementation
+//
+
+namespace {
+  struct VISIBILITY_HIDDEN NoProfileInfo
+    : public ImmutablePass, public ProfileInfo {
+    static char ID; // Class identification, replacement for typeinfo
+    NoProfileInfo() : ImmutablePass(&ID) {}
+  };
+}  // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+static RegisterPass<NoProfileInfo>
+X("no-profile", "No Profile Information", false, true);
+
+// Declare that we implement the ProfileInfo interface
+static RegisterAnalysisGroup<ProfileInfo, true> Y(X);
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 000000000000..3a0a740f0035
--- /dev/null
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,277 @@
+//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// The ProfileInfoLoader class is used to load and represent profiling +// information read in from the dump file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Support/Streams.h" +#include +#include +#include +using namespace llvm; + +// ByteSwap - Byteswap 'Var' if 'Really' is true. +// +static inline unsigned ByteSwap(unsigned Var, bool Really) { + if (!Really) return Var; + return ((Var & (255<< 0)) << 24) | + ((Var & (255<< 8)) << 8) | + ((Var & (255<<16)) >> 8) | + ((Var & (255<<24)) >> 24); +} + +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + std::vector &Data) { + // Read the number of entries... + unsigned NumEntries; + if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { + cerr << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + NumEntries = ByteSwap(NumEntries, ShouldByteSwap); + + // Read the counts... + std::vector TempSpace(NumEntries); + + // Read in the block of data... + if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { + cerr << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + + // Make sure we have enough space... + if (Data.size() < NumEntries) + Data.resize(NumEntries); + + // Accumulate the data we just read into the data. + if (!ShouldByteSwap) { + for (unsigned i = 0; i != NumEntries; ++i) + Data[i] += TempSpace[i]; + } else { + for (unsigned i = 0; i != NumEntries; ++i) + Data[i] += ByteSwap(TempSpace[i], true); + } +} + +// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the +// program if the file is invalid or broken. +// +ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, + const std::string &Filename, + Module &TheModule) : M(TheModule) { + FILE *F = fopen(Filename.c_str(), "r"); + if (F == 0) { + cerr << ToolName << ": Error opening '" << Filename << "': "; + perror(0); + exit(1); + } + + // Keep reading packets until we run out of them. + unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ByteSwap(PacketType, ShouldByteSwap); + + switch (PacketType) { + case ArgumentInfo: { + unsigned ArgLength; + if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { + cerr << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + ArgLength = ByteSwap(ArgLength, ShouldByteSwap); + + // Read in the arguments... 
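+      // The saved argument string is padded out to a multiple of four bytes
+      // in the file, which is why the buffer below has extra slack and the
+      // read size is rounded up with (ArgLength+3) & ~3.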
+ std::vector Chars(ArgLength+4); + + if (ArgLength) + if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { + cerr << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength])); + break; + } + + case FunctionInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts); + break; + + case BlockInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + case BBTraceInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); + break; + + default: + cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + exit(1); + } + } + + fclose(F); +} + + +// getFunctionCounts - This method is used by consumers of function counting +// information. If we do not directly have function count information, we +// compute it from other, more refined, types of profile information. +// +void ProfileInfoLoader::getFunctionCounts(std::vector > &Counts) { + if (FunctionCounts.empty()) { + if (hasAccurateBlockCounts()) { + // Synthesize function frequency information from the number of times + // their entry blocks were executed. + std::vector > BlockCounts; + getBlockCounts(BlockCounts); + + for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i) + if (&BlockCounts[i].first->getParent()->getEntryBlock() == + BlockCounts[i].first) + Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(), + BlockCounts[i].second)); + } else { + cerr << "Function counts are not available!\n"; + } + return; + } + + unsigned Counter = 0; + for (Module::iterator I = M.begin(), E = M.end(); + I != E && Counter != FunctionCounts.size(); ++I) + if (!I->isDeclaration()) + Counts.push_back(std::make_pair(I, FunctionCounts[Counter++])); +} + +// getBlockCounts - This method is used by consumers of block counting +// information. If we do not directly have block count information, we +// compute it from other, more refined, types of profile information. +// +void ProfileInfoLoader::getBlockCounts(std::vector > &Counts) { + if (BlockCounts.empty()) { + if (hasAccurateEdgeCounts()) { + // Synthesize block count information from edge frequency information. + // The block execution frequency is equal to the sum of the execution + // frequency of all outgoing edges from a block. + // + // If a block has no successors, this will not be correct, so we have to + // special case it. :( + std::vector > EdgeCounts; + getEdgeCounts(EdgeCounts); + + std::map InEdgeFreqs; + + BasicBlock *LastBlock = 0; + TerminatorInst *TI = 0; + for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) { + if (EdgeCounts[i].first.first != LastBlock) { + LastBlock = EdgeCounts[i].first.first; + TI = LastBlock->getTerminator(); + Counts.push_back(std::make_pair(LastBlock, 0)); + } + Counts.back().second += EdgeCounts[i].second; + unsigned SuccNum = EdgeCounts[i].first.second; + if (SuccNum >= TI->getNumSuccessors()) { + static bool Warned = false; + if (!Warned) { + cerr << "WARNING: profile info doesn't seem to match" + << " the program!\n"; + Warned = true; + } + } else { + // If this successor has no successors of its own, we will never + // compute an execution count for that block. Remember the incoming + // edge frequencies to add later. 
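+          // (A block whose terminator has no successors, e.g. one ending in
+          // 'ret' or 'unreachable', has no outgoing edges to sum, so its
+          // count can only be recovered from its incoming edge frequencies.)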
+ BasicBlock *Succ = TI->getSuccessor(SuccNum); + if (Succ->getTerminator()->getNumSuccessors() == 0) + InEdgeFreqs[Succ] += EdgeCounts[i].second; + } + } + + // Now we have to accumulate information for those blocks without + // successors into our table. + for (std::map::iterator I = InEdgeFreqs.begin(), + E = InEdgeFreqs.end(); I != E; ++I) { + unsigned i = 0; + for (; i != Counts.size() && Counts[i].first != I->first; ++i) + /*empty*/; + if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0)); + Counts[i].second += I->second; + } + + } else { + cerr << "Block counts are not available!\n"; + } + return; + } + + unsigned Counter = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Counts.push_back(std::make_pair(BB, BlockCounts[Counter++])); + if (Counter == BlockCounts.size()) + return; + } +} + +// getEdgeCounts - This method is used by consumers of edge counting +// information. If we do not directly have edge count information, we compute +// it from other, more refined, types of profile information. +// +void ProfileInfoLoader::getEdgeCounts(std::vector > &Counts) { + if (EdgeCounts.empty()) { + cerr << "Edge counts not available, and no synthesis " + << "is implemented yet!\n"; + return; + } + + unsigned Counter = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors(); + i != e; ++i) { + Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++])); + if (Counter == EdgeCounts.size()) + return; + } +} + +// getBBTrace - This method is used by consumers of basic-block trace +// information. +// +void ProfileInfoLoader::getBBTrace(std::vector &Trace) { + if (BBTrace.empty ()) { + cerr << "Basic block trace is not available!\n"; + return; + } + cerr << "Basic block trace loading is not implemented yet!\n"; +} diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp new file mode 100644 index 000000000000..0a8a87bd0f97 --- /dev/null +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -0,0 +1,92 @@ +//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// loads the information from a profile dump file. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+                    cl::value_desc("filename"),
+                    cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+  class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo {
+    std::string Filename;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit LoaderPass(const std::string &filename = "")
+      : ModulePass(&ID), Filename(filename) {
+      if (filename.empty()) Filename = ProfileInfoFilename;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profiling information loader";
+    }
+
+    /// run - Load the profile information from the specified file.
+    virtual bool runOnModule(Module &M);
+  };
+}  // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+static RegisterPass<LoaderPass>
+X("profile-loader", "Load profile information from llvmprof.out", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+  return new LoaderPass(Filename);
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+  ProfileInfoLoader PIL("profile-loader", Filename, M);
+  EdgeCounts.clear();
+  bool PrintedWarning = false;
+
+  std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > ECs;
+  PIL.getEdgeCounts(ECs);
+  for (unsigned i = 0, e = ECs.size(); i != e; ++i) {
+    BasicBlock *BB = ECs[i].first.first;
+    unsigned SuccNum = ECs[i].first.second;
+    TerminatorInst *TI = BB->getTerminator();
+    if (SuccNum >= TI->getNumSuccessors()) {
+      if (!PrintedWarning) {
+        cerr << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
+        PrintedWarning = true;
+      }
+    } else {
+      EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))] += ECs[i].second;
+    }
+  }
+
+  return false;
+}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 000000000000..f7f1849b6da8
--- /dev/null
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,3824 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library.  First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle.
These classes are reference counted, managed by the SCEVHandle +// class. We only create one SCEV of a particular shape, so pointer-comparisons +// for equality are legal. +// +// One important aspect of the SCEV objects is that they are never cyclic, even +// if there is a cycle in the dataflow for an expression (ie, a PHI node). If +// the PHI node is one of the idioms that we can represent (e.g., a polynomial +// recurrence) then we represent it directly as a recurrence node, otherwise we +// represent it as a SCEVUnknown node. +// +// In addition to being able to represent expressions of various types, we also +// have folders that are used to build the *canonical* representation for a +// particular expression. These folders are capable of using a variety of +// rewrite rules to simplify the expressions. +// +// Once the folders are defined, we can implement the more interesting +// higher-level code, such as the code that recognizes PHI nodes of various +// types, computes the execution count of a loop, etc. +// +// TODO: We should use these routines and value representations to implement +// dependence analysis! +// +//===----------------------------------------------------------------------===// +// +// There are several good references for the techniques used in this analysis. +// +// Chains of recurrences -- a method to expedite the evaluation +// of closed-form functions +// Olaf Bachmann, Paul S. Wang, Eugene V. Zima +// +// On computational properties of chains of recurrences +// Eugene V. Zima +// +// Symbolic Evaluation of Chains of Recurrences for Loop Optimization +// Robert A. van Engelen +// +// Efficient Symbolic Analysis for Optimizing Compilers +// Robert A. van Engelen +// +// Using the chains of recurrences algebra for data dependence testing and +// induction variable substitution +// MS Thesis, Johnie Birch +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalar-evolution" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +#include +using namespace llvm; + +STATISTIC(NumArrayLenItCounts, + "Number of trip counts computed with array length"); +STATISTIC(NumTripCountsComputed, + "Number of loops with predictable loop counts"); +STATISTIC(NumTripCountsNotComputed, + "Number of loops without predictable loop counts"); +STATISTIC(NumBruteForceTripCountsComputed, + "Number of loops with trip counts computed by force"); + +static cl::opt +MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant derived loop"), + cl::init(100)); + +static RegisterPass +R("scalar-evolution", "Scalar Evolution Analysis", false, true); +char ScalarEvolution::ID = 0; + 
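The folders that follow all share one uniquing discipline: each expression shape has a ManagedStatic side table, creation sites look the key up and allocate only on a miss, and each expression's destructor erases its own entry. That is what makes pointer comparison a valid structural-equality test. Below is a minimal standalone sketch of that lookup-or-create pattern, with hypothetical names (a plain std::map and a toy Expr standing in for the ManagedStatic tables and SCEV):

#include <map>
#include <utility>

struct Expr;
typedef std::map<std::pair<int, const Expr*>, Expr*> ExprTable;
static ExprTable UniqueExprs;   // one table per expression shape in the real code

struct Expr {
  int Kind;
  const Expr *Op;
  Expr(int K, const Expr *O) : Kind(K), Op(O) {}
  // On destruction, remove the table entry so a later request rebuilds it.
  ~Expr() { UniqueExprs.erase(std::make_pair(Kind, Op)); }
};

// Lookup-or-create: at most one Expr ever exists per (Kind, Op) key, so
// clients may compare Expr pointers to test structural equality.
static Expr *getExpr(int Kind, const Expr *Op) {
  Expr *&Slot = UniqueExprs[std::make_pair(Kind, Op)];
  if (Slot == 0) Slot = new Expr(Kind, Op);
  return Slot;
}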
+//===----------------------------------------------------------------------===//
+//                             SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+SCEV::~SCEV() {}
+void SCEV::dump() const {
+  print(errs());
+  errs() << '\n';
+}
+
+void SCEV::print(std::ostream &o) const {
+  raw_os_ostream OS(o);
+  print(OS);
+}
+
+bool SCEV::isZero() const {
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+    return SC->getValue()->isZero();
+  return false;
+}
+
+bool SCEV::isOne() const {
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+    return SC->getValue()->isOne();
+  return false;
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {}
+SCEVCouldNotCompute::~SCEVCouldNotCompute() {}
+
+bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
+  assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+  return false;
+}
+
+const Type *SCEVCouldNotCompute::getType() const {
+  assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+  return 0;
+}
+
+bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
+  assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+  return false;
+}
+
+SCEVHandle SCEVCouldNotCompute::
+replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
+                                  const SCEVHandle &Conc,
+                                  ScalarEvolution &SE) const {
+  return this;
+}
+
+void SCEVCouldNotCompute::print(raw_ostream &OS) const {
+  OS << "***COULDNOTCOMPUTE***";
+}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+  return S->getSCEVType() == scCouldNotCompute;
+}
+
+
+// SCEVConstants - Only allow the creation of one SCEVConstant for any
+// particular value.  Don't use a SCEVHandle here, or else the object will
+// never be deleted!
+static ManagedStatic<std::map<ConstantInt*, SCEVConstant*> > SCEVConstants;
+
+
+SCEVConstant::~SCEVConstant() {
+  SCEVConstants->erase(V);
+}
+
+SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) {
+  SCEVConstant *&R = (*SCEVConstants)[V];
+  if (R == 0) R = new SCEVConstant(V);
+  return R;
+}
+
+SCEVHandle ScalarEvolution::getConstant(const APInt& Val) {
+  return getConstant(ConstantInt::get(Val));
+}
+
+const Type *SCEVConstant::getType() const { return V->getType(); }
+
+void SCEVConstant::print(raw_ostream &OS) const {
+  WriteAsOperand(OS, V, false);
+}
+
+SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
+                           const SCEVHandle &op, const Type *ty)
+  : SCEV(SCEVTy), Op(op), Ty(ty) {}
+
+SCEVCastExpr::~SCEVCastExpr() {}
+
+bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+  return Op->dominates(BB, DT);
+}
+
+// SCEVTruncates - Only allow the creation of one SCEVTruncateExpr for any
+// particular input.  Don't use a SCEVHandle here, or else the object will
+// never be deleted!
+static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
+                     SCEVTruncateExpr*> > SCEVTruncates;
+
+SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty)
+  : SCEVCastExpr(scTruncate, op, ty) {
+  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+         "Cannot truncate non-integer value!");
+}
+
+SCEVTruncateExpr::~SCEVTruncateExpr() {
+  SCEVTruncates->erase(std::make_pair(Op, Ty));
+}
+
+void SCEVTruncateExpr::print(raw_ostream &OS) const {
+  OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
+}
+
+// SCEVZeroExtends - Only allow the creation of one SCEVZeroExtendExpr for any
+// particular input.
Don't use a SCEVHandle here, or else the object will never +// be deleted! +static ManagedStatic, + SCEVZeroExtendExpr*> > SCEVZeroExtends; + +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty) + : SCEVCastExpr(scZeroExtend, op, ty) { + assert((Op->getType()->isInteger() || isa(Op->getType())) && + (Ty->isInteger() || isa(Ty)) && + "Cannot zero extend non-integer value!"); +} + +SCEVZeroExtendExpr::~SCEVZeroExtendExpr() { + SCEVZeroExtends->erase(std::make_pair(Op, Ty)); +} + +void SCEVZeroExtendExpr::print(raw_ostream &OS) const { + OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; +} + +// SCEVSignExtends - Only allow the creation of one SCEVSignExtendExpr for any +// particular input. Don't use a SCEVHandle here, or else the object will never +// be deleted! +static ManagedStatic, + SCEVSignExtendExpr*> > SCEVSignExtends; + +SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty) + : SCEVCastExpr(scSignExtend, op, ty) { + assert((Op->getType()->isInteger() || isa(Op->getType())) && + (Ty->isInteger() || isa(Ty)) && + "Cannot sign extend non-integer value!"); +} + +SCEVSignExtendExpr::~SCEVSignExtendExpr() { + SCEVSignExtends->erase(std::make_pair(Op, Ty)); +} + +void SCEVSignExtendExpr::print(raw_ostream &OS) const { + OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; +} + +// SCEVCommExprs - Only allow the creation of one SCEVCommutativeExpr for any +// particular input. Don't use a SCEVHandle here, or else the object will never +// be deleted! +static ManagedStatic >, + SCEVCommutativeExpr*> > SCEVCommExprs; + +SCEVCommutativeExpr::~SCEVCommutativeExpr() { + std::vector SCEVOps(Operands.begin(), Operands.end()); + SCEVCommExprs->erase(std::make_pair(getSCEVType(), SCEVOps)); +} + +void SCEVCommutativeExpr::print(raw_ostream &OS) const { + assert(Operands.size() > 1 && "This plus expr shouldn't exist!"); + const char *OpStr = getOperationStr(); + OS << "(" << *Operands[0]; + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + OS << OpStr << *Operands[i]; + OS << ")"; +} + +SCEVHandle SCEVCommutativeExpr:: +replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, + const SCEVHandle &Conc, + ScalarEvolution &SE) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SCEVHandle H = + getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); + if (H != getOperand(i)) { + std::vector NewOps; + NewOps.reserve(getNumOperands()); + for (unsigned j = 0; j != i; ++j) + NewOps.push_back(getOperand(j)); + NewOps.push_back(H); + for (++i; i != e; ++i) + NewOps.push_back(getOperand(i)-> + replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); + + if (isa(this)) + return SE.getAddExpr(NewOps); + else if (isa(this)) + return SE.getMulExpr(NewOps); + else if (isa(this)) + return SE.getSMaxExpr(NewOps); + else if (isa(this)) + return SE.getUMaxExpr(NewOps); + else + assert(0 && "Unknown commutative expr!"); + } + } + return this; +} + +bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (!getOperand(i)->dominates(BB, DT)) + return false; + } + return true; +} + + +// SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular +// input. Don't use a SCEVHandle here, or else the object will never be +// deleted! 
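+// (A SCEVHandle entry would hold a reference count on the expression, so the
+// count could never drop to zero and the destructor that erases the table
+// entry below would never run.)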
+static ManagedStatic, + SCEVUDivExpr*> > SCEVUDivs; + +SCEVUDivExpr::~SCEVUDivExpr() { + SCEVUDivs->erase(std::make_pair(LHS, RHS)); +} + +bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); +} + +void SCEVUDivExpr::print(raw_ostream &OS) const { + OS << "(" << *LHS << " /u " << *RHS << ")"; +} + +const Type *SCEVUDivExpr::getType() const { + // In most cases the types of LHS and RHS will be the same, but in some + // crazy cases one or the other may be a pointer. ScalarEvolution doesn't + // depend on the type for correctness, but handling types carefully can + // avoid extra casts in the SCEVExpander. The LHS is more likely to be + // a pointer type than the RHS, so use the RHS' type here. + return RHS->getType(); +} + +// SCEVAddRecExprs - Only allow the creation of one SCEVAddRecExpr for any +// particular input. Don't use a SCEVHandle here, or else the object will never +// be deleted! +static ManagedStatic >, + SCEVAddRecExpr*> > SCEVAddRecExprs; + +SCEVAddRecExpr::~SCEVAddRecExpr() { + std::vector SCEVOps(Operands.begin(), Operands.end()); + SCEVAddRecExprs->erase(std::make_pair(L, SCEVOps)); +} + +SCEVHandle SCEVAddRecExpr:: +replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, + const SCEVHandle &Conc, + ScalarEvolution &SE) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SCEVHandle H = + getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); + if (H != getOperand(i)) { + std::vector NewOps; + NewOps.reserve(getNumOperands()); + for (unsigned j = 0; j != i; ++j) + NewOps.push_back(getOperand(j)); + NewOps.push_back(H); + for (++i; i != e; ++i) + NewOps.push_back(getOperand(i)-> + replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); + + return SE.getAddRecExpr(NewOps, L); + } + } + return this; +} + + +bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { + // This recurrence is invariant w.r.t to QueryLoop iff QueryLoop doesn't + // contain L and if the start is invariant. + // Add recurrences are never invariant in the function-body (null loop). + return QueryLoop && + !QueryLoop->contains(L->getHeader()) && + getOperand(0)->isLoopInvariant(QueryLoop); +} + + +void SCEVAddRecExpr::print(raw_ostream &OS) const { + OS << "{" << *Operands[0]; + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + OS << ",+," << *Operands[i]; + OS << "}<" << L->getHeader()->getName() + ">"; +} + +// SCEVUnknowns - Only allow the creation of one SCEVUnknown for any particular +// value. Don't use a SCEVHandle here, or else the object will never be +// deleted! +static ManagedStatic > SCEVUnknowns; + +SCEVUnknown::~SCEVUnknown() { SCEVUnknowns->erase(V); } + +bool SCEVUnknown::isLoopInvariant(const Loop *L) const { + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". 
+ if (Instruction *I = dyn_cast(V)) + return L && !L->contains(I->getParent()); + return true; +} + +bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { + if (Instruction *I = dyn_cast(getValue())) + return DT->dominates(I->getParent(), BB); + return true; +} + +const Type *SCEVUnknown::getType() const { + return V->getType(); +} + +void SCEVUnknown::print(raw_ostream &OS) const { + WriteAsOperand(OS, V, false); +} + +//===----------------------------------------------------------------------===// +// SCEV Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// SCEVComplexityCompare - Return true if the complexity of the LHS is less + /// than the complexity of the RHS. This comparator is used to canonicalize + /// expressions. + class VISIBILITY_HIDDEN SCEVComplexityCompare { + LoopInfo *LI; + public: + explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {} + + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + // Primarily, sort the SCEVs by their getSCEVType(). + if (LHS->getSCEVType() != RHS->getSCEVType()) + return LHS->getSCEVType() < RHS->getSCEVType(); + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + if (const SCEVUnknown *LU = dyn_cast(LHS)) { + const SCEVUnknown *RU = cast(RHS); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + if (isa(LU->getType()) && !isa(RU->getType())) + return false; + if (isa(RU->getType()) && !isa(LU->getType())) + return true; + + // Compare getValueID values. + if (LU->getValue()->getValueID() != RU->getValue()->getValueID()) + return LU->getValue()->getValueID() < RU->getValue()->getValueID(); + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LU->getValue())) { + const Argument *RA = cast(RU->getValue()); + return LA->getArgNo() < RA->getArgNo(); + } + + // For instructions, compare their loop depth, and their opcode. + // This is pretty loose. + if (Instruction *LV = dyn_cast(LU->getValue())) { + Instruction *RV = cast(RU->getValue()); + + // Compare loop depths. + if (LI->getLoopDepth(LV->getParent()) != + LI->getLoopDepth(RV->getParent())) + return LI->getLoopDepth(LV->getParent()) < + LI->getLoopDepth(RV->getParent()); + + // Compare opcodes. + if (LV->getOpcode() != RV->getOpcode()) + return LV->getOpcode() < RV->getOpcode(); + + // Compare the number of operands. + if (LV->getNumOperands() != RV->getNumOperands()) + return LV->getNumOperands() < RV->getNumOperands(); + } + + return false; + } + + // Constant sorting doesn't matter since they'll be folded. + if (isa(LHS)) + return false; + + // Lexicographically compare n-ary expressions. + if (const SCEVNAryExpr *LC = dyn_cast(LHS)) { + const SCEVNAryExpr *RC = cast(RHS); + for (unsigned i = 0, e = LC->getNumOperands(); i != e; ++i) { + if (i >= RC->getNumOperands()) + return false; + if (operator()(LC->getOperand(i), RC->getOperand(i))) + return true; + if (operator()(RC->getOperand(i), LC->getOperand(i))) + return false; + } + return LC->getNumOperands() < RC->getNumOperands(); + } + + // Lexicographically compare udiv expressions. 
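+      // (Compare by LHS first, then by RHS; each operand is tested in both
+      // directions so that equal operands fall through to the next test.)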
+ if (const SCEVUDivExpr *LC = dyn_cast(LHS)) { + const SCEVUDivExpr *RC = cast(RHS); + if (operator()(LC->getLHS(), RC->getLHS())) + return true; + if (operator()(RC->getLHS(), LC->getLHS())) + return false; + if (operator()(LC->getRHS(), RC->getRHS())) + return true; + if (operator()(RC->getRHS(), LC->getRHS())) + return false; + return false; + } + + // Compare cast expressions by operand. + if (const SCEVCastExpr *LC = dyn_cast(LHS)) { + const SCEVCastExpr *RC = cast(RHS); + return operator()(LC->getOperand(), RC->getOperand()); + } + + assert(0 && "Unknown SCEV kind!"); + return false; + } + }; +} + +/// GroupByComplexity - Given a list of SCEV objects, order them by their +/// complexity, and group objects of the same complexity together by value. +/// When this routine is finished, we know that any duplicates in the vector are +/// consecutive and that complexity is monotonically increasing. +/// +/// Note that we go take special precautions to ensure that we get determinstic +/// results from this routine. In other words, we don't want the results of +/// this to depend on where the addresses of various SCEV objects happened to +/// land in memory. +/// +static void GroupByComplexity(std::vector &Ops, + LoopInfo *LI) { + if (Ops.size() < 2) return; // Noop + if (Ops.size() == 2) { + // This is the common case, which also happens to be trivially simple. + // Special case it. + if (SCEVComplexityCompare(LI)(Ops[1], Ops[0])) + std::swap(Ops[0], Ops[1]); + return; + } + + // Do the rough sort by complexity. + std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + + // Now that we are sorted by complexity, group elements of the same + // complexity. Note that this is, at worst, N^2, but the vector is likely to + // be extremely short in practice. Note that we take this approach because we + // do not want to depend on the addresses of the objects we are grouping. + for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { + const SCEV *S = Ops[i]; + unsigned Complexity = S->getSCEVType(); + + // If there are any objects of the same complexity and same value as this + // one, group them. + for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { + if (Ops[j] == S) { // Found a duplicate. + // Move it to immediately after i'th element. + std::swap(Ops[i+1], Ops[j]); + ++i; // no need to rescan it. + if (i == e-2) return; // Done! + } + } + } +} + + + +//===----------------------------------------------------------------------===// +// Simple SCEV method implementations +//===----------------------------------------------------------------------===// + +/// BinomialCoefficient - Compute BC(It, K). The result has width W. +/// Assume, K > 0. +static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K, + ScalarEvolution &SE, + const Type* ResultTy) { + // Handle the simplest case efficiently. + if (K == 1) + return SE.getTruncateOrZeroExtend(It, ResultTy); + + // We are using the following formula for BC(It, K): + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! + // + // Suppose, W is the bitwidth of the return value. We must be prepared for + // overflow. Hence, we must assure that the result of our computation is + // equal to the accurate one modulo 2^W. Unfortunately, division isn't + // safe in modular arithmetic. + // + // However, this code doesn't use exactly that formula; the formula it uses + // is something like the following, where T is the number of factors of 2 in + // K! (i.e. 
trailing zeros in the binary representation of K!), and ^ is + // exponentiation: + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) + // + // This formula is trivially equivalent to the previous formula. However, + // this formula can be implemented much more efficiently. The trick is that + // K! / 2^T is odd, and exact division by an odd number *is* safe in modular + // arithmetic. To do exact division in modular arithmetic, all we have + // to do is multiply by the inverse. Therefore, this step can be done at + // width W. + // + // The next issue is how to safely do the division by 2^T. The way this + // is done is by doing the multiplication step at a width of at least W + T + // bits. This way, the bottom W+T bits of the product are accurate. Then, + // when we perform the division by 2^T (which is equivalent to a right shift + // by T), the bottom W bits are accurate. Extra bits are okay; they'll get + // truncated out after the division by 2^T. + // + // In comparison to just directly using the first formula, this technique + // is much more efficient; using the first formula requires W * K bits, + // but this formula less than W + K bits. Also, the first formula requires + // a division step, whereas this formula only requires multiplies and shifts. + // + // It doesn't matter whether the subtraction step is done in the calculation + // width or the input iteration count's width; if the subtraction overflows, + // the result must be zero anyway. We prefer here to do it in the width of + // the induction variable because it helps a lot for certain cases; CodeGen + // isn't smart enough to ignore the overflow, which leads to much less + // efficient code if the width of the subtraction is wider than the native + // register width. + // + // (It's possible to not widen at all by pulling out factors of 2 before + // the multiplication; for example, K=2 can be calculated as + // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires + // extra arithmetic, so it's not an obvious win, and it gets + // much more complicated for K > 3.) + + // Protection from insane SCEVs; this bound is conservative, + // but it probably doesn't matter. + if (K > 1000) + return SE.getCouldNotCompute(); + + unsigned W = SE.getTypeSizeInBits(ResultTy); + + // Calculate K! / 2^T and T; we divide out the factors of two before + // multiplying for calculating K! / 2^T to avoid overflow. + // Other overflow doesn't matter because we only care about the bottom + // W bits of the result. + APInt OddFactorial(W, 1); + unsigned T = 1; + for (unsigned i = 3; i <= K; ++i) { + APInt Mult(W, i); + unsigned TwoFactors = Mult.countTrailingZeros(); + T += TwoFactors; + Mult = Mult.lshr(TwoFactors); + OddFactorial *= Mult; + } + + // We need at least W + T bits for the multiplication step + unsigned CalculationBits = W + T; + + // Calcuate 2^T, at width T+W. + APInt DivFactor = APInt(CalculationBits, 1).shl(T); + + // Calculate the multiplicative inverse of K! / 2^T; + // this multiplication factor will perform the exact division by + // K! / 2^T. 
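+  // (Worked example: K=4 at W=8.  K! = 24 = 2^3 * 3, so T=3 and
+  // OddFactorial = 3.  The multiplicative inverse of 3 mod 2^8 is 171,
+  // since 3*171 = 513 = 2*256 + 1; the exact division by 3 thus becomes a
+  // multiply: 24*171 = 4104 = 16*256 + 8, i.e. 24/3 = 8 (mod 2^8).)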
+ APInt Mod = APInt::getSignedMinValue(W+1); + APInt MultiplyFactor = OddFactorial.zext(W+1); + MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); + MultiplyFactor = MultiplyFactor.trunc(W); + + // Calculate the product, at width T+W + const IntegerType *CalculationTy = IntegerType::get(CalculationBits); + SCEVHandle Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + for (unsigned i = 1; i != K; ++i) { + SCEVHandle S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + Dividend = SE.getMulExpr(Dividend, + SE.getTruncateOrZeroExtend(S, CalculationTy)); + } + + // Divide by 2^T + SCEVHandle DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + + // Truncate the result, and divide by K! / 2^T. + + return SE.getMulExpr(SE.getConstant(MultiplyFactor), + SE.getTruncateOrZeroExtend(DivResult, ResultTy)); +} + +/// evaluateAtIteration - Return the value of this chain of recurrences at +/// the specified iteration number. We can evaluate this recurrence by +/// multiplying each element in the chain by the binomial coefficient +/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as: +/// +/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) +/// +/// where BC(It, k) stands for binomial coefficient. +/// +SCEVHandle SCEVAddRecExpr::evaluateAtIteration(SCEVHandle It, + ScalarEvolution &SE) const { + SCEVHandle Result = getStart(); + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + // The computation is correct in the face of overflow provided that the + // multiplication is performed _after_ the evaluation of the binomial + // coefficient. + SCEVHandle Coeff = BinomialCoefficient(It, i, SE, getType()); + if (isa(Coeff)) + return Coeff; + + Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); + } + return Result; +} + +//===----------------------------------------------------------------------===// +// SCEV Expression folder implementations +//===----------------------------------------------------------------------===// + +SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + if (const SCEVConstant *SC = dyn_cast(Op)) + return getUnknown( + ConstantExpr::getTrunc(SC->getValue(), Ty)); + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) + return getTruncateExpr(ST->getOperand(), Ty); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) + return getTruncateOrSignExtend(SS->getOperand(), Ty); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) + return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + + // If the input value is a chrec scev made out of constants, truncate + // all of the constants. 
+ if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { + std::vector Operands; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + return getAddRecExpr(Operands, AddRec->getLoop()); + } + + SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty); + return Result; +} + +SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + if (const SCEVConstant *SC = dyn_cast(Op)) { + const Type *IntTy = getEffectiveSCEVType(Ty); + Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy); + if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); + return getUnknown(C); + } + + // zext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can zero extend all of the + // operands (often constants). This allows analysis of something like + // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); + if (!isa(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + SCEVHandle RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. + SCEVHandle ZMul = + getMulExpr(CastedMaxBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + SCEVHandle Add = getAddExpr(Start, ZMul); + SCEVHandle OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + AR->getLoop()); + + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. 
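+          // (e.g. for (unsigned char i = 100; i != 0; --i): the step is -1,
+          // which zero extension would turn into 255, while sign extension
+          // preserves the downward recurrence.)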
+ SCEVHandle SMul = + getMulExpr(CastedMaxBECount, + getTruncateOrSignExtend(Step, Start->getType())); + Add = getAddExpr(Start, SMul); + OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + + SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); + return Result; +} + +SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + if (const SCEVConstant *SC = dyn_cast(Op)) { + const Type *IntTy = getEffectiveSCEVType(Ty); + Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy); + if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); + return getUnknown(C); + } + + // sext(sext(x)) --> sext(x) + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) + return getSignExtendExpr(SS->getOperand(), Ty); + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can sign extend all of the + // operands (often constants). This allows analysis of something like + // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); + if (!isa(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + SCEVHandle RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. + SCEVHandle SMul = + getMulExpr(CastedMaxBECount, + getTruncateOrSignExtend(Step, Start->getType())); + SCEVHandle Add = getAddExpr(Start, SMul); + SCEVHandle OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. 
+ return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + + SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); + return Result; +} + +/// getAddExpr - Get a canonical add expression, or something simpler if +/// possible. +SCEVHandle ScalarEvolution::getAddExpr(std::vector &Ops) { + assert(!Ops.empty() && "Cannot get empty add!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == + getEffectiveSCEVType(Ops[0]->getType()) && + "SCEVAddExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + + // If we are left with a constant zero being added, strip it off. + if (cast(Ops[0])->getValue()->isZero()) { + Ops.erase(Ops.begin()); + --Idx; + } + } + + if (Ops.size() == 1) return Ops[0]; + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, merge them together into an multiply expression. Since we sorted the + // list, these values are required to be adjacent. + const Type *Ty = Ops[0]->getType(); + for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 + // Found a match, merge the two values into a multiply, and add any + // remaining values to the result. + SCEVHandle Two = getIntegerSCEV(2, Ty); + SCEVHandle Mul = getMulExpr(Ops[i], Two); + if (Ops.size() == 2) + return Mul; + Ops.erase(Ops.begin()+i, Ops.begin()+i+2); + Ops.push_back(Mul); + return getAddExpr(Ops); + } + + // Check for truncates. If all the operands are truncated from the same + // type, see if factoring out the truncate would permit the result to be + // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // if the contents of the resulting outer trunc fold to something simple. + for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { + const SCEVTruncateExpr *Trunc = cast(Ops[Idx]); + const Type *DstType = Trunc->getType(); + const Type *SrcType = Trunc->getOperand()->getType(); + std::vector LargeOps; + bool Ok = true; + // Check all the operands to see if they can be represented in the + // source type of the truncate. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (const SCEVTruncateExpr *T = dyn_cast(Ops[i])) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = dyn_cast(Ops[i])) { + // This could be either sign or zero extension, but sign extension + // is much more likely to be foldable here. 
+        LargeOps.push_back(getSignExtendExpr(C, SrcType));
+      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+        std::vector<SCEVHandle> LargeMulOps;
+        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+          if (const SCEVTruncateExpr *T =
+                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+            if (T->getOperand()->getType() != SrcType) {
+              Ok = false;
+              break;
+            }
+            LargeMulOps.push_back(T->getOperand());
+          } else if (const SCEVConstant *C =
+                       dyn_cast<SCEVConstant>(M->getOperand(j))) {
+            // This could be either sign or zero extension, but sign extension
+            // is much more likely to be foldable here.
+            LargeMulOps.push_back(getSignExtendExpr(C, SrcType));
+          } else {
+            Ok = false;
+            break;
+          }
+        }
+        if (Ok)
+          LargeOps.push_back(getMulExpr(LargeMulOps));
+      } else {
+        Ok = false;
+        break;
+      }
+    }
+    if (Ok) {
+      // Evaluate the expression in the larger type.
+      SCEVHandle Fold = getAddExpr(LargeOps);
+      // If it folds to something simple, use it. Otherwise, don't.
+      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+        return getTruncateExpr(Fold, DstType);
+    }
+  }
+
+  // Skip past any other cast SCEVs.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+    ++Idx;
+
+  // If there are add operands they would be next.
+  if (Idx < Ops.size()) {
+    bool DeletedAdd = false;
+    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+      // If we have an add, expand the add operands onto the end of the
+      // operands list.
+      Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+      Ops.erase(Ops.begin()+Idx);
+      DeletedAdd = true;
+    }
+
+    // If we deleted at least one add, we added operands to the end of the
+    // list, and they are not necessarily sorted.  Recurse to resort and
+    // resimplify any operands we just acquired.
+    if (DeletedAdd)
+      return getAddExpr(Ops);
+  }
+
+  // Skip over the add expression until we get to a multiply.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+    ++Idx;
+
+  // If we are adding something to a multiply expression, make sure the
+  // something is not already an operand of the multiply.  If so, merge it into
+  // the multiply.
+  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+        if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) {
+          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
+          SCEVHandle InnerMul = Mul->getOperand(MulOp == 0);
+          if (Mul->getNumOperands() != 2) {
+            // If the multiply has more than two operands, we must get the
+            // Y*Z term.
+            std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+            MulOps.erase(MulOps.begin()+MulOp);
+            InnerMul = getMulExpr(MulOps);
+          }
+          SCEVHandle One = getIntegerSCEV(1, Ty);
+          SCEVHandle AddOne = getAddExpr(InnerMul, One);
+          SCEVHandle OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+          if (Ops.size() == 2) return OuterMul;
+          if (AddOp < Idx) {
+            Ops.erase(Ops.begin()+AddOp);
+            Ops.erase(Ops.begin()+Idx-1);
+          } else {
+            Ops.erase(Ops.begin()+Idx);
+            Ops.erase(Ops.begin()+AddOp-1);
+          }
+          Ops.push_back(OuterMul);
+          return getAddExpr(Ops);
+        }
+
+      // Check this multiply against other multiplies being added together.
+      for (unsigned OtherMulIdx = Idx+1;
+           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+           ++OtherMulIdx) {
+        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+        // If MulOp occurs in OtherMul, we can fold the two multiplies
+        // together.
+        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+             OMulOp != e; ++OMulOp)
+          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+            SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0);
+            if (Mul->getNumOperands() != 2) {
+              std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+              MulOps.erase(MulOps.begin()+MulOp);
+              InnerMul1 = getMulExpr(MulOps);
+            }
+            SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+            if (OtherMul->getNumOperands() != 2) {
+              std::vector<SCEVHandle> MulOps(OtherMul->op_begin(),
+                                             OtherMul->op_end());
+              MulOps.erase(MulOps.begin()+OMulOp);
+              InnerMul2 = getMulExpr(MulOps);
+            }
+            SCEVHandle InnerMulSum = getAddExpr(InnerMul1, InnerMul2);
+            SCEVHandle OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+            if (Ops.size() == 2) return OuterMul;
+            Ops.erase(Ops.begin()+Idx);
+            Ops.erase(Ops.begin()+OtherMulIdx-1);
+            Ops.push_back(OuterMul);
+            return getAddExpr(Ops);
+          }
+      }
+    }
+  }
+
+  // If there are any add recurrences in the operands list, see if any other
+  // added values are loop invariant.  If so, we can fold them into the
+  // recurrence.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+    ++Idx;
+
+  // Scan over all recurrences, trying to fold loop invariants into them.
+  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+    // Scan all of the other operands to this add and add them to the vector if
+    // they are loop invariant w.r.t. the recurrence.
+    std::vector<SCEVHandle> LIOps;
+    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+        LIOps.push_back(Ops[i]);
+        Ops.erase(Ops.begin()+i);
+        --i; --e;
+      }
+
+    // If we found some loop invariants, fold them into the recurrence.
+    if (!LIOps.empty()) {
+      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
+      LIOps.push_back(AddRec->getStart());
+
+      std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end());
+      AddRecOps[0] = getAddExpr(LIOps);
+
+      SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
+      // If all of the other operands were loop invariant, we are done.
+      if (Ops.size() == 1) return NewRec;
+
+      // Otherwise, add the folded AddRec by the non-loop-invariant parts.
+      for (unsigned i = 0;; ++i)
+        if (Ops[i] == AddRec) {
+          Ops[i] = NewRec;
+          break;
+        }
+      return getAddExpr(Ops);
+    }
+
+    // Okay, if there weren't any loop invariants to be folded, check to see if
+    // there are multiple AddRec's with the same loop induction variable being
+    // added together.  If so, we can fold them.
+    for (unsigned OtherIdx = Idx+1;
+         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+         ++OtherIdx)
+      if (OtherIdx != Idx) {
+        const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+        if (AddRec->getLoop() == OtherAddRec->getLoop()) {
+          // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
+          std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end());
+          for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
+            if (i >= NewOps.size()) {
+              NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
+                            OtherAddRec->op_end());
+              break;
+            }
+            NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
+          }
+          SCEVHandle NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
+
+          if (Ops.size() == 2) return NewAddRec;
+
+          Ops.erase(Ops.begin()+Idx);
+          Ops.erase(Ops.begin()+OtherIdx-1);
+          Ops.push_back(NewAddRec);
+          return getAddExpr(Ops);
+        }
+      }
+
+    // Otherwise couldn't fold anything into this recurrence.  Move on to the
+    // next one.
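// ---- [editorial sketch; not part of the imported source] -------------------
// The Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} fold above relies on
// affine recurrences over the same loop adding componentwise. A minimal
// standalone check with plain integers (A/B/C/D values are illustrative):
#include <cassert>
int main() {
  const long A = 7, B = 3, C = -2, D = 5;
  long X = A, Y = C, Sum = A + C;       // {A,+,B}, {C,+,D}, {A+C,+,B+D}
  for (int i = 0; i < 100; ++i) {
    assert(X + Y == Sum);               // the two forms agree every iteration
    X += B; Y += D; Sum += B + D;       // advance each recurrence one step
  }
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------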
+  }
+
+  // Okay, it looks like we really DO need an add expr.  Check to see if we
+  // already have one, otherwise create a new one.
+  std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+  SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr,
+                                                                 SCEVOps)];
+  if (Result == 0) Result = new SCEVAddExpr(Ops);
+  return Result;
+}
+
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
+  assert(!Ops.empty() && "Cannot get empty mul!");
+#ifndef NDEBUG
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+           getEffectiveSCEVType(Ops[0]->getType()) &&
+           "SCEVMulExpr operand types don't match!");
+#endif
+
+  // Sort by complexity, this groups all similar expression types together.
+  GroupByComplexity(Ops, LI);
+
+  // If there are any constants, fold them together.
+  unsigned Idx = 0;
+  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+    // C1*(C2+V) -> C1*C2 + C1*V
+    if (Ops.size() == 2)
+      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+        if (Add->getNumOperands() == 2 &&
+            isa<SCEVConstant>(Add->getOperand(0)))
+          return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+                            getMulExpr(LHSC, Add->getOperand(1)));
+
+    ++Idx;
+    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+      // We found two constants, fold them together!
+      ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
+                                           RHSC->getValue()->getValue());
+      Ops[0] = getConstant(Fold);
+      Ops.erase(Ops.begin()+1);  // Erase the folded element
+      if (Ops.size() == 1) return Ops[0];
+      LHSC = cast<SCEVConstant>(Ops[0]);
+    }
+
+    // If we are left with a constant one being multiplied, strip it off.
+    if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+      Ops.erase(Ops.begin());
+      --Idx;
+    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+      // If we have a multiply of zero, it will always be zero.
+      return Ops[0];
+    }
+  }
+
+  // Skip over the add expression until we get to a multiply.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+    ++Idx;
+
+  if (Ops.size() == 1)
+    return Ops[0];
+
+  // If there are mul operands inline them all into this expression.
+  if (Idx < Ops.size()) {
+    bool DeletedMul = false;
+    while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+      // If we have a mul, expand the mul operands onto the end of the
+      // operands list.
+      Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end());
+      Ops.erase(Ops.begin()+Idx);
+      DeletedMul = true;
+    }
+
+    // If we deleted at least one mul, we added operands to the end of the
+    // list, and they are not necessarily sorted.  Recurse to resort and
+    // resimplify any operands we just acquired.
+    if (DeletedMul)
+      return getMulExpr(Ops);
+  }
+
+  // If there are any add recurrences in the operands list, see if any other
+  // added values are loop invariant.  If so, we can fold them into the
+  // recurrence.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+    ++Idx;
+
+  // Scan over all recurrences, trying to fold loop invariants into them.
+  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+    // Scan all of the other operands to this mul and add them to the vector if
+    // they are loop invariant w.r.t. the recurrence.
+    std::vector<SCEVHandle> LIOps;
+    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+        LIOps.push_back(Ops[i]);
+        Ops.erase(Ops.begin()+i);
+        --i; --e;
+      }
+
+    // If we found some loop invariants, fold them into the recurrence.
+    if (!LIOps.empty()) {
+      //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
+      std::vector<SCEVHandle> NewOps;
+      NewOps.reserve(AddRec->getNumOperands());
+      if (LIOps.size() == 1) {
+        const SCEV *Scale = LIOps[0];
+        for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+          NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+      } else {
+        for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+          std::vector<SCEVHandle> MulOps(LIOps);
+          MulOps.push_back(AddRec->getOperand(i));
+          NewOps.push_back(getMulExpr(MulOps));
+        }
+      }
+
+      SCEVHandle NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+
+      // If all of the other operands were loop invariant, we are done.
+      if (Ops.size() == 1) return NewRec;
+
+      // Otherwise, multiply the folded AddRec by the non-loop-invariant parts.
+      for (unsigned i = 0;; ++i)
+        if (Ops[i] == AddRec) {
+          Ops[i] = NewRec;
+          break;
+        }
+      return getMulExpr(Ops);
+    }
+
+    // Okay, if there weren't any loop invariants to be folded, check to see if
+    // there are multiple AddRec's with the same loop induction variable being
+    // multiplied together.  If so, we can fold them.
+    for (unsigned OtherIdx = Idx+1;
+         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+         ++OtherIdx)
+      if (OtherIdx != Idx) {
+        const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+        if (AddRec->getLoop() == OtherAddRec->getLoop()) {
+          // F * G  -->  {A,+,B} * {C,+,D}  -->  {A*C,+,F*D + G*B + B*D}
+          const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+          SCEVHandle NewStart = getMulExpr(F->getStart(), G->getStart());
+          SCEVHandle B = F->getStepRecurrence(*this);
+          SCEVHandle D = G->getStepRecurrence(*this);
+          SCEVHandle NewStep = getAddExpr(getMulExpr(F, D),
+                                          getMulExpr(G, B),
+                                          getMulExpr(B, D));
+          SCEVHandle NewAddRec = getAddRecExpr(NewStart, NewStep,
+                                               F->getLoop());
+          if (Ops.size() == 2) return NewAddRec;
+
+          Ops.erase(Ops.begin()+Idx);
+          Ops.erase(Ops.begin()+OtherIdx-1);
+          Ops.push_back(NewAddRec);
+          return getMulExpr(Ops);
+        }
+      }
+
+    // Otherwise couldn't fold anything into this recurrence.  Move on to the
+    // next one.
+  }
+
+  // Okay, it looks like we really DO need a mul expr.  Check to see if we
+  // already have one, otherwise create a new one.
+  std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+  SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr,
+                                                                 SCEVOps)];
+  if (Result == 0)
+    Result = new SCEVMulExpr(Ops);
+  return Result;
+}
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
+                                        const SCEVHandle &RHS) {
+  assert(getEffectiveSCEVType(LHS->getType()) ==
+         getEffectiveSCEVType(RHS->getType()) &&
+         "SCEVUDivExpr operand types don't match!");
+
+  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+    if (RHSC->getValue()->equalsInt(1))
+      return LHS;  // X udiv 1 --> x
+    if (RHSC->isZero())
+      return getIntegerSCEV(0, LHS->getType()); // value is undefined
+
+    // Determine if the division can be folded into the operands of
+    // its operands.
+    // TODO: Generalize this to non-constants by using known-bits information.
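// ---- [editorial sketch; not part of the imported source] -------------------
// The {X,+,N}/C --> {X/C,+,N/C} rewrite used below is exact when C evenly
// divides N and nothing overflows (which the zero-extension comparison below
// verifies symbolically). A plain-integer spot check under those assumptions
// (the X/N/C values are illustrative):
#include <cassert>
int main() {
  const unsigned X = 40, N = 8, C = 4;      // C divides N; no overflow here
  unsigned V = X, Q = X / C;                // {X,+,N} and {X/C,+,N/C}
  for (int i = 0; i < 64; ++i) {
    assert(V / C == Q);                     // both recurrences agree
    V += N; Q += N / C;
  }
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------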
+    const Type *Ty = LHS->getType();
+    unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+    unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
+    // For non-power-of-two values, effectively round the value up to the
+    // nearest power of two.
+    if (!RHSC->getValue()->getValue().isPowerOf2())
+      ++MaxShiftAmt;
+    const IntegerType *ExtTy =
+      IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt);
+    // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+      if (const SCEVConstant *Step =
+            dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+        if (!Step->getValue()->getValue()
+              .urem(RHSC->getValue()->getValue()) &&
+            getZeroExtendExpr(AR, ExtTy) ==
+            getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+                          getZeroExtendExpr(Step, ExtTy),
+                          AR->getLoop())) {
+          std::vector<SCEVHandle> Operands;
+          for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+            Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+          return getAddRecExpr(Operands, AR->getLoop());
+        }
+    // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+      std::vector<SCEVHandle> Operands;
+      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+        Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+      if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+        // Find an operand that's safely divisible.
+        for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+          SCEVHandle Op = M->getOperand(i);
+          SCEVHandle Div = getUDivExpr(Op, RHSC);
+          if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+            Operands = M->getOperands();
+            Operands[i] = Div;
+            return getMulExpr(Operands);
+          }
+        }
+    }
+    // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+    if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
+      std::vector<SCEVHandle> Operands;
+      for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+        Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+      if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+        Operands.clear();
+        for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+          SCEVHandle Op = getUDivExpr(A->getOperand(i), RHS);
+          if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i))
+            break;
+          Operands.push_back(Op);
+        }
+        if (Operands.size() == A->getNumOperands())
+          return getAddExpr(Operands);
+      }
+    }
+
+    // Fold if both operands are constant.
+    if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+      Constant *LHSCV = LHSC->getValue();
+      Constant *RHSCV = RHSC->getValue();
+      return getUnknown(ConstantExpr::getUDiv(LHSCV, RHSCV));
+    }
+  }
+
+  SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)];
+  if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS);
+  return Result;
+}
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
+                                          const SCEVHandle &Step,
+                                          const Loop *L) {
+  std::vector<SCEVHandle> Operands;
+  Operands.push_back(Start);
+  if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+    if (StepChrec->getLoop() == L) {
+      Operands.insert(Operands.end(), StepChrec->op_begin(),
+                      StepChrec->op_end());
+      return getAddRecExpr(Operands, L);
+    }
+
+  Operands.push_back(Step);
+  return getAddRecExpr(Operands, L);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
+                                          const Loop *L) {
+  if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+    assert(getEffectiveSCEVType(Operands[i]->getType()) ==
+           getEffectiveSCEVType(Operands[0]->getType()) &&
+           "SCEVAddRecExpr operand types don't match!");
+#endif
+
+  if (Operands.back()->isZero()) {
+    Operands.pop_back();
+    return getAddRecExpr(Operands, L);             // {X,+,0}  -->  X
+  }
+
+  // Canonicalize nested AddRecs by nesting them in order of loop depth.
+  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+    const Loop* NestedLoop = NestedAR->getLoop();
+    if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+      std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(),
+                                             NestedAR->op_end());
+      SCEVHandle NestedARHandle(NestedAR);
+      Operands[0] = NestedAR->getStart();
+      NestedOperands[0] = getAddRecExpr(Operands, L);
+      return getAddRecExpr(NestedOperands, NestedLoop);
+    }
+  }
+
+  std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
+  SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)];
+  if (Result == 0) Result = new SCEVAddRecExpr(Operands, L);
+  return Result;
+}
+
+SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS,
+                                        const SCEVHandle &RHS) {
+  std::vector<SCEVHandle> Ops;
+  Ops.push_back(LHS);
+  Ops.push_back(RHS);
+  return getSMaxExpr(Ops);
+}
+
+SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
+  assert(!Ops.empty() && "Cannot get empty smax!");
+  if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+           getEffectiveSCEVType(Ops[0]->getType()) &&
+           "SCEVSMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity, this groups all similar expression types together.
+  GroupByComplexity(Ops, LI);
+
+  // If there are any constants, fold them together.
+  unsigned Idx = 0;
+  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+    ++Idx;
+    assert(Idx < Ops.size());
+    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+      // We found two constants, fold them together!
+      ConstantInt *Fold = ConstantInt::get(
+                              APIntOps::smax(LHSC->getValue()->getValue(),
+                                             RHSC->getValue()->getValue()));
+      Ops[0] = getConstant(Fold);
+      Ops.erase(Ops.begin()+1);  // Erase the folded element
+      if (Ops.size() == 1) return Ops[0];
+      LHSC = cast<SCEVConstant>(Ops[0]);
+    }
+
+    // If we are left with a constant -inf, strip it off.
+    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+      Ops.erase(Ops.begin());
+      --Idx;
+    }
+  }
+
+  if (Ops.size() == 1) return Ops[0];
+
+  // Find the first SMax
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+    ++Idx;
+
+  // Check to see if one of the operands is an SMax. If so, expand its operands
+  // onto our operand list, and recurse to simplify.
+  if (Idx < Ops.size()) {
+    bool DeletedSMax = false;
+    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+      Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end());
+      Ops.erase(Ops.begin()+Idx);
+      DeletedSMax = true;
+    }
+
+    if (DeletedSMax)
+      return getSMaxExpr(Ops);
+  }
+
+  // Okay, check to see if the same value occurs in the operand list twice.  If
+  // so, delete one.  Since we sorted the list, these values are required to
+  // be adjacent.
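// ---- [editorial sketch; not part of the imported source] -------------------
// The dedup that follows is valid because smax is idempotent and the sorted
// operand list puts duplicates side by side. The identity in miniature
// (values illustrative):
#include <algorithm>
#include <cassert>
int main() {
  int X = -7, Y = 42;
  // X smax Y smax Y  ==  X smax Y
  assert(std::max(X, std::max(Y, Y)) == std::max(X, Y));
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------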
+  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+    if (Ops[i] == Ops[i+1]) {      //  X smax Y smax Y  -->  X smax Y
+      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+      --i; --e;
+    }
+
+  if (Ops.size() == 1) return Ops[0];
+
+  assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+  // Okay, it looks like we really DO need an smax expr.  Check to see if we
+  // already have one, otherwise create a new one.
+  std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+  SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr,
+                                                                 SCEVOps)];
+  if (Result == 0) Result = new SCEVSMaxExpr(Ops);
+  return Result;
+}
+
+SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS,
+                                        const SCEVHandle &RHS) {
+  std::vector<SCEVHandle> Ops;
+  Ops.push_back(LHS);
+  Ops.push_back(RHS);
+  return getUMaxExpr(Ops);
+}
+
+SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
+  assert(!Ops.empty() && "Cannot get empty umax!");
+  if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+           getEffectiveSCEVType(Ops[0]->getType()) &&
+           "SCEVUMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity, this groups all similar expression types together.
+  GroupByComplexity(Ops, LI);
+
+  // If there are any constants, fold them together.
+  unsigned Idx = 0;
+  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+    ++Idx;
+    assert(Idx < Ops.size());
+    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+      // We found two constants, fold them together!
+      ConstantInt *Fold = ConstantInt::get(
+                              APIntOps::umax(LHSC->getValue()->getValue(),
+                                             RHSC->getValue()->getValue()));
+      Ops[0] = getConstant(Fold);
+      Ops.erase(Ops.begin()+1);  // Erase the folded element
+      if (Ops.size() == 1) return Ops[0];
+      LHSC = cast<SCEVConstant>(Ops[0]);
+    }
+
+    // If we are left with a constant zero, strip it off.
+    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+      Ops.erase(Ops.begin());
+      --Idx;
+    }
+  }
+
+  if (Ops.size() == 1) return Ops[0];
+
+  // Find the first UMax
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+    ++Idx;
+
+  // Check to see if one of the operands is a UMax. If so, expand its operands
+  // onto our operand list, and recurse to simplify.
+  if (Idx < Ops.size()) {
+    bool DeletedUMax = false;
+    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+      Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end());
+      Ops.erase(Ops.begin()+Idx);
+      DeletedUMax = true;
+    }
+
+    if (DeletedUMax)
+      return getUMaxExpr(Ops);
+  }
+
+  // Okay, check to see if the same value occurs in the operand list twice.  If
+  // so, delete one.  Since we sorted the list, these values are required to
+  // be adjacent.
+  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+    if (Ops[i] == Ops[i+1]) {      //  X umax Y umax Y  -->  X umax Y
+      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+      --i; --e;
+    }
+
+  if (Ops.size() == 1) return Ops[0];
+
+  assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+  // Okay, it looks like we really DO need a umax expr.  Check to see if we
+  // already have one, otherwise create a new one.
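// ---- [editorial sketch; not part of the imported source] -------------------
// The lookup that follows returns a reference to a map slot and allocates a
// node only on first use, so structurally equal expressions share one object
// and can later be compared by pointer. The same pattern in miniature
// (Node/getNode/UniqueTab are illustrative stand-ins, not the patch's types):
#include <map>
#include <utility>
struct Node { int Kind; Node(int K) : Kind(K) {} }; // stand-in for a SCEV node
static std::map<std::pair<int, int>, Node*> UniqueTab;
Node *getNode(int Kind, int Key) {
  Node *&Slot = UniqueTab[std::make_pair(Kind, Key)]; // reference into the map
  if (Slot == 0) Slot = new Node(Kind);               // allocate on first use
  return Slot;                                        // shared thereafter
}
// ---- [end editorial sketch] -------------------------------------------------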
+  std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+  SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr,
+                                                                 SCEVOps)];
+  if (Result == 0) Result = new SCEVUMaxExpr(Ops);
+  return Result;
+}
+
+SCEVHandle ScalarEvolution::getUnknown(Value *V) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return getConstant(CI);
+  if (isa<UndefValue>(V))
+    return getIntegerSCEV(0, V->getType());
+  SCEVUnknown *&Result = (*SCEVUnknowns)[V];
+  if (Result == 0) Result = new SCEVUnknown(V);
+  return Result;
+}
+
+//===----------------------------------------------------------------------===//
+//            Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+  // Integers are always SCEVable.
+  if (Ty->isInteger())
+    return true;
+
+  // Pointers are SCEVable if TargetData information is available
+  // to provide pointer size information.
+  if (isa<PointerType>(Ty))
+    return TD != NULL;
+
+  // Otherwise it's not SCEVable.
+  return false;
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  // If we have a TargetData, use it!
+  if (TD)
+    return TD->getTypeSizeInBits(Ty);
+
+  // Otherwise, we support only integer types.
+  assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!");
+  return Ty->getPrimitiveSizeInBits();
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  if (Ty->isInteger())
+    return Ty;
+
+  assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
+  return TD->getIntPtrType();
+}
+
+SCEVHandle ScalarEvolution::getCouldNotCompute() {
+  return UnknownValue;
+}
+
+/// hasSCEV - Return true if the SCEV for this value has already been
+/// computed.
+bool ScalarEvolution::hasSCEV(Value *V) const {
+  return Scalars.count(V);
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+SCEVHandle ScalarEvolution::getSCEV(Value *V) {
+  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+  std::map<SCEVCallbackVH, SCEVHandle>::iterator I = Scalars.find(V);
+  if (I != Scalars.end()) return I->second;
+  SCEVHandle S = createSCEV(V);
+  Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+  return S;
+}
+
+/// getIntegerSCEV - Given an integer or FP type, create a constant for the
+/// specified signed integer value and return a SCEV for the constant.
+SCEVHandle ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+  Ty = getEffectiveSCEVType(Ty);
+  Constant *C;
+  if (Val == 0)
+    C = Constant::getNullValue(Ty);
+  else if (Ty->isFloatingPoint())
+    C = ConstantFP::get(APFloat(Ty==Type::FloatTy ?
+                                APFloat::IEEEsingle :
+                                APFloat::IEEEdouble, Val));
+  else
+    C = ConstantInt::get(Ty, Val);
+  return getUnknown(C);
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+SCEVHandle ScalarEvolution::getNegativeSCEV(const SCEVHandle &V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getUnknown(ConstantExpr::getNeg(VC->getValue()));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty)));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+SCEVHandle ScalarEvolution::getNotSCEV(const SCEVHandle &V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getUnknown(ConstantExpr::getNot(VC->getValue()));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  SCEVHandle AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty));
+  return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
+///
+SCEVHandle ScalarEvolution::getMinusSCEV(const SCEVHandle &LHS,
+                                         const SCEVHandle &RHS) {
+  // X - Y --> X + -Y
+  return getAddExpr(LHS, getNegativeSCEV(RHS));
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type.  If the type must be extended, it
+/// is zero extended.
+SCEVHandle
+ScalarEvolution::getTruncateOrZeroExtend(const SCEVHandle &V,
+                                         const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot truncate or zero extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type.  If the type must be extended, it
+/// is sign extended.
+SCEVHandle
+ScalarEvolution::getTruncateOrSignExtend(const SCEVHandle &V,
+                                         const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot truncate or sign extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type.  If the type must be extended, it is
+/// zero extended.  The conversion must not be narrowing.
+SCEVHandle
+ScalarEvolution::getNoopOrZeroExtend(const SCEVHandle &V, const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot noop or zero extend with non-integer arguments!");
+  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+         "getNoopOrZeroExtend cannot truncate!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type.  If the type must be extended, it is
+/// sign extended.  The conversion must not be narrowing.
+SCEVHandle
+ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot noop or sign extend with non-integer arguments!");
+  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+         "getNoopOrSignExtend cannot truncate!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  return getSignExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type.  The conversion must not be widening.
+SCEVHandle
+ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot truncate or noop with non-integer arguments!");
+  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+         "getTruncateOrNoop cannot extend!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  return getTruncateExpr(V, Ty);
+}
+
+/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value
+/// for the specified instruction and replaces any references to the symbolic
+/// value SymName with the specified value.  This is used during PHI
+/// resolution.
+void ScalarEvolution::
+ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEVHandle &SymName,
+                                 const SCEVHandle &NewVal) {
+  std::map<SCEVCallbackVH, SCEVHandle>::iterator SI =
+    Scalars.find(SCEVCallbackVH(I, this));
+  if (SI == Scalars.end()) return;
+
+  SCEVHandle NV =
+    SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this);
+  if (NV == SI->second) return;  // No change.
+
+  SI->second = NV;            // Update the scalars map!
+
+  // Any instruction values that use this instruction might also need to be
+  // updated!
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+       UI != E; ++UI)
+    ReplaceSymbolicValueWithConcrete(cast<Instruction>(*UI), SymName, NewVal);
+}
+
+/// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists
+/// in a loop header, making it a potential recurrence, or it doesn't.
+///
+SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) {
+  if (PN->getNumIncomingValues() == 2)  // The loops have been canonicalized.
+    if (const Loop *L = LI->getLoopFor(PN->getParent()))
+      if (L->getHeader() == PN->getParent()) {
+        // If it lives in the loop header, it has two incoming values, one
+        // from outside the loop, and one from inside.
+        unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+        unsigned BackEdge     = IncomingEdge^1;
+
+        // While we are analyzing this PHI node, handle its value symbolically.
+        SCEVHandle SymbolicName = getUnknown(PN);
+        assert(Scalars.find(PN) == Scalars.end() &&
+               "PHI node already processed?");
+        Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+        // Using this symbolic name for the PHI, analyze the value coming
+        // around the back-edge.
+        SCEVHandle BEValue = getSCEV(PN->getIncomingValue(BackEdge));
+
+        // NOTE: If BEValue is loop invariant, we know that the PHI node just
+        // has a special value for the first iteration of the loop.
+
+        // If the value coming around the backedge is an add with the symbolic
+        // value we just inserted, then we found a simple induction variable!
+        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+          // If there is a single occurrence of the symbolic value, replace
+          // it with a recurrence.
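// ---- [editorial sketch; not part of the imported source] -------------------
// The recurrence {StartVal,+,Accum} built below claims the PHI's value on
// iteration k is StartVal + k*Accum. A direct check for a constant-step case
// (the Start/Step values are illustrative):
#include <cassert>
int main() {
  const int Start = 5, Step = 3;
  int i = Start;                         // the PHI's value entering the loop
  for (int k = 0; k < 50; ++k) {
    assert(i == Start + k * Step);       // {Start,+,Step} evaluated at k
    i += Step;                           // value coming around the backedge
  }
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------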
+          unsigned FoundIndex = Add->getNumOperands();
+          for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+            if (Add->getOperand(i) == SymbolicName)
+              if (FoundIndex == e) {
+                FoundIndex = i;
+                break;
+              }
+
+          if (FoundIndex != Add->getNumOperands()) {
+            // Create an add with everything but the specified operand.
+            std::vector<SCEVHandle> Ops;
+            for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+              if (i != FoundIndex)
+                Ops.push_back(Add->getOperand(i));
+            SCEVHandle Accum = getAddExpr(Ops);
+
+            // This is not a valid addrec if the step amount is varying each
+            // loop iteration, but is not itself an addrec in this loop.
+            if (Accum->isLoopInvariant(L) ||
+                (isa<SCEVAddRecExpr>(Accum) &&
+                 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+              SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+              SCEVHandle PHISCEV  = getAddRecExpr(StartVal, Accum, L);
+
+              // Okay, for the entire analysis of this edge we assumed the PHI
+              // to be symbolic.  We now need to go back and update all of the
+              // entries for the scalars that use the PHI (except for the PHI
+              // itself) to use the new analyzed value instead of the
+              // "symbolic" value.
+              ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+              return PHISCEV;
+            }
+          }
+        } else if (const SCEVAddRecExpr *AddRec =
+                     dyn_cast<SCEVAddRecExpr>(BEValue)) {
+          // Otherwise, this could be a loop like this:
+          //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
+          // In this case, j = {1,+,1} and BEValue is j.
+          // Because the other in-value of i (0) fits the evolution of BEValue,
+          // i really is an addrec evolution.
+          if (AddRec->getLoop() == L && AddRec->isAffine()) {
+            SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+
+            // If StartVal = j.start - j.stride, we can use StartVal as the
+            // start of the addrec evolution.
+            if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+                                         AddRec->getOperand(1))) {
+              SCEVHandle PHISCEV =
+                getAddRecExpr(StartVal, AddRec->getOperand(1), L);
+
+              // Okay, for the entire analysis of this edge we assumed the PHI
+              // to be symbolic.  We now need to go back and update all of the
+              // entries for the scalars that use the PHI (except for the PHI
+              // itself) to use the new analyzed value instead of the
+              // "symbolic" value.
+              ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+              return PHISCEV;
+            }
+          }
+        }
+
+        return SymbolicName;
+      }
+
+  // If it's not a loop phi, we can't handle it yet.
+  return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
+///
+SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) {
+
+  const Type *IntPtrTy = TD->getIntPtrType();
+  Value *Base = GEP->getOperand(0);
+  // Don't attempt to analyze GEPs over unsized objects.
+  if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+    return getUnknown(GEP);
+  SCEVHandle TotalOffset = getIntegerSCEV(0, IntPtrTy);
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
+                                      E = GEP->op_end();
+       I != E; ++I) {
+    Value *Index = *I;
+    // Compute the (potentially symbolic) offset in bytes for this index.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+      // For a struct, add the member offset.
+      const StructLayout &SL = *TD->getStructLayout(STy);
+      unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+      uint64_t Offset = SL.getElementOffset(FieldNo);
+      TotalOffset = getAddExpr(TotalOffset,
+                               getIntegerSCEV(Offset, IntPtrTy));
+    } else {
+      // For an array, add the element offset, explicitly scaled.
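// ---- [editorial sketch; not part of the imported source] -------------------
// TotalOffset accumulates struct-field offsets plus scaled array indices,
// exactly the way &s.b[i] decomposes into base + offsetof + i*sizeof(elt).
// A by-hand check on a hypothetical aggregate (struct S is illustrative):
#include <cassert>
#include <cstddef>
struct S { int a; int b[10]; };          // hypothetical { i32, [10 x i32] }
int main() {
  S s;
  char *Base = reinterpret_cast<char *>(&s);
  for (int i = 0; i < 10; ++i)           // member offset + scaled index
    assert(reinterpret_cast<char *>(&s.b[i]) ==
           Base + offsetof(S, b) + i * sizeof(int));
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------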
+      SCEVHandle LocalOffset = getSCEV(Index);
+      if (!isa<PointerType>(LocalOffset->getType()))
+        // Getelementptr indices are signed.
+        LocalOffset = getTruncateOrSignExtend(LocalOffset,
+                                              IntPtrTy);
+      LocalOffset =
+        getMulExpr(LocalOffset,
+                   getIntegerSCEV(TD->getTypeAllocSize(*GTI),
+                                  IntPtrTy));
+      TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+    }
+  }
+  return getAddExpr(getSCEV(Base), TotalOffset);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration).  It is, at the same time,
+/// the minimum number of times S is divisible by 2.  For example, given
+/// {4,+,8} it returns 2.  If S is guaranteed to be 0, it returns the bitwidth
+/// of S.
+static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) {
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+    return C->getValue()->getValue().countTrailingZeros();
+
+  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+    return std::min(GetMinTrailingZeros(T->getOperand(), SE),
+                    (uint32_t)SE.getTypeSizeInBits(T->getType()));
+
+  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+    uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
+    return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
+             SE.getTypeSizeInBits(E->getType()) : OpRes;
+  }
+
+  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+    uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
+    return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
+             SE.getTypeSizeInBits(E->getType()) : OpRes;
+  }
+
+  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+    return MinOpRes;
+  }
+
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    // The result is the sum of all operands results.
+    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+    uint32_t BitWidth = SE.getTypeSizeInBits(M->getType());
+    for (unsigned i = 1, e = M->getNumOperands();
+         SumOpRes != BitWidth && i != e; ++i)
+      SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE),
+                          BitWidth);
+    return SumOpRes;
+  }
+
+  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+    return MinOpRes;
+  }
+
+  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+    return MinOpRes;
+  }
+
+  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+    return MinOpRes;
+  }
+
+  // SCEVUDivExpr, SCEVUnknown
+  return 0;
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+SCEVHandle ScalarEvolution::createSCEV(Value *V) {
+  if (!isSCEVable(V->getType()))
+    return getUnknown(V);
+
+  unsigned Opcode = Instruction::UserOp1;
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    Opcode = I->getOpcode();
+  else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    Opcode = CE->getOpcode();
+  else
+    return getUnknown(V);
+
+  User *U = cast<User>(V);
+  switch (Opcode) {
+  case Instruction::Add:
+    return getAddExpr(getSCEV(U->getOperand(0)),
+                      getSCEV(U->getOperand(1)));
+  case Instruction::Mul:
+    return getMulExpr(getSCEV(U->getOperand(0)),
+                      getSCEV(U->getOperand(1)));
+  case Instruction::UDiv:
+    return getUDivExpr(getSCEV(U->getOperand(0)),
+                       getSCEV(U->getOperand(1)));
+  case Instruction::Sub:
+    return getMinusSCEV(getSCEV(U->getOperand(0)),
+                        getSCEV(U->getOperand(1)));
+  case Instruction::And:
+    // For an expression like x&255 that merely masks off the high bits,
+    // use zext(trunc(x)) as the SCEV expression.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      if (CI->isNullValue())
+        return getSCEV(U->getOperand(1));
+      if (CI->isAllOnesValue())
+        return getSCEV(U->getOperand(0));
+      const APInt &A = CI->getValue();
+      unsigned Ones = A.countTrailingOnes();
+      if (APIntOps::isMask(Ones, A))
+        return
+          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+                                            IntegerType::get(Ones)),
+                            U->getType());
+    }
+    break;
+  case Instruction::Or:
+    // If the RHS of the Or is a constant, we may have something like:
+    // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
+    // optimizations will transparently handle this case.
+    //
+    // In order for this transformation to be safe, the LHS must be of the
+    // form X*(2^n) and the Or constant must be less than 2^n.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      SCEVHandle LHS = getSCEV(U->getOperand(0));
+      const APInt &CIVal = CI->getValue();
+      if (GetMinTrailingZeros(LHS, *this) >=
+          (CIVal.getBitWidth() - CIVal.countLeadingZeros()))
+        return getAddExpr(LHS, getSCEV(U->getOperand(1)));
+    }
+    break;
+  case Instruction::Xor:
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      // If the RHS of the xor is a signbit, then this is just an add.
+      // Instcombine turns add of signbit into xor as a strength reduction
+      // step.
+      if (CI->getValue().isSignBit())
+        return getAddExpr(getSCEV(U->getOperand(0)),
+                          getSCEV(U->getOperand(1)));
+
+      // If the RHS of xor is -1, then this is a not operation.
+      if (CI->isAllOnesValue())
+        return getNotSCEV(getSCEV(U->getOperand(0)));
+
+      // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+      // This is a variant of the check for xor with -1, and it handles
+      // the case where instcombine has trimmed non-demanded bits out
+      // of an xor with -1.
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+        if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+          if (BO->getOpcode() == Instruction::And &&
+              LCI->getValue() == CI->getValue())
+            if (const SCEVZeroExtendExpr *Z =
+                  dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0))))
+              return getZeroExtendExpr(getNotSCEV(Z->getOperand()),
+                                       U->getType());
+    }
+    break;
+
+  case Instruction::Shl:
+    // Turn shift left of a constant amount into a multiply.
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+      Constant *X = ConstantInt::get(
+        APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::LShr:
+    // Turn logical shift right of a constant into an unsigned divide.
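// ---- [editorial sketch; not part of the imported source] -------------------
// lshr by a constant k is exactly unsigned division by 2^k, which is why the
// case below builds a udiv. An exhaustive check over 16-bit values
// (illustrative):
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    for (uint32_t k = 0; k < 16; ++k)
      assert((x >> k) == x / (1u << k));   // x lshr k == x udiv 2^k
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------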
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+      Constant *X = ConstantInt::get(
+        APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+      return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::AShr:
+    // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+      if (Instruction *L = dyn_cast<Instruction>(U->getOperand(0)))
+        if (L->getOpcode() == Instruction::Shl &&
+            L->getOperand(1) == U->getOperand(1)) {
+          unsigned BitWidth = getTypeSizeInBits(U->getType());
+          uint64_t Amt = BitWidth - CI->getZExtValue();
+          if (Amt == BitWidth)
+            return getSCEV(L->getOperand(0));       // shift by zero --> noop
+          if (Amt > BitWidth)
+            return getIntegerSCEV(0, U->getType()); // value is undefined
+          return
+            getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+                                              IntegerType::get(Amt)),
+                              U->getType());
+        }
+    break;
+
+  case Instruction::Trunc:
+    return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::ZExt:
+    return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::SExt:
+    return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::BitCast:
+    // BitCasts are no-op casts so we just eliminate the cast.
+    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+      return getSCEV(U->getOperand(0));
+    break;
+
+  case Instruction::IntToPtr:
+    if (!TD) break; // Without TD we can't analyze pointers.
+    return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
+                                   TD->getIntPtrType());
+
+  case Instruction::PtrToInt:
+    if (!TD) break; // Without TD we can't analyze pointers.
+    return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
+                                   U->getType());
+
+  case Instruction::GetElementPtr:
+    if (!TD) break; // Without TD we can't analyze pointers.
+    return createNodeForGEP(U);
+
+  case Instruction::PHI:
+    return createNodeForPHI(cast<PHINode>(U));
+
+  case Instruction::Select:
+    // This could be a smax or umax that was lowered earlier.
+    // Try to recover it.
+    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+      Value *LHS = ICI->getOperand(0);
+      Value *RHS = ICI->getOperand(1);
+      switch (ICI->getPredicate()) {
+      case ICmpInst::ICMP_SLT:
+      case ICmpInst::ICMP_SLE:
+        std::swap(LHS, RHS);
+        // fall through
+      case ICmpInst::ICMP_SGT:
+      case ICmpInst::ICMP_SGE:
+        if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
+          return getSMaxExpr(getSCEV(LHS), getSCEV(RHS));
+        else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
+          // ~smax(~x, ~y) == smin(x, y).
+          return getNotSCEV(getSMaxExpr(
+                                getNotSCEV(getSCEV(LHS)),
+                                getNotSCEV(getSCEV(RHS))));
+        break;
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_ULE:
+        std::swap(LHS, RHS);
+        // fall through
+      case ICmpInst::ICMP_UGT:
+      case ICmpInst::ICMP_UGE:
+        if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
+          return getUMaxExpr(getSCEV(LHS), getSCEV(RHS));
+        else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
+          // ~umax(~x, ~y) == umin(x, y)
+          return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)),
+                                        getNotSCEV(getSCEV(RHS))));
+        break;
+      default:
+        break;
+      }
+    }
+
+  default: // We cannot analyze this expression.
+    break;
+  }
+
+  return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//                   Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+SCEVHandle ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Max;
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // succeeds, proceed to actually compute a backedge-taken count and
+  // update the value. The temporary CouldNotCompute value tells SCEV
+  // code elsewhere that it shouldn't attempt to request a new
+  // backedge-taken count, which could result in infinite recursion.
+  std::pair<std::map<const Loop*, BackedgeTakenInfo>::iterator, bool> Pair =
+    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+  if (Pair.second) {
+    BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L);
+    if (ItCount.Exact != UnknownValue) {
+      assert(ItCount.Exact->isLoopInvariant(L) &&
+             ItCount.Max->isLoopInvariant(L) &&
+             "Computed trip count isn't loop invariant for loop!");
+      ++NumTripCountsComputed;
+
+      // Update the value in the map.
+      Pair.first->second = ItCount;
+    } else if (isa<PHINode>(L->getHeader()->begin())) {
+      // Only count loops that have phi nodes as not being computable.
+      ++NumTripCountsNotComputed;
+    }
+
+    // Now that we know more about the trip count for this loop, forget any
+    // existing SCEV values for PHI nodes in this loop since they are only
+    // conservative estimates made without the benefit
+    // of trip count information.
+    if (ItCount.hasAnyInfo())
+      forgetLoopPHIs(L);
+  }
+  return Pair.first->second;
+}
+
+/// forgetLoopBackedgeTakenCount - This method should be called by the
+/// client when it has changed a loop in a way that may affect
+/// ScalarEvolution's ability to compute a trip count, or if the loop
+/// is deleted.
+void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) {
+  BackedgeTakenCounts.erase(L);
+  forgetLoopPHIs(L);
+}
+
+/// forgetLoopPHIs - Delete the memoized SCEVs associated with the
+/// PHI nodes in the given loop. This is used when the trip count of
+/// the loop may have changed.
+void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
+  BasicBlock *Header = L->getHeader();
+
+  // Push all Loop-header PHIs onto the Worklist stack, except those
+  // that are presently represented via a SCEVUnknown. SCEVUnknown for
+  // a PHI either means that it has an unrecognized structure, or it's
+  // a PHI that's in the progress of being computed by createNodeForPHI.
+  // In the former case, additional loop trip count information isn't
+  // going to change anything.
+  // In the latter case, createNodeForPHI will
+  // perform the necessary updates on its own when it gets to that point.
+  SmallVector<Instruction *, 16> Worklist;
+  for (BasicBlock::iterator I = Header->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    std::map<SCEVCallbackVH, SCEVHandle>::iterator It =
+      Scalars.find((Value*)I);
+    if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
+      Worklist.push_back(PN);
+  }
+
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (Scalars.erase(I))
+      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+           UI != UE; ++UI)
+        Worklist.push_back(cast<Instruction>(*UI));
+  }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+  // If the loop doesn't have exactly one exit block, we can't analyze it.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.size() != 1) return UnknownValue;
+
+  // Okay, there is one exit block.  Try to find the condition that causes the
+  // loop to be exited.
+  BasicBlock *ExitBlock = ExitBlocks[0];
+
+  BasicBlock *ExitingBlock = 0;
+  for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
+       PI != E; ++PI)
+    if (L->contains(*PI)) {
+      if (ExitingBlock == 0)
+        ExitingBlock = *PI;
+      else
+        return UnknownValue;   // More than one block exiting!
+    }
+  assert(ExitingBlock && "No exits from loop, something is broken!");
+
+  // Okay, we've computed the exiting block.  See what condition causes us to
+  // exit.
+  //
+  // FIXME: we should be able to handle switch instructions (with a single
+  // exit).
+  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+  if (ExitBr == 0) return UnknownValue;
+  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+  // At this point, we know we have a conditional branch that determines
+  // whether the loop is exited.  However, we don't know if the branch is
+  // executed each time through the loop.  If not, then the execution count of
+  // the branch will not be equal to the trip count of the loop.
+  //
+  // Currently we check for this by checking to see if the Exit branch goes to
+  // the loop header.  If so, we know it will always execute the same number of
+  // times as the loop.  We also handle the case where the exit block *is* the
+  // loop header.  This is common for un-rotated loops.  More extensive
+  // analysis could be done to handle more cases here.
+  if (ExitBr->getSuccessor(0) != L->getHeader() &&
+      ExitBr->getSuccessor(1) != L->getHeader() &&
+      ExitBr->getParent() != L->getHeader())
+    return UnknownValue;
+
+  ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition());
+
+  // If it's not an integer or pointer comparison then compute it the hard
+  // way.
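// ---- [editorial sketch; not part of the imported source] -------------------
// As the comments in this file define it, the backedge-taken count is one
// less than the trip count (the number of header executions). Counting both
// directly for "for (i = 0; i != n; ++i)" (n is illustrative):
#include <cassert>
int main() {
  const int n = 10;
  int HeaderTests = 0, Backedges = 0;
  for (int i = 0; (++HeaderTests, i != n); ++i)
    ++Backedges;                         // control returns to the header test
  assert(Backedges == n);                // backedge-taken count
  assert(HeaderTests == Backedges + 1);  // trip count is one greater
  return 0;
}
// ---- [end editorial sketch] -------------------------------------------------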
+  if (ExitCond == 0)
+    return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(),
+                                          ExitBr->getSuccessor(0) == ExitBlock);
+
+  // If the condition was exit on true, convert the condition to exit on false
+  ICmpInst::Predicate Cond;
+  if (ExitBr->getSuccessor(1) == ExitBlock)
+    Cond = ExitCond->getPredicate();
+  else
+    Cond = ExitCond->getInversePredicate();
+
+  // Handle common loops like: for (X = "string"; *X; ++X)
+  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+    if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+      SCEVHandle ItCnt =
+        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+      if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt;
+    }
+
+  SCEVHandle LHS = getSCEV(ExitCond->getOperand(0));
+  SCEVHandle RHS = getSCEV(ExitCond->getOperand(1));
+
+  // Try to evaluate any dependencies out of the loop.
+  LHS = getSCEVAtScope(LHS, L);
+  RHS = getSCEVAtScope(RHS, L);
+
+  // At this point, we would like to compute how many iterations of the
+  // loop the predicate will return true for these inputs.
+  if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
+    // If there is a loop-invariant, force it into the RHS.
+    std::swap(LHS, RHS);
+    Cond = ICmpInst::getSwappedPredicate(Cond);
+  }
+
+  // If we have a comparison of a chrec against a constant, try to use value
+  // ranges to answer this query.
+  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+      if (AddRec->getLoop() == L) {
+        // Form the constant range.
+        ConstantRange CompRange(
+            ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+        SCEVHandle Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+      }
+
+  switch (Cond) {
+  case ICmpInst::ICMP_NE: {                     // while (X != Y)
+    // Convert to: while (X-Y != 0)
+    SCEVHandle TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+    break;
+  }
+  case ICmpInst::ICMP_EQ: {
+    // Convert to: while (X-Y == 0)             // while (X == Y)
+    SCEVHandle TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+    if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+    break;
+  }
+  case ICmpInst::ICMP_SLT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+                                             getNotSCEV(RHS), L, true);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_ULT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_UGT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+                                             getNotSCEV(RHS), L, false);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  default:
+#if 0
+    errs() << "ComputeBackedgeTakenCount ";
+    if (ExitCond->getOperand(0)->getType()->isUnsigned())
+      errs() << "[unsigned] ";
+    errs() << *LHS << "   "
+           << Instruction::getOpcodeName(Instruction::ICmp)
+           << "   " << *RHS << "\n";
+#endif
+    break;
+  }
+  return
+    ComputeBackedgeTakenCountExhaustively(L, ExitCond,
+                                          ExitBr->getSuccessor(0) == ExitBlock);
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+                                ScalarEvolution &SE) {
+  SCEVHandle InVal = SE.getConstant(C);
+  SCEVHandle Val = AddRec->evaluateAtIteration(InVal, SE);
+  assert(isa<SCEVConstant>(Val) &&
+         "Evaluation of SCEV at constant didn't fold correctly?");
+  return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index)
indexing into it, return +/// the addressed element of the initializer or null if the index expression is +/// invalid. +static Constant * +GetAddressedElementFromGlobal(GlobalVariable *GV, + const std::vector &Indices) { + Constant *Init = GV->getInitializer(); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + uint64_t Idx = Indices[i]->getZExtValue(); + if (ConstantStruct *CS = dyn_cast(Init)) { + assert(Idx < CS->getNumOperands() && "Bad struct index!"); + Init = cast(CS->getOperand(Idx)); + } else if (ConstantArray *CA = dyn_cast(Init)) { + if (Idx >= CA->getNumOperands()) return 0; // Bogus program + Init = cast(CA->getOperand(Idx)); + } else if (isa(Init)) { + if (const StructType *STy = dyn_cast(Init->getType())) { + assert(Idx < STy->getNumElements() && "Bad struct index!"); + Init = Constant::getNullValue(STy->getElementType(Idx)); + } else if (const ArrayType *ATy = dyn_cast(Init->getType())) { + if (Idx >= ATy->getNumElements()) return 0; // Bogus program + Init = Constant::getNullValue(ATy->getElementType()); + } else { + assert(0 && "Unknown constant aggregate type!"); + } + return 0; + } else { + return 0; // Unknown initializer type + } + } + return Init; +} + +/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of +/// 'icmp op load X, cst', try to see if we can compute the backedge +/// execution count. +SCEVHandle ScalarEvolution:: +ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + if (LI->isVolatile()) return UnknownValue; + + // Check to see if the loaded pointer is a getelementptr of a global. + GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0)); + if (!GEP) return UnknownValue; + + // Make sure that it is really a constant global we are gepping, with an + // initializer, and make sure the first IDX is really 0. + GlobalVariable *GV = dyn_cast(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GEP->getNumOperands() < 3 || !isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isNullValue()) + return UnknownValue; + + // Okay, we allow one non-constant index into the GEP instruction. + Value *VarIdx = 0; + std::vector Indexes; + unsigned VarIdxNum = 0; + for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) + if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { + Indexes.push_back(CI); + } else if (!isa(GEP->getOperand(i))) { + if (VarIdx) return UnknownValue; // Multiple non-constant idx's. + VarIdx = GEP->getOperand(i); + VarIdxNum = i-2; + Indexes.push_back(0); + } + + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. + // Check to see if X is a loop variant variable value now. + SCEVHandle Idx = getSCEV(VarIdx); + Idx = getSCEVAtScope(Idx, L); + + // We can only recognize very limited forms of loop index expressions, in + // particular, only affine AddRec's like {C1,+,C2}. + const SCEVAddRecExpr *IdxExpr = dyn_cast(Idx); + if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) || + !isa(IdxExpr->getOperand(0)) || + !isa(IdxExpr->getOperand(1))) + return UnknownValue; + + unsigned MaxSteps = MaxBruteForceIterations; + for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { + ConstantInt *ItCst = + ConstantInt::get(IdxExpr->getType(), IterationNum); + ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); + + // Form the GEP offset. 
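+    // For example (with illustrative values), if the GEP is
+    // 'getelementptr @GV, 0, X' and X is the chrec {0,+,1}, then on
+    // iteration 2 the variable index becomes the constant 2 and the code
+    // below addresses the element @GV[2].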
+    Indexes[VarIdxNum] = Val;
+
+    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+    if (Result == 0) break;  // Cannot compute!
+
+    // Evaluate the condition for this iteration.
+    Result = ConstantExpr::getICmp(predicate, Result, RHS);
+    if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
+    if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+      errs() << "\n***\n*** Computed loop count " << *ItCst
+             << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+             << "***\n";
+#endif
+      ++NumArrayLenItCounts;
+      return getConstant(ItCst);   // Found terminating iteration!
+    }
+  }
+  return UnknownValue;
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+    return true;
+
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (const Function *F = CI->getCalledFunction())
+      return canConstantFoldCallTo(F);
+  return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from.  We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI.  If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+  // If this is not an instruction, or if this is an instruction outside of the
+  // loop, it can't be derived from a loop PHI.
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !L->contains(I->getParent())) return 0;
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    if (L->getHeader() == I->getParent())
+      return PN;
+    else
+      // We don't currently keep track of the control flow needed to evaluate
+      // PHIs, so we cannot handle PHIs inside of loops.
+      return 0;
+  }
+
+  // If we won't be able to constant fold this expression even if the operands
+  // are constants, return early.
+  if (!CanConstantFold(I)) return 0;
+
+  // Otherwise, we can evaluate this instruction if all of its operands are
+  // constant or derived from a PHI node themselves.
+  PHINode *PHI = 0;
+  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+    if (!(isa<Constant>(I->getOperand(Op)) ||
+          isa<GlobalValue>(I->getOperand(Op)))) {
+      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+      if (P == 0) return 0;  // Not evolving from PHI
+      if (PHI == 0)
+        PHI = P;
+      else if (PHI != P)
+        return 0;  // Evolving from multiple different PHIs.
+    }
+
+  // This is an expression evolving from a constant PHI!
+  return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal.  If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
+  if (isa<PHINode>(V)) return PHIVal;
+  if (Constant *C = dyn_cast<Constant>(V)) return C;
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
+  Instruction *I = cast<Instruction>(V);
+
+  std::vector<Constant*> Operands;
+  Operands.resize(I->getNumOperands());
+
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal);
+    if (Operands[i] == 0) return 0;
+  }
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+    return ConstantFoldCompareInstOperands(CI->getPredicate(),
+                                           &Operands[0], Operands.size());
+  else
+    return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                    &Operands[0], Operands.size());
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
+Constant *ScalarEvolution::
+getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
+  std::map<PHINode*, Constant*>::iterator I =
+    ConstantEvolutionLoopExitValue.find(PN);
+  if (I != ConstantEvolutionLoopExitValue.end())
+    return I->second;
+
+  if (BEs.ugt(APInt(BEs.getBitWidth(),MaxBruteForceIterations)))
+    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+
+  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+  // Since the loop is canonicalized, the PHI node must have two entries.  One
+  // entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
+  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+  Constant *StartCST =
+    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+  if (StartCST == 0)
+    return RetVal = 0;  // Must be a constant.
+
+  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+  PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
+  if (PN2 != PN)
+    return RetVal = 0;  // Not derived from same PHI.
+
+  // Execute the loop symbolically to determine the exit value.
+  if (BEs.getActiveBits() >= 32)
+    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+  unsigned NumIterations = BEs.getZExtValue(); // must be in range
+  unsigned IterationNum = 0;
+  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+    if (IterationNum == NumIterations)
+      return RetVal = PHIVal;  // Got exit value!
+
+    // Compute the value of the PHI node for the next iteration.
+    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+    if (NextPHI == PHIVal)
+      return RetVal = NextPHI;  // Stopped evolving!
+    if (NextPHI == 0)
+      return 0;        // Couldn't evaluate!
+    PHIVal = NextPHI;
+  }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit condition
+/// gets a value of ExitWhen (true or false).  If we cannot evaluate the
+/// trip count of the loop, return UnknownValue.
+SCEVHandle ScalarEvolution::
+ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) {
+  PHINode *PN = getConstantEvolvingPHI(Cond, L);
+  if (PN == 0) return UnknownValue;
+
+  // Since the loop is canonicalized, the PHI node must have two entries.  One
+  // entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
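+  // For example, in 'i = 0; do { i = i*3+1; } while (cond)', the PHI for i
+  // has the constant start 0 from outside the loop and the backedge value
+  // i*3+1, which the checks below verify evolves only from the PHI itself.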
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); + Constant *StartCST = + dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) return UnknownValue; // Must be a constant. + + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); + if (PN2 != PN) return UnknownValue; // Not derived from same PHI. + + // Okay, we find a PHI node that defines the trip count of this loop. Execute + // the loop symbolically to determine when the condition gets a value of + // "ExitWhen". + unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (Constant *PHIVal = StartCST; + IterationNum != MaxIterations; ++IterationNum) { + ConstantInt *CondVal = + dyn_cast_or_null(EvaluateExpression(Cond, PHIVal)); + + // Couldn't symbolically evaluate. + if (!CondVal) return UnknownValue; + + if (CondVal->getValue() == uint64_t(ExitWhen)) { + ConstantEvolutionLoopExitValue[PN] = PHIVal; + ++NumBruteForceTripCountsComputed; + return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum)); + } + + // Compute the value of the PHI node for the next iteration. + Constant *NextPHI = EvaluateExpression(BEValue, PHIVal); + if (NextPHI == 0 || NextPHI == PHIVal) + return UnknownValue; // Couldn't evaluate or not making progress... + PHIVal = NextPHI; + } + + // Too many iterations were needed to evaluate. + return UnknownValue; +} + +/// getSCEVAtScope - Return a SCEV expression handle for the specified value +/// at the specified scope in the program. The L value specifies a loop +/// nest to evaluate the expression at, where null is the top-level or a +/// specified loop is immediately inside of the loop. +/// +/// This method can be used to compute the exit value for a variable defined +/// in a loop by querying what the value will hold in the parent loop. +/// +/// In the case that a relevant loop exit value cannot be computed, the +/// original value V is returned. +SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + // FIXME: this should be turned into a virtual method on SCEV! + + if (isa(V)) return V; + + // If this instruction is evolved from a constant-evolving PHI, compute the + // exit value from the loop without using SCEVs. + if (const SCEVUnknown *SU = dyn_cast(V)) { + if (Instruction *I = dyn_cast(SU->getValue())) { + const Loop *LI = (*this->LI)[I->getParent()]; + if (LI && LI->getParentLoop() == L) // Looking for loop exit value. + if (PHINode *PN = dyn_cast(I)) + if (PN->getParent() == LI->getHeader()) { + // Okay, there is no closed form solution for the PHI node. Check + // to see if the loop that contains it has a known backedge-taken + // count. If so, we may be able to force computation of the exit + // value. + SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(LI); + if (const SCEVConstant *BTCC = + dyn_cast(BackedgeTakenCount)) { + // Okay, we know how many times the containing loop executes. If + // this is a constant evolving PHI node, get the final value at + // the specified iteration number. + Constant *RV = getConstantEvolutionLoopExitValue(PN, + BTCC->getValue()->getValue(), + LI); + if (RV) return getUnknown(RV); + } + } + + // Okay, this is an expression that we cannot symbolically evaluate + // into a SCEV. Check to see if it's possible to symbolically evaluate + // the arguments into constants, and if so, try to constant propagate the + // result. This is particularly useful for computing loop exit values. 
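+      // For example, a select between two values that both become constants
+      // at this scope has no SCEV form of its own, but can still be folded
+      // to a constant result here.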
+ if (CanConstantFold(I)) { + // Check to see if we've folded this instruction at this loop before. + std::map &Values = ValuesAtScopes[I]; + std::pair::iterator, bool> Pair = + Values.insert(std::make_pair(L, static_cast(0))); + if (!Pair.second) + return Pair.first->second ? &*getUnknown(Pair.first->second) : V; + + std::vector Operands; + Operands.reserve(I->getNumOperands()); + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *Op = I->getOperand(i); + if (Constant *C = dyn_cast(Op)) { + Operands.push_back(C); + } else { + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + SCEVHandle OpV = getSCEVAtScope(getSCEV(Op), L); + if (const SCEVConstant *SC = dyn_cast(OpV)) { + Constant *C = SC->getValue(); + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); + } else if (const SCEVUnknown *SU = dyn_cast(OpV)) { + if (Constant *C = dyn_cast(SU->getValue())) { + if (C->getType() != Op->getType()) + C = + ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); + } else + return V; + } else { + return V; + } + } + } + + Constant *C; + if (const CmpInst *CI = dyn_cast(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + &Operands[0], Operands.size()); + else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size()); + Pair.first->second = C; + return getUnknown(C); + } + } + + // This is some other type of SCEVUnknown, just return it. + return V; + } + + if (const SCEVCommutativeExpr *Comm = dyn_cast(V)) { + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { + SCEVHandle OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + if (OpAtScope != Comm->getOperand(i)) { + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + std::vector NewOps(Comm->op_begin(), Comm->op_begin()+i); + NewOps.push_back(OpAtScope); + + for (++i; i != e; ++i) { + OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + NewOps.push_back(OpAtScope); + } + if (isa(Comm)) + return getAddExpr(NewOps); + if (isa(Comm)) + return getMulExpr(NewOps); + if (isa(Comm)) + return getSMaxExpr(NewOps); + if (isa(Comm)) + return getUMaxExpr(NewOps); + assert(0 && "Unknown commutative SCEV type!"); + } + } + // If we got here, all operands are loop invariant. + return Comm; + } + + if (const SCEVUDivExpr *Div = dyn_cast(V)) { + SCEVHandle LHS = getSCEVAtScope(Div->getLHS(), L); + SCEVHandle RHS = getSCEVAtScope(Div->getRHS(), L); + if (LHS == Div->getLHS() && RHS == Div->getRHS()) + return Div; // must be loop invariant + return getUDivExpr(LHS, RHS); + } + + // If this is a loop recurrence for a loop that does not contain L, then we + // are dealing with the final value computed by the loop. + if (const SCEVAddRecExpr *AddRec = dyn_cast(V)) { + if (!L || !AddRec->getLoop()->contains(L->getHeader())) { + // To evaluate this recurrence, we need to know how many times the AddRec + // loop iterates. Compute this now. 
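+      // For example, for the recurrence {5,+,3} in a loop whose
+      // backedge-taken count is the constant 10, the value at loop exit
+      // is 5 + 3*10 = 35.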
+ SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + if (BackedgeTakenCount == UnknownValue) return AddRec; + + // Then, evaluate the AddRec. + return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); + } + return AddRec; + } + + if (const SCEVZeroExtendExpr *Cast = dyn_cast(V)) { + SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getZeroExtendExpr(Op, Cast->getType()); + } + + if (const SCEVSignExtendExpr *Cast = dyn_cast(V)) { + SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getSignExtendExpr(Op, Cast->getType()); + } + + if (const SCEVTruncateExpr *Cast = dyn_cast(V)) { + SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getTruncateExpr(Op, Cast->getType()); + } + + assert(0 && "Unknown SCEV type!"); + return 0; +} + +/// getSCEVAtScope - This is a convenience function which does +/// getSCEVAtScope(getSCEV(V), L). +SCEVHandle ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { + return getSCEVAtScope(getSCEV(V), L); +} + +/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the +/// following equation: +/// +/// A * X = B (mod N) +/// +/// where N = 2^BW and BW is the common bit width of A and B. The signedness of +/// A and B isn't important. +/// +/// If the equation does not have a solution, SCEVCouldNotCompute is returned. +static SCEVHandle SolveLinEquationWithOverflow(const APInt &A, const APInt &B, + ScalarEvolution &SE) { + uint32_t BW = A.getBitWidth(); + assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(A != 0 && "A must be non-zero."); + + // 1. D = gcd(A, N) + // + // The gcd of A and N may have only one prime factor: 2. The number of + // trailing zeros in A is its multiplicity + uint32_t Mult2 = A.countTrailingZeros(); + // D = 2^Mult2 + + // 2. Check if B is divisible by D. + // + // B is divisible by D if and only if the multiplicity of prime factor 2 for B + // is not less than multiplicity of this prime factor for D. + if (B.countTrailingZeros() < Mult2) + return SE.getCouldNotCompute(); + + // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic + // modulo (N / D). + // + // (N / D) may need BW+1 bits in its representation. Hence, we'll use this + // bit width during computations. + APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D + APInt Mod(BW + 1, 0); + Mod.set(BW - Mult2); // Mod = N / D + APInt I = AD.multiplicativeInverse(Mod); + + // 4. Compute the minimum unsigned root of the equation: + // I * (B / D) mod (N / D) + APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + + // The result is guaranteed to be less than 2^BW so we may truncate it to BW + // bits. + return SE.getConstant(Result.trunc(BW)); +} + +/// SolveQuadraticEquation - Find the roots of the quadratic equation for the +/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which +/// might be the same) or two SCEVCouldNotCompute objects. 
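+/// A chrec {L,+,M,+,N} evaluates at iteration x to
+/// L + M*x + N*x*(x-1)/2, i.e. the polynomial (N/2)*x^2 + (M - N/2)*x + L,
+/// which is where the A, B, and C coefficients computed below come from.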
+/// +static std::pair +SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); + const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); + const SCEVConstant *MC = dyn_cast(AddRec->getOperand(1)); + const SCEVConstant *NC = dyn_cast(AddRec->getOperand(2)); + + // We currently can only solve this if the coefficients are constants. + if (!LC || !MC || !NC) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); + const APInt &L = LC->getValue()->getValue(); + const APInt &M = MC->getValue()->getValue(); + const APInt &N = NC->getValue()->getValue(); + APInt Two(BitWidth, 2); + APInt Four(BitWidth, 4); + + { + using namespace APIntOps; + const APInt& C = L; + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The B coefficient is M-N/2 + APInt B(M); + B -= sdiv(N,Two); + + // The A coefficient is N/2 + APInt A(N.sdiv(Two)); + + // Compute the B^2-4ac term. + APInt SqrtTerm(B); + SqrtTerm *= B; + SqrtTerm -= Four * (A * C); + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal(SqrtTerm.sqrt()); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB(-B); + APInt TwoA( A << 1 ); + if (TwoA.isMinValue()) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA)); + + return std::make_pair(SE.getConstant(Solution1), + SE.getConstant(Solution2)); + } // end APIntOps namespace +} + +/// HowFarToZero - Return the number of times a backedge comparing the specified +/// value to zero will execute. If not computable, return UnknownValue. +SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { + // If the value is a constant + if (const SCEVConstant *C = dyn_cast(V)) { + // If the value is already zero, the branch will execute zero times. + if (C->getValue()->isZero()) return C; + return UnknownValue; // Otherwise it will loop infinitely. + } + + const SCEVAddRecExpr *AddRec = dyn_cast(V); + if (!AddRec || AddRec->getLoop() != L) + return UnknownValue; + + if (AddRec->isAffine()) { + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + SCEVHandle Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + SCEVHandle Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + + if (const SCEVConstant *StepC = dyn_cast(Step)) { + // For now we handle only constant steps. + + // First, handle unitary steps. + if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: + return getNegativeSCEV(Start); // N = -Start (as unsigned) + if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: + return Start; // N = Start (as unsigned) + + // Then, try to solve the above equation provided that Start is constant. 
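+      // For example (illustrative i8 values), solving {8,+,4} == 0 means
+      // solving 4*N = -8 (mod 256); dividing out the gcd 4 gives
+      // N = -2 (mod 64), so the minimum unsigned solution is N = 62, and
+      // indeed 8 + 4*62 == 256 == 0 (mod 2^8).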
+ if (const SCEVConstant *StartC = dyn_cast(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); + } + } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. + std::pair Roots = SolveQuadraticEquation(AddRec, + *this); + const SCEVConstant *R1 = dyn_cast(Roots.first); + const SCEVConstant *R2 = dyn_cast(Roots.second); + if (R1) { +#if 0 + errs() << "HFTZ: " << *V << " - sol#1: " << *R1 + << " sol#2: " << *R2 << "\n"; +#endif + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + SCEVHandle Val = AddRec->evaluateAtIteration(R1, *this); + if (Val->isZero()) + return R1; // We found a quadratic root! + } + } + } + + return UnknownValue; +} + +/// HowFarToNonZero - Return the number of times a backedge checking the +/// specified value for nonzero will execute. If not computable, return +/// UnknownValue +SCEVHandle ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { + // Loops that look like: while (X == 0) are very strange indeed. We don't + // handle them yet except for the trivial case. This could be expanded in the + // future as needed. + + // If the value is a constant, check to see if it is known to be non-zero + // already. If so, the backedge will execute zero times. + if (const SCEVConstant *C = dyn_cast(V)) { + if (!C->getValue()->isNullValue()) + return getIntegerSCEV(0, C->getType()); + return UnknownValue; // Otherwise it will loop infinitely. + } + + // We could implement others, but I really doubt anyone writes loops like + // this, and if they did, they would already be constant folded. + return UnknownValue; +} + +/// getLoopPredecessor - If the given loop's header has exactly one unique +/// predecessor outside the loop, return it. Otherwise return null. +/// +BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Pred = 0; + for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); + PI != E; ++PI) + if (!L->contains(*PI)) { + if (Pred && Pred != *PI) return 0; // Multiple predecessors. + Pred = *PI; + } + return Pred; +} + +/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB +/// (which may not be an immediate predecessor) which has exactly one +/// successor from which BB is reachable, or null if no such block is +/// found. +/// +BasicBlock * +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { + // If the block has a unique predecessor, then there is no path from the + // predecessor to the block that does not go through the direct edge + // from the predecessor to the block. + if (BasicBlock *Pred = BB->getSinglePredecessor()) + return Pred; + + // A loop's header is defined to be a block that dominates the loop. + // If the header has a unique predecessor outside the loop, it must be + // a block that has exactly one successor that can reach the loop. 
+ if (Loop *L = LI->getLoopFor(BB)) + return getLoopPredecessor(L); + + return 0; +} + +/// isLoopGuardedByCond - Test whether entry to the loop is protected by +/// a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts. +bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return false; + + BasicBlock *Predecessor = getLoopPredecessor(L); + BasicBlock *PredecessorDest = L->getHeader(); + + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + for (; Predecessor; + PredecessorDest = Predecessor, + Predecessor = getPredecessorWithUniqueSuccessorForBB(Predecessor)) { + + BranchInst *LoopEntryPredicate = + dyn_cast(Predecessor->getTerminator()); + if (!LoopEntryPredicate || + LoopEntryPredicate->isUnconditional()) + continue; + + ICmpInst *ICI = dyn_cast(LoopEntryPredicate->getCondition()); + if (!ICI) continue; + + // Now that we found a conditional branch that dominates the loop, check to + // see if it is the comparison we are looking for. + Value *PreCondLHS = ICI->getOperand(0); + Value *PreCondRHS = ICI->getOperand(1); + ICmpInst::Predicate Cond; + if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest) + Cond = ICI->getPredicate(); + else + Cond = ICI->getInversePredicate(); + + if (Cond == Pred) + ; // An exact match. + else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE) + ; // The actual condition is beyond sufficient. + else + // Check a few special cases. + switch (Cond) { + case ICmpInst::ICMP_UGT: + if (Pred == ICmpInst::ICMP_ULT) { + std::swap(PreCondLHS, PreCondRHS); + Cond = ICmpInst::ICMP_ULT; + break; + } + continue; + case ICmpInst::ICMP_SGT: + if (Pred == ICmpInst::ICMP_SLT) { + std::swap(PreCondLHS, PreCondRHS); + Cond = ICmpInst::ICMP_SLT; + break; + } + continue; + case ICmpInst::ICMP_NE: + // Expressions like (x >u 0) are often canonicalized to (x != 0), + // so check for this case by checking if the NE is comparing against + // a minimum or maximum constant. + if (!ICmpInst::isTrueWhenEqual(Pred)) + if (ConstantInt *CI = dyn_cast(PreCondRHS)) { + const APInt &A = CI->getValue(); + switch (Pred) { + case ICmpInst::ICMP_SLT: + if (A.isMaxSignedValue()) break; + continue; + case ICmpInst::ICMP_SGT: + if (A.isMinSignedValue()) break; + continue; + case ICmpInst::ICMP_ULT: + if (A.isMaxValue()) break; + continue; + case ICmpInst::ICMP_UGT: + if (A.isMinValue()) break; + continue; + default: + continue; + } + Cond = ICmpInst::ICMP_NE; + // NE is symmetric but the original comparison may not be. Swap + // the operands if necessary so that they match below. + if (isa(LHS)) + std::swap(PreCondLHS, PreCondRHS); + break; + } + continue; + default: + // We weren't able to reconcile the condition. + continue; + } + + if (!PreCondLHS->getType()->isInteger()) continue; + + SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS); + SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS); + if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) || + (LHS == getNotSCEV(PreCondRHSSCEV) && + RHS == getNotSCEV(PreCondLHSSCEV))) + return true; + } + + return false; +} + +/// HowManyLessThans - Return the number of times a backedge containing the +/// specified less-than comparison will execute. 
If not computable, return
+/// UnknownValue.
+ScalarEvolution::BackedgeTakenInfo ScalarEvolution::
+HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+                 const Loop *L, bool isSigned) {
+  // Only handle:  "ADDREC < LoopInvariant".
+  if (!RHS->isLoopInvariant(L)) return UnknownValue;
+
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!AddRec || AddRec->getLoop() != L)
+    return UnknownValue;
+
+  if (AddRec->isAffine()) {
+    // FORNOW: We only support unit strides.
+    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+    SCEVHandle Step = AddRec->getStepRecurrence(*this);
+    SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());
+
+    // TODO: handle non-constant strides.
+    const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
+    if (!CStep || CStep->isZero())
+      return UnknownValue;
+    if (CStep->isOne()) {
+      // With unit stride, the iteration never steps past the limit value.
+    } else if (CStep->getValue()->getValue().isStrictlyPositive()) {
+      if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
+        // Test whether a positive iteration can step past the limit
+        // value and past the maximum value for its type in a single step.
+        if (isSigned) {
+          APInt Max = APInt::getSignedMaxValue(BitWidth);
+          if ((Max - CStep->getValue()->getValue())
+                .slt(CLimit->getValue()->getValue()))
+            return UnknownValue;
+        } else {
+          APInt Max = APInt::getMaxValue(BitWidth);
+          if ((Max - CStep->getValue()->getValue())
+                .ult(CLimit->getValue()->getValue()))
+            return UnknownValue;
+        }
+      } else
+        // TODO: handle non-constant limit values below.
+        return UnknownValue;
+    } else
+      // TODO: handle negative strides below.
+      return UnknownValue;
+
+    // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+    // m.  So, we count the number of iterations in which {n,+,s} < m is true.
+    // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+    // treat m-n as signed nor unsigned due to overflow possibility.
+
+    // First, we get the value of the LHS in the first iteration: n
+    SCEVHandle Start = AddRec->getOperand(0);
+
+    // Determine the minimum constant start value.
+    SCEVHandle MinStart = isa<SCEVConstant>(Start) ? Start :
+      getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
+                             APInt::getMinValue(BitWidth));
+
+    // If we know that the condition is true in order to enter the loop,
+    // then we know that it will run exactly (m-n)/s times. Otherwise, we
+    // only know that it will execute (max(m,n)-n)/s times. In both cases,
+    // the division must round up.
+    SCEVHandle End = RHS;
+    if (!isLoopGuardedByCond(L,
+                             isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                             getMinusSCEV(Start, Step), RHS))
+      End = isSigned ? getSMaxExpr(RHS, Start)
+                     : getUMaxExpr(RHS, Start);
+
+    // Determine the maximum constant end value.
+    SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
+      getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
+                             APInt::getMaxValue(BitWidth));
+
+    // Finally, we subtract these two values and divide, rounding up, to get
+    // the number of times the backedge is executed.
+    SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
+                                                getAddExpr(Step, NegOne)),
+                                     Step);
+
+    // The maximum backedge count is similar, except using the minimum start
+    // value and the maximum end value.
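+    // For example, for 'for (i = 0; i < n; ++i)' with signed n and no
+    // dominating guard, Start is 0 and End is smax(n, 0), so BECount is
+    // (smax(n, 0) - 0 + 1 - 1) /u 1 = smax(n, 0); since End is not a
+    // constant, MaxEnd conservatively becomes the signed maximum value.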
+ SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd, + MinStart), + getAddExpr(Step, NegOne)), + Step); + + return BackedgeTakenInfo(BECount, MaxBECount); + } + + return UnknownValue; +} + +/// getNumIterationsInRange - Return the number of iterations of this loop that +/// produce values in the specified constant range. Another way of looking at +/// this is that it returns the first iteration number where the value is not in +/// the condition, thus computing the exit count. If the iteration count can't +/// be computed, an instance of SCEVCouldNotCompute is returned. +SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, + ScalarEvolution &SE) const { + if (Range.isFullSet()) // Infinite loop. + return SE.getCouldNotCompute(); + + // If the start is a non-zero constant, shift the range to simplify things. + if (const SCEVConstant *SC = dyn_cast(getStart())) + if (!SC->getValue()->isZero()) { + std::vector Operands(op_begin(), op_end()); + Operands[0] = SE.getIntegerSCEV(0, SC->getType()); + SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop()); + if (const SCEVAddRecExpr *ShiftedAddRec = + dyn_cast(Shifted)) + return ShiftedAddRec->getNumIterationsInRange( + Range.subtract(SC->getValue()->getValue()), SE); + // This is strange and shouldn't happen. + return SE.getCouldNotCompute(); + } + + // The only time we can solve this is when we have all constant indices. + // Otherwise, we cannot determine the overflow conditions. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!isa(getOperand(i))) + return SE.getCouldNotCompute(); + + + // Okay at this point we know that all elements of the chrec are constants and + // that the start element is zero. + + // First check to see if the range contains zero. If not, the first + // iteration exits. + unsigned BitWidth = SE.getTypeSizeInBits(getType()); + if (!Range.contains(APInt(BitWidth, 0))) + return SE.getConstant(ConstantInt::get(getType(),0)); + + if (isAffine()) { + // If this is an affine expression then we have this situation: + // Solve {0,+,A} in Range === Ax in Range + + // We know that zero is in the range. If A is positive then we know that + // the upper value of the range must be the first possible exit value. + // If A is negative then the lower of the range is the last possible loop + // value. Also note that we already checked for a full range. + APInt One(BitWidth,1); + APInt A = cast(getOperand(1))->getValue()->getValue(); + APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); + + // The exit value should be (End+A)/A. + APInt ExitVal = (End + A).udiv(A); + ConstantInt *ExitValue = ConstantInt::get(ExitVal); + + // Evaluate at the exit value. If we really did fall out of the valid + // range, then we computed our trip count, otherwise wrap around or other + // things must have happened. + ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); + if (Range.contains(Val->getValue())) + return SE.getCouldNotCompute(); // Something strange happened + + // Ensure that the previous value is in the range. This is a sanity check. + assert(Range.contains( + EvaluateConstantChrecAtConstant(this, + ConstantInt::get(ExitVal - One), SE)->getValue()) && + "Linear scev computation is off in a bad way!"); + return SE.getConstant(ExitValue); + } else if (isQuadratic()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the + // quadratic equation to solve it. 
To do this, we must frame our problem in + // terms of figuring out when zero is crossed, instead of when + // Range.getUpper() is crossed. + std::vector NewOps(op_begin(), op_end()); + NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); + SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + + // Next, solve the constructed addrec + std::pair Roots = + SolveQuadraticEquation(cast(NewAddRec), SE); + const SCEVConstant *R1 = dyn_cast(Roots.first); + const SCEVConstant *R2 = dyn_cast(Roots.second); + if (R1) { + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. + ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1); + + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened + } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened + } + } + } + + return SE.getCouldNotCompute(); +} + + + +//===----------------------------------------------------------------------===// +// SCEVCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void ScalarEvolution::SCEVCallbackVH::deleted() { + assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + if (PHINode *PN = dyn_cast(getValPtr())) + SE->ConstantEvolutionLoopExitValue.erase(PN); + if (Instruction *I = dyn_cast(getValPtr())) + SE->ValuesAtScopes.erase(I); + SE->Scalars.erase(getValPtr()); + // this now dangles! +} + +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { + assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + + // Forget all the expressions associated with users of the old value, + // so that future queries will recompute the expressions using the new + // value. + SmallVector Worklist; + Value *Old = getValPtr(); + bool DeleteOld = false; + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + while (!Worklist.empty()) { + User *U = Worklist.pop_back_val(); + // Deleting the Old value will cause this to dangle. Postpone + // that until everything else is done. 
+    if (U == Old) {
+      DeleteOld = true;
+      continue;
+    }
+    if (PHINode *PN = dyn_cast<PHINode>(U))
+      SE->ConstantEvolutionLoopExitValue.erase(PN);
+    if (Instruction *I = dyn_cast<Instruction>(U))
+      SE->ValuesAtScopes.erase(I);
+    if (SE->Scalars.erase(U))
+      for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+           UI != UE; ++UI)
+        Worklist.push_back(*UI);
+  }
+  if (DeleteOld) {
+    if (PHINode *PN = dyn_cast<PHINode>(Old))
+      SE->ConstantEvolutionLoopExitValue.erase(PN);
+    if (Instruction *I = dyn_cast<Instruction>(Old))
+      SE->ValuesAtScopes.erase(I);
+    SE->Scalars.erase(Old);
+    // this now dangles!
+  }
+  // this may dangle!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+  : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+//                   ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+  : FunctionPass(&ID), UnknownValue(new SCEVCouldNotCompute()) {
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+  this->F = &F;
+  LI = &getAnalysis<LoopInfo>();
+  TD = getAnalysisIfAvailable<TargetData>();
+  return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+  Scalars.clear();
+  BackedgeTakenCounts.clear();
+  ConstantEvolutionLoopExitValue.clear();
+  ValuesAtScopes.clear();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<LoopInfo>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+  return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+                          const Loop *L) {
+  // Print all inner loops first
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    PrintLoopInfo(OS, SE, *I);
+
+  OS << "Loop " << L->getHeader()->getName() << ": ";
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.size() != 1)
+    OS << "<multiple exits> ";
+
+  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+  } else {
+    OS << "Unpredictable backedge-taken count. ";
+  }
+
+  OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
+  // ScalarEvolution's implementation of the print method is to print
+  // out SCEV values of all instructions that are interesting. Doing
+  // this potentially causes it to create new SCEV objects though,
+  // which technically conflicts with the const qualifier. This isn't
+  // observable from outside the class though (the hasSCEV function
+  // notwithstanding), so casting away the const isn't dangerous.
+ ScalarEvolution &SE = *const_cast(this); + + OS << "Classifying expressions for: " << F->getName() << "\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isSCEVable(I->getType())) { + OS << *I; + OS << " --> "; + SCEVHandle SV = SE.getSCEV(&*I); + SV->print(OS); + OS << "\t\t"; + + if (const Loop *L = LI->getLoopFor((*I).getParent())) { + OS << "Exits: "; + SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop()); + if (!ExitValue->isLoopInvariant(L)) { + OS << "<>"; + } else { + OS << *ExitValue; + } + } + + OS << "\n"; + } + + OS << "Determining loop execution counts for: " << F->getName() << "\n"; + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + PrintLoopInfo(OS, &SE, *I); +} + +void ScalarEvolution::print(std::ostream &o, const Module *M) const { + raw_os_ostream OS(o); + print(OS, M); +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp new file mode 100644 index 000000000000..7ba8268b508a --- /dev/null +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -0,0 +1,646 @@ +//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution expander, +// which is used to generate the code corresponding to a given scalar evolution +// expression. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +/// InsertCastOfTo - Insert a cast of V to the specified type, doing what +/// we can to share the casts. +Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, + const Type *Ty) { + // Short-circuit unnecessary bitcasts. + if (opcode == Instruction::BitCast && V->getType() == Ty) + return V; + + // Short-circuit unnecessary inttoptr<->ptrtoint casts. + if ((opcode == Instruction::PtrToInt || opcode == Instruction::IntToPtr) && + SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { + if (CastInst *CI = dyn_cast(V)) + if ((CI->getOpcode() == Instruction::PtrToInt || + CI->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CI->getType()) == + SE.getTypeSizeInBits(CI->getOperand(0)->getType())) + return CI->getOperand(0); + if (ConstantExpr *CE = dyn_cast(V)) + if ((CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CE->getType()) == + SE.getTypeSizeInBits(CE->getOperand(0)->getType())) + return CE->getOperand(0); + } + + // FIXME: keep track of the cast instruction. + if (Constant *C = dyn_cast(V)) + return ConstantExpr::getCast(opcode, C, Ty); + + if (Argument *A = dyn_cast(V)) { + // Check to see if there is already a cast! + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + if ((*UI)->getType() == Ty) + if (CastInst *CI = dyn_cast(cast(*UI))) + if (CI->getOpcode() == opcode) { + // If the cast isn't the first instruction of the function, move it. + if (BasicBlock::iterator(CI) != + A->getParent()->getEntryBlock().begin()) { + // If the CastInst is the insert point, change the insert point. 
+ if (CI == InsertPt) ++InsertPt; + // Splice the cast at the beginning of the entry block. + CI->moveBefore(A->getParent()->getEntryBlock().begin()); + } + return CI; + } + } + Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(), + A->getParent()->getEntryBlock().begin()); + InsertedValues.insert(I); + return I; + } + + Instruction *I = cast(V); + + // Check to see if there is already a cast. If there is, use it. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + if ((*UI)->getType() == Ty) + if (CastInst *CI = dyn_cast(cast(*UI))) + if (CI->getOpcode() == opcode) { + BasicBlock::iterator It = I; ++It; + if (isa(I)) + It = cast(I)->getNormalDest()->begin(); + while (isa(It)) ++It; + if (It != BasicBlock::iterator(CI)) { + // If the CastInst is the insert point, change the insert point. + if (CI == InsertPt) ++InsertPt; + // Splice the cast immediately after the operand in question. + CI->moveBefore(It); + } + return CI; + } + } + BasicBlock::iterator IP = I; ++IP; + if (InvokeInst *II = dyn_cast(I)) + IP = II->getNormalDest()->begin(); + while (isa(IP)) ++IP; + Instruction *CI = CastInst::Create(opcode, V, Ty, V->getName(), IP); + InsertedValues.insert(CI); + return CI; +} + +/// InsertNoopCastOfTo - Insert a cast of V to the specified type, +/// which must be possible with a noop cast. +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { + Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); + assert((Op == Instruction::BitCast || + Op == Instruction::PtrToInt || + Op == Instruction::IntToPtr) && + "InsertNoopCastOfTo cannot perform non-noop casts!"); + assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && + "InsertNoopCastOfTo cannot change sizes!"); + return InsertCastOfTo(Op, V, Ty); +} + +/// InsertBinop - Insert the specified binary operator, doing a small amount +/// of work to avoid inserting an obviously redundant operation. +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, + Value *RHS, BasicBlock::iterator InsertPt) { + // Fold a binop with constant operands. + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + return ConstantExpr::get(Opcode, CLHS, CRHS); + + // Do a quick scan to see if we have this binop nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin(); + if (InsertPt != BlockBegin) { + // Scanning starts from the last instruction before InsertPt. + BasicBlock::iterator IP = InsertPt; + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && + IP->getOperand(1) == RHS) + return IP; + if (IP == BlockBegin) break; + } + } + + // If we haven't found this binop, insert it. + Instruction *BO = BinaryOperator::Create(Opcode, LHS, RHS, "tmp", InsertPt); + InsertedValues.insert(BO); + return BO; +} + +/// FactorOutConstant - Test if S is divisible by Factor, using signed +/// division. If so, update S with Factor divided out and return true. +/// S need not be evenly divisble if a reasonable remainder can be +/// computed. +/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made +/// unnecessary; in its place, just signed-divide Ops[i] by the scale and +/// check to see if the divide was folded. +static bool FactorOutConstant(SCEVHandle &S, + SCEVHandle &Remainder, + const APInt &Factor, + ScalarEvolution &SE) { + // Everything is divisible by one. 
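+  // For other factors, e.g. factoring 4 out of the AddRec {8,+,4} yields
+  // {2,+,1} with a zero remainder, which the caller can then use directly
+  // as a GEP index over a four-byte element type.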
+  if (Factor == 1)
+    return true;
+
+  // For a Constant, check for a multiple of the given factor.
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+    ConstantInt *CI =
+      ConstantInt::get(C->getValue()->getValue().sdiv(Factor));
+    // If the quotient is zero and the remainder is non-zero, reject
+    // the value at this scale. It will be considered for subsequent
+    // smaller scales.
+    if (C->isZero() || !CI->isZero()) {
+      SCEVHandle Div = SE.getConstant(CI);
+      S = Div;
+      Remainder =
+        SE.getAddExpr(Remainder,
+                      SE.getConstant(C->getValue()->getValue().srem(Factor)));
+      return true;
+    }
+  }
+
+  // In a Mul, check if there is a constant operand which is a multiple
+  // of the given factor.
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
+    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+      if (!C->getValue()->getValue().srem(Factor)) {
+        std::vector<SCEVHandle> NewMulOps(M->getOperands());
+        NewMulOps[0] =
+          SE.getConstant(C->getValue()->getValue().sdiv(Factor));
+        S = SE.getMulExpr(NewMulOps);
+        return true;
+      }
+
+  // In an AddRec, check if both start and step are divisible.
+  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    SCEVHandle Step = A->getStepRecurrence(SE);
+    SCEVHandle StepRem = SE.getIntegerSCEV(0, Step->getType());
+    if (!FactorOutConstant(Step, StepRem, Factor, SE))
+      return false;
+    if (!StepRem->isZero())
+      return false;
+    SCEVHandle Start = A->getStart();
+    if (!FactorOutConstant(Start, Remainder, Factor, SE))
+      return false;
+    S = SE.getAddRecExpr(Start, Step, A->getLoop());
+    return true;
+  }
+
+  return false;
+}
+
+/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
+/// instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis analyze the result. However, it suffers from the
+/// underlying bug described in PR2831. Addition in LLVM currently always
+/// has two's complement wrapping guaranteed. However, the semantics for
+/// getelementptr overflow are ambiguous. In the common case though, this
+/// expansion gets used when a GEP in the original code has been converted
+/// into integer arithmetic, in which case the resulting code will be no
+/// more undefined than it was originally.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
+                                    const SCEVHandle *op_end,
+                                    const PointerType *PTy,
+                                    const Type *Ty,
+                                    Value *V) {
+  const Type *ElTy = PTy->getElementType();
+  SmallVector<Value *, 4> GepIndices;
+  std::vector<SCEVHandle> Ops(op_begin, op_end);
+  bool AnyNonZeroIndices = false;
+
+  // Descend down the pointer's type and attempt to convert the other
+  // operands into GEP indices, at each level.
The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + for (;;) { + APInt ElSize = APInt(SE.getTypeSizeInBits(Ty), + ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0); + std::vector NewOps; + std::vector ScaledOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + // Split AddRecs up into parts as either of the parts may be usable + // without the other. + if (const SCEVAddRecExpr *A = dyn_cast(Ops[i])) + if (!A->getStart()->isZero()) { + SCEVHandle Start = A->getStart(); + Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), + A->getStepRecurrence(SE), + A->getLoop())); + Ops[i] = Start; + ++e; + } + // If the scale size is not 0, attempt to factor out a scale. + if (ElSize != 0) { + SCEVHandle Op = Ops[i]; + SCEVHandle Remainder = SE.getIntegerSCEV(0, Op->getType()); + if (FactorOutConstant(Op, Remainder, ElSize, SE)) { + ScaledOps.push_back(Op); // Op now has ElSize factored out. + NewOps.push_back(Remainder); + continue; + } + } + // If the operand was not divisible, add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + Ops = NewOps; + AnyNonZeroIndices |= !ScaledOps.empty(); + Value *Scaled = ScaledOps.empty() ? + Constant::getNullValue(Ty) : + expandCodeFor(SE.getAddExpr(ScaledOps), Ty); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + if (!Ops.empty()) + while (const StructType *STy = dyn_cast(ElTy)) { + if (const SCEVConstant *C = dyn_cast(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *SE.TD->getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(ConstantInt::get(Ty, + FullOffset - + SL.getElementOffset(ElIdx))); + AnyNonZeroIndices = true; + continue; + } + } + break; + } + + if (const ArrayType *ATy = dyn_cast(ElTy)) { + ElTy = ATy->getElementType(); + continue; + } + break; + } + + // If none of the operands were convertable to proper GEP indices, cast + // the base to i8* and do an ugly getelementptr with that. It's still + // better than ptrtoint+arithmetic+inttoptr at least. + if (!AnyNonZeroIndices) { + V = InsertNoopCastOfTo(V, + Type::Int8Ty->getPointerTo(PTy->getAddressSpace())); + Value *Idx = expand(SE.getAddExpr(Ops)); + Idx = InsertNoopCastOfTo(Idx, Ty); + + // Fold a GEP with constant operands. + if (Constant *CLHS = dyn_cast(V)) + if (Constant *CRHS = dyn_cast(Idx)) + return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + + // Do a quick scan to see if we have this GEP nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin(); + if (InsertPt != BlockBegin) { + // Scanning starts from the last instruction before InsertPt. + BasicBlock::iterator IP = InsertPt; + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + if (IP->getOpcode() == Instruction::GetElementPtr && + IP->getOperand(0) == V && IP->getOperand(1) == Idx) + return IP; + if (IP == BlockBegin) break; + } + } + + Value *GEP = GetElementPtrInst::Create(V, Idx, "scevgep", InsertPt); + InsertedValues.insert(GEP); + return GEP; + } + + // Insert a pretty getelementptr. 
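+  // For example (illustrative types), an add of the form 'p + 8*i + 4'
+  // over a pointer to a struct of two i32 fields can become
+  // 'getelementptr %p, i, 1' here rather than ptrtoint/add/inttoptr
+  // arithmetic.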
+ Value *GEP = GetElementPtrInst::Create(V, + GepIndices.begin(), + GepIndices.end(), + "scevgep", InsertPt); + Ops.push_back(SE.getUnknown(GEP)); + InsertedValues.insert(GEP); + return expand(SE.getAddExpr(Ops)); +} + +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expand(S->getOperand(S->getNumOperands()-1)); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. + if (SE.TD) + if (const PointerType *PTy = dyn_cast(V->getType())) { + const std::vector &Ops = S->getOperands(); + return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], + PTy, Ty, V); + } + + V = InsertNoopCastOfTo(V, Ty); + + // Emit a bunch of add instructions + for (int i = S->getNumOperands()-2; i >= 0; --i) { + Value *W = expand(S->getOperand(i)); + W = InsertNoopCastOfTo(W, Ty); + V = InsertBinop(Instruction::Add, V, W, InsertPt); + } + return V; +} + +Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + int FirstOp = 0; // Set if we should emit a subtract. + if (const SCEVConstant *SC = dyn_cast(S->getOperand(0))) + if (SC->getValue()->isAllOnesValue()) + FirstOp = 1; + + int i = S->getNumOperands()-2; + Value *V = expand(S->getOperand(i+1)); + V = InsertNoopCastOfTo(V, Ty); + + // Emit a bunch of multiply instructions + for (; i >= FirstOp; --i) { + Value *W = expand(S->getOperand(i)); + W = InsertNoopCastOfTo(W, Ty); + V = InsertBinop(Instruction::Mul, V, W, InsertPt); + } + + // -1 * ... ---> 0 - ... + if (FirstOp == 1) + V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V, InsertPt); + return V; +} + +Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + Value *LHS = expand(S->getLHS()); + LHS = InsertNoopCastOfTo(LHS, Ty); + if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { + const APInt &RHS = SC->getValue()->getValue(); + if (RHS.isPowerOf2()) + return InsertBinop(Instruction::LShr, LHS, + ConstantInt::get(Ty, RHS.logBase2()), + InsertPt); + } + + Value *RHS = expand(S->getRHS()); + RHS = InsertNoopCastOfTo(RHS, Ty); + return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt); +} + +/// Move parts of Base into Rest to leave Base with the minimal +/// expression that provides a pointer operand suitable for a +/// GEP expansion. +static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest, + ScalarEvolution &SE) { + while (const SCEVAddRecExpr *A = dyn_cast(Base)) { + Base = A->getStart(); + Rest = SE.getAddExpr(Rest, + SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), + A->getStepRecurrence(SE), + A->getLoop())); + } + if (const SCEVAddExpr *A = dyn_cast(Base)) { + Base = A->getOperand(A->getNumOperands()-1); + std::vector NewAddOps(A->op_begin(), A->op_end()); + NewAddOps.back() = Rest; + Rest = SE.getAddExpr(NewAddOps); + ExposePointerBase(Base, Rest, SE); + } +} + +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + std::vector NewOps(S->getOperands()); + NewOps[0] = SE.getIntegerSCEV(0, Ty); + SCEVHandle Rest = SE.getAddRecExpr(NewOps, L); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. 
+ if (SE.TD) { + SCEVHandle Base = S->getStart(); + SCEVHandle RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (const PointerType *PTy = dyn_cast(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa(Base) && !isa(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + } + } + } + + Value *RestV = expand(Rest); + return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV))); + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && + S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) { + // Create and insert the PHI node for the induction variable in the + // specified loop. + BasicBlock *Header = L->getHeader(); + PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin()); + InsertedValues.insert(PN); + PN->addIncoming(Constant::getNullValue(Ty), L->getLoopPreheader()); + + pred_iterator HPI = pred_begin(Header); + assert(HPI != pred_end(Header) && "Loop with zero preds???"); + if (!L->contains(*HPI)) ++HPI; + assert(HPI != pred_end(Header) && L->contains(*HPI) && + "No backedge in loop?"); + + // Insert a unit add instruction right before the terminator corresponding + // to the back-edge. + Constant *One = ConstantInt::get(Ty, 1); + Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", + (*HPI)->getTerminator()); + InsertedValues.insert(Add); + + pred_iterator PI = pred_begin(Header); + if (*PI == L->getLoopPreheader()) + ++PI; + PN->addIncoming(Add, *PI); + return PN; + } + + // Get the canonical induction variable I for this loop. + Value *I = getOrInsertCanonicalInductionVariable(L, Ty); + + // If this is a simple linear addrec, emit it now as a special case. + if (S->isAffine()) { // {0,+,F} --> i*F + Value *F = expand(S->getOperand(1)); + F = InsertNoopCastOfTo(F, Ty); + + // IF the step is by one, just return the inserted IV. + if (ConstantInt *CI = dyn_cast(F)) + if (CI->getValue() == 1) + return I; + + // If the insert point is directly inside of the loop, emit the multiply at + // the insert point. Otherwise, L is a loop that is a parent of the insert + // point loop. If we can, move the multiply to the outer most loop that it + // is safe to be in. + BasicBlock::iterator MulInsertPt = getInsertionPoint(); + Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent()); + if (InsertPtLoop != L && InsertPtLoop && + L->contains(InsertPtLoop->getHeader())) { + do { + // If we cannot hoist the multiply out of this loop, don't. + if (!InsertPtLoop->isLoopInvariant(F)) break; + + BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader(); + + // If this loop hasn't got a preheader, we aren't able to hoist the + // multiply. + if (!InsertPtLoopPH) + break; + + // Otherwise, move the insert point to the preheader. + MulInsertPt = InsertPtLoopPH->getTerminator(); + InsertPtLoop = InsertPtLoop->getParentLoop(); + } while (InsertPtLoop != L); + } + + return InsertBinop(Instruction::Mul, I, F, MulInsertPt); + } + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form. 
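Each rewrite in visitAddRecExpr follows from the closed form of an affine add recurrence: {Start,+,Step} has the value Start + i*Step on iteration i, so {X,+,F} splits into X plus {0,+,F}, and {0,+,F} is F times the canonical induction variable {0,+,1}. A standalone check of that identity (plain integers, hypothetical names):

    #include <cassert>
    #include <cstdint>

    // Closed form of the affine recurrence {Start,+,Step} at iteration i.
    static int64_t addRecAt(int64_t Start, int64_t Step, int64_t i) {
      return Start + i * Step;
    }

    int main() {
      int64_t V = 7;                     // running value of {7,+,3}
      for (int64_t i = 0; i < 10; ++i) {
        assert(V == addRecAt(7, 3, i));  // closed form matches the recurrence
        V += 3;                          // one trip around the back-edge
      }
    }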
This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + SCEVHandle IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. + + SCEVHandle V = S->evaluateAtIteration(IH, SE); + //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + + return expand(V); +} + +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expand(S->getOperand()); + V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt); + InsertedValues.insert(I); + return I; +} + +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expand(S->getOperand()); + V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt); + InsertedValues.insert(I); + return I; +} + +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expand(S->getOperand()); + V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt); + InsertedValues.insert(I); + return I; +} + +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *LHS = expand(S->getOperand(0)); + LHS = InsertNoopCastOfTo(LHS, Ty); + for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *RHS = expand(S->getOperand(i)); + RHS = InsertNoopCastOfTo(RHS, Ty); + Instruction *ICmp = + new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt); + InsertedValues.insert(ICmp); + Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "smax", InsertPt); + InsertedValues.insert(Sel); + LHS = Sel; + } + return LHS; +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *LHS = expand(S->getOperand(0)); + LHS = InsertNoopCastOfTo(LHS, Ty); + for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *RHS = expand(S->getOperand(i)); + RHS = InsertNoopCastOfTo(RHS, Ty); + Instruction *ICmp = + new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt); + InsertedValues.insert(ICmp); + Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "umax", InsertPt); + InsertedValues.insert(Sel); + LHS = Sel; + } + return LHS; +} + +Value *SCEVExpander::expandCodeFor(SCEVHandle SH, const Type *Ty) { + // Expand the code for this SCEV. + Value *V = expand(SH); + if (Ty) { + assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && + "non-trivial casts should be done with the SCEVs directly!"); + V = InsertNoopCastOfTo(V, Ty); + } + return V; +} + +Value *SCEVExpander::expand(const SCEV *S) { + // Check to see if we already expanded this. 
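The smax/umax expansions above lower an n-ary maximum to a left fold of icmp/select pairs, one pair per additional operand. On concrete integers the same fold looks like this (standalone sketch, hypothetical names):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Chain of (compare, select) pairs, the exact shape the expander emits.
    static int64_t expandSMax(const std::vector<int64_t> &Ops) {
      int64_t LHS = Ops[0];
      for (std::size_t i = 1; i < Ops.size(); ++i) {
        bool Cmp = LHS > Ops[i];   // the ICmpInst (ICMP_SGT)
        LHS = Cmp ? LHS : Ops[i];  // the SelectInst
      }
      return LHS;
    }

    int main() {
      assert(expandSMax({3, -5, 9, 2}) == 9);
    }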
+  std::map<SCEVHandle, AssertingVH<Value> >::iterator I =
+    InsertedExpressions.find(S);
+  if (I != InsertedExpressions.end())
+    return I->second;
+
+  Value *V = visit(S);
+  InsertedExpressions[S] = V;
+  return V;
+}
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 000000000000..543306854ced
--- /dev/null
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,331 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) {
+  if (V == UndefVal)
+    OS << "undefined";
+  else if (V == OverdefinedVal)
+    OS << "overdefined";
+  else if (V == UntrackedVal)
+    OS << "untracked";
+  else
+    OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+  DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+  if (I != ValueState.end()) return I->second;  // Common case, in the map
+
+  LatticeVal LV;
+  if (LatticeFunc->IsUntrackedValue(V))
+    return LatticeFunc->getUntrackedVal();
+  else if (Constant *C = dyn_cast<Constant>(V))
+    LV = LatticeFunc->ComputeConstant(C);
+  else if (Argument *A = dyn_cast<Argument>(V))
+    LV = LatticeFunc->ComputeArgument(A);
+  else if (!isa<Instruction>(V))
+    // All other non-instructions are overdefined.
+    LV = LatticeFunc->getOverdefinedVal();
+  else
+    // All instructions are underdefined by default.
+    LV = LatticeFunc->getUndefVal();
+
+  // If this value is untracked, don't add it to the map.
+  if (LV == LatticeFunc->getUntrackedVal())
+    return LV;
+  return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+  DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+  if (I != ValueState.end() && I->second == V)
+    return;  // No change.
+
+  // An update.  Visit uses of I.
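The lattice the solver works over has the undefined value at the bottom, overdefined at the top, and client-defined facts in between; getOrInitValueState above seeds constants and arguments into it. A toy constant-propagation instance of such a lattice with its merge operation (standalone sketch, hypothetical names; the untracked value is left out):

    #include <cassert>

    // undefined (no info yet) < constant c < overdefined (conflicting info).
    struct LatticeVal {
      enum Kind { Undefined, Constant, Overdefined } K;
      int C;  // meaningful only when K == Constant
    };

    // Merge two facts at a join point, e.g. a PHI node.
    static LatticeVal merge(LatticeVal A, LatticeVal B) {
      if (A.K == LatticeVal::Undefined) return B;
      if (B.K == LatticeVal::Undefined) return A;
      if (A.K == LatticeVal::Constant && B.K == LatticeVal::Constant &&
          A.C == B.C)
        return A;                           // agreeing constants stay constant
      return {LatticeVal::Overdefined, 0};  // any disagreement goes to top
    }

    int main() {
      LatticeVal X{LatticeVal::Constant, 4}, Y{LatticeVal::Constant, 5};
      assert(merge(X, X).K == LatticeVal::Constant);
      assert(merge(X, Y).K == LatticeVal::Overdefined);
    }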
+ ValueState[&Inst] = V; + InstWorkList.push_back(&Inst); +} + +/// MarkBlockExecutable - This method can be used by clients to mark all of +/// the blocks that are known to be intrinsically live in the processed unit. +void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { + DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n"; + BBExecutable.insert(BB); // Basic block is executable! + BBWorkList.push_back(BB); // Add the block to the work list! +} + +/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB +/// work list if it is not already executable... +void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + DOUT << "Marking Edge Executable: " << Source->getNameStart() + << " -> " << Dest->getNameStart() << "\n"; + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + for (BasicBlock::iterator I = Dest->begin(); isa(I); ++I) + visitPHINode(*cast(I)); + + } else { + MarkBlockExecutable(Dest); + } +} + + +/// getFeasibleSuccessors - Return a vector of booleans to indicate which +/// successors are reachable from a given terminator instruction. +void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, + SmallVectorImpl &Succs, + bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) return; + + if (BranchInst *BI = dyn_cast(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = getOrInitValueState(BI->getCondition()); + else + BCValue = getLatticeState(BI->getCondition()); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. + if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); + if (C == 0 || !isa(C)) { + // Non-constant values can go either way. + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C == ConstantInt::getFalse()] = true; + return; + } + + if (isa(TI)) { + // Invoke instructions successors are always executable. + // TODO: Could ask the lattice function if the value can throw. + Succs[0] = Succs[1] = true; + return; + } + + SwitchInst &SI = cast(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getOrInitValueState(SI.getCondition()); + else + SCValue = getLatticeState(SI.getCondition()); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); + if (C == 0 || !isa(C)) { + // All destinations are executable! 
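getFeasibleSuccessors above is where the "conditional" in sparse conditional propagation lives: the lattice value of the branch condition decides which out-edges can be taken. For a two-way branch the mapping is small enough to tabulate (standalone sketch, hypothetical names):

    #include <cassert>
    #include <utility>

    enum class LV { Undefined, ConstTrue, ConstFalse, Overdefined };

    // (true-edge feasible, false-edge feasible) for a conditional branch.
    static std::pair<bool, bool> feasibleSuccs(LV Cond) {
      switch (Cond) {
      case LV::Undefined:   return {false, false};  // neither edge live yet
      case LV::ConstTrue:   return {true,  false};  // branch folds one way
      case LV::ConstFalse:  return {false, true};
      case LV::Overdefined: return {true,  true};   // could go either way
      }
      return {true, true};  // unreachable; keeps compilers happy
    }

    int main() {
      assert(feasibleSuccs(LV::Undefined) == std::make_pair(false, false));
      assert(feasibleSuccs(LV::ConstTrue) == std::make_pair(true, false));
    }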
+ Succs.assign(TI.getNumSuccessors(), true); + return; + } + + Succs[SI.findCaseValue(cast(C))] = true; +} + + +/// isEdgeFeasible - Return true if the control flow edge from the 'From' +/// basic block to the 'To' basic block is currently feasible... +bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, + bool AggressiveUndef) { + SmallVector SuccFeasible; + TerminatorInst *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { + SmallVector SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SparseSolver::visitPHINode(PHINode &PN) { + LatticeVal PNIV = getOrInitValueState(&PN); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common) just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(PN, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i)); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the compute value, which is the merge of the inputs. + UpdateState(PN, PNIV); +} + + +void SparseSolver::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic, they are never passed into the + // transfer functions. + if (PHINode *PN = dyn_cast(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(I, IV); + + if (TerminatorInst *TI = dyn_cast(&I)) + visitTerminatorInst(*TI); +} + +void SparseSolver::Solve(Function &F) { + MarkBlockExecutable(&F.getEntryBlock()); + + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty()) { + // Process the instruction work list. + while (!InstWorkList.empty()) { + Instruction *I = InstWorkList.back(); + InstWorkList.pop_back(); + + DOUT << "\nPopped off I-WL: " << *I; + + // "I" got into the work list because it made a transition. See if any + // users are both live and in need of updating. 
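Solve(), whose main loop begins above, is a classic two-worklist fixpoint driver: newly executable blocks and changed values are pushed, drained, and may push more work, until both lists are empty. A skeleton of that driver with ints standing in for blocks and instructions (hypothetical names):

    #include <cassert>
    #include <functional>
    #include <vector>

    struct MiniSolver {
      std::vector<int> BBWorkList, InstWorkList;
      std::function<void(int)> VisitBlock, VisitInst;

      void solve() {
        while (!BBWorkList.empty() || !InstWorkList.empty()) {
          while (!InstWorkList.empty()) {    // values that changed state:
            int I = InstWorkList.back();     // re-simulate their users
            InstWorkList.pop_back();
            VisitInst(I);
          }
          while (!BBWorkList.empty()) {      // blocks that became executable:
            int B = BBWorkList.back();       // simulate their instructions
            BBWorkList.pop_back();
            VisitBlock(B);
          }
        }
      }
    };

    int main() {
      MiniSolver S;
      int Visits = 0;
      S.VisitInst = [&](int) { ++Visits; };
      S.VisitBlock = [&](int B) {
        ++Visits;
        if (B == 0) S.InstWorkList.push_back(1);  // block enqueues a value
      };
      S.BBWorkList.push_back(0);
      S.solve();
      assert(Visits == 2);  // the block, then the instruction it enqueued
    }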
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *U = cast(*UI); + if (BBExecutable.count(U->getParent())) // Inst is executable? + visitInst(*U); + } + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DOUT << "\nPopped off BBWL: " << *BB; + + // Notify all instructions in this basic block that they are newly + // executable. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + visitInst(*I); + } + } +} + +void SparseSolver::Print(Function &F, std::ostream &OS) const { + OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BBExecutable.count(BB)) + OS << "INFEASIBLE: "; + OS << "\t"; + if (BB->hasName()) + OS << BB->getNameStr() << ":\n"; + else + OS << "; anon bb\n"; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + LatticeFunc->PrintValue(getLatticeState(I), OS); + OS << *I; + } + + OS << "\n"; + } +} + diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp new file mode 100644 index 000000000000..8f19fda953dd --- /dev/null +++ b/lib/Analysis/Trace.cpp @@ -0,0 +1,50 @@ +//===- Trace.cpp - Implementation of Trace class --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a single trace of LLVM basic blocks. A trace is a +// single entry, multiple exit, region of code that is often hot. Trace-based +// optimizations treat traces almost like they are a large, strange, basic +// block: because the trace path is assumed to be hot, optimizations for the +// fall-through path are made at the expense of the non-fall-through paths. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Trace.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Streams.h" +using namespace llvm; + +Function *Trace::getFunction() const { + return getEntryBasicBlock()->getParent(); +} + +Module *Trace::getModule() const { + return getFunction()->getParent(); +} + +/// print - Write trace to output stream. +/// +void Trace::print(std::ostream &O) const { + Function *F = getFunction (); + O << "; Trace from function " << F->getName() << ", blocks:\n"; + for (const_iterator i = begin(), e = end(); i != e; ++i) { + O << "; "; + WriteAsOperand(O, *i, true, getModule()); + O << "\n"; + } + O << "; Trace parent function: \n" << *F; +} + +/// dump - Debugger convenience method; writes trace to standard error +/// output stream. +/// +void Trace::dump() const { + print(cerr); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp new file mode 100644 index 000000000000..29ff8aa4f4d0 --- /dev/null +++ b/lib/Analysis/ValueTracking.cpp @@ -0,0 +1,1079 @@ +//===- ValueTracking.cpp - Walk computations to compute properties --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help analyze properties that chains of +// computations have. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include +using namespace llvm; + +/// getOpcode - If this is an Instruction or a ConstantExpr, return the +/// opcode value. Otherwise return UserOp1. +static unsigned getOpcode(const Value *V) { + if (const Instruction *I = dyn_cast(V)) + return I->getOpcode(); + if (const ConstantExpr *CE = dyn_cast(V)) + return CE->getOpcode(); + // Use UserOp1 to mean there's no opcode. + return Instruction::UserOp1; +} + + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bit sets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + TargetData *TD, unsigned Depth) { + const unsigned MaxDepth = 6; + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = Mask.getBitWidth(); + assert((V->getType()->isInteger() || isa(V->getType())) && + "Not integer or pointer type!"); + assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) && + (!isa(V->getType()) || + V->getType()->getPrimitiveSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, Mask, KnownOne and KnownZero should have same BitWidth"); + + if (ConstantInt *CI = dyn_cast(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + } + // Null is all-zeros. + if (isa(V)) { + KnownOne.clear(); + KnownZero = Mask; + return; + } + // The address of an aligned GlobalValue has trailing zeros. + if (GlobalValue *GV = dyn_cast(V)) { + unsigned Align = GV->getAlignment(); + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) + Align = TD->getPrefTypeAlignment(GV->getType()->getElementType()); + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + else + KnownZero.clear(); + KnownOne.clear(); + return; + } + + KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. + + if (Depth == MaxDepth || Mask == 0) + return; // Limit search depth. + + User *I = dyn_cast(V); + if (!I) return; + + APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + switch (getOpcode(I)) { + default: break; + case Instruction::And: { + // If either the LHS or the RHS are Zero, the result is zero. 
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + APInt Mask2(Mask & ~KnownZero); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + } + case Instruction::Or: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + APInt Mask2(Mask & ~KnownOne); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + } + case Instruction::Xor: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case Instruction::Mul: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clear(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case Instruction::UDiv: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. 
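The And, Or, and Xor cases above combine the two operands' known bits with plain mask arithmetic. The rules fit in a few lines over 32-bit masks (standalone sketch; Zero and One are masks of bits known to be 0 and 1, mirroring KnownZero/KnownOne):

    #include <cassert>
    #include <cstdint>

    struct Known { uint32_t Zero, One; };  // invariant: Zero & One == 0

    static Known knownAnd(Known A, Known B) {
      // Known 1 only if known 1 in both; known 0 if known 0 in either.
      return { A.Zero | B.Zero, A.One & B.One };
    }
    static Known knownOr(Known A, Known B) {
      return { A.Zero & B.Zero, A.One | B.One };
    }
    static Known knownXor(Known A, Known B) {
      // Known only where both inputs are known: equal bits 0, unequal bits 1.
      return { (A.Zero & B.Zero) | (A.One & B.One),
               (A.Zero & B.One) | (A.One & B.Zero) };
    }

    int main() {
      Known X{~0xF0u, 0xF0u};               // X known to be exactly 0xF0
      Known Y{~0x0Fu, 0x0Fu};               // Y known to be exactly 0x0F
      assert(knownAnd(X, Y).Zero == ~0u);   // 0xF0 & 0x0F == 0
      assert(knownOr(X, Y).One == 0xFFu);   // 0xF0 | 0x0F == 0xFF
      assert(knownXor(X, Y).One == 0xFFu);  // 0xF0 ^ 0x0F == 0xFF
    }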
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + return; // Can't work with floating point. + case Instruction::PtrToInt: + case Instruction::IntToPtr: + // We can't handle these if we don't know the pointer size. + if (!TD) return; + // FALL THROUGH and handle them the same as zext/trunc. + case Instruction::ZExt: + case Instruction::Trunc: { + // Note that we handle pointer operands here because of inttoptr/ptrtoint + // which fall through here. + const Type *SrcTy = I->getOperand(0)->getType(); + unsigned SrcBitWidth = TD ? + TD->getTypeSizeInBits(SrcTy) : + SrcTy->getPrimitiveSizeInBits(); + APInt MaskIn(Mask); + MaskIn.zextOrTrunc(SrcBitWidth); + KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne.zextOrTrunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + KnownZero.zextOrTrunc(BitWidth); + KnownOne.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::BitCast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if (SrcTy->isInteger() || isa(SrcTy)) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + return; + } + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); + unsigned SrcBitWidth = SrcTy->getBitWidth(); + + APInt MaskIn(Mask); + MaskIn.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set + KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::Shl: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt Mask2(Mask.lshr(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 + return; + } + break; + case Instruction::LShr: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + // high bits known zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + return; + } + break; + case Instruction::AShr: + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Signed shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. + KnownZero |= HighBits; + else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. + KnownOne |= HighBits; + return; + } + break; + case Instruction::Sub: { + if (ConstantInt *CLHS = dyn_cast(I->getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (!CLHS->getValue().isNegative()) { + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, + TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case Instruction::Add: { + // If one of the operands has trailing zeros, than the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. 
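Among the shift cases above, Shl is representative: both known-bit masks shift left with the value, and the vacated low bits become known zeros. A sketch over 32-bit masks (hypothetical names; assumes a constant shift amount smaller than the bit width):

    #include <cassert>
    #include <cstdint>

    struct Known { uint32_t Zero, One; };

    static Known knownShl(Known In, unsigned Amt) {
      assert(Amt < 32 && "shift amount must be in range");
      Known Out;
      Out.Zero = (In.Zero << Amt) | ((1u << Amt) - 1);  // low Amt bits are 0
      Out.One  = In.One << Amt;
      return Out;
    }

    int main() {
      Known X{~1u, 1u};                 // X is known to be exactly 1
      Known R = knownShl(X, 4);
      assert(R.One == 16u);             // 1 << 4
      assert((R.Zero & 0xFu) == 0xFu);  // vacated bits are known zero
    }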
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (getOpcode(I) == Instruction::Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + return; + } + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { + APInt RA = Rem->getValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + + // If the sign bit of the first operand is zero, the sign bit of + // the result is zero. If the first operand has no one bits below + // the second operand's single 1 bit, its sign will be zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + + KnownZero |= KnownZero2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + break; + case Instruction::URem: { + if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { + APInt RA = Rem->getValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, + TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, + TD, Depth+1); + + unsigned Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + break; + } + + case Instruction::Alloca: + case Instruction::Malloc: { + AllocationInst *AI = cast(V); + unsigned Align = AI->getAlignment(); + if (Align == 0 && TD) { + if (isa(AI)) + Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + else if (isa(AI)) { + // Malloc returns maximally aligned memory. 
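The Alloca/Malloc case above, like the GlobalValue case near the top of the function, turns an alignment guarantee into known bits: an Align-byte-aligned address has log2(Align) trailing zero bits. Standalone sketch:

    #include <cassert>
    #include <cstdint>

    // Mask of low bits known zero for a pointer aligned to Align bytes.
    static uint32_t knownZeroFromAlign(uint32_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 &&
             "alignment must be a power of two");
      return Align - 1;
    }

    int main() {
      assert(knownZeroFromAlign(16) == 0xFu);  // low 4 bits are zero
      assert(knownZeroFromAlign(1) == 0);      // no alignment, no known bits
    }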
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + Align = + std::max(Align, + (unsigned)TD->getABITypeAlignment(Type::DoubleTy)); + Align = + std::max(Align, + (unsigned)TD->getABITypeAlignment(Type::Int64Ty)); + } + } + + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + break; + } + case Instruction::GetElementPtr: { + // Analyze all of the subscripts of this getelementptr instruction + // to determine if we can prove known low zero bits. + APInt LocalMask = APInt::getAllOnesValue(BitWidth); + APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + + gep_type_iterator GTI = gep_type_begin(I); + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + Value *Index = I->getOperand(i); + if (const StructType *STy = dyn_cast(*GTI)) { + // Handle struct member offset arithmetic. + if (!TD) return; + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = cast(Index)->getZExtValue(); + uint64_t Offset = SL->getElementOffset(Idx); + TrailZ = std::min(TrailZ, + CountTrailingZeros_64(Offset)); + } else { + // Handle array index arithmetic. + const Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) return; + unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits(); + uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + LocalMask = APInt::getAllOnesValue(GEPOpiBits); + LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); + ComputeMaskedBits(Index, LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + TrailZ = std::min(TrailZ, + unsigned(CountTrailingZeros_64(TypeSize) + + LocalKnownZero.countTrailingOnes())); + } + } + + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + break; + } + case Instruction::PHI: { + PHINode *P = cast(I); + // Handle the case of a simple two-predecessor recurrence PHI. + // There's a lot more that could theoretically be done here, but + // this is sufficient to catch some interesting cases. + if (P->getNumIncomingValues() == 2) { + for (unsigned i = 0; i != 2; ++i) { + Value *L = P->getIncomingValue(i); + Value *R = P->getIncomingValue(!i); + User *LU = dyn_cast(L); + if (!LU) + continue; + unsigned Opcode = getOpcode(LU); + // Check for operations that have the property that if + // both their operands have low zero bits, the result + // will have low zero bits. + if (Opcode == Instruction::Add || + Opcode == Instruction::Sub || + Opcode == Instruction::And || + Opcode == Instruction::Or || + Opcode == Instruction::Mul) { + Value *LL = LU->getOperand(0); + Value *LR = LU->getOperand(1); + // Find a recurrence. + if (LL == I) + L = LR; + else if (LR == I) + L = LL; + else + break; + // Ok, we have a PHI of the form L op= R. Check for low + // zero bits. 
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + Mask2 = APInt::getLowBitsSet(BitWidth, + KnownZero2.countTrailingOnes()); + + // We need to take the minimum number of known bits + APInt KnownZero3(KnownZero), KnownOne3(KnownOne); + ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + + KnownZero = Mask & + APInt::getLowBitsSet(BitWidth, + std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + break; + } + } + } + + // Otherwise take the unions of the known bit sets of the operands, + // taking conservative care to avoid excessive recursion. + if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + // Skip direct self references. + if (P->getIncomingValue(i) == P) continue; + + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + // Recurse, but cap the recursion to one level, because we don't + // want to waste time spinning around in loops. + ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, + KnownZero2, KnownOne2, TD, MaxDepth-1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + // If all bits have been ruled out, there's no need to check + // more operands. + if (!KnownZero && !KnownOne) + break; + } + } + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + } + } + break; + } +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, + TargetData *TD, unsigned Depth) { + APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + + + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "ashr X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +/// +/// 'Op' must have a scalar integer type. +/// +unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { + const IntegerType *Ty = cast(V->getType()); + unsigned TyBits = Ty->getBitWidth(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + // Note that ConstantInt is handled by the general ComputeMaskedBits case + // below. + + if (Depth == 6) + return 1; // Limit search depth. + + User *U = dyn_cast(V); + switch (getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits-cast(U->getOperand(0)->getType())->getBitWidth(); + return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; + + case Instruction::AShr: + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + // ashr X, C -> adds C sign bits. 
+ if (ConstantInt *C = dyn_cast(U->getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; + case Instruction::Shl: + if (ConstantInt *C = dyn_cast(U->getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (C->getZExtValue() >= TyBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case Instruction::Select: + Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); + return std::min(Tmp, Tmp2); + + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantInt *CRHS = dyn_cast(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)) == Mask) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantInt *CLHS = dyn_cast(U->getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, + TD, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)) == Mask) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. 
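For a concrete value, the quantity that ComputeNumSignBits bounds conservatively can be computed exactly: the number of leading bits equal to the sign bit, with a minimum of 1. A standalone i32 version, useful for sanity-checking the rules above (hypothetical name):

    #include <cassert>
    #include <cstdint>

    static unsigned numSignBits(int32_t V) {
      uint32_t U = (uint32_t)V;
      uint32_t Sign = U >> 31;  // the sign bit itself always counts
      unsigned N = 1;
      for (int Bit = 30; Bit >= 0; --Bit, ++N)
        if (((U >> Bit) & 1) != Sign)
          break;
      return N;
    }

    int main() {
      assert(numSignBits(-1) == 32);  // every bit copies the sign bit
      assert(numSignBits(3) == 30);   // 0b0...011: 30 leading zeros
      assert(numSignBits(-4) == 30);  // 0b1...100: 30 leading ones
    }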
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-TyBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); +} + +/// CannotBeNegativeZero - Return true if we can prove that the specified FP +/// value is never equal to -0.0. +/// +/// NOTE: this function will need to be revisited when we support non-default +/// rounding modes! +/// +bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast(V)) + return !CFP->getValueAPF().isNegZero(); + + if (Depth == 6) + return 1; // Limit search depth. + + const Instruction *I = dyn_cast(V); + if (I == 0) return false; + + // (add x, 0.0) is guaranteed to return +0.0, not -0.0. + if (I->getOpcode() == Instruction::Add && + isa(I->getOperand(1)) && + cast(I->getOperand(1))->isNullValue()) + return true; + + // sitofp and uitofp turn into +0.0 for zero. + if (isa(I) || isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(I)) + // sqrt(-0.0) = -0.0, no other negative results are possible. + if (II->getIntrinsicID() == Intrinsic::sqrt) + return CannotBeNegativeZero(II->getOperand(1), Depth+1); + + if (const CallInst *CI = dyn_cast(I)) + if (const Function *F = CI->getCalledFunction()) { + if (F->isDeclaration()) { + switch (F->getNameLen()) { + case 3: // abs(x) != -0.0 + if (!strcmp(F->getNameStart(), "abs")) return true; + break; + case 4: // abs[lf](x) != -0.0 + if (!strcmp(F->getNameStart(), "absf")) return true; + if (!strcmp(F->getNameStart(), "absl")) return true; + break; + } + } + } + + return false; +} + +// This is the recursive version of BuildSubAggregate. It takes a few different +// arguments. Idxs is the index within the nested struct From that we are +// looking at now (which is of type IndexedType). IdxSkip is the number of +// indices from Idxs that should be left out when inserting into the resulting +// struct. To is the result struct built so far, new insertvalue instructions +// build on that. +Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, + SmallVector &Idxs, + unsigned IdxSkip, + Instruction *InsertBefore) { + const llvm::StructType *STy = llvm::dyn_cast(IndexedType); + if (STy) { + // Save the original To argument so we can modify it + Value *OrigTo = To; + // General case, the type indexed by Idxs is a struct + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + // Process each struct element recursively + Idxs.push_back(i); + Value *PrevTo = To; + To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, + InsertBefore); + Idxs.pop_back(); + if (!To) { + // Couldn't find any inserted value for this index? 
Cleanup
+        while (PrevTo != OrigTo) {
+          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+          PrevTo = Del->getAggregateOperand();
+          Del->eraseFromParent();
+        }
+        // Stop processing elements
+        break;
+      }
+    }
+    // If we successfully found a value for each of our subaggregates
+    if (To)
+      return To;
+  }
+  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
+  // the struct's elements had a value that was inserted directly. In the
+  // latter case, perhaps we can't determine each of the subelements
+  // individually, but we might be able to find the complete struct somewhere.
+
+  // Find the value that is at that particular spot
+  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+
+  if (!V)
+    return NULL;
+
+  // Insert the value in the new (sub) aggregate
+  return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
+                                       Idxs.end(), "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is
+// again a struct) into a new value. For example, given the struct:
+//   { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+//   { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work
+// if each of the elements of the substruct is known (i.e., inserted into
+// From by an insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+                         const unsigned *idx_end, Instruction *InsertBefore) {
+  assert(InsertBefore && "Must have someplace to insert!");
+  const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+                                                             idx_begin,
+                                                             idx_end);
+  Value *To = UndefValue::get(IndexedType);
+  SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+  unsigned IdxSkip = Idxs.size();
+
+  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if
+/// it were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
+                               const unsigned *idx_end,
+                               Instruction *InsertBefore) {
+  // Nothing to index?  Just return V then (this is useful at the end of our
+  // recursion)
+  if (idx_begin == idx_end)
+    return V;
+  // We have indices, so V should have an indexable type
+  assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+         && "Not looking at a struct or array?");
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+         && "Invalid indices for type?");
+  const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+  if (isa<UndefValue>(V))
+    return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+                                                            idx_begin,
+                                                            idx_end));
+  else if (isa<ConstantAggregateZero>(V))
+    return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+                                                                   idx_begin,
+                                                                   idx_end));
+  else if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+      // Recursively process this constant
+      return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+                               idx_end, InsertBefore);
+  } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+    // Loop over the indices for the insertvalue instruction in parallel with
+    // the requested indices
+    const unsigned *req_idx = idx_begin;
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i, ++req_idx) {
+      if (req_idx == idx_end) {
+        if (InsertBefore)
+          // The requested index identifies a part of a nested aggregate.
+          // Handle this specially. For example,
+          // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+          // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+          // %C = extractvalue {i32, { i32, i32 } } %B, 1
+          // This can be changed into
+          // %A = insertvalue {i32, i32 } undef, i32 10, 0
+          // %C = insertvalue {i32, i32 } %A, i32 11, 1
+          // which allows the unused 0,0 element from the nested struct to be
+          // removed.
+          return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+        else
+          // We can't handle this without inserting insertvalues
+          return 0;
+      }
+
+      // This insertvalue inserts something other than what we are looking
+      // for. See if the (aggregate) value inserted into has the value we are
+      // looking for, then.
+      if (*req_idx != *i)
+        return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+                                 InsertBefore);
+    }
+    // If we end up here, the indices of the insertvalue match with those
+    // requested (though possibly only partially). Now we recursively look at
+    // the inserted value, passing any remaining indices.
+    return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+                             InsertBefore);
+  } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+    // something else, we can extract from that something else directly
+    // instead. However, we will need to chain I's indices with the requested
+    // indices.
+
+    // Calculate the number of indices required
+    unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+    // Allocate some space to put the new indices in
+    SmallVector<unsigned, 5> Idxs;
+    Idxs.reserve(size);
+    // Add indices from the extract value instruction
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i)
+      Idxs.push_back(*i);
+
+    // Add requested indices
+    for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
+      Idxs.push_back(*i);
+
+    assert(Idxs.size() == size
+           && "Number of indices added not correct?");
+
+    return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(),
+                             Idxs.end(), InsertBefore);
+  }
+  // Otherwise, we don't know (such as extracting from a function return
+  // value or load instruction)
+  return 0;
+}
+
+/// GetConstantStringInfo - This function computes the length of a
+/// null-terminated C string pointed to by V.
If successful, it returns true +/// and returns the string in Str. If unsuccessful, it returns false. +bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, + bool StopAtNul) { + // If V is NULL then return false; + if (V == NULL) return false; + + // Look through bitcast instructions. + if (BitCastInst *BCI = dyn_cast(V)) + return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return false because ConstantArray can't occur + // any other way + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast(V)) { + if (CE->getOpcode() == Instruction::BitCast) + return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); + if (CE->getOpcode() != Instruction::GetElementPtr) + return false; + GEP = CE; + } + + if (GEP) { + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return false; + + // Make sure the index-ee is a pointer to array of i8. + const PointerType *PT = cast(GEP->getOperand(0)->getType()); + const ArrayType *AT = dyn_cast(PT->getElementType()); + if (AT == 0 || AT->getElementType() != Type::Int8Ty) + return false; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + ConstantInt *FirstIdx = dyn_cast(GEP->getOperand(1)); + if (FirstIdx == 0 || !FirstIdx->isZero()) + return false; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return false; + return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset, + StopAtNul); + } + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast(V); + if (!GV || !GV->isConstant() || !GV->hasInitializer()) + return false; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case + if (isa(GlobalInit)) { + // This is a degenerate case. The initializer is constant zero so the + // length of the string must be zero. + Str.clear(); + return true; + } + + // Must be a Constant Array + ConstantArray *Array = dyn_cast(GlobalInit); + if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + if (Offset > NumElts) + return false; + + // Traverse the constant array from 'Offset' which is the place the GEP refers + // to in the array. + Str.reserve(NumElts-Offset); + for (unsigned i = Offset; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return false; + if (StopAtNul && CI->isZero()) + return true; // we found end of string, success! + Str += (char)CI->getZExtValue(); + } + + // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. 
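The array walk at the end of GetConstantStringInfo reduces, on raw bytes, to copying from Offset until the terminating NUL when StopAtNul is set, and to the end of the array otherwise. Standalone sketch (hypothetical names; a byte array stands in for the ConstantArray initializer):

    #include <cassert>
    #include <cstdint>
    #include <string>

    static bool extractString(const uint8_t *Elts, uint64_t NumElts,
                              uint64_t Offset, bool StopAtNul,
                              std::string &Str) {
      if (Offset > NumElts)
        return false;
      Str.clear();
      for (uint64_t i = Offset; i != NumElts; ++i) {
        if (StopAtNul && Elts[i] == 0)
          return true;              // found the end of the string
        Str += (char)Elts[i];
      }
      return true;                  // no NUL; may still be a memcpy source
    }

    int main() {
      const uint8_t Init[] = {'h', 'i', 0, 'x'};
      std::string S;
      assert(extractString(Init, 4, 0, true, S) && S == "hi");
      assert(extractString(Init, 4, 3, true, S) && S == "x");
    }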
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
new file mode 100644
index 000000000000..c6c89d27dbb0
--- /dev/null
+++ b/lib/Archive/Archive.cpp
@@ -0,0 +1,266 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that are common to both reading and writing archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Process.h"
+#include <memory>
+#include <cstring>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of the member's payload. Use getSize() for that.
+unsigned
+ArchiveMember::getMemberSize() const {
+  // Basically it's the file size plus the header size
+  unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+  // If it has a long filename, include the name length
+  if (hasLongFilename())
+    result += path.toString().length() + 1;
+
+  // If it's now odd-lengthed, include the padding byte
+  if (result % 2 != 0)
+    result++;
+
+  return result;
+}
+
+// This default constructor is only used by the ilist when it creates its
+// sentry node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+  : parent(0), path("--invalid--"), flags(0), data(0)
+{
+  info.user = sys::Process::GetCurrentUserId();
+  info.group = sys::Process::GetCurrentGroupId();
+  info.mode = 0777;
+  info.fileSize = 0;
+  info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+  : parent(PAR), path(), flags(0), data(0)
+{
+}
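getMemberSize above encodes the classic ar(1) layout rule that every member, header included, occupies an even number of bytes on disk. The same computation as a standalone sketch (the 60-byte constant is sizeof(ArchiveMemberHeader); all names here are illustrative):

    #include <cstddef>

    // Physical bytes one member occupies: fixed-size header, plus the long
    // filename (and its terminator byte) when the name does not fit in the
    // header's 16-byte name field, plus the payload, padded to even length.
    static size_t memberSizeOnDisk(size_t payloadSize, size_t nameLen,
                                   bool hasLongName) {
      size_t result = 60 /* sizeof(ArchiveMemberHeader) */ + payloadSize;
      if (hasLongName)
        result += nameLen + 1;
      if (result % 2 != 0)
        result++;      // '\n' pad byte keeps the next header even-aligned
      return result;
    }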
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+  if (!newFile.exists()) {
+    if (ErrMsg)
+      *ErrMsg = "Can not replace an archive member with a non-existent file";
+    return true;
+  }
+
+  data = 0;
+  path = newFile;
+
+  // SVR4 symbol tables have an empty name
+  if (path.toString() == ARFILE_SVR4_SYMTAB_NAME)
+    flags |= SVR4SymbolTableFlag;
+  else
+    flags &= ~SVR4SymbolTableFlag;
+
+  // BSD4.4 symbol tables have a special name
+  if (path.toString() == ARFILE_BSD4_SYMTAB_NAME)
+    flags |= BSD4SymbolTableFlag;
+  else
+    flags &= ~BSD4SymbolTableFlag;
+
+  // LLVM symbol tables have a very specific name
+  if (path.toString() == ARFILE_LLVM_SYMTAB_NAME)
+    flags |= LLVMSymbolTableFlag;
+  else
+    flags &= ~LLVMSymbolTableFlag;
+
+  // String table name
+  if (path.toString() == ARFILE_STRTAB_NAME)
+    flags |= StringTableFlag;
+  else
+    flags &= ~StringTableFlag;
+
+  // If it has a slash then it has a path
+  bool hasSlash = path.toString().find('/') != std::string::npos;
+  if (hasSlash)
+    flags |= HasPathFlag;
+  else
+    flags &= ~HasPathFlag;
+
+  // If it has a slash or it's over 15 chars then it's a long filename format
+  if (hasSlash || path.toString().length() > 15)
+    flags |= HasLongFilenameFlag;
+  else
+    flags &= ~HasLongFilenameFlag;
+
+  // Get the signature and status info
+  const char* signature = (const char*) data;
+  std::string magic;
+  if (!signature) {
+    path.getMagicNumber(magic,4);
+    signature = magic.c_str();
+    std::string err;
+    const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+    if (FSinfo)
+      info = *FSinfo;
+    else
+      return true;
+  }
+
+  // Determine what kind of file it is
+  switch (sys::IdentifyFileType(signature,4)) {
+    case sys::Bitcode_FileType:
+      flags |= BitcodeFlag;
+      break;
+    default:
+      flags &= ~BitcodeFlag;
+      break;
+  }
+  return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename)
+  : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+    symTabSize(0), firstFileOffset(0), modules(), foreignST(0) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+  mapfile = MemoryBuffer::getFile(archPath.c_str(), ErrMsg);
+  if (mapfile == 0)
+    return true;
+  base = mapfile->getBufferStart();
+  return false;
+}
+
+void Archive::cleanUpMemory() {
+  // Shutdown the file mapping
+  delete mapfile;
+  mapfile = 0;
+  base = 0;
+
+  // Forget the entire symbol table
+  symTab.clear();
+  symTabSize = 0;
+
+  firstFileOffset = 0;
+
+  // Free the foreign symbol table member
+  if (foreignST) {
+    delete foreignST;
+    foreignST = 0;
+  }
+
+  // Delete any ModuleProviders and ArchiveMembers we've allocated as a result
+  // of symbol table searches.
+  for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I) {
+    delete I->second.first;
+    delete I->second.second;
+  }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+  cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module *M, std::vector<std::string> &symbols) {
+  // Loop over global variables
+  for (Module::global_iterator GI = M->global_begin(), GE=M->global_end();
+       GI != GE; ++GI)
+    if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+      if (!GI->getName().empty())
+        symbols.push_back(GI->getName());
+
+  // Loop over functions
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+    if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+      if (!FI->getName().empty())
+        symbols.push_back(FI->getName());
+
+  // Loop over aliases
+  for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+       AI != AE; ++AI) {
+    if (AI->hasName())
+      symbols.push_back(AI->getName());
+  }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+                             std::vector<std::string>& symbols,
+                             std::string* ErrMsg) {
+  std::auto_ptr<MemoryBuffer> Buffer(
+                       MemoryBuffer::getFileOrSTDIN(fName.c_str()));
+  if (!Buffer.get()) {
+    if (ErrMsg) *ErrMsg = "Could not open file '" + fName.toString() + "'";
+    return true;
+  }
+
+  ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), ErrMsg);
+  if (!MP)
+    return true;
+
+  // Get the module from the provider
+  Module* M = MP->materializeModule();
+  if (M == 0) {
+    delete MP;
+    return true;
+  }
+
+  // Get the symbols
+  getSymbols(M, symbols);
+
+  // Done with the module.
+  delete MP;
+  return true;
+}
+
+ModuleProvider*
+llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length,
+                        const std::string& ModuleID,
+                        std::vector<std::string>& symbols,
+                        std::string* ErrMsg) {
+  // Get the module provider
+  MemoryBuffer *Buffer =
+    MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str());
+  memcpy((char*)Buffer->getBufferStart(), BufPtr, Length);
+
+  ModuleProvider *MP = getBitcodeModuleProvider(Buffer, ErrMsg);
+  if (!MP)
+    return 0;
+
+  // Get the module from the provider
+  Module* M = MP->materializeModule();
+  if (M == 0) {
+    delete MP;
+    return 0;
+  }
+
+  // Get the symbols
+  getSymbols(M, symbols);
+
+  // Done with the module. Note that the ModuleProvider will delete the
+  // Module when it is deleted. Also note that it's the caller's responsibility
+  // to delete the ModuleProvider.
+  return MP;
+}
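A hypothetical caller of the buffer-based overload above, shown only to make the ownership contract concrete; the bufPtr and bufLen variables are assumed to exist, and the member name is made up:

    // Sketch, not from the source: on success the caller owns the returned
    // ModuleProvider, which in turn owns the materialized Module.
    std::vector<std::string> symbols;
    std::string err;
    ModuleProvider *MP = llvm::GetBitcodeSymbols(bufPtr, bufLen,
                                                 "member.bc", symbols, &err);
    if (MP) {
      // symbols now holds the externally visible defined names.
      delete MP;   // also deletes the Module it materialized
    }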
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
new file mode 100644
index 000000000000..7ba30244a213
--- /dev/null
+++ b/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,85 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/System/TimeValue.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n"                   ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1)  ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/               " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME      "//              " ///< Name of string table
+#define ARFILE_PAD "\n"                            ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n"                  ///< fmag field magic #
+
+namespace llvm {
+
+  /// The ArchiveMemberHeader structure is used internally for bitcode
+  /// archives.
+  /// The header precedes each file member in the archive. This structure is
+  /// defined using character arrays for direct and correct interpretation
+  /// regardless of the endianness of the machine that produced it.
+  /// @brief Archive File Member Header
+  class ArchiveMemberHeader {
+    /// @name Data
+    /// @{
+    public:
+      char name[16];  ///< Name of the file member.
+      char date[12];  ///< File date, decimal seconds since Epoch
+      char uid[6];    ///< user id in ASCII decimal
+      char gid[6];    ///< group id in ASCII decimal
+      char mode[8];   ///< file mode in ASCII octal
+      char size[10];  ///< file size in ASCII decimal
+      char fmag[2];   ///< Always contains ARFILE_MEMBER_MAGIC
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+      void init() {
+        memset(name,' ',16);
+        memset(date,' ',12);
+        memset(uid,' ',6);
+        memset(gid,' ',6);
+        memset(mode,' ',8);
+        memset(size,' ',10);
+        fmag[0] = '`';
+        fmag[1] = '\n';
+      }
+
+      bool checkSignature() {
+        return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+      }
+  };
+
+  // Get just the externally visible defined symbols from the bitcode
+  bool GetBitcodeSymbols(const sys::Path& fName,
+                         std::vector<std::string>& symbols,
+                         std::string* ErrMsg);
+
+  ModuleProvider* GetBitcodeSymbols(const unsigned char *Buffer,
+                                    unsigned Length,
+                                    const std::string& ModuleID,
+                                    std::vector<std::string>& symbols,
+                                    std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
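The init() method above space-fills the header, and the writer (ArchiveWriter.cpp, later in this patch) then overwrites each field with sprintf-formatted, left-justified ASCII and no NUL terminator. A standalone sketch of that field convention (names are illustrative):

    #include <cstdio>
    #include <cstring>

    // Format a value into a fixed-width, left-justified, space-padded ASCII
    // field, dropping the NUL that sprintf appends, as ar(1) headers require.
    static void fillField(char *field, int width, unsigned value, bool octal) {
      char buf[32];
      sprintf(buf, octal ? "%-*o" : "%-*u", width, value);
      memcpy(field, buf, width);   // copy exactly 'width' bytes, no terminator
    }

    int main() {
      char mode[8];
      fillField(mode, 8, 0644, true);  // mode now holds "644     " (8 bytes)
      return 0;
    }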
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 000000000000..b07e884b6547
--- /dev/null
+++ b/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,627 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up standard unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Module.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
+static inline unsigned readInteger(const char*&At, const char*End) {
+  unsigned Shift = 0;
+  unsigned Result = 0;
+
+  do {
+    if (At == End)
+      return Result;
+    Result |= (unsigned)((*At++) & 0x7F) << Shift;
+    Shift += 7;
+  } while (At[-1] & 0x80);
+  return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+  const char* At = (const char*) data;
+  const char* End = At + size;
+  while (At < End) {
+    unsigned offset = readInteger(At, End);
+    if (At == End) {
+      if (error)
+        *error = "Ran out of data reading vbr_uint for symtab offset!";
+      return false;
+    }
+    unsigned length = readInteger(At, End);
+    if (At == End) {
+      if (error)
+        *error = "Ran out of data reading vbr_uint for symtab length!";
+      return false;
+    }
+    if (At + length > End) {
+      if (error)
+        *error = "Malformed symbol table: length not consistent with size";
+      return false;
+    }
+    // we don't care if it can't be inserted (duplicate entry)
+    symTab.insert(std::make_pair(std::string(At, length), offset));
+    At += length;
+  }
+  symTabSize = size;
+  return true;
+}
+
+// This member parses an ArchiveMemberHeader that is presumed to be pointed to
+// by At. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+  if (At + sizeof(ArchiveMemberHeader) >= End) {
+    if (error)
+      *error = "Unexpected end of file";
+    return 0;
+  }
+
+  // Cast archive member header
+  ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
+  At += sizeof(ArchiveMemberHeader);
+
+  // Extract the size and determine if the file is
+  // compressed or not (negative length).
+  int flags = 0;
+  int MemberSize = atoi(Hdr->size);
+  if (MemberSize < 0) {
+    flags |= ArchiveMember::CompressedFlag;
+    MemberSize = -MemberSize;
+  }
+
+  // Check the size of the member for sanity
+  if (At + MemberSize > End) {
+    if (error)
+      *error = "invalid member length in archive file";
+    return 0;
+  }
+
+  // Check the member signature
+  if (!Hdr->checkSignature()) {
+    if (error)
+      *error = "invalid file member signature";
+    return 0;
+  }
+
+  // Convert and check the member name
+  // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
+  // table. The special name "//" and 14 blanks is for a string table, used
+  // for long file names. This library doesn't generate either of those but
+  // it will accept them. If the name starts with #1/ and the remainder is
+  // digits, then those digits specify the length of the name that is
+  // stored immediately following the header. The special name
+  // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode.
+  // Anything else is a regular, short filename that is terminated with
+  // a '/' and blanks.
+ + std::string pathname; + switch (Hdr->name[0]) { + case '#': + if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { + if (isdigit(Hdr->name[3])) { + unsigned len = atoi(&Hdr->name[3]); + pathname.assign(At, len); + At += len; + MemberSize -= len; + flags |= ArchiveMember::HasLongFilenameFlag; + } else { + if (error) + *error = "invalid long filename"; + return 0; + } + } else if (Hdr->name[1] == '_' && + (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { + // The member is using a long file name (>15 chars) format. + // This format is standard for 4.4BSD and Mac OSX operating + // systems. LLVM uses it similarly. In this format, the + // remainder of the name field (after #1/) specifies the + // length of the file name which occupy the first bytes of + // the member's data. The pathname already has the #1/ stripped. + pathname.assign(ARFILE_LLVM_SYMTAB_NAME); + flags |= ArchiveMember::LLVMSymbolTableFlag; + } + break; + case '/': + if (Hdr->name[1]== '/') { + if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { + pathname.assign(ARFILE_STRTAB_NAME); + flags |= ArchiveMember::StringTableFlag; + } else { + if (error) + *error = "invalid string table name"; + return 0; + } + } else if (Hdr->name[1] == ' ') { + if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { + pathname.assign(ARFILE_SVR4_SYMTAB_NAME); + flags |= ArchiveMember::SVR4SymbolTableFlag; + } else { + if (error) + *error = "invalid SVR4 symbol table name"; + return 0; + } + } else if (isdigit(Hdr->name[1])) { + unsigned index = atoi(&Hdr->name[1]); + if (index < strtab.length()) { + const char* namep = strtab.c_str() + index; + const char* endp = strtab.c_str() + strtab.length(); + const char* p = namep; + const char* last_p = p; + while (p < endp) { + if (*p == '\n' && *last_p == '/') { + pathname.assign(namep, last_p - namep); + flags |= ArchiveMember::HasLongFilenameFlag; + break; + } + last_p = p; + p++; + } + if (p >= endp) { + if (error) + *error = "missing name termiantor in string table"; + return 0; + } + } else { + if (error) + *error = "name index beyond string table"; + return 0; + } + } + break; + case '_': + if (Hdr->name[1] == '_' && + (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { + pathname.assign(ARFILE_BSD4_SYMTAB_NAME); + flags |= ArchiveMember::BSD4SymbolTableFlag; + break; + } + /* FALL THROUGH */ + + default: + char* slash = (char*) memchr(Hdr->name, '/', 16); + if (slash == 0) + slash = Hdr->name + 16; + pathname.assign(Hdr->name, slash - Hdr->name); + break; + } + + // Determine if this is a bitcode file + switch (sys::IdentifyFileType(At, 4)) { + case sys::Bitcode_FileType: + flags |= ArchiveMember::BitcodeFlag; + break; + default: + flags &= ~ArchiveMember::BitcodeFlag; + break; + } + + // Instantiate the ArchiveMember to be filled + ArchiveMember* member = new ArchiveMember(this); + + // Fill in fields of the ArchiveMember + member->parent = this; + member->path.set(pathname); + member->info.fileSize = MemberSize; + member->info.modTime.fromEpochTime(atoi(Hdr->date)); + unsigned int mode; + sscanf(Hdr->mode, "%o", &mode); + member->info.mode = mode; + member->info.user = atoi(Hdr->uid); + member->info.group = atoi(Hdr->gid); + member->flags = flags; + member->data = At; + + return member; +} + +bool +Archive::checkSignature(std::string* error) { + // Check the magic string at file's header + if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { + if (error) + *error = "invalid signature for an archive file"; + return false; + } + return true; +} + +// This function 
loads the entire archive and fully populates its ilist with +// the members of the archive file. This is typically used in preparation for +// editing the contents of the archive. +bool +Archive::loadArchive(std::string* error) { + + // Set up parsing + members.clear(); + symTab.clear(); + const char *At = base; + const char *End = mapfile->getBufferEnd(); + + if (!checkSignature(error)) + return false; + + At += 8; // Skip the magic string. + + bool seenSymbolTable = false; + bool foundFirstFile = false; + while (At < End) { + // parse the member header + const char* Save = At; + ArchiveMember* mbr = parseMemberHeader(At, End, error); + if (!mbr) + return false; + + // check if this is the foreign symbol table + if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { + // We just save this but don't do anything special + // with it. It doesn't count as the "first file". + if (foreignST) { + // What? Multiple foreign symbol tables? Just chuck it + // and retain the last one found. + delete foreignST; + } + foreignST = mbr; + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + } else if (mbr->isStringTable()) { + // Simply suck the entire string table into a string + // variable. This will be used to get the names of the + // members that use the "/ddd" format for their names + // (SVR4 style long names). + strtab.assign(At, mbr->getSize()); + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + delete mbr; + } else if (mbr->isLLVMSymbolTable()) { + // This is the LLVM symbol table for the archive. If we've seen it + // already, its an error. Otherwise, parse the symbol table and move on. + if (seenSymbolTable) { + if (error) + *error = "invalid archive: multiple symbol tables"; + return false; + } + if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) + return false; + seenSymbolTable = true; + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + delete mbr; // We don't need this member in the list of members. + } else { + // This is just a regular file. If its the first one, save its offset. + // Otherwise just push it on the list and move on to the next file. + if (!foundFirstFile) { + firstFileOffset = Save - base; + foundFirstFile = true; + } + members.push_back(mbr); + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + } + } + return true; +} + +// Open and completely load the archive file. 
+Archive*
+Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage)
+{
+  std::auto_ptr<Archive> result(new Archive(file));
+  if (result->mapToMemory(ErrorMessage))
+    return 0;
+  if (!result->loadArchive(ErrorMessage))
+    return 0;
+  return result.release();
+}
+
+// Get all the bitcode modules from the archive
+bool
+Archive::getAllModules(std::vector<Module*>& Modules,
+                       std::string* ErrMessage) {
+
+  for (iterator I=begin(), E=end(); I != E; ++I) {
+    if (I->isBitcode()) {
+      std::string FullMemberName = archPath.toString() +
+        "(" + I->getPath().toString() + ")";
+      MemoryBuffer *Buffer =
+        MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
+      memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+
+      Module *M = ParseBitcodeFile(Buffer, ErrMessage);
+      delete Buffer;
+      if (!M)
+        return true;
+
+      Modules.push_back(M);
+    }
+  }
+  return false;
+}
+
+// Load just the symbol table from the archive file
+bool
+Archive::loadSymbolTable(std::string* ErrorMsg) {
+
+  // Set up parsing
+  members.clear();
+  symTab.clear();
+  const char *At = base;
+  const char *End = mapfile->getBufferEnd();
+
+  // Make sure we're dealing with an archive
+  if (!checkSignature(ErrorMsg))
+    return false;
+
+  At += 8; // Skip signature
+
+  // Parse the first file member header
+  const char* FirstFile = At;
+  ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
+  if (!mbr)
+    return false;
+
+  if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+    // Skip the foreign symbol table, we don't do anything with it
+    At += mbr->getSize();
+    if ((intptr_t(At) & 1) == 1)
+      At++;
+    delete mbr;
+
+    // Read the next one
+    FirstFile = At;
+    mbr = parseMemberHeader(At, End, ErrorMsg);
+    if (!mbr) {
+      delete mbr;
+      return false;
+    }
+  }
+
+  if (mbr->isStringTable()) {
+    // Process the string table entry
+    strtab.assign((const char*)mbr->getData(), mbr->getSize());
+    At += mbr->getSize();
+    if ((intptr_t(At) & 1) == 1)
+      At++;
+    delete mbr;
+    // Get the next one
+    FirstFile = At;
+    mbr = parseMemberHeader(At, End, ErrorMsg);
+    if (!mbr) {
+      delete mbr;
+      return false;
+    }
+  }
+
+  // See if it's the symbol table
+  if (mbr->isLLVMSymbolTable()) {
+    if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
+      delete mbr;
+      return false;
+    }
+
+    At += mbr->getSize();
+    if ((intptr_t(At) & 1) == 1)
+      At++;
+    delete mbr;
+    // Can't be any more symtab headers so just advance
+    FirstFile = At;
+  } else {
+    // There's no symbol table in the file. We have to rebuild it from scratch
+    // because the intent of this method is to get the symbol table loaded so
+    // it can be searched efficiently.
+    // Add the member to the members list
+    members.push_back(mbr);
+  }
+
+  firstFileOffset = FirstFile - base;
+  return true;
+}
+
+// Open the archive and load just the symbol tables
+Archive*
+Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) {
+  std::auto_ptr<Archive> result(new Archive(file));
+  if (result->mapToMemory(ErrorMessage))
+    return 0;
+  if (!result->loadSymbolTable(ErrorMessage))
+    return 0;
+  return result.release();
+}
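A hypothetical read-only consumer of the loading API above; the archive path is made up, and Archive::iterator is assumed to be the member-list iterator already used by getAllModules earlier in this file:

    // Sketch, not from the source: open an archive, list its bitcode
    // members, and free it. OpenAndLoad returns null on failure.
    std::string err;
    Archive *A = Archive::OpenAndLoad(sys::Path("libfoo.a"), &err);
    if (A) {
      for (Archive::iterator I = A->begin(), E = A->end(); I != E; ++I)
        if (I->isBitcode())
          printf("%s\n", I->getPath().toString().c_str());
      delete A;
    }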
+
+// Look up one symbol in the symbol table and return a ModuleProvider for the
+// module that defines that symbol.
+ModuleProvider*
+Archive::findModuleDefiningSymbol(const std::string& symbol,
+                                  std::string* ErrMsg) {
+  SymTabType::iterator SI = symTab.find(symbol);
+  if (SI == symTab.end())
+    return 0;
+
+  // The symbol table was previously constructed assuming that the members were
+  // written without the symbol table header. Because VBR encoding is used, the
+  // values could not be adjusted to account for the offset of the symbol table
+  // because that could affect the size of the symbol table due to VBR encoding.
+  // We now have to account for this by adjusting the offset by the size of the
+  // symbol table and its header.
+  unsigned fileOffset =
+    SI->second +            // offset in symbol-table-less file
+    firstFileOffset;        // add offset to first "real" file in archive
+
+  // See if the module is already loaded
+  ModuleMap::iterator MI = modules.find(fileOffset);
+  if (MI != modules.end())
+    return MI->second.first;
+
+  // Module hasn't been loaded yet, we need to load it
+  const char* modptr = base + fileOffset;
+  ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+                                         ErrMsg);
+  if (!mbr)
+    return 0;
+
+  // Now, load the bitcode module to get the ModuleProvider
+  std::string FullMemberName = archPath.toString() + "(" +
+    mbr->getPath().toString() + ")";
+  MemoryBuffer *Buffer =
+    MemoryBuffer::getNewMemBuffer(mbr->getSize(), FullMemberName.c_str());
+  memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize());
+
+  ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg);
+  if (!mp)
+    return 0;
+
+  modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr)));
+
+  return mp;
+}
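The offsets and string lengths in the symbol table discussed above are stored with the same 7-bits-per-byte scheme that readInteger decodes earlier in this file and that writeInteger in ArchiveWriter.cpp (below) produces. A self-contained round-trip sketch of that encoding (names are illustrative):

    #include <cassert>
    #include <string>

    // Encode: emit the low 7 bits per byte, setting the high bit on every
    // byte except the last, exactly as writeInteger does.
    static void vbrEncode(unsigned num, std::string &out) {
      while (num >= 0x80) {
        out += (char)(0x80 | (num & 0x7F));
        num >>= 7;
      }
      out += (char)num;
    }

    // Decode: accumulate 7 bits at a time until a byte without the high bit,
    // exactly as readInteger does.
    static unsigned vbrDecode(const char *&at, const char *end) {
      unsigned shift = 0, result = 0;
      do {
        if (at == end) return result;
        result |= (unsigned)(*at++ & 0x7F) << shift;
        shift += 7;
      } while (at[-1] & 0x80);
      return result;
    }

    int main() {
      std::string buf;
      vbrEncode(300, buf);   // 300 encodes as 0xAC 0x02
      const char *at = buf.data(), *end = at + buf.size();
      assert(vbrDecode(at, end) == 300 && at == end);
      return 0;
    }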
+
+// Look up multiple symbols in the symbol table and return a set of
+// ModuleProviders that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+                                    std::set<ModuleProvider*>& result,
+                                    std::string* error) {
+  if (!mapfile || !base) {
+    if (error)
+      *error = "Empty archive invalid for finding modules defining symbols";
+    return false;
+  }
+
+  if (symTab.empty()) {
+    // We don't have a symbol table, so we must build it now but lets also
+    // make sure that we populate the modules table as we do this to ensure
+    // that we don't load them twice when findModuleDefiningSymbol is called
+    // below.
+
+    // Get a pointer to the first file
+    const char* At = base + firstFileOffset;
+    const char* End = mapfile->getBufferEnd();
+
+    while (At < End) {
+      // Compute the offset to be put in the symbol table
+      unsigned offset = At - base - firstFileOffset;
+
+      // Parse the file's header
+      ArchiveMember* mbr = parseMemberHeader(At, End, error);
+      if (!mbr)
+        return false;
+
+      // If it contains symbols
+      if (mbr->isBitcode()) {
+        // Get the symbols
+        std::vector<std::string> symbols;
+        std::string FullMemberName = archPath.toString() + "(" +
+          mbr->getPath().toString() + ")";
+        ModuleProvider* MP =
+          GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(),
+                            FullMemberName, symbols, error);
+
+        if (MP) {
+          // Insert the module's symbols into the symbol table
+          for (std::vector<std::string>::iterator I = symbols.begin(),
+               E = symbols.end(); I != E; ++I) {
+            symTab.insert(std::make_pair(*I, offset));
+          }
+          // Insert the ModuleProvider and the ArchiveMember into the table of
+          // modules.
+          modules.insert(std::make_pair(offset, std::make_pair(MP, mbr)));
+        } else {
+          if (error)
+            *error = "Can't parse bitcode member: " +
+              mbr->getPath().toString() + ": " + *error;
+          delete mbr;
+          return false;
+        }
+      }
+
+      // Go to the next file location
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+    }
+  }
+
+  // At this point we have a valid symbol table (one way or another) so we
+  // just use it to quickly find the symbols requested.
+
+  for (std::set<std::string>::iterator I=symbols.begin(),
+       E=symbols.end(); I != E;) {
+    // See if this symbol exists
+    ModuleProvider* mp = findModuleDefiningSymbol(*I, error);
+    if (mp) {
+      // The symbol exists, insert the ModuleProvider into our result;
+      // duplicates will be ignored.
+      result.insert(mp);
+
+      // Remove the symbol now that it's been resolved, being careful to
+      // post-increment the iterator.
+      symbols.erase(I++);
+    } else {
+      ++I;
+    }
+  }
+  return true;
+}
+
+bool Archive::isBitcodeArchive() {
+  // Make sure the symTab has been loaded. In most cases this should have been
+  // done when the archive was constructed, but still, this is just in case.
+  if (symTab.empty())
+    if (!loadSymbolTable(0))
+      return false;
+
+  // Now that we know it's been loaded, return true
+  // if it has a size
+  if (symTab.size()) return true;
+
+  // We still can't be sure it isn't a bitcode archive
+  if (!loadArchive(0))
+    return false;
+
+  std::vector<Module*> Modules;
+  std::string ErrorMessage;
+
+  // Scan the archive, trying to load a bitcode member. We only load one to
+  // see if this works.
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    if (!I->isBitcode())
+      continue;
+
+    std::string FullMemberName =
+      archPath.toString() + "(" + I->getPath().toString() + ")";
+
+    MemoryBuffer *Buffer =
+      MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
+    memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+    Module *M = ParseBitcodeFile(Buffer);
+    delete Buffer;
+    if (!M)
+      return false;  // Couldn't parse bitcode, not a bitcode archive.
+    delete M;
+    return true;
+  }
+
+  return false;
+}
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
new file mode 100644
index 000000000000..336a2bdc6586
--- /dev/null
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -0,0 +1,482 @@
+//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up an LLVM archive file (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Signals.h"
+#include "llvm/System/Process.h"
+#include "llvm/ModuleProvider.h"
+#include <fstream>
+#include <ostream>
+#include <iomanip>
+using namespace llvm;
+
+// Write an integer using variable bit rate encoding. This saves a few bytes
+// per entry in the symbol table.
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+  while (1) {
+    if (num < 0x80) { // done?
+      ARFile << (unsigned char)num;
+      return;
+    }
+
+    // Nope, we are bigger than a character, output the next 7 bits and set the
+    // high bit to say that there is more coming...
+    ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F));
+    num >>= 7; // Shift out 7 bits now...
+  }
+}
+
+// Compute how many bytes are taken by a given VBR encoded value. This is needed
+// to pre-compute the size of the symbol table.
+static inline unsigned numVbrBytes(unsigned num) {
+
+  // Note that the following nested ifs are somewhat equivalent to a binary
+  // search. We split it in half by comparing against 2^14 first.
This allows + // most reasonable values to be done in 2 comparisons instead of 1 for + // small ones and four for large ones. We expect this to access file offsets + // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, + // so this approach is reasonable. + if (num < 1<<14) { + if (num < 1<<7) + return 1; + else + return 2; + } + if (num < 1<<21) + return 3; + + if (num < 1<<28) + return 4; + return 5; // anything >= 2^28 takes 5 bytes +} + +// Create an empty archive. +Archive* +Archive::CreateEmpty(const sys::Path& FilePath ) { + Archive* result = new Archive(FilePath); + return result; +} + +// Fill the ArchiveMemberHeader with the information from a member. If +// TruncateNames is true, names are flattened to 15 chars or less. The sz field +// is provided here instead of coming from the mbr because the member might be +// stored compressed and the compressed size is not the ArchiveMember's size. +// Furthermore compressed files have negative size fields to identify them as +// compressed. +bool +Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, + int sz, bool TruncateNames) const { + + // Set the permissions mode, uid and gid + hdr.init(); + char buffer[32]; + sprintf(buffer, "%-8o", mbr.getMode()); + memcpy(hdr.mode,buffer,8); + sprintf(buffer, "%-6u", mbr.getUser()); + memcpy(hdr.uid,buffer,6); + sprintf(buffer, "%-6u", mbr.getGroup()); + memcpy(hdr.gid,buffer,6); + + // Set the last modification date + uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); + sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); + memcpy(hdr.date,buffer,12); + + // Get rid of trailing blanks in the name + std::string mbrPath = mbr.getPath().toString(); + size_t mbrLen = mbrPath.length(); + while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { + mbrPath.erase(mbrLen-1,1); + mbrLen--; + } + + // Set the name field in one of its various flavors. + bool writeLongName = false; + if (mbr.isStringTable()) { + memcpy(hdr.name,ARFILE_STRTAB_NAME,16); + } else if (mbr.isSVR4SymbolTable()) { + memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); + } else if (mbr.isBSD4SymbolTable()) { + memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); + } else if (mbr.isLLVMSymbolTable()) { + memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); + } else if (TruncateNames) { + const char* nm = mbrPath.c_str(); + unsigned len = mbrPath.length(); + size_t slashpos = mbrPath.rfind('/'); + if (slashpos != std::string::npos) { + nm += slashpos + 1; + len -= slashpos +1; + } + if (len > 15) + len = 15; + memcpy(hdr.name,nm,len); + hdr.name[len] = '/'; + } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { + memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); + hdr.name[mbrPath.length()] = '/'; + } else { + std::string nm = "#1/"; + nm += utostr(mbrPath.length()); + memcpy(hdr.name,nm.data(),nm.length()); + if (sz < 0) + sz -= mbrPath.length(); + else + sz += mbrPath.length(); + writeLongName = true; + } + + // Set the size field + if (sz < 0) { + buffer[0] = '-'; + sprintf(&buffer[1],"%-9u",(unsigned)-sz); + } else { + sprintf(buffer, "%-10u", (unsigned)sz); + } + memcpy(hdr.size,buffer,10); + + return writeLongName; +} + +// Insert a file into the archive before some other member. This also takes care +// of extracting the necessary flags and information from the file. 
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+                       std::string* ErrMsg) {
+  if (!filePath.exists()) {
+    if (ErrMsg)
+      *ErrMsg = "Can not add a non-existent file to archive";
+    return true;
+  }
+
+  ArchiveMember* mbr = new ArchiveMember(this);
+
+  mbr->data = 0;
+  mbr->path = filePath;
+  const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+  if (FSInfo)
+    mbr->info = *FSInfo;
+  else
+    return true;
+
+  unsigned flags = 0;
+  bool hasSlash = filePath.toString().find('/') != std::string::npos;
+  if (hasSlash)
+    flags |= ArchiveMember::HasPathFlag;
+  if (hasSlash || filePath.toString().length() > 15)
+    flags |= ArchiveMember::HasLongFilenameFlag;
+  std::string magic;
+  mbr->path.getMagicNumber(magic,4);
+  switch (sys::IdentifyFileType(magic.c_str(),4)) {
+    case sys::Bitcode_FileType:
+      flags |= ArchiveMember::BitcodeFlag;
+      break;
+    default:
+      break;
+  }
+  mbr->flags = flags;
+  members.insert(where,mbr);
+  return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+  const ArchiveMember& member,
+  std::ofstream& ARFile,
+  bool CreateSymbolTable,
+  bool TruncateNames,
+  bool ShouldCompress,
+  std::string* ErrMsg
+) {
+
+  unsigned filepos = ARFile.tellp();
+  filepos -= 8;
+
+  // Get the data and its size either from the
+  // member's in-memory data or directly from the file.
+  size_t fSize = member.getSize();
+  const char *data = (const char*)member.getData();
+  MemoryBuffer *mFile = 0;
+  if (!data) {
+    mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg);
+    if (mFile == 0)
+      return true;
+    data = mFile->getBufferStart();
+    fSize = mFile->getBufferSize();
+  }
+
+  // Now that we have the data in memory, update the
+  // symbol table if it's a bitcode file.
+  if (CreateSymbolTable && member.isBitcode()) {
+    std::vector<std::string> symbols;
+    std::string FullMemberName = archPath.toString() + "(" +
+      member.getPath().toString() + ")";
+    ModuleProvider* MP =
+      GetBitcodeSymbols((const unsigned char*)data, fSize,
+                        FullMemberName, symbols, ErrMsg);
+
+    // If the bitcode parsed successfully
+    if (MP) {
+      for (std::vector<std::string>::iterator SI = symbols.begin(),
+           SE = symbols.end(); SI != SE; ++SI) {
+
+        std::pair<SymTabType::iterator,bool> Res =
+          symTab.insert(std::make_pair(*SI,filepos));
+
+        if (Res.second) {
+          symTabSize += SI->length() +
+                        numVbrBytes(SI->length()) +
+                        numVbrBytes(filepos);
+        }
+      }
+      // We don't need this module any more.
+      delete MP;
+    } else {
+      delete mFile;
+      if (ErrMsg)
+        *ErrMsg = "Can't parse bitcode member: " + member.getPath().toString()
+                  + ": " + *ErrMsg;
+      return true;
+    }
+  }
+
+  int hdrSize = fSize;
+
+  // Compute the fields of the header
+  ArchiveMemberHeader Hdr;
+  bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
+
+  // Write header to archive file
+  ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+  // Write the long filename if it's long
+  if (writeLongName) {
+    ARFile.write(member.getPath().toString().data(),
+                 member.getPath().toString().length());
+  }
+
+  // Write the (possibly compressed) member's content to the file.
+  ARFile.write(data,fSize);
+
+  // Make sure the member is an even length
+  if ((ARFile.tellp() & 1) == 1)
+    ARFile << ARFILE_PAD;
+
+  // Close the mapped file if it was opened
+  delete mFile;
+  return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
+void +Archive::writeSymbolTable(std::ofstream& ARFile) { + + // Construct the symbol table's header + ArchiveMemberHeader Hdr; + Hdr.init(); + memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); + uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); + char buffer[32]; + sprintf(buffer, "%-8o", 0644); + memcpy(Hdr.mode,buffer,8); + sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); + memcpy(Hdr.uid,buffer,6); + sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); + memcpy(Hdr.gid,buffer,6); + sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); + memcpy(Hdr.date,buffer,12); + sprintf(buffer,"%-10u",symTabSize); + memcpy(Hdr.size,buffer,10); + + // Write the header + ARFile.write((char*)&Hdr, sizeof(Hdr)); + +#ifndef NDEBUG + // Save the starting position of the symbol tables data content. + unsigned startpos = ARFile.tellp(); +#endif + + // Write out the symbols sequentially + for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); + I != E; ++I) + { + // Write out the file index + writeInteger(I->second, ARFile); + // Write out the length of the symbol + writeInteger(I->first.length(), ARFile); + // Write out the symbol + ARFile.write(I->first.data(), I->first.length()); + } + +#ifndef NDEBUG + // Now that we're done with the symbol table, get the ending file position + unsigned endpos = ARFile.tellp(); +#endif + + // Make sure that the amount we wrote is what we pre-computed. This is + // critical for file integrity purposes. + assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); + + // Make sure the symbol table is even sized + if (symTabSize % 2 != 0 ) + ARFile << ARFILE_PAD; +} + +// Write the entire archive to the file specified when the archive was created. +// This writes to a temporary file first. Options are for creating a symbol +// table, flattening the file names (no directories, 15 chars max) and +// compressing each archive member. +bool +Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, + std::string* ErrMsg) +{ + // Make sure they haven't opened up the file, not loaded it, + // but are now trying to write it which would wipe out the file. + if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { + if (ErrMsg) + *ErrMsg = "Can't write an archive not opened for writing"; + return true; + } + + // Create a temporary file to store the archive in + sys::Path TmpArchive = archPath; + if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) + return true; + + // Make sure the temporary gets removed if we crash + sys::RemoveFileOnSignal(TmpArchive); + + // Create archive file for output. + std::ios::openmode io_mode = std::ios::out | std::ios::trunc | + std::ios::binary; + std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); + + // Check for errors opening or creating archive file. + if (!ArchiveFile.is_open() || ArchiveFile.bad()) { + if (TmpArchive.exists()) + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + archPath.toString(); + return true; + } + + // If we're creating a symbol table, reset it now + if (CreateSymbolTable) { + symTabSize = 0; + symTab.clear(); + } + + // Write magic string to archive. + ArchiveFile << ARFILE_MAGIC; + + // Loop over all member files, and write them out. Note that this also + // builds the symbol table, symTab. 
+  for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
+    if (writeMember(*I, ArchiveFile, CreateSymbolTable,
+                    TruncateNames, Compress, ErrMsg)) {
+      if (TmpArchive.exists())
+        TmpArchive.eraseFromDisk();
+      ArchiveFile.close();
+      return true;
+    }
+  }
+
+  // Close archive file.
+  ArchiveFile.close();
+
+  // Write the symbol table
+  if (CreateSymbolTable) {
+    // At this point we have written a file that is a legal archive but it
+    // doesn't have a symbol table in it. To aid in faster reading and to
+    // ensure compatibility with other archivers we need to put the symbol
+    // table first in the file. Unfortunately, this means mapping the file
+    // we just wrote back in and copying it to the destination file.
+    sys::Path FinalFilePath = archPath;
+
+    // Map in the archive we just wrote.
+    {
+    OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str()));
+    if (arch == 0) return true;
+    const char* base = arch->getBufferStart();
+
+    // Open another temporary file in order to avoid invalidating the
+    // mmapped data
+    if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
+      return true;
+    sys::RemoveFileOnSignal(FinalFilePath);
+
+    std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+    if (!FinalFile.is_open() || FinalFile.bad()) {
+      if (TmpArchive.exists())
+        TmpArchive.eraseFromDisk();
+      if (ErrMsg)
+        *ErrMsg = "Error opening archive file: " + FinalFilePath.toString();
+      return true;
+    }
+
+    // Write the file magic number
+    FinalFile << ARFILE_MAGIC;
+
+    // If there is a foreign symbol table, put it into the file now. Most
+    // ar(1) implementations require the symbol table to be first but llvm-ar
+    // can deal with it being after a foreign symbol table. This ensures
+    // compatibility with other ar(1) implementations as well as allowing the
+    // archive to store both native .o and LLVM .bc files, both indexed.
+    if (foreignST) {
+      if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
+        FinalFile.close();
+        if (TmpArchive.exists())
+          TmpArchive.eraseFromDisk();
+        return true;
+      }
+    }
+
+    // Put out the LLVM symbol table now.
+    writeSymbolTable(FinalFile);
+
+    // Copy the temporary file contents being sure to skip the file's magic
+    // number.
+    FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
+                    arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
+
+    // Close up shop
+    FinalFile.close();
+    } // free arch.
+
+    // Move the final file over top of TmpArchive
+    if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+      return true;
+  }
+
+  // Before we replace the actual archive, we need to forget all the
+  // members, since they point to data in that old archive. We need to do
+  // this because we cannot replace an open file on Windows.
+  cleanUpMemory();
+
+  if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+    return true;
+
+  // Set correct read and write permissions after temporary file is moved
+  // to final destination path.
+  if (archPath.makeReadableOnDisk(ErrMsg))
+    return true;
+  if (archPath.makeWriteableOnDisk(ErrMsg))
+    return true;
+
+  return false;
+}
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
new file mode 100644
index 000000000000..27698cb17182
--- /dev/null
+++ b/lib/Archive/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMArchive
+  Archive.cpp
+  ArchiveReader.cpp
+  ArchiveWriter.cpp
+  )
\ No newline at end of file
diff --git a/lib/Archive/Makefile b/lib/Archive/Makefile
new file mode 100644
index 000000000000..da9780403a08
--- /dev/null
+++ b/lib/Archive/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Archive/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMArchive
+
+# We only want an archive so only those modules actually used by a tool are
+# included.
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..985ebe200988
--- /dev/null
+++ b/lib/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+# AsmParser
+add_llvm_library(LLVMAsmParser
+  LLLexer.cpp
+  LLParser.cpp
+  Parser.cpp
+  )
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
new file mode 100644
index 000000000000..f2e689017603
--- /dev/null
+++ b/lib/AsmParser/LLLexer.cpp
@@ -0,0 +1,835 @@
+//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLLexer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Assembly/Parser.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
+  // Scan backward to find the start of the line.
+  const char *LineStart = ErrorLoc;
+  while (LineStart != CurBuf->getBufferStart() &&
+         LineStart[-1] != '\n' && LineStart[-1] != '\r')
+    --LineStart;
+  // Get the end of the line.
+  const char *LineEnd = ErrorLoc;
+  while (LineEnd != CurBuf->getBufferEnd() &&
+         LineEnd[0] != '\n' && LineEnd[0] != '\r')
+    ++LineEnd;
+
+  unsigned LineNo = 1;
+  for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP)
+    if (*FP == '\n') ++LineNo;
+
+  std::string LineContents(LineStart, LineEnd);
+  ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions.
+//===----------------------------------------------------------------------===//
+
+// atoull - Convert an ascii string of decimal digits into the unsigned long
+// long representation... this does not have to do input error checking,
+// because we know that the input will be matched by a suitable regex...
+// +uint64_t LLLexer::atoull(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; Buffer++) { + uint64_t OldRes = Result; + Result *= 10; + Result += *Buffer-'0'; + if (Result < OldRes) { // Uh, oh, overflow detected!!! + Error("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; ++Buffer) { + uint64_t OldRes = Result; + Result *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Result += C-'0'; + else if (C >= 'A' && C <= 'F') + Result += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Result += C-'a'+10; + + if (Result < OldRes) { // Uh, oh, overflow detected!!! + Error("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +void LLLexer::HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + Pair[1] = 0; + for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); +} + +/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into +/// { low64, high16 } as usual for an APInt. +void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[1] = 0; + for (int i=0; i<4 && Buffer != End; i++, Buffer++) { + assert(Buffer != End); + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); +} + +// UnEscapeLexed - Run through the specified buffer and change \xx codes to the +// appropriate character. +static void UnEscapeLexed(std::string &Str) { + if (Str.empty()) return; + + char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); + char *BOut = Buffer; + for (char *BIn = Buffer; BIn != EndBuffer; ) { + if (BIn[0] == '\\') { + if (BIn < EndBuffer-1 && BIn[1] == '\\') { + *BOut++ = '\\'; // Two \ becomes one + BIn += 2; + } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { + char Tmp = BIn[3]; BIn[3] = 0; // Terminate string + *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number + BIn[3] = Tmp; // Restore character + BIn += 3; // Skip over handled chars + ++BOut; + } else { + *BOut++ = *BIn++; + } + } else { + *BOut++ = *BIn++; + } + } + Str.resize(BOut-Buffer); +} + +/// isLabelChar - Return true for [-a-zA-Z$._0-9]. +static bool isLabelChar(char C) { + return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; +} + + +/// isLabelTail - Return true if this pointer points to a valid end of a label. 
+static const char *isLabelTail(const char *CurPtr) { + while (1) { + if (CurPtr[0] == ':') return CurPtr+1; + if (!isLabelChar(CurPtr[0])) return 0; + ++CurPtr; + } +} + + + +//===----------------------------------------------------------------------===// +// Lexer definition. +//===----------------------------------------------------------------------===// + +LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err) + : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) { + CurPtr = CurBuf->getBufferStart(); +} + +std::string LLLexer::getFilename() const { + return CurBuf->getBufferIdentifier(); +} + +int LLLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } +} + + +lltok::Kind LLLexer::LexToken() { + TokStart = CurPtr; + + int CurChar = getNextChar(); + switch (CurChar) { + default: + // Handle letters: [a-zA-Z_] + if (isalpha(CurChar) || CurChar == '_') + return LexIdentifier(); + + return lltok::Error; + case EOF: return lltok::Eof; + case 0: + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return LexToken(); + case '+': return LexPositive(); + case '@': return LexAt(); + case '%': return LexPercent(); + case '"': return LexQuote(); + case '.': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; + } + if (CurPtr[0] == '.' && CurPtr[1] == '.') { + CurPtr += 2; + return lltok::dotdotdot; + } + return lltok::Error; + case '$': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; + } + return lltok::Error; + case ';': + SkipLineComment(); + return LexToken(); + case '!': return lltok::Metadata; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '-': + return LexDigitOrNegative(); + case '=': return lltok::equal; + case '[': return lltok::lsquare; + case ']': return lltok::rsquare; + case '{': return lltok::lbrace; + case '}': return lltok::rbrace; + case '<': return lltok::less; + case '>': return lltok::greater; + case '(': return lltok::lparen; + case ')': return lltok::rparen; + case ',': return lltok::comma; + case '*': return lltok::star; + case '\\': return lltok::backslash; + } +} + +void LLLexer::SkipLineComment() { + while (1) { + if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) + return; + } +} + +/// LexAt - Lex all tokens that start with an @ character: +/// GlobalVar @\"[^\"]*\" +/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// GlobalVarID @[0-9]+ +lltok::Kind LLLexer::LexAt() { + // Handle AtStringConstant: @\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::GlobalVar; + } + } + } + + // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' 
|| CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + StrVal.assign(TokStart+1, CurPtr); // Skip @ + return lltok::GlobalVar; + } + + // Handle GlobalVarID: @[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::GlobalID; + } + + return lltok::Error; +} + + +/// LexPercent - Lex all tokens that start with a % character: +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + // Handle LocalVarName: %\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in string constant"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::LocalVar; + } + } + } + + // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + StrVal.assign(TokStart+1, CurPtr); // Skip % + return lltok::LocalVar; + } + + // Handle LocalVarID: %[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::LocalVarID; + } + + return lltok::Error; +} + +/// LexQuote - Lex all tokens that start with a " character: +/// QuoteLabel "[^"]+": +/// StringConstant "[^"]*" +lltok::Kind LLLexer::LexQuote() { + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in quoted string"); + return lltok::Error; + } + + if (CurChar != '"') continue; + + if (CurPtr[0] != ':') { + StrVal.assign(TokStart+1, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::StringConstant; + } + + ++CurPtr; + StrVal.assign(TokStart+1, CurPtr-2); + UnEscapeLexed(StrVal); + return lltok::LabelStr; + } +} + +static bool JustWhitespaceNewLine(const char *&Ptr) { + const char *ThisPtr = Ptr; + while (*ThisPtr == ' ' || *ThisPtr == '\t') + ++ThisPtr; + if (*ThisPtr == '\n' || *ThisPtr == '\r') { + Ptr = ThisPtr; + return true; + } + return false; +} + + +/// LexIdentifier: Handle several related productions: +/// Label [-a-zA-Z$._0-9]+: +/// IntegerType i[0-9]+ +/// Keyword sdiv, float, ... +/// HexIntConstant [us]0x[0-9A-Fa-f]+ +lltok::Kind LLLexer::LexIdentifier() { + const char *StartChar = CurPtr; + const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; + const char *KeywordEnd = 0; + + for (; isLabelChar(*CurPtr); ++CurPtr) { + // If we decide this is an integer, remember the end of the sequence. + if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; + if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; + } + + // If we stopped due to a colon, this really is a label. + if (*CurPtr == ':') { + StrVal.assign(StartChar-1, CurPtr++); + return lltok::LabelStr; + } + + // Otherwise, this wasn't a label. If this was valid as an integer type, + // return it. 
+ if (IntEnd == 0) IntEnd = CurPtr; + if (IntEnd != StartChar) { + CurPtr = IntEnd; + uint64_t NumBits = atoull(StartChar, CurPtr); + if (NumBits < IntegerType::MIN_INT_BITS || + NumBits > IntegerType::MAX_INT_BITS) { + Error("bitwidth for integer type out of range!"); + return lltok::Error; + } + TyVal = IntegerType::get(NumBits); + return lltok::Type; + } + + // Otherwise, this was a letter sequence. See which keyword this is. + if (KeywordEnd == 0) KeywordEnd = CurPtr; + CurPtr = KeywordEnd; + --StartChar; + unsigned Len = CurPtr-StartChar; +#define KEYWORD(STR) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; + + KEYWORD(begin); KEYWORD(end); + KEYWORD(true); KEYWORD(false); + KEYWORD(declare); KEYWORD(define); + KEYWORD(global); KEYWORD(constant); + + KEYWORD(private); + KEYWORD(internal); + KEYWORD(available_externally); + KEYWORD(linkonce); + KEYWORD(linkonce_odr); + KEYWORD(weak); + KEYWORD(weak_odr); + KEYWORD(appending); + KEYWORD(dllimport); + KEYWORD(dllexport); + KEYWORD(common); + KEYWORD(default); + KEYWORD(hidden); + KEYWORD(protected); + KEYWORD(extern_weak); + KEYWORD(external); + KEYWORD(thread_local); + KEYWORD(zeroinitializer); + KEYWORD(undef); + KEYWORD(null); + KEYWORD(to); + KEYWORD(tail); + KEYWORD(target); + KEYWORD(triple); + KEYWORD(deplibs); + KEYWORD(datalayout); + KEYWORD(volatile); + KEYWORD(align); + KEYWORD(addrspace); + KEYWORD(section); + KEYWORD(alias); + KEYWORD(module); + KEYWORD(asm); + KEYWORD(sideeffect); + KEYWORD(gc); + + KEYWORD(ccc); + KEYWORD(fastcc); + KEYWORD(coldcc); + KEYWORD(x86_stdcallcc); + KEYWORD(x86_fastcallcc); + KEYWORD(cc); + KEYWORD(c); + + KEYWORD(signext); + KEYWORD(zeroext); + KEYWORD(inreg); + KEYWORD(sret); + KEYWORD(nounwind); + KEYWORD(noreturn); + KEYWORD(noalias); + KEYWORD(nocapture); + KEYWORD(byval); + KEYWORD(nest); + KEYWORD(readnone); + KEYWORD(readonly); + + KEYWORD(noinline); + KEYWORD(alwaysinline); + KEYWORD(optsize); + KEYWORD(ssp); + KEYWORD(sspreq); + + KEYWORD(type); + KEYWORD(opaque); + + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); + KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); + KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); + KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + + KEYWORD(x); +#undef KEYWORD + + // Keywords for types. +#define TYPEKEYWORD(STR, LLVMTY) \ + if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ + TyVal = LLVMTY; return lltok::Type; } + TYPEKEYWORD("void", Type::VoidTy); + TYPEKEYWORD("float", Type::FloatTy); + TYPEKEYWORD("double", Type::DoubleTy); + TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty); + TYPEKEYWORD("fp128", Type::FP128Ty); + TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty); + TYPEKEYWORD("label", Type::LabelTy); + TYPEKEYWORD("metadata", Type::MetadataTy); +#undef TYPEKEYWORD + + // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is + // to avoid conflicting with the sext/zext instructions, below. + if (Len == 4 && !memcmp(StartChar, "sext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return lltok::kw_signext; + } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return lltok::kw_zeroext; + } + + // Keywords for instructions. 
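+  // As with KEYWORD above, each INSTKEYWORD match returns the corresponding
+  // token, but it additionally records the instruction opcode in UIntVal;
+  // e.g. "add" yields lltok::kw_add with UIntVal == Instruction::Add.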
+#define INSTKEYWORD(STR, Enum) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ + UIntVal = Instruction::Enum; return lltok::kw_##STR; } + + INSTKEYWORD(add, Add); INSTKEYWORD(sub, Sub); INSTKEYWORD(mul, Mul); + INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); + INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); + INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); + INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); + INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); + INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp); + + INSTKEYWORD(phi, PHI); + INSTKEYWORD(call, Call); + INSTKEYWORD(trunc, Trunc); + INSTKEYWORD(zext, ZExt); + INSTKEYWORD(sext, SExt); + INSTKEYWORD(fptrunc, FPTrunc); + INSTKEYWORD(fpext, FPExt); + INSTKEYWORD(uitofp, UIToFP); + INSTKEYWORD(sitofp, SIToFP); + INSTKEYWORD(fptoui, FPToUI); + INSTKEYWORD(fptosi, FPToSI); + INSTKEYWORD(inttoptr, IntToPtr); + INSTKEYWORD(ptrtoint, PtrToInt); + INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(select, Select); + INSTKEYWORD(va_arg, VAArg); + INSTKEYWORD(ret, Ret); + INSTKEYWORD(br, Br); + INSTKEYWORD(switch, Switch); + INSTKEYWORD(invoke, Invoke); + INSTKEYWORD(unwind, Unwind); + INSTKEYWORD(unreachable, Unreachable); + + INSTKEYWORD(malloc, Malloc); + INSTKEYWORD(alloca, Alloca); + INSTKEYWORD(free, Free); + INSTKEYWORD(load, Load); + INSTKEYWORD(store, Store); + INSTKEYWORD(getelementptr, GetElementPtr); + + INSTKEYWORD(extractelement, ExtractElement); + INSTKEYWORD(insertelement, InsertElement); + INSTKEYWORD(shufflevector, ShuffleVector); + INSTKEYWORD(getresult, ExtractValue); + INSTKEYWORD(extractvalue, ExtractValue); + INSTKEYWORD(insertvalue, InsertValue); +#undef INSTKEYWORD + + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by + // the CFE to avoid forcing it to deal with 64-bit numbers. + if ((TokStart[0] == 'u' || TokStart[0] == 's') && + TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { + int len = CurPtr-TokStart-3; + uint32_t bits = len * 4; + APInt Tmp(bits, TokStart+3, len, 16); + uint32_t activeBits = Tmp.getActiveBits(); + if (activeBits > 0 && activeBits < bits) + Tmp.trunc(activeBits); + APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); + return lltok::APSInt; + } + + // If this is "cc1234", return this as just "cc". + if (TokStart[0] == 'c' && TokStart[1] == 'c') { + CurPtr = TokStart+2; + return lltok::kw_cc; + } + + // If this starts with "call", return it as CALL. This is to support old + // broken .ll files. FIXME: remove this with LLVM 3.0. + if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { + CurPtr = TokStart+4; + UIntVal = Instruction::Call; + return lltok::kw_call; + } + + // Finally, if this isn't known, return an error. + CurPtr = TokStart+1; + return lltok::Error; +} + + +/// Lex0x: Handle productions that start with 0x, knowing that it matches and +/// that this is not a label: +/// HexFPConstant 0x[0-9A-Fa-f]+ +/// HexFP80Constant 0xK[0-9A-Fa-f]+ +/// HexFP128Constant 0xL[0-9A-Fa-f]+ +/// HexPPC128Constant 0xM[0-9A-Fa-f]+ +lltok::Kind LLLexer::Lex0x() { + CurPtr = TokStart + 2; + + char Kind; + if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { + Kind = *CurPtr++; + } else { + Kind = 'J'; + } + + if (!isxdigit(CurPtr[0])) { + // Bad token, return it as an error. 
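+    // (e.g. "0xK" with nothing after the kind letter); rewind so that only
+    // the leading '0' is consumed.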
+    CurPtr = TokStart+1;
+    return lltok::Error;
+  }
+
+  while (isxdigit(CurPtr[0]))
+    ++CurPtr;
+
+  if (Kind == 'J') {
+    // HexFPConstant - Floating point constant represented in IEEE format as a
+    // hexadecimal number for when exponential notation is not precise enough.
+    // Float and double only.
+    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+    return lltok::APFloat;
+  }
+
+  uint64_t Pair[2];
+  switch (Kind) {
+  default: assert(0 && "Unknown kind!");
+  case 'K':
+    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
+    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
+    APFloatVal = APFloat(APInt(80, 2, Pair));
+    return lltok::APFloat;
+  case 'L':
+    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
+    HexToIntPair(TokStart+3, CurPtr, Pair);
+    APFloatVal = APFloat(APInt(128, 2, Pair), true);
+    return lltok::APFloat;
+  case 'M':
+    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
+    HexToIntPair(TokStart+3, CurPtr, Pair);
+    APFloatVal = APFloat(APInt(128, 2, Pair));
+    return lltok::APFloat;
+  }
+}
+
+/// LexDigitOrNegative: Handle several related productions:
+///    Label             [-a-zA-Z$._0-9]+:
+///    NInteger          -[0-9]+
+///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+///    PInteger          [0-9]+
+///    HexFPConstant     0x[0-9A-Fa-f]+
+///    HexFP80Constant   0xK[0-9A-Fa-f]+
+///    HexFP128Constant  0xL[0-9A-Fa-f]+
+///    HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexDigitOrNegative() {
+  // If neither this character nor the one following a leading '-' is a digit,
+  // this is probably a label.
+  if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
+    // Okay, this is not a number after the -, it's probably a label.
+    if (const char *End = isLabelTail(CurPtr)) {
+      StrVal.assign(TokStart, End-1);
+      CurPtr = End;
+      return lltok::LabelStr;
+    }
+
+    return lltok::Error;
+  }
+
+  // At this point, it is either a label, int or fp constant.
+
+  // Skip digits, we have at least one.
+  for (; isdigit(CurPtr[0]); ++CurPtr)
+    /*empty*/;
+
+  // Check to see if this really is a label after all, e.g. "-1:".
+  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
+    if (const char *End = isLabelTail(CurPtr)) {
+      StrVal.assign(TokStart, End-1);
+      CurPtr = End;
+      return lltok::LabelStr;
+    }
+  }
+
+  // If the next character is a '.', then it is a fp value, otherwise it's an
+  // integer.
+  if (CurPtr[0] != '.') {
+    if (TokStart[0] == '0' && TokStart[1] == 'x')
+      return Lex0x();
+    unsigned Len = CurPtr-TokStart;
+    // Conservative width estimate: 64/19 > log2(10), so numBits always has
+    // room for Len decimal digits plus a sign bit.
+    uint32_t numBits = ((Len * 64) / 19) + 2;
+    APInt Tmp(numBits, TokStart, Len, 10);
+    if (TokStart[0] == '-') {
+      uint32_t minBits = Tmp.getMinSignedBits();
+      if (minBits > 0 && minBits < numBits)
+        Tmp.trunc(minBits);
+      APSIntVal = APSInt(Tmp, false);
+    } else {
+      uint32_t activeBits = Tmp.getActiveBits();
+      if (activeBits > 0 && activeBits < numBits)
+        Tmp.trunc(activeBits);
+      APSIntVal = APSInt(Tmp, true);
+    }
+    return lltok::APSInt;
+  }
+
+  ++CurPtr;
+
+  // Skip over [0-9]*([eE][-+]?[0-9]+)?
+  while (isdigit(CurPtr[0])) ++CurPtr;
+
+  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+    if (isdigit(CurPtr[1]) ||
+        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+      CurPtr += 2;
+      while (isdigit(CurPtr[0])) ++CurPtr;
+    }
+  }
+
+  APFloatVal = APFloat(atof(TokStart));
+  return lltok::APFloat;
+}
+
+/// LexPositive: Handle the production
+///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+lltok::Kind LLLexer::LexPositive() {
+  // A '+' must begin a floating point constant, so the next character has to
+  // be a digit.
+  if (!isdigit(CurPtr[0]))
+    return lltok::Error;
+
+  // Skip digits.
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + // At this point, we need a '.'. + if (CurPtr[0] != '.') { + CurPtr = TokStart+1; + return lltok::Error; + } + + ++CurPtr; + + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(CurPtr[0])) ++CurPtr; + + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { + if (isdigit(CurPtr[1]) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + CurPtr += 2; + while (isdigit(CurPtr[0])) ++CurPtr; + } + } + + APFloatVal = APFloat(atof(TokStart)); + return lltok::APFloat; +} diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h new file mode 100644 index 000000000000..995aa4eb0794 --- /dev/null +++ b/lib/AsmParser/LLLexer.h @@ -0,0 +1,84 @@ +//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the Lexer for .ll files. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_ASMPARSER_LLLEXER_H +#define LIB_ASMPARSER_LLLEXER_H + +#include "LLToken.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/APFloat.h" +#include + +namespace llvm { + class MemoryBuffer; + class Type; + class ParseError; + + class LLLexer { + const char *CurPtr; + MemoryBuffer *CurBuf; + ParseError &ErrorInfo; + + // Information about the current token. + const char *TokStart; + lltok::Kind CurKind; + std::string StrVal; + unsigned UIntVal; + const Type *TyVal; + APFloat APFloatVal; + APSInt APSIntVal; + + std::string TheError; + public: + explicit LLLexer(MemoryBuffer *StartBuf, ParseError &); + ~LLLexer() {} + + lltok::Kind Lex() { + return CurKind = LexToken(); + } + + typedef const char* LocTy; + LocTy getLoc() const { return TokStart; } + lltok::Kind getKind() const { return CurKind; } + const std::string getStrVal() const { return StrVal; } + const Type *getTyVal() const { return TyVal; } + unsigned getUIntVal() const { return UIntVal; } + const APSInt &getAPSIntVal() const { return APSIntVal; } + const APFloat &getAPFloatVal() const { return APFloatVal; } + + + bool Error(LocTy L, const std::string &Msg) const; + bool Error(const std::string &Msg) const { return Error(CurPtr, Msg); } + std::string getFilename() const; + + private: + lltok::Kind LexToken(); + + int getNextChar(); + void SkipLineComment(); + lltok::Kind LexIdentifier(); + lltok::Kind LexDigitOrNegative(); + lltok::Kind LexPositive(); + lltok::Kind LexAt(); + lltok::Kind LexPercent(); + lltok::Kind LexQuote(); + lltok::Kind Lex0x(); + + uint64_t atoull(const char *Buffer, const char *End); + uint64_t HexIntToVal(const char *Buffer, const char *End); + void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]); + void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]); + }; +} // end namespace llvm + +#endif diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp new file mode 100644 index 000000000000..8db4c715b793 --- /dev/null +++ b/lib/AsmParser/LLParser.cpp @@ -0,0 +1,3279 @@ +//===-- LLParser.cpp - Parser Class ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the parser class for .ll files. +// +//===----------------------------------------------------------------------===// + +#include "LLParser.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/MDNode.h" +#include "llvm/Module.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace llvm { + /// ValID - Represents a reference of a definition of some sort with no type. + /// There are several cases where we have to parse the value but where the + /// type can depend on later context. This may either be a numeric reference + /// or a symbolic (%var) reference. This is just a discriminated union. + struct ValID { + enum { + t_LocalID, t_GlobalID, // ID in UIntVal. + t_LocalName, t_GlobalName, // Name in StrVal. + t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal. + t_Null, t_Undef, t_Zero, // No value. + t_EmptyArray, // No value: [] + t_Constant, // Value in ConstantVal. + t_InlineAsm // Value in StrVal/StrVal2/UIntVal. + } Kind; + + LLParser::LocTy Loc; + unsigned UIntVal; + std::string StrVal, StrVal2; + APSInt APSIntVal; + APFloat APFloatVal; + Constant *ConstantVal; + ValID() : APFloatVal(0.0) {} + }; +} + +/// Run: module ::= toplevelentity* +bool LLParser::Run() { + // Prime the lexer. + Lex.Lex(); + + return ParseTopLevelEntities() || + ValidateEndOfModule(); +} + +/// ValidateEndOfModule - Do final validity and sanity checks at the end of the +/// module. +bool LLParser::ValidateEndOfModule() { + if (!ForwardRefTypes.empty()) + return Error(ForwardRefTypes.begin()->second.second, + "use of undefined type named '" + + ForwardRefTypes.begin()->first + "'"); + if (!ForwardRefTypeIDs.empty()) + return Error(ForwardRefTypeIDs.begin()->second.second, + "use of undefined type '%" + + utostr(ForwardRefTypeIDs.begin()->first) + "'"); + + if (!ForwardRefVals.empty()) + return Error(ForwardRefVals.begin()->second.second, + "use of undefined value '@" + ForwardRefVals.begin()->first + + "'"); + + if (!ForwardRefValIDs.empty()) + return Error(ForwardRefValIDs.begin()->second.second, + "use of undefined value '@" + + utostr(ForwardRefValIDs.begin()->first) + "'"); + + // Look for intrinsic functions and CallInst that need to be upgraded + for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) + UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + + return false; +} + +//===----------------------------------------------------------------------===// +// Top-Level Entities +//===----------------------------------------------------------------------===// + +bool LLParser::ParseTopLevelEntities() { + while (1) { + switch (Lex.getKind()) { + default: return TokError("expected top-level entity"); + case lltok::Eof: return false; + //case lltok::kw_define: + case lltok::kw_declare: if (ParseDeclare()) return true; break; + case lltok::kw_define: if (ParseDefine()) return true; break; + case lltok::kw_module: if (ParseModuleAsm()) return true; break; + case lltok::kw_target: if (ParseTargetDefinition()) return true; break; + case lltok::kw_deplibs: if (ParseDepLibs()) return true; break; + case lltok::kw_type: if (ParseUnnamedType()) return true; break; + case lltok::StringConstant: // 
FIXME: REMOVE IN LLVM 3.0 + case lltok::LocalVar: if (ParseNamedType()) return true; break; + case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; + + // The Global variable production with no name can have many different + // optional leading prefixes, the production is: + // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal + // OptionalAddrSpace ('constant'|'global') ... + case lltok::kw_private: // OptionalLinkage + case lltok::kw_internal: // OptionalLinkage + case lltok::kw_weak: // OptionalLinkage + case lltok::kw_weak_odr: // OptionalLinkage + case lltok::kw_linkonce: // OptionalLinkage + case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_appending: // OptionalLinkage + case lltok::kw_dllexport: // OptionalLinkage + case lltok::kw_common: // OptionalLinkage + case lltok::kw_dllimport: // OptionalLinkage + case lltok::kw_extern_weak: // OptionalLinkage + case lltok::kw_external: { // OptionalLinkage + unsigned Linkage, Visibility; + if (ParseOptionalLinkage(Linkage) || + ParseOptionalVisibility(Visibility) || + ParseGlobal("", 0, Linkage, true, Visibility)) + return true; + break; + } + case lltok::kw_default: // OptionalVisibility + case lltok::kw_hidden: // OptionalVisibility + case lltok::kw_protected: { // OptionalVisibility + unsigned Visibility; + if (ParseOptionalVisibility(Visibility) || + ParseGlobal("", 0, 0, false, Visibility)) + return true; + break; + } + + case lltok::kw_thread_local: // OptionalThreadLocal + case lltok::kw_addrspace: // OptionalAddrSpace + case lltok::kw_constant: // GlobalType + case lltok::kw_global: // GlobalType + if (ParseGlobal("", 0, 0, false, 0)) return true; + break; + } + } +} + + +/// toplevelentity +/// ::= 'module' 'asm' STRINGCONSTANT +bool LLParser::ParseModuleAsm() { + assert(Lex.getKind() == lltok::kw_module); + Lex.Lex(); + + std::string AsmStr; + if (ParseToken(lltok::kw_asm, "expected 'module asm'") || + ParseStringConstant(AsmStr)) return true; + + const std::string &AsmSoFar = M->getModuleInlineAsm(); + if (AsmSoFar.empty()) + M->setModuleInlineAsm(AsmStr); + else + M->setModuleInlineAsm(AsmSoFar+"\n"+AsmStr); + return false; +} + +/// toplevelentity +/// ::= 'target' 'triple' '=' STRINGCONSTANT +/// ::= 'target' 'datalayout' '=' STRINGCONSTANT +bool LLParser::ParseTargetDefinition() { + assert(Lex.getKind() == lltok::kw_target); + std::string Str; + switch (Lex.Lex()) { + default: return TokError("unknown target property"); + case lltok::kw_triple: + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after target triple") || + ParseStringConstant(Str)) + return true; + M->setTargetTriple(Str); + return false; + case lltok::kw_datalayout: + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after target datalayout") || + ParseStringConstant(Str)) + return true; + M->setDataLayout(Str); + return false; + } +} + +/// toplevelentity +/// ::= 'deplibs' '=' '[' ']' +/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']' +bool LLParser::ParseDepLibs() { + assert(Lex.getKind() == lltok::kw_deplibs); + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after deplibs") || + ParseToken(lltok::lsquare, "expected '=' after deplibs")) + return true; + + if (EatIfPresent(lltok::rsquare)) + return false; + + std::string Str; + if (ParseStringConstant(Str)) return true; + M->addLibrary(Str); + + while (EatIfPresent(lltok::comma)) { + if (ParseStringConstant(Str)) return true; + M->addLibrary(Str); + } + + return ParseToken(lltok::rsquare, "expected ']' at end of list"); +} 
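For orientation, a sketch of how a client drives these top-level productions, assuming the ParseAssemblyString(const char *, Module *, ParseError &) entry point that llvm/Assembly/Parser.h declares at this revision (lib/AsmParser/Parser.cpp, later in this patch, provides it); the exact signature is an assumption from this era of the API and changed in later releases:

    #include "llvm/Assembly/Parser.h"
    #include "llvm/Module.h"

    // Parse a small module exercising the 'target', 'deplibs', 'module asm'
    // and named-type productions handled by ParseTopLevelEntities().
    static llvm::Module *parseExample() {
      const char *Src =
          "target triple = \"x86_64-unknown-linux-gnu\"\n"
          "deplibs = [ \"m\", \"c\" ]\n"
          "module asm \".globl example\"\n"
          "%pair = type { i32, i32 }\n";
      llvm::ParseError Err;
      // Passing a null Module is assumed to create a fresh one; on failure
      // the parser returns null and Err carries the location and message.
      return llvm::ParseAssemblyString(Src, 0, Err);
    }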
+ +/// toplevelentity +/// ::= 'type' type +bool LLParser::ParseUnnamedType() { + assert(Lex.getKind() == lltok::kw_type); + LocTy TypeLoc = Lex.getLoc(); + Lex.Lex(); // eat kw_type + + PATypeHolder Ty(Type::VoidTy); + if (ParseType(Ty)) return true; + + unsigned TypeID = NumberedTypes.size(); + + // See if this type was previously referenced. + std::map >::iterator + FI = ForwardRefTypeIDs.find(TypeID); + if (FI != ForwardRefTypeIDs.end()) { + if (FI->second.first.get() == Ty) + return Error(TypeLoc, "self referential type is invalid"); + + cast(FI->second.first.get())->refineAbstractTypeTo(Ty); + Ty = FI->second.first.get(); + ForwardRefTypeIDs.erase(FI); + } + + NumberedTypes.push_back(Ty); + + return false; +} + +/// toplevelentity +/// ::= LocalVar '=' 'type' type +bool LLParser::ParseNamedType() { + std::string Name = Lex.getStrVal(); + LocTy NameLoc = Lex.getLoc(); + Lex.Lex(); // eat LocalVar. + + PATypeHolder Ty(Type::VoidTy); + + if (ParseToken(lltok::equal, "expected '=' after name") || + ParseToken(lltok::kw_type, "expected 'type' after name") || + ParseType(Ty)) + return true; + + // Set the type name, checking for conflicts as we do so. + bool AlreadyExists = M->addTypeName(Name, Ty); + if (!AlreadyExists) return false; + + // See if this type is a forward reference. We need to eagerly resolve + // types to allow recursive type redefinitions below. + std::map >::iterator + FI = ForwardRefTypes.find(Name); + if (FI != ForwardRefTypes.end()) { + if (FI->second.first.get() == Ty) + return Error(NameLoc, "self referential type is invalid"); + + cast(FI->second.first.get())->refineAbstractTypeTo(Ty); + Ty = FI->second.first.get(); + ForwardRefTypes.erase(FI); + } + + // Inserting a name that is already defined, get the existing name. + const Type *Existing = M->getTypeByName(Name); + assert(Existing && "Conflict but no matching type?!"); + + // Otherwise, this is an attempt to redefine a type. That's okay if + // the redefinition is identical to the original. + // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0 + if (Existing == Ty) return false; + + // Any other kind of (non-equivalent) redefinition is an error. + return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" + + Ty->getDescription() + "'"); +} + + +/// toplevelentity +/// ::= 'declare' FunctionHeader +bool LLParser::ParseDeclare() { + assert(Lex.getKind() == lltok::kw_declare); + Lex.Lex(); + + Function *F; + return ParseFunctionHeader(F, false); +} + +/// toplevelentity +/// ::= 'define' FunctionHeader '{' ... +bool LLParser::ParseDefine() { + assert(Lex.getKind() == lltok::kw_define); + Lex.Lex(); + + Function *F; + return ParseFunctionHeader(F, true) || + ParseFunctionBody(*F); +} + +/// ParseGlobalType +/// ::= 'constant' +/// ::= 'global' +bool LLParser::ParseGlobalType(bool &IsConstant) { + if (Lex.getKind() == lltok::kw_constant) + IsConstant = true; + else if (Lex.getKind() == lltok::kw_global) + IsConstant = false; + else { + IsConstant = false; + return TokError("expected 'global' or 'constant'"); + } + Lex.Lex(); + return false; +} + +/// ParseNamedGlobal: +/// GlobalVar '=' OptionalVisibility ALIAS ... +/// GlobalVar '=' OptionalLinkage OptionalVisibility ... 
-> global variable +bool LLParser::ParseNamedGlobal() { + assert(Lex.getKind() == lltok::GlobalVar); + LocTy NameLoc = Lex.getLoc(); + std::string Name = Lex.getStrVal(); + Lex.Lex(); + + bool HasLinkage; + unsigned Linkage, Visibility; + if (ParseToken(lltok::equal, "expected '=' in global variable") || + ParseOptionalLinkage(Linkage, HasLinkage) || + ParseOptionalVisibility(Visibility)) + return true; + + if (HasLinkage || Lex.getKind() != lltok::kw_alias) + return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility); + return ParseAlias(Name, NameLoc, Visibility); +} + +/// ParseAlias: +/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee +/// Aliasee +/// ::= TypeAndValue +/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')' +/// ::= 'getelementptr' '(' ... ')' +/// +/// Everything through visibility has already been parsed. +/// +bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, + unsigned Visibility) { + assert(Lex.getKind() == lltok::kw_alias); + Lex.Lex(); + unsigned Linkage; + LocTy LinkageLoc = Lex.getLoc(); + if (ParseOptionalLinkage(Linkage)) + return true; + + if (Linkage != GlobalValue::ExternalLinkage && + Linkage != GlobalValue::WeakAnyLinkage && + Linkage != GlobalValue::WeakODRLinkage && + Linkage != GlobalValue::InternalLinkage && + Linkage != GlobalValue::PrivateLinkage) + return Error(LinkageLoc, "invalid linkage type for alias"); + + Constant *Aliasee; + LocTy AliaseeLoc = Lex.getLoc(); + if (Lex.getKind() != lltok::kw_bitcast && + Lex.getKind() != lltok::kw_getelementptr) { + if (ParseGlobalTypeAndValue(Aliasee)) return true; + } else { + // The bitcast dest type is not present, it is implied by the dest type. + ValID ID; + if (ParseValID(ID)) return true; + if (ID.Kind != ValID::t_Constant) + return Error(AliaseeLoc, "invalid aliasee"); + Aliasee = ID.ConstantVal; + } + + if (!isa(Aliasee->getType())) + return Error(AliaseeLoc, "alias must have pointer type"); + + // Okay, create the alias but do not insert it into the module yet. + GlobalAlias* GA = new GlobalAlias(Aliasee->getType(), + (GlobalValue::LinkageTypes)Linkage, Name, + Aliasee); + GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); + + // See if this value already exists in the symbol table. If so, it is either + // a redefinition or a definition of a forward reference. + if (GlobalValue *Val = + cast_or_null(M->getValueSymbolTable().lookup(Name))) { + // See if this was a redefinition. If so, there is no entry in + // ForwardRefVals. + std::map >::iterator + I = ForwardRefVals.find(Name); + if (I == ForwardRefVals.end()) + return Error(NameLoc, "redefinition of global named '@" + Name + "'"); + + // Otherwise, this was a definition of forward ref. Verify that types + // agree. + if (Val->getType() != GA->getType()) + return Error(NameLoc, + "forward reference and definition of alias have different types"); + + // If they agree, just RAUW the old value with the alias and remove the + // forward ref info. + Val->replaceAllUsesWith(GA); + Val->eraseFromParent(); + ForwardRefVals.erase(I); + } + + // Insert into the module, we know its name won't collide now. 
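+  // (Any prior use of the name was either diagnosed as a redefinition above
+  // or was a forward reference that has just been RAUW'd and erased.)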
+ M->getAliasList().push_back(GA); + assert(GA->getNameStr() == Name && "Should not be a name conflict!"); + + return false; +} + +/// ParseGlobal +/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal +/// OptionalAddrSpace GlobalType Type Const +/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal +/// OptionalAddrSpace GlobalType Type Const +/// +/// Everything through visibility has been parsed already. +/// +bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, + unsigned Linkage, bool HasLinkage, + unsigned Visibility) { + unsigned AddrSpace; + bool ThreadLocal, IsConstant; + LocTy TyLoc; + + PATypeHolder Ty(Type::VoidTy); + if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) || + ParseOptionalAddrSpace(AddrSpace) || + ParseGlobalType(IsConstant) || + ParseType(Ty, TyLoc)) + return true; + + // If the linkage is specified and is external, then no initializer is + // present. + Constant *Init = 0; + if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage && + Linkage != GlobalValue::ExternalWeakLinkage && + Linkage != GlobalValue::ExternalLinkage)) { + if (ParseGlobalValue(Ty, Init)) + return true; + } + + if (isa(Ty) || Ty == Type::LabelTy) + return Error(TyLoc, "invalid type for global variable"); + + GlobalVariable *GV = 0; + + // See if the global was forward referenced, if so, use the global. + if (!Name.empty()) { + if ((GV = M->getGlobalVariable(Name, true)) && + !ForwardRefVals.erase(Name)) + return Error(NameLoc, "redefinition of global '@" + Name + "'"); + } else { + std::map >::iterator + I = ForwardRefValIDs.find(NumberedVals.size()); + if (I != ForwardRefValIDs.end()) { + GV = cast(I->second.first); + ForwardRefValIDs.erase(I); + } + } + + if (GV == 0) { + GV = new GlobalVariable(Ty, false, GlobalValue::ExternalLinkage, 0, Name, + M, false, AddrSpace); + } else { + if (GV->getType()->getElementType() != Ty) + return Error(TyLoc, + "forward reference and definition of global have different types"); + + // Move the forward-reference to the correct spot in the module. + M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV); + } + + if (Name.empty()) + NumberedVals.push_back(GV); + + // Set the parsed properties on the global. + if (Init) + GV->setInitializer(Init); + GV->setConstant(IsConstant); + GV->setLinkage((GlobalValue::LinkageTypes)Linkage); + GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); + GV->setThreadLocal(ThreadLocal); + + // Parse attributes on the global. + while (Lex.getKind() == lltok::comma) { + Lex.Lex(); + + if (Lex.getKind() == lltok::kw_section) { + Lex.Lex(); + GV->setSection(Lex.getStrVal()); + if (ParseToken(lltok::StringConstant, "expected global section string")) + return true; + } else if (Lex.getKind() == lltok::kw_align) { + unsigned Alignment; + if (ParseOptionalAlignment(Alignment)) return true; + GV->setAlignment(Alignment); + } else { + TokError("unknown global variable property!"); + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// GlobalValue Reference/Resolution Routines. +//===----------------------------------------------------------------------===// + +/// GetGlobalVal - Get a value with the specified name or ID, creating a +/// forward reference record if needed. This can return null if the value +/// exists but does not have the right type. 
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, + LocTy Loc) { + const PointerType *PTy = dyn_cast(Ty); + if (PTy == 0) { + Error(Loc, "global variable reference must have pointer type"); + return 0; + } + + // Look this name up in the normal function symbol table. + GlobalValue *Val = + cast_or_null(M->getValueSymbolTable().lookup(Name)); + + // If this is a forward reference for the value, see if we already created a + // forward ref record. + if (Val == 0) { + std::map >::iterator + I = ForwardRefVals.find(Name); + if (I != ForwardRefVals.end()) + Val = I->second.first; + } + + // If we have the value in the symbol table or fwd-ref table, return it. + if (Val) { + if (Val->getType() == Ty) return Val; + Error(Loc, "'@" + Name + "' defined with type '" + + Val->getType()->getDescription() + "'"); + return 0; + } + + // Otherwise, create a new forward reference for this value and remember it. + GlobalValue *FwdVal; + if (const FunctionType *FT = dyn_cast(PTy->getElementType())) { + // Function types can return opaque but functions can't. + if (isa(FT->getReturnType())) { + Error(Loc, "function may not return opaque type"); + return 0; + } + + FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); + } else { + FwdVal = new GlobalVariable(PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, 0, Name, M); + } + + ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); + return FwdVal; +} + +GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { + const PointerType *PTy = dyn_cast(Ty); + if (PTy == 0) { + Error(Loc, "global variable reference must have pointer type"); + return 0; + } + + GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; + + // If this is a forward reference for the value, see if we already created a + // forward ref record. + if (Val == 0) { + std::map >::iterator + I = ForwardRefValIDs.find(ID); + if (I != ForwardRefValIDs.end()) + Val = I->second.first; + } + + // If we have the value in the symbol table or fwd-ref table, return it. + if (Val) { + if (Val->getType() == Ty) return Val; + Error(Loc, "'@" + utostr(ID) + "' defined with type '" + + Val->getType()->getDescription() + "'"); + return 0; + } + + // Otherwise, create a new forward reference for this value and remember it. + GlobalValue *FwdVal; + if (const FunctionType *FT = dyn_cast(PTy->getElementType())) { + // Function types can return opaque but functions can't. + if (isa(FT->getReturnType())) { + Error(Loc, "function may not return opaque type"); + return 0; + } + FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); + } else { + FwdVal = new GlobalVariable(PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, 0, "", M); + } + + ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); + return FwdVal; +} + + +//===----------------------------------------------------------------------===// +// Helper Routines. +//===----------------------------------------------------------------------===// + +/// ParseToken - If the current token has the specified kind, eat it and return +/// success. Otherwise, emit the specified error and return failure. 
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) { + if (Lex.getKind() != T) + return TokError(ErrMsg); + Lex.Lex(); + return false; +} + +/// ParseStringConstant +/// ::= StringConstant +bool LLParser::ParseStringConstant(std::string &Result) { + if (Lex.getKind() != lltok::StringConstant) + return TokError("expected string constant"); + Result = Lex.getStrVal(); + Lex.Lex(); + return false; +} + +/// ParseUInt32 +/// ::= uint32 +bool LLParser::ParseUInt32(unsigned &Val) { + if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned()) + return TokError("expected integer"); + uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1); + if (Val64 != unsigned(Val64)) + return TokError("expected 32-bit integer (too large)"); + Val = Val64; + Lex.Lex(); + return false; +} + + +/// ParseOptionalAddrSpace +/// := /*empty*/ +/// := 'addrspace' '(' uint32 ')' +bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { + AddrSpace = 0; + if (!EatIfPresent(lltok::kw_addrspace)) + return false; + return ParseToken(lltok::lparen, "expected '(' in address space") || + ParseUInt32(AddrSpace) || + ParseToken(lltok::rparen, "expected ')' in address space"); +} + +/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind +/// indicates what kind of attribute list this is: 0: function arg, 1: result, +/// 2: function attr. +/// 3: function arg after value: FIXME: REMOVE IN LLVM 3.0 +bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { + Attrs = Attribute::None; + LocTy AttrLoc = Lex.getLoc(); + + while (1) { + switch (Lex.getKind()) { + case lltok::kw_sext: + case lltok::kw_zext: + // Treat these as signext/zeroext if they occur in the argument list after + // the value, as in "call i8 @foo(i8 10 sext)". If they occur before the + // value, as in "call i8 @foo(i8 sext (" then it is part of a constant + // expr. + // FIXME: REMOVE THIS IN LLVM 3.0 + if (AttrKind == 3) { + if (Lex.getKind() == lltok::kw_sext) + Attrs |= Attribute::SExt; + else + Attrs |= Attribute::ZExt; + break; + } + // FALL THROUGH. + default: // End of attributes. 
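+      // Before returning, make sure the accumulated attributes are legal for
+      // the position they appeared in, e.g. 'noreturn' on a parameter or
+      // 'sret' on a function is rejected.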
+ if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly)) + return Error(AttrLoc, "invalid use of function-only attribute"); + + if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly)) + return Error(AttrLoc, "invalid use of parameter-only attribute"); + + return false; + case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; + case lltok::kw_signext: Attrs |= Attribute::SExt; break; + case lltok::kw_inreg: Attrs |= Attribute::InReg; break; + case lltok::kw_sret: Attrs |= Attribute::StructRet; break; + case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break; + case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break; + case lltok::kw_byval: Attrs |= Attribute::ByVal; break; + case lltok::kw_nest: Attrs |= Attribute::Nest; break; + + case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break; + case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break; + case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; + case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; + case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; + case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; + case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; + case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; + case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break; + + + case lltok::kw_align: { + unsigned Alignment; + if (ParseOptionalAlignment(Alignment)) + return true; + Attrs |= Attribute::constructAlignmentFromInt(Alignment); + continue; + } + } + Lex.Lex(); + } +} + +/// ParseOptionalLinkage +/// ::= /*empty*/ +/// ::= 'private' +/// ::= 'internal' +/// ::= 'weak' +/// ::= 'weak_odr' +/// ::= 'linkonce' +/// ::= 'linkonce_odr' +/// ::= 'appending' +/// ::= 'dllexport' +/// ::= 'common' +/// ::= 'dllimport' +/// ::= 'extern_weak' +/// ::= 'external' +bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { + HasLinkage = false; + switch (Lex.getKind()) { + default: Res = GlobalValue::ExternalLinkage; return false; + case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break; + case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; + case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; + case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; + case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; + case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; + case lltok::kw_available_externally: + Res = GlobalValue::AvailableExternallyLinkage; + break; + case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break; + case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break; + case lltok::kw_common: Res = GlobalValue::CommonLinkage; break; + case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break; + case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break; + case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break; + } + Lex.Lex(); + HasLinkage = true; + return false; +} + +/// ParseOptionalVisibility +/// ::= /*empty*/ +/// ::= 'default' +/// ::= 'hidden' +/// ::= 'protected' +/// +bool LLParser::ParseOptionalVisibility(unsigned &Res) { + switch (Lex.getKind()) { + default: Res = GlobalValue::DefaultVisibility; return false; + case lltok::kw_default: Res = GlobalValue::DefaultVisibility; break; + case lltok::kw_hidden: Res = GlobalValue::HiddenVisibility; break; + case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break; + } + Lex.Lex(); + return 
false; +} + +/// ParseOptionalCallingConv +/// ::= /*empty*/ +/// ::= 'ccc' +/// ::= 'fastcc' +/// ::= 'coldcc' +/// ::= 'x86_stdcallcc' +/// ::= 'x86_fastcallcc' +/// ::= 'cc' UINT +/// +bool LLParser::ParseOptionalCallingConv(unsigned &CC) { + switch (Lex.getKind()) { + default: CC = CallingConv::C; return false; + case lltok::kw_ccc: CC = CallingConv::C; break; + case lltok::kw_fastcc: CC = CallingConv::Fast; break; + case lltok::kw_coldcc: CC = CallingConv::Cold; break; + case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; + case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; + case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC); + } + Lex.Lex(); + return false; +} + +/// ParseOptionalAlignment +/// ::= /* empty */ +/// ::= 'align' 4 +bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::kw_align)) + return false; + LocTy AlignLoc = Lex.getLoc(); + if (ParseUInt32(Alignment)) return true; + if (!isPowerOf2_32(Alignment)) + return Error(AlignLoc, "alignment is not a power of two"); + return false; +} + +/// ParseOptionalCommaAlignment +/// ::= /* empty */ +/// ::= ',' 'align' 4 +bool LLParser::ParseOptionalCommaAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::comma)) + return false; + return ParseToken(lltok::kw_align, "expected 'align'") || + ParseUInt32(Alignment); +} + +/// ParseIndexList +/// ::= (',' uint32)+ +bool LLParser::ParseIndexList(SmallVectorImpl &Indices) { + if (Lex.getKind() != lltok::comma) + return TokError("expected ',' as start of index list"); + + while (EatIfPresent(lltok::comma)) { + unsigned Idx; + if (ParseUInt32(Idx)) return true; + Indices.push_back(Idx); + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Type Parsing. +//===----------------------------------------------------------------------===// + +/// ParseType - Parse and resolve a full type. +bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) { + LocTy TypeLoc = Lex.getLoc(); + if (ParseTypeRec(Result)) return true; + + // Verify no unresolved uprefs. + if (!UpRefs.empty()) + return Error(UpRefs.back().Loc, "invalid unresolved type up reference"); + + if (!AllowVoid && Result.get() == Type::VoidTy) + return Error(TypeLoc, "void type only allowed for function results"); + + return false; +} + +/// HandleUpRefs - Every time we finish a new layer of types, this function is +/// called. It loops through the UpRefs vector, which is a list of the +/// currently active types. For each type, if the up-reference is contained in +/// the newly completed type, we decrement the level count. When the level +/// count reaches zero, the up-referenced type is the type that is passed in: +/// thus we can complete the cycle. +/// +PATypeHolder LLParser::HandleUpRefs(const Type *ty) { + // If Ty isn't abstract, or if there are no up-references in it, then there is + // nothing to resolve here. + if (!ty->isAbstract() || UpRefs.empty()) return ty; + + PATypeHolder Ty(ty); +#if 0 + errs() << "Type '" << Ty->getDescription() + << "' newly formed. Resolving upreferences.\n" + << UpRefs.size() << " upreferences active!\n"; +#endif + + // If we find any resolvable upreferences (i.e., those whose NestingLevel goes + // to zero), we resolve them all together before we resolve them to Ty. At + // the end of the loop, if there is anything to resolve to Ty, it will be in + // this variable. 
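+  // For example, while parsing the recursive struct type '{ \2* }', the '\2'
+  // introduces an opaque placeholder with NestingLevel 2.  Completing the
+  // pointer type drops the level to 1 and completing the struct drops it to
+  // 0, at which point the placeholder is refined to the struct itself,
+  // closing the cycle.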
+ OpaqueType *TypeToResolve = 0; + + for (unsigned i = 0; i != UpRefs.size(); ++i) { + // Determine if 'Ty' directly contains this up-references 'LastContainedTy'. + bool ContainsType = + std::find(Ty->subtype_begin(), Ty->subtype_end(), + UpRefs[i].LastContainedTy) != Ty->subtype_end(); + +#if 0 + errs() << " UR#" << i << " - TypeContains(" << Ty->getDescription() << ", " + << UpRefs[i].LastContainedTy->getDescription() << ") = " + << (ContainsType ? "true" : "false") + << " level=" << UpRefs[i].NestingLevel << "\n"; +#endif + if (!ContainsType) + continue; + + // Decrement level of upreference + unsigned Level = --UpRefs[i].NestingLevel; + UpRefs[i].LastContainedTy = Ty; + + // If the Up-reference has a non-zero level, it shouldn't be resolved yet. + if (Level != 0) + continue; + +#if 0 + errs() << " * Resolving upreference for " << UpRefs[i].UpRefTy << "\n"; +#endif + if (!TypeToResolve) + TypeToResolve = UpRefs[i].UpRefTy; + else + UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve); + UpRefs.erase(UpRefs.begin()+i); // Remove from upreference list. + --i; // Do not skip the next element. + } + + if (TypeToResolve) + TypeToResolve->refineAbstractTypeTo(Ty); + + return Ty; +} + + +/// ParseTypeRec - The recursive function used to process the internal +/// implementation details of types. +bool LLParser::ParseTypeRec(PATypeHolder &Result) { + switch (Lex.getKind()) { + default: + return TokError("expected type"); + case lltok::Type: + // TypeRec ::= 'float' | 'void' (etc) + Result = Lex.getTyVal(); + Lex.Lex(); + break; + case lltok::kw_opaque: + // TypeRec ::= 'opaque' + Result = OpaqueType::get(); + Lex.Lex(); + break; + case lltok::lbrace: + // TypeRec ::= '{' ... '}' + if (ParseStructType(Result, false)) + return true; + break; + case lltok::lsquare: + // TypeRec ::= '[' ... ']' + Lex.Lex(); // eat the lsquare. + if (ParseArrayVectorType(Result, false)) + return true; + break; + case lltok::less: // Either vector or packed struct. + // TypeRec ::= '<' ... '>' + Lex.Lex(); + if (Lex.getKind() == lltok::lbrace) { + if (ParseStructType(Result, true) || + ParseToken(lltok::greater, "expected '>' at end of packed struct")) + return true; + } else if (ParseArrayVectorType(Result, true)) + return true; + break; + case lltok::LocalVar: + case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0 + // TypeRec ::= %foo + if (const Type *T = M->getTypeByName(Lex.getStrVal())) { + Result = T; + } else { + Result = OpaqueType::get(); + ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(), + std::make_pair(Result, + Lex.getLoc()))); + M->addTypeName(Lex.getStrVal(), Result.get()); + } + Lex.Lex(); + break; + + case lltok::LocalVarID: + // TypeRec ::= %4 + if (Lex.getUIntVal() < NumberedTypes.size()) + Result = NumberedTypes[Lex.getUIntVal()]; + else { + std::map >::iterator + I = ForwardRefTypeIDs.find(Lex.getUIntVal()); + if (I != ForwardRefTypeIDs.end()) + Result = I->second.first; + else { + Result = OpaqueType::get(); + ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(), + std::make_pair(Result, + Lex.getLoc()))); + } + } + Lex.Lex(); + break; + case lltok::backslash: { + // TypeRec ::= '\' 4 + Lex.Lex(); + unsigned Val; + if (ParseUInt32(Val)) return true; + OpaqueType *OT = OpaqueType::get(); // Use temporary placeholder. + UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT)); + Result = OT; + break; + } + } + + // Parse the type suffixes. + while (1) { + switch (Lex.getKind()) { + // End of type. 
+ default: return false; + + // TypeRec ::= TypeRec '*' + case lltok::star: + if (Result.get() == Type::LabelTy) + return TokError("basic block pointers are invalid"); + if (Result.get() == Type::VoidTy) + return TokError("pointers to void are invalid; use i8* instead"); + Result = HandleUpRefs(PointerType::getUnqual(Result.get())); + Lex.Lex(); + break; + + // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*' + case lltok::kw_addrspace: { + if (Result.get() == Type::LabelTy) + return TokError("basic block pointers are invalid"); + if (Result.get() == Type::VoidTy) + return TokError("pointers to void are invalid; use i8* instead"); + unsigned AddrSpace; + if (ParseOptionalAddrSpace(AddrSpace) || + ParseToken(lltok::star, "expected '*' in address space")) + return true; + + Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace)); + break; + } + + /// Types '(' ArgTypeListI ')' OptFuncAttrs + case lltok::lparen: + if (ParseFunctionType(Result)) + return true; + break; + } + } +} + +/// ParseParameterList +/// ::= '(' ')' +/// ::= '(' Arg (',' Arg)* ')' +/// Arg +/// ::= Type OptionalAttributes Value OptionalAttributes +bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, + PerFunctionState &PFS) { + if (ParseToken(lltok::lparen, "expected '(' in call")) + return true; + + while (Lex.getKind() != lltok::rparen) { + // If this isn't the first argument, we need a comma. + if (!ArgList.empty() && + ParseToken(lltok::comma, "expected ',' in argument list")) + return true; + + // Parse the argument. + LocTy ArgLoc; + PATypeHolder ArgTy(Type::VoidTy); + unsigned ArgAttrs1, ArgAttrs2; + Value *V; + if (ParseType(ArgTy, ArgLoc) || + ParseOptionalAttrs(ArgAttrs1, 0) || + ParseValue(ArgTy, V, PFS) || + // FIXME: Should not allow attributes after the argument, remove this in + // LLVM 3.0. + ParseOptionalAttrs(ArgAttrs2, 3)) + return true; + ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2)); + } + + Lex.Lex(); // Lex the ')'. + return false; +} + + + +/// ParseArgumentList - Parse the argument list for a function type or function +/// prototype. If 'inType' is true then we are parsing a FunctionType. +/// ::= '(' ArgTypeListI ')' +/// ArgTypeListI +/// ::= /*empty*/ +/// ::= '...' +/// ::= ArgTypeList ',' '...' +/// ::= ArgType (',' ArgType)* +/// +bool LLParser::ParseArgumentList(std::vector &ArgList, + bool &isVarArg, bool inType) { + isVarArg = false; + assert(Lex.getKind() == lltok::lparen); + Lex.Lex(); // eat the (. + + if (Lex.getKind() == lltok::rparen) { + // empty + } else if (Lex.getKind() == lltok::dotdotdot) { + isVarArg = true; + Lex.Lex(); + } else { + LocTy TypeLoc = Lex.getLoc(); + PATypeHolder ArgTy(Type::VoidTy); + unsigned Attrs; + std::string Name; + + // If we're parsing a type, use ParseTypeRec, because we allow recursive + // types (such as a function returning a pointer to itself). If parsing a + // function prototype, we require fully resolved types. + if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) || + ParseOptionalAttrs(Attrs, 0)) return true; + + if (ArgTy == Type::VoidTy) + return Error(TypeLoc, "argument can not have void type"); + + if (Lex.getKind() == lltok::LocalVar || + Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0 + Name = Lex.getStrVal(); + Lex.Lex(); + } + + if (!ArgTy->isFirstClassType() && !isa(ArgTy)) + return Error(TypeLoc, "invalid type for function argument"); + + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); + + while (EatIfPresent(lltok::comma)) { + // Handle ... at end of arg list. 
+      if (EatIfPresent(lltok::dotdotdot)) {
+        isVarArg = true;
+        break;
+      }
+
+      // Otherwise must be an argument type.
+      TypeLoc = Lex.getLoc();
+      if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+          ParseOptionalAttrs(Attrs, 0)) return true;
+
+      if (ArgTy == Type::VoidTy)
+        return Error(TypeLoc, "argument cannot have void type");
+
+      if (Lex.getKind() == lltok::LocalVar ||
+          Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+        Name = Lex.getStrVal();
+        Lex.Lex();
+      } else {
+        Name = "";
+      }
+
+      if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
+        return Error(TypeLoc, "invalid type for function argument");
+
+      ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+    }
+  }
+
+  return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+///  ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(PATypeHolder &Result) {
+  assert(Lex.getKind() == lltok::lparen);
+
+  if (!FunctionType::isValidReturnType(Result))
+    return TokError("invalid function return type");
+
+  std::vector<ArgInfo> ArgList;
+  bool isVarArg;
+  unsigned Attrs;
+  if (ParseArgumentList(ArgList, isVarArg, true) ||
+      // FIXME: Allow, but ignore attributes on function types!
+      // FIXME: Remove in LLVM 3.0
+      ParseOptionalAttrs(Attrs, 2))
+    return true;
+
+  // Reject names on the argument lists.
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    if (!ArgList[i].Name.empty())
+      return Error(ArgList[i].Loc, "argument name invalid in function type");
+    if (ArgList[i].Attrs != 0) {
+      // Allow but ignore attributes on function types; this permits
+      // auto-upgrade.
+      // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
+    }
+  }
+
+  std::vector<const Type*> ArgListTy;
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+    ArgListTy.push_back(ArgList[i].Type);
+
+  Result = HandleUpRefs(FunctionType::get(Result.get(), ArgListTy, isVarArg));
+  return false;
+}
+
+/// ParseStructType: Handles packed and unpacked types.  </> parsed elsewhere.
+///   TypeRec
+///     ::= '{' '}'
+///     ::= '{' TypeRec (',' TypeRec)* '}'
+///     ::= '<' '{' '}' '>'
+///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
+bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+  assert(Lex.getKind() == lltok::lbrace);
+  Lex.Lex(); // Consume the '{'
+
+  if (EatIfPresent(lltok::rbrace)) {
+    Result = StructType::get(std::vector<const Type*>(), Packed);
+    return false;
+  }
+
+  std::vector<PATypeHolder> ParamsList;
+  LocTy EltTyLoc = Lex.getLoc();
+  if (ParseTypeRec(Result)) return true;
+  ParamsList.push_back(Result);
+
+  if (Result == Type::VoidTy)
+    return Error(EltTyLoc, "struct element cannot have void type");
+
+  while (EatIfPresent(lltok::comma)) {
+    EltTyLoc = Lex.getLoc();
+    if (ParseTypeRec(Result)) return true;
+
+    if (Result == Type::VoidTy)
+      return Error(EltTyLoc, "struct element cannot have void type");
+
+    ParamsList.push_back(Result);
+  }
+
+  if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
+    return true;
+
+  std::vector<const Type*> ParamsListTy;
+  for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
+    ParamsListTy.push_back(ParamsList[i].get());
+  Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
+  return false;
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+///   TypeRec
+///     ::= '[' APSINTVAL 'x' Types ']'
+///     ::= '<' APSINTVAL 'x' Types '>'
+bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
+  if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+      Lex.getAPSIntVal().getBitWidth() > 64)
+    return TokError("expected number in array or vector type");
+
+  LocTy SizeLoc = Lex.getLoc();
+  uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+  Lex.Lex();
+
+  if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+    return true;
+
+  LocTy TypeLoc = Lex.getLoc();
+  PATypeHolder EltTy(Type::VoidTy);
+  if (ParseTypeRec(EltTy)) return true;
+
+  if (EltTy == Type::VoidTy)
+    return Error(TypeLoc, "array and vector element type cannot be void");
+
+  if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+                 "expected end of sequential type"))
+    return true;
+
+  if (isVector) {
+    if (Size == 0)
+      return Error(SizeLoc, "zero element vector is illegal");
+    if ((unsigned)Size != Size)
+      return Error(SizeLoc, "size too large for vector");
+    if (!EltTy->isFloatingPoint() && !EltTy->isInteger())
+      return Error(TypeLoc, "vector element type must be fp or integer");
+    Result = VectorType::get(EltTy, unsigned(Size));
+  } else {
+    if (!EltTy->isFirstClassType() && !isa<OpaqueType>(EltTy))
+      return Error(TypeLoc, "invalid array element type");
+    Result = HandleUpRefs(ArrayType::get(EltTy, Size));
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f)
+  : P(p), F(f) {
+
+  // Insert unnamed arguments into the NumberedVals list.
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI)
+    if (!AI->hasName())
+      NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+  // If there were any forward referenced non-basicblock values, delete them.
+  for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+       I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+    if (!isa<BasicBlock>(I->second.first)) {
+      I->second.first->replaceAllUsesWith(
+          UndefValue::get(I->second.first->getType()));
+      delete I->second.first;
+      I->second.first = 0;
+    }
+
+  for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+       I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+    if (!isa<BasicBlock>(I->second.first)) {
+      I->second.first->replaceAllUsesWith(
+          UndefValue::get(I->second.first->getType()));
+      delete I->second.first;
+      I->second.first = 0;
+    }
+}
+
+bool LLParser::PerFunctionState::VerifyFunctionComplete() {
+  if (!ForwardRefVals.empty())
+    return P.Error(ForwardRefVals.begin()->second.second,
+                   "use of undefined value '%" + ForwardRefVals.begin()->first +
+                   "'");
+  if (!ForwardRefValIDs.empty())
+    return P.Error(ForwardRefValIDs.begin()->second.second,
+                   "use of undefined value '%" +
+                   utostr(ForwardRefValIDs.begin()->first) + "'");
+  return false;
+}
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed.  This can return null if the value
+/// exists but does not have the right type.
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+                                          const Type *Ty, LocTy Loc) {
+  // Look this name up in the normal function symbol table.
+  Value *Val = F.getValueSymbolTable().lookup(Name);
+
+  // If this is a forward reference for the value, see if we already created a
+  // forward ref record.
+ if (Val == 0) { + std::map >::iterator + I = ForwardRefVals.find(Name); + if (I != ForwardRefVals.end()) + Val = I->second.first; + } + + // If we have the value in the symbol table or fwd-ref table, return it. + if (Val) { + if (Val->getType() == Ty) return Val; + if (Ty == Type::LabelTy) + P.Error(Loc, "'%" + Name + "' is not a basic block"); + else + P.Error(Loc, "'%" + Name + "' defined with type '" + + Val->getType()->getDescription() + "'"); + return 0; + } + + // Don't make placeholders with invalid type. + if (!Ty->isFirstClassType() && !isa(Ty) && Ty != Type::LabelTy) { + P.Error(Loc, "invalid use of a non-first-class type"); + return 0; + } + + // Otherwise, create a new forward reference for this value and remember it. + Value *FwdVal; + if (Ty == Type::LabelTy) + FwdVal = BasicBlock::Create(Name, &F); + else + FwdVal = new Argument(Ty, Name); + + ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); + return FwdVal; +} + +Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, + LocTy Loc) { + // Look this name up in the normal function symbol table. + Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; + + // If this is a forward reference for the value, see if we already created a + // forward ref record. + if (Val == 0) { + std::map >::iterator + I = ForwardRefValIDs.find(ID); + if (I != ForwardRefValIDs.end()) + Val = I->second.first; + } + + // If we have the value in the symbol table or fwd-ref table, return it. + if (Val) { + if (Val->getType() == Ty) return Val; + if (Ty == Type::LabelTy) + P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block"); + else + P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" + + Val->getType()->getDescription() + "'"); + return 0; + } + + if (!Ty->isFirstClassType() && !isa(Ty) && Ty != Type::LabelTy) { + P.Error(Loc, "invalid use of a non-first-class type"); + return 0; + } + + // Otherwise, create a new forward reference for this value and remember it. + Value *FwdVal; + if (Ty == Type::LabelTy) + FwdVal = BasicBlock::Create("", &F); + else + FwdVal = new Argument(Ty); + + ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); + return FwdVal; +} + +/// SetInstName - After an instruction is parsed and inserted into its +/// basic block, this installs its name. +bool LLParser::PerFunctionState::SetInstName(int NameID, + const std::string &NameStr, + LocTy NameLoc, Instruction *Inst) { + // If this instruction has void type, it cannot have a name or ID specified. + if (Inst->getType() == Type::VoidTy) { + if (NameID != -1 || !NameStr.empty()) + return P.Error(NameLoc, "instructions returning void cannot have a name"); + return false; + } + + // If this was a numbered instruction, verify that the instruction is the + // expected value and resolve any forward references. + if (NameStr.empty()) { + // If neither a name nor an ID was specified, just use the next ID. 
+ if (NameID == -1) + NameID = NumberedVals.size(); + + if (unsigned(NameID) != NumberedVals.size()) + return P.Error(NameLoc, "instruction expected to be numbered '%" + + utostr(NumberedVals.size()) + "'"); + + std::map >::iterator FI = + ForwardRefValIDs.find(NameID); + if (FI != ForwardRefValIDs.end()) { + if (FI->second.first->getType() != Inst->getType()) + return P.Error(NameLoc, "instruction forward referenced with type '" + + FI->second.first->getType()->getDescription() + "'"); + FI->second.first->replaceAllUsesWith(Inst); + ForwardRefValIDs.erase(FI); + } + + NumberedVals.push_back(Inst); + return false; + } + + // Otherwise, the instruction had a name. Resolve forward refs and set it. + std::map >::iterator + FI = ForwardRefVals.find(NameStr); + if (FI != ForwardRefVals.end()) { + if (FI->second.first->getType() != Inst->getType()) + return P.Error(NameLoc, "instruction forward referenced with type '" + + FI->second.first->getType()->getDescription() + "'"); + FI->second.first->replaceAllUsesWith(Inst); + ForwardRefVals.erase(FI); + } + + // Set the name on the instruction. + Inst->setName(NameStr); + + if (Inst->getNameStr() != NameStr) + return P.Error(NameLoc, "multiple definition of local value named '" + + NameStr + "'"); + return false; +} + +/// GetBB - Get a basic block with the specified name or ID, creating a +/// forward reference record if needed. +BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name, + LocTy Loc) { + return cast_or_null(GetVal(Name, Type::LabelTy, Loc)); +} + +BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) { + return cast_or_null(GetVal(ID, Type::LabelTy, Loc)); +} + +/// DefineBB - Define the specified basic block, which is either named or +/// unnamed. If there is an error, this returns null otherwise it returns +/// the block being defined. +BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name, + LocTy Loc) { + BasicBlock *BB; + if (Name.empty()) + BB = GetBB(NumberedVals.size(), Loc); + else + BB = GetBB(Name, Loc); + if (BB == 0) return 0; // Already diagnosed error. + + // Move the block to the end of the function. Forward ref'd blocks are + // inserted wherever they happen to be referenced. + F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB); + + // Remove the block from forward ref sets. + if (Name.empty()) { + ForwardRefValIDs.erase(NumberedVals.size()); + NumberedVals.push_back(BB); + } else { + // BB forward references are already in the function symbol table. + ForwardRefVals.erase(Name); + } + + return BB; +} + +//===----------------------------------------------------------------------===// +// Constants. +//===----------------------------------------------------------------------===// + +/// ParseValID - Parse an abstract value that doesn't necessarily have a +/// type implied. For example, if we parse "4" we don't know what integer type +/// it has. The value will later be combined with its type and checked for +/// sanity. 
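+/// ConvertGlobalValIDToValue and ConvertValIDToValue below perform that
+/// combination, e.g. sizing the '4' of 'i32 4' to 32 bits.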
+bool LLParser::ParseValID(ValID &ID) { + ID.Loc = Lex.getLoc(); + switch (Lex.getKind()) { + default: return TokError("expected value token"); + case lltok::GlobalID: // @42 + ID.UIntVal = Lex.getUIntVal(); + ID.Kind = ValID::t_GlobalID; + break; + case lltok::GlobalVar: // @foo + ID.StrVal = Lex.getStrVal(); + ID.Kind = ValID::t_GlobalName; + break; + case lltok::LocalVarID: // %42 + ID.UIntVal = Lex.getUIntVal(); + ID.Kind = ValID::t_LocalID; + break; + case lltok::LocalVar: // %foo + case lltok::StringConstant: // "foo" - FIXME: REMOVE IN LLVM 3.0 + ID.StrVal = Lex.getStrVal(); + ID.Kind = ValID::t_LocalName; + break; + case lltok::Metadata: { // !{...} MDNode, !"foo" MDString + ID.Kind = ValID::t_Constant; + Lex.Lex(); + if (Lex.getKind() == lltok::lbrace) { + SmallVector Elts; + if (ParseMDNodeVector(Elts) || + ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + ID.ConstantVal = MDNode::get(Elts.data(), Elts.size()); + return false; + } + + // MDString: + // ::= '!' STRINGCONSTANT + std::string Str; + if (ParseStringConstant(Str)) return true; + + ID.ConstantVal = MDString::get(Str.data(), Str.data() + Str.size()); + return false; + } + case lltok::APSInt: + ID.APSIntVal = Lex.getAPSIntVal(); + ID.Kind = ValID::t_APSInt; + break; + case lltok::APFloat: + ID.APFloatVal = Lex.getAPFloatVal(); + ID.Kind = ValID::t_APFloat; + break; + case lltok::kw_true: + ID.ConstantVal = ConstantInt::getTrue(); + ID.Kind = ValID::t_Constant; + break; + case lltok::kw_false: + ID.ConstantVal = ConstantInt::getFalse(); + ID.Kind = ValID::t_Constant; + break; + case lltok::kw_null: ID.Kind = ValID::t_Null; break; + case lltok::kw_undef: ID.Kind = ValID::t_Undef; break; + case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break; + + case lltok::lbrace: { + // ValID ::= '{' ConstVector '}' + Lex.Lex(); + SmallVector Elts; + if (ParseGlobalValueVector(Elts) || + ParseToken(lltok::rbrace, "expected end of struct constant")) + return true; + + ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), false); + ID.Kind = ValID::t_Constant; + return false; + } + case lltok::less: { + // ValID ::= '<' ConstVector '>' --> Vector. + // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct. + Lex.Lex(); + bool isPackedStruct = EatIfPresent(lltok::lbrace); + + SmallVector Elts; + LocTy FirstEltLoc = Lex.getLoc(); + if (ParseGlobalValueVector(Elts) || + (isPackedStruct && + ParseToken(lltok::rbrace, "expected end of packed struct")) || + ParseToken(lltok::greater, "expected end of constant")) + return true; + + if (isPackedStruct) { + ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), true); + ID.Kind = ValID::t_Constant; + return false; + } + + if (Elts.empty()) + return Error(ID.Loc, "constant vector must not be empty"); + + if (!Elts[0]->getType()->isInteger() && + !Elts[0]->getType()->isFloatingPoint()) + return Error(FirstEltLoc, + "vector elements must have integer or floating point type"); + + // Verify that all the vector elements have the same type. 
+ for (unsigned i = 1, e = Elts.size(); i != e; ++i) + if (Elts[i]->getType() != Elts[0]->getType()) + return Error(FirstEltLoc, + "vector element #" + utostr(i) + + " is not of type '" + Elts[0]->getType()->getDescription()); + + ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size()); + ID.Kind = ValID::t_Constant; + return false; + } + case lltok::lsquare: { // Array Constant + Lex.Lex(); + SmallVector Elts; + LocTy FirstEltLoc = Lex.getLoc(); + if (ParseGlobalValueVector(Elts) || + ParseToken(lltok::rsquare, "expected end of array constant")) + return true; + + // Handle empty element. + if (Elts.empty()) { + // Use undef instead of an array because it's inconvenient to determine + // the element type at this point, there being no elements to examine. + ID.Kind = ValID::t_EmptyArray; + return false; + } + + if (!Elts[0]->getType()->isFirstClassType()) + return Error(FirstEltLoc, "invalid array element type: " + + Elts[0]->getType()->getDescription()); + + ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size()); + + // Verify all elements are correct type! + for (unsigned i = 0, e = Elts.size(); i != e; ++i) { + if (Elts[i]->getType() != Elts[0]->getType()) + return Error(FirstEltLoc, + "array element #" + utostr(i) + + " is not of type '" +Elts[0]->getType()->getDescription()); + } + + ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size()); + ID.Kind = ValID::t_Constant; + return false; + } + case lltok::kw_c: // c "foo" + Lex.Lex(); + ID.ConstantVal = ConstantArray::get(Lex.getStrVal(), false); + if (ParseToken(lltok::StringConstant, "expected string")) return true; + ID.Kind = ValID::t_Constant; + return false; + + case lltok::kw_asm: { + // ValID ::= 'asm' SideEffect? STRINGCONSTANT ',' STRINGCONSTANT + bool HasSideEffect; + Lex.Lex(); + if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || + ParseStringConstant(ID.StrVal) || + ParseToken(lltok::comma, "expected comma in inline asm expression") || + ParseToken(lltok::StringConstant, "expected constraint string")) + return true; + ID.StrVal2 = Lex.getStrVal(); + ID.UIntVal = HasSideEffect; + ID.Kind = ValID::t_InlineAsm; + return false; + } + + case lltok::kw_trunc: + case lltok::kw_zext: + case lltok::kw_sext: + case lltok::kw_fptrunc: + case lltok::kw_fpext: + case lltok::kw_bitcast: + case lltok::kw_uitofp: + case lltok::kw_sitofp: + case lltok::kw_fptoui: + case lltok::kw_fptosi: + case lltok::kw_inttoptr: + case lltok::kw_ptrtoint: { + unsigned Opc = Lex.getUIntVal(); + PATypeHolder DestTy(Type::VoidTy); + Constant *SrcVal; + Lex.Lex(); + if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") || + ParseGlobalTypeAndValue(SrcVal) || + ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") || + ParseType(DestTy) || + ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast")) + return true; + if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy)) + return Error(ID.Loc, "invalid cast opcode for cast from '" + + SrcVal->getType()->getDescription() + "' to '" + + DestTy->getDescription() + "'"); + ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc, SrcVal, + DestTy); + ID.Kind = ValID::t_Constant; + return false; + } + case lltok::kw_extractvalue: { + Lex.Lex(); + Constant *Val; + SmallVector Indices; + if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")|| + ParseGlobalTypeAndValue(Val) || + ParseIndexList(Indices) || + ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) + return true; + if 
(!isa<ArrayType>(Val->getType()) && !isa<StructType>(Val->getType()))
+      return Error(ID.Loc, "extractvalue operand must be array or struct");
+    if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
+                                          Indices.end()))
+      return Error(ID.Loc, "invalid indices for extractvalue");
+    ID.ConstantVal =
+      ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_insertvalue: {
+    Lex.Lex();
+    Constant *Val0, *Val1;
+    SmallVector<unsigned, 4> Indices;
+    if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseIndexList(Indices) ||
+        ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+      return true;
+    if (!isa<ArrayType>(Val0->getType()) && !isa<StructType>(Val0->getType()))
+      return Error(ID.Loc, "insertvalue operand must be array or struct");
+    if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
+                                          Indices.end()))
+      return Error(ID.Loc, "invalid indices for insertvalue");
+    ID.ConstantVal =
+      ConstantExpr::getInsertValue(Val0, Val1, Indices.data(), Indices.size());
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_icmp:
+  case lltok::kw_fcmp:
+  case lltok::kw_vicmp:
+  case lltok::kw_vfcmp: {
+    unsigned PredVal, Opc = Lex.getUIntVal();
+    Constant *Val0, *Val1;
+    Lex.Lex();
+    if (ParseCmpPredicate(PredVal, Opc) ||
+        ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+      return true;
+
+    if (Val0->getType() != Val1->getType())
+      return Error(ID.Loc, "compare operands must have the same type");
+
+    CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
+    if (Opc == Instruction::FCmp) {
+      if (!Val0->getType()->isFPOrFPVector())
+        return Error(ID.Loc, "fcmp requires floating point operands");
+      ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+    } else if (Opc == Instruction::ICmp) {
+      if (!Val0->getType()->isIntOrIntVector() &&
+          !isa<PointerType>(Val0->getType()))
+        return Error(ID.Loc, "icmp requires pointer or integer operands");
+      ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+    } else if (Opc == Instruction::VFCmp) {
+      // FIXME: REMOVE VFCMP Support
+      if (!Val0->getType()->isFPOrFPVector() ||
+          !isa<VectorType>(Val0->getType()))
+        return Error(ID.Loc, "vfcmp requires vector floating point operands");
+      ID.ConstantVal = ConstantExpr::getVFCmp(Pred, Val0, Val1);
+    } else if (Opc == Instruction::VICmp) {
+      // FIXME: REMOVE VICMP Support
+      if (!Val0->getType()->isIntOrIntVector() ||
+          !isa<VectorType>(Val0->getType()))
+        return Error(ID.Loc, "vicmp requires vector integer operands");
+      ID.ConstantVal = ConstantExpr::getVICmp(Pred, Val0, Val1);
+    }
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+
+  // Binary Operators.
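+  // e.g. 'add (i32 1, i32 2)'; both operands must have the same integer or
+  // floating point type.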
+ case lltok::kw_add: + case lltok::kw_sub: + case lltok::kw_mul: + case lltok::kw_udiv: + case lltok::kw_sdiv: + case lltok::kw_fdiv: + case lltok::kw_urem: + case lltok::kw_srem: + case lltok::kw_frem: { + unsigned Opc = Lex.getUIntVal(); + Constant *Val0, *Val1; + Lex.Lex(); + if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") || + ParseGlobalTypeAndValue(Val0) || + ParseToken(lltok::comma, "expected comma in binary constantexpr") || + ParseGlobalTypeAndValue(Val1) || + ParseToken(lltok::rparen, "expected ')' in binary constantexpr")) + return true; + if (Val0->getType() != Val1->getType()) + return Error(ID.Loc, "operands of constexpr must have same type"); + if (!Val0->getType()->isIntOrIntVector() && + !Val0->getType()->isFPOrFPVector()) + return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); + ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); + ID.Kind = ValID::t_Constant; + return false; + } + + // Logical Operations + case lltok::kw_shl: + case lltok::kw_lshr: + case lltok::kw_ashr: + case lltok::kw_and: + case lltok::kw_or: + case lltok::kw_xor: { + unsigned Opc = Lex.getUIntVal(); + Constant *Val0, *Val1; + Lex.Lex(); + if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") || + ParseGlobalTypeAndValue(Val0) || + ParseToken(lltok::comma, "expected comma in logical constantexpr") || + ParseGlobalTypeAndValue(Val1) || + ParseToken(lltok::rparen, "expected ')' in logical constantexpr")) + return true; + if (Val0->getType() != Val1->getType()) + return Error(ID.Loc, "operands of constexpr must have same type"); + if (!Val0->getType()->isIntOrIntVector()) + return Error(ID.Loc, + "constexpr requires integer or integer vector operands"); + ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); + ID.Kind = ValID::t_Constant; + return false; + } + + case lltok::kw_getelementptr: + case lltok::kw_shufflevector: + case lltok::kw_insertelement: + case lltok::kw_extractelement: + case lltok::kw_select: { + unsigned Opc = Lex.getUIntVal(); + SmallVector Elts; + Lex.Lex(); + if (ParseToken(lltok::lparen, "expected '(' in constantexpr") || + ParseGlobalValueVector(Elts) || + ParseToken(lltok::rparen, "expected ')' in constantexpr")) + return true; + + if (Opc == Instruction::GetElementPtr) { + if (Elts.size() == 0 || !isa(Elts[0]->getType())) + return Error(ID.Loc, "getelementptr requires pointer operand"); + + if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), + (Value**)&Elts[1], Elts.size()-1)) + return Error(ID.Loc, "invalid indices for getelementptr"); + ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], + &Elts[1], Elts.size()-1); + } else if (Opc == Instruction::Select) { + if (Elts.size() != 3) + return Error(ID.Loc, "expected three operands to select"); + if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1], + Elts[2])) + return Error(ID.Loc, Reason); + ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]); + } else if (Opc == Instruction::ShuffleVector) { + if (Elts.size() != 3) + return Error(ID.Loc, "expected three operands to shufflevector"); + if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2])) + return Error(ID.Loc, "invalid operands to shufflevector"); + ID.ConstantVal = ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]); + } else if (Opc == Instruction::ExtractElement) { + if (Elts.size() != 2) + return Error(ID.Loc, "expected two operands to extractelement"); + if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1])) + return Error(ID.Loc, "invalid 
extractelement operands"); + ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]); + } else { + assert(Opc == Instruction::InsertElement && "Unknown opcode"); + if (Elts.size() != 3) + return Error(ID.Loc, "expected three operands to insertelement"); + if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2])) + return Error(ID.Loc, "invalid insertelement operands"); + ID.ConstantVal = ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]); + } + + ID.Kind = ValID::t_Constant; + return false; + } + } + + Lex.Lex(); + return false; +} + +/// ParseGlobalValue - Parse a global value with the specified type. +bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&V) { + V = 0; + ValID ID; + return ParseValID(ID) || + ConvertGlobalValIDToValue(Ty, ID, V); +} + +/// ConvertGlobalValIDToValue - Apply a type to a ValID to get a fully resolved +/// constant. +bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, + Constant *&V) { + if (isa(Ty)) + return Error(ID.Loc, "functions are not values, refer to them as pointers"); + + switch (ID.Kind) { + default: assert(0 && "Unknown ValID!"); + case ValID::t_LocalID: + case ValID::t_LocalName: + return Error(ID.Loc, "invalid use of function-local name"); + case ValID::t_InlineAsm: + return Error(ID.Loc, "inline asm can only be an operand of call/invoke"); + case ValID::t_GlobalName: + V = GetGlobalVal(ID.StrVal, Ty, ID.Loc); + return V == 0; + case ValID::t_GlobalID: + V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc); + return V == 0; + case ValID::t_APSInt: + if (!isa(Ty)) + return Error(ID.Loc, "integer constant must have integer type"); + ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits()); + V = ConstantInt::get(ID.APSIntVal); + return false; + case ValID::t_APFloat: + if (!Ty->isFloatingPoint() || + !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) + return Error(ID.Loc, "floating point constant invalid for type"); + + // The lexer has no type info, so builds all float and double FP constants + // as double. Fix this here. Long double does not need this. + if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble && + Ty == Type::FloatTy) { + bool Ignored; + ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, + &Ignored); + } + V = ConstantFP::get(ID.APFloatVal); + + if (V->getType() != Ty) + return Error(ID.Loc, "floating point constant does not have type '" + + Ty->getDescription() + "'"); + + return false; + case ValID::t_Null: + if (!isa(Ty)) + return Error(ID.Loc, "null must be a pointer type"); + V = ConstantPointerNull::get(cast(Ty)); + return false; + case ValID::t_Undef: + // FIXME: LabelTy should not be a first-class type. + if ((!Ty->isFirstClassType() || Ty == Type::LabelTy) && + !isa(Ty)) + return Error(ID.Loc, "invalid type for undef constant"); + V = UndefValue::get(Ty); + return false; + case ValID::t_EmptyArray: + if (!isa(Ty) || cast(Ty)->getNumElements() != 0) + return Error(ID.Loc, "invalid empty array initializer"); + V = UndefValue::get(Ty); + return false; + case ValID::t_Zero: + // FIXME: LabelTy should not be a first-class type. 
+ if (!Ty->isFirstClassType() || Ty == Type::LabelTy) + return Error(ID.Loc, "invalid type for null constant"); + V = Constant::getNullValue(Ty); + return false; + case ValID::t_Constant: + if (ID.ConstantVal->getType() != Ty) + return Error(ID.Loc, "constant expression type mismatch"); + V = ID.ConstantVal; + return false; + } +} + +bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { + PATypeHolder Type(Type::VoidTy); + return ParseType(Type) || + ParseGlobalValue(Type, V); +} + +/// ParseGlobalValueVector +/// ::= /*empty*/ +/// ::= TypeAndValue (',' TypeAndValue)* +bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { + // Empty list. + if (Lex.getKind() == lltok::rbrace || + Lex.getKind() == lltok::rsquare || + Lex.getKind() == lltok::greater || + Lex.getKind() == lltok::rparen) + return false; + + Constant *C; + if (ParseGlobalTypeAndValue(C)) return true; + Elts.push_back(C); + + while (EatIfPresent(lltok::comma)) { + if (ParseGlobalTypeAndValue(C)) return true; + Elts.push_back(C); + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Function Parsing. +//===----------------------------------------------------------------------===// + +bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, + PerFunctionState &PFS) { + if (ID.Kind == ValID::t_LocalID) + V = PFS.GetVal(ID.UIntVal, Ty, ID.Loc); + else if (ID.Kind == ValID::t_LocalName) + V = PFS.GetVal(ID.StrVal, Ty, ID.Loc); + else if (ID.Kind == ValID::t_InlineAsm) { + const PointerType *PTy = dyn_cast(Ty); + const FunctionType *FTy = + PTy ? dyn_cast(PTy->getElementType()) : 0; + if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) + return Error(ID.Loc, "invalid type for inline asm constraint string"); + V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal); + return false; + } else { + Constant *C; + if (ConvertGlobalValIDToValue(Ty, ID, C)) return true; + V = C; + return false; + } + + return V == 0; +} + +bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) { + V = 0; + ValID ID; + return ParseValID(ID) || + ConvertValIDToValue(Ty, ID, V, PFS); +} + +bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) { + PATypeHolder T(Type::VoidTy); + return ParseType(T) || + ParseValue(T, V, PFS); +} + +/// FunctionHeader +/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs +/// Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection +/// OptionalAlign OptGC +bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { + // Parse the linkage. + LocTy LinkageLoc = Lex.getLoc(); + unsigned Linkage; + + unsigned Visibility, CC, RetAttrs; + PATypeHolder RetType(Type::VoidTy); + LocTy RetTypeLoc = Lex.getLoc(); + if (ParseOptionalLinkage(Linkage) || + ParseOptionalVisibility(Visibility) || + ParseOptionalCallingConv(CC) || + ParseOptionalAttrs(RetAttrs, 1) || + ParseType(RetType, RetTypeLoc, true /*void allowed*/)) + return true; + + // Verify that the linkage is ok. + switch ((GlobalValue::LinkageTypes)Linkage) { + case GlobalValue::ExternalLinkage: + break; // always ok. 
+ case GlobalValue::DLLImportLinkage: + case GlobalValue::ExternalWeakLinkage: + if (isDefine) + return Error(LinkageLoc, "invalid linkage for function definition"); + break; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::DLLExportLinkage: + if (!isDefine) + return Error(LinkageLoc, "invalid linkage for function declaration"); + break; + case GlobalValue::AppendingLinkage: + case GlobalValue::GhostLinkage: + case GlobalValue::CommonLinkage: + return Error(LinkageLoc, "invalid function linkage type"); + } + + if (!FunctionType::isValidReturnType(RetType) || + isa(RetType)) + return Error(RetTypeLoc, "invalid function return type"); + + LocTy NameLoc = Lex.getLoc(); + + std::string FunctionName; + if (Lex.getKind() == lltok::GlobalVar) { + FunctionName = Lex.getStrVal(); + } else if (Lex.getKind() == lltok::GlobalID) { // @42 is ok. + unsigned NameID = Lex.getUIntVal(); + + if (NameID != NumberedVals.size()) + return TokError("function expected to be numbered '%" + + utostr(NumberedVals.size()) + "'"); + } else { + return TokError("expected function name"); + } + + Lex.Lex(); + + if (Lex.getKind() != lltok::lparen) + return TokError("expected '(' in function argument list"); + + std::vector ArgList; + bool isVarArg; + unsigned FuncAttrs; + std::string Section; + unsigned Alignment; + std::string GC; + + if (ParseArgumentList(ArgList, isVarArg, false) || + ParseOptionalAttrs(FuncAttrs, 2) || + (EatIfPresent(lltok::kw_section) && + ParseStringConstant(Section)) || + ParseOptionalAlignment(Alignment) || + (EatIfPresent(lltok::kw_gc) && + ParseStringConstant(GC))) + return true; + + // If the alignment was parsed as an attribute, move to the alignment field. + if (FuncAttrs & Attribute::Alignment) { + Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs); + FuncAttrs &= ~Attribute::Alignment; + } + + // Okay, if we got here, the function is syntactically valid. Convert types + // and do semantic checks. + std::vector ParamTypeList; + SmallVector Attrs; + // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function + // attributes. + unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; + if (FuncAttrs & ObsoleteFuncAttrs) { + RetAttrs |= FuncAttrs & ObsoleteFuncAttrs; + FuncAttrs &= ~ObsoleteFuncAttrs; + } + + if (RetAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { + ParamTypeList.push_back(ArgList[i].Type); + if (ArgList[i].Attrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + } + + if (FuncAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs)); + + AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + + if (PAL.paramHasAttr(1, Attribute::StructRet) && + RetType != Type::VoidTy) + return Error(RetTypeLoc, "functions with 'sret' argument must return void"); + + const FunctionType *FT = FunctionType::get(RetType, ParamTypeList, isVarArg); + const PointerType *PFT = PointerType::getUnqual(FT); + + Fn = 0; + if (!FunctionName.empty()) { + // If this was a definition of a forward reference, remove the definition + // from the forward reference table and fill in the forward ref. 
+ std::map >::iterator FRVI = + ForwardRefVals.find(FunctionName); + if (FRVI != ForwardRefVals.end()) { + Fn = M->getFunction(FunctionName); + ForwardRefVals.erase(FRVI); + } else if ((Fn = M->getFunction(FunctionName))) { + // If this function already exists in the symbol table, then it is + // multiply defined. We accept a few cases for old backwards compat. + // FIXME: Remove this stuff for LLVM 3.0. + if (Fn->getType() != PFT || Fn->getAttributes() != PAL || + (!Fn->isDeclaration() && isDefine)) { + // If the redefinition has different type or different attributes, + // reject it. If both have bodies, reject it. + return Error(NameLoc, "invalid redefinition of function '" + + FunctionName + "'"); + } else if (Fn->isDeclaration()) { + // Make sure to strip off any argument names so we can't get conflicts. + for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end(); + AI != AE; ++AI) + AI->setName(""); + } + } + + } else if (FunctionName.empty()) { + // If this is a definition of a forward referenced function, make sure the + // types agree. + std::map >::iterator I + = ForwardRefValIDs.find(NumberedVals.size()); + if (I != ForwardRefValIDs.end()) { + Fn = cast(I->second.first); + if (Fn->getType() != PFT) + return Error(NameLoc, "type of definition and forward reference of '@" + + utostr(NumberedVals.size()) +"' disagree"); + ForwardRefValIDs.erase(I); + } + } + + if (Fn == 0) + Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M); + else // Move the forward-reference to the correct spot in the module. + M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn); + + if (FunctionName.empty()) + NumberedVals.push_back(Fn); + + Fn->setLinkage((GlobalValue::LinkageTypes)Linkage); + Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility); + Fn->setCallingConv(CC); + Fn->setAttributes(PAL); + Fn->setAlignment(Alignment); + Fn->setSection(Section); + if (!GC.empty()) Fn->setGC(GC.c_str()); + + // Add all of the arguments we parsed to the function. + Function::arg_iterator ArgIt = Fn->arg_begin(); + for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) { + // If the argument has a name, insert it into the argument symbol table. + if (ArgList[i].Name.empty()) continue; + + // Set the name, if it conflicted, it will be auto-renamed. + ArgIt->setName(ArgList[i].Name); + + if (ArgIt->getNameStr() != ArgList[i].Name) + return Error(ArgList[i].Loc, "redefinition of argument '%" + + ArgList[i].Name + "'"); + } + + return false; +} + + +/// ParseFunctionBody +/// ::= '{' BasicBlock+ '}' +/// ::= 'begin' BasicBlock+ 'end' // FIXME: remove in LLVM 3.0 +/// +bool LLParser::ParseFunctionBody(Function &Fn) { + if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin) + return TokError("expected '{' in function body"); + Lex.Lex(); // eat the {. + + PerFunctionState PFS(*this, Fn); + + while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end) + if (ParseBasicBlock(PFS)) return true; + + // Eat the }. + Lex.Lex(); + + // Verify function is ok. + return PFS.VerifyFunctionComplete(); +} + +/// ParseBasicBlock +/// ::= LabelStr? Instruction* +bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { + // If this basic block starts out with a name, remember it. 
+ std::string Name; + LocTy NameLoc = Lex.getLoc(); + if (Lex.getKind() == lltok::LabelStr) { + Name = Lex.getStrVal(); + Lex.Lex(); + } + + BasicBlock *BB = PFS.DefineBB(Name, NameLoc); + if (BB == 0) return true; + + std::string NameStr; + + // Parse the instructions in this block until we get a terminator. + Instruction *Inst; + do { + // This instruction may have three possibilities for a name: a) none + // specified, b) name specified "%foo =", c) number specified: "%4 =". + LocTy NameLoc = Lex.getLoc(); + int NameID = -1; + NameStr = ""; + + if (Lex.getKind() == lltok::LocalVarID) { + NameID = Lex.getUIntVal(); + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after instruction id")) + return true; + } else if (Lex.getKind() == lltok::LocalVar || + // FIXME: REMOVE IN LLVM 3.0 + Lex.getKind() == lltok::StringConstant) { + NameStr = Lex.getStrVal(); + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after instruction name")) + return true; + } + + if (ParseInstruction(Inst, BB, PFS)) return true; + + BB->getInstList().push_back(Inst); + + // Set the name on the instruction. + if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true; + } while (!isa(Inst)); + + return false; +} + +//===----------------------------------------------------------------------===// +// Instruction Parsing. +//===----------------------------------------------------------------------===// + +/// ParseInstruction - Parse one of the many different instructions. +/// +bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, + PerFunctionState &PFS) { + lltok::Kind Token = Lex.getKind(); + if (Token == lltok::Eof) + return TokError("found end of file when expecting more instructions"); + LocTy Loc = Lex.getLoc(); + unsigned KeywordVal = Lex.getUIntVal(); + Lex.Lex(); // Eat the keyword. + + switch (Token) { + default: return Error(Loc, "expected instruction opcode"); + // Terminator Instructions. + case lltok::kw_unwind: Inst = new UnwindInst(); return false; + case lltok::kw_unreachable: Inst = new UnreachableInst(); return false; + case lltok::kw_ret: return ParseRet(Inst, BB, PFS); + case lltok::kw_br: return ParseBr(Inst, PFS); + case lltok::kw_switch: return ParseSwitch(Inst, PFS); + case lltok::kw_invoke: return ParseInvoke(Inst, PFS); + // Binary Operators. + case lltok::kw_add: + case lltok::kw_sub: + case lltok::kw_mul: return ParseArithmetic(Inst, PFS, KeywordVal, 0); + + case lltok::kw_udiv: + case lltok::kw_sdiv: + case lltok::kw_urem: + case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1); + case lltok::kw_fdiv: + case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2); + case lltok::kw_shl: + case lltok::kw_lshr: + case lltok::kw_ashr: + case lltok::kw_and: + case lltok::kw_or: + case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal); + case lltok::kw_icmp: + case lltok::kw_fcmp: + case lltok::kw_vicmp: + case lltok::kw_vfcmp: return ParseCompare(Inst, PFS, KeywordVal); + // Casts. + case lltok::kw_trunc: + case lltok::kw_zext: + case lltok::kw_sext: + case lltok::kw_fptrunc: + case lltok::kw_fpext: + case lltok::kw_bitcast: + case lltok::kw_uitofp: + case lltok::kw_sitofp: + case lltok::kw_fptoui: + case lltok::kw_fptosi: + case lltok::kw_inttoptr: + case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal); + // Other. 
+  case lltok::kw_select:         return ParseSelect(Inst, PFS);
+  case lltok::kw_va_arg:         return ParseVA_Arg(Inst, PFS);
+  case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+  case lltok::kw_insertelement:  return ParseInsertElement(Inst, PFS);
+  case lltok::kw_shufflevector:  return ParseShuffleVector(Inst, PFS);
+  case lltok::kw_phi:            return ParsePHI(Inst, PFS);
+  case lltok::kw_call:           return ParseCall(Inst, PFS, false);
+  case lltok::kw_tail:           return ParseCall(Inst, PFS, true);
+  // Memory.
+  case lltok::kw_alloca:
+  case lltok::kw_malloc:         return ParseAlloc(Inst, PFS, KeywordVal);
+  case lltok::kw_free:           return ParseFree(Inst, PFS);
+  case lltok::kw_load:           return ParseLoad(Inst, PFS, false);
+  case lltok::kw_store:          return ParseStore(Inst, PFS, false);
+  case lltok::kw_volatile:
+    if (EatIfPresent(lltok::kw_load))
+      return ParseLoad(Inst, PFS, true);
+    else if (EatIfPresent(lltok::kw_store))
+      return ParseStore(Inst, PFS, true);
+    else
+      return TokError("expected 'load' or 'store'");
+  case lltok::kw_getresult:      return ParseGetResult(Inst, PFS);
+  case lltok::kw_getelementptr:  return ParseGetElementPtr(Inst, PFS);
+  case lltok::kw_extractvalue:   return ParseExtractValue(Inst, PFS);
+  case lltok::kw_insertvalue:    return ParseInsertValue(Inst, PFS);
+  }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+  // FIXME: REMOVE vicmp/vfcmp!
+  if (Opc == Instruction::FCmp || Opc == Instruction::VFCmp) {
+    switch (Lex.getKind()) {
+    default: return TokError("expected fcmp predicate (e.g. 'oeq')");
+    case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
+    case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
+    case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
+    case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break;
+    case lltok::kw_ole: P = CmpInst::FCMP_OLE; break;
+    case lltok::kw_oge: P = CmpInst::FCMP_OGE; break;
+    case lltok::kw_ord: P = CmpInst::FCMP_ORD; break;
+    case lltok::kw_uno: P = CmpInst::FCMP_UNO; break;
+    case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break;
+    case lltok::kw_une: P = CmpInst::FCMP_UNE; break;
+    case lltok::kw_ult: P = CmpInst::FCMP_ULT; break;
+    case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break;
+    case lltok::kw_ule: P = CmpInst::FCMP_ULE; break;
+    case lltok::kw_uge: P = CmpInst::FCMP_UGE; break;
+    case lltok::kw_true: P = CmpInst::FCMP_TRUE; break;
+    case lltok::kw_false: P = CmpInst::FCMP_FALSE; break;
+    }
+  } else {
+    switch (Lex.getKind()) {
+    default: return TokError("expected icmp predicate (e.g. 'eq')");
+    case lltok::kw_eq:  P = CmpInst::ICMP_EQ; break;
+    case lltok::kw_ne:  P = CmpInst::ICMP_NE; break;
+    case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
+    case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break;
+    case lltok::kw_sle: P = CmpInst::ICMP_SLE; break;
+    case lltok::kw_sge: P = CmpInst::ICMP_SGE; break;
+    case lltok::kw_ult: P = CmpInst::ICMP_ULT; break;
+    case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break;
+    case lltok::kw_ule: P = CmpInst::ICMP_ULE; break;
+    case lltok::kw_uge: P = CmpInst::ICMP_UGE; break;
+    }
+  }
+  Lex.Lex();
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Terminator Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseRet - Parse a return instruction.
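+/// e.g. 'ret void' or 'ret i32 0'.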
+///   ::= 'ret' void
+///   ::= 'ret' TypeAndValue
+///   ::= 'ret' TypeAndValue (',' TypeAndValue)+  [[obsolete: LLVM 3.0]]
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+                        PerFunctionState &PFS) {
+  PATypeHolder Ty(Type::VoidTy);
+  if (ParseType(Ty, true /*void allowed*/)) return true;
+
+  if (Ty == Type::VoidTy) {
+    Inst = ReturnInst::Create();
+    return false;
+  }
+
+  Value *RV;
+  if (ParseValue(Ty, RV, PFS)) return true;
+
+  // The normal case is one return value.
+  if (Lex.getKind() == lltok::comma) {
+    // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
+    // of 'ret {i32,i32} {i32 1, i32 2}'
+    SmallVector<Value*, 8> RVs;
+    RVs.push_back(RV);
+
+    while (EatIfPresent(lltok::comma)) {
+      if (ParseTypeAndValue(RV, PFS)) return true;
+      RVs.push_back(RV);
+    }
+
+    RV = UndefValue::get(PFS.getFunction().getReturnType());
+    for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
+      Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
+      BB->getInstList().push_back(I);
+      RV = I;
+    }
+  }
+  Inst = ReturnInst::Create(RV);
+  return false;
+}
+
+
+/// ParseBr
+///   ::= 'br' TypeAndValue
+///   ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc, Loc2;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+    Inst = BranchInst::Create(BB);
+    return false;
+  }
+
+  if (Op0->getType() != Type::Int1Ty)
+    return Error(Loc, "branch condition must have 'i1' type");
+
+  if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+      ParseTypeAndValue(Op1, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after true destination") ||
+      ParseTypeAndValue(Op2, Loc2, PFS))
+    return true;
+
+  if (!isa<BasicBlock>(Op1))
+    return Error(Loc, "true destination of branch must be a basic block");
+  if (!isa<BasicBlock>(Op2))
+    return Error(Loc2, "false destination of branch must be a basic block");
+
+  Inst = BranchInst::Create(cast<BasicBlock>(Op1), cast<BasicBlock>(Op2), Op0);
+  return false;
+}
+
+/// ParseSwitch
+///  Instruction
+///    ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+///  JumpTable
+///    ::= (TypeAndValue ',' TypeAndValue)*
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy CondLoc, BBLoc;
+  Value *Cond, *DefaultBB;
+  if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after switch condition") ||
+      ParseTypeAndValue(DefaultBB, BBLoc, PFS) ||
+      ParseToken(lltok::lsquare, "expected '[' with switch table"))
+    return true;
+
+  if (!isa<IntegerType>(Cond->getType()))
+    return Error(CondLoc, "switch condition must have integer type");
+  if (!isa<BasicBlock>(DefaultBB))
+    return Error(BBLoc, "default destination must be a basic block");
+
+  // Parse the jump table pairs.
+  SmallPtrSet<Value*, 32> SeenCases;
+  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+  while (Lex.getKind() != lltok::rsquare) {
+    Value *Constant, *DestBB;
+
+    if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after case value") ||
+        ParseTypeAndValue(DestBB, BBLoc, PFS))
+      return true;
+
+    if (!SeenCases.insert(Constant))
+      return Error(CondLoc, "duplicate case value in switch");
+    if (!isa<ConstantInt>(Constant))
+      return Error(CondLoc, "case value is not a constant integer");
+    if (!isa<BasicBlock>(DestBB))
+      return Error(BBLoc, "case destination is not a basic block");
+
+    Table.push_back(std::make_pair(cast<ConstantInt>(Constant),
+                                   cast<BasicBlock>(DestBB)));
+  }
+
+  Lex.Lex();  // Eat the ']'.
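+  // The ']' ends the jump table; a SwitchInst is built next from the default
+  // destination and one (value, block) pair per table entry.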
+ + SwitchInst *SI = SwitchInst::Create(Cond, cast(DefaultBB), + Table.size()); + for (unsigned i = 0, e = Table.size(); i != e; ++i) + SI->addCase(Table[i].first, Table[i].second); + Inst = SI; + return false; +} + +/// ParseInvoke +/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList +/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue +bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { + LocTy CallLoc = Lex.getLoc(); + unsigned CC, RetAttrs, FnAttrs; + PATypeHolder RetType(Type::VoidTy); + LocTy RetTypeLoc; + ValID CalleeID; + SmallVector ArgList; + + Value *NormalBB, *UnwindBB; + if (ParseOptionalCallingConv(CC) || + ParseOptionalAttrs(RetAttrs, 1) || + ParseType(RetType, RetTypeLoc, true /*void allowed*/) || + ParseValID(CalleeID) || + ParseParameterList(ArgList, PFS) || + ParseOptionalAttrs(FnAttrs, 2) || + ParseToken(lltok::kw_to, "expected 'to' in invoke") || + ParseTypeAndValue(NormalBB, PFS) || + ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") || + ParseTypeAndValue(UnwindBB, PFS)) + return true; + + if (!isa(NormalBB)) + return Error(CallLoc, "normal destination is not a basic block"); + if (!isa(UnwindBB)) + return Error(CallLoc, "unwind destination is not a basic block"); + + // If RetType is a non-function pointer type, then this is the short syntax + // for the call, which means that RetType is just the return type. Infer the + // rest of the function argument types from the arguments that are present. + const PointerType *PFTy = 0; + const FunctionType *Ty = 0; + if (!(PFTy = dyn_cast(RetType)) || + !(Ty = dyn_cast(PFTy->getElementType()))) { + // Pull out the types of all of the arguments... + std::vector ParamTypes; + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) + ParamTypes.push_back(ArgList[i].V->getType()); + + if (!FunctionType::isValidReturnType(RetType)) + return Error(RetTypeLoc, "Invalid result type for LLVM function"); + + Ty = FunctionType::get(RetType, ParamTypes, false); + PFTy = PointerType::getUnqual(Ty); + } + + // Look up the callee. + Value *Callee; + if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true; + + // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional + // function attributes. + unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; + if (FnAttrs & ObsoleteFuncAttrs) { + RetAttrs |= FnAttrs & ObsoleteFuncAttrs; + FnAttrs &= ~ObsoleteFuncAttrs; + } + + // Set up the Attributes for the function. + SmallVector Attrs; + if (RetAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + + SmallVector Args; + + // Loop through FunctionType's arguments and ensure they are specified + // correctly. Also, gather any parameter attributes. 
+ FunctionType::param_iterator I = Ty->param_begin(); + FunctionType::param_iterator E = Ty->param_end(); + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { + const Type *ExpectedTy = 0; + if (I != E) { + ExpectedTy = *I++; + } else if (!Ty->isVarArg()) { + return Error(ArgList[i].Loc, "too many arguments specified"); + } + + if (ExpectedTy && ExpectedTy != ArgList[i].V->getType()) + return Error(ArgList[i].Loc, "argument is not of expected type '" + + ExpectedTy->getDescription() + "'"); + Args.push_back(ArgList[i].V); + if (ArgList[i].Attrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + } + + if (I != E) + return Error(CallLoc, "not enough parameters specified for call"); + + if (FnAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Finish off the Attributes and check them + AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + + InvokeInst *II = InvokeInst::Create(Callee, cast(NormalBB), + cast(UnwindBB), + Args.begin(), Args.end()); + II->setCallingConv(CC); + II->setAttributes(PAL); + Inst = II; + return false; +} + + + +//===----------------------------------------------------------------------===// +// Binary Operators. +//===----------------------------------------------------------------------===// + +/// ParseArithmetic +/// ::= ArithmeticOps TypeAndValue ',' Value +/// +/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1, +/// then any integer operand is allowed, if it is 2, any fp operand is allowed. +bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, + unsigned Opc, unsigned OperandType) { + LocTy Loc; Value *LHS, *RHS; + if (ParseTypeAndValue(LHS, Loc, PFS) || + ParseToken(lltok::comma, "expected ',' in arithmetic operation") || + ParseValue(LHS->getType(), RHS, PFS)) + return true; + + bool Valid; + switch (OperandType) { + default: assert(0 && "Unknown operand type!"); + case 0: // int or FP. + Valid = LHS->getType()->isIntOrIntVector() || + LHS->getType()->isFPOrFPVector(); + break; + case 1: Valid = LHS->getType()->isIntOrIntVector(); break; + case 2: Valid = LHS->getType()->isFPOrFPVector(); break; + } + + if (!Valid) + return Error(Loc, "invalid operand type for instruction"); + + Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + return false; +} + +/// ParseLogical +/// ::= ArithmeticOps TypeAndValue ',' Value { +bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, + unsigned Opc) { + LocTy Loc; Value *LHS, *RHS; + if (ParseTypeAndValue(LHS, Loc, PFS) || + ParseToken(lltok::comma, "expected ',' in logical operation") || + ParseValue(LHS->getType(), RHS, PFS)) + return true; + + if (!LHS->getType()->isIntOrIntVector()) + return Error(Loc,"instruction requires integer or integer vector operands"); + + Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + return false; +} + + +/// ParseCompare +/// ::= 'icmp' IPredicates TypeAndValue ',' Value +/// ::= 'fcmp' FPredicates TypeAndValue ',' Value +/// ::= 'vicmp' IPredicates TypeAndValue ',' Value +/// ::= 'vfcmp' FPredicates TypeAndValue ',' Value +bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, + unsigned Opc) { + // Parse the integer/fp comparison predicate. 
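+  // e.g. 'icmp eq i32 %a, %b' or 'fcmp olt double %x, %y'.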
+  LocTy Loc;
+  unsigned Pred;
+  Value *LHS, *RHS;
+  if (ParseCmpPredicate(Pred, Opc) ||
+      ParseTypeAndValue(LHS, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after compare value") ||
+      ParseValue(LHS->getType(), RHS, PFS))
+    return true;
+
+  if (Opc == Instruction::FCmp) {
+    if (!LHS->getType()->isFPOrFPVector())
+      return Error(Loc, "fcmp requires floating point operands");
+    Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  } else if (Opc == Instruction::ICmp) {
+    if (!LHS->getType()->isIntOrIntVector() &&
+        !isa<PointerType>(LHS->getType()))
+      return Error(Loc, "icmp requires integer or pointer operands");
+    Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  } else if (Opc == Instruction::VFCmp) {
+    if (!LHS->getType()->isFPOrFPVector() || !isa<VectorType>(LHS->getType()))
+      return Error(Loc, "vfcmp requires vector floating point operands");
+    Inst = new VFCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  } else if (Opc == Instruction::VICmp) {
+    if (!LHS->getType()->isIntOrIntVector() || !isa<VectorType>(LHS->getType()))
+      return Error(Loc, "vicmp requires vector integer operands");
+    Inst = new VICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+///   ::= CastOpc TypeAndValue 'to' Type
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+                         unsigned Opc) {
+  LocTy Loc;  Value *Op;
+  PATypeHolder DestTy(Type::VoidTy);
+  if (ParseTypeAndValue(Op, Loc, PFS) ||
+      ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+      ParseType(DestTy))
+    return true;
+
+  if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy))
+    return Error(Loc, "invalid cast opcode for cast from '" +
+                 Op->getType()->getDescription() + "' to '" +
+                 DestTy->getDescription() + "'");
+  Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+  return false;
+}
+
+/// ParseSelect
+///   ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after select condition") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after select value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+    return Error(Loc, Reason);
+
+  Inst = SelectInst::Create(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParseVA_Arg
+///   ::= 'va_arg' TypeAndValue ',' Type
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Op;
+  PATypeHolder EltTy(Type::VoidTy);
+  LocTy TypeLoc;
+  if (ParseTypeAndValue(Op, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+      ParseType(EltTy, TypeLoc))
+    return true;
+
+  if (!EltTy->isFirstClassType())
+    return Error(TypeLoc, "va_arg requires operand with first class type");
+
+  Inst = new VAArgInst(Op, EltTy);
+  return false;
+}
+
+/// ParseExtractElement
+///   ::= 'extractelement' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after extract value") ||
+      ParseTypeAndValue(Op1, PFS))
+    return true;
+
+  if (!ExtractElementInst::isValidOperands(Op0, Op1))
+    return Error(Loc, "invalid extractelement operands");
+
+  Inst = new ExtractElementInst(Op0, Op1);
+  return false;
+}
+
+/// ParseInsertElement
+///   ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid insertelement operands");
+
+  Inst = InsertElementInst::Create(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParseShuffleVector
+///   ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid shufflevector operands");
+
+  Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParsePHI
+///   ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
+bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+  PATypeHolder Ty(Type::VoidTy);
+  Value *Op0, *Op1;
+  LocTy TypeLoc = Lex.getLoc();
+
+  if (ParseType(Ty) ||
+      ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+      ParseValue(Ty, Op0, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after phi value") ||
+      ParseValue(Type::LabelTy, Op1, PFS) ||
+      ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+    return true;
+
+  SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+  while (1) {
+    PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+    if (!EatIfPresent(lltok::comma))
+      break;
+
+    if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+        ParseValue(Ty, Op0, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after phi value") ||
+        ParseValue(Type::LabelTy, Op1, PFS) ||
+        ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+      return true;
+  }
+
+  if (!Ty->isFirstClassType())
+    return Error(TypeLoc, "phi node must have first class type");
+
+  PHINode *PN = PHINode::Create(Ty);
+  PN->reserveOperandSpace(PHIVals.size());
+  for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+    PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+  Inst = PN;
+  return false;
+}
+
+/// ParseCall
+///   ::= 'tail'?
'call' OptionalCallingConv OptionalAttrs Type Value +/// ParameterList OptionalAttrs +bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, + bool isTail) { + unsigned CC, RetAttrs, FnAttrs; + PATypeHolder RetType(Type::VoidTy); + LocTy RetTypeLoc; + ValID CalleeID; + SmallVector ArgList; + LocTy CallLoc = Lex.getLoc(); + + if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) || + ParseOptionalCallingConv(CC) || + ParseOptionalAttrs(RetAttrs, 1) || + ParseType(RetType, RetTypeLoc, true /*void allowed*/) || + ParseValID(CalleeID) || + ParseParameterList(ArgList, PFS) || + ParseOptionalAttrs(FnAttrs, 2)) + return true; + + // If RetType is a non-function pointer type, then this is the short syntax + // for the call, which means that RetType is just the return type. Infer the + // rest of the function argument types from the arguments that are present. + const PointerType *PFTy = 0; + const FunctionType *Ty = 0; + if (!(PFTy = dyn_cast(RetType)) || + !(Ty = dyn_cast(PFTy->getElementType()))) { + // Pull out the types of all of the arguments... + std::vector ParamTypes; + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) + ParamTypes.push_back(ArgList[i].V->getType()); + + if (!FunctionType::isValidReturnType(RetType)) + return Error(RetTypeLoc, "Invalid result type for LLVM function"); + + Ty = FunctionType::get(RetType, ParamTypes, false); + PFTy = PointerType::getUnqual(Ty); + } + + // Look up the callee. + Value *Callee; + if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true; + + // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional + // function attributes. + unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; + if (FnAttrs & ObsoleteFuncAttrs) { + RetAttrs |= FnAttrs & ObsoleteFuncAttrs; + FnAttrs &= ~ObsoleteFuncAttrs; + } + + // Set up the Attributes for the function. + SmallVector Attrs; + if (RetAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + + SmallVector Args; + + // Loop through FunctionType's arguments and ensure they are specified + // correctly. Also, gather any parameter attributes. + FunctionType::param_iterator I = Ty->param_begin(); + FunctionType::param_iterator E = Ty->param_end(); + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { + const Type *ExpectedTy = 0; + if (I != E) { + ExpectedTy = *I++; + } else if (!Ty->isVarArg()) { + return Error(ArgList[i].Loc, "too many arguments specified"); + } + + if (ExpectedTy && ExpectedTy != ArgList[i].V->getType()) + return Error(ArgList[i].Loc, "argument is not of expected type '" + + ExpectedTy->getDescription() + "'"); + Args.push_back(ArgList[i].V); + if (ArgList[i].Attrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + } + + if (I != E) + return Error(CallLoc, "not enough parameters specified for call"); + + if (FnAttrs != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Finish off the Attributes and check them + AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + + CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end()); + CI->setTailCall(isTail); + CI->setCallingConv(CC); + CI->setAttributes(PAL); + Inst = CI; + return false; +} + +//===----------------------------------------------------------------------===// +// Memory Instructions. +//===----------------------------------------------------------------------===// + +/// ParseAlloc +/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalAlignment)? 
+/// ParseAlloc
+///   ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+///   ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
+                          unsigned Opc) {
+  PATypeHolder Ty(Type::VoidTy);
+  Value *Size = 0;
+  LocTy SizeLoc = 0;
+  unsigned Alignment = 0;
+  if (ParseType(Ty)) return true;
+
+  if (EatIfPresent(lltok::comma)) {
+    if (Lex.getKind() == lltok::kw_align) {
+      if (ParseOptionalAlignment(Alignment)) return true;
+    } else if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+               ParseOptionalCommaAlignment(Alignment)) {
+      return true;
+    }
+  }
+
+  if (Size && Size->getType() != Type::Int32Ty)
+    return Error(SizeLoc, "element count must be i32");
+
+  if (Opc == Instruction::Malloc)
+    Inst = new MallocInst(Ty, Size, Alignment);
+  else
+    Inst = new AllocaInst(Ty, Size, Alignment);
+  return false;
+}
+
+/// ParseFree
+///   ::= 'free' TypeAndValue
+bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Val; LocTy Loc;
+  if (ParseTypeAndValue(Val, Loc, PFS)) return true;
+  if (!isa<PointerType>(Val->getType()))
+    return Error(Loc, "operand to free must be a pointer");
+  Inst = new FreeInst(Val);
+  return false;
+}
+
+/// ParseLoad
+///   ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)?
+bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
+                         bool isVolatile) {
+  Value *Val; LocTy Loc;
+  unsigned Alignment;
+  if (ParseTypeAndValue(Val, Loc, PFS) ||
+      ParseOptionalCommaAlignment(Alignment))
+    return true;
+
+  if (!isa<PointerType>(Val->getType()) ||
+      !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+    return Error(Loc, "load operand must be a pointer to a first class type");
+
+  Inst = new LoadInst(Val, "", isVolatile, Alignment);
+  return false;
+}
+
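+// Editor's quick reference for the grammar comments above (not in the
+// imported source); forms accepted by ParseAlloc/ParseFree/ParseLoad, with
+// arbitrary names:
+//
+//   %m = malloc i32, i32 %n, align 4    ; element count must be i32 (checked
+//   %p = alloca double, i32 8, align 8  ;  above), alignment optional
+//   free i8* %q                         ; operand must be a pointer
+//   %v = load i32* %p2, align 4         ; pointer to a first-class type
+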
+/// ParseStore
+///   ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)?
+bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
+                          bool isVolatile) {
+  Value *Val, *Ptr; LocTy Loc, PtrLoc;
+  unsigned Alignment;
+  if (ParseTypeAndValue(Val, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after store operand") ||
+      ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+      ParseOptionalCommaAlignment(Alignment))
+    return true;
+
+  if (!isa<PointerType>(Ptr->getType()))
+    return Error(PtrLoc, "store operand must be a pointer");
+  if (!Val->getType()->isFirstClassType())
+    return Error(Loc, "store operand must be a first class value");
+  if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+    return Error(Loc, "stored value and pointer type do not match");
+
+  Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
+  return false;
+}
+
+/// ParseGetResult
+///   ::= 'getresult' TypeAndValue ',' uint
+/// FIXME: Remove support for getresult in LLVM 3.0
+bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Val; LocTy ValLoc, EltLoc;
+  unsigned Element;
+  if (ParseTypeAndValue(Val, ValLoc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after getresult operand") ||
+      ParseUInt32(Element, EltLoc))
+    return true;
+
+  if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+    return Error(ValLoc, "getresult inst requires an aggregate operand");
+  if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
+    return Error(EltLoc, "invalid getresult index for value");
+  Inst = ExtractValueInst::Create(Val, Element);
+  return false;
+}
+
+/// ParseGetElementPtr
+///   ::= 'getelementptr' TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Ptr, *Val; LocTy Loc, EltLoc;
+  if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+
+  if (!isa<PointerType>(Ptr->getType()))
+    return Error(Loc, "base of getelementptr must be a pointer");
+
+  SmallVector<Value*, 16> Indices;
+  while (EatIfPresent(lltok::comma)) {
+    if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+    if (!isa<IntegerType>(Val->getType()))
+      return Error(EltLoc, "getelementptr index must be an integer");
+    Indices.push_back(Val);
+  }
+
+  if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
+                                         Indices.begin(), Indices.end()))
+    return Error(Loc, "invalid getelementptr indices");
+  Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+  return false;
+}
+
+/// ParseExtractValue
+///   ::= 'extractvalue' TypeAndValue (',' uint32)+
+bool LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Val; LocTy Loc;
+  SmallVector<unsigned, 4> Indices;
+  if (ParseTypeAndValue(Val, Loc, PFS) ||
+      ParseIndexList(Indices))
+    return true;
+
+  if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+    return Error(Loc, "extractvalue operand must be array or struct");
+
+  if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
+                                        Indices.end()))
+    return Error(Loc, "invalid indices for extractvalue");
+  Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end());
+  return false;
+}
+
+/// ParseInsertValue
+///   ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
+bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Val0, *Val1; LocTy Loc0, Loc1;
+  SmallVector<unsigned, 4> Indices;
+  if (ParseTypeAndValue(Val0, Loc0, PFS) ||
+      ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
+      ParseTypeAndValue(Val1, Loc1, PFS) ||
+      ParseIndexList(Indices))
+    return true;
+
+  if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
+    return Error(Loc0, "insertvalue operand must be array or struct");
+
+  if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
+                                        Indices.end()))
+    return Error(Loc0, "invalid indices for insertvalue");
+  Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end());
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Embedded metadata.
+//===----------------------------------------------------------------------===//
+
+/// ParseMDNodeVector
+///   ::= Element (',' Element)*
+/// Element
+///   ::= 'null' | TypeAndValue
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
+  assert(Lex.getKind() == lltok::lbrace);
+  Lex.Lex();
+  do {
+    Value *V;
+    if (Lex.getKind() == lltok::kw_null) {
+      Lex.Lex();
+      V = 0;
+    } else {
+      Constant *C;
+      if (ParseGlobalTypeAndValue(C)) return true;
+      V = C;
+    }
+    Elts.push_back(V);
+  } while (EatIfPresent(lltok::comma));
+
+  return false;
+}
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
new file mode 100644
index 000000000000..7106689081d3
--- /dev/null
+++ b/lib/AsmParser/LLParser.h
@@ -0,0 +1,276 @@
+//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
+
+#include "LLLexer.h"
+#include "llvm/Type.h"
+#include <map>
+
+namespace llvm {
+  class Module;
+  class OpaqueType;
+  class Function;
+  class Value;
+  class BasicBlock;
+  class Instruction;
+  class Constant;
+  class GlobalValue;
+  class MDString;
+  class MDNode;
+  struct ValID;
+
+  class LLParser {
+  public:
+    typedef LLLexer::LocTy LocTy;
+  private:
+
+    LLLexer Lex;
+    Module *M;
+
+    // Type resolution handling data structures.
+    std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
+    std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
+    std::vector<PATypeHolder> NumberedTypes;
+
+    struct UpRefRecord {
+      /// Loc - This is the location of the upref.
+      LocTy Loc;
+
+      /// NestingLevel - The number of nesting levels that need to be popped
+      /// before this type is resolved.
+      unsigned NestingLevel;
+
+      /// LastContainedTy - This is the type at the current binding level for
+      /// the type.  Every time we reduce the nesting level, this gets updated.
+      const Type *LastContainedTy;
+
+      /// UpRefTy - This is the actual opaque type that the upreference is
+      /// represented with.
+      OpaqueType *UpRefTy;
+
+      UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
+        : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
+          UpRefTy(URTy) {}
+    };
+    std::vector<UpRefRecord> UpRefs;
+
+    // Global Value reference information.
+    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
+    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
+    std::vector<GlobalValue*> NumberedVals;
+  public:
+    LLParser(MemoryBuffer *F, ParseError &Err, Module *m) : Lex(F, Err), M(m) {}
+    bool Run();
+
+  private:
+
+    bool Error(LocTy L, const std::string &Msg) const {
+      return Lex.Error(L, Msg);
+    }
+    bool TokError(const std::string &Msg) const {
+      return Error(Lex.getLoc(), Msg);
+    }
+
+    /// GetGlobalVal - Get a value with the specified name or ID, creating a
+    /// forward reference record if needed.  This can return null if the value
+    /// exists but does not have the right type.
+    GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
+    GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+    // Helper Routines.
+    bool ParseToken(lltok::Kind T, const char *ErrMsg);
+    bool EatIfPresent(lltok::Kind T) {
+      if (Lex.getKind() != T) return false;
+      Lex.Lex();
+      return true;
+    }
+    bool ParseOptionalToken(lltok::Kind T, bool &Present) {
+      if (Lex.getKind() != T) {
+        Present = false;
+      } else {
+        Lex.Lex();
+        Present = true;
+      }
+      return false;
+    }
+    bool ParseStringConstant(std::string &Result);
+    bool ParseUInt32(unsigned &Val);
+    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+      Loc = Lex.getLoc();
+      return ParseUInt32(Val);
+    }
+    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+    bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
+    bool ParseOptionalLinkage(unsigned &Linkage) {
+      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
+    }
+    bool ParseOptionalVisibility(unsigned &Visibility);
+    bool ParseOptionalCallingConv(unsigned &CC);
+    bool ParseOptionalAlignment(unsigned &Alignment);
+    bool ParseOptionalCommaAlignment(unsigned &Alignment);
+    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices);
+
+    // Top-Level Entities
+    bool ParseTopLevelEntities();
+    bool ValidateEndOfModule();
+    bool ParseTargetDefinition();
+    bool ParseDepLibs();
+    bool ParseModuleAsm();
+    bool ParseUnnamedType();
+    bool ParseNamedType();
+    bool ParseDeclare();
+    bool ParseDefine();
+
+    bool ParseGlobalType(bool &IsConstant);
+    bool ParseNamedGlobal();
+    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
+                     bool HasLinkage, unsigned Visibility);
+    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
+
+    // Type Parsing.
+    bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
+    bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
+      Loc = Lex.getLoc();
+      return ParseType(Result, AllowVoid);
+    }
+    bool ParseTypeRec(PATypeHolder &H);
+    bool ParseStructType(PATypeHolder &H, bool Packed);
+    bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
+    bool ParseFunctionType(PATypeHolder &Result);
+    PATypeHolder HandleUpRefs(const Type *Ty);
+
+    // Constants.
+    bool ParseValID(ValID &ID);
+    bool ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, Constant *&V);
+    bool ParseGlobalValue(const Type *Ty, Constant *&V);
+    bool ParseGlobalTypeAndValue(Constant *&V);
+    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+    bool ParseMDNodeVector(SmallVectorImpl<Value*> &);
+
+
+    // Function Semantic Analysis.
+    class PerFunctionState {
+      LLParser &P;
+      Function &F;
+      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
+      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
+      std::vector<Value*> NumberedVals;
+    public:
+      PerFunctionState(LLParser &p, Function &f);
+      ~PerFunctionState();
+
+      Function &getFunction() const { return F; }
+
+      bool VerifyFunctionComplete();
+
+      /// GetVal - Get a value with the specified name or ID, creating a
+      /// forward reference record if needed.  This can return null if the value
+      /// exists but does not have the right type.
+      Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
+      Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+      /// SetInstName - After an instruction is parsed and inserted into its
+      /// basic block, this installs its name.
+      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+                       Instruction *Inst);
+
+      /// GetBB - Get a basic block with the specified name or ID, creating a
+      /// forward reference record if needed.  This can return null if the value
+      /// is not a BasicBlock.
+      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
+      BasicBlock *GetBB(unsigned ID, LocTy Loc);
+
+      /// DefineBB - Define the specified basic block, which is either named or
+      /// unnamed.  If there is an error, this returns null otherwise it returns
+      /// the block being defined.
+      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+    };
+
+    bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+                             PerFunctionState &PFS);
+
+    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
+    bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
+                    PerFunctionState &PFS) {
+      Loc = Lex.getLoc();
+      return ParseValue(Ty, V, PFS);
+    }
+
+    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
+    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+      Loc = Lex.getLoc();
+      return ParseTypeAndValue(V, PFS);
+    }
+
+    struct ParamInfo {
+      LocTy Loc;
+      Value *V;
+      unsigned Attrs;
+      ParamInfo(LocTy loc, Value *v, unsigned attrs)
+        : Loc(loc), V(v), Attrs(attrs) {}
+    };
+    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+                            PerFunctionState &PFS);
+
+    // Function Parsing.
+    struct ArgInfo {
+      LocTy Loc;
+      PATypeHolder Type;
+      unsigned Attrs;
+      std::string Name;
+      ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
+        : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
+    };
+    bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
+                           bool &isVarArg, bool inType);
+    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
+    bool ParseFunctionBody(Function &Fn);
+    bool ParseBasicBlock(PerFunctionState &PFS);
+
+    // Instruction Parsing.
+ bool ParseInstruction(Instruction *&Inst, BasicBlock *BB, + PerFunctionState &PFS); + bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); + + bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); + bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); + bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); + bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); + + bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, + unsigned OperandType); + bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseSelect(Instruction *&I, PerFunctionState &PFS); + bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS); + bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS); + bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); + bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); + bool ParsePHI(Instruction *&I, PerFunctionState &PFS); + bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); + bool ParseAlloc(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseFree(Instruction *&I, PerFunctionState &PFS); + bool ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + bool ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + bool ParseGetResult(Instruction *&I, PerFunctionState &PFS); + bool ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); + bool ParseExtractValue(Instruction *&I, PerFunctionState &PFS); + bool ParseInsertValue(Instruction *&I, PerFunctionState &PFS); + }; +} // End llvm namespace + +#endif diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h new file mode 100644 index 000000000000..d8bd38a4a61d --- /dev/null +++ b/lib/AsmParser/LLToken.h @@ -0,0 +1,130 @@ +//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the enums for the .ll lexer. +// +//===----------------------------------------------------------------------===// + +#ifndef LIBS_ASMPARSER_LLTOKEN_H +#define LIBS_ASMPARSER_LLTOKEN_H + +namespace llvm { +namespace lltok { + enum Kind { + // Markers + Eof, Error, + + // Tokens with no info. + dotdotdot, // ... 
+ equal, comma, // = , + star, // * + lsquare, rsquare, // [ ] + lbrace, rbrace, // { } + less, greater, // < > + lparen, rparen, // ( ) + backslash, // \ (not /) + + kw_x, + kw_begin, kw_end, + kw_true, kw_false, + kw_declare, kw_define, + kw_global, kw_constant, + + kw_private, kw_internal, kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, + kw_appending, kw_dllimport, kw_dllexport, kw_common,kw_available_externally, + kw_default, kw_hidden, kw_protected, + kw_extern_weak, + kw_external, kw_thread_local, + kw_zeroinitializer, + kw_undef, kw_null, + kw_to, + kw_tail, + kw_target, + kw_triple, + kw_deplibs, + kw_datalayout, + kw_volatile, + kw_align, + kw_addrspace, + kw_section, + kw_alias, + kw_module, + kw_asm, + kw_sideeffect, + kw_gc, + kw_c, + + kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc, + + kw_signext, + kw_zeroext, + kw_inreg, + kw_sret, + kw_nounwind, + kw_noreturn, + kw_noalias, + kw_nocapture, + kw_byval, + kw_nest, + kw_readnone, + kw_readonly, + + kw_noinline, + kw_alwaysinline, + kw_optsize, + kw_ssp, + kw_sspreq, + + kw_type, + kw_opaque, + + kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, + kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, + kw_ueq, kw_une, + + // Instruction Opcodes (Opcode in UIntVal). + kw_add, kw_sub, kw_mul, kw_udiv, kw_sdiv, kw_fdiv, + kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr, + kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp, + + kw_phi, kw_call, + kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp, + kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast, + kw_select, kw_va_arg, + + kw_ret, kw_br, kw_switch, kw_invoke, kw_unwind, kw_unreachable, + + kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr, + + kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult, + kw_extractvalue, kw_insertvalue, + + // Unsigned Valued tokens (UIntVal). + GlobalID, // @42 + LocalVarID, // %42 + + // String valued tokens (StrVal). + LabelStr, // foo: + GlobalVar, // @foo @"foo" + LocalVar, // %foo %"foo" + StringConstant, // "foo" + + // Metadata valued tokens. + Metadata, // !"foo" !{i8 42} + + // Type valued tokens (TyVal). + Type, + + APFloat, // APFloatVal + APSInt // APSInt + }; +} // end namespace lltok +} // end namespace llvm + +#endif diff --git a/lib/AsmParser/Makefile b/lib/AsmParser/Makefile new file mode 100644 index 000000000000..995bb0e130e2 --- /dev/null +++ b/lib/AsmParser/Makefile @@ -0,0 +1,14 @@ +##===- lib/AsmParser/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME := LLVMAsmParser +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp new file mode 100644 index 000000000000..759e00e3217a --- /dev/null +++ b/lib/AsmParser/Parser.cpp @@ -0,0 +1,87 @@ +//===- Parser.cpp - Main dispatch module for the Parser library -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, ParseError &Err) {
+  Err.setFilename(Filename);
+
+  std::string ErrorStr;
+  OwningPtr<MemoryBuffer>
+    F(MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr));
+  if (F == 0) {
+    Err.setError("Could not open input file '" + Filename + "'");
+    return 0;
+  }
+
+  OwningPtr<Module> M(new Module(Filename));
+  if (LLParser(F.get(), Err, M.get()).Run())
+    return 0;
+  return M.take();
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+                                  ParseError &Err) {
+  Err.setFilename("<string>");
+
+  OwningPtr<MemoryBuffer>
+    F(MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString),
+                                 "<string>"));
+
+  // If we are parsing into an existing module, do it.
+  if (M)
+    return LLParser(F.get(), Err, M).Run() ? 0 : M;
+
+  // Otherwise create a new module.
+  OwningPtr<Module> M2(new Module("<string>"));
+  if (LLParser(F.get(), Err, M2.get()).Run())
+    return 0;
+  return M2.take();
+}
+
+
+//===------------------------------------------------------------------------===
+// ParseError Class
+//===------------------------------------------------------------------------===
+
+void ParseError::PrintError(const char *ProgName, raw_ostream &S) {
+  errs() << ProgName << ": ";
+  if (Filename == "-")
+    errs() << "<stdin>";
+  else
+    errs() << Filename;
+
+  if (LineNo != -1) {
+    errs() << ':' << LineNo;
+    if (ColumnNo != -1)
+      errs() << ':' << (ColumnNo+1);
+  }
+
+  errs() << ": " << Message << '\n';
+
+  if (LineNo != -1 && ColumnNo != -1) {
+    errs() << LineContents << '\n';
+
+    // Print out spaces/tabs before the caret.
+    for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
+      errs() << (LineContents[i] == '\t' ? '\t' : ' ');
+    errs() << "^\n";
+  }
}
diff --git a/lib/Bitcode/Makefile b/lib/Bitcode/Makefile
new file mode 100644
index 000000000000..2d6b5ad1fe88
--- /dev/null
+++ b/lib/Bitcode/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Reader Writer
+
+include $(LEVEL)/Makefile.common
+
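A side note on the ownership pattern in the two entry points above, worth a
minimal sketch (editorial addition; parseOrNull and the "demo" module name are
hypothetical): OwningPtr deletes its pointee at scope exit unless take()
releases it, which is why the error paths simply return 0 while the success
paths call take().

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Module.h"

    llvm::Module *parseOrNull(bool failed) {
      llvm::OwningPtr<llvm::Module> M(new llvm::Module("demo"));
      if (failed)
        return 0;        // ~OwningPtr frees the half-built module here
      return M.take();   // ownership transfers to the caller
    }
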
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 000000000000..52851cd142da
--- /dev/null
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,51 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+#include <cstring>
+
+using namespace llvm;
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+   reference to the module via the OutModule parameter. Returns 0 on success.
+   Optionally returns a human-readable error message via OutMessage. */
+int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+                     LLVMModuleRef *OutModule, char **OutMessage) {
+  std::string Message;
+
+  *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), &Message));
+  if (!*OutModule) {
+    if (OutMessage)
+      *OutMessage = strdup(Message.c_str());
+    return 1;
+  }
+
+  return 0;
+}
+
+/* Reads a module from the specified path, returning via the OutMP parameter
+   a module provider which performs lazy deserialization. Returns 0 on success.
+   Optionally returns a human-readable error message via OutMessage. */
+int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+                                 LLVMModuleProviderRef *OutMP,
+                                 char **OutMessage) {
+  std::string Message;
+
+  *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), &Message));
+  if (!*OutMP) {
+    if (OutMessage)
+      *OutMessage = strdup(Message.c_str());
+    return 1;
+  }
+
+  return 0;
+}
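A hedged usage sketch for the two C entry points above (editorial addition):
the path "input.bc" is a placeholder, LLVMCreateMemoryBufferWithContentsOfFile
is assumed from the llvm-c Core API of this era, and buffer-ownership details
across these calls are deliberately elided.

    #include "llvm-c/BitReader.h"
    #include "llvm-c/Core.h"
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      LLVMMemoryBufferRef Buf;
      LLVMModuleRef M;
      char *Msg = 0;

      /* "input.bc" is a placeholder path. Both calls return nonzero on
         error and hand back a malloc'd message via Msg. */
      if (LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &Buf, &Msg) ||
          LLVMParseBitcode(Buf, &M, &Msg)) {
        fprintf(stderr, "error: %s\n", Msg);
        free(Msg);   /* strdup'd by the wrappers above */
        return 1;
      }
      /* Buffer ownership across these calls is version-specific; its
         disposal is omitted from this sketch. */
      LLVMDisposeModule(M);
      return 0;
    }
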
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
new file mode 100644
index 000000000000..1dad04bd8f6f
--- /dev/null
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -0,0 +1,2126 @@
+//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "BitcodeReader.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/OperandTraits.h"
+using namespace llvm;
+
+void BitcodeReader::FreeState() {
+  delete Buffer;
+  Buffer = 0;
+  std::vector<PATypeHolder>().swap(TypeList);
+  ValueList.clear();
+
+  std::vector<AttrListPtr>().swap(MAttributes);
+  std::vector<BasicBlock*>().swap(FunctionBBs);
+  std::vector<Function*>().swap(FunctionsWithBodies);
+  DeferredFunctionInfo.clear();
+}
+
+//===----------------------------------------------------------------------===//
+//  Helper functions to implement forward reference resolution, etc.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToString - Convert a string from a record into an std::string, return
+/// true on failure.
+template<typename StrTy>
+static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
+                            StrTy &Result) {
+  if (Idx > Record.size())
+    return true;
+
+  for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+    Result += (char)Record[i];
+  return false;
+}
+
+static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
+  switch (Val) {
+  default: // Map unknown/new linkages to external
+  case 0:  return GlobalValue::ExternalLinkage;
+  case 1:  return GlobalValue::WeakAnyLinkage;
+  case 2:  return GlobalValue::AppendingLinkage;
+  case 3:  return GlobalValue::InternalLinkage;
+  case 4:  return GlobalValue::LinkOnceAnyLinkage;
+  case 5:  return GlobalValue::DLLImportLinkage;
+  case 6:  return GlobalValue::DLLExportLinkage;
+  case 7:  return GlobalValue::ExternalWeakLinkage;
+  case 8:  return GlobalValue::CommonLinkage;
+  case 9:  return GlobalValue::PrivateLinkage;
+  case 10: return GlobalValue::WeakODRLinkage;
+  case 11: return GlobalValue::LinkOnceODRLinkage;
+  case 12: return GlobalValue::AvailableExternallyLinkage;
+  }
+}
+
+static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+  switch (Val) {
+  default: // Map unknown visibilities to default.
+  case 0: return GlobalValue::DefaultVisibility;
+  case 1: return GlobalValue::HiddenVisibility;
+  case 2: return GlobalValue::ProtectedVisibility;
+  }
+}
+
+static int GetDecodedCastOpcode(unsigned Val) {
+  switch (Val) {
+  default: return -1;
+  case bitc::CAST_TRUNC   : return Instruction::Trunc;
+  case bitc::CAST_ZEXT    : return Instruction::ZExt;
+  case bitc::CAST_SEXT    : return Instruction::SExt;
+  case bitc::CAST_FPTOUI  : return Instruction::FPToUI;
+  case bitc::CAST_FPTOSI  : return Instruction::FPToSI;
+  case bitc::CAST_UITOFP  : return Instruction::UIToFP;
+  case bitc::CAST_SITOFP  : return Instruction::SIToFP;
+  case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
+  case bitc::CAST_FPEXT   : return Instruction::FPExt;
+  case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
+  case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
+  case bitc::CAST_BITCAST : return Instruction::BitCast;
+  }
+}
+static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
+  switch (Val) {
+  default: return -1;
+  case bitc::BINOP_ADD:  return Instruction::Add;
+  case bitc::BINOP_SUB:  return Instruction::Sub;
+  case bitc::BINOP_MUL:  return Instruction::Mul;
+  case bitc::BINOP_UDIV: return Instruction::UDiv;
+  case bitc::BINOP_SDIV:
+    return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv;
+  case bitc::BINOP_UREM: return Instruction::URem;
+  case bitc::BINOP_SREM:
+    return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem;
+  case bitc::BINOP_SHL:  return Instruction::Shl;
+  case bitc::BINOP_LSHR: return Instruction::LShr;
+  case bitc::BINOP_ASHR: return Instruction::AShr;
+  case bitc::BINOP_AND:  return Instruction::And;
+  case bitc::BINOP_OR:   return Instruction::Or;
+  case bitc::BINOP_XOR:  return Instruction::Xor;
+  }
+}
+
+namespace llvm {
+namespace {
+  /// @brief A class for maintaining the slot number definition
+  /// as a placeholder for the actual definition for forward constants defs.
+  class ConstantPlaceHolder : public ConstantExpr {
+    ConstantPlaceHolder();                       // DO NOT IMPLEMENT
+    void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
+  public:
+    // allocate space for exactly one operand
+    void *operator new(size_t s) {
+      return User::operator new(s, 1);
+    }
+    explicit ConstantPlaceHolder(const Type *Ty)
+      : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+      Op<0>() = UndefValue::get(Type::Int32Ty);
+    }
+
+    /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+    static inline bool classof(const ConstantPlaceHolder *) { return true; }
+    static bool classof(const Value *V) {
+      return isa<ConstantExpr>(V) &&
+             cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+    }
+
+
+    /// Provide fast operand accessors
+    //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+  };
+}
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder> : FixedNumOperandTraits<1> {
+};
+}
+
+
+void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+  if (Idx == size()) {
+    push_back(V);
+    return;
+  }
+
+  if (Idx >= size())
+    resize(Idx+1);
+
+  WeakVH &OldV = ValuePtrs[Idx];
+  if (OldV == 0) {
+    OldV = V;
+    return;
+  }
+
+  // Handle constants and non-constants (e.g. instrs) differently for
+  // efficiency.
+  if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+    ResolveConstants.push_back(std::make_pair(PHC, Idx));
+    OldV = V;
+  } else {
+    // If there was a forward reference to this value, replace it.
+    Value *PrevVal = OldV;
+    OldV->replaceAllUsesWith(V);
+    delete PrevVal;
+  }
+}
+
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
+                                                    const Type *Ty) {
+  if (Idx >= size())
+    resize(Idx + 1);
+
+  if (Value *V = ValuePtrs[Idx]) {
+    assert(Ty == V->getType() && "Type mismatch in constant table!");
+    return cast<Constant>(V);
+  }
+
+  // Create and return a placeholder, which will later be RAUW'd.
+  Constant *C = new ConstantPlaceHolder(Ty);
+  ValuePtrs[Idx] = C;
+  return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
+  if (Idx >= size())
+    resize(Idx + 1);
+
+  if (Value *V = ValuePtrs[Idx]) {
+    assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+    return V;
+  }
+
+  // No type specified, must be invalid reference.
+  if (Ty == 0) return 0;
+
+  // Create and return a placeholder, which will later be RAUW'd.
+  Value *V = new Argument(Ty);
+  ValuePtrs[Idx] = V;
+  return V;
+}
+
+/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+/// resolves any forward references.  The idea behind this is that we sometimes
+/// get constants (such as large arrays) which reference *many* forward ref
+/// constants.  Replacing each of these causes a lot of thrashing when
+/// building/reuniquing the constant.  Instead of doing this, we look at all the
+/// uses and rewrite all the place holders at once for any constant that uses
+/// a placeholder.
+void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+  // Sort the values by-pointer so that they are efficient to look up with a
+  // binary search.
+  std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+  SmallVector<Constant*, 16> NewOps;
+
+  while (!ResolveConstants.empty()) {
+    Value *RealVal = operator[](ResolveConstants.back().second);
+    Constant *Placeholder = ResolveConstants.back().first;
+    ResolveConstants.pop_back();
+
+    // Loop over all users of the placeholder, updating them to reference the
+    // new value.  If they reference more than one placeholder, update them all
+    // at once.
+    while (!Placeholder->use_empty()) {
+      Value::use_iterator UI = Placeholder->use_begin();
+
+      // If the using object isn't uniqued, just update the operands.  This
+      // handles instructions and initializers for global variables.
+      if (!isa<Constant>(*UI) || isa<GlobalValue>(*UI)) {
+        UI.getUse().set(RealVal);
+        continue;
+      }
+
+      // Otherwise, we have a constant that uses the placeholder.  Replace that
+      // constant with a new constant that has *all* placeholder uses updated.
+      Constant *UserC = cast<Constant>(*UI);
+      for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
+           I != E; ++I) {
+        Value *NewOp;
+        if (!isa<ConstantPlaceHolder>(*I)) {
+          // Not a placeholder reference.
+          NewOp = *I;
+        } else if (*I == Placeholder) {
+          // Common case is that it just references this one placeholder.
+          NewOp = RealVal;
+        } else {
+          // Otherwise, look up the placeholder in ResolveConstants.
+          ResolveConstantsTy::iterator It =
+            std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+                             std::pair<Constant*, unsigned>(cast<Constant>(*I),
+                                                            0));
+          assert(It != ResolveConstants.end() && It->first == *I);
+          NewOp = operator[](It->second);
+        }
+
+        NewOps.push_back(cast<Constant>(NewOp));
+      }
+
+      // Make the new constant.
+      Constant *NewC;
+      if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+        NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], NewOps.size());
+      } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+        NewC = ConstantStruct::get(&NewOps[0], NewOps.size(),
+                                   UserCS->getType()->isPacked());
+      } else if (isa<ConstantVector>(UserC)) {
+        NewC = ConstantVector::get(&NewOps[0], NewOps.size());
+      } else {
+        assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+        NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
+                                                          NewOps.size());
+      }
+
+      UserC->replaceAllUsesWith(NewC);
+      UserC->destroyConstant();
+      NewOps.clear();
+    }
+
+    // Update all ValueHandles, they should be the only users at this point.
+    Placeholder->replaceAllUsesWith(RealVal);
+    delete Placeholder;
+  }
+}
+
+
+const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
+  // If the TypeID is in range, return it.
+  if (ID < TypeList.size())
+    return TypeList[ID].get();
+  if (!isTypeTable) return 0;
+
+  // The type table allows forward references.  Push as many Opaque types as
+  // needed to get up to ID.
+  while (TypeList.size() <= ID)
+    TypeList.push_back(OpaqueType::get());
+  return TypeList.back().get();
+}
+
+//===----------------------------------------------------------------------===//
+//  Functions for parsing blocks from the bitcode file
+//===----------------------------------------------------------------------===//
+
+bool BitcodeReader::ParseAttributeBlock() {
+  if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+    return Error("Malformed block record");
+
+  if (!MAttributes.empty())
+    return Error("Multiple PARAMATTR blocks found!");
+
+  SmallVector<uint64_t, 64> Record;
+
+  SmallVector<AttributeWithIndex, 8> Attrs;
+
+  // Read all the records.
+  while (1) {
+    unsigned Code = Stream.ReadCode();
+    if (Code == bitc::END_BLOCK) {
+      if (Stream.ReadBlockEnd())
+        return Error("Error at end of PARAMATTR block");
+      return false;
+    }
+
+    if (Code == bitc::ENTER_SUBBLOCK) {
+      // No known subblocks, always skip them.
+      Stream.ReadSubBlockID();
+      if (Stream.SkipBlock())
+        return Error("Malformed block record");
+      continue;
+    }
+
+    if (Code == bitc::DEFINE_ABBREV) {
+      Stream.ReadAbbrevRecord();
+      continue;
+    }
+
+    // Read a record.
+    Record.clear();
+    switch (Stream.ReadRecord(Code, Record)) {
+    default:  // Default behavior: ignore.
+      break;
+    case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...]
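+      // Editor's note (not in the imported source): each attr word decoded
+      // below packs the low attribute flags in bits [15:0], the raw
+      // alignment in bits [31:16], and the post-alignment flags in bits
+      // [47:32], which the reconstitution code shifts down by 11.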
+ if (Record.size() & 1) + return Error("Invalid ENTRY record"); + + // FIXME : Remove this autoupgrade code in LLVM 3.0. + // If Function attributes are using index 0 then transfer them + // to index ~0. Index 0 is used for return value attributes but used to be + // used for function attributes. + Attributes RetAttribute = Attribute::None; + Attributes FnAttribute = Attribute::None; + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + // FIXME: remove in LLVM 3.0 + // The alignment is stored as a 16-bit raw value from bits 31--16. + // We shift the bits above 31 down by 11 bits. + + unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16; + if (Alignment && !isPowerOf2_32(Alignment)) + return Error("Alignment is not a power of two."); + + Attributes ReconstitutedAttr = Record[i+1] & 0xffff; + if (Alignment) + ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment); + ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11; + Record[i+1] = ReconstitutedAttr; + + if (Record[i] == 0) + RetAttribute = Record[i+1]; + else if (Record[i] == ~0U) + FnAttribute = Record[i+1]; + } + + unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| + Attribute::ReadOnly|Attribute::ReadNone); + + if (FnAttribute == Attribute::None && RetAttribute != Attribute::None && + (RetAttribute & OldRetAttrs) != 0) { + if (FnAttribute == Attribute::None) { // add a slot so they get added. + Record.push_back(~0U); + Record.push_back(0); + } + + FnAttribute |= RetAttribute & OldRetAttrs; + RetAttribute &= ~OldRetAttrs; + } + + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + if (Record[i] == 0) { + if (RetAttribute != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(0, RetAttribute)); + } else if (Record[i] == ~0U) { + if (FnAttribute != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute)); + } else if (Record[i+1] != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1])); + } + + MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end())); + Attrs.clear(); + break; + } + } + } +} + + +bool BitcodeReader::ParseTypeTable() { + if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID)) + return Error("Malformed block record"); + + if (!TypeList.empty()) + return Error("Multiple TYPE_BLOCKs found!"); + + SmallVector Record; + unsigned NumRecords = 0; + + // Read all the records for this type table. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (NumRecords != TypeList.size()) + return Error("Invalid type forward reference in TYPE_BLOCK"); + if (Stream.ReadBlockEnd()) + return Error("Error at end of type table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + const Type *ResultTy = 0; + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + ResultTy = 0; + break; + case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] + // TYPE_CODE_NUMENTRY contains a count of the number of types in the + // type list. This allows us to reserve space. 
+ if (Record.size() < 1) + return Error("Invalid TYPE_CODE_NUMENTRY record"); + TypeList.reserve(Record[0]); + continue; + case bitc::TYPE_CODE_VOID: // VOID + ResultTy = Type::VoidTy; + break; + case bitc::TYPE_CODE_FLOAT: // FLOAT + ResultTy = Type::FloatTy; + break; + case bitc::TYPE_CODE_DOUBLE: // DOUBLE + ResultTy = Type::DoubleTy; + break; + case bitc::TYPE_CODE_X86_FP80: // X86_FP80 + ResultTy = Type::X86_FP80Ty; + break; + case bitc::TYPE_CODE_FP128: // FP128 + ResultTy = Type::FP128Ty; + break; + case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 + ResultTy = Type::PPC_FP128Ty; + break; + case bitc::TYPE_CODE_LABEL: // LABEL + ResultTy = Type::LabelTy; + break; + case bitc::TYPE_CODE_OPAQUE: // OPAQUE + ResultTy = 0; + break; + case bitc::TYPE_CODE_METADATA: // METADATA + ResultTy = Type::MetadataTy; + break; + case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] + if (Record.size() < 1) + return Error("Invalid Integer type record"); + + ResultTy = IntegerType::get(Record[0]); + break; + case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or + // [pointee type, address space] + if (Record.size() < 1) + return Error("Invalid POINTER type record"); + unsigned AddressSpace = 0; + if (Record.size() == 2) + AddressSpace = Record[1]; + ResultTy = PointerType::get(getTypeByID(Record[0], true), AddressSpace); + break; + } + case bitc::TYPE_CODE_FUNCTION: { + // FIXME: attrid is dead, remove it in LLVM 3.0 + // FUNCTION: [vararg, attrid, retty, paramty x N] + if (Record.size() < 3) + return Error("Invalid FUNCTION type record"); + std::vector ArgTys; + for (unsigned i = 3, e = Record.size(); i != e; ++i) + ArgTys.push_back(getTypeByID(Record[i], true)); + + ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys, + Record[0]); + break; + } + case bitc::TYPE_CODE_STRUCT: { // STRUCT: [ispacked, eltty x N] + if (Record.size() < 1) + return Error("Invalid STRUCT type record"); + std::vector EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = StructType::get(EltTys, Record[0]); + break; + } + case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid ARRAY type record"); + ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]); + break; + case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid VECTOR type record"); + ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]); + break; + } + + if (NumRecords == TypeList.size()) { + // If this is a new type slot, just append it. + TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get()); + ++NumRecords; + } else if (ResultTy == 0) { + // Otherwise, this was forward referenced, so an opaque type was created, + // but the result type is actually just an opaque. Leave the one we + // created previously. + ++NumRecords; + } else { + // Otherwise, this was forward referenced, so an opaque type was created. + // Resolve the opaque type to the real type now. + assert(NumRecords < TypeList.size() && "Typelist imbalance"); + const OpaqueType *OldTy = cast(TypeList[NumRecords++].get()); + + // Don't directly push the new type on the Tab. Instead we want to replace + // the opaque type we previously inserted with the new concrete value. The + // refinement from the abstract (opaque) type to the new type causes all + // uses of the abstract type to use the concrete type (NewTy). This will + // also cause the opaque type to be deleted. 
+ const_cast(OldTy)->refineAbstractTypeTo(ResultTy); + + // This should have replaced the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the + // system. Let's just make sure it did. + assert(TypeList[NumRecords-1].get() != OldTy && + "refineAbstractType didn't work!"); + } + } +} + + +bool BitcodeReader::ParseTypeSymbolTable() { + if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this type table. + std::string TypeName; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of type symbol table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + break; + case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N] + if (ConvertToString(Record, 1, TypeName)) + return Error("Invalid TST_ENTRY record"); + unsigned TypeID = Record[0]; + if (TypeID >= TypeList.size()) + return Error("Invalid Type ID in TST_ENTRY record"); + + TheModule->addTypeName(TypeName, TypeList[TypeID].get()); + TypeName.clear(); + break; + } + } +} + +bool BitcodeReader::ParseValueSymbolTable() { + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of value symbol table block"); + return false; + } + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + break; + case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_ENTRY record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return Error("Invalid Value ID in VST_ENTRY record"); + Value *V = ValueList[ValueID]; + + V->setName(&ValueName[0], ValueName.size()); + ValueName.clear(); + break; + } + case bitc::VST_CODE_BBENTRY: { + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_BBENTRY record"); + BasicBlock *BB = getBasicBlock(Record[0]); + if (BB == 0) + return Error("Invalid BB ID in VST_BBENTRY record"); + + BB->setName(&ValueName[0], ValueName.size()); + ValueName.clear(); + break; + } + } + } +} + +/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in +/// the LSB for dense VBR encoding. +static uint64_t DecodeSignRotatedValue(uint64_t V) { + if ((V & 1) == 0) + return V >> 1; + if (V != 1) + return -(V >> 1); + // There is no such thing as -0 with integers. "-0" really means MININT. 
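+  // Illustrative values (editor's note, not in the imported source):
+  //   DecodeSignRotatedValue(6) == 3, DecodeSignRotatedValue(7) == (uint64_t)-3,
+  //   and V == 1 is the rotated encoding of INT64_MIN, handled below.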
+  return 1ULL << 63;
+}
+
+/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
+/// values and aliases that we can.
+bool BitcodeReader::ResolveGlobalAndAliasInits() {
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
+  std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+
+  GlobalInitWorklist.swap(GlobalInits);
+  AliasInitWorklist.swap(AliasInits);
+
+  while (!GlobalInitWorklist.empty()) {
+    unsigned ValID = GlobalInitWorklist.back().second;
+    if (ValID >= ValueList.size()) {
+      // Not ready to resolve this yet, it requires something later in the file.
+      GlobalInits.push_back(GlobalInitWorklist.back());
+    } else {
+      if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+        GlobalInitWorklist.back().first->setInitializer(C);
+      else
+        return Error("Global variable initializer is not a constant!");
+    }
+    GlobalInitWorklist.pop_back();
+  }
+
+  while (!AliasInitWorklist.empty()) {
+    unsigned ValID = AliasInitWorklist.back().second;
+    if (ValID >= ValueList.size()) {
+      AliasInits.push_back(AliasInitWorklist.back());
+    } else {
+      if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+        AliasInitWorklist.back().first->setAliasee(C);
+      else
+        return Error("Alias initializer is not a constant!");
+    }
+    AliasInitWorklist.pop_back();
+  }
+  return false;
+}
+
+
+bool BitcodeReader::ParseConstants() {
+  if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+    return Error("Malformed block record");
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Read all the records for this value table.
+  const Type *CurTy = Type::Int32Ty;
+  unsigned NextCstNo = ValueList.size();
+  while (1) {
+    unsigned Code = Stream.ReadCode();
+    if (Code == bitc::END_BLOCK)
+      break;
+
+    if (Code == bitc::ENTER_SUBBLOCK) {
+      // No known subblocks, always skip them.
+      Stream.ReadSubBlockID();
+      if (Stream.SkipBlock())
+        return Error("Malformed block record");
+      continue;
+    }
+
+    if (Code == bitc::DEFINE_ABBREV) {
+      Stream.ReadAbbrevRecord();
+      continue;
+    }
+
+    // Read a record.
+    Record.clear();
+    Value *V = 0;
+    switch (Stream.ReadRecord(Code, Record)) {
+    default:  // Default behavior: unknown constant
+    case bitc::CST_CODE_UNDEF:     // UNDEF
+      V = UndefValue::get(CurTy);
+      break;
+    case bitc::CST_CODE_SETTYPE:   // SETTYPE: [typeid]
+      if (Record.empty())
+        return Error("Malformed CST_SETTYPE record");
+      if (Record[0] >= TypeList.size())
+        return Error("Invalid Type ID in CST_SETTYPE record");
+      CurTy = TypeList[Record[0]];
+      continue;  // Skip the ValueList manipulation.
+    case bitc::CST_CODE_NULL:      // NULL
+      V = Constant::getNullValue(CurTy);
+      break;
+    case bitc::CST_CODE_INTEGER:   // INTEGER: [intval]
+      if (!isa<IntegerType>(CurTy) || Record.empty())
+        return Error("Invalid CST_INTEGER record");
+      V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+      break;
+    case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
+      if (!isa<IntegerType>(CurTy) || Record.empty())
+        return Error("Invalid WIDE_INTEGER record");
+
+      unsigned NumWords = Record.size();
+      SmallVector<uint64_t, 8> Words;
+      Words.resize(NumWords);
+      for (unsigned i = 0; i != NumWords; ++i)
+        Words[i] = DecodeSignRotatedValue(Record[i]);
+      V = ConstantInt::get(APInt(cast<IntegerType>(CurTy)->getBitWidth(),
+                                 NumWords, &Words[0]));
+      break;
+    }
+    case bitc::CST_CODE_FLOAT: {    // FLOAT: [fpval]
+      if (Record.empty())
+        return Error("Invalid FLOAT record");
+      if (CurTy == Type::FloatTy)
+        V = ConstantFP::get(APFloat(APInt(32, (uint32_t)Record[0])));
+      else if (CurTy == Type::DoubleTy)
+        V = ConstantFP::get(APFloat(APInt(64, Record[0])));
+      else if (CurTy == Type::X86_FP80Ty) {
+        // Bits are not stored the same way as a normal i80 APInt, compensate.
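+        // Editor's note (not in the imported source): the record carries
+        // bits 79..16 of the value in Record[0] and bits 15..0 in Record[1];
+        // the lines below repack them so Rearrange[0] holds bits 63..0 and
+        // Rearrange[1] holds bits 79..64, the word order APInt expects.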
+ uint64_t Rearrange[2]; + Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); + Rearrange[1] = Record[0] >> 48; + V = ConstantFP::get(APFloat(APInt(80, 2, Rearrange))); + } else if (CurTy == Type::FP128Ty) + V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]), true)); + else if (CurTy == Type::PPC_FP128Ty) + V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]))); + else + V = UndefValue::get(CurTy); + break; + } + + case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] + if (Record.empty()) + return Error("Invalid CST_AGGREGATE record"); + + unsigned Size = Record.size(); + std::vector Elts; + + if (const StructType *STy = dyn_cast(CurTy)) { + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ValueList.getConstantFwdRef(Record[i], + STy->getElementType(i))); + V = ConstantStruct::get(STy, Elts); + } else if (const ArrayType *ATy = dyn_cast(CurTy)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); + V = ConstantArray::get(ATy, Elts); + } else if (const VectorType *VTy = dyn_cast(CurTy)) { + const Type *EltTy = VTy->getElementType(); + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); + V = ConstantVector::get(Elts); + } else { + V = UndefValue::get(CurTy); + } + break; + } + case bitc::CST_CODE_STRING: { // STRING: [values] + if (Record.empty()) + return Error("Invalid CST_AGGREGATE record"); + + const ArrayType *ATy = cast(CurTy); + const Type *EltTy = ATy->getElementType(); + + unsigned Size = Record.size(); + std::vector Elts; + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ConstantInt::get(EltTy, Record[i])); + V = ConstantArray::get(ATy, Elts); + break; + } + case bitc::CST_CODE_CSTRING: { // CSTRING: [values] + if (Record.empty()) + return Error("Invalid CST_AGGREGATE record"); + + const ArrayType *ATy = cast(CurTy); + const Type *EltTy = ATy->getElementType(); + + unsigned Size = Record.size(); + std::vector Elts; + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ConstantInt::get(EltTy, Record[i])); + Elts.push_back(Constant::getNullValue(EltTy)); + V = ConstantArray::get(ATy, Elts); + break; + } + case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] + if (Record.size() < 3) return Error("Invalid CE_BINOP record"); + int Opc = GetDecodedBinaryOpcode(Record[0], CurTy); + if (Opc < 0) { + V = UndefValue::get(CurTy); // Unknown binop. + } else { + Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); + Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); + V = ConstantExpr::get(Opc, LHS, RHS); + } + break; + } + case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] + if (Record.size() < 3) return Error("Invalid CE_CAST record"); + int Opc = GetDecodedCastOpcode(Record[0]); + if (Opc < 0) { + V = UndefValue::get(CurTy); // Unknown cast. 
+ } else { + const Type *OpTy = getTypeByID(Record[1]); + if (!OpTy) return Error("Invalid CE_CAST record"); + Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); + V = ConstantExpr::getCast(Opc, Op, CurTy); + } + break; + } + case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] + if (Record.size() & 1) return Error("Invalid CE_GEP record"); + SmallVector Elts; + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + const Type *ElTy = getTypeByID(Record[i]); + if (!ElTy) return Error("Invalid CE_GEP record"); + Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); + } + V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], Elts.size()-1); + break; + } + case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] + if (Record.size() < 3) return Error("Invalid CE_SELECT record"); + V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], + Type::Int1Ty), + ValueList.getConstantFwdRef(Record[1],CurTy), + ValueList.getConstantFwdRef(Record[2],CurTy)); + break; + case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] + if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); + const VectorType *OpTy = + dyn_cast_or_null(getTypeByID(Record[0])); + if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty); + V = ConstantExpr::getExtractElement(Op0, Op1); + break; + } + case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval] + const VectorType *OpTy = dyn_cast(CurTy); + if (Record.size() < 3 || OpTy == 0) + return Error("Invalid CE_INSERTELT record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[1], + OpTy->getElementType()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty); + V = ConstantExpr::getInsertElement(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] + const VectorType *OpTy = dyn_cast(CurTy); + if (Record.size() < 3 || OpTy == 0) + return Error("Invalid CE_SHUFFLEVEC record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); + const Type *ShufTy=VectorType::get(Type::Int32Ty, OpTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] + const VectorType *RTy = dyn_cast(CurTy); + const VectorType *OpTy = dyn_cast(getTypeByID(Record[0])); + if (Record.size() < 4 || RTy == 0 || OpTy == 0) + return Error("Invalid CE_SHUFVEC_EX record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); + const Type *ShufTy=VectorType::get(Type::Int32Ty, RTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] + if (Record.size() < 4) return Error("Invalid CE_CMP record"); + const Type *OpTy = getTypeByID(Record[0]); + if (OpTy == 0) return Error("Invalid CE_CMP record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); + + if (OpTy->isFloatingPoint()) + V = 
ConstantExpr::getFCmp(Record[3], Op0, Op1); + else if (!isa(OpTy)) + V = ConstantExpr::getICmp(Record[3], Op0, Op1); + else if (OpTy->isFPOrFPVector()) + V = ConstantExpr::getVFCmp(Record[3], Op0, Op1); + else + V = ConstantExpr::getVICmp(Record[3], Op0, Op1); + break; + } + case bitc::CST_CODE_INLINEASM: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0]; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + const PointerType *PTy = cast(CurTy); + V = InlineAsm::get(cast(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects); + break; + } + case bitc::CST_CODE_MDSTRING: { + if (Record.size() < 2) return Error("Invalid MDSTRING record"); + unsigned MDStringLength = Record.size(); + SmallString<8> String; + String.resize(MDStringLength); + for (unsigned i = 0; i != MDStringLength; ++i) + String[i] = Record[i]; + V = MDString::get(String.c_str(), String.c_str() + MDStringLength); + break; + } + case bitc::CST_CODE_MDNODE: { + if (Record.empty() || Record.size() % 2 == 1) + return Error("Invalid CST_MDNODE record"); + + unsigned Size = Record.size(); + SmallVector Elts; + for (unsigned i = 0; i != Size; i += 2) { + const Type *Ty = getTypeByID(Record[i], false); + if (Ty != Type::VoidTy) + Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); + else + Elts.push_back(NULL); + } + V = MDNode::get(&Elts[0], Elts.size()); + break; + } + } + + ValueList.AssignValue(V, NextCstNo); + ++NextCstNo; + } + + if (NextCstNo != ValueList.size()) + return Error("Invalid constant reference!"); + + if (Stream.ReadBlockEnd()) + return Error("Error at end of constants block"); + + // Once all the constants have been read, go through and resolve forward + // references. + ValueList.ResolveConstantForwardRefs(); + return false; +} + +/// RememberAndSkipFunctionBody - When we see the block for a function body, +/// remember where it is and then skip it. This lets us lazily deserialize the +/// functions. +bool BitcodeReader::RememberAndSkipFunctionBody() { + // Get the function we are talking about. + if (FunctionsWithBodies.empty()) + return Error("Insufficient function protos"); + + Function *Fn = FunctionsWithBodies.back(); + FunctionsWithBodies.pop_back(); + + // Save the current stream state. + uint64_t CurBit = Stream.GetCurrentBitNo(); + DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage()); + + // Set the functions linkage to GhostLinkage so we know it is lazily + // deserialized. + Fn->setLinkage(GlobalValue::GhostLinkage); + + // Skip over the function block for now. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + return false; +} + +bool BitcodeReader::ParseModule(const std::string &ModuleID) { + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Malformed block record"); + + // Otherwise, create the module. 
+ TheModule = new Module(ModuleID); + + SmallVector Record; + std::vector SectionTable; + std::vector GCTable; + + // Read all the records for this module. + while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of module block"); + + // Patch the initializers for globals and aliases up. + ResolveGlobalAndAliasInits(); + if (!GlobalInits.empty() || !AliasInits.empty()) + return Error("Malformed global initializer set"); + if (!FunctionsWithBodies.empty()) + return Error("Too few function bodies found"); + + // Look for intrinsic functions which need to be upgraded at some point + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + Function* NewFn; + if (UpgradeIntrinsicFunction(FI, NewFn)) + UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); + } + + // Force deallocation of memory for these vectors to favor the client that + // want lazy deserialization. + std::vector >().swap(GlobalInits); + std::vector >().swap(AliasInits); + std::vector().swap(FunctionsWithBodies); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + switch (Stream.ReadSubBlockID()) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::PARAMATTR_BLOCK_ID: + if (ParseAttributeBlock()) + return true; + break; + case bitc::TYPE_BLOCK_ID: + if (ParseTypeTable()) + return true; + break; + case bitc::TYPE_SYMTAB_BLOCK_ID: + if (ParseTypeSymbolTable()) + return true; + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + break; + case bitc::CONSTANTS_BLOCK_ID: + if (ParseConstants() || ResolveGlobalAndAliasInits()) + return true; + break; + case bitc::FUNCTION_BLOCK_ID: + // If this is the first function body we've seen, reverse the + // FunctionsWithBodies list. + if (!HasReversedFunctionsWithBodies) { + std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); + HasReversedFunctionsWithBodies = true; + } + + if (RememberAndSkipFunctionBody()) + return true; + break; + } + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + switch (Stream.ReadRecord(Code, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: // VERSION: [version#] + if (Record.size() < 1) + return Error("Malformed MODULE_CODE_VERSION"); + // Only version #0 is supported so far. 
+ if (Record[0] != 0) + return Error("Unknown bitstream version!"); + break; + case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_TRIPLE record"); + TheModule->setTargetTriple(S); + break; + } + case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_DATALAYOUT record"); + TheModule->setDataLayout(S); + break; + } + case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_ASM record"); + TheModule->setModuleInlineAsm(S); + break; + } + case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_DEPLIB record"); + TheModule->addLibrary(S); + break; + } + case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_SECTIONNAME record"); + SectionTable.push_back(S); + break; + } + case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_GCNAME record"); + GCTable.push_back(S); + break; + } + // GLOBALVAR: [pointer type, isconst, initid, + // linkage, alignment, section, visibility, threadlocal] + case bitc::MODULE_CODE_GLOBALVAR: { + if (Record.size() < 6) + return Error("Invalid MODULE_CODE_GLOBALVAR record"); + const Type *Ty = getTypeByID(Record[0]); + if (!isa(Ty)) + return Error("Global not a pointer type!"); + unsigned AddressSpace = cast(Ty)->getAddressSpace(); + Ty = cast(Ty)->getElementType(); + + bool isConstant = Record[1]; + GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]); + unsigned Alignment = (1 << Record[4]) >> 1; + std::string Section; + if (Record[5]) { + if (Record[5]-1 >= SectionTable.size()) + return Error("Invalid section ID"); + Section = SectionTable[Record[5]-1]; + } + GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; + if (Record.size() > 6) + Visibility = GetDecodedVisibility(Record[6]); + bool isThreadLocal = false; + if (Record.size() > 7) + isThreadLocal = Record[7]; + + GlobalVariable *NewGV = + new GlobalVariable(Ty, isConstant, Linkage, 0, "", TheModule, + isThreadLocal, AddressSpace); + NewGV->setAlignment(Alignment); + if (!Section.empty()) + NewGV->setSection(Section); + NewGV->setVisibility(Visibility); + NewGV->setThreadLocal(isThreadLocal); + + ValueList.push_back(NewGV); + + // Remember which value to use for the global initializer. 
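+      // (An initid of 0 means "no initializer"; any other value is the
+      //  initializer's constant slot plus one, hence the InitID-1 below.
+      //  E.g. initid == 3 refers to constant slot 2.)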
+ if (unsigned InitID = Record[2]) + GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); + break; + } + // FUNCTION: [type, callingconv, isproto, linkage, paramattr, + // alignment, section, visibility, gc] + case bitc::MODULE_CODE_FUNCTION: { + if (Record.size() < 8) + return Error("Invalid MODULE_CODE_FUNCTION record"); + const Type *Ty = getTypeByID(Record[0]); + if (!isa(Ty)) + return Error("Function not a pointer type!"); + const FunctionType *FTy = + dyn_cast(cast(Ty)->getElementType()); + if (!FTy) + return Error("Function not a pointer to function type!"); + + Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, + "", TheModule); + + Func->setCallingConv(Record[1]); + bool isProto = Record[2]; + Func->setLinkage(GetDecodedLinkage(Record[3])); + Func->setAttributes(getAttributes(Record[4])); + + Func->setAlignment((1 << Record[5]) >> 1); + if (Record[6]) { + if (Record[6]-1 >= SectionTable.size()) + return Error("Invalid section ID"); + Func->setSection(SectionTable[Record[6]-1]); + } + Func->setVisibility(GetDecodedVisibility(Record[7])); + if (Record.size() > 8 && Record[8]) { + if (Record[8]-1 > GCTable.size()) + return Error("Invalid GC ID"); + Func->setGC(GCTable[Record[8]-1].c_str()); + } + ValueList.push_back(Func); + + // If this is a function with a body, remember the prototype we are + // creating now, so that we can match up the body with them later. + if (!isProto) + FunctionsWithBodies.push_back(Func); + break; + } + // ALIAS: [alias type, aliasee val#, linkage] + // ALIAS: [alias type, aliasee val#, linkage, visibility] + case bitc::MODULE_CODE_ALIAS: { + if (Record.size() < 3) + return Error("Invalid MODULE_ALIAS record"); + const Type *Ty = getTypeByID(Record[0]); + if (!isa(Ty)) + return Error("Function not a pointer type!"); + + GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), + "", 0, TheModule); + // Old bitcode files didn't have visibility field. + if (Record.size() > 3) + NewGA->setVisibility(GetDecodedVisibility(Record[3])); + ValueList.push_back(NewGA); + AliasInits.push_back(std::make_pair(NewGA, Record[1])); + break; + } + /// MODULE_CODE_PURGEVALS: [numvals] + case bitc::MODULE_CODE_PURGEVALS: + // Trim down the value list to the specified size. + if (Record.size() < 1 || Record[0] > ValueList.size()) + return Error("Invalid MODULE_PURGEVALS record"); + ValueList.shrinkTo(Record[0]); + break; + } + Record.clear(); + } + + return Error("Premature end of bitstream"); +} + +bool BitcodeReader::ParseBitcode() { + TheModule = 0; + + if (Buffer->getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) + return Error("Invalid bitcode wrapper header"); + + StreamFile.init(BufPtr, BufEnd); + Stream.init(StreamFile); + + // Sniff for the signature. + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return Error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. 
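+  // (The signature just checked is 'B' (0x42), 'C' (0x43) and the nibbles
+  //  0x0, 0xC, 0xE, 0xD, which assemble low-nibble-first into the bytes
+  //  0xC0 0xDE -- the "BC 0xC0DE" bitcode magic.)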
+ while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + + if (Code != bitc::ENTER_SUBBLOCK) + return Error("Invalid record at top-level"); + + unsigned BlockID = Stream.ReadSubBlockID(); + + // We only know the MODULE subblock ID. + switch (BlockID) { + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::MODULE_BLOCK_ID: + if (ParseModule(Buffer->getBufferIdentifier())) + return true; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + } + + return false; +} + + +/// ParseFunctionBody - Lazily parse the specified function body block. +bool BitcodeReader::ParseFunctionBody(Function *F) { + if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) + return Error("Malformed block record"); + + unsigned ModuleValueListSize = ValueList.size(); + + // Add all the function arguments to the value table. + for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ValueList.push_back(I); + + unsigned NextValueNo = ValueList.size(); + BasicBlock *CurBB = 0; + unsigned CurBBNo = 0; + + // Read all the records. + SmallVector Record; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of function block"); + break; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + switch (Stream.ReadSubBlockID()) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case bitc::CONSTANTS_BLOCK_ID: + if (ParseConstants()) return true; + NextValueNo = ValueList.size(); + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) return true; + break; + } + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + Instruction *I = 0; + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: reject + return Error("Unknown instruction"); + case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] + if (Record.size() < 1 || Record[0] == 0) + return Error("Invalid DECLAREBLOCKS record"); + // Create all the basic blocks for the function. 
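+      // (DECLAREBLOCKS announces the block count up front; later records
+      //  such as BR, SWITCH and INVOKE then name blocks by index through
+      //  getBasicBlock().)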
+ FunctionBBs.resize(Record[0]); + for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) + FunctionBBs[i] = BasicBlock::Create("", F); + CurBB = FunctionBBs[0]; + continue; + + case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] + unsigned OpNum = 0; + Value *LHS, *RHS; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + getValue(Record, OpNum, LHS->getType(), RHS) || + OpNum+1 != Record.size()) + return Error("Invalid BINOP record"); + + int Opc = GetDecodedBinaryOpcode(Record[OpNum], LHS->getType()); + if (Opc == -1) return Error("Invalid BINOP record"); + I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + break; + } + case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum+2 != Record.size()) + return Error("Invalid CAST record"); + + const Type *ResTy = getTypeByID(Record[OpNum]); + int Opc = GetDecodedCastOpcode(Record[OpNum+1]); + if (Opc == -1 || ResTy == 0) + return Error("Invalid CAST record"); + I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + break; + } + case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands] + unsigned OpNum = 0; + Value *BasePtr; + if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) + return Error("Invalid GEP record"); + + SmallVector GEPIdx; + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid GEP record"); + GEPIdx.push_back(Op); + } + + I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end()); + break; + } + + case bitc::FUNC_CODE_INST_EXTRACTVAL: { + // EXTRACTVAL: [opty, opval, n x indices] + unsigned OpNum = 0; + Value *Agg; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + return Error("Invalid EXTRACTVAL record"); + + SmallVector EXTRACTVALIdx; + for (unsigned RecSize = Record.size(); + OpNum != RecSize; ++OpNum) { + uint64_t Index = Record[OpNum]; + if ((unsigned)Index != Index) + return Error("Invalid EXTRACTVAL index"); + EXTRACTVALIdx.push_back((unsigned)Index); + } + + I = ExtractValueInst::Create(Agg, + EXTRACTVALIdx.begin(), EXTRACTVALIdx.end()); + break; + } + + case bitc::FUNC_CODE_INST_INSERTVAL: { + // INSERTVAL: [opty, opval, opty, opval, n x indices] + unsigned OpNum = 0; + Value *Agg; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + return Error("Invalid INSERTVAL record"); + Value *Val; + if (getValueTypePair(Record, OpNum, NextValueNo, Val)) + return Error("Invalid INSERTVAL record"); + + SmallVector INSERTVALIdx; + for (unsigned RecSize = Record.size(); + OpNum != RecSize; ++OpNum) { + uint64_t Index = Record[OpNum]; + if ((unsigned)Index != Index) + return Error("Invalid INSERTVAL index"); + INSERTVALIdx.push_back((unsigned)Index); + } + + I = InsertValueInst::Create(Agg, Val, + INSERTVALIdx.begin(), INSERTVALIdx.end()); + break; + } + + case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] + // obsolete form of select + // handles select i1 ... 
in old bitcode + unsigned OpNum = 0; + Value *TrueVal, *FalseVal, *Cond; + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || + getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + getValue(Record, OpNum, Type::Int1Ty, Cond)) + return Error("Invalid SELECT record"); + + I = SelectInst::Create(Cond, TrueVal, FalseVal); + break; + } + + case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] + // new form of select + // handles select i1 or select [N x i1] + unsigned OpNum = 0; + Value *TrueVal, *FalseVal, *Cond; + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || + getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + getValueTypePair(Record, OpNum, NextValueNo, Cond)) + return Error("Invalid SELECT record"); + + // select condition can be either i1 or [N x i1] + if (const VectorType* vector_type = + dyn_cast(Cond->getType())) { + // expect + if (vector_type->getElementType() != Type::Int1Ty) + return Error("Invalid SELECT condition type"); + } else { + // expect i1 + if (Cond->getType() != Type::Int1Ty) + return Error("Invalid SELECT condition type"); + } + + I = SelectInst::Create(Cond, TrueVal, FalseVal); + break; + } + + case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] + unsigned OpNum = 0; + Value *Vec, *Idx; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || + getValue(Record, OpNum, Type::Int32Ty, Idx)) + return Error("Invalid EXTRACTELT record"); + I = new ExtractElementInst(Vec, Idx); + break; + } + + case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] + unsigned OpNum = 0; + Value *Vec, *Elt, *Idx; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || + getValue(Record, OpNum, + cast(Vec->getType())->getElementType(), Elt) || + getValue(Record, OpNum, Type::Int32Ty, Idx)) + return Error("Invalid INSERTELT record"); + I = InsertElementInst::Create(Vec, Elt, Idx); + break; + } + + case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] + unsigned OpNum = 0; + Value *Vec1, *Vec2, *Mask; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || + getValue(Record, OpNum, Vec1->getType(), Vec2)) + return Error("Invalid SHUFFLEVEC record"); + + if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) + return Error("Invalid SHUFFLEVEC record"); + I = new ShuffleVectorInst(Vec1, Vec2, Mask); + break; + } + + case bitc::FUNC_CODE_INST_CMP: { // CMP: [opty, opval, opval, pred] + // VFCmp/VICmp + // or old form of ICmp/FCmp returning bool + unsigned OpNum = 0; + Value *LHS, *RHS; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + getValue(Record, OpNum, LHS->getType(), RHS) || + OpNum+1 != Record.size()) + return Error("Invalid CMP record"); + + if (LHS->getType()->isFloatingPoint()) + I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + else if (!isa(LHS->getType())) + I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + else if (LHS->getType()->isFPOrFPVector()) + I = new VFCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + else + I = new VICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + break; + } + case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] + // Fcmp/ICmp returning bool or vector of bool + unsigned OpNum = 0; + Value *LHS, *RHS; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + getValue(Record, OpNum, LHS->getType(), RHS) || + OpNum+1 != Record.size()) + return Error("Invalid CMP2 record"); + + if (LHS->getType()->isFPOrFPVector()) + I = new 
FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + else + I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + break; + } + case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n] + if (Record.size() != 2) + return Error("Invalid GETRESULT record"); + unsigned OpNum = 0; + Value *Op; + getValueTypePair(Record, OpNum, NextValueNo, Op); + unsigned Index = Record[1]; + I = ExtractValueInst::Create(Op, Index); + break; + } + + case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] + { + unsigned Size = Record.size(); + if (Size == 0) { + I = ReturnInst::Create(); + break; + } + + unsigned OpNum = 0; + SmallVector Vs; + do { + Value *Op = NULL; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid RET record"); + Vs.push_back(Op); + } while(OpNum != Record.size()); + + const Type *ReturnType = F->getReturnType(); + if (Vs.size() > 1 || + (isa(ReturnType) && + (Vs.empty() || Vs[0]->getType() != ReturnType))) { + Value *RV = UndefValue::get(ReturnType); + for (unsigned i = 0, e = Vs.size(); i != e; ++i) { + I = InsertValueInst::Create(RV, Vs[i], i, "mrv"); + CurBB->getInstList().push_back(I); + ValueList.AssignValue(I, NextValueNo++); + RV = I; + } + I = ReturnInst::Create(RV); + break; + } + + I = ReturnInst::Create(Vs[0]); + break; + } + case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] + if (Record.size() != 1 && Record.size() != 3) + return Error("Invalid BR record"); + BasicBlock *TrueDest = getBasicBlock(Record[0]); + if (TrueDest == 0) + return Error("Invalid BR record"); + + if (Record.size() == 1) + I = BranchInst::Create(TrueDest); + else { + BasicBlock *FalseDest = getBasicBlock(Record[1]); + Value *Cond = getFnValueByID(Record[2], Type::Int1Ty); + if (FalseDest == 0 || Cond == 0) + return Error("Invalid BR record"); + I = BranchInst::Create(TrueDest, FalseDest, Cond); + } + break; + } + case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, opval, n, n x ops] + if (Record.size() < 3 || (Record.size() & 1) == 0) + return Error("Invalid SWITCH record"); + const Type *OpTy = getTypeByID(Record[0]); + Value *Cond = getFnValueByID(Record[1], OpTy); + BasicBlock *Default = getBasicBlock(Record[2]); + if (OpTy == 0 || Cond == 0 || Default == 0) + return Error("Invalid SWITCH record"); + unsigned NumCases = (Record.size()-3)/2; + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + for (unsigned i = 0, e = NumCases; i != e; ++i) { + ConstantInt *CaseVal = + dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); + BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); + if (CaseVal == 0 || DestBB == 0) { + delete SI; + return Error("Invalid SWITCH record!"); + } + SI->addCase(CaseVal, DestBB); + } + I = SI; + break; + } + + case bitc::FUNC_CODE_INST_INVOKE: { + // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] + if (Record.size() < 4) return Error("Invalid INVOKE record"); + AttrListPtr PAL = getAttributes(Record[0]); + unsigned CCInfo = Record[1]; + BasicBlock *NormalBB = getBasicBlock(Record[2]); + BasicBlock *UnwindBB = getBasicBlock(Record[3]); + + unsigned OpNum = 4; + Value *Callee; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + return Error("Invalid INVOKE record"); + + const PointerType *CalleeTy = dyn_cast(Callee->getType()); + const FunctionType *FTy = !CalleeTy ? 0 : + dyn_cast(CalleeTy->getElementType()); + + // Check that the right number of fixed parameters are here. 
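+      // (Fixed arguments are encoded as bare value indices typed by the
+      //  callee's prototype; for vararg callees, any extra operands follow
+      //  as explicit type/value pairs, read below.)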
+ if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 || + Record.size() < OpNum+FTy->getNumParams()) + return Error("Invalid INVOKE record"); + + SmallVector Ops; + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { + Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + if (Ops.back() == 0) return Error("Invalid INVOKE record"); + } + + if (!FTy->isVarArg()) { + if (Record.size() != OpNum) + return Error("Invalid INVOKE record"); + } else { + // Read type/value pairs for varargs params. + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid INVOKE record"); + Ops.push_back(Op); + } + } + + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, + Ops.begin(), Ops.end()); + cast(I)->setCallingConv(CCInfo); + cast(I)->setAttributes(PAL); + break; + } + case bitc::FUNC_CODE_INST_UNWIND: // UNWIND + I = new UnwindInst(); + break; + case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE + I = new UnreachableInst(); + break; + case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] + if (Record.size() < 1 || ((Record.size()-1)&1)) + return Error("Invalid PHI record"); + const Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid PHI record"); + + PHINode *PN = PHINode::Create(Ty); + PN->reserveOperandSpace((Record.size()-1)/2); + + for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + Value *V = getFnValueByID(Record[1+i], Ty); + BasicBlock *BB = getBasicBlock(Record[2+i]); + if (!V || !BB) return Error("Invalid PHI record"); + PN->addIncoming(V, BB); + } + I = PN; + break; + } + + case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align] + if (Record.size() < 3) + return Error("Invalid MALLOC record"); + const PointerType *Ty = + dyn_cast_or_null(getTypeByID(Record[0])); + Value *Size = getFnValueByID(Record[1], Type::Int32Ty); + unsigned Align = Record[2]; + if (!Ty || !Size) return Error("Invalid MALLOC record"); + I = new MallocInst(Ty->getElementType(), Size, (1 << Align) >> 1); + break; + } + case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum != Record.size()) + return Error("Invalid FREE record"); + I = new FreeInst(Op); + break; + } + case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align] + if (Record.size() < 3) + return Error("Invalid ALLOCA record"); + const PointerType *Ty = + dyn_cast_or_null(getTypeByID(Record[0])); + Value *Size = getFnValueByID(Record[1], Type::Int32Ty); + unsigned Align = Record[2]; + if (!Ty || !Size) return Error("Invalid ALLOCA record"); + I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); + break; + } + case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum+2 != Record.size()) + return Error("Invalid LOAD record"); + + I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1); + break; + } + case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol] + unsigned OpNum = 0; + Value *Val, *Ptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast(Ptr->getType())->getElementType(), Val) || + OpNum+2 != Record.size()) + return Error("Invalid STORE record"); + + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + break; + } + case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol] + // 
FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0. + unsigned OpNum = 0; + Value *Val, *Ptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Val) || + getValue(Record, OpNum, PointerType::getUnqual(Val->getType()), Ptr)|| + OpNum+2 != Record.size()) + return Error("Invalid STORE record"); + + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + break; + } + case bitc::FUNC_CODE_INST_CALL: { + // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] + if (Record.size() < 3) + return Error("Invalid CALL record"); + + AttrListPtr PAL = getAttributes(Record[0]); + unsigned CCInfo = Record[1]; + + unsigned OpNum = 2; + Value *Callee; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + return Error("Invalid CALL record"); + + const PointerType *OpTy = dyn_cast(Callee->getType()); + const FunctionType *FTy = 0; + if (OpTy) FTy = dyn_cast(OpTy->getElementType()); + if (!FTy || Record.size() < FTy->getNumParams()+OpNum) + return Error("Invalid CALL record"); + + SmallVector Args; + // Read the fixed params. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { + if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID) + Args.push_back(getBasicBlock(Record[OpNum])); + else + Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + if (Args.back() == 0) return Error("Invalid CALL record"); + } + + // Read type/value pairs for varargs params. + if (!FTy->isVarArg()) { + if (OpNum != Record.size()) + return Error("Invalid CALL record"); + } else { + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid CALL record"); + Args.push_back(Op); + } + } + + I = CallInst::Create(Callee, Args.begin(), Args.end()); + cast(I)->setCallingConv(CCInfo>>1); + cast(I)->setTailCall(CCInfo & 1); + cast(I)->setAttributes(PAL); + break; + } + case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] + if (Record.size() < 3) + return Error("Invalid VAARG record"); + const Type *OpTy = getTypeByID(Record[0]); + Value *Op = getFnValueByID(Record[1], OpTy); + const Type *ResTy = getTypeByID(Record[2]); + if (!OpTy || !Op || !ResTy) + return Error("Invalid VAARG record"); + I = new VAArgInst(Op, ResTy); + break; + } + } + + // Add instruction to end of current BB. If there is no current BB, reject + // this file. + if (CurBB == 0) { + delete I; + return Error("Invalid instruction with no BB"); + } + CurBB->getInstList().push_back(I); + + // If this was a terminator instruction, move to the next block. + if (isa(I)) { + ++CurBBNo; + CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0; + } + + // Non-void values get registered in the value table for future use. + if (I && I->getType() != Type::VoidTy) + ValueList.AssignValue(I, NextValueNo++); + } + + // Check the function list for unresolved values. + if (Argument *A = dyn_cast(ValueList.back())) { + if (A->getParent() == 0) { + // We found at least one unresolved value. Nuke them all to avoid leaks. + for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ + if ((A = dyn_cast(ValueList.back())) && A->getParent() == 0) { + A->replaceAllUsesWith(UndefValue::get(A->getType())); + delete A; + } + } + return Error("Never resolved value found in function!"); + } + } + + // Trim the value list down to the size it was before we parsed this function. 
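+  // (Everything past ModuleValueListSize -- the arguments and instruction
+  //  results registered above -- is local to this body and must not stay
+  //  visible in the module-level value table.)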
+  ValueList.shrinkTo(ModuleValueListSize);
+  std::vector<BasicBlock*>().swap(FunctionBBs);
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuleProvider implementation
+//===----------------------------------------------------------------------===//
+
+
+bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) {
+  // If it already is material, ignore the request.
+  if (!F->hasNotBeenReadFromBitcode()) return false;
+
+  DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII =
+    DeferredFunctionInfo.find(F);
+  assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+
+  // Move the bit stream to the saved position of the deferred function body
+  // and restore the real linkage type for the function.
+  Stream.JumpToBit(DFII->second.first);
+  F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second);
+
+  if (ParseFunctionBody(F)) {
+    if (ErrInfo) *ErrInfo = ErrorString;
+    return true;
+  }
+
+  // Upgrade any old intrinsic calls in the function.
+  for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
+       E = UpgradedIntrinsics.end(); I != E; ++I) {
+    if (I->first != I->second) {
+      for (Value::use_iterator UI = I->first->use_begin(),
+           UE = I->first->use_end(); UI != UE; ) {
+        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+          UpgradeIntrinsicCall(CI, I->second);
+      }
+    }
+  }
+
+  return false;
+}
+
+void BitcodeReader::dematerializeFunction(Function *F) {
+  // If this function isn't materialized, or if it is a proto, this is a noop.
+  if (F->hasNotBeenReadFromBitcode() || F->isDeclaration())
+    return;
+
+  assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
+
+  // Just forget the function body, we can remat it later.
+  F->deleteBody();
+  F->setLinkage(GlobalValue::GhostLinkage);
+}
+
+
+Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
+  for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I =
+       DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E;
+       ++I) {
+    Function *F = I->first;
+    if (F->hasNotBeenReadFromBitcode() &&
+        materializeFunction(F, ErrInfo))
+      return 0;
+  }
+
+  // Upgrade any intrinsic calls that slipped through (should not happen!) and
+  // delete the old functions to clean up. We can't do this unless the entire
+  // module is materialized because there could always be another function body
+  // with calls to the old function.
+  for (std::vector<std::pair<Function*, Function*> >::iterator I =
+       UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
+    if (I->first != I->second) {
+      for (Value::use_iterator UI = I->first->use_begin(),
+           UE = I->first->use_end(); UI != UE; ) {
+        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+          UpgradeIntrinsicCall(CI, I->second);
+      }
+      if (!I->first->use_empty())
+        I->first->replaceAllUsesWith(I->second);
+      I->first->eraseFromParent();
+    }
+  }
+  std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+
+  return TheModule;
+}
+
+
+/// This method is provided by the parent ModuleProvider class and overridden
+/// here. It simply releases the module from its provider and frees up our
+/// state.
+/// @brief Release our hold on the generated module +Module *BitcodeReader::releaseModule(std::string *ErrInfo) { + // Since we're losing control of this Module, we must hand it back complete + Module *M = ModuleProvider::releaseModule(ErrInfo); + FreeState(); + return M; +} + + +//===----------------------------------------------------------------------===// +// External interface +//===----------------------------------------------------------------------===// + +/// getBitcodeModuleProvider - lazy function-at-a-time loading from a file. +/// +ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, + std::string *ErrMsg) { + BitcodeReader *R = new BitcodeReader(Buffer); + if (R->ParseBitcode()) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + + // Don't let the BitcodeReader dtor delete 'Buffer'. + R->releaseMemoryBuffer(); + delete R; + return 0; + } + return R; +} + +/// ParseBitcodeFile - Read the specified bitcode file, returning the module. +/// If an error occurs, return null and fill in *ErrMsg if non-null. +Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){ + BitcodeReader *R; + R = static_cast(getBitcodeModuleProvider(Buffer, ErrMsg)); + if (!R) return 0; + + // Read in the entire module. + Module *M = R->materializeModule(ErrMsg); + + // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether + // there was an error. + R->releaseMemoryBuffer(); + + // If there was no error, tell ModuleProvider not to delete it when its dtor + // is run. + if (M) + M = R->releaseModule(ErrMsg); + + delete R; + return M; +} diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h new file mode 100644 index 000000000000..0dc470b24a23 --- /dev/null +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -0,0 +1,214 @@ +//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitcodeReader class. +// +//===----------------------------------------------------------------------===// + +#ifndef BITCODE_READER_H +#define BITCODE_READER_H + +#include "llvm/ModuleProvider.h" +#include "llvm/Attributes.h" +#include "llvm/Type.h" +#include "llvm/OperandTraits.h" +#include "llvm/Bitcode/BitstreamReader.h" +#include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/ADT/DenseMap.h" +#include + +namespace llvm { + class MemoryBuffer; + +//===----------------------------------------------------------------------===// +// BitcodeReaderValueList Class +//===----------------------------------------------------------------------===// + +class BitcodeReaderValueList { + std::vector ValuePtrs; + + /// ResolveConstants - As we resolve forward-referenced constants, we add + /// information about them to this vector. This allows us to resolve them in + /// bulk instead of resolving each reference at a time. See the code in + /// ResolveConstantForwardRefs for more information about this. + /// + /// The key of this vector is the placeholder constant, the value is the slot + /// number that holds the resolved value. 
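+  /// For example, if constant slot 7 is referenced before it has been
+  /// read, a placeholder constant is handed out and the pair
+  /// (placeholder, 7) is queued here; ResolveConstantForwardRefs() later
+  /// replaces all uses of the placeholder with the real constant in
+  /// slot 7.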
+  typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+  ResolveConstantsTy ResolveConstants;
+public:
+  BitcodeReaderValueList() {}
+  ~BitcodeReaderValueList() {
+    assert(ResolveConstants.empty() && "Constants not resolved?");
+  }
+
+  // vector compatibility methods
+  unsigned size() const { return ValuePtrs.size(); }
+  void resize(unsigned N) { ValuePtrs.resize(N); }
+  void push_back(Value *V) {
+    ValuePtrs.push_back(V);
+  }
+
+  void clear() {
+    assert(ResolveConstants.empty() && "Constants not resolved?");
+    ValuePtrs.clear();
+  }
+
+  Value *operator[](unsigned i) const {
+    assert(i < ValuePtrs.size());
+    return ValuePtrs[i];
+  }
+
+  Value *back() const { return ValuePtrs.back(); }
+  void pop_back() { ValuePtrs.pop_back(); }
+  bool empty() const { return ValuePtrs.empty(); }
+  void shrinkTo(unsigned N) {
+    assert(N <= size() && "Invalid shrinkTo request!");
+    ValuePtrs.resize(N);
+  }
+
+  Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
+  Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+
+  void AssignValue(Value *V, unsigned Idx);
+
+  /// ResolveConstantForwardRefs - Once all constants are read, this method
+  /// bulk resolves any forward references.
+  void ResolveConstantForwardRefs();
+};
+
+class BitcodeReader : public ModuleProvider {
+  MemoryBuffer *Buffer;
+  BitstreamReader StreamFile;
+  BitstreamCursor Stream;
+
+  const char *ErrorString;
+
+  std::vector<PATypeHolder> TypeList;
+  BitcodeReaderValueList ValueList;
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+  std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+  /// MAttributes - The set of attributes by index. Index zero in the
+  /// file is for null, and is thus not represented here. As such all indices
+  /// are off by one.
+  std::vector<AttrListPtr> MAttributes;
+
+  /// FunctionBBs - While parsing a function body, this is a list of the basic
+  /// blocks for the function.
+  std::vector<BasicBlock*> FunctionBBs;
+
+  // When reading the module header, this list is populated with functions that
+  // have bodies later in the file.
+  std::vector<Function*> FunctionsWithBodies;
+
+  // When intrinsic functions are encountered which require upgrading they are
+  // stored here with their replacement function.
+  typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+  UpgradedIntrinsicMap UpgradedIntrinsics;
+
+  // After the module header has been read, the FunctionsWithBodies list is
+  // reversed. This keeps track of whether we've done this yet.
+  bool HasReversedFunctionsWithBodies;
+
+  /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find the deferred function body (in the
+  /// stream) and what linkage the original function had.
+  DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo;
+public:
+  explicit BitcodeReader(MemoryBuffer *buffer)
+    : Buffer(buffer), ErrorString(0) {
+    HasReversedFunctionsWithBodies = false;
+  }
+  ~BitcodeReader() {
+    FreeState();
+  }
+
+  void FreeState();
+
+  /// releaseMemoryBuffer - This causes the reader to completely forget about
+  /// the memory buffer it contains, which prevents the buffer from being
+  /// destroyed when it is deleted.
+  void releaseMemoryBuffer() {
+    Buffer = 0;
+  }
+
+  virtual bool materializeFunction(Function *F, std::string *ErrInfo = 0);
+  virtual Module *materializeModule(std::string *ErrInfo = 0);
+  virtual void dematerializeFunction(Function *F);
+  virtual Module *releaseModule(std::string *ErrInfo = 0);
+
+  bool Error(const char *Str) {
+    ErrorString = Str;
+    return true;
+  }
+  const char *getErrorString() const { return ErrorString; }
+
+  /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred. + bool ParseBitcode(); +private: + const Type *getTypeByID(unsigned ID, bool isTypeTable = false); + Value *getFnValueByID(unsigned ID, const Type *Ty) { + return ValueList.getValueFwdRef(ID, Ty); + } + BasicBlock *getBasicBlock(unsigned ID) const { + if (ID >= FunctionBBs.size()) return 0; // Invalid ID + return FunctionBBs[ID]; + } + AttrListPtr getAttributes(unsigned i) const { + if (i-1 < MAttributes.size()) + return MAttributes[i-1]; + return AttrListPtr(); + } + + /// getValueTypePair - Read a value/type pair out of the specified record from + /// slot 'Slot'. Increment Slot past the number of slots used in the record. + /// Return true on failure. + bool getValueTypePair(SmallVector &Record, unsigned &Slot, + unsigned InstNum, Value *&ResVal) { + if (Slot == Record.size()) return true; + unsigned ValNo = (unsigned)Record[Slot++]; + if (ValNo < InstNum) { + // If this is not a forward reference, just return the value we already + // have. + ResVal = getFnValueByID(ValNo, 0); + return ResVal == 0; + } else if (Slot == Record.size()) { + return true; + } + + unsigned TypeNo = (unsigned)Record[Slot++]; + ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); + return ResVal == 0; + } + bool getValue(SmallVector &Record, unsigned &Slot, + const Type *Ty, Value *&ResVal) { + if (Slot == Record.size()) return true; + unsigned ValNo = (unsigned)Record[Slot++]; + ResVal = getFnValueByID(ValNo, Ty); + return ResVal == 0; + } + + + bool ParseModule(const std::string &ModuleID); + bool ParseAttributeBlock(); + bool ParseTypeTable(); + bool ParseTypeSymbolTable(); + bool ParseValueSymbolTable(); + bool ParseConstants(); + bool RememberAndSkipFunctionBody(); + bool ParseFunctionBody(Function *F); + bool ResolveGlobalAndAliasInits(); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt new file mode 100644 index 000000000000..a19c79aacfe1 --- /dev/null +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMBitReader + BitReader.cpp + BitcodeReader.cpp + Deserialize.cpp + DeserializeAPFloat.cpp + DeserializeAPInt.cpp + ) \ No newline at end of file diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp new file mode 100644 index 000000000000..06da6ce72721 --- /dev/null +++ b/lib/Bitcode/Reader/Deserialize.cpp @@ -0,0 +1,454 @@ +//==- Deserialize.cpp - Generic Object Serialization to Bitcode --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the internal methods used for object serialization. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/Deserialize.h" + +#ifdef DEBUG_BACKPATCH +#include "llvm/Support/Streams.h" +#endif + +using namespace llvm; + +Deserializer::Deserializer(BitstreamReader& stream) + : Stream(stream), RecIdx(0), FreeList(NULL), AbbrevNo(0), RecordCode(0) { + + StreamStart = Stream.GetCurrentBitNo(); +} + +Deserializer::~Deserializer() { + assert (RecIdx >= Record.size() && + "Still scanning bitcode record when deserialization completed."); + +#ifdef DEBUG_BACKPATCH + for (MapTy::iterator I=BPatchMap.begin(), E=BPatchMap.end(); I!=E; ++I) + assert (I->first.hasFinalPtr() && + "Some pointers were not backpatched."); +#endif +} + + +bool Deserializer::inRecord() { + if (Record.size() > 0) { + if (RecIdx >= Record.size()) { + RecIdx = 0; + Record.clear(); + AbbrevNo = 0; + return false; + } + else + return true; + } + + return false; +} + +bool Deserializer::AdvanceStream() { + assert (!inRecord() && + "Cannot advance stream. Still processing a record."); + + if (AbbrevNo == bitc::ENTER_SUBBLOCK || + AbbrevNo >= bitc::UNABBREV_RECORD) + return true; + + while (!Stream.AtEndOfStream()) { + + uint64_t Pos = Stream.GetCurrentBitNo(); + AbbrevNo = Stream.ReadCode(); + + switch (AbbrevNo) { + case bitc::ENTER_SUBBLOCK: { + unsigned id = Stream.ReadSubBlockID(); + + // Determine the extent of the block. This is useful for jumping around + // the stream. This is hack: we read the header of the block, save + // the length, and then revert the bitstream to a location just before + // the block is entered. + uint64_t BPos = Stream.GetCurrentBitNo(); + Stream.ReadVBR(bitc::CodeLenWidth); // Skip the code size. + Stream.SkipToWord(); + unsigned NumWords = Stream.Read(bitc::BlockSizeWidth); + Stream.JumpToBit(BPos); + + BlockStack.push_back(Location(Pos,id,NumWords)); + break; + } + + case bitc::END_BLOCK: { + bool x = Stream.ReadBlockEnd(); + assert(!x && "Error at block end."); x=x; + BlockStack.pop_back(); + continue; + } + + case bitc::DEFINE_ABBREV: + Stream.ReadAbbrevRecord(); + continue; + + default: + break; + } + + return true; + } + + return false; +} + +void Deserializer::ReadRecord() { + + while (AdvanceStream() && AbbrevNo == bitc::ENTER_SUBBLOCK) { + assert (!BlockStack.empty()); + Stream.EnterSubBlock(BlockStack.back().BlockID); + AbbrevNo = 0; + } + + if (Stream.AtEndOfStream()) + return; + + assert (Record.empty()); + assert (AbbrevNo >= bitc::UNABBREV_RECORD); + RecordCode = Stream.ReadRecord(AbbrevNo,Record); + assert (Record.size() > 0); +} + +void Deserializer::SkipBlock() { + assert (!inRecord()); + + if (AtEnd()) + return; + + AdvanceStream(); + + assert (AbbrevNo == bitc::ENTER_SUBBLOCK); + BlockStack.pop_back(); + Stream.SkipBlock(); + + AbbrevNo = 0; + AdvanceStream(); +} + +bool Deserializer::SkipToBlock(unsigned BlockID) { + assert (!inRecord()); + + AdvanceStream(); + assert (AbbrevNo == bitc::ENTER_SUBBLOCK); + + unsigned BlockLevel = BlockStack.size(); + + while (!AtEnd() && + BlockLevel == BlockStack.size() && + getCurrentBlockID() != BlockID) + SkipBlock(); + + return !(AtEnd() || BlockLevel != BlockStack.size()); +} + +Deserializer::Location Deserializer::getCurrentBlockLocation() { + if (!inRecord()) + AdvanceStream(); + + return BlockStack.back(); +} + +bool Deserializer::JumpTo(const Location& Loc) { + + assert (!inRecord()); + + AdvanceStream(); + + assert (!BlockStack.empty() || AtEnd()); + + uint64_t LastBPos = StreamStart; + + while (!BlockStack.empty()) { + + LastBPos = 
BlockStack.back().BitNo; + + // Determine of the current block contains the location of the block + // we are looking for. + if (BlockStack.back().contains(Loc)) { + // We found the enclosing block. We must first POP it off to + // destroy any accumulated context within the block scope. We then + // jump to the position of the block and enter it. + Stream.JumpToBit(LastBPos); + + if (BlockStack.size() == Stream.BlockScope.size()) + Stream.PopBlockScope(); + + BlockStack.pop_back(); + + AbbrevNo = 0; + AdvanceStream(); + assert (AbbrevNo == bitc::ENTER_SUBBLOCK); + + Stream.EnterSubBlock(BlockStack.back().BlockID); + break; + } + + // This block does not contain the block we are looking for. Pop it. + if (BlockStack.size() == Stream.BlockScope.size()) + Stream.PopBlockScope(); + + BlockStack.pop_back(); + + } + + // Check if we have popped our way to the outermost scope. If so, + // we need to adjust our position. + if (BlockStack.empty()) { + assert (Stream.BlockScope.empty()); + + Stream.JumpToBit(Loc.BitNo < LastBPos ? StreamStart : LastBPos); + AbbrevNo = 0; + AdvanceStream(); + } + + assert (AbbrevNo == bitc::ENTER_SUBBLOCK); + assert (!BlockStack.empty()); + + while (!AtEnd() && BlockStack.back() != Loc) { + if (BlockStack.back().contains(Loc)) { + Stream.EnterSubBlock(BlockStack.back().BlockID); + AbbrevNo = 0; + AdvanceStream(); + continue; + } + else + SkipBlock(); + } + + if (AtEnd()) + return false; + + assert (BlockStack.back() == Loc); + + return true; +} + +void Deserializer::Rewind() { + while (!Stream.BlockScope.empty()) + Stream.PopBlockScope(); + + while (!BlockStack.empty()) + BlockStack.pop_back(); + + Stream.JumpToBit(StreamStart); + AbbrevNo = 0; +} + + +unsigned Deserializer::getCurrentBlockID() { + if (!inRecord()) + AdvanceStream(); + + return BlockStack.back().BlockID; +} + +unsigned Deserializer::getRecordCode() { + if (!inRecord()) { + AdvanceStream(); + assert (AbbrevNo >= bitc::UNABBREV_RECORD); + ReadRecord(); + } + + return RecordCode; +} + +bool Deserializer::FinishedBlock(Location BlockLoc) { + if (!inRecord()) + AdvanceStream(); + + for (llvm::SmallVector::reverse_iterator + I=BlockStack.rbegin(), E=BlockStack.rend(); I!=E; ++I) + if (*I == BlockLoc) + return false; + + return true; +} + +unsigned Deserializer::getAbbrevNo() { + if (!inRecord()) + AdvanceStream(); + + return AbbrevNo; +} + +bool Deserializer::AtEnd() { + if (inRecord()) + return false; + + if (!AdvanceStream()) + return true; + + return false; +} + +uint64_t Deserializer::ReadInt() { + // FIXME: Any error recovery/handling with incomplete or bad files? + if (!inRecord()) + ReadRecord(); + + return Record[RecIdx++]; +} + +int64_t Deserializer::ReadSInt() { + uint64_t x = ReadInt(); + int64_t magnitude = x >> 1; + return x & 0x1 ? -magnitude : magnitude; +} + +char* Deserializer::ReadCStr(char* cstr, unsigned MaxLen, bool isNullTerm) { + if (cstr == NULL) + MaxLen = 0; // Zero this just in case someone does something funny. + + unsigned len = ReadInt(); + + assert (MaxLen == 0 || (len + (isNullTerm ? 1 : 0)) <= MaxLen); + + if (!cstr) + cstr = new char[len + (isNullTerm ? 1 : 0)]; + + assert (cstr != NULL); + + for (unsigned i = 0; i < len; ++i) + cstr[i] = (char) ReadInt(); + + if (isNullTerm) + cstr[len] = '\0'; + + return cstr; +} + +void Deserializer::ReadCStr(std::vector& buff, bool isNullTerm, + unsigned Idx) { + + unsigned len = ReadInt(); + + // If Idx is beyond the current before size, reduce Idx to refer to the + // element after the last element. 
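+  // (I.e. Idx is clamped to buff.size(): with buff.size() == 2 and
+  //  Idx == 5, the string is appended immediately after the two existing
+  //  elements.)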
+  if (Idx > buff.size())
+    Idx = buff.size();
+
+  buff.reserve(len+Idx);
+  buff.resize(Idx);
+
+  for (unsigned i = 0; i < len; ++i)
+    buff.push_back((char) ReadInt());
+
+  if (isNullTerm)
+    buff.push_back('\0');
+}
+
+void Deserializer::RegisterPtr(const SerializedPtrID& PtrId,
+                               const void* Ptr) {
+
+  MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+  assert (!HasFinalPtr(E) && "Pointer already registered.");
+
+#ifdef DEBUG_BACKPATCH
+  llvm::cerr << "RegisterPtr: " << PtrId << " => " << Ptr << "\n";
+#endif
+
+  SetPtr(E,Ptr);
+}
+
+void Deserializer::ReadUIntPtr(uintptr_t& PtrRef,
+                               const SerializedPtrID& PtrId,
+                               bool AllowBackpatch) {
+  if (PtrId == 0) {
+    PtrRef = 0;
+    return;
+  }
+
+  MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+  if (HasFinalPtr(E)) {
+    PtrRef = GetFinalPtr(E);
+
+#ifdef DEBUG_BACKPATCH
+    llvm::cerr << "ReadUintPtr: " << PtrId
+               << " <-- " << (void*) GetFinalPtr(E) << '\n';
+#endif
+  }
+  else {
+    assert (AllowBackpatch &&
+            "Client forbids backpatching for this pointer.");
+
+#ifdef DEBUG_BACKPATCH
+    llvm::cerr << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n";
+#endif
+
+    // Register backpatch. Check the freelist for a BPNode.
+    BPNode* N;
+
+    if (FreeList) {
+      N = FreeList;
+      FreeList = FreeList->Next;
+    }
+    else // No available BPNode. Allocate one.
+      N = (BPNode*) Allocator.Allocate<BPNode>();
+
+    new (N) BPNode(GetBPNode(E),PtrRef);
+    SetBPNode(E,N);
+  }
+}
+
+uintptr_t Deserializer::ReadInternalRefPtr() {
+  SerializedPtrID PtrId = ReadPtrID();
+
+  assert (PtrId != 0 && "References cannot refer the NULL address.");
+
+  MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+  assert (HasFinalPtr(E) &&
+          "Cannot backpatch references. Object must be already deserialized.");
+
+  return GetFinalPtr(E);
+}
+
+void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) {
+  BPNode* Last = NULL;
+
+  for (BPNode* N = Head; N != NULL; N=N->Next) {
+    Last = N;
+    N->PtrRef |= reinterpret_cast<uintptr_t>(P);
+  }
+
+  if (Last) {
+    Last->Next = FreeList;
+    FreeList = Head;
+  }
+
+  Ptr = const_cast<void*>(P);
+}
+
+
+#define INT_READ(TYPE)\
+void SerializeTrait<TYPE>::Read(Deserializer& D, TYPE& X) {\
+  X = (TYPE) D.ReadInt(); }
+
+INT_READ(bool)
+INT_READ(unsigned char)
+INT_READ(unsigned short)
+INT_READ(unsigned int)
+INT_READ(unsigned long)
+
+#define SINT_READ(TYPE)\
+void SerializeTrait<TYPE>::Read(Deserializer& D, TYPE& X) {\
+  X = (TYPE) D.ReadSInt(); }
+
+SINT_READ(signed char)
+SINT_READ(signed short)
+SINT_READ(signed int)
+SINT_READ(signed long)
diff --git a/lib/Bitcode/Reader/DeserializeAPFloat.cpp b/lib/Bitcode/Reader/DeserializeAPFloat.cpp
new file mode 100644
index 000000000000..ee24b681f026
--- /dev/null
+++ b/lib/Bitcode/Reader/DeserializeAPFloat.cpp
@@ -0,0 +1,24 @@
+//===- DeserializeAPFloat.cpp - Deserialization for APFloat ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements deserialization of APFloat.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Bitcode/Deserialize.h"
+
+using namespace llvm;
+
+APFloat APFloat::ReadVal(Deserializer& D) {
+  APInt x;
+  D.Read(x);
+  return APFloat(x);
+}
+
diff --git a/lib/Bitcode/Reader/DeserializeAPInt.cpp b/lib/Bitcode/Reader/DeserializeAPInt.cpp
new file mode 100644
index 000000000000..1b5b2bf1ff14
--- /dev/null
+++ b/lib/Bitcode/Reader/DeserializeAPInt.cpp
@@ -0,0 +1,33 @@
+//===-- DeserializeAPInt.cpp - Deserialization for APInts ------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements deserialization of APInts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Bitcode/Deserialize.h"
+#include <cassert>
+
+using namespace llvm;
+
+void APInt::Read(Deserializer& D) {
+  BitWidth = D.ReadInt();
+
+  if (isSingleWord())
+    VAL = D.ReadInt();
+  else {
+    uint32_t NumWords = D.ReadInt();
+    assert (NumWords > 1);
+    pVal = new uint64_t[NumWords];
+    assert (pVal && "Allocation in deserialization of APInt failed.");
+    for (unsigned i = 0; i < NumWords; ++i)
+      pVal[i] = D.ReadInt();
+  }
+}
diff --git a/lib/Bitcode/Reader/Makefile b/lib/Bitcode/Reader/Makefile
new file mode 100644
index 000000000000..59af8d53a73e
--- /dev/null
+++ b/lib/Bitcode/Reader/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Reader/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 000000000000..8834964b040c
--- /dev/null
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,58 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include <fstream>
+
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+  std::ofstream OS(Path, std::ios_base::out|std::ios::trunc|std::ios::binary);
+
+  if (!OS.fail())
+    WriteBitcodeToFile(unwrap(M), OS);
+
+  if (OS.fail())
+    return -1;
+
+  return 0;
+}
+
+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#include <ext/stdio_filebuf.h>
+
+// FIXME: Control this with configure? Provide some portable abstraction in
+// libSystem? As is, the user will just get a linker error if they use this on
+// non-GCC. Some C++ stdlibs even have ofstream::ofstream(int fd).
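+// (stdio_filebuf is a GNU libstdc++ extension that wraps an already-open
+//  POSIX file descriptor in a std::streambuf, so the bitcode can be
+//  written through the ostream API without reopening the file by path.)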
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { + __gnu_cxx::stdio_filebuf Buffer(FileHandle, std::ios_base::out | + std::ios::trunc | + std::ios::binary); + std::ostream OS(&Buffer); + + if (!OS.fail()) + WriteBitcodeToFile(unwrap(M), OS); + + if (OS.fail()) + return -1; + + return 0; +} + +#else + +int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { + return -1; // Not supported. +} + +#endif diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp new file mode 100644 index 000000000000..bfc029c1f277 --- /dev/null +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -0,0 +1,1449 @@ +//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Bitcode writer implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitstreamWriter.h" +#include "llvm/Bitcode/LLVMBitCodes.h" +#include "ValueEnumerator.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/MDNode.h" +#include "llvm/Module.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Program.h" +using namespace llvm; + +/// These are manifest constants used by the bitcode writer. They do not need to +/// be kept in sync with the reader, but need to be consistent within this file. +enum { + CurVersion = 0, + + // VALUE_SYMTAB_BLOCK abbrev id's. + VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, + VST_ENTRY_7_ABBREV, + VST_ENTRY_6_ABBREV, + VST_BBENTRY_6_ABBREV, + + // CONSTANTS_BLOCK abbrev id's. + CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV, + CONSTANTS_INTEGER_ABBREV, + CONSTANTS_CE_CAST_Abbrev, + CONSTANTS_NULL_Abbrev, + + // FUNCTION_BLOCK abbrev id's. 
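+  // (Abbreviation ids below bitc::FIRST_APPLICATION_ABBREV are reserved
+  //  by the bitstream format itself; the ids listed here are assigned in
+  //  the order the abbreviations are registered with EmitAbbrev() for the
+  //  corresponding block.)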
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV, + FUNCTION_INST_BINOP_ABBREV, + FUNCTION_INST_CAST_ABBREV, + FUNCTION_INST_RET_VOID_ABBREV, + FUNCTION_INST_RET_VAL_ABBREV, + FUNCTION_INST_UNREACHABLE_ABBREV +}; + + +static unsigned GetEncodedCastOpcode(unsigned Opcode) { + switch (Opcode) { + default: assert(0 && "Unknown cast instruction!"); + case Instruction::Trunc : return bitc::CAST_TRUNC; + case Instruction::ZExt : return bitc::CAST_ZEXT; + case Instruction::SExt : return bitc::CAST_SEXT; + case Instruction::FPToUI : return bitc::CAST_FPTOUI; + case Instruction::FPToSI : return bitc::CAST_FPTOSI; + case Instruction::UIToFP : return bitc::CAST_UITOFP; + case Instruction::SIToFP : return bitc::CAST_SITOFP; + case Instruction::FPTrunc : return bitc::CAST_FPTRUNC; + case Instruction::FPExt : return bitc::CAST_FPEXT; + case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; + case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; + case Instruction::BitCast : return bitc::CAST_BITCAST; + } +} + +static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { + switch (Opcode) { + default: assert(0 && "Unknown binary instruction!"); + case Instruction::Add: return bitc::BINOP_ADD; + case Instruction::Sub: return bitc::BINOP_SUB; + case Instruction::Mul: return bitc::BINOP_MUL; + case Instruction::UDiv: return bitc::BINOP_UDIV; + case Instruction::FDiv: + case Instruction::SDiv: return bitc::BINOP_SDIV; + case Instruction::URem: return bitc::BINOP_UREM; + case Instruction::FRem: + case Instruction::SRem: return bitc::BINOP_SREM; + case Instruction::Shl: return bitc::BINOP_SHL; + case Instruction::LShr: return bitc::BINOP_LSHR; + case Instruction::AShr: return bitc::BINOP_ASHR; + case Instruction::And: return bitc::BINOP_AND; + case Instruction::Or: return bitc::BINOP_OR; + case Instruction::Xor: return bitc::BINOP_XOR; + } +} + + + +static void WriteStringRecord(unsigned Code, const std::string &Str, + unsigned AbbrevToUse, BitstreamWriter &Stream) { + SmallVector Vals; + + // Code: [strchar x N] + for (unsigned i = 0, e = Str.size(); i != e; ++i) + Vals.push_back(Str[i]); + + // Emit the finished record. + Stream.EmitRecord(Code, Vals, AbbrevToUse); +} + +// Emit information about parameter attributes. +static void WriteAttributeTable(const ValueEnumerator &VE, + BitstreamWriter &Stream) { + const std::vector &Attrs = VE.getAttributes(); + if (Attrs.empty()) return; + + Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); + + SmallVector Record; + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { + const AttrListPtr &A = Attrs[i]; + for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { + const AttributeWithIndex &PAWI = A.getSlot(i); + Record.push_back(PAWI.Index); + + // FIXME: remove in LLVM 3.0 + // Store the alignment in the bitcode as a 16-bit raw value instead of a + // 5-bit log2 encoded value. Shift the bits above the alignment up by + // 11 bits. + uint64_t FauxAttr = PAWI.Attrs & 0xffff; + if (PAWI.Attrs & Attribute::Alignment) + FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16); + FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11; + + Record.push_back(FauxAttr); + } + + Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); + Record.clear(); + } + + Stream.ExitBlock(); +} + +/// WriteTypeTable - Write out the type table for a module. 
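+/// Abbreviations for the most common type records (pointer, function,
+/// struct, array) are registered up front; each type is then emitted as a
+/// single record that refers to other types by their ValueEnumerator
+/// type id.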
+/// WriteTypeTable - Write out the type table for a module.
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+  const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+  Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
+  SmallVector<uint64_t, 64> TypeVals;
+
+  // Abbrev for TYPE_CODE_POINTER.
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  Abbv->Add(BitCodeAbbrevOp(0));  // Addrspace = 0
+  unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_FUNCTION.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // isvararg
+  Abbv->Add(BitCodeAbbrevOp(0));  // FIXME: DEAD value, remove in LLVM 3.0
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_STRUCT.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // ispacked
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_ARRAY.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));  // size
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Emit an entry count so the reader can reserve space.
+  TypeVals.push_back(TypeList.size());
+  Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+  TypeVals.clear();
+
+  // Loop over all of the types, emitting each in turn.
+  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+    const Type *T = TypeList[i].first;
+    int AbbrevToUse = 0;
+    unsigned Code = 0;
+
+    switch (T->getTypeID()) {
+    default: assert(0 && "Unknown type!");
+    case Type::VoidTyID:      Code = bitc::TYPE_CODE_VOID;      break;
+    case Type::FloatTyID:     Code = bitc::TYPE_CODE_FLOAT;     break;
+    case Type::DoubleTyID:    Code = bitc::TYPE_CODE_DOUBLE;    break;
+    case Type::X86_FP80TyID:  Code = bitc::TYPE_CODE_X86_FP80;  break;
+    case Type::FP128TyID:     Code = bitc::TYPE_CODE_FP128;     break;
+    case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+    case Type::LabelTyID:     Code = bitc::TYPE_CODE_LABEL;     break;
+    case Type::OpaqueTyID:    Code = bitc::TYPE_CODE_OPAQUE;    break;
+    case Type::MetadataTyID:  Code = bitc::TYPE_CODE_METADATA;  break;
+    case Type::IntegerTyID:
+      // INTEGER: [width]
+      Code = bitc::TYPE_CODE_INTEGER;
+      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+      break;
+    case Type::PointerTyID: {
+      const PointerType *PTy = cast<PointerType>(T);
+      // POINTER: [pointee type, address space]
+      Code = bitc::TYPE_CODE_POINTER;
+      TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+      unsigned AddressSpace = PTy->getAddressSpace();
+      TypeVals.push_back(AddressSpace);
+      if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+      break;
+    }
+    case Type::FunctionTyID: {
+      const FunctionType *FT = cast<FunctionType>(T);
+      // FUNCTION: [isvararg, attrid, retty, paramty x N]
+      Code = bitc::TYPE_CODE_FUNCTION;
+      TypeVals.push_back(FT->isVarArg());
+      TypeVals.push_back(0);  // FIXME: DEAD: remove in llvm 3.0
+      TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+        TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+      AbbrevToUse = FunctionAbbrev;
+      break;
+    }
+    case Type::StructTyID: {
+      const StructType *ST = cast<StructType>(T);
+      // STRUCT: [ispacked, eltty x N]
+      Code = bitc::TYPE_CODE_STRUCT;
+      TypeVals.push_back(ST->isPacked());
+      // Output all of the element types.
+      for (StructType::element_iterator I = ST->element_begin(),
+           E = ST->element_end(); I != E; ++I)
+        TypeVals.push_back(VE.getTypeID(*I));
+      AbbrevToUse = StructAbbrev;
+      break;
+    }
+    case Type::ArrayTyID: {
+      const ArrayType *AT = cast<ArrayType>(T);
+      // ARRAY: [numelts, eltty]
+      Code = bitc::TYPE_CODE_ARRAY;
+      TypeVals.push_back(AT->getNumElements());
+      TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+      AbbrevToUse = ArrayAbbrev;
+      break;
+    }
+    case Type::VectorTyID: {
+      const VectorType *VT = cast<VectorType>(T);
+      // VECTOR: [numelts, eltty]
+      Code = bitc::TYPE_CODE_VECTOR;
+      TypeVals.push_back(VT->getNumElements());
+      TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+      break;
+    }
+    }
+
+    // Emit the finished record.
+    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+    TypeVals.clear();
+  }
+
+  Stream.ExitBlock();
+}
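Throughout this file, abbreviation operands that hold a type or value index are sized with Log2_32_Ceil(N+1) bits, the smallest fixed width that can represent indices 0..N. A standalone sketch of the arithmetic (the local Log2_32_Ceil below is a stand-in for the MathExtras helper of the same name):

    #include <cassert>

    static unsigned Log2_32_Ceil(unsigned V) {
      unsigned Bits = 0;
      while ((1u << Bits) < V) ++Bits;  // smallest Bits with 2^Bits >= V
      return Bits;
    }

    int main() {
      assert(Log2_32_Ceil(1) == 0);
      assert(Log2_32_Ceil(9) == 4);      // 9 distinct values need 4 bits
      // Sizing for a table with 8 entries: indices 0..8 inclusive -> 4 bits.
      assert(Log2_32_Ceil(8 + 1) == 4);
      return 0;
    }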
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+  switch (GV->getLinkage()) {
+  default: assert(0 && "Invalid linkage!");
+  case GlobalValue::GhostLinkage:  // Map ghost linkage onto external.
+  case GlobalValue::ExternalLinkage:            return 0;
+  case GlobalValue::WeakAnyLinkage:             return 1;
+  case GlobalValue::AppendingLinkage:           return 2;
+  case GlobalValue::InternalLinkage:            return 3;
+  case GlobalValue::LinkOnceAnyLinkage:         return 4;
+  case GlobalValue::DLLImportLinkage:           return 5;
+  case GlobalValue::DLLExportLinkage:           return 6;
+  case GlobalValue::ExternalWeakLinkage:        return 7;
+  case GlobalValue::CommonLinkage:              return 8;
+  case GlobalValue::PrivateLinkage:             return 9;
+  case GlobalValue::WeakODRLinkage:             return 10;
+  case GlobalValue::LinkOnceODRLinkage:         return 11;
+  case GlobalValue::AvailableExternallyLinkage: return 12;
+  }
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+  switch (GV->getVisibility()) {
+  default: assert(0 && "Invalid visibility!");
+  case GlobalValue::DefaultVisibility:   return 0;
+  case GlobalValue::HiddenVisibility:    return 1;
+  case GlobalValue::ProtectedVisibility: return 2;
+  }
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+                            BitstreamWriter &Stream) {
+  // Emit the list of dependent libraries for the Module.
+  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+    WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
+
+  // Emit various pieces of data attached to a module.
+  if (!M->getTargetTriple().empty())
+    WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+                      0/*TODO*/, Stream);
+  if (!M->getDataLayout().empty())
+    WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+                      0/*TODO*/, Stream);
+  if (!M->getModuleInlineAsm().empty())
+    WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+                      0/*TODO*/, Stream);
+
+  // Emit information about sections and GC, computing how many there are. Also
+  // compute the maximum alignment value.
+  std::map<std::string, unsigned> SectionMap;
+  std::map<std::string, unsigned> GCMap;
+  unsigned MaxAlignment = 0;
+  unsigned MaxGlobalType = 0;
+  for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+       GV != E; ++GV) {
+    MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+    MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+
+    if (!GV->hasSection()) continue;
+    // Give section names unique ID's.
+    unsigned &Entry = SectionMap[GV->getSection()];
+    if (Entry != 0) continue;
+    WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+                      0/*TODO*/, Stream);
+    Entry = SectionMap.size();
+  }
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+    MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+    if (F->hasSection()) {
+      // Give section names unique ID's.
+      unsigned &Entry = SectionMap[F->getSection()];
+      if (!Entry) {
+        WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+                          0/*TODO*/, Stream);
+        Entry = SectionMap.size();
+      }
+    }
+    if (F->hasGC()) {
+      // Same for GC names.
+      unsigned &Entry = GCMap[F->getGC()];
+      if (!Entry) {
+        WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+                          0/*TODO*/, Stream);
+        Entry = GCMap.size();
+      }
+    }
+  }
+
+  // Emit abbrev for globals, now that we know # sections and max alignment.
+  unsigned SimpleGVarAbbrev = 0;
+  if (!M->global_empty()) {
+    // Add an abbrev for common globals with no visibility or thread localness.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              Log2_32_Ceil(MaxGlobalType+1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // Constant.
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // Initializer.
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // Linkage.
+    if (MaxAlignment == 0)                                  // Alignment.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else {
+      unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(MaxEncAlignment+1)));
+    }
+    if (SectionMap.empty())                                 // Section.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(SectionMap.size()+1)));
+    // Don't bother emitting vis + thread local.
+    SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+  }
+
+  // Emit the global variable information.
+  SmallVector<unsigned, 64> Vals;
+  for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+       GV != E; ++GV) {
+    unsigned AbbrevToUse = 0;
+
+    // GLOBALVAR: [type, isconst, initid,
+    //             linkage, alignment, section, visibility, threadlocal]
+    Vals.push_back(VE.getTypeID(GV->getType()));
+    Vals.push_back(GV->isConstant());
+    Vals.push_back(GV->isDeclaration() ? 0 :
+                   (VE.getValueID(GV->getInitializer()) + 1));
+    Vals.push_back(getEncodedLinkage(GV));
+    Vals.push_back(Log2_32(GV->getAlignment())+1);
+    Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+    if (GV->isThreadLocal() ||
+        GV->getVisibility() != GlobalValue::DefaultVisibility) {
+      Vals.push_back(getEncodedVisibility(GV));
+      Vals.push_back(GV->isThreadLocal());
+    } else {
+      AbbrevToUse = SimpleGVarAbbrev;
+    }
+
+    Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+  // Emit the function proto information.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+    // FUNCTION: [type, callingconv, isproto, linkage, paramattr, alignment,
+    //            section, visibility, gc]
+    Vals.push_back(VE.getTypeID(F->getType()));
+    Vals.push_back(F->getCallingConv());
+    Vals.push_back(F->isDeclaration());
+    Vals.push_back(getEncodedLinkage(F));
+    Vals.push_back(VE.getAttributeID(F->getAttributes()));
+    Vals.push_back(Log2_32(F->getAlignment())+1);
+    Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+    Vals.push_back(getEncodedVisibility(F));
+    Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+
+  // Emit the alias information.
+  for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+       AI != E; ++AI) {
+    Vals.push_back(VE.getTypeID(AI->getType()));
+    Vals.push_back(VE.getValueID(AI->getAliasee()));
+    Vals.push_back(getEncodedLinkage(AI));
+    Vals.push_back(getEncodedVisibility(AI));
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+}
+
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+                           const ValueEnumerator &VE,
+                           BitstreamWriter &Stream, bool isGlobal) {
+  if (FirstVal == LastVal) return;
+
+  Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+  unsigned AggregateAbbrev = 0;
+  unsigned String8Abbrev = 0;
+  unsigned CString7Abbrev = 0;
+  unsigned CString6Abbrev = 0;
+  unsigned MDString8Abbrev = 0;
+  unsigned MDString6Abbrev = 0;
+  // If this is a constant pool for the module, emit module-specific abbrevs.
+  if (isGlobal) {
+    // Abbrev for CST_CODE_AGGREGATE.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+    AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+    // Abbrev for CST_CODE_STRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    String8Abbrev = Stream.EmitAbbrev(Abbv);
+    // Abbrev for CST_CODE_CSTRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    CString7Abbrev = Stream.EmitAbbrev(Abbv);
+    // Abbrev for CST_CODE_CSTRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    CString6Abbrev = Stream.EmitAbbrev(Abbv);
+
+    // Abbrev for CST_CODE_MDSTRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    MDString8Abbrev = Stream.EmitAbbrev(Abbv);
+    // Abbrev for CST_CODE_MDSTRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    MDString6Abbrev = Stream.EmitAbbrev(Abbv);
+  }
+
+  SmallVector<uint64_t, 64> Record;
+
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+  const Type *LastTy = 0;
+  for (unsigned i = FirstVal; i != LastVal; ++i) {
+    const Value *V = Vals[i].first;
+    // If we need to switch types, do so now.
+    if (V->getType() != LastTy) {
+      LastTy = V->getType();
+      Record.push_back(VE.getTypeID(LastTy));
+      Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+                        CONSTANTS_SETTYPE_ABBREV);
+      Record.clear();
+    }
+
+    if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+      Record.push_back(unsigned(IA->hasSideEffects()));
+
+      // Add the asm string.
+      const std::string &AsmStr = IA->getAsmString();
+      Record.push_back(AsmStr.size());
+      for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+        Record.push_back(AsmStr[i]);
+
+      // Add the constraint string.
+      const std::string &ConstraintStr = IA->getConstraintString();
+      Record.push_back(ConstraintStr.size());
+      for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+        Record.push_back(ConstraintStr[i]);
+      Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+      Record.clear();
+      continue;
+    }
+    const Constant *C = cast<Constant>(V);
+    unsigned Code = -1U;
+    unsigned AbbrevToUse = 0;
+    if (C->isNullValue()) {
+      Code = bitc::CST_CODE_NULL;
+    } else if (isa<UndefValue>(C)) {
+      Code = bitc::CST_CODE_UNDEF;
+    } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+      if (IV->getBitWidth() <= 64) {
+        int64_t V = IV->getSExtValue();
+        if (V >= 0)
+          Record.push_back(V << 1);
+        else
+          Record.push_back((-V << 1) | 1);
+        Code = bitc::CST_CODE_INTEGER;
+        AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+      } else {  // Wide integers, > 64 bits in size.
+        // We have an arbitrary precision integer value to write whose
+        // bit width is > 64. However, in canonical unsigned integer
+        // format it is likely that the high bits are going to be zero.
+        // So, we only write the number of active words.
+        unsigned NWords = IV->getValue().getActiveWords();
+        const uint64_t *RawWords = IV->getValue().getRawData();
+        for (unsigned i = 0; i != NWords; ++i) {
+          int64_t V = RawWords[i];
+          if (V >= 0)
+            Record.push_back(V << 1);
+          else
+            Record.push_back((-V << 1) | 1);
+        }
+        Code = bitc::CST_CODE_WIDE_INTEGER;
+      }
+    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      Code = bitc::CST_CODE_FLOAT;
+      const Type *Ty = CFP->getType();
+      if (Ty == Type::FloatTy || Ty == Type::DoubleTy) {
+        Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+      } else if (Ty == Type::X86_FP80Ty) {
+        // api needed to prevent premature destruction
+        // bits are not in the same order as a normal i80 APInt, compensate.
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back((p[1] << 48) | (p[0] >> 16));
+        Record.push_back(p[0] & 0xffffLL);
+      } else if (Ty == Type::FP128Ty || Ty == Type::PPC_FP128Ty) {
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back(p[0]);
+        Record.push_back(p[1]);
+      } else {
+        assert(0 && "Unknown FP type!");
+      }
+    } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+      // Emit constant strings specially.
+      unsigned NumOps = C->getNumOperands();
+      // If this is a null-terminated string, use the denser CSTRING encoding.
+      if (C->getOperand(NumOps-1)->isNullValue()) {
+        Code = bitc::CST_CODE_CSTRING;
+        --NumOps;  // Don't encode the null, which isn't allowed by char6.
+      } else {
+        Code = bitc::CST_CODE_STRING;
+        AbbrevToUse = String8Abbrev;
+      }
+      bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+      bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+      for (unsigned i = 0; i != NumOps; ++i) {
+        unsigned char V = cast<ConstantInt>(C->getOperand(i))->getZExtValue();
+        Record.push_back(V);
+        isCStr7 &= (V & 128) == 0;
+        if (isCStrChar6)
+          isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+      }
+
+      if (isCStrChar6)
+        AbbrevToUse = CString6Abbrev;
+      else if (isCStr7)
+        AbbrevToUse = CString7Abbrev;
+    } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+               isa<ConstantVector>(C)) {
+      Code = bitc::CST_CODE_AGGREGATE;
+      for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+        Record.push_back(VE.getValueID(C->getOperand(i)));
+      AbbrevToUse = AggregateAbbrev;
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      switch (CE->getOpcode()) {
+      default:
+        if (Instruction::isCast(CE->getOpcode())) {
+          Code = bitc::CST_CODE_CE_CAST;
+          Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+          Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+        } else {
+          assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+          Code = bitc::CST_CODE_CE_BINOP;
+          Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          Record.push_back(VE.getValueID(C->getOperand(1)));
+        }
+        break;
+      case Instruction::GetElementPtr:
+        Code = bitc::CST_CODE_CE_GEP;
+        for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+          Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(i)));
+        }
+        break;
+      case Instruction::Select:
+        Code = bitc::CST_CODE_CE_SELECT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ExtractElement:
+        Code = bitc::CST_CODE_CE_EXTRACTELT;
+        Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        break;
+      case Instruction::InsertElement:
+        Code = bitc::CST_CODE_CE_INSERTELT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ShuffleVector:
+        // If the return type and argument types are the same, this is a
+        // standard shufflevector instruction. If the types are different,
+        // then the shuffle is widening or truncating the input vectors, and
+        // the argument type must also be encoded.
+        if (C->getType() == C->getOperand(0)->getType()) {
+          Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+        } else {
+          Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+          Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        }
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+      case Instruction::VICmp:
+      case Instruction::VFCmp:
+        if (isa<VectorType>(C->getOperand(0)->getType())
+            && (CE->getOpcode() == Instruction::ICmp
+                || CE->getOpcode() == Instruction::FCmp)) {
+          // compare returning vector of Int1Ty
+          assert(0 && "Unsupported constant!");
+        } else {
+          Code = bitc::CST_CODE_CE_CMP;
+        }
+        Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(CE->getPredicate());
+        break;
+      }
+    } else if (const MDString *S = dyn_cast<MDString>(C)) {
+      Code = bitc::CST_CODE_MDSTRING;
+      AbbrevToUse = MDString6Abbrev;
+      for (unsigned i = 0, e = S->size(); i != e; ++i) {
+        char V = S->begin()[i];
+        Record.push_back(V);
+
+        if (!BitCodeAbbrevOp::isChar6(V))
+          AbbrevToUse = MDString8Abbrev;
+      }
+    } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
+      Code = bitc::CST_CODE_MDNODE;
+      for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
+        if (N->getElement(i)) {
+          Record.push_back(VE.getTypeID(N->getElement(i)->getType()));
+          Record.push_back(VE.getValueID(N->getElement(i)));
+        } else {
+          Record.push_back(VE.getTypeID(Type::VoidTy));
+          Record.push_back(0);
+        }
+      }
+    } else {
+      assert(0 && "Unknown constant!");
+    }
+    Stream.EmitRecord(Code, Record, AbbrevToUse);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+                                 BitstreamWriter &Stream) {
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+  // Find the first constant to emit, which is the first non-globalvalue value.
+  // We know globalvalues have been emitted by WriteModuleInfo.
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+    if (!isa<GlobalValue>(Vals[i].first)) {
+      WriteConstants(i, Vals.size(), VE, Stream, true);
+      return;
+    }
+  }
+}
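Both CST_CODE_INTEGER and CST_CODE_WIDE_INTEGER above use the same sign-in-the-low-bit scheme: a non-negative V becomes V<<1 and a negative V becomes (-V<<1)|1, which keeps small magnitudes small under VBR. A standalone sketch of the transform and its inverse (not part of the patch; the INT64_MIN corner case is left aside for brevity):

    #include <cassert>
    #include <stdint.h>

    uint64_t encodeSigned(int64_t V) {
      return V >= 0 ? uint64_t(V) << 1 : (uint64_t(-V) << 1) | 1;
    }

    int64_t decodeSigned(uint64_t E) {
      int64_t Mag = int64_t(E >> 1);      // magnitude in the high bits
      return (E & 1) ? -Mag : Mag;        // low bit carries the sign
    }

    int main() {
      assert(encodeSigned(0)  == 0);
      assert(encodeSigned(-1) == 3);      // magnitude 1, sign bit set
      assert(encodeSigned(2)  == 4);
      assert(decodeSigned(encodeSigned(-123456789)) == -123456789);
      return 0;
    }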
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is higher than the
+/// instruction ID, then it is a forward reference, and it also includes the
+/// type ID.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+                             SmallVector<unsigned, 64> &Vals,
+                             ValueEnumerator &VE) {
+  unsigned ValID = VE.getValueID(V);
+  Vals.push_back(ValID);
+  if (ValID >= InstID) {
+    Vals.push_back(VE.getTypeID(V->getType()));
+    return true;
+  }
+  return false;
+}
+
+/// WriteInstruction - Emit an instruction to the specified stream.
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+                             ValueEnumerator &VE, BitstreamWriter &Stream,
+                             SmallVector<unsigned, 64> &Vals) {
+  unsigned Code = 0;
+  unsigned AbbrevToUse = 0;
+  switch (I.getOpcode()) {
+  default:
+    if (Instruction::isCast(I.getOpcode())) {
+      Code = bitc::FUNC_CODE_INST_CAST;
+      if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+        AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+      Vals.push_back(VE.getTypeID(I.getType()));
+      Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+    } else {
+      assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+      Code = bitc::FUNC_CODE_INST_BINOP;
+      if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+        AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+      Vals.push_back(VE.getValueID(I.getOperand(1)));
+      Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+    }
+    break;
+
+  case Instruction::GetElementPtr:
+    Code = bitc::FUNC_CODE_INST_GEP;
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+    break;
+  case Instruction::ExtractValue: {
+    Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+    for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+      Vals.push_back(*i);
+    break;
+  }
+  case Instruction::InsertValue: {
+    Code = bitc::FUNC_CODE_INST_INSERTVAL;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+    const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+    for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+      Vals.push_back(*i);
+    break;
+  }
+  case Instruction::Select:
+    Code = bitc::FUNC_CODE_INST_VSELECT;
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    break;
+  case Instruction::ExtractElement:
+    Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    break;
+  case Instruction::InsertElement:
+    Code = bitc::FUNC_CODE_INST_INSERTELT;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    break;
+  case Instruction::ShuffleVector:
+    Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    break;
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+  case Instruction::VICmp:
+  case Instruction::VFCmp:
+    if (I.getOpcode() == Instruction::ICmp
+        || I.getOpcode() == Instruction::FCmp) {
+      // compare returning Int1Ty or vector of Int1Ty
+      Code = bitc::FUNC_CODE_INST_CMP2;
+    } else {
+      Code = bitc::FUNC_CODE_INST_CMP;
+    }
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(cast<CmpInst>(I).getPredicate());
+    break;
+
+  case Instruction::Ret:
+    {
+      Code = bitc::FUNC_CODE_INST_RET;
+      unsigned NumOperands = I.getNumOperands();
+      if (NumOperands == 0)
+        AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+      else if (NumOperands == 1) {
+        if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+          AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+      } else {
+        for (unsigned i = 0, e = NumOperands; i != e; ++i)
+          PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+      }
+    }
+    break;
+  case Instruction::Br:
+    {
+      Code = bitc::FUNC_CODE_INST_BR;
+      const BranchInst &II = cast<BranchInst>(I);
+      Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+      if (II.isConditional()) {
+        Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+        Vals.push_back(VE.getValueID(II.getCondition()));
+      }
+    }
+    break;
+  case Instruction::Switch:
+    Code = bitc::FUNC_CODE_INST_SWITCH;
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+  case Instruction::Invoke: {
+    const InvokeInst *II = cast<InvokeInst>(&I);
+    const Value *Callee(II->getCalledValue());
+    const PointerType *PTy = cast<PointerType>(Callee->getType());
+    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+    Code = bitc::FUNC_CODE_INST_INVOKE;
+
+    Vals.push_back(VE.getAttributeID(II->getAttributes()));
+    Vals.push_back(II->getCallingConv());
+    Vals.push_back(VE.getValueID(II->getNormalDest()));
+    Vals.push_back(VE.getValueID(II->getUnwindDest()));
+    PushValueAndType(Callee, InstID, Vals, VE);
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i+3)));  // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = 3+FTy->getNumParams(), e = I.getNumOperands();
+           i != e; ++i)
+        PushValueAndType(I.getOperand(i), InstID, Vals, VE);  // vararg
+    }
+    break;
+  }
+  case Instruction::Unwind:
+    Code = bitc::FUNC_CODE_INST_UNWIND;
+    break;
+  case Instruction::Unreachable:
+    Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+    AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+    break;
+
+  case Instruction::PHI:
+    Code = bitc::FUNC_CODE_INST_PHI;
+    Vals.push_back(VE.getTypeID(I.getType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+
+  case Instruction::Malloc:
+    Code = bitc::FUNC_CODE_INST_MALLOC;
+    Vals.push_back(VE.getTypeID(I.getType()));
+    Vals.push_back(VE.getValueID(I.getOperand(0)));  // size.
+    Vals.push_back(Log2_32(cast<MallocInst>(I).getAlignment())+1);
+    break;
+
+  case Instruction::Free:
+    Code = bitc::FUNC_CODE_INST_FREE;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    break;
+
+  case Instruction::Alloca:
+    Code = bitc::FUNC_CODE_INST_ALLOCA;
+    Vals.push_back(VE.getTypeID(I.getType()));
+    Vals.push_back(VE.getValueID(I.getOperand(0)));  // size.
+    Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+    break;
+
+  case Instruction::Load:
+    Code = bitc::FUNC_CODE_INST_LOAD;
+    if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))  // ptr
+      AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+
+    Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+    Vals.push_back(cast<LoadInst>(I).isVolatile());
+    break;
+  case Instruction::Store:
+    Code = bitc::FUNC_CODE_INST_STORE2;
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);  // ptrty + ptr
+    Vals.push_back(VE.getValueID(I.getOperand(0)));       // val.
+    Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+    Vals.push_back(cast<StoreInst>(I).isVolatile());
+    break;
+  case Instruction::Call: {
+    const PointerType *PTy = cast<PointerType>(I.getOperand(0)->getType());
+    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+    Code = bitc::FUNC_CODE_INST_CALL;
+
+    const CallInst *CI = cast<CallInst>(&I);
+    Vals.push_back(VE.getAttributeID(CI->getAttributes()));
+    Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall()));
+    PushValueAndType(CI->getOperand(0), InstID, Vals, VE);  // Callee
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i+1)));  // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams();
+      for (unsigned i = I.getNumOperands()-NumVarargs, e = I.getNumOperands();
+           i != e; ++i)
+        PushValueAndType(I.getOperand(i), InstID, Vals, VE);  // varargs
+    }
+    break;
+  }
+  case Instruction::VAArg:
+    Code = bitc::FUNC_CODE_INST_VAARG;
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));  // valistty
+    Vals.push_back(VE.getValueID(I.getOperand(0)));            // valist.
+    Vals.push_back(VE.getTypeID(I.getType()));                 // restype.
+    break;
+  }
+
+  Stream.EmitRecord(Code, Vals, AbbrevToUse);
+  Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+                                  const ValueEnumerator &VE,
+                                  BitstreamWriter &Stream) {
+  if (VST.empty()) return;
+  Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+  // FIXME: Set up the abbrev, we know how many values there are!
+  // FIXME: We know if the type names can use 7-bit ascii.
+  SmallVector<unsigned, 64> NameVals;
+
+  for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+       SI != SE; ++SI) {
+
+    const ValueName &Name = *SI;
+
+    // Figure out the encoding to use for the name.
+    bool is7Bit = true;
+    bool isChar6 = true;
+    for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+         C != E; ++C) {
+      if (isChar6)
+        isChar6 = BitCodeAbbrevOp::isChar6(*C);
+      if ((unsigned char)*C & 128) {
+        is7Bit = false;
+        break;  // don't bother scanning the rest.
+      }
+    }
+
+    unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+    // VST_ENTRY:   [valueid, namechar x N]
+    // VST_BBENTRY: [bbid, namechar x N]
+    unsigned Code;
+    if (isa<BasicBlock>(SI->getValue())) {
+      Code = bitc::VST_CODE_BBENTRY;
+      if (isChar6)
+        AbbrevToUse = VST_BBENTRY_6_ABBREV;
+    } else {
+      Code = bitc::VST_CODE_ENTRY;
+      if (isChar6)
+        AbbrevToUse = VST_ENTRY_6_ABBREV;
+      else if (is7Bit)
+        AbbrevToUse = VST_ENTRY_7_ABBREV;
+    }
+
+    NameVals.push_back(VE.getValueID(SI->getValue()));
+    for (const char *P = Name.getKeyData(),
+         *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+      NameVals.push_back((unsigned char)*P);
+
+    // Emit the finished record.
+    Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+    NameVals.clear();
+  }
+  Stream.ExitBlock();
+}
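Name records pick the densest of three encodings: char6, 7-bit, or 8-bit. A sketch of the char6 test the scan above relies on (a local stand-in for BitCodeAbbrevOp::isChar6; the real char6 alphabet is [a-zA-Z0-9._]):

    #include <cassert>

    static bool isChar6(char C) {
      return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
             (C >= '0' && C <= '9') || C == '.' || C == '_';
    }

    int main() {
      assert(isChar6('x') && isChar6('_') && isChar6('.'));
      assert(!isChar6('$') && !isChar6('\0'));  // falls back to 7- or 8-bit
      return 0;
    }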
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+                          BitstreamWriter &Stream) {
+  Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+  VE.incorporateFunction(F);
+
+  SmallVector<unsigned, 64> Vals;
+
+  // Emit the number of basic blocks, so the reader can create them ahead of
+  // time.
+  Vals.push_back(VE.getBasicBlocks().size());
+  Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+  Vals.clear();
+
+  // If there are function-local constants, emit them now.
+  unsigned CstStart, CstEnd;
+  VE.getFunctionConstantRange(CstStart, CstEnd);
+  WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+  // Keep a running idea of what the instruction ID is.
+  unsigned InstID = CstEnd;
+
+  // Finally, emit all the instructions, in order.
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      WriteInstruction(*I, InstID, VE, Stream, Vals);
+      if (I->getType() != Type::VoidTy)
+        ++InstID;
+    }
+
+  // Emit names for all the instructions etc.
+  WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+  VE.purgeFunction();
+  Stream.ExitBlock();
+}
+
+/// WriteTypeSymbolTable - Emit a block for the specified type symtab.
+static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
+                                 const ValueEnumerator &VE,
+                                 BitstreamWriter &Stream) {
+  if (TST.empty()) return;
+
+  Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
+
+  // 7-bit fixed width VST_CODE_ENTRY strings.
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+  unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
+
+  SmallVector<unsigned, 64> NameVals;
+
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+       TI != TE; ++TI) {
+    // TST_ENTRY: [typeid, namechar x N]
+    NameVals.push_back(VE.getTypeID(TI->second));
+
+    const std::string &Str = TI->first;
+    bool is7Bit = true;
+    for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+      NameVals.push_back((unsigned char)Str[i]);
+      if (Str[i] & 128)
+        is7Bit = false;
+    }
+
+    // Emit the finished record.
+    Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
+    NameVals.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+  // We only want to emit block info records for blocks that have multiple
+  // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
+  // blocks can define their abbrevs inline.
+  Stream.EnterBlockInfoBlock(2);
+
+  { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_8_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  { // 7-bit fixed width VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_7_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_6_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_BBENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_BBENTRY_6_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+
+
+  { // SETTYPE abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              Log2_32_Ceil(VE.getTypes().size()+1)));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_SETTYPE_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  { // INTEGER abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_INTEGER_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  { // CE_CAST abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // cast opc
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,      // typeid
+                              Log2_32_Ceil(VE.getTypes().size()+1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));    // value id
+
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_CE_CAST_Abbrev)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // NULL abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_NULL_Abbrev)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  // FIXME: This should only use space for first class types!
+
+  { // INST_LOAD abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // Ptr
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));    // Align
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // volatile
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_LOAD_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // INST_BINOP abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // LHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // RHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // opc
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_BINOP_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
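Each EmitBlockInfoAbbrev call above must hand back exactly the ID that the enum at the top of this file predicts, since records later refer to abbrevs by those enum values. A hypothetical helper that would make the contract explicit (a sketch only; the patch keeps the inline asserts):

    #include "llvm/Bitcode/BitstreamWriter.h"
    #include <cassert>
    using namespace llvm;

    // Emit a blockinfo abbrev and check that the stream assigned the ID our
    // local enum expects; the stream numbers abbrevs in emission order.
    static void EmitCheckedAbbrev(BitstreamWriter &Stream, unsigned BlockID,
                                  BitCodeAbbrev *Abbv, unsigned ExpectedID) {
      unsigned Got = Stream.EmitBlockInfoAbbrev(BlockID, Abbv);
      assert(Got == ExpectedID && "Unexpected abbrev ordering!");
      (void)Got;  // silence the unused warning in release builds
    }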
+  { // INST_CAST abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // OpVal
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,      // dest ty
+                              Log2_32_Ceil(VE.getTypes().size()+1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // opc
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_CAST_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  { // INST_RET abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // INST_RET abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));  // ValID
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+                                   Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+
+  Stream.ExitBlock();
+}
+
+
+/// WriteModule - Emit the specified module to the bitstream.
+static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+  Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+  // Emit the version number if it is non-zero.
+  if (CurVersion) {
+    SmallVector<unsigned, 1> Vals;
+    Vals.push_back(CurVersion);
+    Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+  }
+
+  // Analyze the module, enumerating globals, functions, etc.
+  ValueEnumerator VE(M);
+
+  // Emit blockinfo, which defines the standard abbreviations etc.
+  WriteBlockInfo(VE, Stream);
+
+  // Emit information about parameter attributes.
+  WriteAttributeTable(VE, Stream);
+
+  // Emit information describing all of the types in the module.
+  WriteTypeTable(VE, Stream);
+
+  // Emit top-level description of module, including target triple, inline asm,
+  // descriptors for global variables, and function prototype info.
+  WriteModuleInfo(M, VE, Stream);
+
+  // Emit constants.
+  WriteModuleConstants(VE, Stream);
+
+  // If we have any aggregate values in the value table, purge them - these can
+  // only be used to initialize global variables. Doing so makes the value
+  // namespace smaller for code in functions.
+  int NumNonAggregates = VE.PurgeAggregateValues();
+  if (NumNonAggregates != -1) {
+    SmallVector<unsigned, 1> Vals;
+    Vals.push_back(NumNonAggregates);
+    Stream.EmitRecord(bitc::MODULE_CODE_PURGEVALS, Vals);
+  }
+
+  // Emit function bodies.
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+    if (!I->isDeclaration())
+      WriteFunction(*I, VE, Stream);
+
+  // Emit the type symbol table information.
+  WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream);
+
+  // Emit names for globals/functions etc.
+  WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+  Stream.ExitBlock();
+}
+
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver. To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   uint32_t CPUType;       // CPU specifier.
+///   ... potentially more later ...
+/// };
+enum {
+  DarwinBCSizeFieldOffset = 3*4,  // Offset to bitcode_size.
+  DarwinBCHeaderSize = 5*4
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+                               const std::string &TT) {
+  unsigned CPUType = ~0U;
+
+  // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a
+  // magic number from /usr/include/mach/machine.h. It is ok to reproduce the
+  // specific constants here because they are implicitly part of the Darwin ABI.
+  enum {
+    DARWIN_CPU_ARCH_ABI64   = 0x01000000,
+    DARWIN_CPU_TYPE_X86     = 7,
+    DARWIN_CPU_TYPE_POWERPC = 18
+  };
+
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] - '3' < 6)
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+
+  // Traditional Bitcode starts after header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+
+  Stream.Emit(0x0B17C0DE, 32);
+  Stream.Emit(0, 32);         // Version.
+  Stream.Emit(BCOffset, 32);
+  Stream.Emit(0, 32);         // Filled in later.
+  Stream.Emit(CPUType, 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // Update the size field in the header.
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+
+  // If the file is not a multiple of 16 bytes, insert dummy padding.
+  while (BufferSize & 15) {
+    Stream.Emit(0, 8);
+    ++BufferSize;
+  }
+}
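The wrapper pads the whole file to a 16-byte boundary so the Darwin system archiver stays happy. A standalone sketch of the size bookkeeping, mirroring EmitDarwinBCTrailer's loop:

    #include <cassert>

    unsigned paddedSize(unsigned BufferSize) {
      while (BufferSize & 15)  // not yet a multiple of 16
        ++BufferSize;          // one zero byte of padding per step
      return BufferSize;
    }

    int main() {
      assert(paddedSize(32) == 32);  // already aligned
      assert(paddedSize(33) == 48);  // 15 bytes of padding added
      return 0;
    }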
+
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
+  raw_os_ostream RawOut(Out);
+  // If writing to stdout, set binary mode.
+  if (llvm::cout == Out)
+    sys::Program::ChangeStdoutToBinary();
+  WriteBitcodeToFile(M, RawOut);
+}
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+  std::vector<unsigned char> Buffer;
+  BitstreamWriter Stream(Buffer);
+
+  Buffer.reserve(256*1024);
+
+  WriteBitcodeToStream(M, Stream);
+
+  // If writing to stdout, set binary mode.
+  if (&llvm::outs() == &Out)
+    sys::Program::ChangeStdoutToBinary();
+
+  // Write the generated bitstream to "Out".
+  Out.write((char*)&Buffer.front(), Buffer.size());
+
+  // Make sure it hits disk now.
+  Out.flush();
+}
+
+/// WriteBitcodeToStream - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
+  // If this is darwin, emit a file header and trailer if needed.
+  bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+  if (isDarwin)
+    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+
+  // Emit the file header.
+  Stream.Emit((unsigned)'B', 8);
+  Stream.Emit((unsigned)'C', 8);
+  Stream.Emit(0x0, 4);
+  Stream.Emit(0xC, 4);
+  Stream.Emit(0xE, 4);
+  Stream.Emit(0xD, 4);
+
+  // Emit the module.
+  WriteModule(M, Stream);
+
+  if (isDarwin)
+    EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+}
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 000000000000..209cf0980d2d
--- /dev/null
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,56 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+  class WriteBitcodePass : public ModulePass {
+    // FIXME: Kill off std::ostream
+    std::ostream *Out;
+    raw_ostream *RawOut;  // raw_ostream to print on
+  public:
+    static char ID;  // Pass identification, replacement for typeid
+    explicit WriteBitcodePass(std::ostream &o)
+      : ModulePass(&ID), Out(&o), RawOut(0) {}
+    explicit WriteBitcodePass(raw_ostream &o)
+      : ModulePass(&ID), Out(0), RawOut(&o) {}
+
+    const char *getPassName() const { return "Bitcode Writer"; }
+
+    bool runOnModule(Module &M) {
+      if (Out) {
+        WriteBitcodeToFile(&M, *Out);
+      } else {
+        WriteBitcodeToFile(&M, *RawOut);
+      }
+      return false;
+    }
+  };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// CreateBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::CreateBitcodeWriterPass(std::ostream &Str) {
+  return new WriteBitcodePass(Str);
+}
+
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+  return new WriteBitcodePass(Str);
+}
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
new file mode 100644
index 000000000000..ac5bb991af5c
--- /dev/null
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMBitWriter
+  BitWriter.cpp
+  BitcodeWriter.cpp
+  BitcodeWriterPass.cpp
+  Serialize.cpp
+  SerializeAPFloat.cpp
+  SerializeAPInt.cpp
+  ValueEnumerator.cpp
+  )
diff --git a/lib/Bitcode/Writer/Makefile b/lib/Bitcode/Writer/Makefile
new file mode 100644
index 000000000000..7b0bd72159ad
--- /dev/null
+++ b/lib/Bitcode/Writer/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Writer/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitWriter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
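A sketch of client code for the std::ostream entry point defined in BitcodeWriter.cpp above (it assumes an existing Module pointer; error handling is elided):

    #include <fstream>
    #include "llvm/Module.h"
    #include "llvm/Bitcode/ReaderWriter.h"
    using namespace llvm;

    void saveModule(const Module *M) {
      // Binary mode matters on platforms that translate line endings.
      std::ofstream Out("out.bc", std::ios::out | std::ios::binary);
      if (Out)
        WriteBitcodeToFile(M, Out);  // the std::ostream overload above
    }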
diff --git a/lib/Bitcode/Writer/Serialize.cpp b/lib/Bitcode/Writer/Serialize.cpp
new file mode 100644
index 000000000000..79464a61be46
--- /dev/null
+++ b/lib/Bitcode/Writer/Serialize.cpp
@@ -0,0 +1,118 @@
+//==- Serialize.cpp - Generic Object Serialization to Bitcode ----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the internal methods used for object serialization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Serialize.h"
+#include "string.h"
+
+#ifdef DEBUG_BACKPATCH
+#include "llvm/Support/Streams.h"
+#endif
+
+using namespace llvm;
+
+Serializer::Serializer(BitstreamWriter& stream)
+  : Stream(stream), BlockLevel(0) {}
+
+Serializer::~Serializer() {
+  if (inRecord())
+    EmitRecord();
+
+  while (BlockLevel > 0)
+    Stream.ExitBlock();
+
+  Stream.FlushToWord();
+}
+
+void Serializer::EmitRecord() {
+  assert(Record.size() > 0 && "Cannot emit empty record.");
+  Stream.EmitRecord(8, Record);
+  Record.clear();
+}
+
+void Serializer::EnterBlock(unsigned BlockID, unsigned CodeLen) {
+  FlushRecord();
+  Stream.EnterSubblock(BlockID, CodeLen);
+  ++BlockLevel;
+}
+
+void Serializer::ExitBlock() {
+  assert(BlockLevel > 0);
+  --BlockLevel;
+  FlushRecord();
+  Stream.ExitBlock();
+}
+
+void Serializer::EmitInt(uint64_t X) {
+  assert(BlockLevel > 0);
+  Record.push_back(X);
+}
+
+void Serializer::EmitSInt(int64_t X) {
+  if (X >= 0)
+    EmitInt(X << 1);
+  else
+    EmitInt((-X << 1) | 1);
+}
+
+void Serializer::EmitCStr(const char* s, const char* end) {
+  Record.push_back(end - s);
+
+  while (s != end) {
+    Record.push_back(*s);
+    ++s;
+  }
+}
+
+void Serializer::EmitCStr(const char* s) {
+  EmitCStr(s, s+strlen(s));
+}
+
+SerializedPtrID Serializer::getPtrId(const void* ptr) {
+  if (!ptr)
+    return 0;
+
+  MapTy::iterator I = PtrMap.find(ptr);
+
+  if (I == PtrMap.end()) {
+    unsigned id = PtrMap.size()+1;
+#ifdef DEBUG_BACKPATCH
+    llvm::cerr << "Registered PTR: " << ptr << " => " << id << "\n";
+#endif
+    PtrMap[ptr] = id;
+    return id;
+  }
+  else return I->second;
+}
+
+bool Serializer::isRegistered(const void* ptr) const {
+  MapTy::const_iterator I = PtrMap.find(ptr);
+  return I != PtrMap.end();
+}
+
+
+#define INT_EMIT(TYPE)\
+void SerializeTrait<TYPE>::Emit(Serializer&S, TYPE X) { S.EmitInt(X); }
+
+INT_EMIT(bool)
+INT_EMIT(unsigned char)
+INT_EMIT(unsigned short)
+INT_EMIT(unsigned int)
+INT_EMIT(unsigned long)
+
+#define SINT_EMIT(TYPE)\
+void SerializeTrait<TYPE>::Emit(Serializer&S, TYPE X) { S.EmitSInt(X); }
+
+SINT_EMIT(signed char)
+SINT_EMIT(signed short)
+SINT_EMIT(signed int)
+SINT_EMIT(signed long)
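Serializer::getPtrId above hands out dense 1-based IDs with 0 reserved for null, so pointers can be written as small back-references. The same bookkeeping in miniature (a sketch; std::map stands in for the real MapTy):

    #include <cassert>
    #include <map>

    typedef unsigned SerializedPtrID;

    static std::map<const void*, unsigned> PtrMap;

    SerializedPtrID getPtrId(const void *Ptr) {
      if (!Ptr) return 0;                       // null is always 0
      std::map<const void*, unsigned>::iterator I = PtrMap.find(Ptr);
      if (I != PtrMap.end()) return I->second;  // already registered
      unsigned Id = PtrMap.size() + 1;          // IDs start at 1
      PtrMap[Ptr] = Id;
      return Id;
    }

    int main() {
      int A, B;
      assert(getPtrId(0)  == 0);
      assert(getPtrId(&A) == 1);
      assert(getPtrId(&B) == 2);
      assert(getPtrId(&A) == 1);  // stable on re-query
      return 0;
    }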
diff --git a/lib/Bitcode/Writer/SerializeAPFloat.cpp b/lib/Bitcode/Writer/SerializeAPFloat.cpp
new file mode 100644
index 000000000000..25d954faa138
--- /dev/null
+++ b/lib/Bitcode/Writer/SerializeAPFloat.cpp
@@ -0,0 +1,21 @@
+//===-- SerializeAPFloat.cpp - Serialization for APFloat -------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements serialization of APFloat.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Bitcode/Serialize.h"
+
+using namespace llvm;
+
+void APFloat::Emit(Serializer& S) const {
+  S.Emit(bitcastToAPInt());
+}
diff --git a/lib/Bitcode/Writer/SerializeAPInt.cpp b/lib/Bitcode/Writer/SerializeAPInt.cpp
new file mode 100644
index 000000000000..47792c7d0894
--- /dev/null
+++ b/lib/Bitcode/Writer/SerializeAPInt.cpp
@@ -0,0 +1,31 @@
+//===-- SerializeAPInt.cpp - Serialization for APInts ----------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements serialization of APInts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Bitcode/Serialize.h"
+#include <cassert>
+
+using namespace llvm;
+
+void APInt::Emit(Serializer& S) const {
+  S.EmitInt(BitWidth);
+
+  if (isSingleWord())
+    S.EmitInt(VAL);
+  else {
+    uint32_t NumWords = getNumWords();
+    S.EmitInt(NumWords);
+    for (unsigned i = 0; i < NumWords; ++i)
+      S.EmitInt(pVal[i]);
+  }
+}
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 000000000000..8002a36b4745
--- /dev/null
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,347 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isSingleValueType(const std::pair<const Type*, unsigned> &P) {
+  return P.first->isSingleValueType();
+}
+
+static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
+  return isa<IntegerType>(V.first->getType());
+}
+
+static bool CompareByFrequency(const std::pair<const Type*, unsigned> &P1,
+                               const std::pair<const Type*, unsigned> &P2) {
+  return P1.second > P2.second;
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+  // Enumerate the global variables.
+  for (Module::const_global_iterator I = M->global_begin(),
+       E = M->global_end(); I != E; ++I)
+    EnumerateValue(I);
+
+  // Enumerate the functions.
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    EnumerateValue(I);
+    EnumerateAttributes(cast<Function>(I)->getAttributes());
+  }
+
+  // Enumerate the aliases.
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    EnumerateValue(I);
+
+  // Remember the cutoff between globalvalues and other constants.
+  unsigned FirstConstant = Values.size();
+
+  // Enumerate the global variable initializers.
+  for (Module::const_global_iterator I = M->global_begin(),
+       E = M->global_end(); I != E; ++I)
+    if (I->hasInitializer())
+      EnumerateValue(I->getInitializer());
+
+  // Enumerate the aliasees.
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    EnumerateValue(I->getAliasee());
+
+  // Enumerate types used by the type symbol table.
+  EnumerateTypeSymbolTable(M->getTypeSymbolTable());
+
+  // Insert constants that are named at module level into the slot pool so that
+  // the module symbol table can refer to them...
+  EnumerateValueSymbolTable(M->getValueSymbolTable());
+
+  // Enumerate types used by function bodies and argument lists.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+         I != E; ++I)
+      EnumerateType(I->getType());
+
+    for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+        for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+             OI != E; ++OI)
+          EnumerateOperandType(*OI);
+        EnumerateType(I->getType());
+        if (const CallInst *CI = dyn_cast<CallInst>(I))
+          EnumerateAttributes(CI->getAttributes());
+        else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+          EnumerateAttributes(II->getAttributes());
+      }
+  }
+
+  // Optimize constant ordering.
+  OptimizeConstants(FirstConstant, Values.size());
+
+  // Sort the type table by frequency so that most commonly used types are early
+  // in the table (have low bit-width).
+  std::stable_sort(Types.begin(), Types.end(), CompareByFrequency);
+
+  // Partition the Type ID's so that the single-value types occur before the
+  // aggregate types. This allows the aggregate types to be dropped from the
+  // type table after parsing the global variable initializers.
+  std::partition(Types.begin(), Types.end(), isSingleValueType);
+
+  // Now that we rearranged the type table, rebuild TypeMap.
+  for (unsigned i = 0, e = Types.size(); i != e; ++i)
+    TypeMap[Types[i].first] = i+1;
+}
+
+// Optimize constant ordering.
+namespace {
+  struct CstSortPredicate {
+    ValueEnumerator &VE;
+    explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
+    bool operator()(const std::pair<const Value*, unsigned> &LHS,
+                    const std::pair<const Value*, unsigned> &RHS) {
+      // Sort by plane.
+      if (LHS.first->getType() != RHS.first->getType())
+        return VE.getTypeID(LHS.first->getType()) <
+               VE.getTypeID(RHS.first->getType());
+      // Then by frequency.
+      return LHS.second > RHS.second;
+    }
+  };
+}
+
+/// OptimizeConstants - Reorder constant pool for denser encoding.
+void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
+  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+
+  CstSortPredicate P(*this);
+  std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
+
+  // Ensure that integer constants are at the start of the constant pool. This
+  // is important so that GEP structure indices come before gep constant exprs.
+  std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
+                 isIntegerValue);
+
+  // Rebuild the modified portion of ValueMap.
+  for (; CstStart != CstEnd; ++CstStart)
+    ValueMap[Values[CstStart].first] = CstStart+1;
+}
+
+
+/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
+/// table.
+void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+       TI != TE; ++TI)
+    EnumerateType(TI->second);
+}
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+
+/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
+/// table.
+void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+       TI != TE; ++TI)
+    EnumerateType(TI->second);
+}
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+       VI != VE; ++VI)
+    EnumerateValue(VI->getValue());
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+  assert(V->getType() != Type::VoidTy && "Can't insert void values!");
+
+  // Check to see if it's already in!
+  unsigned &ValueID = ValueMap[V];
+  if (ValueID) {
+    // Increment use count.
+    Values[ValueID-1].second++;
+    return;
+  }
+
+  // Enumerate the type of this value.
+  EnumerateType(V->getType());
+
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<GlobalValue>(C)) {
+      // Initializers for globals are handled explicitly elsewhere.
+    } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+      // Do not enumerate the initializers for an array of simple characters.
+      // The initializers just pollute the value table, and we emit the strings
+      // specially.
+    } else if (C->getNumOperands()) {
+      // If a constant has operands, enumerate them.  This makes sure that if a
+      // constant has uses (for example an array of const ints), that they are
+      // inserted also.
+
+      // We prefer to enumerate the operands before we enumerate the user
+      // itself.  This makes it more likely that we can avoid forward references
+      // in the reader.  We know that there can be no cycles in the constants
+      // graph that don't go through a global variable.
+      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+           I != E; ++I)
+        EnumerateValue(*I);
+
+      // Finally, add the value.  Doing this could make the ValueID reference
+      // dangle, so don't reuse it.
+      Values.push_back(std::make_pair(V, 1U));
+      ValueMap[V] = Values.size();
+      return;
+    } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
+      for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
+           I != E; ++I) {
+        if (*I)
+          EnumerateValue(*I);
+        else
+          EnumerateType(Type::VoidTy);
+      }
+
+      Values.push_back(std::make_pair(V, 1U));
+      ValueMap[V] = Values.size();
+      return;
+    }
+  }
+
+  // Add the value.
+  Values.push_back(std::make_pair(V, 1U));
+  ValueID = Values.size();
+}
+
+
+void ValueEnumerator::EnumerateType(const Type *Ty) {
+  unsigned &TypeID = TypeMap[Ty];
+
+  if (TypeID) {
+    // If we've already seen this type, just increase its occurrence count.
+    Types[TypeID-1].second++;
+    return;
+  }
+
+  // First time we saw this type, add it.
+  Types.push_back(std::make_pair(Ty, 1U));
+  TypeID = Types.size();
+
+  // Enumerate subtypes.
+  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+       I != E; ++I)
+    EnumerateType(*I);
+}
+
+// Enumerate the types for the specified value.  If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+  EnumerateType(V->getType());
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    // If this constant is already enumerated, ignore it; we know its type must
+    // be enumerated.
+    if (ValueMap.count(V)) return;
+
+    // This constant may have operands; make sure to enumerate the types in
+    // them.
+    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+      EnumerateOperandType(C->getOperand(i));
+
+    if (const MDNode *N = dyn_cast<MDNode>(V)) {
+      for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
+        EnumerateOperandType(N->getElement(i));
+    }
+  }
+}
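EnumerateValue's operands-before-user order is effectively a post-order walk of the constant graph: by the time a user is numbered, every operand already has a smaller ID, so a reader consuming values in ID order never needs a forward reference. The same discipline on a generic DAG, as a minimal sketch (the Node type is hypothetical):

#include <unordered_map>
#include <vector>

struct Node { std::vector<const Node*> Operands; };

static void enumerate(const Node *N,
                      std::unordered_map<const Node*, unsigned> &ID,
                      std::vector<const Node*> &Order) {
  if (ID.count(N)) return;       // already numbered; DAG nodes can be shared
  for (size_t i = 0; i != N->Operands.size(); ++i)
    enumerate(N->Operands[i], ID, Order);
  // Assign the ID only after the recursion: the recursive calls grow the
  // map, which is exactly why EnumerateValue above refuses to reuse its
  // ValueID reference once it has enumerated the operands.
  Order.push_back(N);
  ID[N] = Order.size();          // 1-based, matching the scheme above
}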
+
+void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+  if (PAL.isEmpty()) return;  // null is always 0.
+  // Do a lookup.
+  unsigned &Entry = AttributeMap[PAL.getRawPointer()];
+  if (Entry == 0) {
+    // Never saw this before, add it.
+    Attributes.push_back(PAL);
+    Entry = Attributes.size();
+  }
+}
+
+
+/// PurgeAggregateValues - If there are any aggregate values at the end of the
+/// value list, remove them and return the count of the remaining values.  If
+/// there are none, return -1.
+int ValueEnumerator::PurgeAggregateValues() {
+  // If there are no aggregate values at the end of the list, return -1.
+  if (Values.empty() || Values.back().first->getType()->isSingleValueType())
+    return -1;
+
+  // Otherwise, remove aggregate values...
+  while (!Values.empty() &&
+         !Values.back().first->getType()->isSingleValueType())
+    Values.pop_back();
+
+  // ... and return the new size.
+  return Values.size();
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+  NumModuleValues = Values.size();
+
+  // Add the function arguments to the value table.
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I)
+    EnumerateValue(I);
+
+  FirstFuncConstantID = Values.size();
+
+  // Add all function-level constants to the value table.
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+           OI != E; ++OI) {
+        if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+            isa<InlineAsm>(*OI))
+          EnumerateValue(*OI);
+      }
+    BasicBlocks.push_back(BB);
+    ValueMap[BB] = BasicBlocks.size();
+  }
+
+  // Optimize the constant layout.
+  OptimizeConstants(FirstFuncConstantID, Values.size());
+
+  // Add the function's parameter attributes so they are available for use in
+  // the function's instructions.
+  EnumerateAttributes(F.getAttributes());
+
+  FirstInstID = Values.size();
+
+  // Add all of the instructions.
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+      if (I->getType() != Type::VoidTy)
+        EnumerateValue(I);
+    }
+  }
+}
+
+void ValueEnumerator::purgeFunction() {
+  // Remove purged values from the ValueMap.
+  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+    ValueMap.erase(Values[i].first);
+  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+    ValueMap.erase(BasicBlocks[i]);
+
+  Values.resize(NumModuleValues);
+  BasicBlocks.clear();
+}
+
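incorporateFunction and purgeFunction bracket a function's numbering in a checkpoint-and-rollback discipline: record the table size, append function-local IDs on top of the module-level ones, then erase exactly those entries and truncate. The same discipline in a self-contained sketch (illustrative names):

#include <string>
#include <unordered_map>
#include <vector>

class Numbering {
  std::vector<std::string> Items;
  std::unordered_map<std::string, unsigned> IDs;
  size_t Checkpoint;
public:
  Numbering() : Checkpoint(0) {}
  void add(const std::string &S) {
    if (IDs.insert(std::make_pair(S, Items.size() + 1)).second)  // 1-based
      Items.push_back(S);
  }
  void beginScope() { Checkpoint = Items.size(); }  // cf. incorporateFunction
  void endScope() {                                 // cf. purgeFunction
    // Erase everything added since the checkpoint, then truncate the list.
    for (size_t i = Checkpoint, e = Items.size(); i != e; ++i)
      IDs.erase(Items[i]);
    Items.resize(Checkpoint);
  }
};

Entries added before beginScope keep their IDs across scopes, just as module-level values keep theirs across functions.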
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
new file mode 100644
index 000000000000..bb0324b1c572
--- /dev/null
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -0,0 +1,127 @@
+//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types unique IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUE_ENUMERATOR_H
+#define VALUE_ENUMERATOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Attributes.h"
+#include <vector>
+
+namespace llvm {
+
+class Type;
+class Value;
+class BasicBlock;
+class Function;
+class Module;
+class AttrListPtr;
+class TypeSymbolTable;
+class ValueSymbolTable;
+
+class ValueEnumerator {
+public:
+  // For each type, we remember its Type* and occurrence frequency.
+  typedef std::vector<std::pair<const Type*, unsigned> > TypeList;
+
+  // For each value, we remember its Value* and occurrence frequency.
+  typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+  typedef DenseMap<const Type*, unsigned> TypeMapType;
+  TypeMapType TypeMap;
+  TypeList Types;
+
+  typedef DenseMap<const Value*, unsigned> ValueMapType;
+  ValueMapType ValueMap;
+  ValueList Values;
+
+  typedef DenseMap<void*, unsigned> AttributeMapType;
+  AttributeMapType AttributeMap;
+  std::vector<AttrListPtr> Attributes;
+
+  /// BasicBlocks - This contains all the basic blocks for the currently
+  /// incorporated function.  Their reverse mapping is stored in ValueMap.
+  std::vector<const BasicBlock*> BasicBlocks;
+
+  /// When a function is incorporated, this is the size of the Values list
+  /// before incorporation.
+  unsigned NumModuleValues;
+  unsigned FirstFuncConstantID;
+  unsigned FirstInstID;
+
+  ValueEnumerator(const ValueEnumerator &);  // DO NOT IMPLEMENT
+  void operator=(const ValueEnumerator &);   // DO NOT IMPLEMENT
+public:
+  ValueEnumerator(const Module *M);
+
+  unsigned getValueID(const Value *V) const {
+    ValueMapType::const_iterator I = ValueMap.find(V);
+    assert(I != ValueMap.end() && "Value not in slotcalculator!");
+    return I->second-1;
+  }
+
+  unsigned getTypeID(const Type *T) const {
+    TypeMapType::const_iterator I = TypeMap.find(T);
+    assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+    return I->second-1;
+  }
+
+  unsigned getAttributeID(const AttrListPtr &PAL) const {
+    if (PAL.isEmpty()) return 0;  // Null maps to zero.
+    AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+    assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+    return I->second;
+  }
+
+  /// getFunctionConstantRange - Return the range of values that corresponds to
+  /// function-local constants.
+  void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+    Start = FirstFuncConstantID;
+    End = FirstInstID;
+  }
+
+  const ValueList &getValues() const { return Values; }
+  const TypeList &getTypes() const { return Types; }
+  const std::vector<const BasicBlock*> &getBasicBlocks() const {
+    return BasicBlocks;
+  }
+  const std::vector<AttrListPtr> &getAttributes() const {
+    return Attributes;
+  }
+
+  /// PurgeAggregateValues - If there are any aggregate values at the end of
+  /// the value list, remove them and return the count of the remaining values.
+  /// If there are none, return -1.
+  int PurgeAggregateValues();
+
+  /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+  /// use these two methods to get its data into the ValueEnumerator!
+  ///
+  void incorporateFunction(const Function &F);
+  void purgeFunction();
+
+private:
+  void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+
+  void EnumerateValue(const Value *V);
+  void EnumerateType(const Type *T);
+  void EnumerateOperandType(const Value *V);
+  void EnumerateAttributes(const AttrListPtr &PAL);
+
+  void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
+  void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..45462da0d26a
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1724 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include +using namespace llvm; + +static cl::opt +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +char AsmPrinter::ID = 0; +AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, + const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef) + : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o), + TM(tm), TAI(T), TRI(tm.getRegisterInfo()), + IsInTextSection(false) +{ + switch (AsmVerbose) { + case cl::BOU_UNSET: VerboseAsm = VDef; break; + case cl::BOU_TRUE: VerboseAsm = true; break; + case cl::BOU_FALSE: VerboseAsm = false; break; + } +} + +AsmPrinter::~AsmPrinter() { + for (gcp_iterator I = GCMetadataPrinters.begin(), + E = GCMetadataPrinters.end(); I != E; ++I) + delete I->second; +} + +/// SwitchToTextSection - Switch to the specified text section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToTextSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. + if (CurrentSection == NS) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; + + IsInTextSection = true; +} + +/// SwitchToDataSection - Switch to the specified data section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToDataSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. + if (CurrentSection == NS) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; + + IsInTextSection = false; +} + +/// SwitchToSection - Switch to the specified section of the executable if we +/// are not already in it! 
+void AsmPrinter::SwitchToSection(const Section* NS) { + const std::string& NewSection = NS->getName(); + + // If we're already in this section, we're done. + if (CurrentSection == NewSection) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + // FIXME: Make CurrentSection a Section* in the future + CurrentSection = NewSection; + CurrentSection_ = NS; + + if (!CurrentSection.empty()) { + // If section is named we need to switch into it via special '.section' + // directive and also append funky flags. Otherwise - section name is just + // some magic assembler directive. + if (NS->isNamed()) + O << TAI->getSwitchToSectionDirective() + << CurrentSection + << TAI->getSectionFlags(NS->getFlags()); + else + O << CurrentSection; + O << TAI->getDataSectionStartSuffix() << '\n'; + } + + IsInTextSection = (NS->getFlags() & SectionFlags::Code); +} + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); +} + +bool AsmPrinter::doInitialization(Module &M) { + Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + + GCModuleInfo *MI = getAnalysisIfAvailable(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + + if (TAI->hasSingleParameterDotFile()) { + /* Very minimal debug info. It is ignored if we emit actual + debug info. If we don't, this at helps the user find where + a function came from. */ + O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; + } + + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(O, *this, *TAI); + + if (!M.getModuleInlineAsm().empty()) + O << TAI->getCommentString() << " Start of file scope inline assembly\n" + << M.getModuleInlineAsm() + << '\n' << TAI->getCommentString() + << " End of file scope inline assembly\n"; + + SwitchToDataSection(""); // Reset back to no section. 
+ + MachineModuleInfo *MMI = getAnalysisIfAvailable(); + if (MMI) MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable(); + return false; +} + +bool AsmPrinter::doFinalization(Module &M) { + if (TAI->getWeakRefDirective()) { + if (!ExtWeakSymbols.empty()) + SwitchToDataSection(""); + + for (std::set::iterator i = ExtWeakSymbols.begin(), + e = ExtWeakSymbols.end(); i != e; ++i) + O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n'; + } + + if (TAI->getSetDirective()) { + if (!M.alias_empty()) + SwitchToSection(TAI->getTextSection()); + + O << '\n'; + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); + I!=E; ++I) { + std::string Name = Mang->getValueName(I); + std::string Target; + + const GlobalValue *GV = cast(I->getAliasedGlobal()); + Target = Mang->getValueName(GV); + + if (I->hasExternalLinkage() || !TAI->getWeakRefDirective()) + O << "\t.globl\t" << Name << '\n'; + else if (I->hasWeakLinkage()) + O << TAI->getWeakRefDirective() << Name << '\n'; + else if (!I->hasLocalLinkage()) + assert(0 && "Invalid alias linkage"); + + printVisibility(Name, I->getVisibility()); + + O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; + } + } + + GCModuleInfo *MI = getAnalysisIfAvailable(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) + MP->finishAssembly(O, *this, *TAI); + + // If we don't have any trampolines, then we don't require stack memory + // to be executable. Some targets have a directive to declare this. + Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); + if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) + if (TAI->getNonexecutableStackDirective()) + O << TAI->getNonexecutableStackDirective() << '\n'; + + delete Mang; Mang = 0; + return false; +} + +const std::string & +AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF, + std::string &Name) const { + assert(MF && "No machine function?"); + Name = MF->getFunction()->getName(); + if (Name.empty()) + Name = Mang->getValueName(MF->getFunction()); + Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() + + Name + ".eh", TAI->getGlobalPrefix()); + return Name; +} + +void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + // What's my mangled name? + CurrentFnName = Mang->getValueName(MF.getFunction()); + IncrementFunctionNumber(); +} + +namespace { + // SectionCPs - Keep track the alignment, constpool entries per Section. + struct SectionCPs { + const Section *S; + unsigned Alignment; + SmallVector CPEs; + SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}; + }; +} + +/// EmitConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +/// +void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { + const std::vector &CP = MCP->getConstants(); + if (CP.empty()) return; + + // Calculate sections for constant pool entries. We collect entries to go into + // the same section together to reduce amount of section switch statements. 
+ SmallVector CPSections; + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = CP[i]; + unsigned Align = CPE.getAlignment(); + const Section* S = TAI->SelectSectionForMachineConst(CPE.getType()); + // The number of sections are small, just do a linear search from the + // last section to the first. + bool Found = false; + unsigned SecIdx = CPSections.size(); + while (SecIdx != 0) { + if (CPSections[--SecIdx].S == S) { + Found = true; + break; + } + } + if (!Found) { + SecIdx = CPSections.size(); + CPSections.push_back(SectionCPs(S, Align)); + } + + if (Align > CPSections[SecIdx].Alignment) + CPSections[SecIdx].Alignment = Align; + CPSections[SecIdx].CPEs.push_back(i); + } + + // Now print stuff into the calculated sections. + for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { + SwitchToSection(CPSections[i].S); + EmitAlignment(Log2_32(CPSections[i].Alignment)); + + unsigned Offset = 0; + for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { + unsigned CPI = CPSections[i].CPEs[j]; + MachineConstantPoolEntry CPE = CP[CPI]; + + // Emit inter-object padding for alignment. + unsigned AlignMask = CPE.getAlignment() - 1; + unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; + EmitZeros(NewOffset - Offset); + + const Type *Ty = CPE.getType(); + Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); + + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CPI << ":\t\t\t\t\t"; + if (VerboseAsm) { + O << TAI->getCommentString() << ' '; + WriteTypeSymbolic(O, CPE.getType(), 0); + } + O << '\n'; + if (CPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); + else + EmitGlobalConstant(CPE.Val.ConstVal); + } + } +} + +/// EmitJumpTableInfo - Print assembly representations of the jump tables used +/// by the current function to the current output stream. +/// +void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, + MachineFunction &MF) { + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + TargetLowering *LoweringInfo = TM.getTargetLowering(); + + const char* JumpTableDataSection = TAI->getJumpTableDataSection(); + const Function *F = MF.getFunction(); + unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); + if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || + !JumpTableDataSection || + SectionFlags & SectionFlags::Linkonce) { + // In PIC mode, we need to emit the jump table to the same section as the + // function body itself, otherwise the label differences won't make sense. + // We should also do if the section name is NULL or function is declared in + // discardable section. + SwitchToSection(TAI->SectionForGlobal(F)); + } else { + SwitchToDataSection(JumpTableDataSection); + } + + EmitAlignment(Log2_32(MJTI->getAlignment())); + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector &JTBBs = JT[i].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For PIC codegen, if possible we want to use the SetDirective to reduce + // the number of relocations the assembler will generate for the jump table. + // Set directives are all printed before the jump table itself. 
+ SmallPtrSet EmittedSets; + if (TAI->getSetDirective() && IsPic) + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + if (EmittedSets.insert(JTBBs[ii])) + printPICJumpTableSetLabel(i, JTBBs[ii]); + + // On some targets (e.g. darwin) we want to emit two consequtive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) + O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << i << ":\n"; + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + printPICJumpTableEntry(MJTI, JTBBs[ii], i); + O << '\n'; + } + } +} + +void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const { + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + + // Use JumpTableDirective otherwise honor the entry size from the jump table + // info. + const char *JTEntryDirective = TAI->getJumpTableDirective(); + bool HadJTEntryDirective = JTEntryDirective != NULL; + if (!HadJTEntryDirective) { + JTEntryDirective = MJTI->getEntrySize() == 4 ? + TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + } + + O << JTEntryDirective << ' '; + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + // If we're emitting non-PIC code, then emit the entries as direct + // references to the target basic blocks. + if (IsPic) { + if (TAI->getSetDirective()) { + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); + } else { + printBasicBlockLabel(MBB, false, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid; + } + } else { + printBasicBlockLabel(MBB, false, false, false); + } +} + + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (TAI->getUsedDirective() != 0) // No need to emit this at all. + EmitLLVMUsedList(GV->getInitializer()); + return true; + } + + // Ignore debug and non-emitted data. 
+  if (GV->getSection() == "llvm.metadata" ||
+      GV->hasAvailableExternallyLinkage())
+    return true;
+
+  if (!GV->hasAppendingLinkage()) return false;
+
+  assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+  const TargetData *TD = TM.getTargetData();
+  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+  if (GV->getName() == "llvm.global_ctors") {
+    SwitchToDataSection(TAI->getStaticCtorsSection());
+    EmitAlignment(Align, 0);
+    EmitXXStructorList(GV->getInitializer());
+    return true;
+  }
+
+  if (GV->getName() == "llvm.global_dtors") {
+    SwitchToDataSection(TAI->getStaticDtorsSection());
+    EmitAlignment(Align, 0);
+    EmitXXStructorList(GV->getInitializer());
+    return true;
+  }
+
+  return false;
+}
+
+/// findGlobalValue - If CV is an expression equivalent to a single
+/// global value, return that value.
+const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+    return GV;
+  else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    const TargetData *TD = TM.getTargetData();
+    unsigned Opcode = CE->getOpcode();
+    switch (Opcode) {
+    case Instruction::GetElementPtr: {
+      const Constant *ptrVal = CE->getOperand(0);
+      SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+      if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size()))
+        return 0;
+      return findGlobalValue(ptrVal);
+    }
+    case Instruction::BitCast:
+      return findGlobalValue(CE->getOperand(0));
+    default:
+      return 0;
+    }
+  }
+  return 0;
+}
+
+/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+  const char *Directive = TAI->getUsedDirective();
+
+  // Should be an array of 'sbyte*'.
+  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    const GlobalValue *GV = findGlobalValue(InitList->getOperand(i));
+    if (TAI->emitUsedDirectiveFor(GV, Mang)) {
+      O << Directive;
+      EmitConstantValueOnly(InitList->getOperand(i));
+      O << '\n';
+    }
+  }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+  // Should be an array of '{ int, void ()* }' structs.  The first value is the
+  // init priority, which we ignore.
+  if (!isa<ConstantArray>(List)) return;
+  ConstantArray *InitList = cast<ConstantArray>(List);
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+      if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
+
+      if (CS->getOperand(1)->isNullValue())
+        return;  // Found a null terminator, exit printing.
+      // Emit the function pointer.
+      EmitGlobalConstant(CS->getOperand(1));
+    }
+}
+
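EmitXXStructorList above walks an array of { priority, function pointer } records, skips the priority word, and stops at a null function pointer. The same walk in miniature, using plain C++ types for the llvm.global_ctors record shape described in the comment (directive and output format are examples only):

#include <cstdio>

typedef void (*Ctor)();                   // the 'void ()*' field
struct Structor { int Priority; Ctor Fn; };

static void hello() { std::puts("ctor"); }

static void emitStructorList(const Structor *List, unsigned N) {
  for (unsigned i = 0; i != N; ++i) {
    if (!List[i].Fn) return;              // null terminator ends the list
    std::printf("\t.quad %p\n", (void*)List[i].Fn);  // pointer only
  }
}

int main() {
  const Structor List[] = { { 65535, hello }, { 0, 0 } };
  emitStructorList(List, 2);              // prints one .quad line
  return 0;
}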
+/// getGlobalLinkName - Returns the asm/link name of the specified
+/// global variable.  Should be overridden by each target asm printer to
+/// generate the appropriate value.
+const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV,
+                                                 std::string &LinkName) const {
+  if (isa<Function>(GV)) {
+    LinkName += TAI->getFunctionAddrPrefix();
+    LinkName += Mang->getValueName(GV);
+    LinkName += TAI->getFunctionAddrSuffix();
+  } else {
+    LinkName += TAI->getGlobalVarAddrPrefix();
+    LinkName += Mang->getValueName(GV);
+    LinkName += TAI->getGlobalVarAddrSuffix();
+  }
+
+  return LinkName;
+}
+
+/// EmitExternalGlobal - Emit the external reference to a global variable.
+/// Should be overridden if an indirect reference should be used.
+void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+  std::string GLN;
+  O << getGlobalLinkName(GV, GLN);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+/// LEB 128 number encoding.
+
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing an unsigned leb128 value.
+void AsmPrinter::PrintULEB128(unsigned Value) const {
+  char Buffer[20];
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    if (Value) Byte |= 0x80;
+    O << "0x" << utohex_buffer(Byte, Buffer+20);
+    if (Value) O << ", ";
+  } while (Value);
+}
+
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing a signed leb128 value.
+void AsmPrinter::PrintSLEB128(int Value) const {
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+  char Buffer[20];
+
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    if (IsMore) Byte |= 0x80;
+    O << "0x" << utohex_buffer(Byte, Buffer+20);
+    if (IsMore) O << ", ";
+  } while (IsMore);
+}
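As a cross-check of the byte sequences PrintULEB128 and PrintSLEB128 print above, here is a stand-alone sketch that produces the same encodings into a byte buffer instead of an assembler stream (plain C++; the function names are illustrative):

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 data bits per byte, high bit set on all but the last.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value) Byte |= 0x80;            // more bytes follow
    Out.push_back(Byte);
  } while (Value);
}

// Signed LEB128: stop once the remaining value is all sign bits and the
// sign bit of the emitted byte agrees - the condition the IsMore test in
// PrintSLEB128 expresses as 'Value != Sign || ((Byte ^ Sign) & 0x40)'.
static void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;                        // arithmetic shift keeps the sign
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More) Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

For example, encodeULEB128(624485, Out) appends 0xe5 0x8e 0x26, exactly the sequence PrintULEB128 would print as "0xe5, 0x8e, 0x26".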
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// PrintHex - Print a value as a hexadecimal value.
+///
+void AsmPrinter::PrintHex(int Value) const {
+  char Buffer[20];
+  O << "0x" << utohex_buffer(static_cast<unsigned>(Value), Buffer+20);
+}
+
+/// EOL - Print a newline character to asm stream.  If a comment is present
+/// then it will be printed first.  Comments should not contain '\n'.
+void AsmPrinter::EOL() const {
+  O << '\n';
+}
+
+void AsmPrinter::EOL(const std::string &Comment) const {
+  if (VerboseAsm && !Comment.empty()) {
+    O << '\t'
+      << TAI->getCommentString()
+      << ' '
+      << Comment;
+  }
+  O << '\n';
+}
+
+void AsmPrinter::EOL(const char* Comment) const {
+  if (VerboseAsm && *Comment) {
+    O << '\t'
+      << TAI->getCommentString()
+      << ' '
+      << Comment;
+  }
+  O << '\n';
+}
+
+/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
+/// unsigned leb128 value.
+void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
+  if (TAI->hasLEB128()) {
+    O << "\t.uleb128\t"
+      << Value;
+  } else {
+    O << TAI->getData8bitsDirective();
+    PrintULEB128(Value);
+  }
+}
+
+/// EmitSLEB128Bytes - Emit an assembler byte data directive to compose a
+/// signed leb128 value.
+void AsmPrinter::EmitSLEB128Bytes(int Value) const {
+  if (TAI->hasLEB128()) {
+    O << "\t.sleb128\t"
+      << Value;
+  } else {
+    O << TAI->getData8bitsDirective();
+    PrintSLEB128(Value);
+  }
+}
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+  O << TAI->getData8bitsDirective();
+  PrintHex(Value & 0xFF);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+  O << TAI->getData16bitsDirective();
+  PrintHex(Value & 0xFFFF);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+  O << TAI->getData32bitsDirective();
+  PrintHex(Value);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+  if (TAI->getData64bitsDirective()) {
+    O << TAI->getData64bitsDirective();
+    PrintHex(Value);
+  } else {
+    if (TM.getTargetData()->isBigEndian()) {
+      EmitInt32(unsigned(Value >> 32)); O << '\n';
+      EmitInt32(unsigned(Value));
+    } else {
+      EmitInt32(unsigned(Value)); O << '\n';
+      EmitInt32(unsigned(Value >> 32));
+    }
+  }
+}
+
+/// toOctal - Convert the low order bits of X into an octal digit.
+///
+static inline char toOctal(int X) {
+  return (X&7)+'0';
+}
+
+/// printStringChar - Print a char, escaped if necessary.
+///
+static void printStringChar(raw_ostream &O, unsigned char C) {
+  if (C == '"') {
+    O << "\\\"";
+  } else if (C == '\\') {
+    O << "\\\\";
+  } else if (isprint((unsigned char)C)) {
+    O << C;
+  } else {
+    switch(C) {
+    case '\b': O << "\\b"; break;
+    case '\f': O << "\\f"; break;
+    case '\n': O << "\\n"; break;
+    case '\r': O << "\\r"; break;
+    case '\t': O << "\\t"; break;
+    default:
+      O << '\\';
+      O << toOctal(C >> 6);
+      O << toOctal(C >> 3);
+      O << toOctal(C >> 0);
+      break;
+    }
+  }
+}
+
+/// EmitString - Emit a string with quotes and a null terminator.
+/// Special characters are emitted properly.
+/// \literal (Eg. '\t') \endliteral
+void AsmPrinter::EmitString(const std::string &String) const {
+  EmitString(String.c_str(), String.size());
+}
+
+void AsmPrinter::EmitString(const char *String, unsigned Size) const {
+  const char* AscizDirective = TAI->getAscizDirective();
+  if (AscizDirective)
+    O << AscizDirective;
+  else
+    O << TAI->getAsciiDirective();
+  O << '\"';
+  for (unsigned i = 0; i < Size; ++i)
+    printStringChar(O, String[i]);
+  if (AscizDirective)
+    O << '\"';
+  else
+    O << "\\0\"";
+}
+
+
+/// EmitFile - Emit a .file directive.
+void AsmPrinter::EmitFile(unsigned Number, const std::string &Name) const {
+  O << "\t.file\t" << Number << " \"";
+  for (unsigned i = 0, N = Name.size(); i < N; ++i)
+    printStringChar(O, Name[i]);
+  O << '\"';
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary.  For example, if you pass in 3 here, you will get an 8
+// byte alignment.  If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request.  However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+//     Align = NumBits;
+//     if (GV && GV->hasalignment) Align = GV->getalignment();
+//     Align = std::max(Align, ForcedAlignBits);
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+                               unsigned ForcedAlignBits,
+                               bool UseFillExpr) const {
+  if (GV && GV->getAlignment())
+    NumBits = Log2_32(GV->getAlignment());
+  NumBits = std::max(NumBits, ForcedAlignBits);
+
+  if (NumBits == 0) return;   // No need to emit alignment.
+  if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
+  O << TAI->getAlignDirective() << NumBits;
+
+  unsigned FillValue = TAI->getTextAlignFillValue();
+  UseFillExpr &= IsInTextSection && FillValue;
+  if (UseFillExpr) {
+    O << ',';
+    PrintHex(FillValue);
+  }
+  O << '\n';
+}
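EmitAlignment (just above) and the constant-pool emitter earlier in this file both rely on the usual power-of-two round-up identity: with AlignMask = Align - 1, the expression (Offset + AlignMask) & ~AlignMask is the smallest multiple of Align that is not below Offset. A quick self-contained check (plain C++, no LLVM types):

#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align; Align must be a power of 2.
static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignTo(0, 8) == 0);     // already aligned: no padding
  assert(alignTo(1, 8) == 8);     // pad 7 bytes, as EmitZeros below would
  assert(alignTo(8, 8) == 8);
  assert(alignTo(13, 4) == 16);
  return 0;
}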
+
+/// EmitZeros - Emit a block of zeros.
+///
+void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
+  if (NumZeros) {
+    if (TAI->getZeroDirective()) {
+      O << TAI->getZeroDirective() << NumZeros;
+      if (TAI->getZeroDirectiveSuffix())
+        O << TAI->getZeroDirectiveSuffix();
+      O << '\n';
+    } else {
+      for (; NumZeros; --NumZeros)
+        O << TAI->getData8bitsDirective(AddrSpace) << "0\n";
+    }
+  }
+}
+
+// Print out the specified constant, without a storage class.  Only the
+// constants valid in constant expressions can occur here.
+void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
+  if (CV->isNullValue() || isa<UndefValue>(CV))
+    O << '0';
+  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    O << CI->getZExtValue();
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    // This is a constant address for a global variable or function. Use the
+    // name of the variable or function as the address value, possibly
+    // decorating it with GlobalVarAddrPrefix/Suffix or
+    // FunctionAddrPrefix/Suffix (these all default to "").
+    if (isa<Function>(GV)) {
+      O << TAI->getFunctionAddrPrefix()
+        << Mang->getValueName(GV)
+        << TAI->getFunctionAddrSuffix();
+    } else {
+      O << TAI->getGlobalVarAddrPrefix()
+        << Mang->getValueName(GV)
+        << TAI->getGlobalVarAddrSuffix();
+    }
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    const TargetData *TD = TM.getTargetData();
+    unsigned Opcode = CE->getOpcode();
+    switch (Opcode) {
+    case Instruction::GetElementPtr: {
+      // Generate a symbolic expression for the byte address.
+      const Constant *ptrVal = CE->getOperand(0);
+      SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+      if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+                                                idxVec.size())) {
+        // Truncate/sext the offset to the pointer size.
+        if (TD->getPointerSizeInBits() != 64) {
+          int SExtAmount = 64-TD->getPointerSizeInBits();
+          Offset = (Offset << SExtAmount) >> SExtAmount;
+        }
+
+        if (Offset)
+          O << '(';
+        EmitConstantValueOnly(ptrVal);
+        if (Offset > 0)
+          O << ") + " << Offset;
+        else if (Offset < 0)
+          O << ") - " << -Offset;
+      } else {
+        EmitConstantValueOnly(ptrVal);
+      }
+      break;
+    }
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+      assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+      break;
+    case Instruction::BitCast:
+      return EmitConstantValueOnly(CE->getOperand(0));
+
+    case Instruction::IntToPtr: {
+      // Handle casts to pointers by changing them into casts to the
+      // appropriate integer type.  This promotes constant folding and
+      // simplifies this code.
+      Constant *Op = CE->getOperand(0);
+      Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+      return EmitConstantValueOnly(Op);
+    }
+
+    case Instruction::PtrToInt: {
+      // Support only foldable casts to/from pointers that can be eliminated by
+      // changing the pointer to the appropriately sized integer type.
+      Constant *Op = CE->getOperand(0);
+      const Type *Ty = CE->getType();
+
+      // We can emit the pointer value into this slot if the slot is an
+      // integer slot greater or equal to the size of the pointer.
+      if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType()))
+        return EmitConstantValueOnly(Op);
+
+      O << "((";
+      EmitConstantValueOnly(Op);
+      APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty));
+
+      SmallString<40> S;
+      ptrMask.toStringUnsigned(S);
+      O << ") & " << S.c_str() << ')';
+      break;
+    }
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+      O << '(';
+      EmitConstantValueOnly(CE->getOperand(0));
+      O << ')';
+      switch (Opcode) {
+      case Instruction::Add:
+        O << " + ";
+        break;
+      case Instruction::Sub:
+        O << " - ";
+        break;
+      case Instruction::And:
+        O << " & ";
+        break;
+      case Instruction::Or:
+        O << " | ";
+        break;
+      case Instruction::Xor:
+        O << " ^ ";
+        break;
+      default:
+        break;
+      }
+      O << '(';
+      EmitConstantValueOnly(CE->getOperand(1));
+      O << ')';
+      break;
+    default:
+      assert(0 && "Unsupported operator!");
+    }
+  } else {
+    assert(0 && "Unknown constant value!");
+  }
+}
+
+/// printAsCString - Print the specified array as a C compatible string, only if
+/// the predicate isString is true.
+///
+static void printAsCString(raw_ostream &O, const ConstantArray *CVA,
+                           unsigned LastElt) {
+  assert(CVA->isString() && "Array is not string compatible!");
+
+  O << '\"';
+  for (unsigned i = 0; i != LastElt; ++i) {
+    unsigned char C =
+        (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue();
+    printStringChar(O, C);
+  }
+  O << '\"';
+}
+
+/// EmitString - Emit a zero-byte-terminated string constant.
+///
+void AsmPrinter::EmitString(const ConstantArray *CVA) const {
+  unsigned NumElts = CVA->getNumOperands();
+  if (TAI->getAscizDirective() && NumElts &&
+      cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
+    O << TAI->getAscizDirective();
+    printAsCString(O, CVA, NumElts-1);
+  } else {
+    O << TAI->getAsciiDirective();
+    printAsCString(O, CVA, NumElts);
+  }
+  O << '\n';
+}
+
+void AsmPrinter::EmitGlobalConstantArray(const ConstantArray *CVA,
+                                         unsigned AddrSpace) {
+  if (CVA->isString()) {
+    EmitString(CVA);
+  } else { // Not a string.  Print the values in successive locations.
+    for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+      EmitGlobalConstant(CVA->getOperand(i), AddrSpace);
+  }
+}
+
+void AsmPrinter::EmitGlobalConstantVector(const ConstantVector *CP) {
+  const VectorType *PTy = CP->getType();
+
+  for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+    EmitGlobalConstant(CP->getOperand(I));
+}
+
+void AsmPrinter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                          unsigned AddrSpace) {
+  // Print the fields in successive locations.  Pad to align if needed!
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CVS->getType());
+  const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+  uint64_t sizeSoFar = 0;
+  for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+    const Constant* field = CVS->getOperand(i);
+
+    // Check if padding is needed and insert one or more 0s.
+    uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+    uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+                        - cvsLayout->getElementOffset(i)) - fieldSize;
+    sizeSoFar += fieldSize + padSize;
+
+    // Now print the actual field value.
+    EmitGlobalConstant(field, AddrSpace);
+
+    // Insert padding - this may include padding to increase the size of the
+    // current field up to the ABI size (if the struct is not packed) as well
+    // as padding to ensure that the next field starts at the right offset.
+ EmitZeros(padSize, AddrSpace); + } + assert(sizeSoFar == cvsLayout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, + unsigned AddrSpace) { + // FP Constants are printed as integer constants to avoid losing + // precision... + const TargetData *TD = TM.getTargetData(); + if (CFP->getType() == Type::DoubleTy) { + double Val = CFP->getValueAPF().convertToDouble(); // for comment only + uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (TAI->getData64bitsDirective(AddrSpace)) { + O << TAI->getData64bitsDirective(AddrSpace) << i; + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " double value: " << Val; + O << '\n'; + } else if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double most significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double least significant word " << Val; + O << '\n'; + } else { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double least significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double most significant word " << Val; + O << '\n'; + } + return; + } else if (CFP->getType() == Type::FloatTy) { + float Val = CFP->getValueAPF().convertToFloat(); // for comment only + O << TAI->getData32bitsDirective(AddrSpace) + << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " float " << Val; + O << '\n'; + return; + } else if (CFP->getType() == Type::X86_FP80Ty) { + // all long double variants are printed as hex + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + // Convert to double so we can print the approximate val as a comment. 
+ APFloat DoubleVal = CFP->getValueAPF(); + bool ignored; + DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + if (TD->isBigEndian()) { + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant halfword of ~" + << DoubleVal.convertToDouble(); + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant halfword"; + O << '\n'; + } else { + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant halfword of ~" + << DoubleVal.convertToDouble(); + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant halfword"; + O << '\n'; + } + EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) - + TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace); + return; + } else if (CFP->getType() == Type::PPC_FP128Ty) { + // all long double variants are printed as hex + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant word"; + O << '\n'; + } else { + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + 
if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " long double next word";
+      O << '\n';
+      O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+      if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " long double most significant word";
+      O << '\n';
+    }
+    return;
+  } else assert(0 && "Floating point constant type not handled");
+}
+
+void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+                                            unsigned AddrSpace) {
+  const TargetData *TD = TM.getTargetData();
+  unsigned BitWidth = CI->getBitWidth();
+  assert(isPowerOf2_32(BitWidth) &&
+         "Non-power-of-2-sized integers not handled!");
+
+  // We don't expect assemblers to support integer data directives
+  // for more than 64 bits, so we emit the data in at most 64-bit
+  // quantities at a time.
+  const uint64_t *RawData = CI->getValue().getRawData();
+  for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+    uint64_t Val;
+    if (TD->isBigEndian())
+      Val = RawData[e - i - 1];
+    else
+      Val = RawData[i];
+
+    if (TAI->getData64bitsDirective(AddrSpace))
+      O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n';
+    else if (TD->isBigEndian()) {
+      O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+      if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " Double-word most significant word " << Val;
+      O << '\n';
+      O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+      if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " Double-word least significant word " << Val;
+      O << '\n';
+    } else {
+      O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+      if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " Double-word least significant word " << Val;
+      O << '\n';
+      O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+      if (VerboseAsm)
+        O << '\t' << TAI->getCommentString()
+          << " Double-word most significant word " << Val;
+      O << '\n';
+    }
+  }
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+  const TargetData *TD = TM.getTargetData();
+  const Type *type = CV->getType();
+  unsigned Size = TD->getTypeAllocSize(type);
+
+  if (CV->isNullValue() || isa<UndefValue>(CV)) {
+    EmitZeros(Size, AddrSpace);
+    return;
+  } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+    EmitGlobalConstantArray(CVA, AddrSpace);
+    return;
+  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+    EmitGlobalConstantStruct(CVS, AddrSpace);
+    return;
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    EmitGlobalConstantFP(CFP, AddrSpace);
+    return;
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    // Small integers are handled below; large integers are handled here.
+    if (Size > 4) {
+      EmitGlobalConstantLargeInt(CI, AddrSpace);
+      return;
+    }
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    EmitGlobalConstantVector(CP);
+    return;
+  }
+
+  printDataDirective(type, AddrSpace);
+  EmitConstantValueOnly(CV);
+  if (VerboseAsm) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+      SmallString<40> S;
+      CI->getValue().toStringUnsigned(S, 16);
+      O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str();
+    }
+  }
+  O << '\n';
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Target doesn't support this yet!
+  abort();
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself.
This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { + if (!strcmp(Code, "private")) { + O << TAI->getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + if (VerboseAsm) + O << TAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Assign a unique ID to this machine instruction. + static const MachineInstr *LastMI = 0; + static const Function *F = 0; + static unsigned Counter = 0U-1; + + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + const Function *ThisF = MI->getParent()->getParent()->getFunction(); + + // If this is a new machine instruction, bump the counter. + if (LastMI != MI || F != ThisF) { + ++Counter; + LastMI = MI; + F = ThisF; + } + O << Counter; + } else { + cerr << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + exit(1); + } +} + +/// processDebugLoc - Processes the debug information of each machine +/// instruction's DebugLoc. +void AsmPrinter::processDebugLoc(DebugLoc DL) { + if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { + if (!DL.isUnknown()) { + static DebugLocTuple PrevDLT(0, ~0U, ~0U); + DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); + + if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) + printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + DICompileUnit(CurDLT.CompileUnit))); + + PrevDLT = CurDLT; + } + } +} + +/// printInlineAsm - This method formats and prints the specified machine +/// instruction that is an inline asm. +void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { + unsigned NumOperands = MI->getNumOperands(); + + // Count the number of register definitions. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != NumOperands-1 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + return; + } + + O << TAI->getInlineAsmStart() << "\n\t"; + + // The variant of the current asmprinter. + int AsmPrinterVariant = TAI->getAssemblerDialect(); + + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + O << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. 
+ bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) { + cerr << "Nested variants found in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + O << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + O << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (StrEnd == 0) { + cerr << "Unterminated ${:foo} operand in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + std::string Val(StrStart, StrEnd); + PrintSpecial(MI, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + char *IDEnd; + errno = 0; + long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. + if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) { + cerr << "Bad $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) { + cerr << "Bad ${:} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') { + cerr << "Bad ${} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + ++LastEmitted; // Consume '}' character. + } + + if ((unsigned)Val >= NumOperands-1) { + cerr << "Invalid $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = 1; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + if (OpNo >= MI->getNumOperands()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. 
+
+          if (Modifier[0]=='l')  // labels are target independent
+            printBasicBlockLabel(MI->getOperand(OpNo).getMBB(),
+                                 false, false, false);
+          else {
+            AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+            if ((OpFlags & 7) == 4) {
+              Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+                                                Modifier[0] ? Modifier : 0);
+            } else {
+              Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+                                          Modifier[0] ? Modifier : 0);
+            }
+          }
+        }
+        if (Error) {
+          cerr << "Invalid operand found in inline asm: '"
+               << AsmStr << "'\n";
+          MI->dump();
+          exit(1);
+        }
+      }
+      break;
+    }
+    }
+  }
+  O << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+}
+
+/// printImplicitDef - This method prints the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
+  if (VerboseAsm)
+    O << '\t' << TAI->getCommentString() << " implicit-def: "
+      << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n';
+}
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabel(const MachineInstr *MI) const {
+  printLabel(MI->getOperand(0).getImm());
+}
+
+void AsmPrinter::printLabel(unsigned Id) const {
+  O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n";
+}
+
+/// printDeclare - This method prints a local variable declaration used by
+/// debug tables.
+/// FIXME: It doesn't really print anything rather it inserts a DebugVariable
+/// entry into dwarf table.
+void AsmPrinter::printDeclare(const MachineInstr *MI) const {
+  unsigned FI = MI->getOperand(0).getIndex();
+  GlobalValue *GV = MI->getOperand(1).getGlobal();
+  DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI);
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant.  Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+/// printBasicBlockLabel - This method prints the label for the specified
+/// MachineBasicBlock
+void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+                                      bool printAlign,
+                                      bool printColon,
+                                      bool printComment) const {
+  if (printAlign) {
+    unsigned Align = MBB->getAlignment();
+    if (Align)
+      EmitAlignment(Log2_32(Align));
+  }
+
+  O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_'
+    << MBB->getNumber();
+  if (printColon)
+    O << ':';
+  if (printComment && MBB->getBasicBlock())
+    O << '\t' << TAI->getCommentString() << ' '
+      << MBB->getBasicBlock()->getNameStart();
+}
+
+/// printPICJumpTableSetLabel - This method prints a set label for the
+/// specified MachineBasicBlock for a jumptable entry.
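+// Illustrative note (commentary added in this edit, not upstream code): with
+// a private-label prefix of "L", function number 1, jump-table uid 0, and a
+// block numbered 3, the single-uid form below emits roughly:
+//   .set L1_0_set_3, LBB1_3-LJTI1_0
+// so each jump-table slot holds the block's offset from the table itself,
+// which is the position-independent form a PIC jump table needs.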
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+                                           const MachineBasicBlock *MBB) const {
+  if (!TAI->getSetDirective())
+    return;
+
+  O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+    << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+  printBasicBlockLabel(MBB, false, false, false);
+  O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+    << '_' << uid << '\n';
+}
+
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+                                           const MachineBasicBlock *MBB) const {
+  if (!TAI->getSetDirective())
+    return;
+
+  O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+    << getFunctionNumber() << '_' << uid << '_' << uid2
+    << "_set_" << MBB->getNumber() << ',';
+  printBasicBlockLabel(MBB, false, false, false);
+  O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+    << '_' << uid << '_' << uid2 << '\n';
+}
+
+/// printDataDirective - This method prints the asm directive for the
+/// specified type.
+void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) {
+  const TargetData *TD = TM.getTargetData();
+  switch (type->getTypeID()) {
+  case Type::IntegerTyID: {
+    unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
+    if (BitWidth <= 8)
+      O << TAI->getData8bitsDirective(AddrSpace);
+    else if (BitWidth <= 16)
+      O << TAI->getData16bitsDirective(AddrSpace);
+    else if (BitWidth <= 32)
+      O << TAI->getData32bitsDirective(AddrSpace);
+    else if (BitWidth <= 64) {
+      assert(TAI->getData64bitsDirective(AddrSpace) &&
+             "Target cannot handle 64-bit constant exprs!");
+      O << TAI->getData64bitsDirective(AddrSpace);
+    } else {
+      assert(0 && "Target cannot handle given data directive width!");
+    }
+    break;
+  }
+  case Type::PointerTyID:
+    if (TD->getPointerSize() == 8) {
+      assert(TAI->getData64bitsDirective(AddrSpace) &&
+             "Target cannot handle 64-bit pointer exprs!");
+      O << TAI->getData64bitsDirective(AddrSpace);
+    } else if (TD->getPointerSize() == 2) {
+      O << TAI->getData16bitsDirective(AddrSpace);
+    } else if (TD->getPointerSize() == 1) {
+      O << TAI->getData8bitsDirective(AddrSpace);
+    } else {
+      O << TAI->getData32bitsDirective(AddrSpace);
+    }
+    break;
+  case Type::FloatTyID: case Type::DoubleTyID:
+  case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID:
+    assert (0 && "Should have already output floating point constant.");
+  default:
+    assert (0 && "Can't handle printing this type of thing");
+    break;
+  }
+}
+
+void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix,
+                                   const char *Prefix) {
+  if (Name[0]=='\"')
+    O << '\"';
+  O << TAI->getPrivateGlobalPrefix();
+  if (Prefix) O << Prefix;
+  if (Name[0]=='\"')
+    O << std::string(Name+1, Name+strlen(Name)-1);
+  else
+    O << Name;
+  O << Suffix;
+  if (Name[0]=='\"')
+    O << '\"';
+}
+
+void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) {
+  printSuffixedName(Name.c_str(), Suffix);
+}
+
+void AsmPrinter::printVisibility(const std::string& Name,
+                                 unsigned Visibility) const {
+  if (Visibility == GlobalValue::HiddenVisibility) {
+    if (const char *Directive = TAI->getHiddenDirective())
+      O << Directive << Name << '\n';
+  } else if (Visibility == GlobalValue::ProtectedVisibility) {
+    if (const char *Directive = TAI->getProtectedDirective())
+      O << Directive << Name << '\n';
+  }
+}
+
+void AsmPrinter::printOffset(int64_t Offset) const {
+  if (Offset > 0)
+    O << '+' << Offset;
+  else if (Offset < 0)
+    O << Offset;
+}
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+  if (!S->usesMetadata())
+    return 0;
+
+  gcp_iterator GCPI = GCMetadataPrinters.find(S);
+  if (GCPI != GCMetadataPrinters.end())
+    return GCPI->second;
+
+  const char *Name = S->getName().c_str();
+
+  for (GCMetadataPrinterRegistry::iterator
+         I = GCMetadataPrinterRegistry::begin(),
+         E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+    if (strcmp(Name, I->getName()) == 0) {
+      GCMetadataPrinter *GMP = I->instantiate();
+      GMP->S = S;
+      GCMetadataPrinters.insert(std::make_pair(S, GMP));
+      return GMP;
+    }
+
+  cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n";
+  abort();
+}
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..066aaab48cc7
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(LLVMAsmPrinter
+  AsmPrinter.cpp
+  DIE.cpp
+  DwarfDebug.cpp
+  DwarfException.cpp
+  DwarfLabel.cpp
+  DwarfPrinter.cpp
+  DwarfWriter.cpp
+  OcamlGCPrinter.cpp
+  )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..dc149cf8bc52
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,518 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(Attribute);
+  ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(Tag);
+  ID.AddInteger(ChildrenFlag);
+
+  // For each attribute description.
+  for (unsigned i = 0, N = Data.size(); i < N; ++i)
+    Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(const AsmPrinter *Asm) const {
+  // Emit its Dwarf tag type.
+  Asm->EmitULEB128Bytes(Tag);
+  Asm->EOL(dwarf::TagString(Tag));
+
+  // Emit whether it has children DIEs.
+  Asm->EmitULEB128Bytes(ChildrenFlag);
+  Asm->EOL(dwarf::ChildrenString(ChildrenFlag));
+
+  // For each attribute description.
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    const DIEAbbrevData &AttrData = Data[i];
+
+    // Emit attribute type.
+    Asm->EmitULEB128Bytes(AttrData.getAttribute());
+    Asm->EOL(dwarf::AttributeString(AttrData.getAttribute()));
+
+    // Emit form type.
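+    // Illustrative sketch (commentary added in this edit, not upstream
+    // code): EmitULEB128Bytes writes the standard DWARF unsigned LEB128
+    // encoding: 7 bits per byte, least significant group first, high bit
+    // set on all but the last byte. Assuming an emit(uint8_t) primitive:
+    //   do {
+    //     uint8_t Byte = Value & 0x7f;
+    //     Value >>= 7;
+    //     if (Value) Byte |= 0x80;
+    //     emit(Byte);
+    //   } while (Value);
+    // e.g. 624485 encodes as 0xE5 0x8E 0x26.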
+    Asm->EmitULEB128Bytes(AttrData.getForm());
+    Asm->EOL(dwarf::FormEncodingString(AttrData.getForm()));
+  }
+
+  // Mark end of abbreviation.
+  Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(1)");
+  Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(std::ostream &O) {
+  O << "Abbreviation @"
+    << std::hex << (intptr_t)this << std::dec
+    << " "
+    << dwarf::TagString(Tag)
+    << " "
+    << dwarf::ChildrenString(ChildrenFlag)
+    << "\n";
+
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << " "
+      << dwarf::AttributeString(Data[i].getAttribute())
+      << " "
+      << dwarf::FormEncodingString(Data[i].getForm())
+      << "\n";
+  }
+}
+void DIEAbbrev::dump() { print(cerr); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+  for (unsigned i = 0, N = Children.size(); i < N; ++i)
+    delete Children[i];
+}
+
+/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::AddSiblingOffset() {
+  DIEInteger *DI = new DIEInteger(0);
+  Values.insert(Values.begin(), DI);
+  Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIE::Profile(FoldingSetNodeID &ID) {
+  Abbrev.Profile(ID);
+
+  for (unsigned i = 0, N = Children.size(); i < N; ++i)
+    ID.AddPointer(Children[i]);
+
+  for (unsigned j = 0, M = Values.size(); j < M; ++j)
+    ID.AddPointer(Values[j]);
+}
+
+#ifndef NDEBUG
+void DIE::print(std::ostream &O, unsigned IncIndent) {
+  static unsigned IndentCount = 0;
+  IndentCount += IncIndent;
+  const std::string Indent(IndentCount, ' ');
+  bool isBlock = Abbrev.getTag() == 0;
+
+  if (!isBlock) {
+    O << Indent
+      << "Die: "
+      << "0x" << std::hex << (intptr_t)this << std::dec
+      << ", Offset: " << Offset
+      << ", Size: " << Size
+      << "\n";
+
+    O << Indent
+      << dwarf::TagString(Abbrev.getTag())
+      << " "
+      << dwarf::ChildrenString(Abbrev.getChildrenFlag());
+  } else {
+    O << "Size: " << Size;
+  }
+  O << "\n";
+
+  const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+  IndentCount += 2;
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << Indent;
+
+    if (!isBlock)
+      O << dwarf::AttributeString(Data[i].getAttribute());
+    else
+      O << "Blk[" << i << "]";
+
+    O << " "
+      << dwarf::FormEncodingString(Data[i].getForm())
+      << " ";
+    Values[i]->print(O);
+    O << "\n";
+  }
+  IndentCount -= 2;
+
+  for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+    Children[j]->print(O, 4);
+  }
+
+  if (!isBlock) O << "\n";
+  IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+  print(cerr);
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+  print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+/// +void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const { + const AsmPrinter *Asm = D->getAsm(); + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: Asm->EmitInt8(Integer); break; + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: Asm->EmitInt16(Integer); break; + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: Asm->EmitInt32(Integer); break; + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break; + case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break; + case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break; + default: assert(0 && "DIE Value form not supported yet"); break; + } +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer); + default: assert(0 && "DIE Value form not supported yet"); break; + } + return 0; +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) { + ID.AddInteger(isInteger); + ID.AddInteger(Int); +} +void DIEInteger::Profile(FoldingSetNodeID &ID) { + Profile(ID, Integer); +} + +#ifndef NDEBUG +void DIEInteger::print(std::ostream &O) { + O << "Int: " << (int64_t)Integer + << " 0x" << std::hex << Integer << std::dec; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(Dwarf *D, unsigned Form) const { + D->getAsm()->EmitString(Str); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) { + ID.AddInteger(isString); + ID.AddString(Str); +} +void DIEString::Profile(FoldingSetNodeID &ID) { + Profile(ID, Str); +} + +#ifndef NDEBUG +void DIEString::print(std::ostream &O) { + O << "Str: \"" << Str << "\""; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDwarfLabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEDwarfLabel::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitReference(Label, false, IsSmall); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. 
+/// +void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) { + ID.AddInteger(isLabel); + Label.Profile(ID); +} +void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label); +} + +#ifndef NDEBUG +void DIEDwarfLabel::print(std::ostream &O) { + O << "Lbl: "; + Label.print(O); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEObjectLabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEObjectLabel::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitReference(Label, false, IsSmall); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) { + ID.AddInteger(isAsIsLabel); + ID.AddString(Label); +} +void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label.c_str()); +} + +#ifndef NDEBUG +void DIEObjectLabel::print(std::ostream &O) { + O << "Obj: " << Label; +} +#endif + +//===----------------------------------------------------------------------===// +// DIESectionOffset Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIESectionOffset::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitSectionOffset(Label.getTag(), Section.getTag(), + Label.getNumber(), Section.getNumber(), + IsSmall, IsEH, UseSet); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label, + const DWLabel &Section) { + ID.AddInteger(isSectionOffset); + Label.Profile(ID); + Section.Profile(ID); + // IsEH and UseSet are specific to the Label/Section that we will emit the + // offset for; so Label/Section are enough for uniqueness. +} +void DIESectionOffset::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label, Section); +} + +#ifndef NDEBUG +void DIESectionOffset::print(std::ostream &O) { + O << "Off: "; + Label.print(O); + O << "-"; + Section.print(O); + O << "-" << IsEH << "-" << UseSet; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitDifference(LabelHi, LabelLo, IsSmall); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. 
+///
+void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+                       const DWLabel &LabelLo) {
+  ID.AddInteger(isDelta);
+  LabelHi.Profile(ID);
+  LabelLo.Profile(ID);
+}
+void DIEDelta::Profile(FoldingSetNodeID &ID) {
+  Profile(ID, LabelHi, LabelLo);
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(std::ostream &O) {
+  O << "Del: ";
+  LabelHi.print(O);
+  O << "-";
+  LabelLo.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const {
+  D->getAsm()->EmitInt32(Entry->getOffset());
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) {
+  ID.AddInteger(isEntry);
+  ID.AddPointer(Entry);
+}
+void DIEEntry::Profile(FoldingSetNodeID &ID) {
+  ID.AddInteger(isEntry);
+
+  if (Entry)
+    ID.AddPointer(Entry);
+  else
+    ID.AddPointer(this);
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(std::ostream &O) {
+  O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(const TargetData *TD) {
+  if (!Size) {
+    const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+    for (unsigned i = 0, N = Values.size(); i < N; ++i)
+      Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+  }
+
+  return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const {
+  const AsmPrinter *Asm = D->getAsm();
+  switch (Form) {
+  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);         break;
+  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);        break;
+  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);        break;
+  case dwarf::DW_FORM_block:  Asm->EmitULEB128Bytes(Size); break;
+  default: assert(0 && "Improper form for block");         break;
+  }
+
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    Asm->EOL();
+    Values[i]->EmitValue(D, AbbrevData[i].getForm());
+  }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
+  switch (Form) {
+  case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+  case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+  case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+  case dwarf::DW_FORM_block:  return Size + TargetAsmInfo::getULEB128Size(Size);
+  default: assert(0 && "Improper form for block"); break;
+  }
+  return 0;
+}
+
+void DIEBlock::Profile(FoldingSetNodeID &ID) {
+  ID.AddInteger(isBlock);
+  DIE::Profile(ID);
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(std::ostream &O) {
+  O << "Blk: ";
+  DIE::print(O, 5);
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 000000000000..b14d91ca8b09
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,549 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iosfwd>
+
+namespace llvm {
+  class AsmPrinter;
+  class Dwarf;
+  class TargetData;
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+  /// Dwarf abbreviation.
+  class VISIBILITY_HIDDEN DIEAbbrevData {
+    /// Attribute - Dwarf attribute code.
+    ///
+    unsigned Attribute;
+
+    /// Form - Dwarf form code.
+    ///
+    unsigned Form;
+  public:
+    DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+    // Accessors.
+    unsigned getAttribute() const { return Attribute; }
+    unsigned getForm() const { return Form; }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+  /// information object.
+  class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+    /// Tag - Dwarf tag code.
+    ///
+    unsigned Tag;
+
+    /// Unique number for node.
+    ///
+    unsigned Number;
+
+    /// ChildrenFlag - Dwarf children flag.
+    ///
+    unsigned ChildrenFlag;
+
+    /// Data - Raw data bytes for abbreviation.
+    ///
+    SmallVector<DIEAbbrevData, 8> Data;
+  public:
+    DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+    virtual ~DIEAbbrev() {}
+
+    // Accessors.
+    unsigned getTag() const { return Tag; }
+    unsigned getNumber() const { return Number; }
+    unsigned getChildrenFlag() const { return ChildrenFlag; }
+    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    void setTag(unsigned T) { Tag = T; }
+    void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+    void setNumber(unsigned N) { Number = N; }
+
+    /// AddAttribute - Adds another set of attribute information to the
+    /// abbreviation.
+    void AddAttribute(unsigned Attribute, unsigned Form) {
+      Data.push_back(DIEAbbrevData(Attribute, Form));
+    }
+
+    /// AddFirstAttribute - Adds a set of attribute information to the front
+    /// of the abbreviation.
+    void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+      Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+    }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+
+    /// Emit - Print the abbreviation using the specified asm printer.
+    ///
+    void Emit(const AsmPrinter *Asm) const;
+
+#ifndef NDEBUG
+    void print(std::ostream *O) {
+      if (O) print(*O);
+    }
+    void print(std::ostream &O);
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIE - A structured debug information entry.  Has an abbreviation which
+  /// describes its organization.
+  class CompileUnit;
+  class DIEValue;
+
+  class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+  protected:
+    /// Abbrev - Buffer for constructing abbreviation.
+    ///
+    DIEAbbrev Abbrev;
+
+    /// Offset - Offset in debug info section.
+    ///
+    unsigned Offset;
+
+    /// Size - Size of instance + children.
+    ///
+    unsigned Size;
+
+    /// Children DIEs.
+    ///
+    std::vector<DIE *> Children;
+
+    /// Attribute values.
+    ///
+    SmallVector<DIEValue*, 32> Values;
+
+    /// Abstract compile unit.
+    CompileUnit *AbstractCU;
+  public:
+    explicit DIE(unsigned Tag)
+      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0) {}
+    virtual ~DIE();
+
+    // Accessors.
+    DIEAbbrev &getAbbrev() { return Abbrev; }
+    unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+    unsigned getTag() const { return Abbrev.getTag(); }
+    unsigned getOffset() const { return Offset; }
+    unsigned getSize() const { return Size; }
+    const std::vector<DIE *> &getChildren() const { return Children; }
+    SmallVector<DIEValue*, 32> &getValues() { return Values; }
+    CompileUnit *getAbstractCompileUnit() const { return AbstractCU; }
+
+    void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+    void setOffset(unsigned O) { Offset = O; }
+    void setSize(unsigned S) { Size = S; }
+    void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; }
+
+    /// AddValue - Add a value and attributes to a DIE.
+    ///
+    void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+      Abbrev.AddAttribute(Attribute, Form);
+      Values.push_back(Value);
+    }
+
+    /// SiblingOffset - Return the offset of the debug information entry's
+    /// sibling.
+    unsigned SiblingOffset() const { return Offset + Size; }
+
+    /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+    ///
+    void AddSiblingOffset();
+
+    /// AddChild - Add a child to the DIE.
+    ///
+    void AddChild(DIE *Child) {
+      Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+      Children.push_back(Child);
+    }
+
+    /// Detach - Detaches objects connected to it after copying.
+    ///
+    void Detach() {
+      Children.clear();
+    }
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID);
+
+#ifndef NDEBUG
+    void print(std::ostream *O, unsigned IncIndent = 0) {
+      if (O) print(*O, IncIndent);
+    }
+    void print(std::ostream &O, unsigned IncIndent = 0);
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEValue - A debug information entry value.
+  ///
+  class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+  public:
+    enum {
+      isInteger,
+      isString,
+      isLabel,
+      isAsIsLabel,
+      isSectionOffset,
+      isDelta,
+      isEntry,
+      isBlock
+    };
+  protected:
+    /// Type - Type of data stored in the value.
+    ///
+    unsigned Type;
+  public:
+    explicit DIEValue(unsigned T) : Type(T) {}
+    virtual ~DIEValue() {}
+
+    // Accessors
+    unsigned getType() const { return Type; }
+
+    /// EmitValue - Emit value via the Dwarf writer.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const = 0;
+
+    /// SizeOf - Return the size of a value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    virtual void Profile(FoldingSetNodeID &ID) = 0;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+    void print(std::ostream *O) {
+      if (O) print(*O);
+    }
+    virtual void print(std::ostream &O) = 0;
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEInteger - An integer value DIE.
+  ///
+  class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+    uint64_t Integer;
+  public:
+    explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+    /// BestForm - Choose the best form for integer.
+ /// + static unsigned BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; + if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; + if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; + } + return dwarf::DW_FORM_data8; + } + + /// EmitValue - Emit integer of appropriate size. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of integer value in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, unsigned Int); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEInteger *) { return true; } + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEString - A string value DIE. + /// + class VISIBILITY_HIDDEN DIEString : public DIEValue { + const std::string Str; + public: + explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {} + + /// EmitValue - Emit string value. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of string value in bytes. + /// + virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const { + return Str.size() + sizeof(char); // sizeof('\0'); + } + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const std::string &Str); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEString *) { return true; } + static bool classof(const DIEValue *S) { return S->getType() == isString; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEDwarfLabel - A Dwarf internal label expression DIE. + // + class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue { + const DWLabel Label; + public: + explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {} + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const DWLabel &Label); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEDwarfLabel *) { return true; } + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEObjectLabel - A label to an object in code or data. 
+  //
+  class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+    const std::string Label;
+  public:
+    explicit DIEObjectLabel(const std::string &L)
+      : DIEValue(isAsIsLabel), Label(L) {}
+
+    /// EmitValue - Emit label value.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of label value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const std::string &Label);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEObjectLabel *) { return true; }
+    static bool classof(const DIEValue *L) {
+      return L->getType() == isAsIsLabel;
+    }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIESectionOffset - A section offset DIE.
+  ///
+  class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+    const DWLabel Label;
+    const DWLabel Section;
+    bool IsEH : 1;
+    bool UseSet : 1;
+  public:
+    DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
+                     bool isEH = false, bool useSet = true)
+      : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
+        IsEH(isEH), UseSet(useSet) {}
+
+    /// EmitValue - Emit section offset.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of section offset value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+                        const DWLabel &Section);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIESectionOffset *) { return true; }
+    static bool classof(const DIEValue *D) {
+      return D->getType() == isSectionOffset;
+    }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEDelta - A simple label difference DIE.
+  ///
+  class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+    const DWLabel LabelHi;
+    const DWLabel LabelLo;
+  public:
+    DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+      : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+    /// EmitValue - Emit delta value.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of delta value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+                        const DWLabel &LabelLo);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEDelta *) { return true; }
+    static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEEntry - A pointer to another debug information entry.  An instance of
+  /// this class can also be used as a proxy for a debug information entry not
+  /// yet defined (i.e. types.)
+ class VISIBILITY_HIDDEN DIEEntry : public DIEValue { + DIE *Entry; + public: + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + + DIE *getEntry() const { return Entry; } + void setEntry(DIE *E) { Entry = E; } + + /// EmitValue - Emit debug information entry offset. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of debug information entry in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const { + return sizeof(int32_t); + } + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, DIE *Entry); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEEntry *) { return true; } + static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEBlock - A block of values. Primarily used for location expressions. + // + class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE { + unsigned Size; // Size in bytes excluding size header. + public: + DIEBlock() + : DIEValue(isBlock), DIE(0), Size(0) {} + virtual ~DIEBlock() {} + + /// ComputeSize - calculate the size of the block. + /// + unsigned ComputeSize(const TargetData *TD); + + /// BestForm - Choose the best form for data. + /// + unsigned BestForm() const { + if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit block data. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of block data in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEBlock *) { return true; } + static bool classof(const DIEValue *E) { return E->getType() == isBlock; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp new file mode 100644 index 000000000000..25217b088099 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -0,0 +1,2610 @@ +//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
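+//
+// Illustrative note (commentary added in this edit, not upstream code): the
+// writer builds a tree of DIEs, each with a tag, an abbreviation describing
+// its attribute/form pairs, and the matching DIEValues. A local variable,
+// for example, might be described by a DIE shaped like:
+//   DW_TAG_variable
+//     DW_AT_name      DW_FORM_string  "i"
+//     DW_AT_type      DW_FORM_ref4    (reference to the DIE for "int")
+//     DW_AT_location  DW_FORM_block1  (a DWARF location expression)
+// The support classes below track the source-level entities such DIEs are
+// built from.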
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include <ostream>
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+  static TimerGroup DwarfTimerGroup("Dwarf Debugging");
+  return DwarfTimerGroup;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize = 9; // log2(512)
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+static const unsigned InitValuesSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class VISIBILITY_HIDDEN CompileUnit {
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+
+  /// Die - Compile unit debug information entry.
+  ///
+  DIE *Die;
+
+  /// GVToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  std::map<GlobalVariable *, DIE *> GVToDieMap;
+
+  /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> Globals;
+
+  /// DiesSet - Used to uniquely define dies within the compile unit.
+  ///
+  FoldingSet<DIE> DiesSet;
+public:
+  CompileUnit(unsigned I, DIE *D)
+    : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
+  ~CompileUnit() { delete Die; }
+
+  // Accessors.
+  unsigned getID() const { return ID; }
+  DIE* getDie() const { return Die; }
+  StringMap<DIE*> &getGlobals() { return Globals; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !Die->getChildren().empty(); }
+
+  /// AddGlobal - Add a new global entity to the compile unit.
+  ///
+  void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// getDieMapSlotFor - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; }
+
+  /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for
+  /// the specified debug variable.
+  DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) {
+    return GVToDIEEntryMap[GV];
+  }
+
+  /// AddDie - Adds or interns the DIE to the compile unit.
+  ///
+  DIE *AddDie(DIE &Buffer) {
+    FoldingSetNodeID ID;
+    Buffer.Profile(ID);
+    void *Where;
+    DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+
+    if (!Die) {
+      Die = new DIE(Buffer);
+      DiesSet.InsertNode(Die, Where);
+      this->Die->AddChild(Die);
+      Buffer.Detach();
+    }
+
+    return Die;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class VISIBILITY_HIDDEN DbgVariable {
+  DIVariable Var;      // Variable Descriptor.
+  unsigned FrameIndex; // Variable frame index.
+  bool InlinedFnVar;   // Variable for an inlined function.
+public:
+  DbgVariable(DIVariable V, unsigned I, bool IFV)
+    : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+
+  // Accessors.
+  DIVariable getVariable() const { return Var; }
+  unsigned getFrameIndex() const { return FrameIndex; }
+  bool isInlinedFnVar() const { return InlinedFnVar; }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgConcreteScope;
+class VISIBILITY_HIDDEN DbgScope {
+  DbgScope *Parent;                  // Parent to this scope.
+  DIDescriptor Desc;                 // Debug info descriptor for scope.
+                                     // Either subprogram or block.
+  unsigned StartLabelID;             // Label ID of the beginning of scope.
+  unsigned EndLabelID;               // Label ID of the end of scope.
+  SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
+  SmallVector<DbgVariable *, 8> Variables; // Variables declared in scope.
+  SmallVector<DbgConcreteScope *, 8> ConcreteInsts; // Concrete insts of funcs.
+public:
+  DbgScope(DbgScope *P, DIDescriptor D)
+    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {}
+  virtual ~DbgScope();
+
+  // Accessors.
+  DbgScope *getParent() const { return Parent; }
+  DIDescriptor getDesc() const { return Desc; }
+  unsigned getStartLabelID() const { return StartLabelID; }
+  unsigned getEndLabelID() const { return EndLabelID; }
+  SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+  SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+  SmallVector<DbgConcreteScope *, 8> &getConcreteInsts() { return ConcreteInsts; }
+  void setStartLabelID(unsigned S) { StartLabelID = S; }
+  void setEndLabelID(unsigned E) { EndLabelID = E; }
+
+  /// AddScope - Add a scope to the scope.
+  ///
+  void AddScope(DbgScope *S) { Scopes.push_back(S); }
+
+  /// AddVariable - Add a variable to the scope.
+  ///
+  void AddVariable(DbgVariable *V) { Variables.push_back(V); }
+
+  /// AddConcreteInst - Add a concrete instance to the scope.
+  ///
+  void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+  static unsigned IndentLevel = 0;
+  std::string Indent(IndentLevel, ' ');
+
+  cerr << Indent; Desc.dump();
+  cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+
+  IndentLevel += 2;
+
+  for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+    if (Scopes[i] != this)
+      Scopes[i]->dump();
+
+  IndentLevel -= 2;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+/// DbgConcreteScope - This class is used to track a scope that holds concrete
+/// instance information.
+///
+class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+  CompileUnit *Unit;
+  DIE *Die;                          // Debug info for this concrete scope.
+public:
+  DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {}
+
+  // Accessors.
+  DIE *getDie() const { return Die; }
+  void setDie(DIE *D) { Die = D; }
+};
+
+DbgScope::~DbgScope() {
+  for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
+    delete Scopes[i];
+  for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+    delete Variables[j];
+  for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
+    delete ConcreteInsts[k];
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+  : Dwarf(OS, A, T, "dbg"), MainCU(0),
+    AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
+    ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(),
+    SectionSourceLines(), didInitial(false), shouldEmit(false),
+    FunctionDbgScope(0), DebugTimer(0) {
+  if (TimePassesIsEnabled)
+    DebugTimer = new Timer("Dwarf Debug Writer",
+                           getDwarfTimerGroup());
+}
+DwarfDebug::~DwarfDebug() {
+  for (unsigned j = 0, M = Values.size(); j < M; ++j)
+    delete Values[j];
+
+  for (DenseMap<GlobalVariable *, DbgScope *>::iterator
+         I = AbstractInstanceRootMap.begin(),
+         E = AbstractInstanceRootMap.end(); I != E; ++I)
+    delete I->second;
+
+  delete DebugTimer;
+}
+
+/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Profile the node so that we can make it unique.
+  FoldingSetNodeID ID;
+  Abbrev.Profile(ID);
+
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations.push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
+    Abbrev.setNumber(Abbreviations.size());
+  } else {
+    // Assign existing abbreviation number.
+    Abbrev.setNumber(InSet->getNumber());
+  }
+}
+
+/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
+  DIEEntry *Value;
+
+  if (Entry) {
+    FoldingSetNodeID ID;
+    DIEEntry::Profile(ID, Entry);
+    void *Where;
+    Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+
+    if (Value) return Value;
+
+    Value = new DIEEntry(Entry);
+    ValuesSet.InsertNode(Value, Where);
+  } else {
+    Value = new DIEEntry(Entry);
+  }
+
+  Values.push_back(Value);
+  return Value;
+}
+
+/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+///
+void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
+  Value->setEntry(Entry);
+
+  // Add to values set if not already there.  If it is, we merely have a
+  // duplicate in the values list (no harm.)
+  ValuesSet.GetOrInsertNode(Value);
+}
+
+/// AddUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, uint64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+  FoldingSetNodeID ID;
+  DIEInteger::Profile(ID, Integer);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEInteger(Integer);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSInt - Add a signed integer attribute data and value.
+///
+void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, int64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+  FoldingSetNodeID ID;
+  DIEInteger::Profile(ID, (uint64_t)Integer);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEInteger(Integer);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddString - Add a string attribute data and value.
+///
+void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
+                           const std::string &String) {
+  FoldingSetNodeID ID;
+  DIEString::Profile(ID, String);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEString(String);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                          const DWLabel &Label) {
+  FoldingSetNodeID ID;
+  DIEDwarfLabel::Profile(ID, Label);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEDwarfLabel(Label);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+///
+void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                                const std::string &Label) {
+  FoldingSetNodeID ID;
+  DIEObjectLabel::Profile(ID, Label);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEObjectLabel(Label);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSectionOffset - Add a section offset label attribute data and value.
+///
+void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+                                  const DWLabel &Label, const DWLabel &Section,
+                                  bool isEH, bool useSet) {
+  FoldingSetNodeID ID;
+  DIESectionOffset::Profile(ID, Label, Section);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIESectionOffset(Label, Section, isEH, useSet);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                          const DWLabel &Hi, const DWLabel &Lo) {
+  FoldingSetNodeID ID;
+  DIEDelta::Profile(ID, Hi, Lo);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEDelta(Hi, Lo);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddBlock - Add block data.
+///
+void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+                          DIEBlock *Block) {
+  Block->ComputeSize(TD);
+  FoldingSetNodeID ID;
+  Block->Profile(ID);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = Block;
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  } else {
+    // Already exists, reuse the previous one.
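+    // Illustrative note (commentary added in this edit, not upstream code):
+    // every Add* helper above uniques its value through ValuesSet (a
+    // FoldingSet) by profiling it into a FoldingSetNodeID and inserting only
+    // when no structurally equal node exists, so identical attribute values
+    // are shared between DIEs. Blocks are the one case where a freshly built
+    // node may have to be discarded in favor of the existing one, as happens
+    // just below.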
+    delete Block;
+    Block = cast<DIEBlock>(Value);
+  }
+
+  Die->AddValue(Attribute, Block->BestForm(), Value);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) {
+  // If there is no compile unit specified, don't add a line #.
+  if (V->getCompileUnit().isNull())
+    return;
+
+  unsigned Line = V->getLineNumber();
+  unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID();
+  assert(FileID && "Invalid file id");
+  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
+  // If there is no compile unit specified, don't add a line #.
+  if (G->getCompileUnit().isNull())
+    return;
+
+  unsigned Line = G->getLineNumber();
+  unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID();
+  assert(FileID && "Invalid file id");
+  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
+  // If there is no compile unit specified, don't add a line #.
+  DICompileUnit CU = Ty->getCompileUnit();
+  if (CU.isNull())
+    return;
+
+  unsigned Line = Ty->getLineNumber();
+  unsigned FileID = FindCompileUnit(CU).getID();
+  assert(FileID && "Invalid file id");
+  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddAddress - Add an address attribute to a die based on the location
+/// provided.
+void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location) {
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new DIEBlock();
+
+  if (Location.isReg()) {
+    if (Reg < 32) {
+      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    } else {
+      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32) {
+      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    } else {
+      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  AddBlock(Die, Attribute, 0, Block);
+}
+
+/// AddType - Add a new type attribute to the specified entity.
+void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+  if (Ty.isNull())
+    return;
+
+  // Check for pre-existence.
+  DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV());
+
+  // If it exists then use the existing value.
+  if (Slot) {
+    Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+    return;
+  }
+
+  // Set up proxy.
+  Slot = CreateDIEEntry();
+
+  // Construct type.
+  DIE Buffer(dwarf::DW_TAG_base_type);
+  if (Ty.isBasicType(Ty.getTag()))
+    ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV()));
+  else if (Ty.isDerivedType(Ty.getTag()))
+    ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV()));
+  else {
+    assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType");
+    ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV()));
+  }
+
+  // Add debug information entry to entity and appropriate context.
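+  // Illustrative note (commentary added in this edit, not upstream code):
+  // because the Slot proxy was registered before ConstructTypeDIE ran,
+  // self-referential types (e.g. a struct containing a pointer to itself)
+  // hit the "if (Slot)" early-out above rather than recursing forever; the
+  // code below now points the proxy at the DIE that was actually built.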
+ DIE *Die = NULL; + DIDescriptor Context = Ty.getContext(); + if (!Context.isNull()) + Die = DW_Unit->getDieMapSlotFor(Context.getGV()); + + if (Die) { + DIE *Child = new DIE(Buffer); + Die->AddChild(Child); + Buffer.Detach(); + SetDIEEntry(Slot, Child); + } else { + Die = DW_Unit->AddDie(Buffer); + SetDIEEntry(Slot, Die); + } + + Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot); +} + +/// ConstructTypeDIE - Construct basic type die from DIBasicType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIBasicType BTy) { + // Get core information. + std::string Name; + BTy.getName(Name); + Buffer.setTag(dwarf::DW_TAG_base_type); + AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// ConstructTypeDIE - Construct derived type die from DIDerivedType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIDerivedType DTy) { + // Get core information. + std::string Name; + DTy.getName(Name); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + AddType(DW_Unit, &Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + AddSourceLine(&Buffer, &DTy); +} + +/// ConstructTypeDIE - Construct type DIE from DICompositeType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType CTy) { + // Get core information. + std::string Name; + CTy.getName(Name); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIEnumerator Enum(Elements.getElement(i).getGV()); + ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); + Buffer.AddChild(ElemDie); + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + AddType(DW_Unit, &Buffer, DIType(RTy.getGV())); + + // Add prototype flag. + AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIDescriptor Ty = Elements.getElement(i); + AddType(DW_Unit, Arg, DIType(Ty.getGV())); + Buffer.AddChild(Arg); + } + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. 
+ DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + if (Elements.isNull()) + break; + + // Add elements to structure type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.getTag() == dwarf::DW_TAG_subprogram) + ElemDie = CreateSubprogramDIE(DW_Unit, + DISubprogram(Element.getGV())); + else if (Element.getTag() == dwarf::DW_TAG_variable) // ?? + ElemDie = CreateGlobalVariableDIE(DW_Unit, + DIGlobalVariable(Element.getGV())); + else + ElemDie = CreateMemberDIE(DW_Unit, + DIDerivedType(Element.getGV())); + Buffer.AddChild(ElemDie); + } + + // FIXME: We'd like an API to register additional attributes for the + // frontend to use while synthesizing, and then we'd use that api in clang + // instead of this. + if (Name == "__block_literal_generic") + AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + AddSourceLine(&Buffer, &CTy); + } +} + +/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange. +void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ + int64_t L = SR.getLo(); + int64_t H = SR.getHi(); + DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); + + if (L != H) { + AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + } + + Buffer.AddChild(DW_Subrange); +} + +/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType. +void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->getTag() == dwarf::DW_TAG_vector_type) + AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + + // Emit derived type. + AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Construct an anonymous type for index type. + DIE IdxBuffer(dwarf::DW_TAG_base_type); + AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + DIE *IndexTy = DW_Unit->AddDie(IdxBuffer); + + // Add subranges to array type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy); + } +} + +/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
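+/// For example, each enumerator of `enum { A, B = 5 }` yields one such DIE,
+/// carrying DW_AT_name ("A", "B") and a signed DW_AT_const_value (0, 5).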
+DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + std::string Name; + ETy->getName(Name); + AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + int64_t Value = ETy->getEnumValue(); + AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// CreateGlobalVariableDIE - Create new DIE using GV. +DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, + const DIGlobalVariable &GV) { + DIE *GVDie = new DIE(dwarf::DW_TAG_variable); + std::string Name; + GV.getDisplayName(Name); + AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + std::string LinkageName; + GV.getLinkageName(LinkageName); + if (!LinkageName.empty()) + AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName); + AddType(DW_Unit, GVDie, GV.getType()); + if (!GV.isLocalToUnit()) + AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + AddSourceLine(GVDie, &GV); + return GVDie; +} + +/// CreateMemberDIE - Create new member DIE. +DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ + DIE *MemberDie = new DIE(DT.getTag()); + std::string Name; + DT.getName(Name); + if (!Name.empty()) + AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); + + AddSourceLine(MemberDie, &DT); + + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = DT.getOriginalTypeSize(); + + if (Size != FieldSize) { + // Handle bitfield. + AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); + AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t FieldOffset = Offset; + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size); + AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); + } + + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, Block); + + if (DT.isProtected()) + AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + dwarf::DW_ACCESS_private); + + return MemberDie; +} + +/// CreateSubprogramDIE - Create new DIE using SP. +DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, + const DISubprogram &SP, + bool IsConstructor, + bool IsInlined) { + DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); + + std::string Name; + SP.getName(Name); + AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + std::string LinkageName; + SP.getLinkageName(LinkageName); + + if (!LinkageName.empty()) + AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName); + + AddSourceLine(SPDie, &SP); + + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + + // Add prototyped tag, if C or ObjC. 
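+  // (DW_AT_prototyped distinguishes `int f(void)` from an unprototyped
+  // `int f()`, a distinction that only exists in the C family; hence the
+  // language check below.)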
+ unsigned Lang = SP.getCompileUnit().getLanguage(); + if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || + Lang == dwarf::DW_LANG_ObjC) + AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add Return Type. + unsigned SPTag = SPTy.getTag(); + if (!IsConstructor) { + if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) + AddType(DW_Unit, SPDie, SPTy); + else + AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV())); + } + + if (!SP.isDefinition()) { + AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled through RecordVariable. + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV())); + AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->AddChild(Arg); + } + } + + if (!SP.isLocalToUnit() && !IsInlined) + AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + // DW_TAG_inlined_subroutine may refer to this DIE. + DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV()); + Slot = SPDie; + return SPDie; +} + +/// FindCompileUnit - Get the compile unit for the given descriptor. +/// +CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { + DenseMap::const_iterator I = + CompileUnitMap.find(Unit.getGV()); + assert(I != CompileUnitMap.end() && "Missing compile unit."); + return *I->second; +} + +/// CreateDbgScopeVariable - Create a new scope variable. +/// +DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { + // Get the descriptor. + const DIVariable &VD = DV->getVariable(); + + // Translate tag to proper Dwarf tag. The result variable is dropped for + // now. + unsigned Tag; + switch (VD.getTag()) { + case dwarf::DW_TAG_return_variable: + return NULL; + case dwarf::DW_TAG_arg_variable: + Tag = dwarf::DW_TAG_formal_parameter; + break; + case dwarf::DW_TAG_auto_variable: // fall thru + default: + Tag = dwarf::DW_TAG_variable; + break; + } + + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); + std::string Name; + VD.getName(Name); + AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add source line info if available. + AddSourceLine(VariableDie, &VD); + + // Add variable type. + AddType(Unit, VariableDie, VD.getType()); + + // Add variable address. + if (!DV->isInlinedFnVar()) { + // Variables for abstract instances of inlined functions don't get a + // location. + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + AddAddress(VariableDie, dwarf::DW_AT_location, Location); + } + + return VariableDie; +} + +/// getOrCreateScope - Returns the scope associated with the given descriptor. +/// +DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) { + DbgScope *&Slot = DbgScopeMap[V]; + if (Slot) return Slot; + + DbgScope *Parent = NULL; + DIBlock Block(V); + + // Don't create a new scope if we already created one for an inlined function. + DenseMap::iterator + II = AbstractInstanceRootMap.find(V); + if (II != AbstractInstanceRootMap.end()) + return LexicalScopeStack.back(); + + if (!Block.isNull()) { + DIDescriptor ParentDesc = Block.getContext(); + Parent = + ParentDesc.isNull() ? 
NULL : getOrCreateScope(ParentDesc.getGV()); + } + + Slot = new DbgScope(Parent, DIDescriptor(V)); + + if (Parent) + Parent->AddScope(Slot); + else + // First function is top level function. + FunctionDbgScope = Slot; + + return Slot; +} + +/// ConstructDbgScope - Construct the components of a scope. +/// +void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope, + unsigned ParentStartID, + unsigned ParentEndID, + DIE *ParentDie, CompileUnit *Unit) { + // Add variables to scope. + SmallVector &Variables = ParentScope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit); + if (VariableDie) ParentDie->AddChild(VariableDie); + } + + // Add concrete instances to scope. + SmallVector &ConcreteInsts = + ParentScope->getConcreteInsts(); + for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) { + DbgConcreteScope *ConcreteInst = ConcreteInsts[i]; + DIE *Die = ConcreteInst->getDie(); + + unsigned StartID = ConcreteInst->getStartLabelID(); + unsigned EndID = ConcreteInst->getEndLabelID(); + + // Add the scope bounds. + if (StartID) + AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("label", StartID)); + else + AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + + if (EndID) + AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("label", EndID)); + else + AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + ParentDie->AddChild(Die); + } + + // Add nested scopes. + SmallVector &Scopes = ParentScope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { + // Define the Scope debug information entry. + DbgScope *Scope = Scopes[j]; + + unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); + unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + + // Ignore empty scopes. + if (StartID == EndID && StartID != 0) continue; + + // Do not ignore inlined scopes even if they don't have any variables or + // scopes. + if (Scope->getScopes().empty() && Scope->getVariables().empty() && + Scope->getConcreteInsts().empty()) + continue; + + if (StartID == ParentStartID && EndID == ParentEndID) { + // Just add stuff to the parent scope. + ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit); + } else { + DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block); + + // Add the scope bounds. + if (StartID) + AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("label", StartID)); + else + AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + + if (EndID) + AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("label", EndID)); + else + AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + // Add the scope's contents. + ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit); + ParentDie->AddChild(ScopeDie); + } + } +} + +/// ConstructFunctionDbgScope - Construct the scope for the subprogram. +/// +void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, + bool AbstractScope) { + // Exit if there is no root scope. + if (!RootScope) return; + DIDescriptor Desc = RootScope->getDesc(); + if (Desc.isNull()) + return; + + // Get the subprogram debug information entry. + DISubprogram SPD(Desc.getGV()); + + // Get the compile unit context. 
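+  // (On targets that fold all debug info into a single compile unit per
+  // object file, MainCU is set and owns every DIE; otherwise the unit is
+  // located through the subprogram's compile unit descriptor.)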
+ CompileUnit *Unit = MainCU; + if (!Unit) + Unit = &FindCompileUnit(SPD.getCompileUnit()); + + // Get the subprogram die. + DIE *SPDie = Unit->getDieMapSlotFor(SPD.getGV()); + assert(SPDie && "Missing subprogram descriptor"); + + if (!AbstractScope) { + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + } + + ConstructDbgScope(RootScope, 0, 0, SPDie, Unit); +} + +/// ConstructDefaultDbgScope - Construct a default scope for the subprogram. +/// +void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { + const char *FnName = MF->getFunction()->getNameStart(); + if (MainCU) { + StringMap &Globals = MainCU->getGlobals(); + StringMap::iterator GI = Globals.find(FnName); + if (GI != Globals.end()) { + DIE *SPDie = GI->second; + + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + return; + } + } else { + for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) { + CompileUnit *Unit = CompileUnits[i]; + StringMap &Globals = Unit->getGlobals(); + StringMap::iterator GI = Globals.find(FnName); + if (GI != Globals.end()) { + DIE *SPDie = GI->second; + + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + return; + } + } + } + +#if 0 + // FIXME: This is causing an abort because C++ mangled names are compared with + // their unmangled counterparts. See PR2885. Don't do this assert. + assert(0 && "Couldn't find DIE for machine function!"); +#endif +} + +/// GetOrCreateSourceID - Look up the source id with the given directory and +/// source file names. If none currently exists, create a new id and insert it +/// in the SourceIds map. This can update DirectoryNames and SourceFileNames +/// maps as well. +unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, + const std::string &FileName) { + unsigned DId; + StringMap::iterator DI = DirectoryIdMap.find(DirName); + if (DI != DirectoryIdMap.end()) { + DId = DI->getValue(); + } else { + DId = DirectoryNames.size() + 1; + DirectoryIdMap[DirName] = DId; + DirectoryNames.push_back(DirName); + } + + unsigned FId; + StringMap::iterator FI = SourceFileIdMap.find(FileName); + if (FI != SourceFileIdMap.end()) { + FId = FI->getValue(); + } else { + FId = SourceFileNames.size() + 1; + SourceFileIdMap[FileName] = FId; + SourceFileNames.push_back(FileName); + } + + DenseMap, unsigned>::iterator SI = + SourceIdMap.find(std::make_pair(DId, FId)); + if (SI != SourceIdMap.end()) + return SI->second; + + unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0. 
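+  // Worked example: GetOrCreateSourceID("/tmp", "a.c") followed by
+  // ("/tmp", "b.c") interns "/tmp" once as directory id 1, the two file
+  // names as file ids 1 and 2, and hands out source ids 1 and 2 for the
+  // resulting (directory, file) pairs.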
+  SourceIdMap[std::make_pair(DId, FId)] = SrcId;
+  SourceIds.push_back(std::make_pair(DId, FId));
+
+  return SrcId;
+}
+
+void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
+  DICompileUnit DIUnit(GV);
+  std::string Dir, FN, Prod;
+  unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir),
+                                    DIUnit.getFilename(FN));
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                   DWLabel("section_line", 0), DWLabel("section_line", 0),
+                   false);
+  AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+            DIUnit.getProducer(Prod));
+  AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+          DIUnit.getLanguage());
+  AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+
+  if (!Dir.empty())
+    AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+  if (DIUnit.isOptimized())
+    AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+  std::string Flags;
+  DIUnit.getFlags(Flags);
+  if (!Flags.empty())
+    AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+  unsigned RVer = DIUnit.getRunTimeVersion();
+  if (RVer)
+    AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+            dwarf::DW_FORM_data1, RVer);
+
+  CompileUnit *Unit = new CompileUnit(ID, Die);
+  if (DIUnit.isMain()) {
+    assert(!MainCU && "Multiple main compile units are found!");
+    MainCU = Unit;
+  }
+
+  CompileUnitMap[DIUnit.getGV()] = Unit;
+  CompileUnits.push_back(Unit);
+}
+
+/// ConstructCompileUnits - Create compile unit DIEs.
+void DwarfDebug::ConstructCompileUnits() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
+  if (!Root)
+    return;
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed compile unit descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed compile unit descriptor anchor type");
+
+  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI) {
+      GlobalVariable *GV = cast<GlobalVariable>(*UUI);
+      ConstructCompileUnit(GV);
+    }
+}
+
+bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+  DIGlobalVariable DI_GV(GV);
+  CompileUnit *DW_Unit = MainCU;
+  if (!DW_Unit)
+    DW_Unit = &FindCompileUnit(DI_GV.getCompileUnit());
+
+  // Check for pre-existence.
+  DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
+  if (Slot)
+    return false;
+
+  DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
+
+  // Add address.
+  DIEBlock *Block = new DIEBlock();
+  AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+  std::string GLN;
+  AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+                 Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN));
+  AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
+
+  // Add to map.
+  Slot = VariableDie;
+
+  // Add to context owner.
+  DW_Unit->getDie()->AddChild(VariableDie);
+
+  // Expose as global. FIXME - need to check external flag.
+  std::string Name;
+  DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
+  return true;
+}
+
+/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally
+/// visible global variables. Return true if at least one global DIE is
+/// created.
+bool DwarfDebug::ConstructGlobalVariableDIEs() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
+  if (!Root)
+    return false;
+
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed global variable descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed global variable descriptor anchor type");
+
+  bool Result = false;
+  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI)
+      Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
+
+  return Result;
+}
+
+bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+  DISubprogram SP(GV);
+  CompileUnit *Unit = MainCU;
+  if (!Unit)
+    Unit = &FindCompileUnit(SP.getCompileUnit());
+
+  // Check for pre-existence.
+  DIE *&Slot = Unit->getDieMapSlotFor(GV);
+  if (Slot)
+    return false;
+
+  if (!SP.isDefinition())
+    // This is a method declaration which will be handled while constructing
+    // class type.
+    return false;
+
+  DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
+
+  // Add to map.
+  Slot = SubprogramDie;
+
+  // Add to context owner.
+  Unit->getDie()->AddChild(SubprogramDie);
+
+  // Expose as global.
+  std::string Name;
+  Unit->AddGlobal(SP.getName(Name), SubprogramDie);
+  return true;
+}
+
+/// ConstructSubprograms - Create DIEs for each of the externally visible
+/// subprograms. Return true if at least one subprogram DIE is created.
+bool DwarfDebug::ConstructSubprograms() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
+  if (!Root)
+    return false;
+
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed subprogram descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed subprogram descriptor anchor type");
+
+  bool Result = false;
+  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI)
+      Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
+
+  return Result;
+}
+
+/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  // Create all the compile unit DIEs.
+  ConstructCompileUnits();
+
+  if (CompileUnits.empty()) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  // Create DIEs for each of the externally visible global variables.
+  bool globalDIEs = ConstructGlobalVariableDIEs();
+
+  // Create DIEs for each of the externally visible subprograms.
+  bool subprogramDIEs = ConstructSubprograms();
+
+  // If no debug info is available for any global variable or subprogram,
+  // there is nothing to emit.
+  if (!globalDIEs && !subprogramDIEs) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  MMI = mmi;
+  shouldEmit = true;
+  MMI->setDebugInfoAvailability(true);
+
+  // Prime section data.
+  SectionMap.insert(TAI->getTextSection());
+
+  // Print out .file directives to specify files for .loc directives. These
+  // are printed out early so that they precede any .loc directives.
+  if (TAI->hasDotLocAndDotFile()) {
+    for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+      // Remember source id starts at 1.
+ std::pair Id = getSourceDirectoryAndFileIds(i); + sys::Path FullPath(getSourceDirectoryName(Id.first)); + bool AppendOk = + FullPath.appendComponent(getSourceFileName(Id.second)); + assert(AppendOk && "Could not append filename to directory!"); + AppendOk = false; + Asm->EmitFile(i, FullPath.toString()); + Asm->EOL(); + } + } + + // Emit initial sections + EmitInitial(); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +/// EndModule - Emit all Dwarf sections that should come after the content. +/// +void DwarfDebug::EndModule() { + if (!ShouldEmitDwarfDebug()) + return; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + // Standard sections final addresses. + Asm->SwitchToSection(TAI->getTextSection()); + EmitLabel("text_end", 0); + Asm->SwitchToSection(TAI->getDataSection()); + EmitLabel("data_end", 0); + + // End text sections. + for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { + Asm->SwitchToSection(SectionMap[i]); + EmitLabel("section_end", i); + } + + // Emit common frame information. + EmitCommonDebugFrame(); + + // Emit function debug frame information + for (std::vector::iterator I = DebugFrames.begin(), + E = DebugFrames.end(); I != E; ++I) + EmitFunctionDebugFrame(*I); + + // Compute DIE offsets and sizes. + SizeAndOffsets(); + + // Emit all the DIEs into a debug info section + EmitDebugInfo(); + + // Corresponding abbreviations into a abbrev section. + EmitAbbreviations(); + + // Emit source line correspondence into a debug line section. + EmitDebugLines(); + + // Emit info into a debug pubnames section. + EmitDebugPubNames(); + + // Emit info into a debug str section. + EmitDebugStr(); + + // Emit info into a debug loc section. + EmitDebugLoc(); + + // Emit info into a debug aranges section. + EmitDebugARanges(); + + // Emit info into a debug ranges section. + EmitDebugRanges(); + + // Emit info into a debug macinfo section. + EmitDebugMacInfo(); + + // Emit inline info. + EmitDebugInlineInfo(); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +/// BeginFunction - Gather pre-function debug information. Assumes being +/// emitted immediately after the function entry point. +void DwarfDebug::BeginFunction(MachineFunction *MF) { + this->MF = MF; + + if (!ShouldEmitDwarfDebug()) return; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + // Begin accumulating function debug information. + MMI->BeginFunction(MF); + + // Assumes in correct section after the entry point. + EmitLabel("func_begin", ++SubprogramCount); + + // Emit label for the implicitly defined dbg.stoppoint at the start of the + // function. + DebugLoc FDL = MF->getDefaultDebugLoc(); + if (!FDL.isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(FDL); + unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, + DICompileUnit(DLT.CompileUnit)); + Asm->printLabel(LabelID); + } + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +/// EndFunction - Gather and emit post-function debug information. +/// +void DwarfDebug::EndFunction(MachineFunction *MF) { + if (!ShouldEmitDwarfDebug()) return; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + // Define end label for subprogram. + EmitLabel("func_end", SubprogramCount); + + // Get function line info. + if (!Lines.empty()) { + // Get section line info. + unsigned ID = SectionMap.insert(Asm->CurrentSection_); + if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); + std::vector &SectionLineInfos = SectionSourceLines[ID-1]; + // Append the function info to section info. 
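+    // (SectionSourceLines is indexed by text section id; EmitDebugLines
+    // later emits one line-table sequence per recorded section.)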
+ SectionLineInfos.insert(SectionLineInfos.end(), + Lines.begin(), Lines.end()); + } + + // Construct the DbgScope for abstract instances. + for (SmallVector::iterator + I = AbstractInstanceRootList.begin(), + E = AbstractInstanceRootList.end(); I != E; ++I) + ConstructFunctionDbgScope(*I); + + // Construct scopes for subprogram. + if (FunctionDbgScope) + ConstructFunctionDbgScope(FunctionDbgScope); + else + // FIXME: This is wrong. We are essentially getting past a problem with + // debug information not being able to handle unreachable blocks that have + // debug information in them. In particular, those unreachable blocks that + // have "region end" info in them. That situation results in the "root + // scope" not being created. If that's the case, then emit a "default" + // scope, i.e., one that encompasses the whole function. This isn't + // desirable. And a better way of handling this (and all of the debugging + // information) needs to be explored. + ConstructDefaultDbgScope(MF); + + DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, + MMI->getFrameMoves())); + + // Clear debug info + if (FunctionDbgScope) { + delete FunctionDbgScope; + DbgScopeMap.clear(); + DbgAbstractScopeMap.clear(); + DbgConcreteScopeMap.clear(); + InlinedVariableScopes.clear(); + FunctionDbgScope = NULL; + LexicalScopeStack.clear(); + AbstractInstanceRootList.clear(); + } + + Lines.clear(); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +/// RecordSourceLine - Records location information and associates it with a +/// label. Returns a unique label ID used to generate a label and provide +/// correspondence to the source line list. +unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + CompileUnit *Unit = CompileUnitMap[V]; + assert(Unit && "Unable to find CompileUnit"); + unsigned ID = MMI->NextLabelID(); + Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// RecordSourceLine - Records location information and associates it with a +/// label. Returns a unique label ID used to generate a label and provide +/// correspondence to the source line list. +unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, + DICompileUnit CU) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + std::string Dir, Fn; + unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir), + CU.getFilename(Fn)); + unsigned ID = MMI->NextLabelID(); + Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be +/// timed. Look up the source id with the given directory and source file +/// names. If none currently exists, create a new id and insert it in the +/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as +/// well. +unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, + const std::string &FileName) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + unsigned SrcId = GetOrCreateSourceID(DirName, FileName); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return SrcId; +} + +/// RecordRegionStart - Indicate the start of a region. 
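+/// Regions are lexical scopes delimited by the llvm.dbg.region.start and
+/// llvm.dbg.region.end intrinsics; the label ID returned here marks the
+/// start (low_pc side) of the region.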
+unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + DbgScope *Scope = getOrCreateScope(V); + unsigned ID = MMI->NextLabelID(); + if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); + LexicalScopeStack.push_back(Scope); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// RecordRegionEnd - Indicate the end of a region. +unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + DbgScope *Scope = getOrCreateScope(V); + unsigned ID = MMI->NextLabelID(); + Scope->setEndLabelID(ID); + if (LexicalScopeStack.size() != 0) + LexicalScopeStack.pop_back(); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// RecordVariable - Indicate the declaration of a local variable. +void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, + const MachineInstr *MI) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + DIDescriptor Desc(GV); + DbgScope *Scope = NULL; + bool InlinedFnVar = false; + + if (Desc.getTag() == dwarf::DW_TAG_variable) { + // GV is a global variable. + DIGlobalVariable DG(GV); + Scope = getOrCreateScope(DG.getContext().getGV()); + } else { + DenseMap::iterator + SI = InlinedVariableScopes.find(MI); + + if (SI != InlinedVariableScopes.end()) { + // or GV is an inlined local variable. + Scope = SI->second; + } else { + DIVariable DV(GV); + GlobalVariable *V = DV.getContext().getGV(); + + // FIXME: The code that checks for the inlined local variable is a hack! + DenseMap::iterator + AI = AbstractInstanceRootMap.find(V); + + if (AI != AbstractInstanceRootMap.end()) { + // This method is called each time a DECLARE node is encountered. For an + // inlined function, this could be many, many times. We don't want to + // re-add variables to that DIE for each time. We just want to add them + // once. Check to make sure that we haven't added them already. + DenseMap >::iterator + IP = InlinedParamMap.find(V); + + if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) { + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + return; + } + + // or GV is an inlined local variable. + Scope = AI->second; + InlinedParamMap[V].insert(GV); + InlinedFnVar = true; + } else { + // or GV is a local variable. + Scope = getOrCreateScope(V); + } + } + } + + assert(Scope && "Unable to find the variable's scope"); + DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar); + Scope->AddVariable(DV); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +//// RecordInlinedFnStart - Indicate the start of inlined subroutine. +unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, + unsigned Line, unsigned Col) { + unsigned LabelID = MMI->NextLabelID(); + + if (!TAI->doesDwarfUsesInlineInfoSection()) + return LabelID; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + GlobalVariable *GV = SP.getGV(); + DenseMap::iterator + II = AbstractInstanceRootMap.find(GV); + + if (II == AbstractInstanceRootMap.end()) { + // Create an abstract instance entry for this inlined function if it doesn't + // already exist. + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); + + // Get the compile unit context. + CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit()); + DIE *SPDie = Unit->getDieMapSlotFor(GV); + if (!SPDie) + SPDie = CreateSubprogramDIE(Unit, SP, false, true); + + // Mark as being inlined. 
This makes this subprogram entry an abstract
+    // instance root.
+    // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
+    // that it's defined. That probably won't change in the future. However,
+    // this could be more elegant.
+    AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
+
+    // Keep track of the abstract scope for this function.
+    DbgAbstractScopeMap[GV] = Scope;
+
+    AbstractInstanceRootMap[GV] = Scope;
+    AbstractInstanceRootList.push_back(Scope);
+  }
+
+  // Create a concrete inlined instance for this inlined function.
+  DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
+  DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
+  CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
+  ScopeDie->setAbstractCompileUnit(Unit);
+
+  DIE *Origin = Unit->getDieMapSlotFor(GV);
+  AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
+              dwarf::DW_FORM_ref4, Origin);
+  AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, Unit->getID());
+  AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
+  AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
+
+  ConcreteScope->setDie(ScopeDie);
+  ConcreteScope->setStartLabelID(LabelID);
+  MMI->RecordUsedDbgLabel(LabelID);
+
+  LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
+
+  // Keep track of the concrete scope that's inlined into this function.
+  DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+    SI = DbgConcreteScopeMap.find(GV);
+
+  if (SI == DbgConcreteScopeMap.end())
+    DbgConcreteScopeMap[GV].push_back(ConcreteScope);
+  else
+    SI->second.push_back(ConcreteScope);
+
+  // Track the start label for this inlined function.
+  DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+    I = InlineInfo.find(GV);
+
+  if (I == InlineInfo.end())
+    InlineInfo[GV].push_back(LabelID);
+  else
+    I->second.push_back(LabelID);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return LabelID;
+}
+
+/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
+unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
+  if (!TAI->doesDwarfUsesInlineInfoSection())
+    return 0;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  GlobalVariable *GV = SP.getGV();
+  DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+    I = DbgConcreteScopeMap.find(GV);
+
+  if (I == DbgConcreteScopeMap.end()) {
+    // FIXME: Can this situation actually happen? And if so, should it?
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return 0;
+  }
+
+  SmallVector<DbgScope *, 8> &Scopes = I->second;
+  assert(!Scopes.empty() && "We should have at least one debug scope!");
+  DbgScope *Scope = Scopes.back(); Scopes.pop_back();
+  unsigned ID = MMI->NextLabelID();
+  MMI->RecordUsedDbgLabel(ID);
+  Scope->setEndLabelID(ID);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return ID;
+}
+
+/// RecordVariableScope - Record scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes
+/// for only inlined subroutine variables. Other variables' scopes are
+/// determined during RecordVariable().
+void DwarfDebug::RecordVariableScope(DIVariable &DV,
+                                     const MachineInstr *DeclareMI) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  DISubprogram SP(DV.getContext().getGV());
+
+  if (SP.isNull()) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  DenseMap<GlobalVariable *, DbgScope *>::iterator
+    I = DbgAbstractScopeMap.find(SP.getGV());
+  if (I != DbgAbstractScopeMap.end())
+    InlinedVariableScopes[DeclareMI] = I->second;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+///
+unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+  // Get the children.
+  const std::vector<DIE *> &Children = Die->getChildren();
+
+  // If not last sibling and has children then add sibling offset attribute.
+  if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+  // Record the abbreviation.
+  AssignAbbrevNumber(Die->getAbbrev());
+
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Set DIE offset.
+  Die->setOffset(Offset);
+
+  // Start the size with the size of abbreviation code.
+  Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber);
+
+  const SmallVector<DIEValue *, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Size the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    // Size attribute value.
+    Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+
+  // Size the DIE children if any.
+  if (!Children.empty()) {
+    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+           "Children flag not set");
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+
+    // End of children marker.
+    Offset += sizeof(int8_t);
+  }
+
+  Die->setSize(Offset - Die->getOffset());
+  return Offset;
+}
+
+/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::SizeAndOffsets() {
+  // Compute size of compile unit header.
+  static unsigned Offset =
+    sizeof(int32_t) + // Length of Compilation Unit Info
+    sizeof(int16_t) + // DWARF version number
+    sizeof(int32_t) + // Offset Into Abbrev. Section
+    sizeof(int8_t);   // Pointer Size (in bytes)
+
+  // Process base compile unit.
+  if (MainCU) {
+    SizeAndOffsetDie(MainCU->getDie(), Offset, true);
+    CompileUnitOffsets[MainCU] = 0;
+    return;
+  }
+
+  // Process all compile units.
+  unsigned PrevOffset = 0;
+
+  for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+    CompileUnit *Unit = CompileUnits[i];
+    CompileUnitOffsets[Unit] = PrevOffset;
+    PrevOffset += SizeAndOffsetDie(Unit->getDie(), Offset, true) +
+                  sizeof(int32_t); // FIXME - extra pad for gdb bug.
+  }
+}
+
+/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+/// tools to recognize the object file contains Dwarf information.
+void DwarfDebug::EmitInitial() {
+  // Check to see if we already emitted initial headers.
+  if (didInitial) return;
+  didInitial = true;
+
+  // Dwarf sections base addresses.
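+  // (Each EmitLabel below plants a `section_*` marker at offset zero of its
+  // section, so later cross-section references such as the DW_AT_stmt_list
+  // offset into .debug_line can be emitted as simple label differences.)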
+  if (TAI->doesDwarfRequireFrameSection()) {
+    Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+    EmitLabel("section_debug_frame", 0);
+  }
+
+  Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+  EmitLabel("section_info", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+  EmitLabel("section_abbrev", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+  EmitLabel("section_aranges", 0);
+
+  if (TAI->doesSupportMacInfoSection()) {
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    EmitLabel("section_macinfo", 0);
+  }
+
+  Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+  EmitLabel("section_line", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+  EmitLabel("section_loc", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+  EmitLabel("section_pubnames", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+  EmitLabel("section_str", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+  EmitLabel("section_ranges", 0);
+
+  Asm->SwitchToSection(TAI->getTextSection());
+  EmitLabel("text_begin", 0);
+  Asm->SwitchToSection(TAI->getDataSection());
+  EmitLabel("data_begin", 0);
+}
+
+/// EmitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::EmitDIE(DIE *Die) {
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  Asm->EOL();
+
+  // Emit the code (index) for the abbreviation.
+  Asm->EmitULEB128Bytes(AbbrevNumber);
+
+  if (Asm->isVerbose())
+    Asm->EOL(std::string("Abbrev [" +
+                         utostr(AbbrevNumber) +
+                         "] 0x" + utohexstr(Die->getOffset()) +
+                         ":0x" + utohexstr(Die->getSize()) + " " +
+                         dwarf::TagString(Abbrev->getTag())));
+  else
+    Asm->EOL();
+
+  SmallVector<DIEValue *, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Emit the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    unsigned Attr = AbbrevData[i].getAttribute();
+    unsigned Form = AbbrevData[i].getForm();
+    assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+    switch (Attr) {
+    case dwarf::DW_AT_sibling:
+      Asm->EmitInt32(Die->SiblingOffset());
+      break;
+    case dwarf::DW_AT_abstract_origin: {
+      DIEEntry *E = cast<DIEEntry>(Values[i]);
+      DIE *Origin = E->getEntry();
+      unsigned Addr =
+        CompileUnitOffsets[Die->getAbstractCompileUnit()] +
+        Origin->getOffset();
+
+      Asm->EmitInt32(Addr);
+      break;
+    }
+    default:
+      // Emit an attribute using the defined form.
+      Values[i]->EmitValue(this, Form);
+      break;
+    }
+
+    Asm->EOL(dwarf::AttributeString(Attr));
+  }
+
+  // Emit the DIE children if any.
+  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+    const std::vector<DIE *> &Children = Die->getChildren();
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      EmitDIE(Children[j]);
+
+    Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+  }
+}
+
+/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+///
+void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
+  DIE *Die = Unit->getDie();
+
+  // Emit the compile unit header.
+  EmitLabel("info_begin", Unit->getID());
+
+  // Emit size of content not including length itself.
+  unsigned ContentSize = Die->getSize() +
+    sizeof(int16_t) + // DWARF version number
+    sizeof(int32_t) + // Offset Into Abbrev. Section
+    sizeof(int8_t) +  // Pointer Size (in bytes)
+    sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+
+  Asm->EmitInt32(ContentSize); Asm->EOL("Length of Compilation Unit Info");
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+  EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+  Asm->EOL("Offset Into Abbrev. Section");
+  Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+  EmitDIE(Die);
+  // FIXME - extra padding for gdb bug.
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  EmitLabel("info_end", Unit->getID());
+
+  Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugInfo() {
+  // Start debug info section.
+  Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+  if (MainCU) {
+    EmitDebugInfoPerCU(MainCU);
+    return;
+  }
+
+  for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+    EmitDebugInfoPerCU(CompileUnits[i]);
+}
+
+/// EmitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::EmitAbbreviations() const {
+  // Check to see if it is worth the effort.
+  if (!Abbreviations.empty()) {
+    // Start the debug abbrev section.
+    Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+    EmitLabel("abbrev_begin", 0);
+
+    // For each abbreviation.
+    for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+      // Get abbreviation data.
+      const DIEAbbrev *Abbrev = Abbreviations[i];
+
+      // Emit the abbreviation code (base 1 index.)
+      Asm->EmitULEB128Bytes(Abbrev->getNumber());
+      Asm->EOL("Abbreviation Code");
+
+      // Emit the abbreviation data.
+      Abbrev->Emit(Asm);
+
+      Asm->EOL();
+    }
+
+    // Mark end of abbreviations.
+    Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+    EmitLabel("abbrev_end", 0);
+    Asm->EOL();
+  }
+}
+
+/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+  // Define last address of section.
+  Asm->EmitInt8(0); Asm->EOL("Extended Op");
+  Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+  Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+  EmitReference("section_end", SectionEnd); Asm->EOL("Section end label");
+
+  // Mark end of matrix.
+  Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+  Asm->EmitULEB128Bytes(1); Asm->EOL();
+  Asm->EmitInt8(1); Asm->EOL();
+}
+
+/// EmitDebugLines - Emit source line information.
+///
+void DwarfDebug::EmitDebugLines() {
+  // If the target is using .loc/.file, the assembler will be emitting the
+  // .debug_line table automatically.
+  if (TAI->hasDotLocAndDotFile())
+    return;
+
+  // Minimum line delta, thus ranging from -10..(255-10).
+  const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
+  // Maximum line delta, thus ranging from -10..(255-10).
+  const int MaxLineDelta = 255 + MinLineDelta;
+
+  // Start the dwarf line section.
+  Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+
+  // Construct the section header.
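+  // (The prologue constants are all derived from the standard opcode set:
+  // DW_LNS_fixed_advance_pc is opcode 9, so MinLineDelta is -10; that value
+  // is emitted below as the line base, 255 + MinLineDelta = 245 as the line
+  // range, and -MinLineDelta = 10 as the opcode base, reserving 1-9 for the
+  // standard opcodes.)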
+ EmitDifference("line_end", 0, "line_begin", 0, true); + Asm->EOL("Length of Source Line Info"); + EmitLabel("line_begin", 0); + + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number"); + + EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true); + Asm->EOL("Prolog Length"); + EmitLabel("line_prolog_begin", 0); + + Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length"); + + Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag"); + + Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)"); + + Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)"); + + Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base"); + + // Line number standard opcode encodings argument count + Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count"); + + // Emit directories. + for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) { + Asm->EmitString(getSourceDirectoryName(DI)); + Asm->EOL("Directory"); + } + + Asm->EmitInt8(0); Asm->EOL("End of directories"); + + // Emit files. + for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) { + // Remember source id starts at 1. + std::pair Id = getSourceDirectoryAndFileIds(SI); + Asm->EmitString(getSourceFileName(Id.second)); + Asm->EOL("Source"); + Asm->EmitULEB128Bytes(Id.first); + Asm->EOL("Directory #"); + Asm->EmitULEB128Bytes(0); + Asm->EOL("Mod date"); + Asm->EmitULEB128Bytes(0); + Asm->EOL("File size"); + } + + Asm->EmitInt8(0); Asm->EOL("End of files"); + + EmitLabel("line_prolog_end", 0); + + // A sequence for each text section. + unsigned SecSrcLinesSize = SectionSourceLines.size(); + + for (unsigned j = 0; j < SecSrcLinesSize; ++j) { + // Isolate current sections line info. + const std::vector &LineInfos = SectionSourceLines[j]; + + if (Asm->isVerbose()) { + const Section* S = SectionMap[j + 1]; + O << '\t' << TAI->getCommentString() << " Section" + << S->getName() << '\n'; + } else { + Asm->EOL(); + } + + // Dwarf assumes we start with first line of first source file. + unsigned Source = 1; + unsigned Line = 1; + + // Construct rows of the address, source, line, column matrix. + for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) { + const SrcLineInfo &LineInfo = LineInfos[i]; + unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID()); + if (!LabelID) continue; + + if (!Asm->isVerbose()) + Asm->EOL(); + else { + std::pair SourceID = + getSourceDirectoryAndFileIds(LineInfo.getSourceID()); + O << '\t' << TAI->getCommentString() << ' ' + << getSourceDirectoryName(SourceID.first) << ' ' + << getSourceFileName(SourceID.second) + <<" :" << utostr_32(LineInfo.getLine()) << '\n'; + } + + // Define the line address. + Asm->EmitInt8(0); Asm->EOL("Extended Op"); + Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size"); + Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address"); + EmitReference("label", LabelID); Asm->EOL("Location label"); + + // If change of source, then switch to the new source. 
+ if (Source != LineInfo.getSourceID()) { + Source = LineInfo.getSourceID(); + Asm->EmitInt8(dwarf::DW_LNS_set_file); Asm->EOL("DW_LNS_set_file"); + Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source"); + } + + // If change of line. + if (Line != LineInfo.getLine()) { + // Determine offset. + int Offset = LineInfo.getLine() - Line; + int Delta = Offset - MinLineDelta; + + // Update line. + Line = LineInfo.getLine(); + + // If delta is small enough and in range... + if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { + // ... then use fast opcode. + Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta"); + } else { + // ... otherwise use long hand. + Asm->EmitInt8(dwarf::DW_LNS_advance_line); + Asm->EOL("DW_LNS_advance_line"); + Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset"); + Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } else { + // Copy the previous row (different address or source) + Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } + + EmitEndOfLineMatrix(j + 1); + } + + if (SecSrcLinesSize == 0) + // Because we're emitting a debug_line section, we still need a line + // table. The linker and friends expect it to exist. If there's nothing to + // put into it, emit an empty table. + EmitEndOfLineMatrix(1); + + EmitLabel("line_end", 0); + Asm->EOL(); +} + +/// EmitCommonDebugFrame - Emit common frame info into a debug frame section. +/// +void DwarfDebug::EmitCommonDebugFrame() { + if (!TAI->doesDwarfRequireFrameSection()) + return; + + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize(); + + // Start the dwarf frame section. + Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + + EmitLabel("debug_frame_common", 0); + EmitDifference("debug_frame_common_end", 0, + "debug_frame_common_begin", 0, true); + Asm->EOL("Length of Common Information Entry"); + + EmitLabel("debug_frame_common_begin", 0); + Asm->EmitInt32((int)dwarf::DW_CIE_ID); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(dwarf::DW_CIE_VERSION); + Asm->EOL("CIE Version"); + Asm->EmitString(""); + Asm->EOL("CIE Augmentation"); + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); + Asm->EOL("CIE RA Column"); + + std::vector Moves; + RI->getInitialFrameState(Moves); + + EmitFrameMoves(NULL, 0, Moves, false); + + Asm->EmitAlignment(2, 0, 0, false); + EmitLabel("debug_frame_common_end", 0); + + Asm->EOL(); +} + +/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame +/// section. +void +DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ + if (!TAI->doesDwarfRequireFrameSection()) + return; + + // Start the dwarf frame section. 
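+  // (Each FDE emitted below mirrors the CIE above: a length, a CIE pointer
+  // as a section offset, func_begin as the initial location, the
+  // func_end - func_begin difference as the address range, and finally the
+  // per-function frame moves.)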
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + + EmitDifference("debug_frame_end", DebugFrameInfo.Number, + "debug_frame_begin", DebugFrameInfo.Number, true); + Asm->EOL("Length of Frame Information Entry"); + + EmitLabel("debug_frame_begin", DebugFrameInfo.Number); + + EmitSectionOffset("debug_frame_common", "section_debug_frame", + 0, 0, true, false); + Asm->EOL("FDE CIE offset"); + + EmitReference("func_begin", DebugFrameInfo.Number); + Asm->EOL("FDE initial location"); + EmitDifference("func_end", DebugFrameInfo.Number, + "func_begin", DebugFrameInfo.Number); + Asm->EOL("FDE address range"); + + EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves, + false); + + Asm->EmitAlignment(2, 0, 0, false); + EmitLabel("debug_frame_end", DebugFrameInfo.Number); + + Asm->EOL(); +} + +void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { + EmitDifference("pubnames_end", Unit->getID(), + "pubnames_begin", Unit->getID(), true); + Asm->EOL("Length of Public Names Info"); + + EmitLabel("pubnames_begin", Unit->getID()); + + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version"); + + EmitSectionOffset("info_begin", "section_info", + Unit->getID(), 0, true, false); + Asm->EOL("Offset of Compilation Unit Info"); + + EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(), + true); + Asm->EOL("Compilation Unit Length"); + + StringMap &Globals = Unit->getGlobals(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE * Entity = GI->second; + + Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset"); + Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name"); + } + + Asm->EmitInt32(0); Asm->EOL("End Mark"); + EmitLabel("pubnames_end", Unit->getID()); + + Asm->EOL(); +} + +/// EmitDebugPubNames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::EmitDebugPubNames() { + // Start the dwarf pubnames section. + Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + + if (MainCU) { + EmitDebugPubNamesPerCU(MainCU); + return; + } + + for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) + EmitDebugPubNamesPerCU(CompileUnits[i]); +} + +/// EmitDebugStr - Emit visible names into a debug str section. +/// +void DwarfDebug::EmitDebugStr() { + // Check to see if it is worth the effort. + if (!StringPool.empty()) { + // Start the dwarf str section. + Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + + // For each of strings in the string pool. + for (unsigned StringID = 1, N = StringPool.size(); + StringID <= N; ++StringID) { + // Emit a label for reference from debug information entries. + EmitLabel("string", StringID); + + // Emit the string itself. + const std::string &String = StringPool[StringID]; + Asm->EmitString(String); Asm->EOL(); + } + + Asm->EOL(); + } +} + +/// EmitDebugLoc - Emit visible names into a debug loc section. +/// +void DwarfDebug::EmitDebugLoc() { + // Start the dwarf loc section. + Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->EOL(); +} + +/// EmitDebugARanges - Emit visible names into a debug aranges section. +/// +void DwarfDebug::EmitDebugARanges() { + // Start the dwarf aranges section. 
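+  // (The aranges section is currently emitted empty; the #if 0 block below
+  // sketches what a real table would hold: a header, one descriptor covering
+  // the text range, and a terminating zero tuple.)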
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + + // FIXME - Mock up +#if 0 + CompileUnit *Unit = GetBaseCompileUnit(); + + // Don't include size of length + Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info"); + + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); + + EmitReference("info_begin", Unit->getID()); + Asm->EOL("Offset of Compilation Unit Info"); + + Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Size of Address"); + + Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor"); + + Asm->EmitInt16(0); Asm->EOL("Pad (1)"); + Asm->EmitInt16(0); Asm->EOL("Pad (2)"); + + // Range 1 + EmitReference("text_begin", 0); Asm->EOL("Address"); + EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length"); + + Asm->EmitInt32(0); Asm->EOL("EOM (1)"); + Asm->EmitInt32(0); Asm->EOL("EOM (2)"); +#endif + + Asm->EOL(); +} + +/// EmitDebugRanges - Emit visible names into a debug ranges section. +/// +void DwarfDebug::EmitDebugRanges() { + // Start the dwarf ranges section. + Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->EOL(); +} + +/// EmitDebugMacInfo - Emit visible names into a debug macinfo section. +/// +void DwarfDebug::EmitDebugMacInfo() { + if (TAI->doesSupportMacInfoSection()) { + // Start the dwarf macinfo section. + Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection()); + Asm->EOL(); + } +} + +/// EmitDebugInlineInfo - Emit inline info using following format. +/// Section Header: +/// 1. length of section +/// 2. Dwarf version number +/// 3. address size. +/// +/// Entries (one "entry" for each function that was inlined): +/// +/// 1. offset into __debug_str section for MIPS linkage name, if exists; +/// otherwise offset into __debug_str for regular function name. +/// 2. offset into __debug_str section for regular function name. +/// 3. an unsigned LEB128 number indicating the number of distinct inlining +/// instances for the function. +/// +/// The rest of the entry consists of a {die_offset, low_pc} pair for each +/// inlined instance; the die_offset points to the inlined_subroutine die in the +/// __debug_info section, and the low_pc is the starting address for the +/// inlining instance. +void DwarfDebug::EmitDebugInlineInfo() { + if (!TAI->doesDwarfUsesInlineInfoSection()) + return; + + if (!MainCU) + return; + + Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection()); + Asm->EOL(); + EmitDifference("debug_inlined_end", 1, + "debug_inlined_begin", 1, true); + Asm->EOL("Length of Debug Inlined Information Entry"); + + EmitLabel("debug_inlined_begin", 1); + + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); + Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); + + for (DenseMap >::iterator + I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { + GlobalVariable *GV = I->first; + SmallVector &Labels = I->second; + DISubprogram SP(GV); + std::string Name; + std::string LName; + + SP.getLinkageName(LName); + SP.getName(Name); + + Asm->EmitString(LName.empty() ? 
+void DwarfDebug::EmitDebugInlineInfo() {
+  if (!TAI->doesDwarfUsesInlineInfoSection())
+    return;
+
+  if (!MainCU)
+    return;
+
+  Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection());
+  Asm->EOL();
+  EmitDifference("debug_inlined_end", 1,
+                 "debug_inlined_begin", 1, true);
+  Asm->EOL("Length of Debug Inlined Information Entry");
+
+  EmitLabel("debug_inlined_begin", 1);
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+  Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+  for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+         I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+    GlobalVariable *GV = I->first;
+    SmallVector<unsigned, 4> &Labels = I->second;
+    DISubprogram SP(GV);
+    std::string Name;
+    std::string LName;
+
+    SP.getLinkageName(LName);
+    SP.getName(Name);
+
+    Asm->EmitString(LName.empty() ? Name : LName);
+    Asm->EOL("MIPS linkage name");
+
+    Asm->EmitString(Name); Asm->EOL("Function name");
+
+    Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
+
+    for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+           LE = Labels.end(); LI != LE; ++LI) {
+      DIE *SP = MainCU->getDieMapSlotFor(GV);
+      Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
+
+      if (TD->getPointerSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+
+      PrintLabelName("label", *LI); Asm->EOL("low_pc");
+    }
+  }
+
+  EmitLabel("debug_inlined_end", 1);
+  Asm->EOL();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..982456619859
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,561 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+
+namespace llvm {
+
+class CompileUnit;
+class DbgVariable;
+class DbgScope;
+class DbgConcreteScope;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class VISIBILITY_HIDDEN SrcLineInfo {
+  unsigned Line;      // Source line number.
+  unsigned Column;    // Source column.
+  unsigned SourceID;  // Source ID number.
+  unsigned LabelID;   // Label in code ID number.
+public:
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
+    : Line(L), Column(C), SourceID(S), LabelID(I) {}
+
+  // Accessors
+  unsigned getLine() const { return Line; }
+  unsigned getColumn() const { return Column; }
+  unsigned getSourceID() const { return SourceID; }
+  unsigned getLabelID() const { return LabelID; }
+};
+
+class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+  //===--------------------------------------------------------------------===//
+  // Attributes used to construct specific Dwarf sections.
+  //
+
+  /// CompileUnitMap - A map of global variables representing compile units to
+  /// compile units.
+  DenseMap<Value *, CompileUnit *> CompileUnitMap;
+
+  /// CompileUnits - All the compile units in this module.
+  ///
+  SmallVector<CompileUnit *, 8> CompileUnits;
+
+  /// MainCU - Some platforms prefer one compile unit per .o file. In such
+  /// cases, all dies are inserted in MainCU.
+  CompileUnit *MainCU;
+
+  /// AbbreviationsSet - Used to uniquely define abbreviations.
+  ///
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+  /// Abbreviations - A list of all the unique abbreviations in use.
+  ///
+  std::vector<DIEAbbrev *> Abbreviations;
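+
+  // How these two members cooperate (a sketch only; the real logic lives in
+  // AssignAbbrevNumber, declared below): profile the abbreviation into a
+  // FoldingSetNodeID, then
+  //
+  //   DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+  //   if (InSet == &Abbrev) Abbreviations.push_back(&Abbrev); // new entry
+  //   else                  /* reuse InSet's abbreviation number */;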
+  /// DirectoryIdMap - Directory name to directory id map.
+  ///
+  StringMap<unsigned> DirectoryIdMap;
+
+  /// DirectoryNames - A list of directory names.
+  SmallVector<std::string, 8> DirectoryNames;
+
+  /// SourceFileIdMap - Source file name to source file id map.
+  ///
+  StringMap<unsigned> SourceFileIdMap;
+
+  /// SourceFileNames - A list of source file names.
+  SmallVector<std::string, 8> SourceFileNames;
+
+  /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+  /// id mapped to a unique id.
+  DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+  /// SourceIds - Reverse map from source id to directory id + file id pair.
+  ///
+  SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
+  /// Lines - List of source line correspondence.
+  std::vector<SrcLineInfo> Lines;
+
+  /// ValuesSet - Used to uniquely define values.
+  ///
+  FoldingSet<DIEValue> ValuesSet;
+
+  /// Values - A list of all the unique values in use.
+  ///
+  std::vector<DIEValue *> Values;
+
+  /// StringPool - A UniqueVector of strings used by indirect references.
+  ///
+  UniqueVector<std::string> StringPool;
+
+  /// SectionMap - Provides a unique id per text section.
+  ///
+  UniqueVector<std::string> SectionMap;
+
+  /// SectionSourceLines - Tracks line numbers per text section.
+  ///
+  std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+  /// didInitial - Flag to indicate if initial emission has been done.
+  ///
+  bool didInitial;
+
+  /// shouldEmit - Flag to indicate if debug information should be emitted.
+  ///
+  bool shouldEmit;
+
+  // FunctionDbgScope - Top level scope for the current function.
+  //
+  DbgScope *FunctionDbgScope;
+
+  /// DbgScopeMap - Tracks the scopes in the current function.
+  DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap;
+
+  /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
+  /// function.
+  DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap;
+
+  /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
+  /// function.
+  DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
+
+  /// InlineInfo - Keep track of inlined functions and their location. This
+  /// information is used to populate debug_inlined section.
+  DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo;
+
+  /// InlinedVariableScopes - Scopes information for the inlined subroutine
+  /// variables.
+  DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes;
+
+  /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
+  /// functions. These are subroutine entries that contain a DW_AT_inline
+  /// attribute.
+  DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap;
+
+  /// InlinedParamMap - A map keeping track of which parameters are assigned to
+  /// which abstract instance.
+  DenseMap<const DIE *, SmallSet<const DIE *, 8> > InlinedParamMap;
+
+  /// AbstractInstanceRootList - List of abstract instance roots of inlined
+  /// functions. These are subroutine entries that contain a DW_AT_inline
+  /// attribute.
+  SmallVector<DbgScope *, 8> AbstractInstanceRootList;
+
+  /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
+  /// scope.
+  SmallVector<DbgScope *, 4> LexicalScopeStack;
+
+  /// CompileUnitOffsets - A map of the offsets of the compile units. This is
+  /// used when calculating the "origin" of a concrete instance of an inlined
+  /// function.
+  DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+
+  /// DebugTimer - Timer for the Dwarf debug writer.
+  Timer *DebugTimer;
+
+  struct FunctionDebugFrameInfo {
+    unsigned Number;
+    std::vector<MachineMove> Moves;
+
+    FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+      : Number(Num), Moves(M) {}
+  };
+
+  std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+  /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+  /// map to the source id. Source id starts at 1.
+  std::pair<unsigned, unsigned>
+  getSourceDirectoryAndFileIds(unsigned SId) const {
+    return SourceIds[SId-1];
+  }
+
+  /// getNumSourceDirectories - Return the number of source directories in the
+  /// debug info.
+ unsigned getNumSourceDirectories() const { + return DirectoryNames.size(); + } + + /// getSourceDirectoryName - Return the name of the directory corresponding + /// to the id. + const std::string &getSourceDirectoryName(unsigned Id) const { + return DirectoryNames[Id - 1]; + } + + /// getSourceFileName - Return the name of the source file corresponding + /// to the id. + const std::string &getSourceFileName(unsigned Id) const { + return SourceFileNames[Id - 1]; + } + + /// getNumSourceIds - Return the number of unique source ids. + unsigned getNumSourceIds() const { + return SourceIds.size(); + } + + /// AssignAbbrevNumber - Define a unique number for the abbreviation. + /// + void AssignAbbrevNumber(DIEAbbrev &Abbrev); + + /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *CreateDIEEntry(DIE *Entry = NULL); + + /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined. + /// + void SetDIEEntry(DIEEntry *Value, DIE *Entry); + + /// AddUInt - Add an unsigned integer attribute data and value. + /// + void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + + /// AddSInt - Add an signed integer attribute data and value. + /// + void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + + /// AddString - Add a string attribute data and value. + /// + void AddString(DIE *Die, unsigned Attribute, unsigned Form, + const std::string &String); + + /// AddLabel - Add a Dwarf label attribute data and value. + /// + void AddLabel(DIE *Die, unsigned Attribute, unsigned Form, + const DWLabel &Label); + + /// AddObjectLabel - Add an non-Dwarf label attribute data and value. + /// + void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form, + const std::string &Label); + + /// AddSectionOffset - Add a section offset label attribute data and value. + /// + void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, + const DWLabel &Label, const DWLabel &Section, + bool isEH = false, bool useSet = true); + + /// AddDelta - Add a label delta attribute data and value. + /// + void AddDelta(DIE *Die, unsigned Attribute, unsigned Form, + const DWLabel &Hi, const DWLabel &Lo); + + /// AddDIEEntry - Add a DIE attribute data and value. + /// + void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) { + Die->AddValue(Attribute, Form, CreateDIEEntry(Entry)); + } + + /// AddBlock - Add block data. + /// + void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + + /// AddSourceLine - Add location information to specified debug information + /// entry. + void AddSourceLine(DIE *Die, const DIVariable *V); + + /// AddSourceLine - Add location information to specified debug information + /// entry. + void AddSourceLine(DIE *Die, const DIGlobal *G); + + void AddSourceLine(DIE *Die, const DIType *Ty); + + /// AddAddress - Add an address attribute to a die based on the location + /// provided. + void AddAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// AddType - Add a new type attribute to the specified entity. + void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); + + /// ConstructTypeDIE - Construct basic type die from DIBasicType. + void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIBasicType BTy); + + /// ConstructTypeDIE - Construct derived type die from DIDerivedType. 
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIDerivedType DTy); + + /// ConstructTypeDIE - Construct type DIE from DICompositeType. + void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType CTy); + + /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange. + void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType. + void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType *CTy); + + /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy); + + /// CreateGlobalVariableDIE - Create new DIE using GV. + DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit, + const DIGlobalVariable &GV); + + /// CreateMemberDIE - Create new member DIE. + DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT); + + /// CreateSubprogramDIE - Create new DIE using SP. + DIE *CreateSubprogramDIE(CompileUnit *DW_Unit, + const DISubprogram &SP, + bool IsConstructor = false, + bool IsInlined = false); + + /// FindCompileUnit - Get the compile unit for the given descriptor. + /// + CompileUnit &FindCompileUnit(DICompileUnit Unit) const; + + /// CreateDbgScopeVariable - Create a new scope variable. + /// + DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); + + /// getOrCreateScope - Returns the scope associated with the given descriptor. + /// + DbgScope *getOrCreateScope(GlobalVariable *V); + + /// ConstructDbgScope - Construct the components of a scope. + /// + void ConstructDbgScope(DbgScope *ParentScope, + unsigned ParentStartID, unsigned ParentEndID, + DIE *ParentDie, CompileUnit *Unit); + + /// ConstructFunctionDbgScope - Construct the scope for the subprogram. + /// + void ConstructFunctionDbgScope(DbgScope *RootScope, + bool AbstractScope = false); + + /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. + /// + void ConstructDefaultDbgScope(MachineFunction *MF); + + /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc + /// tools to recognize the object file contains Dwarf information. + void EmitInitial(); + + /// EmitDIE - Recusively Emits a debug information entry. + /// + void EmitDIE(DIE *Die); + + /// SizeAndOffsetDie - Compute the size and offset of a DIE. + /// + unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last); + + /// SizeAndOffsets - Compute the size and offset of all the DIEs. + /// + void SizeAndOffsets(); + + /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section. + /// + void EmitDebugInfoPerCU(CompileUnit *Unit); + + void EmitDebugInfo(); + + /// EmitAbbreviations - Emit the abbreviation section. + /// + void EmitAbbreviations() const; + + /// EmitEndOfLineMatrix - Emit the last address of the section and the end of + /// the line matrix. + /// + void EmitEndOfLineMatrix(unsigned SectionEnd); + + /// EmitDebugLines - Emit source line information. + /// + void EmitDebugLines(); + + /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. + /// + void EmitCommonDebugFrame(); + + /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame + /// section. + void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo); + + void EmitDebugPubNamesPerCU(CompileUnit *Unit); + + /// EmitDebugPubNames - Emit visible names into a debug pubnames section. 
+ /// + void EmitDebugPubNames(); + + /// EmitDebugStr - Emit visible names into a debug str section. + /// + void EmitDebugStr(); + + /// EmitDebugLoc - Emit visible names into a debug loc section. + /// + void EmitDebugLoc(); + + /// EmitDebugARanges - Emit visible names into a debug aranges section. + /// + void EmitDebugARanges(); + + /// EmitDebugRanges - Emit visible names into a debug ranges section. + /// + void EmitDebugRanges(); + + /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. + /// + void EmitDebugMacInfo(); + + /// EmitDebugInlineInfo - Emit inline info using following format. + /// Section Header: + /// 1. length of section + /// 2. Dwarf version number + /// 3. address size. + /// + /// Entries (one "entry" for each function that was inlined): + /// + /// 1. offset into __debug_str section for MIPS linkage name, if exists; + /// otherwise offset into __debug_str for regular function name. + /// 2. offset into __debug_str section for regular function name. + /// 3. an unsigned LEB128 number indicating the number of distinct inlining + /// instances for the function. + /// + /// The rest of the entry consists of a {die_offset, low_pc} pair for each + /// inlined instance; the die_offset points to the inlined_subroutine die in + /// the __debug_info section, and the low_pc is the starting address for the + /// inlining instance. + void EmitDebugInlineInfo(); + + /// GetOrCreateSourceID - Look up the source id with the given directory and + /// source file names. If none currently exists, create a new id and insert it + /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps + /// as well. + unsigned GetOrCreateSourceID(const std::string &DirName, + const std::string &FileName); + + void ConstructCompileUnit(GlobalVariable *GV); + + /// ConstructCompileUnits - Create a compile unit DIEs. + void ConstructCompileUnits(); + + bool ConstructGlobalVariableDIE(GlobalVariable *GV); + + /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally + /// visible global variables. Return true if at least one global DIE is + /// created. + bool ConstructGlobalVariableDIEs(); + + bool ConstructSubprogram(GlobalVariable *GV); + + /// ConstructSubprograms - Create DIEs for each of the externally visible + /// subprograms. Return true if at least one subprogram DIE is created. + bool ConstructSubprograms(); +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + virtual ~DwarfDebug(); + + /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should + /// be emitted. + bool ShouldEmitDwarfDebug() const { return shouldEmit; } + + /// SetDebugInfo - Create global DIEs and emit initial debug info sections. + /// This is inovked by the target AsmPrinter. + void SetDebugInfo(MachineModuleInfo *mmi); + + /// BeginModule - Emit all Dwarf sections that should come prior to the + /// content. + void BeginModule(Module *M) { + this->M = M; + } + + /// EndModule - Emit all Dwarf sections that should come after the content. + /// + void EndModule(); + + /// BeginFunction - Gather pre-function debug information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(MachineFunction *MF); + + /// EndFunction - Gather and emit post-function debug information. 
+ /// + void EndFunction(MachineFunction *MF); + + /// RecordSourceLine - Records location information and associates it with a + /// label. Returns a unique label ID used to generate a label and provide + /// correspondence to the source line list. + unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); + + /// RecordSourceLine - Records location information and associates it with a + /// label. Returns a unique label ID used to generate a label and provide + /// correspondence to the source line list. + unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + + /// getRecordSourceLineCount - Return the number of source lines in the debug + /// info. + unsigned getRecordSourceLineCount() const { + return Lines.size(); + } + + /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be + /// timed. Look up the source id with the given directory and source file + /// names. If none currently exists, create a new id and insert it in the + /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as + /// well. + unsigned getOrCreateSourceID(const std::string &DirName, + const std::string &FileName); + + /// RecordRegionStart - Indicate the start of a region. + unsigned RecordRegionStart(GlobalVariable *V); + + /// RecordRegionEnd - Indicate the end of a region. + unsigned RecordRegionEnd(GlobalVariable *V); + + /// RecordVariable - Indicate the declaration of a local variable. + void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, + const MachineInstr *MI); + + //// RecordInlinedFnStart - Indicate the start of inlined subroutine. + unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, + unsigned Line, unsigned Col); + + /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. + unsigned RecordInlinedFnEnd(DISubprogram &SP); + + /// RecordVariableScope - Record scope for the variable declared by + /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes + /// for only inlined subroutine variables. Other variables's scopes are + /// determined during RecordVariable(). + void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); +}; + +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp new file mode 100644 index 000000000000..37466ab39a23 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -0,0 +1,706 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +static TimerGroup &getDwarfTimerGroup() { + static TimerGroup DwarfTimerGroup("Dwarf Exception"); + return DwarfTimerGroup; +} + +DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, + const TargetAsmInfo *T) + : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false), + shouldEmitTableModule(false), shouldEmitMovesModule(false), + ExceptionTimer(0) { + if (TimePassesIsEnabled) + ExceptionTimer = new Timer("Dwarf Exception Writer", + getDwarfTimerGroup()); +} + +DwarfException::~DwarfException() { + delete ExceptionTimer; +} + +void DwarfException::EmitCommonEHFrame(const Function *Personality, + unsigned Index) { + // Size and sign of stack growth. + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize(); + + // Begin eh frame section. + Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + + if (!TAI->doesRequireNonLocalEHFrameLabel()) + O << TAI->getEHGlobalPrefix(); + + O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); + + // Define base labels. + EmitLabel("eh_frame_common", Index); + + // Define the eh frame length. + EmitDifference("eh_frame_common_end", Index, + "eh_frame_common_begin", Index, true); + Asm->EOL("Length of Common Information Entry"); + + // EH frame header. + EmitLabel("eh_frame_common_begin", Index); + Asm->EmitInt32((int)0); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(dwarf::DW_CIE_VERSION); + Asm->EOL("CIE Version"); + + // The personality presence indicates that language specific information will + // show up in the eh frame. + Asm->EmitString(Personality ? "zPLR" : "zR"); + Asm->EOL("CIE Augmentation"); + + // Round out reader. + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); + Asm->EOL("CIE Return Address Column"); + + // If there is a personality, we need to indicate the functions location. 
+  if (Personality) {
+    Asm->EmitULEB128Bytes(7);
+    Asm->EOL("Augmentation Size");
+
+    if (TAI->getNeedsIndirectEncoding()) {
+      Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 |
+                    dwarf::DW_EH_PE_indirect);
+      Asm->EOL("Personality (pcrel sdata4 indirect)");
+    } else {
+      Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+      Asm->EOL("Personality (pcrel sdata4)");
+    }
+
+    PrintRelDirective(true);
+    O << TAI->getPersonalityPrefix();
+    Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+    O << TAI->getPersonalitySuffix();
+    if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL"))
+      O << "-" << TAI->getPCSymbol();
+    Asm->EOL("Personality");
+
+    Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+    Asm->EOL("LSDA Encoding (pcrel sdata4)");
+
+    Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+    Asm->EOL("FDE Encoding (pcrel sdata4)");
+  } else {
+    Asm->EmitULEB128Bytes(1);
+    Asm->EOL("Augmentation Size");
+
+    Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+    Asm->EOL("FDE Encoding (pcrel sdata4)");
+  }
+
+  // Indicate locations of general callee saved registers in frame.
+  std::vector<MachineMove> Moves;
+  RI->getInitialFrameState(Moves);
+  EmitFrameMoves(NULL, 0, Moves, true);
+
+  // On Darwin the linker honors the alignment of eh_frame, which means it must
+  // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+  // holes which confuse readers of eh_frame.
+  Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+                     0, 0, false);
+  EmitLabel("eh_frame_common_end", Index);
+
+  Asm->EOL();
+}
+
+/// EmitEHFrame - Emit function exception frame information.
+///
+void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+  assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+         "Should not emit 'available externally' functions at all");
+
+  Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage();
+  Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+  // Externally visible entry into the function's eh frame info. If the
+  // corresponding function is static, this should not be externally visible.
+  if (linkage != Function::InternalLinkage &&
+      linkage != Function::PrivateLinkage) {
+    if (const char *GlobalEHDirective = TAI->getGlobalEHDirective())
+      O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+  }
+
+  // If the corresponding function is a weak definition, this should be too.
+  if ((linkage == Function::WeakAnyLinkage ||
+       linkage == Function::WeakODRLinkage ||
+       linkage == Function::LinkOnceAnyLinkage ||
+       linkage == Function::LinkOnceODRLinkage) &&
+      TAI->getWeakDefDirective())
+    O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+
+  // If there are no calls then you can't unwind. This may mean we can omit the
+  // EH Frame, but some environments do not handle weak absolute symbols. If
+  // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+  // info is to be available for non-EH uses.
+  if (!EHFrameInfo.hasCalls &&
+      !UnwindTablesMandatory &&
+      ((linkage != Function::WeakAnyLinkage &&
+        linkage != Function::WeakODRLinkage &&
+        linkage != Function::LinkOnceAnyLinkage &&
+        linkage != Function::LinkOnceODRLinkage) ||
+       !TAI->getWeakDefDirective() ||
+       TAI->getSupportsWeakOmittedEHFrame())) {
+    O << EHFrameInfo.FnName << " = 0\n";
+    // This name has no connection to the function, so it might get
+    // dead-stripped when the function is not, erroneously. Prohibit
+    // dead-stripping unconditionally.
+ if (const char *UsedDirective = TAI->getUsedDirective()) + O << UsedDirective << EHFrameInfo.FnName << "\n\n"; + } else { + O << EHFrameInfo.FnName << ":\n"; + + // EH frame header. + EmitDifference("eh_frame_end", EHFrameInfo.Number, + "eh_frame_begin", EHFrameInfo.Number, true); + Asm->EOL("Length of Frame Information Entry"); + + EmitLabel("eh_frame_begin", EHFrameInfo.Number); + + if (TAI->doesRequireNonLocalEHFrameLabel()) { + PrintRelDirective(true, true); + PrintLabelName("eh_frame_begin", EHFrameInfo.Number); + + if (!TAI->isAbsoluteEHSectionOffsets()) + O << "-EH_frame" << EHFrameInfo.PersonalityIndex; + } else { + EmitSectionOffset("eh_frame_begin", "eh_frame_common", + EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, + true, true, false); + } + + Asm->EOL("FDE CIE offset"); + + EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + Asm->EOL("FDE initial location"); + EmitDifference("eh_func_end", EHFrameInfo.Number, + "eh_func_begin", EHFrameInfo.Number, true); + Asm->EOL("FDE address range"); + + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (EHFrameInfo.PersonalityIndex) { + Asm->EmitULEB128Bytes(4); + Asm->EOL("Augmentation size"); + + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, true, true); + else + Asm->EmitInt32((int)0); + Asm->EOL("Language Specific Data Area"); + } else { + Asm->EmitULEB128Bytes(0); + Asm->EOL("Augmentation size"); + } + + // Indicate locations of function specific callee saved registers in frame. + EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, + true); + + // On Darwin the linker honors the alignment of eh_frame, which means it + // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you + // get holes which confuse readers of eh_frame. + Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, + 0, 0, false); + EmitLabel("eh_frame_end", EHFrameInfo.Number); + + // If the function is marked used, this table should be also. We cannot + // make the mark unconditional in this case, since retaining the table also + // retains the function in this case, and there is code around that depends + // on unused functions (calling undefined externals) being dead-stripped to + // link correctly. Yes, there really is. + if (MMI->getUsedFunctions().count(EHFrameInfo.function)) + if (const char *UsedDirective = TAI->getUsedDirective()) + O << UsedDirective << EHFrameInfo.FnName << "\n\n"; + } +} + +/// EmitExceptionTable - Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type ids and next +/// action offset. Starting with the action index from the landing pad +/// site, each type Id is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. 
If no type id is found, the frame is
+/// unwound and handling continues.
+/// 3. Type id table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+                                       const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+  unsigned Count = 0;
+
+  for (; Count != MinSize; ++Count)
+    if (LIds[Count] != RIds[Count])
+      return Count;
+
+  return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+  for (unsigned i = 0; i != MinSize; ++i)
+    if (LIds[i] != RIds[i])
+      return LIds[i] < RIds[i];
+
+  return LSize < RSize;
+}
+
+void DwarfException::EmitExceptionTable() {
+  const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+  if (PadInfos.empty()) return;
+
+  // Sort the landing pads in order of their type ids. This is used to fold
+  // duplicate actions.
+  SmallVector<const LandingPadInfo *, 64> LandingPads;
+  LandingPads.reserve(PadInfos.size());
+  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+    LandingPads.push_back(&PadInfos[i]);
+  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+  // Negative type ids index into FilterIds, positive type ids index into
+  // TypeInfos. The value written for a positive type id is just the type id
+  // itself. For a negative type id, however, the value written is the
+  // (negative) byte offset of the corresponding FilterIds entry. The byte
+  // offset is usually equal to the type id, because the FilterIds entries are
+  // written using a variable width encoding which outputs one byte per entry as
+  // long as the value written is not too large, but can differ. This kind of
+  // complication does not occur for positive type ids because type infos are
+  // output using a fixed width encoding. FilterOffsets[i] holds the byte
+  // offset corresponding to FilterIds[i].
+  SmallVector<int, 16> FilterOffsets;
+  FilterOffsets.reserve(FilterIds.size());
+  int Offset = -1;
+  for (std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+         E = FilterIds.end(); I != E; ++I) {
+    FilterOffsets.push_back(Offset);
+    Offset -= TargetAsmInfo::getULEB128Size(*I);
+  }
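+
+  // Worked example: FilterIds {1, 300, 5} produces FilterOffsets {-1, -2, -4},
+  // since getULEB128Size(1) == 1, getULEB128Size(300) == 2, and
+  // getULEB128Size(5) == 1; the two-byte encoding of 300 shifts the offset of
+  // every entry after it.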
+
+  // Compute the actions table and gather the first action index for each
+  // landing pad site.
+  SmallVector<ActionEntry, 32> Actions;
+  SmallVector<unsigned, 64> FirstActions;
+  FirstActions.reserve(LandingPads.size());
+
+  int FirstAction = 0;
+  unsigned SizeActions = 0;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LP = LandingPads[i];
+    const std::vector<int> &TypeIds = LP->TypeIds;
+    const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+    unsigned SizeSiteActions = 0;
+
+    if (NumShared < TypeIds.size()) {
+      unsigned SizeAction = 0;
+      ActionEntry *PrevAction = 0;
+
+      if (NumShared) {
+        const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+        assert(Actions.size());
+        PrevAction = &Actions.back();
+        SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+          TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+
+        for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+          SizeAction -=
+            TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+          SizeAction += -PrevAction->NextAction;
+          PrevAction = PrevAction->Previous;
+        }
+      }
+
+      // Compute the actions.
+      for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+        int TypeID = TypeIds[I];
+        assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+        unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+        int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+        SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+        SizeSiteActions += SizeAction;
+
+        ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+        Actions.push_back(Action);
+
+        PrevAction = &Actions.back();
+      }
+
+      // Record the first action of the landing pad site.
+      FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+    } // else identical - re-use previous FirstAction
+
+    FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+    SizeActions += SizeSiteActions;
+  }
+
+  // Compute the call-site table. The entry for an invoke has a try-range
+  // containing the call, a non-zero landing pad and an appropriate action. The
+  // entry for an ordinary call has a try-range containing the call and zero for
+  // the landing pad and the action. Calls marked 'nounwind' have no entry and
+  // must not be contained in the try-range of any entry - they form gaps in the
+  // table. Entries must be ordered by try-range address.
+  SmallVector<CallSiteEntry, 64> CallSites;
+
+  RangeMapType PadMap;
+
+  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+  // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced.
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LandingPad = LandingPads[i];
+    for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+      PadRange P = { i, j };
+      PadMap[BeginLabel] = P;
+    }
+  }
+
+  // The end label of the previous invoke or nounwind try-range.
+  unsigned LastLabel = 0;
+
+  // Whether there is a potentially throwing instruction (currently this means
+  // an ordinary call) between the end of the previous try-range and now.
+  bool SawPotentiallyThrowing = false;
+
+  // Whether the last callsite entry was for an invoke.
+  bool PreviousIsInvoke = false;
+
+  // Visit all instructions in order of address.
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+         MI != E; ++MI) {
+      if (!MI->isLabel()) {
+        SawPotentiallyThrowing |= MI->getDesc().isCall();
+        continue;
+      }
+
+      unsigned BeginLabel = MI->getOperand(0).getImm();
+      assert(BeginLabel && "Invalid label!");
+
+      // End of the previous try-range?
+ if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + PadRange P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // If some instruction between the previous try-range and this one may + // throw, create a call-site entry with no landing pad for the region + // between the try-ranges. + if (SawPotentiallyThrowing) { + CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + CallSites.push_back(Site); + PreviousIsInvoke = false; + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + assert(BeginLabel && LastLabel && "Invalid landing pad!"); + + if (LandingPad->LandingPadLabel) { + // This try-range is for an invoke. + CallSiteEntry Site = {BeginLabel, LastLabel, + LandingPad->LandingPadLabel, + FirstActions[P.PadIndex]}; + + // Try to merge with the previous call-site. + if (PreviousIsInvoke) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + CallSites.push_back(Site); + PreviousIsInvoke = true; + } else { + // Create a gap. + PreviousIsInvoke = false; + } + } + } + + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (SawPotentiallyThrowing) { + CallSiteEntry Site = {LastLabel, 0, 0, 0}; + CallSites.push_back(Site); + } + + // Final tallies. + + // Call sites. + const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4 + const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4 + const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4 + unsigned SizeSites = CallSites.size() * (SiteStartSize + + SiteLengthSize + + LandingPadSize); + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) + SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + + // Type infos. + const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr + unsigned SizeTypes = TypeInfos.size() * TypeInfoSize; + + unsigned TypeOffset = sizeof(int8_t) + // Call site format + TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + SizeSites + SizeActions + SizeTypes; + + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset + TypeOffset; + + unsigned SizeAlign = (4 - TotalSize) & 3; + + // Begin the exception table. + Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + Asm->EmitAlignment(2, 0, 0, false); + O << "GCC_except_table" << SubprogramCount << ":\n"; + + for (unsigned i = 0; i != SizeAlign; ++i) { + Asm->EmitInt8(0); + Asm->EOL("Padding"); + } + + EmitLabel("exception", SubprogramCount); + + // Emit the header. 
+  Asm->EmitInt8(dwarf::DW_EH_PE_omit);
+  Asm->EOL("LPStart format (DW_EH_PE_omit)");
+  Asm->EmitInt8(dwarf::DW_EH_PE_absptr);
+  Asm->EOL("TType format (DW_EH_PE_absptr)");
+  Asm->EmitULEB128Bytes(TypeOffset);
+  Asm->EOL("TType base offset");
+  Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
+  Asm->EOL("Call site format (DW_EH_PE_udata4)");
+  Asm->EmitULEB128Bytes(SizeSites);
+  Asm->EOL("Call-site table length");
+
+  // Emit the landing pad site information.
+  for (unsigned i = 0; i < CallSites.size(); ++i) {
+    CallSiteEntry &S = CallSites[i];
+    const char *BeginTag;
+    unsigned BeginNumber;
+
+    if (!S.BeginLabel) {
+      BeginTag = "eh_func_begin";
+      BeginNumber = SubprogramCount;
+    } else {
+      BeginTag = "label";
+      BeginNumber = S.BeginLabel;
+    }
+
+    EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
+                      true, true);
+    Asm->EOL("Region start");
+
+    if (!S.EndLabel)
+      EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
+                     true);
+    else
+      EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+
+    Asm->EOL("Region length");
+
+    if (!S.PadLabel)
+      Asm->EmitInt32(0);
+    else
+      EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+                        true, true);
+
+    Asm->EOL("Landing pad");
+
+    Asm->EmitULEB128Bytes(S.Action);
+    Asm->EOL("Action");
+  }
+
+  // Emit the actions.
+  for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+    ActionEntry &Action = Actions[I];
+
+    Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
+    Asm->EOL("TypeInfo index");
+    Asm->EmitSLEB128Bytes(Action.NextAction);
+    Asm->EOL("Next action");
+  }
+
+  // Emit the type ids.
+  for (unsigned M = TypeInfos.size(); M; --M) {
+    GlobalVariable *GV = TypeInfos[M - 1];
+    PrintRelDirective();
+
+    if (GV) {
+      std::string GLN;
+      O << Asm->getGlobalLinkName(GV, GLN);
+    } else {
+      O << "0";
+    }
+
+    Asm->EOL("TypeInfo");
+  }
+
+  // Emit the filter typeids.
+  for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+    unsigned TypeID = FilterIds[j];
+    Asm->EmitULEB128Bytes(TypeID);
+    Asm->EOL("Filter TypeInfo index");
+  }
+
+  Asm->EmitAlignment(2, 0, 0, false);
+}
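+
+// A sketch of the table this routine emits for a function with one invoke
+// (encoding byte values per the DWARF EH spec; ".uleb" stands in for the
+// ULEB128 bytes emitted; label spellings are illustrative):
+//
+//   GCC_except_table1:
+//   Lexception1:
+//     .byte   0xff                       ; LPStart format (DW_EH_PE_omit)
+//     .byte   0x00                       ; TType format (DW_EH_PE_absptr)
+//     .uleb   <TType base offset>
+//     .byte   0x03                       ; Call site format (DW_EH_PE_udata4)
+//     .uleb   <call-site table length>
+//     .long   <region start>             ; relative to eh_func_begin1
+//     .long   <region length>
+//     .long   <landing pad label>
+//     .uleb   <action index>
+//     ; ...actions, type infos, and filter type ids follow...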
+ EmitLabel("eh_func_begin", ++SubprogramCount); + } + + shouldEmitTableModule |= shouldEmitTable; + shouldEmitMovesModule |= shouldEmitMoves; + + if (TimePassesIsEnabled) + ExceptionTimer->stopTimer(); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfException::EndFunction() { + if (TimePassesIsEnabled) + ExceptionTimer->startTimer(); + + if (shouldEmitMoves || shouldEmitTable) { + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); + + // Save EH frame information + std::string Name; + EHFrames.push_back( + FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name), + SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + MF->getFunction())); + } + + if (TimePassesIsEnabled) + ExceptionTimer->stopTimer(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h new file mode 100644 index 000000000000..4479af243e90 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -0,0 +1,178 @@ +//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ +#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ + +#include "DIE.h" +#include "DwarfPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/DenseMap.h" +#include + +namespace llvm { + +struct LandingPadInfo; +class MachineModuleInfo; +class TargetAsmInfo; +class Timer; +class raw_ostream; + +//===----------------------------------------------------------------------===// +/// DwarfException - Emits Dwarf exception handling directives. +/// +class VISIBILITY_HIDDEN DwarfException : public Dwarf { + struct FunctionEHFrameInfo { + std::string FnName; + unsigned Number; + unsigned PersonalityIndex; + bool hasCalls; + bool hasLandingPads; + std::vector Moves; + const Function * function; + + FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P, + bool hC, bool hL, + const std::vector &M, + const Function *f): + FnName(FN), Number(Num), PersonalityIndex(P), + hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { } + }; + + std::vector EHFrames; + + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; + + /// shouldEmitFrameModule - Per-module flag to indicate if frame moves + /// should be emitted. + bool shouldEmitMovesModule; + + /// ExceptionTimer - Timer for the Dwarf exception writer. + Timer *ExceptionTimer; + + /// EmitCommonEHFrame - Emit the common eh unwind frame. + /// + void EmitCommonEHFrame(const Function *Personality, unsigned Index); + + /// EmitEHFrame - Emit function exception frame information. + /// + void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo); + + /// EmitExceptionTable - Emit landing pads and actions. 
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed. Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type Id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found then the frame is unwound and handling continues.
+  ///  3. Type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function. This table is reverse indexed, base 1.
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+  struct KeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+    static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+    static bool isPod() { return true; }
+  };
+
+  /// ActionEntry - Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    struct ActionEntry *Previous;
+  };
+
+  /// PadRange - Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+  /// CallSiteEntry - Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    unsigned BeginLabel; // zero indicates the start of the function.
+    unsigned EndLabel;   // zero indicates the end of the function.
+    // The landing pad starts at PadLabel.
+    unsigned PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  void EmitExceptionTable();
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+  virtual ~DwarfException();
+
+  /// SetModuleInfo - Set machine module information when it's known that pass
+  /// manager has created it. Set by the target AsmPrinter.
+  void SetModuleInfo(MachineModuleInfo *mmi) {
+    MMI = mmi;
+  }
+
+  /// BeginModule - Emit all exception information that should come prior to the
+  /// content.
+  void BeginModule(Module *M) {
+    this->M = M;
+  }
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  void BeginFunction(MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
new file mode 100644
index 000000000000..8021b7c97bb0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
@@ -0,0 +1,35 @@
+//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+
+using namespace llvm;
+
+/// Profile - Used to gather unique data for the folding set.
+///
+void DWLabel::Profile(FoldingSetNodeID &ID) const {
+  ID.AddString(Tag);
+  ID.AddInteger(Number);
+}
+
+#ifndef NDEBUG
+void DWLabel::print(std::ostream *O) const {
+  if (O) print(*O);
+}
+void DWLabel::print(std::ostream &O) const {
+  O << "." << Tag;
+  if (Number) O << Number;
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
new file mode 100644
index 000000000000..b49390334bd2
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h
@@ -0,0 +1,56 @@
+//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
+#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
+
+#include "llvm/Support/Compiler.h"
+#include <iosfwd>
+#include <vector>
+
+namespace llvm {
+  class FoldingSetNodeID;
+
+  //===--------------------------------------------------------------------===//
+  /// DWLabel - Labels are used to track locations in the assembler file.
+  /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
+  /// where the tag is a category of label (Ex. location) and number is a value
+  /// unique in that category.
+  class VISIBILITY_HIDDEN DWLabel {
+    /// Tag - Label category tag. Should always be a statically declared C
+    /// string.
+    ///
+    const char *Tag;
+
+    /// Number - Value to make label unique.
+    ///
+    unsigned Number;
+  public:
+    DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+    // Accessors.
+    const char *getTag() const { return Tag; }
+    unsigned getNumber() const { return Number; }
+
+    /// Profile - Used to gather unique data for the folding set.
+ /// + void Profile(FoldingSetNodeID &ID) const; + +#ifndef NDEBUG + void print(std::ostream *O) const; + void print(std::ostream &O) const; +#endif + }; +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp new file mode 100644 index 000000000000..45e7dd305807 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -0,0 +1,235 @@ +//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Emit general DWARF directives. +// +//===----------------------------------------------------------------------===// + +#include "DwarfPrinter.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include + +using namespace llvm; + +Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + const char *flavor) +: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()), + RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), + SubprogramCount(0), Flavor(flavor), SetCounter(1) {} + +void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const { + if (isInSection && TAI->getDwarfSectionOffsetDirective()) + O << TAI->getDwarfSectionOffsetDirective(); + else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); +} + +/// PrintLabelName - Print label name in form used by Dwarf writer. +/// +void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const { + O << TAI->getPrivateGlobalPrefix() << Tag; + if (Number) O << Number; +} +void Dwarf::PrintLabelName(const char *Tag, unsigned Number, + const char *Suffix) const { + O << TAI->getPrivateGlobalPrefix() << Tag; + if (Number) O << Number; + O << Suffix; +} + +/// EmitLabel - Emit location label for internal use by Dwarf. +/// +void Dwarf::EmitLabel(const char *Tag, unsigned Number) const { + PrintLabelName(Tag, Number); + O << ":\n"; +} + +/// EmitReference - Emit a reference to a label. +/// +void Dwarf::EmitReference(const char *Tag, unsigned Number, + bool IsPCRelative, bool Force32Bit) const { + PrintRelDirective(Force32Bit); + PrintLabelName(Tag, Number); + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); +} +void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, + bool Force32Bit) const { + PrintRelDirective(Force32Bit); + O << Name; + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); +} + +/// EmitDifference - Emit the difference between two labels. Some assemblers do +/// not behave with absolute expressions with data directives, so there is an +/// option (needsSet) to use an intermediary set expression. 
+void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi,
+                           const char *TagLo, unsigned NumberLo,
+                           bool IsSmall) {
+  if (TAI->needsSet()) {
+    O << "\t.set\t";
+    PrintLabelName("set", SetCounter, Flavor);
+    O << ",";
+    PrintLabelName(TagHi, NumberHi);
+    O << "-";
+    PrintLabelName(TagLo, NumberLo);
+    O << "\n";
+
+    PrintRelDirective(IsSmall);
+    PrintLabelName("set", SetCounter, Flavor);
+    ++SetCounter;
+  } else {
+    PrintRelDirective(IsSmall);
+    PrintLabelName(TagHi, NumberHi);
+    O << "-";
+    PrintLabelName(TagLo, NumberLo);
+  }
+}
+
+void Dwarf::EmitSectionOffset(const char* Label, const char* Section,
+                              unsigned LabelNumber, unsigned SectionNumber,
+                              bool IsSmall, bool isEH,
+                              bool useSet) {
+  bool printAbsolute = false;
+  if (isEH)
+    printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+  else
+    printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+  if (TAI->needsSet() && useSet) {
+    O << "\t.set\t";
+    PrintLabelName("set", SetCounter, Flavor);
+    O << ",";
+    PrintLabelName(Label, LabelNumber);
+
+    if (!printAbsolute) {
+      O << "-";
+      PrintLabelName(Section, SectionNumber);
+    }
+
+    O << "\n";
+    PrintRelDirective(IsSmall);
+    PrintLabelName("set", SetCounter, Flavor);
+    ++SetCounter;
+  } else {
+    PrintRelDirective(IsSmall, true);
+    PrintLabelName(Label, LabelNumber);
+
+    if (!printAbsolute) {
+      O << "-";
+      PrintLabelName(Section, SectionNumber);
+    }
+  }
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+                           const std::vector<MachineMove> &Moves, bool isEH) {
+  int stackGrowth =
+    Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+      TargetFrameInfo::StackGrowsUp ?
+        TD->getPointerSize() : -TD->getPointerSize();
+  bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    unsigned LabelID = Move.getLabelID();
+
+    if (LabelID) {
+      LabelID = MMI->MappedLabel(LabelID);
+
+      // Throw out move if the label is invalid.
+      if (!LabelID) continue;
+    }
+
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+
+    // Advance row if new location.
+    if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+      Asm->EmitInt8(dwarf::DW_CFA_advance_loc4);
+      Asm->EOL("DW_CFA_advance_loc4");
+      EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+      Asm->EOL();
+
+      BaseLabelID = LabelID;
+      BaseLabel = "label";
+      IsLocal = true;
+    }
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      if (!Src.isReg()) {
+        if (Src.getReg() == MachineLocation::VirtualFP) {
+          Asm->EmitInt8(dwarf::DW_CFA_def_cfa_offset);
+          Asm->EOL("DW_CFA_def_cfa_offset");
+        } else {
+          Asm->EmitInt8(dwarf::DW_CFA_def_cfa);
+          Asm->EOL("DW_CFA_def_cfa");
+          Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), isEH));
+          Asm->EOL("Register");
+        }
+
+        int Offset = -Src.getOffset();
+
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else {
+        assert(0 && "Machine move not supported yet.");
+      }
+    } else if (Src.isReg() &&
+               Src.getReg() == MachineLocation::VirtualFP) {
+      if (Dst.isReg()) {
+        Asm->EmitInt8(dwarf::DW_CFA_def_cfa_register);
+        Asm->EOL("DW_CFA_def_cfa_register");
+        Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
+        Asm->EOL("Register");
+      } else {
+        assert(0 && "Machine move not supported yet.");
+      }
+    } else {
+      unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+      int Offset = Dst.getOffset() / stackGrowth;
+
+      if (Offset < 0) {
+        Asm->EmitInt8(dwarf::DW_CFA_offset_extended_sf);
+        Asm->EOL("DW_CFA_offset_extended_sf");
+        Asm->EmitULEB128Bytes(Reg);
+        Asm->EOL("Reg");
+        Asm->EmitSLEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else if (Reg < 64) {
+        Asm->EmitInt8(dwarf::DW_CFA_offset + Reg);
+        if (Asm->isVerbose())
+          Asm->EOL("DW_CFA_offset + Reg (" + utostr(Reg) + ")");
+        else
+          Asm->EOL();
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else {
+        Asm->EmitInt8(dwarf::DW_CFA_offset_extended);
+        Asm->EOL("DW_CFA_offset_extended");
+        Asm->EmitULEB128Bytes(Reg);
+        Asm->EOL("Reg");
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      }
+    }
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
new file mode 100644
index 000000000000..6e75992cb07c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -0,0 +1,153 @@
+//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+
+#include "DwarfLabel.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace llvm {
+  class AsmPrinter;
+  class MachineFunction;
+  class MachineModuleInfo;
+  class Module;
+  class TargetAsmInfo;
+  class TargetData;
+  class TargetRegisterInfo;
+
+  class VISIBILITY_HIDDEN Dwarf {
+  protected:
+    //===------------------------------------------------------------------===//
+    // Core attributes used by the DWARF printer.
+    //
+
+    /// O - Stream to .s file.
+    ///
+    raw_ostream &O;
+
+    /// Asm - Target of Dwarf emission.
+    ///
+    AsmPrinter *Asm;
+
+    /// TAI - Target asm information.
+    ///
+    const TargetAsmInfo *TAI;
+
+    /// TD - Target data.
+    ///
+    const TargetData *TD;
+
+    /// RI - Register Information.
+    ///
+    const TargetRegisterInfo *RI;
+
+    /// M - Current module.
+    ///
+    Module *M;
+
+    /// MF - Current machine function.
+    ///
+    MachineFunction *MF;
+
+    /// MMI - Collected machine module information.
+    ///
+    MachineModuleInfo *MMI;
+
+    /// SubprogramCount - The running count of functions being compiled.
+    ///
+    unsigned SubprogramCount;
+
+    /// Flavor - A unique string indicating what dwarf producer this is, used to
+    /// unique labels.
+    ///
+    const char * const Flavor;
+
+    /// SetCounter - A unique number for each '.set' directive.
+    ///
+    unsigned SetCounter;
+
+    Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+          const char *flavor);
+  public:
+    //===------------------------------------------------------------------===//
+    // Accessors.
+    //
+    const AsmPrinter *getAsm() const { return Asm; }
+    MachineModuleInfo *getMMI() const { return MMI; }
+    const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+    const TargetData *getTargetData() const { return TD; }
+
+    void PrintRelDirective(bool Force32Bit = false,
+                           bool isInSection = false) const;
+
+    /// PrintLabelName - Print label name in form used by Dwarf writer.
+    ///
+    void PrintLabelName(const DWLabel &Label) const {
+      PrintLabelName(Label.getTag(), Label.getNumber());
+    }
+    void PrintLabelName(const char *Tag, unsigned Number) const;
+    void PrintLabelName(const char *Tag, unsigned Number,
+                        const char *Suffix) const;
+
+    /// EmitLabel - Emit location label for internal use by Dwarf.
+    ///
+    void EmitLabel(const DWLabel &Label) const {
+      EmitLabel(Label.getTag(), Label.getNumber());
+    }
+    void EmitLabel(const char *Tag, unsigned Number) const;
+
+    /// EmitReference - Emit a reference to a label.
+    ///
+    void EmitReference(const DWLabel &Label, bool IsPCRelative = false,
+                       bool Force32Bit = false) const {
+      EmitReference(Label.getTag(), Label.getNumber(),
+                    IsPCRelative, Force32Bit);
+    }
+    void EmitReference(const char *Tag, unsigned Number,
+                       bool IsPCRelative = false,
+                       bool Force32Bit = false) const;
+    void EmitReference(const std::string &Name, bool IsPCRelative = false,
+                       bool Force32Bit = false) const;
+
+    /// EmitDifference - Emit the difference between two labels.  Some
+    /// assemblers do not behave with absolute expressions with data directives,
+    /// so there is an option (needsSet) to use an intermediary set expression.
+    void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
+                        bool IsSmall = false) {
+      EmitDifference(LabelHi.getTag(), LabelHi.getNumber(),
+                     LabelLo.getTag(), LabelLo.getNumber(),
+                     IsSmall);
+    }
+    void EmitDifference(const char *TagHi, unsigned NumberHi,
+                        const char *TagLo, unsigned NumberLo,
+                        bool IsSmall = false);
+
+    void EmitSectionOffset(const char* Label, const char* Section,
+                           unsigned LabelNumber, unsigned SectionNumber,
+                           bool IsSmall = false, bool isEH = false,
+                           bool useSet = true);
+
+    /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+    /// frame.
+    void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+                        const std::vector<MachineMove> &Moves, bool isEH);
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
new file mode 100644
index 000000000000..483ee559ffe9
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -0,0 +1,129 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+static RegisterPass<DwarfWriter>
+X("dwarfwriter", "DWARF Information Writer");
+char DwarfWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter()
+  : ImmutablePass(&ID), DD(0), DE(0) {}
+
+DwarfWriter::~DwarfWriter() {
+  delete DE;
+  delete DD;
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M,
+                              MachineModuleInfo *MMI,
+                              raw_ostream &OS, AsmPrinter *A,
+                              const TargetAsmInfo *T) {
+  DE = new DwarfException(OS, A, T);
+  DD = new DwarfDebug(OS, A, T);
+  DE->BeginModule(M);
+  DD->BeginModule(M);
+  DD->SetDebugInfo(MMI);
+  DE->SetModuleInfo(MMI);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+  DE->EndModule();
+  DD->EndModule();
+}
+
+/// BeginFunction - Gather pre-function debug information.  Assumes being
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(MachineFunction *MF) {
+  DE->BeginFunction(MF);
+  DD->BeginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction(MachineFunction *MF) {
+  DD->EndFunction(MF);
+  DE->EndFunction();
+
+  if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
+    // Clear function debug information.
+    MMI->EndFunction();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label.  Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
+                                       DICompileUnit CU) {
+  return DD->RecordSourceLine(Line, Col, CU);
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) {
+  return DD->RecordRegionStart(V);
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) {
+  return DD->RecordRegionEnd(V);
+}
+
+/// getRecordSourceLineCount - Count source lines.
+unsigned DwarfWriter::getRecordSourceLineCount() {
+  return DD->getRecordSourceLineCount();
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+                                 const MachineInstr *MI) {
+  DD->RecordVariable(GV, FrameIndex, MI);
+}
+
+/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+/// be emitted.
+bool DwarfWriter::ShouldEmitDwarfDebug() const {
+  return DD && DD->ShouldEmitDwarfDebug();
+}
+
+/// RecordInlinedFnStart - Global variable GV is inlined at the location marked
+/// by LabelID label.
+unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
+                                           unsigned Line, unsigned Col) {
+  return DD->RecordInlinedFnStart(SP, CU, Line, Col);
+}
+
+/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
+unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
+  return DD->RecordInlinedFnEnd(SP);
+}
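An aside on the structure just shown: DwarfWriter is a pure facade over DwarfDebug and DwarfException, and the Begin/End calls bracket in nested order (DE begins first and ends last). A toy reduction of that pattern, with hypothetical stand-in classes rather than the real DwarfDebug/DwarfException API:

    // Sketch only: facade owning two emitters, forwarding events in
    // nested (bracketing) order, as DwarfWriter does above.
    #include <iostream>

    struct DebugHalf {
      void BeginFunction() { std::cout << "debug: begin function\n"; }
      void EndFunction()   { std::cout << "debug: end function\n"; }
    };
    struct ExceptionHalf {
      void BeginFunction() { std::cout << "eh: begin function\n"; }
      void EndFunction()   { std::cout << "eh: end function\n"; }
    };

    class Writer {
      DebugHalf *DD;
      ExceptionHalf *DE;
    public:
      Writer() : DD(new DebugHalf), DE(new ExceptionHalf) {}
      ~Writer() { delete DE; delete DD; }
      // EH opens first, debug closes first: the debug scope nests inside.
      void BeginFunction() { DE->BeginFunction(); DD->BeginFunction(); }
      void EndFunction()   { DD->EndFunction();   DE->EndFunction(); }
    };

    int main() {
      Writer W;
      W.BeginFunction();
      W.EndFunction();
    }

The real EndFunction additionally asks MachineModuleInfo to clear per-function state, as the code above shows.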
+/// RecordVariableScope - Record scope for the variable declared by
+/// DeclareMI.  DeclareMI must describe TargetInstrInfo::DECLARE.
+void DwarfWriter::RecordVariableScope(DIVariable &DV,
+                                      const MachineInstr *DeclareMI) {
+  DD->RecordVariableScope(DV, DeclareMI);
+}
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 000000000000..cb5b3f6c59d5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..8ba903a65d79
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,160 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+  class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
+  public:
+    void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                       const TargetAsmInfo &TAI);
+
+    void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                        const TargetAsmInfo &TAI);
+  };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
+                           const TargetAsmInfo &TAI, const char *Id) {
+  const std::string &MId = M.getModuleIdentifier();
+
+  std::string Mangled;
+  Mangled += TAI.getGlobalPrefix();
+  Mangled += "caml";
+  size_t Letter = Mangled.size();
+  Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  Mangled += "__";
+  Mangled += Id;
+
+  // Capitalize the first letter of the module name.
+  Mangled[Letter] = toupper(Mangled[Letter]);
+
+  if (const char *GlobalDirective = TAI.getGlobalDirective())
+    OS << GlobalDirective << Mangled << "\n";
+  OS << Mangled << ":\n";
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                           const TargetAsmInfo &TAI) {
+  AP.SwitchToSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+}
+
+/// finishAssembly - Print the frametable.
+/// The ocaml frametable format is thus:
+///
+///   extern "C" struct align(sizeof(intptr_t)) {
+///     uint16_t NumDescriptors;
+///     struct align(sizeof(intptr_t)) {
+///       void *ReturnAddress;
+///       uint16_t FrameSize;
+///       uint16_t NumLiveOffsets;
+///       uint16_t LiveOffsets[NumLiveOffsets];
+///     } Descriptors[NumDescriptors];
+///   } caml${module}__frametable;
+///
+/// Note that this precludes programs from stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                            const TargetAsmInfo &TAI) {
+  const char *AddressDirective;
+  int AddressAlignLog;
+  if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+    AddressDirective = TAI.getData32bitsDirective();
+    AddressAlignLog = 2;
+  } else {
+    AddressDirective = TAI.getData64bitsDirective();
+    AddressAlignLog = 3;
+  }
+
+  AP.SwitchToSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+
+  OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
+  AP.EOL();
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+
+    uint64_t FrameSize = FI.getFrameSize();
+    if (FrameSize >= 1<<16) {
+      cerr << "Function '" << FI.getFunction().getNameStart()
+           << "' is too large for the ocaml GC! "
+           << "Frame size " << FrameSize << " >= 65536.\n";
+      cerr << "(" << uintptr_t(&FI) << ")\n";
+      abort(); // Very rude!
+    }
+
+    OS << "\t" << TAI.getCommentString() << " live roots for "
+       << FI.getFunction().getNameStart() << "\n";
+
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      size_t LiveCount = FI.live_size(J);
+      if (LiveCount >= 1<<16) {
+        cerr << "Function '" << FI.getFunction().getNameStart()
+             << "' is too large for the ocaml GC! "
+             << "Live root count " << LiveCount << " >= 65536.\n";
+        abort(); // Very rude!
+      }
+
+      OS << AddressDirective
+         << TAI.getPrivateGlobalPrefix() << "label" << J->Num;
+      AP.EOL("call return address");
+
+      AP.EmitInt16(FrameSize);
+      AP.EOL("stack frame size");
+
+      AP.EmitInt16(LiveCount);
+      AP.EOL("live root count");
+
+      for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+                                         KE = FI.live_end(J); K != KE; ++K) {
+        assert(K->StackOffset < 1<<16 &&
+               "GC root stack offset is outside of fixed stack frame and out "
+               "of range for ocaml GC!");
+
+        OS << "\t.word\t" << K->StackOffset;
+        AP.EOL("stack offset");
+      }
+
+      AP.EmitAlignment(AddressAlignLog);
+    }
+  }
+}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..26353035ae2f
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1204 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation, it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+                              cl::init(cl::BOU_UNSET), cl::Hidden);
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+          cl::desc("Max number of predecessors to consider tail merging"),
+          cl::init(150), cl::Hidden);
+
+namespace {
+  struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass {
+    static char ID;
+    explicit BranchFolder(bool defaultEnableTailMerge) :
+        MachineFunctionPass(&ID) {
+      switch (FlagEnableTailMerge) {
+      case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+      case cl::BOU_TRUE: EnableTailMerge = true; break;
+      case cl::BOU_FALSE: EnableTailMerge = false; break;
+      }
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+    const TargetInstrInfo *TII;
+    MachineModuleInfo *MMI;
+    bool MadeChange;
+  private:
+    // Tail Merging.
+    bool EnableTailMerge;
+    bool TailMergeBlocks(MachineFunction &MF);
+    bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+                        MachineBasicBlock* PredBB);
+    void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                 MachineBasicBlock *NewDest);
+    MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+                                  MachineBasicBlock::iterator BBI1);
+    unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+    void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+                              MachineBasicBlock* PredBB);
+    unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                       unsigned maxCommonTailLength);
+
+    typedef std::pair<unsigned, MachineBasicBlock*> MergePotentialsElt;
+    typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+    std::vector<MergePotentialsElt> MergePotentials;
+
+    typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+    std::vector<SameTailElt> SameTails;
+
+    const TargetRegisterInfo *RegInfo;
+    RegScavenger *RS;
+    // Branch optzn.
+    bool OptimizeBranches(MachineFunction &MF);
+    void OptimizeBlock(MachineBasicBlock *MBB);
+    void RemoveDeadBlock(MachineBasicBlock *MBB);
+    bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+    bool CanFallThrough(MachineBasicBlock *CurBB);
+    bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+                        MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond);
+  };
+  char BranchFolder::ID = 0;
+}
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+  return new BranchFolder(DefaultEnableTailMerge);
+}
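The MergePotentials/SameTails members declared above drive the tail-merging phase defined later in this file: hash the tail of each candidate block, sort so equal hashes become adjacent, then compare neighbors for genuinely identical tails. A rough illustration of that hash-sort-group strategy, with toy strings in place of MachineBasicBlocks (entirely hypothetical code, not the pass itself):

    // Sketch only: group candidates by a cheap tail hash before the
    // expensive pairwise comparison, as BranchFolder's tail merging does.
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::string> blocks = {"abXY", "cdXY", "efZW"};

      // Pair each block with a hash of its last two "instructions"
      // (characters here), then sort by hash (cf. MergePotentials).
      std::vector<std::pair<unsigned, std::string> > pot;
      for (const std::string &b : blocks) {
        unsigned h = ((unsigned)b[b.size()-2] << 8) | (unsigned)b[b.size()-1];
        pot.push_back(std::make_pair(h, b));
      }
      std::stable_sort(pot.begin(), pot.end());

      // Adjacent entries with equal hashes are candidates to share a tail.
      for (size_t i = 1; i < pot.size(); ++i)
        if (pot[i].first == pot[i-1].first)
          std::cout << pot[i-1].second << " and " << pot[i].second
                    << " may share a tail\n";
    }

The hash only narrows the search; as in the real pass, equal hashes still require an instruction-by-instruction comparison before anything is merged.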
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DOUT << "\nRemoving MBB: " << *MBB;
+
+  MachineFunction *MF = MBB->getParent();
+  // drop all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // If there are any labels in the basic block, unregister them from
+  // MachineModuleInfo.
+  if (MMI && !MBB->empty()) {
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      if (I->isLabel())
+        // The label ID # is always operand #0, an immediate.
+        MMI->InvalidateLabel(I->getOperand(0).getImm());
+    }
+  }
+
+  // Remove the block.
+  MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+///   r0 = implicit_def
+///   r1 = implicit_def
+///   br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+  SmallSet<unsigned, 4> ImpDefRegs;
+  MachineBasicBlock::iterator I = MBB->begin();
+  while (I != MBB->end()) {
+    if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+      break;
+    unsigned Reg = I->getOperand(0).getReg();
+    ImpDefRegs.insert(Reg);
+    for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      ImpDefRegs.insert(SubReg);
+    ++I;
+  }
+  if (ImpDefRegs.empty())
+    return false;
+
+  MachineBasicBlock::iterator FirstTerm = I;
+  while (I != MBB->end()) {
+    if (!TII->isUnpredicatedTerminator(I))
+      return false;
+    // See if it uses any of the implicitly defined registers.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (ImpDefRegs.count(Reg))
+        return false;
+    }
+    ++I;
+  }
+
+  I = MBB->begin();
+  while (I != FirstTerm) {
+    MachineInstr *ImpDefMI = &*I;
+    ++I;
+    MBB->erase(ImpDefMI);
+  }
+
+  return true;
+}
+
+bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  if (!TII) return false;
+
+  RegInfo = MF.getTarget().getRegisterInfo();
+
+  // Fix CFG.  The later algorithms expect it to be right.
+  bool EverMadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+    MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+      EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+    EverMadeChange |= OptimizeImpDefsBlock(MBB);
+  }
+
+  RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailMergeBlocks(MF);
+    MadeChangeThisIteration |= OptimizeBranches(MF);
+    EverMadeChange |= MadeChangeThisIteration;
+  }
+
+  // See if any jump tables have become mergeable or dead as the code generator
+  // did its thing.
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+  if (!JTs.empty()) {
+    // Figure out how these jump tables should be merged.
+    std::vector<unsigned> JTMapping;
+    JTMapping.reserve(JTs.size());
+
+    // We always keep the 0th jump table.
+    JTMapping.push_back(0);
+
+    // Scan the jump tables, seeing if there are any duplicates.  Note that this
+    // is N^2, which should be fixed someday.
+    for (unsigned i = 1, e = JTs.size(); i != e; ++i)
+      JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+
+    // If a jump table was merged with another one, walk the function rewriting
+    // references to jump tables to reference the new JT ID's.  Keep track of
+    // whether we see a jump table idx, if not, we can delete the JT.
+    BitVector JTIsLive(JTs.size());
+    for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+         BB != E; ++BB) {
+      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+           I != E; ++I)
+        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+          MachineOperand &Op = I->getOperand(op);
+          if (!Op.isJTI()) continue;
+          unsigned NewIdx = JTMapping[Op.getIndex()];
+          Op.setIndex(NewIdx);
+
+          // Remember that this JT is live.
+          JTIsLive.set(NewIdx);
+        }
+    }
+
+    // Finally, remove dead jump tables.  This happens either because the
+    // indirect jump was unreachable (and thus deleted) or because the jump
+    // table was merged with some other one.
+    for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+      if (!JTIsLive.test(i)) {
+        JTI->RemoveJumpTable(i);
+        EverMadeChange = true;
+      }
+  }
+
+  delete RS;
+  return EverMadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+  unsigned Hash = MI->getOpcode();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+
+    // Merge in bits from the operand if easy.
+    unsigned OperandHash = 0;
+    switch (Op.getType()) {
+    case MachineOperand::MO_Register:  OperandHash = Op.getReg(); break;
+    case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+    case MachineOperand::MO_MachineBasicBlock:
+      OperandHash = Op.getMBB()->getNumber();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      OperandHash = Op.getIndex();
+      break;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_ExternalSymbol:
+      // Global address / external symbol are too hard, don't bother, but do
+      // pull in the offset.
+      OperandHash = Op.getOffset();
+      break;
+    default: break;
+    }
+
+    Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+  }
+  return Hash;
+}
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB.  For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted).  For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// it gains to cross-jump even for one instruction.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+                             unsigned minCommonTailLength) {
+  MachineBasicBlock::const_iterator I = MBB->end();
+  if (I == MBB->begin())
+    return 0;   // Empty MBB.
+
+  --I;
+  unsigned Hash = HashMachineInstr(I);
+
+  if (I == MBB->begin() || minCommonTailLength == 1)
+    return Hash;   // Single instr MBB.
+
+  --I;
+  // Hash in the second-to-last instruction.
+  Hash ^= HashMachineInstr(I) << 2;
+  return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common together at their end.  Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+                                        MachineBasicBlock *MBB2,
+                                        MachineBasicBlock::iterator &I1,
+                                        MachineBasicBlock::iterator &I2) {
+  I1 = MBB1->end();
+  I2 = MBB2->end();
+
+  unsigned TailLen = 0;
+  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+    --I1; --I2;
+    if (!I1->isIdenticalTo(I2) ||
+        // FIXME: This check is dubious. It's used to get around a problem where
+        // people incorrectly expect inline asm directives to remain in the same
+        // relative order. This is untenable because normal compiler
+        // optimizations (like this one) may reorder and/or merge these
+        // directives.
+        I1->getOpcode() == TargetInstrInfo::INLINEASM) {
+      ++I1; ++I2;
+      break;
+    }
+    ++TailLen;
+  }
+  return TailLen;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                           MachineBasicBlock *NewDest) {
+  MachineBasicBlock *OldBB = OldInst->getParent();
+
+  // Remove all the old successors of OldBB from the CFG.
+  while (!OldBB->succ_empty())
+    OldBB->removeSuccessor(OldBB->succ_begin());
+
+  // Remove all the dead instructions from the end of OldBB.
+  OldBB->erase(OldInst, OldBB->end());
+
+  // If OldBB isn't immediately before NewDest, insert a branch to it.
+  if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+    TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
+  OldBB->addSuccessor(NewDest);
+  ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls into the part starting at
+/// the iterator.  This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+                                            MachineBasicBlock::iterator BBI1) {
+  MachineFunction &MF = *CurMBB.getParent();
+
+  // Create the fall-through block.
+  MachineFunction::iterator MBBI = &CurMBB;
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+  CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+  // Move all the successors of this block to the specified block.
+  NewMBB->transferSuccessors(&CurMBB);
+
+  // Add an edge from CurMBB to NewMBB for the fall-through.
+  CurMBB.addSuccessor(NewMBB);
+
+  // Splice the code over.
+  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+  // For targets that use the register scavenger, we must maintain LiveIns.
+  if (RS) {
+    RS->enterBasicBlock(&CurMBB);
+    if (!CurMBB.empty())
+      RS->forward(prior(CurMBB.end()));
+    BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+    RS->getRegsUsed(RegsLiveAtExit, false);
+    for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+      if (RegsLiveAtExit[i])
+        NewMBB->addLiveIn(i);
+  }
+
+  return NewMBB;
+}
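ComputeCommonTailLength above is the heart of the matching: walk two instruction streams backwards and count identical trailing instructions. A minimal standalone sketch of the same walk over plain integers (hypothetical code, for illustration only):

    // Sketch only: count the identical trailing elements of two sequences,
    // as ComputeCommonTailLength does for MachineInstr streams.
    #include <iostream>
    #include <vector>

    static unsigned commonTailLength(const std::vector<int> &A,
                                     const std::vector<int> &B) {
      unsigned Len = 0;
      size_t i = A.size(), j = B.size();
      // Walk both sequences from the end while the elements match.
      while (i > 0 && j > 0 && A[i-1] == B[j-1]) {
        --i; --j;
        ++Len;
      }
      return Len;
    }

    int main() {
      std::vector<int> a = {1, 2, 7, 8, 9};
      std::vector<int> b = {3, 7, 8, 9};
      std::cout << commonTailLength(a, b) << "\n"; // prints 3
    }

The real version additionally stops at INLINEASM, for the ordering reasons the FIXME above spells out.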
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+                                MachineBasicBlock::iterator E) {
+  unsigned Time = 0;
+  for (; I != E; ++I) {
+    const TargetInstrDesc &TID = I->getDesc();
+    if (TID.isCall())
+      Time += 10;
+    else if (TID.mayLoad() || TID.mayStore())
+      Time += 2;
+    else
+      ++Time;
+  }
+  return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
+// branches temporarily for tail merging).  In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccMBB instead.
+static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+                    const TargetInstrInfo *TII) {
+  MachineFunction *MF = CurMBB->getParent();
+  MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (I != MF->end() &&
+      !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+    MachineBasicBlock *NextBB = I;
+    if (TBB == NextBB && !Cond.empty() && !FBB) {
+      if (!TII->ReverseBranchCondition(Cond)) {
+        TII->RemoveBranch(*CurMBB);
+        TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+        return;
+      }
+    }
+  }
+  TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+}
+
+static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
+                         const std::pair<unsigned,MachineBasicBlock*> &q) {
+  if (p.first < q.first)
+    return true;
+  else if (p.first > q.first)
+    return false;
+  else if (p.second->getNumber() < q.second->getNumber())
+    return true;
+  else if (p.second->getNumber() > q.second->getNumber())
+    return false;
+  else {
+    // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+    // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+    assert(0 && "Predecessor appears twice");
+#endif
+    return false;
+  }
+}
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element).  Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks.  SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+                                        unsigned minCommonTailLength) {
+  unsigned maxCommonTailLength = 0U;
+  SameTails.clear();
+  MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+  MPIterator HighestMPIter = prior(MergePotentials.end());
+  for (MPIterator CurMPIter = prior(MergePotentials.end()),
+                  B = MergePotentials.begin();
+       CurMPIter!=B && CurMPIter->first==CurHash;
+       --CurMPIter) {
+    for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+      unsigned CommonTailLen = ComputeCommonTailLength(
+                                        CurMPIter->second,
+                                        I->second,
+                                        TrialBBI1, TrialBBI2);
+      // If we will have to split a block, there should be at least
+      // minCommonTailLength instructions in common; if not, at worst
+      // we will be replacing a fallthrough into the common tail with a
+      // branch, which at worst breaks even with falling through into
+      // the duplicated common tail, so 1 instruction in common is enough.
+      // We will always pick a block we do not have to split as the common
+      // tail if there is one.
+      // (Empty blocks will get forwarded and need not be considered.)
+      if (CommonTailLen >= minCommonTailLength ||
+          (CommonTailLen > 0 &&
+           (TrialBBI1==CurMPIter->second->begin() ||
+            TrialBBI2==I->second->begin()))) {
+        if (CommonTailLen > maxCommonTailLength) {
+          SameTails.clear();
+          maxCommonTailLength = CommonTailLen;
+          HighestMPIter = CurMPIter;
+          SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+        }
+        if (HighestMPIter == CurMPIter &&
+            CommonTailLen == maxCommonTailLength)
+          SameTails.push_back(std::make_pair(I, TrialBBI2));
+      }
+      if (I==B)
+        break;
+    }
+  }
+  return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+                                        MachineBasicBlock* SuccBB,
+                                        MachineBasicBlock* PredBB) {
+  MPIterator CurMPIter, B;
+  for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+       CurMPIter->first==CurHash;
+       --CurMPIter) {
+    // Put the unconditional branch back, if we need one.
+    MachineBasicBlock *CurMBB = CurMPIter->second;
+    if (SuccBB && CurMBB != PredBB)
+      FixTail(CurMBB, SuccBB, TII);
+    if (CurMPIter==B)
+      break;
+  }
+  if (CurMPIter->first!=CurHash)
+    CurMPIter++;
+  MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail.  Create a block that does by splitting one.
+unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                                 unsigned maxCommonTailLength) {
+  unsigned i, commonTailIndex;
+  unsigned TimeEstimate = ~0U;
+  for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+    if (SameTails[i].first->second==PredBB) {
+      commonTailIndex = i;
+      break;
+    }
+    // Otherwise, make a (fairly bogus) choice based on estimate of
+    // how long it will take the various blocks to execute.
+    unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
+                                 SameTails[i].second);
+    if (t<=TimeEstimate) {
+      TimeEstimate = t;
+      commonTailIndex = i;
+    }
+  }
+
+  MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
+  MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+
+  DOUT << "\nSplitting " << MBB->getNumber() << ", size " <<
+          maxCommonTailLength;
+
+  MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+  SameTails[commonTailIndex].first->second = newMBB;
+  SameTails[commonTailIndex].second = newMBB->begin();
+  // If we split PredBB, newMBB is the new predecessor.
+  if (PredBB==MBB)
+    PredBB = newMBB;
+
+  return commonTailIndex;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged.  If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+                                  MachineBasicBlock* PredBB) {
+  // It doesn't make sense to save a single instruction since tail merging
+  // will add a jump.
+  // FIXME: Ask the target to provide the threshold?
+  unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
+  MadeChange = false;
+
+  DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n';
+
+  // Sort by hash value so that blocks with identical end sequences sort
+  // together.
+  std::stable_sort(MergePotentials.begin(), MergePotentials.end(),
+                   MergeCompare);
+
+  // Walk through equivalence sets looking for actual exact matches.
+  while (MergePotentials.size() > 1) {
+    unsigned CurHash = prior(MergePotentials.end())->first;
+
+    // Build SameTails, identifying the set of blocks with this hash code
+    // and with the maximum number of instructions in common.
+    unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+                                                    minCommonTailLength);
+
+    // If we didn't find any pair that has at least minCommonTailLength
+    // instructions in common, remove all blocks with this hash code and retry.
+    if (SameTails.empty()) {
+      RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+      continue;
+    }
+
+    // If one of the blocks is the entire common tail (and not the entry
+    // block, which we can't jump to), we can treat all blocks with this same
+    // tail at once.  Use PredBB if that is one of the possibilities, as that
+    // will not introduce any extra branches.
+    MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
+                                 getParent()->begin();
+    unsigned int commonTailIndex, i;
+    for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
+      MachineBasicBlock *MBB = SameTails[i].first->second;
+      if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
+        commonTailIndex = i;
+        if (MBB==PredBB)
+          break;
+      }
+    }
+
+    if (commonTailIndex==SameTails.size()) {
+      // None of the blocks consist entirely of the common tail.
+      // Split a block so that one does.
+      commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+    }
+
+    MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+    // MBB is common tail.  Adjust all other BB's to jump to this one.
+    // Traversal must be forwards so erases work.
+    DOUT << "\nUsing common tail " << MBB->getNumber() << " for ";
+    for (unsigned int i=0; i<SameTails.size(); ++i) {
+      if (commonTailIndex==i)
+        continue;
+      DOUT << SameTails[i].first->second->getNumber() << ",";
+      // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+      ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+      // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+      MergePotentials.erase(SameTails[i].first);
+    }
+    DOUT << "\n";
+    // We leave commonTailIndex in the worklist in case there are other blocks
+    // that match it with a smaller number of instructions.
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+  if (!EnableTailMerge) return false;
+
+  MadeChange = false;
+
+  // First find blocks with no successors.
+  MergePotentials.clear();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (I->succ_empty())
+      MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+  }
+  // See if we can do any tail merging on those.
+  if (MergePotentials.size() < TailMergeThreshold &&
+      MergePotentials.size() >= 2)
+    MadeChange |= TryMergeBlocks(NULL, NULL);
+
+  // Look at blocks (IBB) with multiple predecessors (PBB).
+  // We change each predecessor to a canonical form, by
+  // (1) temporarily removing any unconditional branch from the predecessor
+  // to IBB, and
+  // (2) alter conditional branches so they branch to the other block
+  // not IBB; this may require adding back an unconditional branch to IBB
+  // later, where there wasn't one coming in.  E.g.
+  //   Bcc IBB
+  //   fallthrough to QBB
+  // here becomes
+  //   Bncc QBB
+  // with a conceptual B to IBB after that, which never actually exists.
+  // With those changes, we see whether the predecessors' tails match,
+  // and merge them if so.  We change things out of canonical form and
+  // back to the way they were later in the process.
+  // (OptimizeBranches would undo some of this, but we can't use it, because
+  // we'd get into a compile-time infinite loop repeatedly doing and undoing
+  // the same transformations.)
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+      MachineBasicBlock *IBB = I;
+      MachineBasicBlock *PredBB = prior(I);
+      MergePotentials.clear();
+      for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+                                            E2 = I->pred_end();
+           P != E2; ++P) {
+        MachineBasicBlock* PBB = *P;
+        // Skip blocks that loop to themselves, can't tail merge these.
+        if (PBB==IBB)
+          continue;
+        MachineBasicBlock *TBB = 0, *FBB = 0;
+        SmallVector<MachineOperand, 4> Cond;
+        if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+          // Failing case: IBB is the target of a cbr, and
+          // we cannot reverse the branch.
+          SmallVector<MachineOperand, 4> NewCond(Cond);
+          if (!Cond.empty() && TBB==IBB) {
+            if (TII->ReverseBranchCondition(NewCond))
+              continue;
+            // This is the QBB case described above
+            if (!FBB)
+              FBB = next(MachineFunction::iterator(PBB));
+          }
+          // Failing case: the only way IBB can be reached from PBB is via
+          // exception handling.  Happens for landing pads.  Would be nice
+          // to have a bit in the edge so we didn't have to do all this.
+          if (IBB->isLandingPad()) {
+            MachineFunction::iterator IP = PBB;  IP++;
+            MachineBasicBlock* PredNextBB = NULL;
+            if (IP!=MF.end())
+              PredNextBB = IP;
+            if (TBB==NULL) {
+              if (IBB!=PredNextBB)      // fallthrough
+                continue;
+            } else if (FBB) {
+              if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+                continue;
+            } else if (Cond.empty()) {
+              if (TBB!=IBB)             // ubr
+                continue;
+            } else {
+              if (TBB!=IBB && IBB!=PredNextBB)  // cbr
+                continue;
+            }
+          }
+          // Remove the unconditional branch at the end, if any.
+          if (TBB && (Cond.empty() || FBB)) {
+            TII->RemoveBranch(*PBB);
+            if (!Cond.empty())
+              // reinsert conditional branch only, for now
+              TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+          }
+          MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+        }
+      }
+      if (MergePotentials.size() >= 2)
+        MadeChange |= TryMergeBlocks(I, PredBB);
+      // Reinsert an unconditional branch if needed.
+      // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
+      PredBB = prior(I);      // this may have been changed in TryMergeBlocks
+      if (MergePotentials.size()==1 &&
+          MergePotentials.begin()->second != PredBB)
+        FixTail(MergePotentials.begin()->second, I, TII);
+    }
+  }
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+  MadeChange = false;
+
+  // Make sure blocks are numbered in order
+  MF.RenumberBlocks();
+
+  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    OptimizeBlock(MBB);
+
+    // If it is dead, remove it.
+    if (MBB->pred_empty()) {
+      RemoveDeadBlock(MBB);
+      MadeChange = true;
+      ++NumDeadBlocks;
+    }
+  }
+  return MadeChange;
+}
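Before the fallthrough analysis that the rest of the pass leans on, a toy sketch of the layout test it codifies: a block can fall through only if the next block in layout order is also one of its CFG successors. The types below are hypothetical stand-ins, not the pass's own:

    // Sketch only: layout-order fallthrough test, cf. CanFallThrough below.
    #include <iostream>
    #include <set>
    #include <vector>

    struct Block { int id; std::set<int> succs; };

    static bool canFallThrough(const std::vector<Block> &layout, size_t i) {
      if (i + 1 == layout.size()) return false; // off the end of the function
      int next = layout[i + 1].id;
      return layout[i].succs.count(next) != 0;  // next block must be a successor
    }

    int main() {
      // Block 0 branches only to block 2; block 1 falls through to block 2.
      std::vector<Block> layout = { {0, {2}}, {1, {2}}, {2, {}} };
      std::cout << canFallThrough(layout, 0)
                << canFallThrough(layout, 1) << "\n"; // prints 01
    }

The real CanFallThrough layers branch analysis on top of this: an unanalyzable terminator conservatively counts as able to fall through.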
+/// CanFallThrough - Return true if the specified block (with the specified
+/// branch condition) can implicitly transfer control to the block after it by
+/// falling off the end of it.  This should return false if it can reach the
+/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
+                                  bool BranchUnAnalyzable,
+                                  MachineBasicBlock *TBB,
+                                  MachineBasicBlock *FBB,
+                                  const SmallVectorImpl<MachineOperand> &Cond) {
+  MachineFunction::iterator Fallthrough = CurBB;
+  ++Fallthrough;
+  // If FallthroughBlock is off the end of the function, it can't fall through.
+  if (Fallthrough == CurBB->getParent()->end())
+    return false;
+
+  // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
+  if (!CurBB->isSuccessor(Fallthrough))
+    return false;
+
+  // If we couldn't analyze the branch, assume it could fall through.
+  if (BranchUnAnalyzable) return true;
+
+  // If there is no branch, control always falls through.
+  if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, it can obviously
+  // reach, even though the branch should get folded to fall through implicitly.
+  if (MachineFunction::iterator(TBB) == Fallthrough ||
+      MachineFunction::iterator(FBB) == Fallthrough)
+    return true;
+
+  // If it's an unconditional branch to some block not the fall through, it
+  // doesn't fall through.
+  if (Cond.empty()) return false;
+
+  // Otherwise, if it is conditional and has no explicit false block, it falls
+  // through.
+  return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it.  This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
+  return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2.  This has to return
+/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+                                MachineBasicBlock *MBB2) {
+  // Right now, we use a simple heuristic.  If MBB2 ends with a call, and
+  // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
+  // optimize branches that branch to either a return block or an assert block
+  // into a fallthrough to the return.
+  if (MBB1->empty() || MBB2->empty()) return false;
+
+  // If there is a clear successor ordering we make sure that one block
+  // will fall through to the next
+  if (MBB1->isSuccessor(MBB2)) return true;
+  if (MBB2->isSuccessor(MBB1)) return false;
+
+  MachineInstr *MBB1I = --MBB1->end();
+  MachineInstr *MBB2I = --MBB2->end();
+  return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block.  This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+  MachineFunction::iterator FallThrough = MBB;
+  ++FallThrough;
+
+  // If this block is empty, make everyone use its fall-through, not the block
+  // explicitly.  Landing pads should not do this since the landing-pad table
+  // points to this block.
+  if (MBB->empty() && !MBB->isLandingPad()) {
+    // Dead block?  Leave for cleanup later.
+    if (MBB->pred_empty()) return;
+
+    if (FallThrough == MBB->getParent()->end()) {
+      // TODO: Simplify preds to not branch here if possible!
+    } else {
+      // Rewrite all predecessors of the old block to go to the fallthrough
+      // instead.
+      while (!MBB->pred_empty()) {
+        MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+        Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+      }
+
+      // If MBB was the target of a jump table, update jump tables to go to the
+      // fallthrough instead.
+      MBB->getParent()->getJumpTableInfo()->
+        ReplaceMBBInJumpTables(MBB, FallThrough);
+      MadeChange = true;
+    }
+    return;
+  }
+
+  // Check to see if we can simplify the terminator of the block before this
+  // one.
+  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+  SmallVector<MachineOperand, 4> PriorCond;
+  bool PriorUnAnalyzable =
+    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+  if (!PriorUnAnalyzable) {
+    // If the CFG for the prior block has extra edges, remove them.
+    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+                                              !PriorCond.empty());
+
+    // If the previous branch is conditional and both conditions go to the same
+    // destination, remove the branch, replacing it with an unconditional one or
+    // a fall-through.
+    if (PriorTBB && PriorTBB == PriorFBB) {
+      TII->RemoveBranch(PrevBB);
+      PriorCond.clear();
+      if (PriorTBB != MBB)
+        TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+
+    // If the previous branch *only* branches to *this* block (conditional or
+    // not) remove the branch.
+    if (PriorTBB == MBB && PriorFBB == 0) {
+      TII->RemoveBranch(PrevBB);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+
+    // If the prior block branches somewhere else on the condition and here if
+    // the condition is false, remove the uncond second branch.
+    if (PriorFBB == MBB) {
+      TII->RemoveBranch(PrevBB);
+      TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+      MadeChange = true;
+      ++NumBranchOpts;
+      return OptimizeBlock(MBB);
+    }
+
+    // If the prior block branches here on true and somewhere else on false, and
+    // if the branch condition is reversible, reverse the branch to create a
+    // fall-through.
+    if (PriorTBB == MBB) {
+      SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+      if (!TII->ReverseBranchCondition(NewPriorCond)) {
+        TII->RemoveBranch(PrevBB);
+        TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+
+    // If this block doesn't fall through (e.g. it ends with an uncond branch or
+    // has no successors) and if the pred falls through into this block, and if
+    // it would otherwise fall through into the block after this, move this
+    // block to the end of the function.
+    //
+    // We consider it more likely that execution will stay in the function (e.g.
+    // due to loops) than it is to exit it.  This asserts in loops etc, moving
+    // the assert condition out of the loop body.
+    if (!PriorCond.empty() && PriorFBB == 0 &&
+        MachineFunction::iterator(PriorTBB) == FallThrough &&
+        !CanFallThrough(MBB)) {
+      bool DoTransform = true;
+
+      // We have to be careful that the succs of PredBB aren't both no-successor
+      // blocks.  If neither have successors and if PredBB is the second from
+      // last block in the function, we'd just keep swapping the two blocks for
+      // last.  Only do the swap if one is clearly better to fall through than
+      // the other.
+      if (FallThrough == --MBB->getParent()->end() &&
+          !IsBetterFallthrough(PriorTBB, MBB))
+        DoTransform = false;
+
+      // We don't want to do this transformation if we have control flow like:
+      //   br cond BB2
+      // BB1:
+      //   ..
+      //   jmp BBX
+      // BB2:
+      //   ..
+      //   ret
+      //
+      // In this case, we could actually be moving the return block *into* a
+      // loop!
+      if (DoTransform && !MBB->succ_empty() &&
+          (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+        DoTransform = false;
+
+      if (DoTransform) {
+        // Reverse the branch so we will fall through on the previous true cond.
+        SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+        if (!TII->ReverseBranchCondition(NewPriorCond)) {
+          DOUT << "\nMoving MBB: " << *MBB;
+          DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+
+          TII->RemoveBranch(PrevBB);
+          TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+          // Move this block to the end of the function.
+          MBB->moveAfter(--MBB->getParent()->end());
+          MadeChange = true;
+          ++NumBranchOpts;
+          return;
+        }
+      }
+    }
+  }
+
+  // Analyze the branch in the current block.
+  MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+  SmallVector<MachineOperand, 4> CurCond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+  if (!CurUnAnalyzable) {
+    // If the CFG for the prior block has extra edges, remove them.
+    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+    // If this is a two-way branch, and the FBB branches to this block, reverse
+    // the condition so the single-basic-block loop is faster.  Instead of:
+    //    Loop: xxx; jcc Out; jmp Loop
+    // we want:
+    //    Loop: xxx; jncc Loop; jmp Out
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+      SmallVector<MachineOperand, 4> NewCond(CurCond);
+      if (!TII->ReverseBranchCondition(NewCond)) {
+        TII->RemoveBranch(*MBB);
+        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+
+    // If this branch is the only thing in its block, see if we can forward
+    // other blocks across it.
+    if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+        MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
+      // This block may contain just an unconditional branch.  Because there can
+      // be 'non-branch terminators' in the block, try removing the branch and
+      // then seeing if the block is empty.
+      TII->RemoveBranch(*MBB);
+
+      // If this block is just an unconditional branch to CurTBB, we can
+      // usually completely eliminate the block.  The only case we cannot
+      // completely eliminate the block is when the block before this one
+      // falls through into MBB and we can't understand the prior block's branch
+      // condition.
+      if (MBB->empty()) {
+        bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+        if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+            !PrevBB.isSuccessor(MBB)) {
+          // If the prior block falls through into us, turn it into an
+          // explicit branch to us to make updates simpler.
+          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+              PriorTBB != MBB && PriorFBB != MBB) {
+            if (PriorTBB == 0) {
+              assert(PriorCond.empty() && PriorFBB == 0 &&
+                     "Bad branch analysis");
+              PriorTBB = MBB;
+            } else {
+              assert(PriorFBB == 0 && "Machine CFG out of date!");
+              PriorFBB = MBB;
+            }
+            TII->RemoveBranch(PrevBB);
+            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+          }
+
+          // Iterate through all the predecessors, revectoring each in-turn.
+          size_t PI = 0;
+          bool DidChange = false;
+          bool HasBranchToSelf = false;
+          while(PI != MBB->pred_size()) {
+            MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+            if (PMBB == MBB) {
+              // If this block has an uncond branch to itself, leave it.
+              ++PI;
+              HasBranchToSelf = true;
+            } else {
+              DidChange = true;
+              PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+              // If this change resulted in PMBB ending in a conditional
+              // branch where both conditions go to the same destination,
+              // change this to an unconditional branch (and fix the CFG).
+              MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+              SmallVector<MachineOperand, 4> NewCurCond;
+              bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+                      NewCurFBB, NewCurCond, true);
+              if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+                TII->RemoveBranch(*PMBB);
+                NewCurCond.clear();
+                TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+                MadeChange = true;
+                ++NumBranchOpts;
+                PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+              }
+            }
+          }
+
+          // Change any jumptables to go to the new MBB.
+          MBB->getParent()->getJumpTableInfo()->
+            ReplaceMBBInJumpTables(MBB, CurTBB);
+          if (DidChange) {
+            ++NumBranchOpts;
+            MadeChange = true;
+            if (!HasBranchToSelf) return;
+          }
+        }
+      }
+
+      // Add the branch back if the block is more than just an uncond branch.
+      TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+    }
+  }
+
+  // If the prior block doesn't fall through into this block, and if this
+  // block doesn't fall through into some other block, see if we can find a
+  // place to move this block where a fall-through will happen.
+  if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
+                      PriorTBB, PriorFBB, PriorCond)) {
+    // Now we know that there was no fall-through into this block, check to
+    // see if it has a fall-through into its successor.
+    bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+                                       CurCond);
+
+    if (!MBB->isLandingPad()) {
+      // Check all the predecessors of this block.  If one of them has no fall
+      // throughs, move this block right after it.
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+           E = MBB->pred_end(); PI != E; ++PI) {
+        // Analyze the branch at the end of the pred.
+        MachineBasicBlock *PredBB = *PI;
+        MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+        if (PredBB != MBB && !CanFallThrough(PredBB)
+            && (!CurFallsThru || !CurTBB || !CurFBB)
+            && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+          // If the current block doesn't fall through, just move it.
+          // If the current block can fall through and does not end with a
+          // conditional branch, we need to append an unconditional jump to
+          // the (current) next block.  To avoid a possible compile-time
+          // infinite loop, move blocks only backward in this case.
+          // Also, if there are already 2 branches here, we cannot add a third;
+          // this means we have the case
+          //   Bcc next
+          //   B elsewhere
+          // next:
+          if (CurFallsThru) {
+            MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+            CurCond.clear();
+            TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+          }
+          MBB->moveAfter(PredBB);
+          MadeChange = true;
+          return OptimizeBlock(MBB);
+        }
+      }
+    }
+
+    if (!CurFallsThru) {
+      // Check all successors to see if we can move this block before it.
+      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           E = MBB->succ_end(); SI != E; ++SI) {
+        // Analyze the branch at the end of the block before the succ.
+        MachineBasicBlock *SuccBB = *SI;
+        MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+        std::vector<MachineOperand> SuccPrevCond;
+
+        // If this block doesn't already fall through to that successor, and if
+        // the succ doesn't already have a block that can fall through into it,
+        // and if the successor isn't an EH destination, we can arrange for the
+        // fallthrough to happen.
+        if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+            !SuccBB->isLandingPad()) {
+          MBB->moveBefore(SuccBB);
+          MadeChange = true;
+          return OptimizeBlock(MBB);
+        }
+      }
+
+      // Okay, there is no really great place to put this block. If, however,
+      // the block before this one would be a fall-through if this block were
+      // removed, move this block to the end of the function.
+      if (FallThrough != MBB->getParent()->end() &&
+          PrevBB.isSuccessor(FallThrough)) {
+        MBB->moveAfter(--MBB->getParent()->end());
+        MadeChange = true;
+        return;
+      }
+    }
+  }
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 000000000000..ca4b31c63774
--- /dev/null
+++ b/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,62 @@
+add_llvm_library(LLVMCodeGen
+  BranchFolding.cpp
+  CodePlacementOpt.cpp
+  DeadMachineInstructionElim.cpp
+  DwarfEHPrepare.cpp
+  ELFWriter.cpp
+  GCMetadata.cpp
+  GCMetadataPrinter.cpp
+  GCStrategy.cpp
+  IfConversion.cpp
+  IntrinsicLowering.cpp
+  LLVMTargetMachine.cpp
+  LatencyPriorityQueue.cpp
+  LiveInterval.cpp
+  LiveIntervalAnalysis.cpp
+  LiveStackAnalysis.cpp
+  LiveVariables.cpp
+  LowerSubregs.cpp
+  MachOWriter.cpp
+  MachineBasicBlock.cpp
+  MachineDominators.cpp
+  MachineFunction.cpp
+  MachineInstr.cpp
+  MachineLICM.cpp
+  MachineLoopInfo.cpp
+  MachineModuleInfo.cpp
+  MachinePassRegistry.cpp
+  MachineRegisterInfo.cpp
+  MachineSink.cpp
+  MachineVerifier.cpp
+  OcamlGC.cpp
+  PBQP.cpp
+  PHIElimination.cpp
+  Passes.cpp
+  PostRASchedulerList.cpp
+  PreAllocSplitting.cpp
+  PrologEpilogInserter.cpp
+  PseudoSourceValue.cpp
+  RegAllocBigBlock.cpp
+  RegAllocLinearScan.cpp
+  RegAllocLocal.cpp
+  RegAllocPBQP.cpp
+  RegAllocSimple.cpp
+  RegisterCoalescer.cpp
+  RegisterScavenging.cpp
+  ScheduleDAG.cpp
+  ScheduleDAGEmit.cpp
+  ScheduleDAGInstrs.cpp
+  ScheduleDAGPrinter.cpp
+  ShadowStackGC.cpp
+  ShrinkWrapping.cpp
+  SimpleRegisterCoalescing.cpp
+  Spiller.cpp
+  StackProtector.cpp
+  StackSlotColoring.cpp
+  StrongPHIElimination.cpp
+  TargetInstrInfoImpl.cpp
+  TwoAddressInstructionPass.cpp
+  UnreachableBlockElim.cpp
+  VirtRegMap.cpp
+  VirtRegRewriter.cpp
+  )
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 000000000000..383098e11efd
--- /dev/null
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,358 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to a target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumHeaderAligned, "Number of loop headers aligned");
+STATISTIC(NumIntraElim,     "Number of intra-loop branches eliminated");
+STATISTIC(NumIntraMoved,    "Number of intra-loop branches moved");
+
+namespace {
+  class CodePlacementOpt : public MachineFunctionPass {
+    const MachineLoopInfo *MLI;
+    const TargetInstrInfo *TII;
+    const TargetLowering  *TLI;
+
+    /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges.
+    SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs;
+
+    /// UncondJmpMBBs - A list of BBs which are in loops and end with
+    /// unconditional branches.
+    SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
+    UncondJmpMBBs;
+
+    /// LoopHeaders - A list of BBs which are loop headers.
+    SmallVector<MachineBasicBlock*, 4> LoopHeaders;
+
+  public:
+    static char ID;
+    CodePlacementOpt() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const {
+      return "Code Placement Optimizer";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    bool OptimizeIntraLoopEdges();
+    bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
+    bool AlignLoops(MachineFunction &MF);
+  };
+
+  char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+  return new CodePlacementOpt();
+}
+
+/// OptimizeIntraLoopEdges - Place loop back edges to move unconditional
+/// branches out of the loop.
+///
+///       A:
+///       ...
+///       <fallthrough to B>
+///
+///       B:  --> loop header
+///       ...
+///       jcc C, [exit]
+///
+///       C:
+///       ...
+///       jmp B
+///
+/// ==>
+///
+///       A:
+///       ...
+///       jmp B
+///
+///       C:  --> new loop header
+///       ...
+///       <fallthrough to B>
+///
+///       B:
+///       ...
+///       jcc C, [exit]
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges() {
+  if (!TLI->shouldOptimizeCodePlacement())
+    return false;
+
+  bool Changed = false;
+  for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = UncondJmpMBBs[i].first;
+    MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second;
+    MachineLoop *L = MLI->getLoopFor(MBB);
+    assert(L && "BB is expected to be in a loop!");
+
+    if (ChangedMBBs.count(MBB)) {
+      // BB has been modified, re-analyze.
+      MachineBasicBlock *TBB = 0, *FBB = 0;
+      SmallVector<MachineOperand, 4> Cond;
+      if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+        continue;
+      if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad())
+        continue;
+      SuccMBB = TBB;
+    } else {
+      assert(MLI->getLoopFor(SuccMBB) == L &&
+             "Successor is not in the same loop!");
+    }
+
+    if (MBB->isLayoutSuccessor(SuccMBB)) {
+      // Successor is right after MBB, just eliminate the unconditional jmp.
+      // Can this happen?
+      TII->RemoveBranch(*MBB);
+      ChangedMBBs.insert(MBB);
+      ++NumIntraElim;
+      Changed = true;
+      continue;
+    }
+
+    // Now check if SuccMBB is reached by fallthrough from some BB. If it is,
+    // that BB should be from outside the loop, since the edge will become
+    // a jmp.
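Before the fallthrough scan and cost analysis below, it may help to see the block motion in isolation. This is a toy model of SuccMBB->moveAfter(MBB) as an operation on a layout order; the Layout type and moveAfter helper are invented for illustration, not LLVM's API.

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    typedef std::vector<std::string> Layout; // blocks in layout order

    // Move Target to the slot right after From, so "From: ...; jmp Target"
    // becomes a plain fallthrough - the effect of SuccMBB->moveAfter(MBB).
    void moveAfter(Layout &L, const std::string &From,
                   const std::string &Target) {
      L.erase(std::find(L.begin(), L.end(), Target));
      L.insert(std::find(L.begin(), L.end(), From) + 1, Target);
    }

    int main() {
      // Before: A, B, C where C (the latch) ends in "jmp B" (the back edge).
      Layout L;
      L.push_back("A"); L.push_back("B"); L.push_back("C");
      moveAfter(L, "C", "B"); // after: A, C, B - C falls through into B, and
                              // A (which used to fall into B) needs "jmp B".
      for (size_t i = 0; i != L.size(); ++i) std::cout << L[i] << " ";
      std::cout << "\n"; // prints: A C B
      return 0;
    }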
+    bool OkToMove = true;
+    MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0;
+    SmallVector<MachineOperand, 4> FtCond;
+    for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(),
+           PE = SuccMBB->pred_end(); PI != PE; ++PI) {
+      MachineBasicBlock *PredMBB = *PI;
+      if (PredMBB->isLayoutSuccessor(SuccMBB)) {
+        if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) {
+          OkToMove = false;
+          break;
+        }
+        if (!FtTBB)
+          FtTBB = SuccMBB;
+        else if (!FtFBB) {
+          assert(FtFBB != SuccMBB && "Unexpected control flow!");
+          FtFBB = SuccMBB;
+        }
+
+        // A fallthrough.
+        FtMBB = PredMBB;
+        MachineLoop *PL = MLI->getLoopFor(PredMBB);
+        if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth()))
+          OkToMove = false;
+
+        break;
+      }
+    }
+
+    if (!OkToMove)
+      continue;
+
+    // Is it profitable? If SuccMBB currently falls through to its layout
+    // successor, that fallthrough will have to be turned into a jmp.
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond))
+      continue;
+    if (!TBB && Cond.empty())
+      TBB = next(MachineFunction::iterator(SuccMBB));
+    else if (!FBB && !Cond.empty())
+      FBB = next(MachineFunction::iterator(SuccMBB));
+
+    // This calculates the cost of the transformation. It also finds the
+    // *only* intra-loop successor, if there is exactly one.
+    int Cost = 0;
+    bool HasOneIntraSucc = true;
+    MachineBasicBlock *IntraSucc = 0;
+    for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(),
+           SE = SuccMBB->succ_end(); SI != SE; ++SI) {
+      MachineBasicBlock *SSMBB = *SI;
+      if (MLI->getLoopFor(SSMBB) == L) {
+        if (!IntraSucc)
+          IntraSucc = SSMBB;
+        else
+          HasOneIntraSucc = false;
+      }
+
+      if (SuccMBB->isLayoutSuccessor(SSMBB))
+        // This will become a jmp.
+        ++Cost;
+      else if (MBB->isLayoutSuccessor(SSMBB)) {
+        // One of the successors will become the new fallthrough.
+        if (SSMBB == FBB) {
+          FBB = 0;
+          --Cost;
+        } else if (!FBB && SSMBB == TBB && Cond.empty()) {
+          TBB = 0;
+          --Cost;
+        } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
+          assert(SSMBB == TBB);
+          TBB = FBB;
+          FBB = 0;
+          --Cost;
+        }
+      }
+    }
+    if (Cost)
+      continue;
+
+    // Now, let's move the successor to below the BB to eliminate the jmp.
+    SuccMBB->moveAfter(MBB);
+    TII->RemoveBranch(*MBB);
+    TII->RemoveBranch(*SuccMBB);
+    if (TBB)
+      TII->InsertBranch(*SuccMBB, TBB, FBB, Cond);
+    ChangedMBBs.insert(MBB);
+    ChangedMBBs.insert(SuccMBB);
+    if (FtMBB) {
+      TII->RemoveBranch(*FtMBB);
+      TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond);
+      ChangedMBBs.insert(FtMBB);
+    }
+    Changed = true;
+
+    // If BB is the loop latch, we may have a new loop header.
+    if (MBB == L->getLoopLatch()) {
+      assert(MLI->isLoopHeader(SuccMBB) &&
+             "Only succ of loop latch is not the header?");
+      if (HasOneIntraSucc && IntraSucc)
+        std::replace(LoopHeaders.begin(), LoopHeaders.end(), SuccMBB,
+                     IntraSucc);
+    }
+  }
+
+  ++NumIntraMoved;
+  return Changed;
+}
+
+/// HeaderShouldBeAligned - Return true if the specified loop header block
+/// should be aligned. For now, we will not align it if all the predecessors
+/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
+/// align small loops.
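The check defined just below reduces to a comparison of layout numbers: the header is worth aligning only if some back edge comes from a block laid out at or below it. A reduced sketch with plain integers standing in for block numbers (headerWorthAligning is a hypothetical helper, not part of the pass):

    #include <iostream>
    #include <vector>

    // Each predecessor is identified by its layout number; Header is the
    // loop header's layout number. A back edge "from below" is a predecessor
    // whose number is >= the header's (a self-loop counts, as in the pass).
    bool headerWorthAligning(int Header, const std::vector<int> &PredNums) {
      for (size_t i = 0; i != PredNums.size(); ++i)
        if (PredNums[i] >= Header)
          return true; // a branch from below jumps backward to the header
      return false;    // all preds fall in from above; padding buys nothing
    }

    int main() {
      std::vector<int> Preds;
      Preds.push_back(1); Preds.push_back(7); // block 7 is below header 3
      std::cout << headerWorthAligning(3, Preds) << "\n"; // prints: 1
      return 0;
    }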
+bool
+CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
+  if (DoNotAlign.count(MBB))
+    return false;
+
+  bool BackEdgeBelow = false;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredMBB = *PI;
+    if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
+      BackEdgeBelow = true;
+      break;
+    }
+  }
+
+  if (!BackEdgeBelow)
+    return false;
+
+  // Ok, we are going to align this loop header. If it's an inner loop,
+  // do not align its outer loop.
+  MachineBasicBlock *PreHeader = L->getLoopPreheader();
+  if (PreHeader) {
+    MachineLoop *L = MLI->getLoopFor(PreHeader);
+    if (L) {
+      MachineBasicBlock *HeaderBlock = L->getHeader();
+      HeaderBlock->setAlignment(0);
+      DoNotAlign.insert(HeaderBlock);
+    }
+  }
+  return true;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  if (F->hasFnAttr(Attribute::OptimizeForSize))
+    return false;
+
+  unsigned Align = TLI->getPrefLoopAlignment();
+  if (!Align)
+    return false;  // Don't care about loop alignment.
+
+  // Make sure blocks are numbered in order.
+  MF.RenumberBlocks();
+
+  bool Changed = false;
+  SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
+  for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
+    MachineBasicBlock *HeaderMBB = LoopHeaders[i];
+    MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
+    MachineLoop *L = MLI->getLoopFor(HeaderMBB);
+    if (L == MLI->getLoopFor(PredMBB))
+      // If the previous BB is in the same loop, don't align this BB. We
+      // want to avoid adding nops inside a loop.
+      continue;
+    if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
+      HeaderMBB->setAlignment(Align);
+      Changed = true;
+      ++NumHeaderAligned;
+    }
+  }
+
+  return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+  MLI = &getAnalysis<MachineLoopInfo>();
+  if (MLI->empty())
+    return false;   // No loops.
+
+  TLI = MF.getTarget().getTargetLowering();
+  TII = MF.getTarget().getInstrInfo();
+
+  // Analyze the BBs first and keep track of loop headers and BBs that
+  // end with an unconditional jmp to another block in the same loop.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I;
+    if (MBB->isLandingPad())
+      continue;
+    MachineLoop *L = MLI->getLoopFor(MBB);
+    if (!L)
+      continue;
+    if (MLI->isLoopHeader(MBB))
+      LoopHeaders.push_back(MBB);
+
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+      continue;
+    if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad())
+      UncondJmpMBBs.push_back(std::make_pair(MBB, TBB));
+  }
+
+  bool Changed = OptimizeIntraLoopEdges();
+
+  Changed |= AlignLoops(MF);
+
+  ChangedMBBs.clear();
+  UncondJmpMBBs.clear();
+  LoopHeaders.clear();
+
+  return Changed;
+}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..4832a5ee9ae0
--- /dev/null
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,161 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level dead-code-elimination pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN DeadMachineInstructionElim : + public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + BitVector LivePhysRegs; + + public: + static char ID; // Pass identification, replacement for typeid + DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} + + private: + bool isDead(MachineInstr *MI) const; + }; +} +char DeadMachineInstructionElim::ID = 0; + +static RegisterPass +Y("dead-mi-elimination", + "Remove dead machine instructions"); + +FunctionPass *llvm::createDeadMachineInstructionElimPass() { + return new DeadMachineInstructionElim(); +} + +bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const { + // Don't delete instructions with side effects. + bool SawStore = false; + if (!MI->isSafeToMove(TII, SawStore)) + return false; + + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) ? + LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { + // This def has a use. Don't delete the instruction! + return false; + } + } + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + bool AnyChanges = false; + MRI = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + // Compute a bitvector to represent all non-allocatable physregs. + BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF); + NonAllocatableRegs.flip(); + + // Loop over all instructions in all blocks, from bottom to top, so that it's + // more likely that chains of dependent but ultimately dead instructions will + // be cleaned up. + for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); + I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + // Start out assuming that all non-allocatable registers are live + // out of this block. + LivePhysRegs = NonAllocatableRegs; + + // Also add any explicit live-out physregs for this block. + if (!MBB->empty() && MBB->back().getDesc().isReturn()) + for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), + LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { + unsigned Reg = *LOI; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + LivePhysRegs.set(Reg); + } + + // Now scan the instructions and delete dead ones, tracking physreg + // liveness as we go. + for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! 
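The scan that begins here is the entire algorithm: visit each block bottom-up, delete any side-effect-free instruction whose def is dead, and update liveness at every def and use. A self-contained model over a toy instruction format follows (Inst is invented; registers are small integers indexing a bitset, and the subregister/alias subtleties handled below are deliberately elided):

    #include <bitset>
    #include <iostream>
    #include <vector>

    struct Inst { int Def; std::vector<int> Uses; bool HasSideEffects; };

    // Walk the block bottom-up: delete any side-effect-free instruction
    // whose def is not live, otherwise kill the def and mark the uses live,
    // mirroring the LivePhysRegs reset()/set() bookkeeping in the pass.
    // Defs are processed before uses, as the pass's comments require.
    void deadCodeEliminate(std::vector<Inst> &Block, std::bitset<32> Live) {
      for (int i = (int)Block.size() - 1; i >= 0; --i) {
        Inst &I = Block[i];
        if (!I.HasSideEffects && !Live[I.Def]) {
          Block.erase(Block.begin() + i); // dead: nothing below reads I.Def
          continue;
        }
        Live[I.Def] = false;              // the def kills liveness
        for (size_t u = 0; u != I.Uses.size(); ++u)
          Live[I.Uses[u]] = true;         // the uses become live
      }
    }

    int main() {
      // r1 = ...; r2 = r1; ret (uses r0) -> both defs are dead in one pass.
      std::vector<Inst> B;
      Inst i1 = {1, std::vector<int>(), false};   B.push_back(i1);
      Inst i2 = {2, std::vector<int>(1, 1), false}; B.push_back(i2);
      Inst ret = {0, std::vector<int>(1, 0), true}; B.push_back(ret);
      deadCodeEliminate(B, std::bitset<32>());      // nothing live-out
      std::cout << B.size() << " instruction(s) left\n"; // 1 (the ret)
      return 0;
    }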
+ if (isDead(MI)) { + DOUT << "DeadMachineInstructionElim: DELETING: " << *MI; + AnyChanges = true; + MI->eraseFromParent(); + MIE = MBB->rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.reset(Reg); + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) + LivePhysRegs.reset(*SubRegs); + } + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.set(Reg); + for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + *AliasSet; ++AliasSet) + LivePhysRegs.set(*AliasSet); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 000000000000..720e3d19b759 --- /dev/null +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,397 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfehprepare" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +using namespace llvm; + +STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); +STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); +STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); + +namespace { + class VISIBILITY_HIDDEN DwarfEHPrepare : public FunctionPass { + const TargetLowering *TLI; + bool CompileFast; + + // The eh.exception intrinsic. + Function *ExceptionValueIntrinsic; + + // _Unwind_Resume or the target equivalent. + Constant *RewindFunction; + + // Dominator info is used when turning stack temporaries into registers. + DominatorTree *DT; + DominanceFrontier *DF; + + // The function we are running on. + Function *F; + + // The landing pads for this function. 
+ typedef SmallPtrSet BBSet; + BBSet LandingPads; + + // Stack temporary used to hold eh.exception values. + AllocaInst *ExceptionValueVar; + + bool NormalizeLandingPads(); + bool LowerUnwinds(); + bool MoveExceptionValueCalls(); + bool FinishStackTemporaries(); + bool PromoteStackTemporaries(); + + Instruction *CreateExceptionValueCall(BasicBlock *BB); + Instruction *CreateValueLoad(BasicBlock *BB); + + /// CreateReadOfExceptionValue - Return the result of the eh.exception + /// intrinsic by calling the intrinsic if in a landing pad, or loading + /// it from the exception value variable otherwise. + Instruction *CreateReadOfExceptionValue(BasicBlock *BB) { + return LandingPads.count(BB) ? + CreateExceptionValueCall(BB) : CreateValueLoad(BB); + } + + public: + static char ID; // Pass identification, replacement for typeid. + DwarfEHPrepare(const TargetLowering *tli, bool fast) : + FunctionPass(&ID), TLI(tli), CompileFast(fast), + ExceptionValueIntrinsic(0), RewindFunction(0) {} + + virtual bool runOnFunction(Function &Fn); + + // getAnalysisUsage - We need dominance frontiers for memory promotion. + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + if (!CompileFast) + AU.addRequired(); + AU.addPreserved(); + if (!CompileFast) + AU.addRequired(); + AU.addPreserved(); + } + + const char *getPassName() const { + return "Exception handling preparation"; + } + + }; +} // end anonymous namespace + +char DwarfEHPrepare::ID = 0; + +FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { + return new DwarfEHPrepare(tli, fast); +} + +/// NormalizeLandingPads - Normalize and discover landing pads, noting them +/// in the LandingPads set. A landing pad is normal if the only CFG edges +/// that end at it are unwind edges from invoke instructions. +/// Abnormal landing pads are fixed up by redirecting all unwind edges to +/// a new basic block which falls through to the original. +bool DwarfEHPrepare::NormalizeLandingPads() { + bool Changed = false; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (!isa(TI)) + continue; + BasicBlock *LPad = TI->getSuccessor(1); + // Skip landing pads that have already been normalized. + if (LandingPads.count(LPad)) + continue; + + // Check that only invoke unwind edges end at the landing pad. + bool OnlyUnwoundTo = true; + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); + PI != PE; ++PI) { + TerminatorInst *PT = (*PI)->getTerminator(); + if (!isa(PT) || LPad == PT->getSuccessor(0)) { + OnlyUnwoundTo = false; + break; + } + } + if (OnlyUnwoundTo) { + // Only unwind edges lead to the landing pad. Remember the landing pad. + LandingPads.insert(LPad); + continue; + } + + // At least one normal edge ends at the landing pad. Redirect the unwind + // edges to a new basic block which falls through into this one. + + // Create the new basic block. + BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge"); + + // Insert it into the function right before the original landing pad. + LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); + + // Redirect unwind edges from the original landing pad to NewBB. + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { + TerminatorInst *PT = (*PI++)->getTerminator(); + if (isa(PT) && PT->getSuccessor(1) == LPad) + // Unwind to the new block. 
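A compact model of the normalization performed here: when a landing pad has both normal and unwind in-edges, introduce a fresh block, retarget the unwind edges at it, and let it fall through to the original pad. Edge and normalize are illustrative stand-ins for the IR surgery above, not LLVM calls.

    #include <iostream>
    #include <string>
    #include <vector>

    struct Edge { std::string From, To; bool IsUnwind; };

    // Returns the name of the block acting as the landing pad afterwards.
    std::string normalize(const std::string &LPad, std::vector<Edge> &Edges) {
      bool HasNormal = false, HasUnwind = false;
      for (size_t i = 0; i != Edges.size(); ++i)
        if (Edges[i].To == LPad)
          (Edges[i].IsUnwind ? HasUnwind : HasNormal) = true;
      if (!HasUnwind || !HasNormal)
        return LPad;                      // already normal, nothing to do
      std::string NewBB = LPad + "_unwind_edge";
      for (size_t i = 0; i != Edges.size(); ++i)
        if (Edges[i].To == LPad && Edges[i].IsUnwind)
          Edges[i].To = NewBB;            // unwind edges now end at NewBB
      Edge Fall = {NewBB, LPad, false};   // NewBB falls through to the pad
      Edges.push_back(Fall);
      return NewBB;                       // NewBB is the landing pad now
    }

    int main() {
      Edge A = {"bb1", "lpad", false}, B = {"bb2", "lpad", true};
      std::vector<Edge> Edges;
      Edges.push_back(A); Edges.push_back(B);
      std::cout << "landing pad is now: " << normalize("lpad", Edges) << "\n";
      return 0; // prints: landing pad is now: lpad_unwind_edge
    }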
+ PT->setSuccessor(1, NewBB); + } + + // If there are any PHI nodes in LPad, we need to update them so that they + // merge incoming values from NewBB instead. + for (BasicBlock::iterator II = LPad->begin(); isa(II); ++II) { + PHINode *PN = cast(II); + pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); + + // Check to see if all of the values coming in via unwind edges are the + // same. If so, we don't need to create a new PHI node. + Value *InVal = PN->getIncomingValueForBlock(*PB); + for (pred_iterator PI = PB; PI != PE; ++PI) { + if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { + InVal = 0; + break; + } + } + + if (InVal == 0) { + // Different unwind edges have different values. Create a new PHI node + // in NewBB. + PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", + NewBB); + // Add an entry for each unwind edge, using the value from the old PHI. + for (pred_iterator PI = PB; PI != PE; ++PI) + NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); + + // Now use this new PHI as the common incoming value for NewBB in PN. + InVal = NewPN; + } + + // Revector exactly one entry in the PHI node to come from NewBB + // and delete all other entries that come from unwind edges. If + // there are both normal and unwind edges from the same predecessor, + // this leaves an entry for the normal edge. + for (pred_iterator PI = PB; PI != PE; ++PI) + PN->removeIncomingValue(*PI); + PN->addIncoming(InVal, NewBB); + } + + // Add a fallthrough from NewBB to the original landing pad. + BranchInst::Create(LPad, NewBB); + + // Now update DominatorTree and DominanceFrontier analysis information. + if (DT) + DT->splitBlock(NewBB); + if (DF) + DF->splitBlock(NewBB); + + // Remember the newly constructed landing pad. The original landing pad + // LPad is no longer a landing pad now that all unwind edges have been + // revectored to NewBB. + LandingPads.insert(NewBB); + ++NumLandingPadsSplit; + Changed = true; + } + + return Changed; +} + +/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, +/// rethrowing any previously caught exception. This will crash horribly +/// at runtime if there is no such exception: using unwind to throw a new +/// exception is currently not supported. +bool DwarfEHPrepare::LowerUnwinds() { + bool Changed = false; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (!isa(TI)) + continue; + + // Replace the unwind instruction with a call to _Unwind_Resume (or the + // appropriate target equivalent) followed by an UnreachableInst. + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + std::vector Params(1, PointerType::getUnqual(Type::Int8Ty)); + FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + // Create the call... + CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI); + // ...followed by an UnreachableInst. + new UnreachableInst(TI); + + // Nuke the unwind instruction. + TI->eraseFromParent(); + ++NumUnwindsLowered; + Changed = true; + } + + return Changed; +} + +/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from +/// landing pads by replacing calls outside of landing pads with loads from a +/// stack temporary. 
Move eh.exception calls inside landing pads to the start +/// of the landing pad (optional, but may make things simpler for later passes). +bool DwarfEHPrepare::MoveExceptionValueCalls() { + // If the eh.exception intrinsic is not declared in the module then there is + // nothing to do. Speed up compilation by checking for this common case. + if (!ExceptionValueIntrinsic && + !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) + return false; + + bool Changed = false; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast(II++)) + if (CI->getIntrinsicID() == Intrinsic::eh_exception) { + if (!CI->use_empty()) { + Value *ExceptionValue = CreateReadOfExceptionValue(BB); + if (CI == ExceptionValue) { + // The call was at the start of a landing pad - leave it alone. + assert(LandingPads.count(BB) && + "Created eh.exception call outside landing pad!"); + continue; + } + CI->replaceAllUsesWith(ExceptionValue); + } + CI->eraseFromParent(); + ++NumExceptionValuesMoved; + Changed = true; + } + } + + return Changed; +} + +/// FinishStackTemporaries - If we introduced a stack variable to hold the +/// exception value then initialize it in each landing pad. +bool DwarfEHPrepare::FinishStackTemporaries() { + if (!ExceptionValueVar) + // Nothing to do. + return false; + + bool Changed = false; + + // Make sure that there is a store of the exception value at the start of + // each landing pad. + for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) { + Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI); + Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar); + Store->insertAfter(ExceptionValue); + Changed = true; + } + + return Changed; +} + +/// PromoteStackTemporaries - Turn any stack temporaries we introduced into +/// registers if possible. +bool DwarfEHPrepare::PromoteStackTemporaries() { + if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { + // Turn the exception temporary into registers and phi nodes if possible. + std::vector Allocas(1, ExceptionValueVar); + PromoteMemToReg(Allocas, *DT, *DF); + return true; + } + return false; +} + +/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at +/// the start of the basic block (unless there already is one, in which case +/// the existing call is returned). +Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { + Instruction *Start = BB->getFirstNonPHI(); + // Is this a call to eh.exception? + if (IntrinsicInst *CI = dyn_cast(Start)) + if (CI->getIntrinsicID() == Intrinsic::eh_exception) + // Reuse the existing call. + return Start; + + // Find the eh.exception intrinsic if we didn't already. + if (!ExceptionValueIntrinsic) + ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::eh_exception); + + // Create the call. + return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); +} + +/// CreateValueLoad - Insert a load of the exception value stack variable +/// (creating it if necessary) at the start of the basic block (unless +/// there already is a load, in which case the existing load is returned). +Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { + Instruction *Start = BB->getFirstNonPHI(); + // Is this a load of the exception temporary? 
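CreateExceptionValueCall above and CreateValueLoad (completed in the next hunk) share one small pattern: look at the start of the block and reuse an existing read of the exception value instead of inserting a duplicate. A toy version of that get-or-create discipline, with a map keyed by block name standing in for the inspection of the block's first instruction (all names hypothetical):

    #include <iostream>
    #include <map>
    #include <string>

    // Memoize one "read of the exception value" per block: if the block
    // already starts with such a read, hand it back instead of making two.
    std::map<std::string, int> FirstReadID;
    int NextID = 0;

    int readOfExceptionValue(const std::string &BB) {
      std::map<std::string, int>::iterator I = FirstReadID.find(BB);
      if (I != FirstReadID.end())
        return I->second;          // reuse the existing call/load
      int ID = NextID++;           // otherwise create one at the block start
      FirstReadID[BB] = ID;
      return ID;
    }

    int main() {
      std::cout << readOfExceptionValue("lpad") << " "
                << readOfExceptionValue("lpad") << "\n"; // same ID twice: 0 0
      return 0;
    }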
+ if (ExceptionValueVar) + if (LoadInst* LI = dyn_cast(Start)) + if (LI->getPointerOperand() == ExceptionValueVar) + // Reuse the existing load. + return Start; + + // Create the temporary if we didn't already. + if (!ExceptionValueVar) { + ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty), + "eh.value", F->begin()->begin()); + ++NumStackTempsIntroduced; + } + + // Load the value. + return new LoadInst(ExceptionValueVar, "eh.value.load", Start); +} + +bool DwarfEHPrepare::runOnFunction(Function &Fn) { + bool Changed = false; + + // Initialize internal state. + DT = getAnalysisIfAvailable(); + DF = getAnalysisIfAvailable(); + ExceptionValueVar = 0; + F = &Fn; + + // Ensure that only unwind edges end at landing pads (a landing pad is a + // basic block where an invoke unwind edge ends). + Changed |= NormalizeLandingPads(); + + // Turn unwind instructions into libcalls. + Changed |= LowerUnwinds(); + + // TODO: Move eh.selector calls to landing pads and combine them. + + // Move eh.exception calls to landing pads. + Changed |= MoveExceptionValueCalls(); + + // Initialize any stack temporaries we introduced. + Changed |= FinishStackTemporaries(); + + // Turn any stack temporaries into registers if possible. + if (!CompileFast) + Changed |= PromoteStackTemporaries(); + + LandingPads.clear(); + + return Changed; +} diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp new file mode 100644 index 000000000000..7cc116235243 --- /dev/null +++ b/lib/CodeGen/ELFWriter.cpp @@ -0,0 +1,575 @@ +//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent ELF writer. This file writes out +// the ELF file in the following order: +// +// #1. ELF Header +// #2. '.text' section +// #3. '.data' section +// #4. '.bss' section (conceptual position in file) +// ... +// #X. '.shstrtab' section +// #Y. Section Table +// +// The entries in the section table are laid out as: +// #0. Null entry [required] +// #1. ".text" entry - the program code +// #2. ".data" entry - global variables with initializers. [ if needed ] +// #3. ".bss" entry - global variables without initializers. [ if needed ] +// ... +// #N. ".shstrtab" entry - String table for the section names. +// +// NOTE: This code should eventually be extended to support 64-bit ELF (this +// won't be hard), but we haven't done so yet! +// +//===----------------------------------------------------------------------===// + +#include "ELFWriter.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +char ELFWriter::ID = 0; +/// AddELFWriter - Concrete function to add the ELF writer to the function pass +/// manager. 
+MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
+                                       raw_ostream &O,
+                                       TargetMachine &TM) {
+  ELFWriter *EW = new ELFWriter(O, TM);
+  PM.add(EW);
+  return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+  /// functions to the ELF file.
+  class ELFCodeEmitter : public MachineCodeEmitter {
+    ELFWriter &EW;
+    TargetMachine &TM;
+    ELFWriter::ELFSection *ES;  // Section to write to.
+    std::vector<unsigned char> *OutBuffer;
+    size_t FnStart;
+  public:
+    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+    void startFunction(MachineFunction &F);
+    bool finishFunction(MachineFunction &F);
+
+    void addRelocation(const MachineRelocation &MR) {
+      assert(0 && "relocations not handled yet!");
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+    }
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(0 && "CP not implemented yet!");
+      return 0;
+    }
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(0 && "JT not implemented yet!");
+      return 0;
+    }
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(0 && "MBB address not implemented yet!");
+      return 0;
+    }
+
+    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+      assert(0 && "Label address not implemented yet!");
+      abort();
+      return 0;
+    }
+
+    virtual void emitLabel(uint64_t LabelID) {
+      assert(0 && "emitLabel not implemented yet!");
+      abort();
+    }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    void startGVStub(const GlobalValue* F, unsigned StubSize,
+                     unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    void *finishGVStub(const GlobalValue *F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+  // Align the output buffer to the appropriate alignment.
+  unsigned Align = 16;   // FIXME: GENERICIZE!!
+  // Get the ELF Section that this function belongs in.
+  ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+                      ELFWriter::ELFSection::SHF_EXECINSTR |
+                      ELFWriter::ELFSection::SHF_ALLOC);
+  OutBuffer = &ES->SectionData;
+  cerr << "FIXME: This code needs to be updated for changes in the "
+       << "CodeEmitter interfaces. In particular, this should set "
+       << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+  abort();
+
+  // Upgrade the section alignment if required.
+  if (ES->Align < Align) ES->Align = Align;
+
+  // Add padding zeros to the end of the buffer to make sure that the
+  // function will start on the correct byte alignment within the section.
+  OutputBuffer OB(*OutBuffer,
+                  TM.getTargetData()->getPointerSizeInBits() == 64,
+                  TM.getTargetData()->isLittleEndian());
+  OB.align(Align);
+  FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
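startFunction pads the section buffer so the function begins on an aligned boundary and remembers that offset; finishFunction, just below, turns the pair (FnStart, buffer end) into the symbol's Value and Size. Here is the rounding step in isolation, assuming Align is a power of two (alignBuffer is a hypothetical helper):

    #include <cstdio>
    #include <vector>

    // Pad a byte buffer up to a power-of-two boundary and return the offset
    // at which the next function will start - the FnStart bookkeeping above.
    size_t alignBuffer(std::vector<unsigned char> &Buf, size_t Align) {
      size_t Padded = (Buf.size() + Align - 1) & ~(Align - 1);
      Buf.resize(Padded, 0); // zero fill, like OB.align(Align)
      return Padded;
    }

    int main() {
      std::vector<unsigned char> Text(13, 0x90);
      size_t FnStart = alignBuffer(Text, 16);
      std::printf("function starts at offset %zu (size now %zu)\n",
                  FnStart, Text.size()); // prints: 16, 16
      return 0;
    }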
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) { + // We now know the size of the function, add a symbol to represent it. + ELFWriter::ELFSym FnSym(F.getFunction()); + + // Figure out the binding (linkage) of the symbol. + switch (F.getFunction()->getLinkage()) { + default: + // appending linkage is illegal for functions. + assert(0 && "Unknown linkage type!"); + case GlobalValue::ExternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL); + break; + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK); + break; + case GlobalValue::PrivateLinkage: + assert (0 && "PrivateLinkage should not be in the symbol table."); + case GlobalValue::InternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL); + break; + } + + ES->Size = OutBuffer->size(); + + FnSym.SetType(ELFWriter::ELFSym::STT_FUNC); + FnSym.SectionIdx = ES->SectionIdx; + FnSym.Value = FnStart; // Value = Offset from start of Section. + FnSym.Size = OutBuffer->size()-FnStart; + + // Finally, add it to the symtab. + EW.SymbolTable.push_back(FnSym); + return false; +} + +//===----------------------------------------------------------------------===// +// ELFWriter Implementation +//===----------------------------------------------------------------------===// + +ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) + : MachineFunctionPass(&ID), O(o), TM(tm) { + e_flags = 0; // e_flags defaults to 0, no flags. + + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. + MCE = new ELFCodeEmitter(*this); + NumSections = 0; +} + +ELFWriter::~ELFWriter() { + delete MCE; +} + +// doInitialization - Emit the file header and all of the global variables for +// the module to the ELF file. +bool ELFWriter::doInitialization(Module &M) { + Mang = new Mangler(M); + + // Local alias to shortenify coming code. + std::vector &FH = FileHeader; + OutputBuffer FHOut(FH, is64Bit, isLittleEndian); + + FHOut.outbyte(0x7F); // EI_MAG0 + FHOut.outbyte('E'); // EI_MAG1 + FHOut.outbyte('L'); // EI_MAG2 + FHOut.outbyte('F'); // EI_MAG3 + FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS + FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA + FHOut.outbyte(1); // EI_VERSION + FH.resize(16); // EI_PAD up to 16 bytes. + + // This should change for shared objects. + FHOut.outhalf(1); // e_type = ET_REL + FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined + FHOut.outword(1); // e_version = 1 + FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file + FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o + + ELFHeader_e_shoff_Offset = FH.size(); + FHOut.outaddr(0); // e_shoff + FHOut.outword(e_flags); // e_flags = whatever the target wants + + FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size + FHOut.outhalf(0); // e_phentsize = prog header entry size + FHOut.outhalf(0); // e_phnum = # prog header entries = 0 + FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size + + + ELFHeader_e_shnum_Offset = FH.size(); + FHOut.outhalf(0); // e_shnum = # of section header ents + ELFHeader_e_shstrndx_Offset = FH.size(); + FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab' + + // Add the null section, which is required to be first in the file. + getSection("", 0, 0); + + // Start up the symbol table. 
The first entry in the symtab is the null + // entry. + SymbolTable.push_back(ELFSym(0)); + + return false; +} + +void ELFWriter::EmitGlobal(GlobalVariable *GV) { + // If this is an external global, emit it now. TODO: Note that it would be + // better to ignore the symbol here and only add it to the symbol table if + // referenced. + if (!GV->hasInitializer()) { + ELFSym ExternalSym(GV); + ExternalSym.SetBind(ELFSym::STB_GLOBAL); + ExternalSym.SetType(ELFSym::STT_NOTYPE); + ExternalSym.SectionIdx = ELFSection::SHN_UNDEF; + SymbolTable.push_back(ExternalSym); + return; + } + + unsigned Align = TM.getTargetData()->getPreferredAlignment(GV); + unsigned Size = + TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType()); + + // If this global has a zero initializer, it is part of the .bss or common + // section. + if (GV->getInitializer()->isNullValue()) { + // If this global is part of the common block, add it now. Variables are + // part of the common block if they are zero initialized and allowed to be + // merged with other symbols. + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasCommonLinkage()) { + ELFSym CommonSym(GV); + // Value for common symbols is the alignment required. + CommonSym.Value = Align; + CommonSym.Size = Size; + CommonSym.SetBind(ELFSym::STB_GLOBAL); + CommonSym.SetType(ELFSym::STT_OBJECT); + // TODO SOMEDAY: add ELF visibility. + CommonSym.SectionIdx = ELFSection::SHN_COMMON; + SymbolTable.push_back(CommonSym); + return; + } + + // Otherwise, this symbol is part of the .bss section. Emit it now. + + // Handle alignment. Ensure section is aligned at least as much as required + // by this symbol. + ELFSection &BSSSection = getBSSSection(); + BSSSection.Align = std::max(BSSSection.Align, Align); + + // Within the section, emit enough virtual padding to get us to an alignment + // boundary. + if (Align) + BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1); + + ELFSym BSSSym(GV); + BSSSym.Value = BSSSection.Size; + BSSSym.Size = Size; + BSSSym.SetType(ELFSym::STT_OBJECT); + + switch (GV->getLinkage()) { + default: // weak/linkonce/common handled above + assert(0 && "Unexpected linkage type!"); + case GlobalValue::AppendingLinkage: // FIXME: This should be improved! + case GlobalValue::ExternalLinkage: + BSSSym.SetBind(ELFSym::STB_GLOBAL); + break; + case GlobalValue::InternalLinkage: + BSSSym.SetBind(ELFSym::STB_LOCAL); + break; + } + + // Set the idx of the .bss section + BSSSym.SectionIdx = BSSSection.SectionIdx; + if (!GV->hasPrivateLinkage()) + SymbolTable.push_back(BSSSym); + + // Reserve space in the .bss section for this symbol. + BSSSection.Size += Size; + return; + } + + // FIXME: handle .rodata + //assert(!GV->isConstant() && "unimp"); + + // FIXME: handle .data + //assert(0 && "unimp"); +} + + +bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object above. + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the ELF file to 'O'. +bool ELFWriter::doFinalization(Module &M) { + // Okay, the ELF header and .text sections have been completed, build the + // .data, .bss, and "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the symbol table now, if non-empty. + EmitSymbolTable(); + + // FIXME: Emit the relocations now. + + // Emit the string table for the sections in the ELF file we have. 
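EmitSymbolTable and EmitSectionTableStringTable in the following hunks both build ELF string tables the classic way: a single NUL byte at offset 0, then each name appended NUL-terminated, with every consumer recording its name's byte offset. A stand-alone sketch of that layout (StrTab is an invented helper, not the writer's API):

    #include <cstdio>
    #include <string>
    #include <vector>

    // An ELF string table is the concatenation of NUL-terminated names,
    // with a NUL byte at index 0 so the empty name maps to offset 0.
    struct StrTab {
      std::vector<unsigned char> Data;
      StrTab() { Data.push_back(0); } // required leading NUL
      unsigned add(const std::string &Name) {
        if (Name.empty()) return 0;
        unsigned Off = (unsigned)Data.size();
        Data.insert(Data.end(), Name.begin(), Name.end());
        Data.push_back(0);              // NUL terminator
        return Off;
      }
    };

    int main() {
      StrTab T;
      unsigned Text = T.add(".text"), Sym = T.add("main");
      std::printf(".text at %u, main at %u, table is %zu bytes\n",
                  Text, Sym, T.Data.size()); // prints: 1, 7, 12
      return 0;
    }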
+ EmitSectionTableStringTable(); + + // Emit the sections to the .o file, and emit the section table for the file. + OutputSectionsAndSectionTable(); + + // We are done with the abstract symbols. + SectionList.clear(); + NumSections = 0; + + // Release the name mangler object. + delete Mang; Mang = 0; + return false; +} + +/// EmitSymbolTable - If the current symbol table is non-empty, emit the string +/// table for it and then the symbol table itself. +void ELFWriter::EmitSymbolTable() { + if (SymbolTable.size() == 1) return; // Only the null entry. + + // FIXME: compact all local symbols to the start of the symtab. + unsigned FirstNonLocalSymbol = 1; + + ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0); + StrTab.Align = 1; + + DataBuffer &StrTabBuf = StrTab.SectionData; + OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian); + + // Set the zero'th symbol to a null byte, as required. + StrTabOut.outbyte(0); + SymbolTable[0].NameIdx = 0; + unsigned Index = 1; + for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) { + // Use the name mangler to uniquify the LLVM symbol. + std::string Name = Mang->getValueName(SymbolTable[i].GV); + + if (Name.empty()) { + SymbolTable[i].NameIdx = 0; + } else { + SymbolTable[i].NameIdx = Index; + + // Add the name to the output buffer, including the null terminator. + StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end()); + + // Add a null terminator. + StrTabBuf.push_back(0); + + // Keep track of the number of bytes emitted to this section. + Index += Name.size()+1; + } + } + assert(Index == StrTabBuf.size()); + StrTab.Size = Index; + + // Now that we have emitted the string table and know the offset into the + // string table of each symbol, emit the symbol table itself. + ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0); + SymTab.Align = is64Bit ? 8 : 4; + SymTab.Link = SymTab.SectionIdx; // Section Index of .strtab. + SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol. + SymTab.EntSize = 16; // Size of each symtab entry. FIXME: wrong for ELF64 + DataBuffer &SymTabBuf = SymTab.SectionData; + OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian); + + if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit. + for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { + ELFSym &Sym = SymbolTable[i]; + SymTabOut.outword(Sym.NameIdx); + SymTabOut.outaddr32(Sym.Value); + SymTabOut.outword(Sym.Size); + SymTabOut.outbyte(Sym.Info); + SymTabOut.outbyte(Sym.Other); + SymTabOut.outhalf(Sym.SectionIdx); + } + } else { + for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { + ELFSym &Sym = SymbolTable[i]; + SymTabOut.outword(Sym.NameIdx); + SymTabOut.outbyte(Sym.Info); + SymTabOut.outbyte(Sym.Other); + SymTabOut.outhalf(Sym.SectionIdx); + SymTabOut.outaddr64(Sym.Value); + SymTabOut.outxword(Sym.Size); + } + } + + SymTab.Size = SymTabBuf.size(); +} + +/// EmitSectionTableStringTable - This method adds and emits a section for the +/// ELF Section Table string table: the string table that holds all of the +/// section names. +void ELFWriter::EmitSectionTableStringTable() { + // First step: add the section for the string table to the list of sections: + ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0); + + // Now that we know which section number is the .shstrtab section, update the + // e_shstrndx entry in the ELF header. 
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); + FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset); + + // Set the NameIdx of each section in the string table and emit the bytes for + // the string table. + unsigned Index = 0; + DataBuffer &Buf = SHStrTab.SectionData; + + for (std::list::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + // Set the index into the table. Note if we have lots of entries with + // common suffixes, we could memoize them here if we cared. + I->NameIdx = Index; + + // Add the name to the output buffer, including the null terminator. + Buf.insert(Buf.end(), I->Name.begin(), I->Name.end()); + + // Add a null terminator. + Buf.push_back(0); + + // Keep track of the number of bytes emitted to this section. + Index += I->Name.size()+1; + } + + // Set the size of .shstrtab now that we know what it is. + assert(Index == Buf.size()); + SHStrTab.Size = Index; +} + +/// OutputSectionsAndSectionTable - Now that we have constructed the file header +/// and all of the sections, emit these to the ostream destination and emit the +/// SectionTable. +void ELFWriter::OutputSectionsAndSectionTable() { + // Pass #1: Compute the file offset for each section. + size_t FileOff = FileHeader.size(); // File header first. + + // Emit all of the section data in order. + for (std::list::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + // Align FileOff to whatever the alignment restrictions of the section are. + if (I->Align) + FileOff = (FileOff+I->Align-1) & ~(I->Align-1); + I->Offset = FileOff; + FileOff += I->SectionData.size(); + } + + // Align Section Header. + unsigned TableAlign = is64Bit ? 8 : 4; + FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + + // Now that we know where all of the sections will be emitted, set the e_shnum + // entry in the ELF header. + OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); + FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset); + + // Now that we know the offset in the file of the section table, update the + // e_shoff address in the ELF header. + FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset); + + // Now that we know all of the data in the file header, emit it and all of the + // sections! + O.write((char*)&FileHeader[0], FileHeader.size()); + FileOff = FileHeader.size(); + DataBuffer().swap(FileHeader); + + DataBuffer Table; + OutputBuffer TableOut(Table, is64Bit, isLittleEndian); + + // Emit all of the section data and build the section table itself. + while (!SectionList.empty()) { + const ELFSection &S = *SectionList.begin(); + + // Align FileOff to whatever the alignment restrictions of the section are. + if (S.Align) + for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + O.write((char*)&S.SectionData[0], S.SectionData.size()); + FileOff += S.SectionData.size(); + + TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx + TableOut.outword(S.Type); // sh_type - Section contents & semantics + TableOut.outword(S.Flags); // sh_flags - Section flags. + TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in. + TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start. + TableOut.outword(S.Size); // sh_size - The section size. + TableOut.outword(S.Link); // sh_link - Section header table index link. + TableOut.outword(S.Info); // sh_info - Auxillary information. + TableOut.outword(S.Align); // sh_addralign - Alignment of section. 
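The "Pass #1" offset assignment earlier in this function is worth seeing on its own: walk the sections in order, round the running file offset up to each section's alignment, record it, and advance by the section's size. A sketch under the same power-of-two assumption (Sec and layout are illustrative, not the writer's types):

    #include <cstdio>
    #include <vector>

    struct Sec { size_t Align, Size, Offset; };

    // Assign each section a file offset, honoring per-section alignment
    // (0 means "no constraint"), and return where the data ends.
    size_t layout(std::vector<Sec> &Secs, size_t FileOff) {
      for (size_t i = 0; i != Secs.size(); ++i) {
        if (Secs[i].Align)
          FileOff = (FileOff + Secs[i].Align - 1) & ~(Secs[i].Align - 1);
        Secs[i].Offset = FileOff;
        FileOff += Secs[i].Size;
      }
      return FileOff; // the section header table goes after this (aligned)
    }

    int main() {
      Sec s0 = {0, 52, 0}, s1 = {16, 100, 0}, s2 = {4, 7, 0};
      std::vector<Sec> Secs;
      Secs.push_back(s0); Secs.push_back(s1); Secs.push_back(s2);
      size_t End = layout(Secs, 52); // the ELF header fills the first 52 bytes
      std::printf("offsets: %zu %zu %zu, end %zu\n",
                  Secs[0].Offset, Secs[1].Offset, Secs[2].Offset, End);
      return 0; // prints: offsets: 52 112 212, end 219
    }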
+ TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section + + SectionList.pop_front(); + } + + // Align output for the section table. + for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + + // Emit the section table itself. + O.write((char*)&Table[0], Table.size()); +} diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h new file mode 100644 index 000000000000..31aa05a9c4a5 --- /dev/null +++ b/lib/CodeGen/ELFWriter.h @@ -0,0 +1,230 @@ +//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ELFWriter class. +// +//===----------------------------------------------------------------------===// + +#ifndef ELFWRITER_H +#define ELFWRITER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include +#include + +namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class ELFCodeEmitter; + class raw_ostream; + + /// ELFWriter - This class implements the common target-independent code for + /// writing ELF files. Targets should derive a class from this to + /// parameterize the output format. + /// + class ELFWriter : public MachineFunctionPass { + friend class ELFCodeEmitter; + public: + static char ID; + + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + ELFWriter(raw_ostream &O, TargetMachine &TM); + ~ELFWriter(); + + typedef std::vector DataBuffer; + + protected: + /// Output stream to send the resultant object file to. + /// + raw_ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. + ELFCodeEmitter *MCE; + + //===------------------------------------------------------------------===// + // Properties to be set by the derived class ctor, used to configure the + // ELFWriter. + + // e_machine - This field is the target specific value to emit as the + // e_machine member of the ELF header. + unsigned short e_machine; + + // e_flags - The machine flags for the target. This defaults to zero. + unsigned e_flags; + + //===------------------------------------------------------------------===// + // Properties inferred automatically from the target machine. + // + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. + bool is64Bit, isLittleEndian; + + /// doInitialization - Emit the file header and all of the global variables + /// for the module to the ELF file. + bool doInitialization(Module &M); + + bool runOnMachineFunction(MachineFunction &MF); + + + /// doFinalization - Now that the module has been completely processed, emit + /// the ELF file to 'O'. + bool doFinalization(Module &M); + + private: + // The buffer we accumulate the file header into. Note that this should be + // changed into something much more efficient later (and the bitcode writer + // as well!). 
+ DataBuffer FileHeader; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. This is eventually turned into the section header + /// table at the end of the file. + struct ELFSection { + std::string Name; // Name of the section. + unsigned NameIdx; // Index in .shstrtab of name, once emitted. + unsigned Type; + unsigned Flags; + uint64_t Addr; + unsigned Offset; + unsigned Size; + unsigned Link; + unsigned Info; + unsigned Align; + unsigned EntSize; + + /// SectionIdx - The number of the section in the Section Table. + /// + unsigned short SectionIdx; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + DataBuffer SectionData; + + enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3, + SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7, + SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 }; + enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 }; + enum { // SHF - ELF Section Header Flags + SHF_WRITE = 1 << 0, // Writable + SHF_ALLOC = 1 << 1, // Mapped into the process addr space + SHF_EXECINSTR = 1 << 2, // Executable + SHF_MERGE = 1 << 4, // Might be merged if equal + SHF_STRINGS = 1 << 5, // Contains null-terminated strings + SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index + SHF_LINK_ORDER = 1 << 7, // Preserve order after combining + SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required + SHF_GROUP = 1 << 9, // Section is a member of a group + SHF_TLS = 1 << 10 // Section holds thread-local data + }; + + ELFSection(const std::string &name) + : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), + Link(0), Info(0), Align(0), EntSize(0) { + } + }; + + /// SectionList - This is the list of sections that we have emitted to the + /// file. Once the file has been completely built, the section header table + /// is constructed from this info. + std::list SectionList; + unsigned NumSections; // Always = SectionList.size() + + /// SectionLookup - This is a mapping from section name to section number in + /// the SectionList. + std::map SectionLookup; + + /// getSection - Return the section with the specified name, creating a new + /// section if one does not already exist. + ELFSection &getSection(const std::string &Name, + unsigned Type, unsigned Flags = 0) { + ELFSection *&SN = SectionLookup[Name]; + if (SN) return *SN; + + SectionList.push_back(Name); + SN = &SectionList.back(); + SN->SectionIdx = NumSections++; + SN->Type = Type; + SN->Flags = Flags; + return *SN; + } + + ELFSection &getDataSection() { + return getSection(".data", ELFSection::SHT_PROGBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + ELFSection &getBSSSection() { + return getSection(".bss", ELFSection::SHT_NOBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + const GlobalValue *GV; // The global value this corresponds to. + unsigned NameIdx; // Index in .strtab of name, once emitted. 
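The ELFSym helpers defined a little further down (SetBind/SetType) pack the symbol binding and type into the single st_info byte: binding in the high nibble, type in the low nibble. The packing in isolation:

    #include <cassert>
    #include <cstdio>

    // st_info layout: binding in the high nibble, type in the low nibble.
    unsigned char makeInfo(unsigned Bind, unsigned Type) {
      assert(Bind == (Bind & 0xF) && Type == (Type & 0xF) && "out of range");
      return (unsigned char)((Bind << 4) | Type);
    }

    int main() {
      const unsigned STB_GLOBAL = 1, STT_FUNC = 2;
      unsigned char Info = makeInfo(STB_GLOBAL, STT_FUNC);
      std::printf("st_info = 0x%02x (bind %u, type %u)\n",
                  Info, Info >> 4, Info & 0xF); // 0x12 (bind 1, type 2)
      return 0;
    }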
+      uint64_t Value;
+      unsigned Size;
+      unsigned char Info;
+      unsigned char Other;
+      unsigned short SectionIdx;
+
+      enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
+      enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
+             STT_FILE = 4 };
+      ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
+                                      Other(0), SectionIdx(0) {}
+
+      void SetBind(unsigned X) {
+        assert(X == (X & 0xF) && "Bind value out of range!");
+        Info = (Info & 0x0F) | (X << 4);
+      }
+      void SetType(unsigned X) {
+        assert(X == (X & 0xF) && "Type value out of range!");
+        Info = (Info & 0xF0) | X;
+      }
+    };
+
+    /// SymbolTable - This is the list of symbols we have emitted to the file.
+    /// This actually gets rearranged before emission to the file (to put the
+    /// local symbols first in the list).
+    std::vector<ELFSym> SymbolTable;
+
+    // As we complete the ELF file, we need to update fields in the ELF header
+    // (e.g. the location of the section table). These members keep track of
+    // the offset in ELFHeader of these various pieces to update and other
+    // locations in the file.
+    unsigned ELFHeader_e_shoff_Offset;     // e_shoff    in ELF header.
+    unsigned ELFHeader_e_shstrndx_Offset;  // e_shstrndx in ELF header.
+    unsigned ELFHeader_e_shnum_Offset;     // e_shnum    in ELF header.
+  private:
+    void EmitGlobal(GlobalVariable *GV);
+
+    void EmitSymbolTable();
+
+    void EmitSectionTableStringTable();
+    void OutputSectionsAndSectionTable();
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..cf2ebb39ad82
--- /dev/null
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,212 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
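+//
+// A back end typically consumes this analysis along the following lines
+// (an illustrative sketch; emitStackMapEntry is a hypothetical helper):
+//
+//   GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+//   for (GCFunctionInfo::roots_iterator RI = FI.roots_begin(),
+//                                       RE = FI.roots_end(); RI != RE; ++RI)
+//     emitStackMapEntry(RI->Num, RI->StackOffset);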
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+  class VISIBILITY_HIDDEN Printer : public FunctionPass {
+    static char ID;
+    std::ostream &OS;
+
+  public:
+    explicit Printer(std::ostream &OS = *cerr);
+
+    const char *getPassName() const;
+    void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    bool runOnFunction(Function &F);
+  };
+
+  class VISIBILITY_HIDDEN Deleter : public FunctionPass {
+    static char ID;
+
+  public:
+    Deleter();
+
+    const char *getPassName() const;
+    void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    bool runOnFunction(Function &F);
+    bool doFinalization(Module &M);
+  };
+
+}
+
+static RegisterPass<GCModuleInfo>
+X("collector-metadata", "Create Garbage Collector Module Metadata");
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+  : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+  : ImmutablePass(&ID) {}
+
+GCModuleInfo::~GCModuleInfo() {
+  clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+                                              const std::string &Name) {
+  const char *Start = Name.c_str();
+
+  strategy_map_type::iterator NMI =
+    StrategyMap.find(Start, Start + Name.size());
+  if (NMI != StrategyMap.end())
+    return NMI->getValue();
+
+  for (GCRegistry::iterator I = GCRegistry::begin(),
+                            E = GCRegistry::end(); I != E; ++I) {
+    if (strcmp(Start, I->getName()) == 0) {
+      GCStrategy *S = I->instantiate();
+      S->M = M;
+      S->Name = Name;
+      StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S);
+      StrategyList.push_back(S);
+      return S;
+    }
+  }
+
+  cerr << "unsupported GC: " << Name << "\n";
+  abort();
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+  assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+  assert(F.hasGC());
+
+  finfo_map_type::iterator I = FInfoMap.find(&F);
+  if (I != FInfoMap.end())
+    return *I->second;
+
+  GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+  GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+  FInfoMap[&F] = GFI;
+  return *GFI;
+}
+
+void GCModuleInfo::clear() {
+  FInfoMap.clear();
+  StrategyMap.clear();
+
+  for (iterator I = begin(), E = end(); I != E; ++I)
+    delete *I;
+  StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) {
+  return new Printer(OS);
+}
+
+Printer::Printer(std::ostream &OS)
+  : FunctionPass(&ID), OS(OS) {}
+
+const char *Printer::getPassName() const {
+  return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+  switch (Kind) {
+    default: assert(0 && "Unknown GC point kind");
+    case GC::Loop:     return "loop";
+    case GC::Return:   return "return";
+    case GC::PreCall:  return "pre-call";
+    case GC::PostCall: return "post-call";
+  }
+}
+
+bool Printer::runOnFunction(Function &F) {
+  if (!F.hasGC()) {
+    GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+    OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
+    for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                        RE = FD->roots_end(); RI != RE; ++RI)
+      OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+    OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n";
+    for (GCFunctionInfo::iterator PI = FD->begin(),
+                                  PE = FD->end(); PI != PE; ++PI) {
+
+      OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
+
+      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                         RE = FD->live_end(PI);;) {
+        OS << " " << RI->Num;
+        if (++RI == RE)
+          break;
+        OS << ",";
+      }
+
+      OS << " }\n";
+    }
+  }
+
+  return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+  return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(&ID) {}
+
+const char *Deleter::getPassName() const {
+  return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+  return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+  GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(GMI && "Deleter didn't require GCModuleInfo?!");
+  GMI->clear();
+  return false;
+}
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..5a5ef84fa4eb
--- /dev/null
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,30 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                      const TargetAsmInfo &TAI) {
+  // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                       const TargetAsmInfo &TAI) {
+  // Default is no action.
+}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 000000000000..ad7421abc211
--- /dev/null
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,392 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
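+//
+// A collector hooks into this machinery by registering a GCStrategy
+// subclass. An illustrative sketch modeled on the built-in collectors
+// ("mygc" is a made-up name):
+//
+//   namespace {
+//     struct MyGC : public GCStrategy {
+//       MyGC() {
+//         NeededSafePoints = 1 << GC::PostCall; // request post-call points
+//         UsesMetadata = true;                  // ask for stack-map emission
+//       }
+//     };
+//     GCRegistry::Add<MyGC> X("mygc", "example collector");
+//   }
+//
+// Functions then opt in with the "gc" attribute: define void @f() gc "mygc".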
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +namespace { + + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// directed by the GCStrategy. It also performs automatic root initialization + /// and custom intrinsic lowering. + class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass { + static bool NeedsDefaultLoweringPass(const GCStrategy &C); + static bool NeedsCustomLoweringPass(const GCStrategy &C); + static bool CouldBecomeSafePoint(Instruction *I); + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + static bool InsertRootInitializers(Function &F, + AllocaInst **Roots, unsigned Count); + + public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + }; + + + /// MachineCodeAnalysis - This is a target-independent pass over the machine + /// function representation to identify safe points for the garbage collector + /// in the machine code. It inserts labels at safe points and populates a + /// GCMetadata record for each function. 
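+  /// For a call site, the labels described above bracket the call, yielding
+  /// machine code of the form (illustrative sketch):
+  ///
+  ///   GC_LABEL 7     ; GC::PreCall safe point
+  ///   CALL foo
+  ///   GC_LABEL 8     ; GC::PostCall safe point
+  ///
+  /// so a runtime can map a return address back to the set of live roots.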
+ class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass { + const TargetMachine *TM; + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + unsigned InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + void FindStackOffsets(MachineFunction &MF); + + public: + static char ID; + + MachineCodeAnalysis(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &MF); + }; + +} + +// ----------------------------------------------------------------------------- + +GCStrategy::GCStrategy() : + NeededSafePoints(0), + CustomReadBarriers(false), + CustomWriteBarriers(false), + CustomRoots(false), + InitRoots(true), + UsesMetadata(false) +{} + +GCStrategy::~GCStrategy() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + + Functions.clear(); +} + +bool GCStrategy::initializeCustomLowering(Module &M) { return false; } + +bool GCStrategy::performCustomLowering(Function &F) { + cerr << "gc " << getName() << " must override performCustomLowering.\n"; + abort(); + return 0; +} + +GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { + GCFunctionInfo *FI = new GCFunctionInfo(F, *this); + Functions.push_back(FI); + return FI; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCLoweringPass() { + return new LowerIntrinsics(); +} + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() + : FunctionPass(&ID) {} + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. +bool LowerIntrinsics::doInitialization(Module &M) { + // FIXME: This is rather antisocial in the context of a JIT since it performs + // work against the entire module. But this cannot be done at + // runFunction time (initializeCustomLowering likely needs to change + // the module). + GCModuleInfo *MI = getAnalysisIfAvailable(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + bool MadeChange = false; + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (NeedsCustomLoweringPass(**I)) + if ((*I)->initializeCustomLowering(M)) + MadeChange = true; + + return MadeChange; +} + +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + while (isa(IP)) ++IP; + + // Search for initializers in the initial BB. + SmallPtrSet InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast(IP)) + if (AllocaInst *AI = + dyn_cast(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. 
+ bool MadeChange = false; + + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) + if (!InitedRoots.count(*I)) { + new StoreInst(ConstantPointerNull::get(cast( + cast((*I)->getType())->getElementType())), + *I, IP); + MadeChange = true; + } + + return MadeChange; +} + +bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) { + // Default lowering is necessary only if read or write barriers have a default + // action. The default for roots is no action. + return !C.customWriteBarrier() + || !C.customReadBarrier() + || C.initializeRoots(); +} + +bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { + // Custom lowering is only necessary if enabled for some action. + return C.customWriteBarrier() + || C.customReadBarrier() + || C.customRoots(); +} + +/// CouldBecomeSafePoint - Predicate to conservatively determine whether the +/// instruction could introduce a safe point. +bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { + // The natural definition of instructions which could introduce safe points + // are: + // + // - call, invoke (AfterCall, BeforeCall) + // - phis (Loops) + // - invoke, ret, unwind (Exit) + // + // However, instructions as seemingly inoccuous as arithmetic can become + // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead + // it is necessary to take a conservative approach. + + if (isa(I) || isa(I) || + isa(I) || isa(I)) + return false; + + // llvm.gcroot is safe because it doesn't do anything at runtime. + if (CallInst *CI = dyn_cast(I)) + if (Function *F = CI->getCalledFunction()) + if (unsigned IID = F->getIntrinsicID()) + if (IID == Intrinsic::gcroot) + return false; + + return true; +} + +/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. +/// Leave gcroot intrinsics; the code generator needs to see those. +bool LowerIntrinsics::runOnFunction(Function &F) { + // Quick exit for functions that do not use GC. + if (!F.hasGC()) + return false; + + GCFunctionInfo &FI = getAnalysis().getFunctionInfo(F); + GCStrategy &S = FI.getStrategy(); + + bool MadeChange = false; + + if (NeedsDefaultLoweringPass(S)) + MadeChange |= PerformDefaultLowering(F, S); + + if (NeedsCustomLoweringPass(S)) + MadeChange |= S.performCustomLowering(F); + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. + Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getOperand(2), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. 
+ Roots.push_back(cast( + CI->getOperand(1)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCMachineCodeAnalysisPass() { + return new MachineCodeAnalysis(); +} + +char MachineCodeAnalysis::ID = 0; + +MachineCodeAnalysis::MachineCodeAnalysis() + : MachineFunctionPass(&ID) {} + +const char *MachineCodeAnalysis::getPassName() const { + return "Analyze Machine Code For Garbage Collection"; +} + +void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); +} + +unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + unsigned Label = MMI->NextLabelID(); + // N.B. we assume that MI is *not* equal to the "end()" iterator. + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + return Label; +} + +void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI)); + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI)); +} + +void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), + BBE = MF.end(); BBI != BBE; ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), + ME = BBI->end(); MI != ME; ++MI) + if (MI->getDesc().isCall()) + VisitCallPoint(MI); +} + +void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + assert(TRI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), + RE = FI->roots_end(); RI != RE; ++RI) + RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num); +} + +bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis().getFunctionInfo(*MF.getFunction()); + if (!FI->getStrategy().needsSafePoints()) + return false; + + TM = &MF.getTarget(); + MMI = &getAnalysis(); + TII = TM->getInstrInfo(); + + // Find the size of the stack frame. + FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + + // Find all safe points. + FindSafePoints(MF); + + // Find the stack offsets for all roots. + FindStackOffsets(MF); + + return false; +} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp new file mode 100644 index 000000000000..1d0887f843d8 --- /dev/null +++ b/lib/CodeGen/IfConversion.cpp @@ -0,0 +1,1229 @@ +//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level if-conversion pass. 
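+//
+// For example, on a target with predicated execution the transformation
+// looks like this (illustrative sketch in ARM-like assembly, not from this
+// patch):
+//
+//       cmp  r0, #0                    cmp   r0, #0
+//       beq  L1            =>          movne r1, #1
+//       mov  r1, #1
+//   L1:
+//
+// The conditional branch disappears; the 'true' block runs predicated on
+// the branch condition instead.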
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ifcvt" +#include "llvm/Function.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +// Hidden options for help debugging. +static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); +static cl::opt IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); +static cl::opt IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); +static cl::opt DisableSimple("disable-ifcvt-simple", + cl::init(false), cl::Hidden); +static cl::opt DisableSimpleF("disable-ifcvt-simple-false", + cl::init(false), cl::Hidden); +static cl::opt DisableTriangle("disable-ifcvt-triangle", + cl::init(false), cl::Hidden); +static cl::opt DisableTriangleR("disable-ifcvt-triangle-rev", + cl::init(false), cl::Hidden); +static cl::opt DisableTriangleF("disable-ifcvt-triangle-false", + cl::init(false), cl::Hidden); +static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", + cl::init(false), cl::Hidden); +static cl::opt DisableDiamond("disable-ifcvt-diamond", + cl::init(false), cl::Hidden); + +STATISTIC(NumSimple, "Number of simple if-conversions performed"); +STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); +STATISTIC(NumTriangle, "Number of triangle if-conversions performed"); +STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); +STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); +STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); +STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); +STATISTIC(NumDupBBs, "Number of duplicated blocks"); + +namespace { + class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass { + enum IfcvtKind { + ICNotClassfied, // BB data valid, but not classified. + ICSimpleFalse, // Same as ICSimple, but on the false path. + ICSimple, // BB is entry of an one split, no rejoin sub-CFG. + ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition. + ICTriangleRev, // Same as ICTriangle, but true path rev condition. + ICTriangleFalse, // Same as ICTriangle, but on the false path. + ICTriangle, // BB is entry of a triangle sub-CFG. + ICDiamond // BB is entry of a diamond sub-CFG. + }; + + /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// if-conversion feasibility analysis. This includes results from + /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its + /// classification, and common tail block of its successors (if it's a + /// diamond shape), its size, whether it's predicable, and whether any + /// instruction can clobber the 'would-be' predicate. + /// + /// IsDone - True if BB is not to be considered for ifcvt. + /// IsBeingAnalyzed - True if BB is currently being analyzed. + /// IsAnalyzed - True if BB has been analyzed (info is still valid). + /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. + /// IsBrAnalyzable - True if AnalyzeBranch() returns false. 
+ /// HasFallThrough - True if BB may fallthrough to the following BB. + /// IsUnpredicable - True if BB is known to be unpredicable. + /// ClobbersPred - True if BB could modify predicates (e.g. has + /// cmp, call, etc.) + /// NonPredSize - Number of non-predicated instructions. + /// BB - Corresponding MachineBasicBlock. + /// TrueBB / FalseBB- See AnalyzeBranch(). + /// BrCond - Conditions for end of block conditional branches. + /// Predicate - Predicate used in the BB. + struct BBInfo { + bool IsDone : 1; + bool IsBeingAnalyzed : 1; + bool IsAnalyzed : 1; + bool IsEnqueued : 1; + bool IsBrAnalyzable : 1; + bool HasFallThrough : 1; + bool IsUnpredicable : 1; + bool CannotBeCopied : 1; + bool ClobbersPred : 1; + unsigned NonPredSize; + MachineBasicBlock *BB; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + SmallVector BrCond; + SmallVector Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), + IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), + HasFallThrough(false), IsUnpredicable(false), + CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), + BB(0), TrueBB(0), FalseBB(0) {} + }; + + /// IfcvtToken - Record information about pending if-conversions to attemp: + /// BBI - Corresponding BBInfo. + /// Kind - Type of block. See IfcvtKind. + /// NeedSubsumption - True if the to-be-predicated BB has already been + /// predicated. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. + struct IfcvtToken { + BBInfo &BBI; + IfcvtKind Kind; + bool NeedSubsumption; + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + }; + + /// Roots - Basic blocks that do not have successors. These are the starting + /// points of Graph traversal. + std::vector Roots; + + /// BBAnalysis - Results of if-conversion feasibility analysis indexed by + /// basic block number. 
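+    /// A block's cached entry is thus found as
+    ///   BBInfo &BBI = BBAnalysis[BB->getNumber()];
+    /// which is valid because runOnMachineFunction renumbers the blocks and
+    /// resizes this vector to MF.getNumBlockIDs() up front.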
+ std::vector BBAnalysis; + + const TargetLowering *TLI; + const TargetInstrInfo *TII; + bool MadeChange; + public: + static char ID; + IfConverter() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "If Converter"; } + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + BBInfo &AnalyzeBlock(MachineBasicBlock *BB, + std::vector &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Cond, + bool isTriangle = false, bool RevBranch = false); + bool AnalyzeBlocks(MachineFunction &MF, + std::vector &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl &Cond); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl &Cond, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + + bool MeetIfcvtSizeLimit(unsigned Size) const { + return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumption. + if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + return true; + else if (C1->NeedSubsumption == C2->NeedSubsumption) { + // Favors diamond over triangle, etc. + if ((unsigned)C1->Kind < (unsigned)C2->Kind) + return true; + else if (C1->Kind == C2->Kind) + return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); + } + } + return false; + } + }; + + char IfConverter::ID = 0; +} + +static RegisterPass +X("if-converter", "If Converter"); + +FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } + +bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + TLI = MF.getTarget().getTargetLowering(); + TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + static int FnNum = -1; + DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"; + + if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { + DOUT << " skipped\n"; + return false; + } + DOUT << "\n"; + + MF.RenumberBlocks(); + BBAnalysis.resize(MF.getNumBlockIDs()); + + // Look for root nodes, i.e. blocks without successors. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + if (I->succ_empty()) + Roots.push_back(I); + + std::vector Tokens; + MadeChange = false; + unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { + // Do an initial analysis for each basic block and find all the potential + // candidates to perform if-conversion. + bool Change = AnalyzeBlocks(MF, Tokens); + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + BBInfo &BBI = Token->BBI; + IfcvtKind Kind = Token->Kind; + unsigned NumDups = Token->NumDups; + unsigned NumDups2 = Token->NumDups2; + + delete Token; + + // If the block has been evicted out of the queue or it has already been + // marked dead (due to it being predicated), then skip it. + if (BBI.IsDone) + BBI.IsEnqueued = false; + if (!BBI.IsEnqueued) + continue; + + BBI.IsEnqueued = false; + + bool RetVal = false; + switch (Kind) { + default: assert(false && "Unexpected!"); + break; + case ICSimple: + case ICSimpleFalse: { + bool isFalse = Kind == ICSimpleFalse; + if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; + DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "; + RetVal = IfConvertSimple(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) { + if (isFalse) NumSimpleFalse++; + else NumSimple++; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DOUT << "Ifcvt (Triangle"; + if (isFalse) + DOUT << " false"; + if (isRev) + DOUT << " rev"; + DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertTriangle(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) { + if (isFalse) { + if (isRev) NumTriangleFRev++; + else NumTriangleFalse++; + } else { + if (isRev) NumTriangleRev++; + else NumTriangle++; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) NumDiamonds++; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + Roots.clear(); + BBAnalysis.clear(); + + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. 
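+/// (ScanInstructions uses this as
+///    BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+/// when a block ends in a conditional branch that falls through.)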
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branch. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return NULL; + return I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the false branch rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.empty()) + // Ends with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (Size > TLI->getIfCvtDupBlockSizeLimit()) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB; + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = I; + } + return TExit && TExit == FalseBBI.BB; +} + +static +MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = BB->end(); + while (I != BB->begin()) { + --I; + if (!I->getDesc().isBranch()) + break; + } + return I; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. 
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups1; + ++TI; + ++FI; + } + + TI = firstNonBranchInst(TrueBBI.BB, TII); + FI = firstNonBranchInst(FalseBBI.BB, TII); + while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups2; + --TI; + --FI; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = BBI.Predicate.size() > 0; + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = NULL; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + assert(BBI.FalseBB && "Expected to find the fallthrough block!"); + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + + if (!isCondBr) { + if (!isPredicated) + BBI.NonPredSize++; + else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). + if (isCondBr) { + // A conditional branch is not predicable, but it may be eliminated. + continue; + } + + // Predicate may have been modified, the subsequent (currently) + // unpredicated instructions cannot be correctly predicated. 
+ BBI.IsUnpredicable = true; + return; + } + + // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are + // still potentially predicable. + std::vector PredDefs; + if (TII->DefinesPredicate(I, PredDefs)) + BBI.ClobbersPred = true; + + if (!TID.isPredicable()) { + BBI.IsUnpredicable = true; + return; + } + } +} + +/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be +/// predicated by the specified predicate. +bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, + SmallVectorImpl &Pred, + bool isTriangle, bool RevBranch) { + // If the block is dead or unpredicable, then it cannot be predicated. + if (BBI.IsDone || BBI.IsUnpredicable) + return false; + + // If it is already predicated, check if its predicate subsumes the new + // predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + return false; + + if (BBI.BrCond.size()) { + if (!isTriangle) + return false; + + // Test predicate subsumption. + SmallVector RevPred(Pred.begin(), Pred.end()); + SmallVector Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (RevBranch) { + if (TII->ReverseBranchCondition(Cond)) + return false; + } + if (TII->ReverseBranchCondition(RevPred) || + !TII->SubsumesPredicate(Cond, RevPred)) + return false; + } + + return true; +} + +/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from +/// the specified block. Record its successors and whether it looks like an +/// if-conversion candidate. +IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, + std::vector &Tokens) { + BBInfo &BBI = BBAnalysis[BB->getNumber()]; + + if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) + return BBI; + + BBI.BB = BB; + BBI.IsBeingAnalyzed = true; + + ScanInstructions(BBI); + + // Unanalyzable or ends with fallthrough or unconditional branch. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if either path is a back edge to the entry block. + if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); + BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); + + if (TrueBBI.IsDone && FalseBBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + + unsigned Dups = 0; + unsigned Dups2 = 0; + bool TNeedSub = TrueBBI.Predicate.size() > 0; + bool FNeedSub = FalseBBI.Predicate.size() > 0; + bool Enqueued = false; + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. 
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { + // Triangle: + // EBB + // | \_ + // | | + // | TBB + // | / + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(TrueBBI, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { + // Simple (split, no rejoin): + // EBB + // | \_ + // | | + // | TBB---> exit + // | + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Enqueued = true; + } + + if (CanRevCond) { + // Try the other path... + if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(FalseBBI, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); + Enqueued = true; + } + } + + BBI.IsEnqueued = Enqueued; + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; +} + +/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion +/// candidates. It returns true if any CFG restructuring is done to expose more +/// if-conversion opportunities. +bool IfConverter::AnalyzeBlocks(MachineFunction &MF, + std::vector &Tokens) { + bool Change = false; + std::set Visited; + for (unsigned i = 0, e = Roots.size(); i != e; ++i) { + for (idf_ext_iterator I=idf_ext_begin(Roots[i],Visited), + E = idf_ext_end(Roots[i], Visited); I != E; ++I) { + MachineBasicBlock *BB = *I; + AnalyzeBlock(BB, Tokens); + } + } + + // Sort to favor more complex ifcvt scheme. + std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); + + return Change; +} + +/// canFallThroughTo - Returns true either if ToBB is the next block after BB or +/// that all the intervening blocks are empty (given BB can fall through to its +/// next block). +static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator TI = ToBB; + MachineFunction::iterator E = BB->getParent()->end(); + while (++I != TI) + if (I == E || !I->empty()) + return false; + return true; +} + +/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed +/// to determine if it can be if-converted. If predecessor is already enqueued, +/// dequeue it! 
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; + if (PBBI.IsDone || PBBI.BB == BB) + continue; + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } +} + +/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. +/// +static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, + const TargetInstrInfo *TII) { + SmallVector NoCond; + TII->InsertBranch(*BB, ToBB, NULL, NoCond); +} + +/// RemoveExtraEdges - Remove true / false edges if either / both are no longer +/// successors. +void IfConverter::RemoveExtraEdges(BBInfo &BBI) { + MachineBasicBlock *TBB = NULL, *FBB = NULL; + SmallVector Cond; + if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) + BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); +} + +/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. +/// +bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + + SmallVector Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICSimpleFalse) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICSimpleFalse) + if (TII->ReverseBranchCondition(Cond)) + assert(false && "Unable to reverse branch condition!"); + + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); + } else { + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + + // Merge converted block into entry block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + bool IterIfcvt = true; + if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + // Now ifcvt'd block will look like this: + // BB: + // ... + // t, f = cmp + // if t op + // b BBf + // + // We cannot further ifcvt this block because the unconditional branch + // will have to be predicated on the new condition, that will not be + // available if cmp executes. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertTriangle - If convert a triangle sub-CFG. +/// +bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + + SmallVector Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. 
+ BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + if (TII->ReverseBranchCondition(Cond)) + assert(false && "Unable to reverse branch condition!"); + + if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { + if (ReverseBranchCondition(*CvtBBI)) { + // BB has been changed, modify its predecessors (except for this + // one) so they don't get ifcvt'ed based on bad intel. + for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), + E = CvtBBI->BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PBB = *PI; + if (PBB == BBI.BB) + continue; + BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; + if (PBBI.IsEnqueued) { + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } + } + } + } + + bool HasEarlyExit = CvtBBI->FalseBB != NULL; + bool DupBB = CvtBBI->BB->pred_size() > 1; + if (DupBB) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + } else { + // Predicate the 'true' block after removing its branch. + CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + // If 'true' block has a 'false' successor, add an exit branch to it. + if (HasEarlyExit) { + SmallVector RevCond(CvtBBI->BrCond.begin(), + CvtBBI->BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + assert(false && "Unable to reverse branch condition!"); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + BBI.BB->addSuccessor(CvtBBI->FalseBB); + } + + // Merge in the 'false' block if the 'false' block has no other + // predecessors. Otherwise, add an unconditional branch to 'false'. + bool FalseBBDead = false; + bool IterIfcvt = true; + bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + if (!isFallThrough) { + // Only merge them if the true block does not fallthrough to the false + // block. By not merging them, we make it possible to iteratively + // ifcvt the blocks. + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + MergeBlocks(BBI, *NextBBI); + FalseBBDead = true; + } else { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + } + // Mixed predicated and unpredicated code. This cannot be iteratively + // predicated. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + if (FalseBBDead) + NextBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (TrueBBI.IsDone || FalseBBI.IsDone || + TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1) { + // Something has changed. 
It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + // Merge the 'true' and 'false' blocks by copying the instructions + // from the 'false' block to the 'true' block. That is, unless the true + // block would clobber the predicate, in that case, do the opposite. + BBInfo *BBI1 = &TrueBBI; + BBInfo *BBI2 = &FalseBBI; + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + assert(false && "Unable to reverse branch condition!"); + SmallVector *Cond1 = &BBI.BrCond; + SmallVector *Cond2 = &RevCond; + + // Figure out the more profitable ordering. + bool DoSwap = false; + if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + DoSwap = true; + else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) + DoSwap = true; + } + if (DoSwap) { + std::swap(BBI1, BBI2); + std::swap(Cond1, Cond2); + } + + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Remove the duplicated instructions at the beginnings of both paths. + MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); + MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + BBI1->NonPredSize -= NumDups1; + BBI2->NonPredSize -= NumDups1; + while (NumDups1 != 0) { + ++DI1; + ++DI2; + --NumDups1; + } + BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); + BBI2->BB->erase(BBI2->BB->begin(), DI2); + + // Predicate the 'true' block after removing its branch. + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + DI1 = BBI1->BB->end(); + for (unsigned i = 0; i != NumDups2; ++i) + --DI1; + BBI1->BB->erase(DI1, BBI1->BB->end()); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); + + // Predicate the 'false' block. + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); + while (NumDups2 != 0) { + --DI2; + --NumDups2; + } + PredicateBlock(*BBI2, DI2, *Cond2); + + // Merge the true block into the entry of the diamond. + MergeBlocks(BBI, *BBI1); + MergeBlocks(BBI, *BBI2); + + // If the if-converted block falls through or unconditionally branches into + // the tail block, and the tail block does not have other predecessors, then + // fold the tail block in as well. Otherwise, unless it falls through to the + // tail, add a unconditional branch to it. + if (TailBB) { + BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, TailBBI); + TailBBI.IsDone = true; + } else { + InsertUncondBranch(BBI.BB, TailBB, TII); + BBI.HasFallThrough = false; + } + } + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// PredicateBlock - Predicate instructions from the start of the block to the +/// specified end with the specified condition. 
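+/// With a condition encoding "not equal" on a predicated target this turns,
+/// e.g., "mov r0, #1" into "movne r0, #1" via TII->PredicateInstruction
+/// (illustrative sketch; the actual rewrite is target-specific).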
+void IfConverter::PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl &Cond) { + for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { + if (TII->isPredicated(I)) + continue; + if (!TII->PredicateInstruction(I, Cond)) { + cerr << "Unable to predicate " << *I << "!\n"; + abort(); + } + } + + std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + + BBI.IsAnalyzed = false; + BBI.NonPredSize = 0; + + NumIfConvBBs++; +} + +/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to +/// the destination block. Skip end of block branches if IgnoreBr is true. +void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl &Cond, + bool IgnoreBr) { + MachineFunction &MF = *ToBBI.BB->getParent(); + + for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), + E = FromBBI.BB->end(); I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + bool isPredicated = TII->isPredicated(I); + // Do not copy the end of the block branches. + if (IgnoreBr && !isPredicated && TID.isBranch()) + break; + + MachineInstr *MI = MF.CloneMachineInstr(I); + ToBBI.BB->insert(ToBBI.BB->end(), MI); + ToBBI.NonPredSize++; + + if (!isPredicated) + if (!TII->PredicateInstruction(MI, Cond)) { + cerr << "Unable to predicate " << *MI << "!\n"; + abort(); + } + } + + std::vector Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.IsAnalyzed = false; + + NumDupBBs++; +} + +/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. +/// +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { + ToBBI.BB->splice(ToBBI.BB->end(), + FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + + // Redirect all branches to FromBB to ToBB. + std::vector Preds(FromBBI.BB->pred_begin(), + FromBBI.BB->pred_end()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + MachineBasicBlock *Pred = Preds[i]; + if (Pred == ToBBI.BB) + continue; + Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB); + } + + std::vector Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + FromBBI.BB->removeSuccessor(Succ); + ToBBI.BB->addSuccessor(Succ); + } + + // Now FromBBI always falls through to the next block! 
+  if (NBB && !FromBBI.BB->isSuccessor(NBB))
+    FromBBI.BB->addSuccessor(NBB);
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  FromBBI.Predicate.clear();
+
+  ToBBI.NonPredSize += FromBBI.NonPredSize;
+  FromBBI.NonPredSize = 0;
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+  ToBBI.IsAnalyzed = false;
+  FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..e6912b82c38f
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,892 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy) {
+  // Insert a correctly-typed definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back(I->getType());
+  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+                                    const char *FName,
+                                    const char *DName, const char *LDName) {
+  // Insert definitions for all the floating point types.
+  switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+  case Type::FloatTyID:
+    EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::FloatTy);
+    break;
+  case Type::DoubleTyID:
+    EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::DoubleTy);
+    break;
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+    EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+                         Fn->arg_begin()->getType());
+    break;
+  }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy, Constant *&FCache) {
+  if (!FCache) {
+    // If we haven't already looked up this function, check to see if the
+    // program already contains a function with this name.
+    Module *M = CI->getParent()->getParent()->getParent();
+    // Get or insert the definition now.
+    std::vector<const Type *> ParamTys;
+    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+      ParamTys.push_back((*I)->getType());
+    FCache = M->getOrInsertFunction(NewFn,
+                                    FunctionType::get(RetTy, ParamTys, false));
+  }
+
+  IRBuilder<> Builder(CI->getParent(), CI);
+  SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+  CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+  NewCI->setName(CI->getName());
+  if (!CI->use_empty())
+    CI->replaceAllUsesWith(NewCI);
+  return NewCI;
+}
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (I->isDeclaration() && !I->use_empty())
+      switch (I->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::setjmp:
+        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+                             Type::Int32Ty);
+        break;
+      case Intrinsic::longjmp:
+        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+                             Type::VoidTy);
+        break;
+      case Intrinsic::siglongjmp:
+        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+                             Type::VoidTy);
+        break;
+      case Intrinsic::memcpy:
+        M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
+                              PointerType::getUnqual(Type::Int8Ty),
+                              PointerType::getUnqual(Type::Int8Ty),
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::memmove:
+        M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
+                              PointerType::getUnqual(Type::Int8Ty),
+                              PointerType::getUnqual(Type::Int8Ty),
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::memset:
+        M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
+                              PointerType::getUnqual(Type::Int8Ty),
+                              Type::Int32Ty,
+                              TD.getIntPtrType(), (Type *)0);
+        break;
+      case Intrinsic::sqrt:
+        EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+        break;
+      case Intrinsic::sin:
+        EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+        break;
+      case Intrinsic::cos:
+        EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+        break;
+      case Intrinsic::pow:
+        EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+        break;
+      case Intrinsic::log:
+        EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+        break;
+      case Intrinsic::log2:
+        EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+        break;
+      case Intrinsic::log10:
+        EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+        break;
+      case Intrinsic::exp:
+        EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+        break;
+      case Intrinsic::exp2:
+        EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+        break;
+      }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(Value *V, Instruction *IP) { + assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP->getParent(), IP); + + switch(BitSize) { + default: assert(0 && "Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::Int64Ty, + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::Int64Ty, + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6"); + V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64"); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
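A freestanding sketch of the byte-swap computation that LowerBSWAP above emits as IR, shown for the 32-bit case in plain C++ (the helper name and comments are the editor's, not part of the patch):

  #include <cstdint>

  // Shift-and-mask byte swap, mirroring the "bswap.4".."bswap.1" temporaries
  // that LowerBSWAP builds for the 32-bit case.
  uint32_t bswap32(uint32_t V) {
    uint32_t Tmp4 = V << 24;                // byte 0 -> byte 3
    uint32_t Tmp3 = (V << 8) & 0xFF0000u;   // byte 1 -> byte 2
    uint32_t Tmp2 = (V >> 8) & 0xFF00u;     // byte 2 -> byte 1
    uint32_t Tmp1 = V >> 24;                // byte 3 -> byte 0
    return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);   // 0x11223344 -> 0x44332211
  }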
+static Value *LowerCTPOP(Value *V, Instruction *IP) { + assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); + + static const uint64_t MaskValues[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + + IRBuilder<> Builder(IP->getParent(), IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + unsigned WordSize = (BitSize + 63) / 64; + Value *Count = ConstantInt::get(V->getType(), 0); + + for (unsigned n = 0; n < WordSize; ++n) { + Value *PartValue = V; + for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); + i <<= 1, ++ct) { + Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); + Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); + Value *VShift = Builder.CreateLShr(PartValue, + ConstantInt::get(V->getType(), i), + "ctpop.sh"); + Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); + PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); + } + Count = Builder.CreateAdd(PartValue, Count, "ctpop.part"); + if (BitSize > 64) { + V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64), + "ctpop.part.sh"); + BitSize -= 64; + } + } + + return Count; +} + +/// LowerCTLZ - Emit the code to lower ctlz of V before the specified +/// instruction IP. +static Value *LowerCTLZ(Value *V, Instruction *IP) { + + IRBuilder<> Builder(IP->getParent(), IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + for (unsigned i = 1; i < BitSize; i <<= 1) { + Value *ShVal = ConstantInt::get(V->getType(), i); + ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh"); + V = Builder.CreateOr(V, ShVal, "ctlz.step"); + } + + V = Builder.CreateNot(V); + return LowerCTPOP(V, IP); +} + +/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes +/// three integer arguments. The first argument is the Value from which the +/// bits will be selected. It may be of any bit width. The second and third +/// arguments specify a range of bits to select with the second argument +/// specifying the low bit and the third argument specifying the high bit. Both +/// must be type i32. The result is the corresponding selected bits from the +/// Value in the same width as the Value (first argument). If the low bit index +/// is higher than the high bit index then the inverse selection is done and +/// the bits are returned in inverse order. +/// @brief Lowering of llvm.part.select intrinsic. +static Instruction *LowerPartSelect(CallInst *CI) { + IRBuilder<> Builder; + + // Make sure we're dealing with a part select intrinsic here + Function *F = CI->getCalledFunction(); + const FunctionType *FT = F->getFunctionType(); + if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || + FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() || + !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger()) + return CI; + + // Get the intrinsic implementation function by converting all the . to _ + // in the intrinsic's function name and then reconstructing the function + // declaration. 
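An aside on the two bit-counting lowerings above: LowerCTPOP is the standard word-parallel reduction over six masks, LowerCTLZ smears the highest set bit rightward and counts the complement, and later in this file cttz is lowered through the identity ctpop(~x & (x-1)). A minimal C++ sketch, assuming only <cstdint> (helper names are illustrative):

  #include <cstdint>

  // Word-parallel population count, as in LowerCTPOP: sum adjacent 1-bit
  // fields, then 2-bit fields, and so on, using the same six masks.
  uint64_t ctpop64(uint64_t V) {
    V = (V & 0x5555555555555555ULL) + ((V >> 1)  & 0x5555555555555555ULL);
    V = (V & 0x3333333333333333ULL) + ((V >> 2)  & 0x3333333333333333ULL);
    V = (V & 0x0F0F0F0F0F0F0F0FULL) + ((V >> 4)  & 0x0F0F0F0F0F0F0F0FULL);
    V = (V & 0x00FF00FF00FF00FFULL) + ((V >> 8)  & 0x00FF00FF00FF00FFULL);
    V = (V & 0x0000FFFF0000FFFFULL) + ((V >> 16) & 0x0000FFFF0000FFFFULL);
    V = (V & 0x00000000FFFFFFFFULL) + ((V >> 32) & 0x00000000FFFFFFFFULL);
    return V;
  }

  // Count leading zeros via LowerCTLZ's approach: smear the highest set bit
  // into every lower position, then count the bits that stayed zero.
  uint64_t ctlz64(uint64_t V) {
    for (unsigned i = 1; i < 64; i <<= 1)
      V |= V >> i;          // V becomes 00..011..1
    return ctpop64(~V);     // leading zeros = set bits of the complement
  }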
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+
+    // Get the arguments to the function
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Lo = args++; Lo->setName("Lo");
+    Value* Hi = args++; Hi->setName("High");
+
+    // We want to select a range of bits here such that [Hi, Lo] is shifted
+    // down to the low bits. However, it is quite possible that Hi is smaller
+    // than Lo in which case the bits have to be reversed.
+
+    // Create the blocks we will need for the two cases (forward, reverse)
+    BasicBlock* CurBB = BasicBlock::Create("entry", F);
+    BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
+    BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
+    BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
+    BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
+    BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent());
+
+    Builder.SetInsertPoint(CurBB);
+
+    // Cast Hi and Lo to the size of Val so the widths are all the same
+    if (Hi->getType() != Val->getType())
+      Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false,
+                                 "tmp");
+    if (Lo->getType() != Val->getType())
+      Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false,
+                                 "tmp");
+
+    // Compute a few things that both cases will need, up front.
+    Constant* Zero = ConstantInt::get(Val->getType(), 0);
+    Constant* One = ConstantInt::get(Val->getType(), 1);
+    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+    // Compare the Hi and Lo bit positions. This is used to determine
+    // which case we have (forward or reverse)
+    Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less");
+    Builder.CreateCondBr(Cmp, RevSize, FwdSize);
+
+    // First, compute the number of bits in the forward case.
+    Builder.SetInsertPoint(FwdSize);
+    Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits");
+    Builder.CreateBr(Compute);
+
+    // Second, compute the number of bits in the reverse case.
+    Builder.SetInsertPoint(RevSize);
+    Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits");
+    Builder.CreateBr(Compute);
+
+    // Now, compute the bit range. Start by getting the bitsize and the shift
+    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
+    // the number of bits we want in the range. We shift the bits down to the
+    // least significant bits, apply the mask to zero out unwanted high bits,
+    // and we have computed the "forward" result. It may still need to be
+    // reversed.
+    Builder.SetInsertPoint(Compute);
+
+    // Get the BitSize from one of the two subtractions
+    PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits");
+    BitSize->reserveOperandSpace(2);
+    BitSize->addIncoming(FBitSize, FwdSize);
+    BitSize->addIncoming(RBitSize, RevSize);
+
+    // Get the ShiftAmount as the smaller of Hi/Lo
+    PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt");
+    ShiftAmt->reserveOperandSpace(2);
+    ShiftAmt->addIncoming(Lo, FwdSize);
+    ShiftAmt->addIncoming(Hi, RevSize);
+
+    // Increment the bit size
+    Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits");
+
+    // Create a Mask to zero out the high order bits.
+ Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask"); + Mask = Builder.CreateNot(Mask, "mask"); + + // Shift the bits down and apply the mask + Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres"); + FRes = Builder.CreateAnd(FRes, Mask, "fres"); + Builder.CreateCondBr(Cmp, Reverse, RsltBlk); + + // In the Reverse block we have the mask already in FRes but we must reverse + // it by shifting FRes bits right and putting them in RRes by shifting them + // in from left. + Builder.SetInsertPoint(Reverse); + + // First set up our loop counters + PHINode *Count = Builder.CreatePHI(Val->getType(), "count"); + Count->reserveOperandSpace(2); + Count->addIncoming(BitSizePlusOne, Compute); + + // Next, get the value that we are shifting. + PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); + BitsToShift->reserveOperandSpace(2); + BitsToShift->addIncoming(FRes, Compute); + + // Finally, get the result of the last computation + PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); + RRes->reserveOperandSpace(2); + RRes->addIncoming(Zero, Compute); + + // Decrement the counter + Value *Decr = Builder.CreateSub(Count, One, "decr"); + Count->addIncoming(Decr, Reverse); + + // Compute the Bit that we want to move + Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit"); + + // Compute the new value for next iteration. + Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift"); + BitsToShift->addIncoming(NewVal, Reverse); + + // Shift the bit into the low bits of the result. + Value *NewRes = Builder.CreateShl(RRes, One, "lshift"); + NewRes = Builder.CreateOr(NewRes, Bit, "addbit"); + RRes->addIncoming(NewRes, Reverse); + + // Terminate loop if we've moved all the bits. + Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond"); + Builder.CreateCondBr(Cond, RsltBlk, Reverse); + + // Finally, in the result block, select one of the two results with a PHI + // node and return the result; + Builder.SetInsertPoint(RsltBlk); + PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select"); + BitSelect->reserveOperandSpace(2); + BitSelect->addIncoming(FRes, Compute); + BitSelect->addIncoming(NewRes, Reverse); + Builder.CreateRet(BitSelect); + } + + // Return a call to the implementation function + Builder.SetInsertPoint(CI->getParent(), CI); + CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1), + CI->getOperand(2), CI->getOperand(3)); + NewCI->setName(CI->getName()); + return NewCI; +} + +/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes +/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High) +/// The first two arguments can be any bit width. The result is the same width +/// as %Value. The operation replaces bits between %Low and %High with the value +/// in %Replacement. If %Replacement is not the same width, it is truncated or +/// zero extended as appropriate to fit the bits being replaced. If %Low is +/// greater than %High then the inverse set of bits are replaced. +/// @brief Lowering of llvm.bit.part.set intrinsic. 
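LowerPartSelect above builds this computation as IR, with a loop only for the reversed (Hi < Lo) case; the forward case collapses to a shift and a mask. A freestanding sketch of the forward semantics for 32-bit values (the helper is hypothetical and assumes Lo <= Hi < 32):

  #include <cstdint>

  // Forward llvm.part.select: bits [Hi, Lo] of Val, shifted down to bit 0.
  uint32_t part_select_fwd(uint32_t Val, uint32_t Lo, uint32_t Hi) {
    uint32_t NumBits = Hi - Lo + 1;                   // BitSizePlusOne above
    uint32_t Mask = NumBits >= 32 ? ~0u               // avoid a shift-by-32
                                  : ~(~0u << NumBits);
    return (Val >> Lo) & Mask;   // e.g. part_select_fwd(0xABCD, 4, 7) == 0xC
  }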
+static Instruction *LowerPartSet(CallInst *CI) {
+  IRBuilder<> Builder;
+
+  // Make sure we're dealing with a part select intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+      !FT->getParamType(3)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+    // Get the arguments for the function.
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Rep = args++; Rep->setName("Rep");
+    Value* Lo = args++; Lo->setName("Lo");
+    Value* Hi = args++; Hi->setName("Hi");
+
+    // Get some types we need
+    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+    uint32_t RepBits = RepTy->getBitWidth();
+
+    // Constant Definitions
+    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+    // Basic blocks we fill in below.
+    BasicBlock* entry = BasicBlock::Create("entry", F, 0);
+    BasicBlock* large = BasicBlock::Create("large", F, 0);
+    BasicBlock* small = BasicBlock::Create("small", F, 0);
+    BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
+    BasicBlock* result = BasicBlock::Create("result", F, 0);
+
+    // BASIC BLOCK: entry
+    Builder.SetInsertPoint(entry);
+    // First, get the number of bits that we're placing as an i32
+    Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
+    Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
+    Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
+    Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
+    NumBits = Builder.CreateAdd(NumBits, One);
+    // Now, convert Lo and Hi to ValTy bit width
+    Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
+    // Determine if the replacement bits are larger than the number of bits we
+    // are replacing and deal with it.
+ Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth); + Builder.CreateCondBr(is_large, large, small); + + // BASIC BLOCK: large + Builder.SetInsertPoint(large); + Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits); + MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(), + /* isSigned */ false); + Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits); + Value* Rep2 = Builder.CreateAnd(Mask1, Rep); + Builder.CreateBr(small); + + // BASIC BLOCK: small + Builder.SetInsertPoint(small); + PHINode* Rep3 = Builder.CreatePHI(RepTy); + Rep3->reserveOperandSpace(2); + Rep3->addIncoming(Rep2, large); + Rep3->addIncoming(Rep, entry); + Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false); + Builder.CreateCondBr(is_forward, result, reverse); + + // BASIC BLOCK: reverse (reverses the bits of the replacement) + Builder.SetInsertPoint(reverse); + // Set up our loop counter as a PHI so we can decrement on each iteration. + // We will loop for the number of bits in the replacement value. + PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count"); + Count->reserveOperandSpace(2); + Count->addIncoming(NumBits, small); + + // Get the value that we are shifting bits out of as a PHI because + // we'll change this with each iteration. + PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); + BitsToShift->reserveOperandSpace(2); + BitsToShift->addIncoming(Rep4, small); + + // Get the result of the last computation or zero on first iteration + PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); + RRes->reserveOperandSpace(2); + RRes->addIncoming(ValZero, small); + + // Decrement the loop counter by one + Value *Decr = Builder.CreateSub(Count, One); + Count->addIncoming(Decr, reverse); + + // Get the bit that we want to move into the result + Value *Bit = Builder.CreateAnd(BitsToShift, ValOne); + + // Compute the new value of the bits to shift for the next iteration. + Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne); + BitsToShift->addIncoming(NewVal, reverse); + + // Shift the bit we extracted into the low bit of the result. + Value *NewRes = Builder.CreateShl(RRes, ValOne); + NewRes = Builder.CreateOr(NewRes, Bit); + RRes->addIncoming(NewRes, reverse); + + // Terminate loop if we've moved all the bits. 
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero); + Builder.CreateCondBr(Cond, result, reverse); + + // BASIC BLOCK: result + Builder.SetInsertPoint(result); + PHINode *Rplcmnt = Builder.CreatePHI(Val->getType()); + Rplcmnt->reserveOperandSpace(2); + Rplcmnt->addIncoming(NewRes, reverse); + Rplcmnt->addIncoming(Rep4, small); + Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false); + Value* t1 = Builder.CreateShl(ValMask, Lo); + Value* t2 = Builder.CreateNot(t1); + Value* t3 = Builder.CreateShl(t1, t0); + Value* t4 = Builder.CreateOr(t2, t3); + Value* t5 = Builder.CreateAnd(t4, Val); + Value* t6 = Builder.CreateShl(Rplcmnt, Lo); + Value* Rslt = Builder.CreateOr(t5, t6, "part_set"); + Builder.CreateRet(Rslt); + } + + // Return a call to the implementation function + Builder.SetInsertPoint(CI->getParent(), CI); + CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1), + CI->getOperand(2), CI->getOperand(3), + CI->getOperand(4)); + NewCI->setName(CI->getName()); + return NewCI; +} + +static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache, + Constant *DCache, Constant *LDCache, + const char *Fname, const char *Dname, + const char *LDname) { + switch (CI->getOperand(1)->getType()->getTypeID()) { + default: assert(0 && "Invalid type in intrinsic"); abort(); + case Type::FloatTyID: + ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), + Type::FloatTy, FCache); + break; + case Type::DoubleTyID: + ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), + Type::DoubleTy, DCache); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(), + CI->getOperand(1)->getType(), LDCache); + break; + } +} + +void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + assert(Callee && "Cannot lower an indirect call!"); + + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + cerr << "Cannot lower a call to a non-intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + default: + cerr << "Error: Code generator does not support intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. 
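An aside on LowerPartSet, whose "result" block is completed just above: in the forward case the whole function reduces to mask-out-then-or. A freestanding 32-bit sketch of that final combination (hypothetical helper; assumes Lo <= Hi < 32 and mirrors the t1..t6 temporaries):

  #include <cstdint>

  // Forward llvm.part.set: replace bits [Lo, Hi] of Val with Rep.
  uint32_t part_set_fwd(uint32_t Val, uint32_t Rep, uint32_t Lo, uint32_t Hi) {
    uint32_t NumBits = Hi - Lo + 1;                       // field width
    uint32_t RepMasked =                                  // the "large" block:
        NumBits >= 32 ? Rep : Rep & ~(~0u << NumBits);    // trim Rep to fit
    uint32_t t1 = ~0u << Lo;                              // ones from Lo up
    uint32_t t4 = ~t1 | (Lo + NumBits >= 32 ? 0u          // everything outside
                                            : t1 << NumBits); // of [Lo, Hi]
    return (Val & t4) | (RepMasked << Lo);                // t5 | t6
  }

  // Example: part_set_fwd(0xFFFF, 0x0, 4, 7) == 0xFF0F (bits 4..7 zeroed).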
+  case Intrinsic::setjmp: {
+    static Constant *SetjmpFCache = 0;
+    Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+                               Type::Int32Ty, SetjmpFCache);
+    if (CI->getType() != Type::VoidTy)
+      CI->replaceAllUsesWith(V);
+    break;
+  }
+  case Intrinsic::sigsetjmp:
+    if (CI->getType() != Type::VoidTy)
+      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    break;
+
+  case Intrinsic::longjmp: {
+    static Constant *LongjmpFCache = 0;
+    ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+                    Type::VoidTy, LongjmpFCache);
+    break;
+  }
+
+  case Intrinsic::siglongjmp: {
+    // Insert the call to abort
+    static Constant *AbortFCache = 0;
+    ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+                    Type::VoidTy, AbortFCache);
+    break;
+  }
+  case Intrinsic::ctpop:
+    CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+    break;
+
+  case Intrinsic::bswap:
+    CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+    break;
+
+  case Intrinsic::ctlz:
+    CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+    break;
+
+  case Intrinsic::cttz: {
+    // cttz(x) -> ctpop(~X & (X-1))
+    Value *Src = CI->getOperand(1);
+    Value *NotSrc = Builder.CreateNot(Src);
+    NotSrc->setName(Src->getName() + ".not");
+    Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+    SrcM1 = Builder.CreateSub(Src, SrcM1);
+    Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI);
+    CI->replaceAllUsesWith(Src);
+    break;
+  }
+
+  case Intrinsic::part_select:
+    CI->replaceAllUsesWith(LowerPartSelect(CI));
+    break;
+
+  case Intrinsic::part_set:
+    CI->replaceAllUsesWith(LowerPartSet(CI));
+    break;
+
+  case Intrinsic::stacksave:
+  case Intrinsic::stackrestore: {
+    static bool Warned = false;
+    if (!Warned)
+      cerr << "WARNING: this target does not support the llvm.stack"
+           << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+               "save" : "restore") << " intrinsic.\n";
+    Warned = true;
+    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    break;
+  }
+
+  case Intrinsic::returnaddress:
+  case Intrinsic::frameaddress:
+    cerr << "WARNING: this target does not support the llvm."
+         << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+             "return" : "frame") << "address intrinsic.\n";
+    CI->replaceAllUsesWith(ConstantPointerNull::get(
+                                           cast<PointerType>(CI->getType())));
+    break;
+
+  case Intrinsic::prefetch:
+    break;    // Simply strip out prefetches on unsupported architectures
+
+  case Intrinsic::pcmarker:
+    break;    // Simply strip out pcmarker on unsupported architectures
+  case Intrinsic::readcyclecounter: {
+    cerr << "WARNING: this target does not support the llvm.readcyclecoun"
+         << "ter intrinsic. It is being lowered to a constant 0\n";
+    CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+    break;
+  }
+
+  case Intrinsic::dbg_stoppoint:
+  case Intrinsic::dbg_region_start:
+  case Intrinsic::dbg_region_end:
+  case Intrinsic::dbg_func_start:
+  case Intrinsic::dbg_declare:
+    break;    // Simply strip out debugging intrinsics
+
+  case Intrinsic::eh_exception:
+  case Intrinsic::eh_selector_i32:
+  case Intrinsic::eh_selector_i64:
+    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    break;
+
+  case Intrinsic::eh_typeid_for_i32:
+  case Intrinsic::eh_typeid_for_i64:
+    // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy: { + static Constant *MemcpyFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemcpyFCache); + break; + } + case Intrinsic::memmove: { + static Constant *MemmoveFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemmoveFCache); + break; + } + case Intrinsic::memset: { + static Constant *MemsetFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty, + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemsetFCache); + break; + } + case Intrinsic::sqrt: { + static Constant *sqrtFCache = 0; + static Constant *sqrtDCache = 0; + static Constant *sqrtLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache, + "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + static Constant *logFCache = 0; + static Constant *logDCache = 0; + static Constant *logLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache, + "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + static Constant *log2FCache = 0; + static Constant *log2DCache = 0; + static Constant *log2LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache, + "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + static Constant *log10FCache = 0; + static Constant *log10DCache = 0; + static Constant *log10LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache, + "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + static Constant *expFCache = 0; + static Constant *expDCache = 0; + static Constant *expLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache, + "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + static Constant *exp2FCache = 0; + static Constant *exp2DCache = 0; + static Constant *exp2LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache, + "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + static Constant *powFCache = 0; + static Constant *powDCache = 0; + static Constant *powLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache, + "powf", "pow", "powl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 
index 000000000000..b3c60e63932f
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,289 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace llvm {
+  bool EnableFastISel;
+}
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+    cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+    cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden,
+    cl::desc("Dump emitter generated instructions as assembly"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+    cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+    cl::desc("Verify generated machine code"),
+    cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+// When this works it will be on by default.
+static cl::opt<bool>
+DisablePostRAScheduler("disable-post-RA-scheduler",
+                       cl::desc("Disable scheduling after register allocation"),
+                       cl::init(true));
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -fast.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+  cl::desc("Enable the experimental \"fast\" instruction selector"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+                                       raw_ostream &Out,
+                                       CodeGenFileType FileType,
+                                       CodeGenOpt::Level OptLevel) {
+  // Add common CodeGen passes.
+  if (addCommonCodeGenPasses(PM, OptLevel))
+    return FileModel::Error;
+
+  // Fold redundant debug labels.
+  PM.add(createDebugLabelFoldingPass());
+
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  if (OptLevel != CodeGenOpt::None)
+    PM.add(createCodePlacementOptPass());
+
+  switch (FileType) {
+  default:
+    break;
+  case TargetMachine::AssemblyFile:
+    if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out))
+      return FileModel::Error;
+    return FileModel::AsmFile;
+  case TargetMachine::ObjectFile:
+    if (getMachOWriterInfo())
+      return FileModel::MachOFile;
+    else if (getELFWriterInfo())
+      return FileModel::ElfFile;
+  }
+
+  return FileModel::Error;
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
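A sketch of how a caller might drive this split emission API, under the assumption of an existing PassManager PM, LLVMTargetMachine TM, raw_ostream Out, and Module M (hypothetical driver, not from the patch; error handling elided):

  FileModel::Model Model =
      TM.addPassesToEmitFile(PM, Out, TargetMachine::AssemblyFile,
                             CodeGenOpt::Default);
  if (Model == FileModel::Error)
    return true;                       // target cannot produce this file type
  // The AsmFile model needs no machine-code emitter; the MachO/ELF models
  // would construct a suitable MachineCodeEmitter here instead of null.
  if (TM.addPassesToEmitFileFinish(PM, (MachineCodeEmitter*)0,
                                   CodeGenOpt::Default))
    return true;
  PM.run(M);                           // run the completed pipeline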
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, + MachineCodeEmitter *MCE, + CodeGenOpt::Level OptLevel) { + if (MCE) + addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE); + + PM.add(createGCInfoDeleter()); + + // Delete machine code for this function + PM.add(createMachineCodeDeleter()); + + return false; // success! +} + +/// addPassesToEmitFileFinish - If the passes to emit the specified file had to +/// be split up (e.g., to add an object writer pass), this method can be used to +/// finish up adding passes to emit the file, if necessary. +bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, + JITCodeEmitter *JCE, + CodeGenOpt::Level OptLevel) { + if (JCE) + addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE); + + PM.add(createGCInfoDeleter()); + + // Delete machine code for this function + PM.add(createMachineCodeDeleter()); + + return false; // success! +} + +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a MachineCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. This method should returns true if machine code emission is +/// not supported. +/// +bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, + MachineCodeEmitter &MCE, + CodeGenOpt::Level OptLevel) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel)) + return true; + + if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(cerr)); + + addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE); + + PM.add(createGCInfoDeleter()); + + // Delete machine code for this function + PM.add(createMachineCodeDeleter()); + + return false; // success! +} + +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a MachineCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. This method should returns true if machine code emission is +/// not supported. +/// +bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, + JITCodeEmitter &JCE, + CodeGenOpt::Level OptLevel) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel)) + return true; + + if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(cerr)); + + addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE); + + PM.add(createGCInfoDeleter()); + + // Delete machine code for this function + PM.add(createMachineCodeDeleter()); + + return false; // success! +} + +static void printAndVerify(PassManagerBase &PM, + bool allowDoubleDefs = false) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(cerr)); + + if (VerifyMachineCode) + PM.add(createMachineVerifierPass(allowDoubleDefs)); +} + +/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both +/// emitting to assembly files or machine code output. +/// +bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Standard LLVM-Level Passes. + + // Run loop strength reduction before anything else. + if (OptLevel != CodeGenOpt::None) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + if (PrintLSR) + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs())); + } + + // Turn exception handling constructs into something the code generators can + // handle. 
+ if (!getTargetAsmInfo()->doesSupportExceptionHandling()) + PM.add(createLowerInvokePass(getTargetLowering())); + else + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &errs())); + + // Standard Lower-Level Passes. + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) + EnableFastISel = true; + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, /* allowDoubleDefs= */ true); + + if (OptLevel != CodeGenOpt::None) { + PM.add(createMachineLICMPass()); + PM.add(createMachineSinkingPass()); + printAndVerify(PM, /* allowDoubleDefs= */ true); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM); + + // Perform register allocation. + PM.add(createRegisterAllocator()); + + // Perform stack slot coloring. + if (OptLevel != CodeGenOpt::None) + PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive)); + + printAndVerify(PM); // Print the register-allocated code + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM); + + PM.add(createLowerSubregsPass()); + printAndVerify(PM); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) { + PM.add(createPostRAScheduler()); + printAndVerify(PM); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (OptLevel != CodeGenOpt::None) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printAndVerify(PM); + } + + PM.add(createGCMachineCodeAnalysisPass()); + printAndVerify(PM); + + if (PrintGCInfo) + PM.add(createGCInfoPrinter(*cerr)); + + return false; +} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp new file mode 100644 index 000000000000..2e7b89c494f6 --- /dev/null +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -0,0 +1,114 @@ +//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LatencyPriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push_impl(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push(SU); +} + + +// ScheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. 
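The latency_sort comparator above follows the usual std::priority_queue convention: operator() returns true when LHS should be popped after RHS. A self-contained sketch of the same three-level tie-break on a simplified node type (illustrative only, not the patch's SUnit):

  #include <queue>
  #include <vector>

  struct Node { unsigned Num, Latency, SolelyBlocked; };

  struct LatencyLess {
    bool operator()(const Node &L, const Node &R) const {
      if (L.Latency != R.Latency)              // critical path first:
        return L.Latency < R.Latency;          // higher latency pops first
      if (L.SolelyBlocked != R.SolelyBlocked)  // then unblock more nodes
        return L.SolelyBlocked < R.SolelyBlocked;
      return L.Num < R.Num;                    // stable, arbitrary tie-break
    }
  };

  std::priority_queue<Node, std::vector<Node>, LatencyLess> ReadyQueue;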
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isAvailable) return;  // All preds scheduled.
+
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue. First remove it.
+  remove(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..67120b879886
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,853 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j' and there is no instruction with number
+// i' < i such that v is live at i'. In this implementation intervals can have
+// holes, i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  return r->contains(I);
+}
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If index is liveAt, check if it starts a new live range.
+// If it does, then check if the previous live range ends at index-1.
+bool LiveInterval::liveBeforeAndAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  if (!r->contains(I))
+    return false;
+  if (I != r->start)
+    return true;
+  // I is the start of a live range. Check if the previous live range ends
+  // at I-1.
+  if (r == ranges.begin())
+    return false;
+  return r->end == I;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A +// +// The live intervals should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. +// +bool LiveInterval::overlapsFrom(const LiveInterval& other, + const_iterator StartPos) const { + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != ranges.begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.ranges.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +/// overlaps - Return true if the live interval overlaps a range specified +/// by [Start, End). +bool LiveInterval::overlaps(unsigned Start, unsigned End) const { + assert(Start < End && "Invalid range"); + const_iterator I = begin(); + const_iterator E = end(); + const_iterator si = std::upper_bound(I, E, Start); + const_iterator ei = std::upper_bound(I, E, End); + if (si != ei) + return true; + if (si == I) + return false; + --si; + return si->contains(Start); +} + +/// extendIntervalEndTo - This method is used when we want to extend the range +/// specified by I to end at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. The iterator is +/// not invalidated. +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + unsigned OldEnd = I->end; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = next(I); + for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + } + + // If NewEnd was in the middle of an interval, make sure to get its endpoint. + I->end = std::max(NewEnd, prior(MergeTo)->end); + + // Erase any dead ranges. + ranges.erase(next(I), MergeTo); + + // Update kill info. + removeKills(ValNo, OldEnd, I->end-1); + + // If the newly formed range now touches the range after it and if they have + // the same value number, merge the two ranges into one range. + Ranges::iterator Next = next(I); + if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { + I->end = Next->end; + ranges.erase(Next); + } +} + + +/// extendIntervalStartTo - This method is used when we want to extend the range +/// specified by I to start at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. +LiveInterval::Ranges::iterator +LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. 
+ Ranges::iterator MergeTo = I; + do { + if (MergeTo == ranges.begin()) { + I->start = NewStart; + ranges.erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another interval, just delete a range and + // extend that interval. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + MergeTo->end = I->end; + } else { + // Otherwise, extend the interval right after. + ++MergeTo; + MergeTo->start = NewStart; + MergeTo->end = I->end; + } + + ranges.erase(next(MergeTo), next(I)); + return MergeTo; +} + +LiveInterval::iterator +LiveInterval::addRangeFrom(LiveRange LR, iterator From) { + unsigned Start = LR.start, End = LR.end; + iterator it = std::upper_bound(From, ranges.end(), Start); + + // If the inserted interval starts in the middle or right at the end of + // another interval, just extend that interval to contain the range of LR. + if (it != ranges.begin()) { + iterator B = prior(it); + if (LR.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendIntervalEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two LiveRanges with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this range ends in the middle of, or right next to, another + // interval, merge it into that interval. + if (it != ranges.end()) { + if (LR.valno == it->valno) { + if (it->start <= End) { + it = extendIntervalStartTo(it, Start); + + // If LR is a complete superset of an interval, we may need to grow its + // endpoint as well. + if (End > it->end) + extendIntervalEndTo(it, End); + else if (End < it->end) + // Overlapping intervals, there might have been a kill here. + removeKill(it->valno, End); + return it; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(it->start >= End && + "Cannot overlap two LiveRanges with differing ValID's"); + } + } + + // Otherwise, this is just a new range that doesn't interact with anything. + // Insert it. + return ranges.insert(it, LR); +} + +/// isInOneLiveRange - Return true if the range specified is entirely in the +/// a single LiveRange of the live interval. +bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) { + Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); + if (I == ranges.begin()) + return false; + --I; + return I->contains(Start) && I->contains(End-1); +} + + +/// removeRange - Remove the specified range from this interval. Note that +/// the range must be in a single LiveRange in its entirety. +void LiveInterval::removeRange(unsigned Start, unsigned End, + bool RemoveDeadValNo) { + // Find the LiveRange containing this span. + Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); + assert(I != ranges.begin() && "Range is not in interval!"); + --I; + assert(I->contains(Start) && I->contains(End-1) && + "Range is not entirely in interval!"); + + // If the span we are removing is at the start of the LiveRange, adjust it. + VNInfo *ValNo = I->valno; + if (I->start == Start) { + if (I->end == End) { + removeKills(I->valno, Start, End); + if (RemoveDeadValNo) { + // Check if val# is dead. 
+ bool isDead = true; + for (const_iterator II = begin(), EE = end(); II != EE; ++II) + if (II != I && II->valno == ValNo) { + isDead = false; + break; + } + if (isDead) { + // Now that ValNo is dead, remove it. If it is the largest value + // number, just nuke it (and any other deleted values neighboring it), + // otherwise mark it as ~1U so it can be nuked later. + if (ValNo->id == getNumValNums()-1) { + do { + VNInfo *VNI = valnos.back(); + valnos.pop_back(); + VNI->~VNInfo(); + } while (!valnos.empty() && valnos.back()->def == ~1U); + } else { + ValNo->def = ~1U; + } + } + } + + ranges.erase(I); // Removed the whole LiveRange. + } else + I->start = End; + return; + } + + // Otherwise if the span we are removing is at the end of the LiveRange, + // adjust the other way. + if (I->end == End) { + removeKills(ValNo, Start, End); + I->end = Start; + return; + } + + // Otherwise, we are splitting the LiveRange into two pieces. + unsigned OldEnd = I->end; + I->end = Start; // Trim the old interval. + + // Insert the new one. + ranges.insert(next(I), LiveRange(End, OldEnd, ValNo)); +} + +/// removeValNo - Remove all the ranges defined by the specified value#. +/// Also remove the value# from value# list. +void LiveInterval::removeValNo(VNInfo *ValNo) { + if (empty()) return; + Ranges::iterator I = ranges.end(); + Ranges::iterator E = ranges.begin(); + do { + --I; + if (I->valno == ValNo) + ranges.erase(I); + } while (I != E); + // Now that ValNo is dead, remove it. If it is the largest value + // number, just nuke it (and any other deleted values neighboring it), + // otherwise mark it as ~1U so it can be nuked later. + if (ValNo->id == getNumValNums()-1) { + do { + VNInfo *VNI = valnos.back(); + valnos.pop_back(); + VNI->~VNInfo(); + } while (!valnos.empty() && valnos.back()->def == ~1U); + } else { + ValNo->def = ~1U; + } +} + +/// scaleNumbering - Renumber VNI and ranges to provide gaps for new +/// instructions. +void LiveInterval::scaleNumbering(unsigned factor) { + // Scale ranges. + for (iterator RI = begin(), RE = end(); RI != RE; ++RI) { + RI->start = InstrSlots::scale(RI->start, factor); + RI->end = InstrSlots::scale(RI->end, factor); + } + + // Scale VNI info. + for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) { + VNInfo *vni = *VNI; + if (vni->def != ~0U && vni->def != ~1U) { + vni->def = InstrSlots::scale(vni->def, factor); + } + + for (unsigned i = 0; i < vni->kills.size(); ++i) { + if (vni->kills[i] != 0) + vni->kills[i] = InstrSlots::scale(vni->kills[i], factor); + } + } +} + +/// getLiveRangeContaining - Return the live range that contains the +/// specified index, or null if there is none. +LiveInterval::const_iterator +LiveInterval::FindLiveRangeContaining(unsigned Idx) const { + const_iterator It = std::upper_bound(begin(), end(), Idx); + if (It != ranges.begin()) { + --It; + if (It->contains(Idx)) + return It; + } + + return end(); +} + +LiveInterval::iterator +LiveInterval::FindLiveRangeContaining(unsigned Idx) { + iterator It = std::upper_bound(begin(), end(), Idx); + if (It != begin()) { + --It; + if (It->contains(Idx)) + return It; + } + + return end(); +} + +/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index +/// (register interval) or defined by the specified register (stack inteval). 
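+/// (Editor's sketch, hypothetical values: for a register interval whose
+/// value #0 was defined at instruction index 10, findDefinedVNInfo(10)
+/// returns that VNInfo; for a stack interval the def field holds the
+/// defining register instead, so the same linear scan matches on it.)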
+VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
+  VNInfo *VNI = NULL;
+  for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+       i != e; ++i)
+    if ((*i)->def == DefIdxOrReg) {
+      VNI = *i;
+      break;
+    }
+  return VNI;
+}
+
+
+/// join - Join two live intervals (this, and other) together.  This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified.  If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
+                        const int *RHSValNoAssignments,
+                        SmallVector<VNInfo*, 16> &NewVNInfo) {
+  // Determine if any of our live range values are mapped.  This is uncommon,
+  // so we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  unsigned NumVals = getNumValNums();
+  unsigned NumNewVals = NewVNInfo.size();
+  for (unsigned i = 0; i != NumVals; ++i) {
+    unsigned LHSValID = LHSValNoAssignments[i];
+    if (i != LHSValID ||
+        (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+      MustMapCurValNos = true;
+  }
+
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos) {
+    // Map the first live range.
+    iterator OutIt = begin();
+    OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+    ++OutIt;
+    for (iterator I = OutIt, E = end(); I != E; ++I) {
+      OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange.  This happens when we
+      // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+      if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+        (OutIt-1)->end = OutIt->end;
+      } else {
+        if (I != OutIt) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+
+        // Didn't merge, on to the next one.
+        ++OutIt;
+      }
+    }
+
+    // If we merge some live ranges, chop off the end.
+    ranges.erase(OutIt, end());
+  }
+
+  // Remember assignments because val# ids are changing.
+  SmallVector<int, 16> OtherAssignments;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+    OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+  // Update val# info.  Renumber them and make sure they all belong to this
+  // LiveInterval now.  Also remove dead val#'s.
+  unsigned NumValNos = 0;
+  for (unsigned i = 0; i < NumNewVals; ++i) {
+    VNInfo *VNI = NewVNInfo[i];
+    if (VNI) {
+      if (NumValNos >= NumVals)
+        valnos.push_back(VNI);
+      else
+        valnos[NumValNos] = VNI;
+      VNI->id = NumValNos++;  // Renumber val#.
+    }
+  }
+  if (NumNewVals < NumVals)
+    valnos.resize(NumNewVals);  // shrinkify
+
+  // Okay, now insert the RHS live ranges into the LHS.
+  iterator InsertPos = begin();
+  unsigned RangeNo = 0;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+    // Map the valno in the other live range to the current live range.
+    I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+    assert(I->valno && "Adding a dead range?");
+    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+
+  weight += Other.weight;
+  if (Other.preference && !preference)
+    preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number.  The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
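+/// (Editor's sketch, hypothetical values: if this interval holds [0,4:0)
+/// and RHS holds [4,8), merging RHS in as value #0 re-tags each RHS range
+/// and feeds it through addRangeFrom, coalescing the result to [0,8:0).)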
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+                                        VNInfo *LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    // Map the valno in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.valno = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; it will replace the value numbers of the overlapped
+/// live ranges with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+                                       const VNInfo *RHSValNo,
+                                       VNInfo *LHSValNo) {
+  SmallVector<VNInfo*, 4> ReplacedValNos;
+  iterator IP = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    if (I->valno != RHSValNo)
+      continue;
+    unsigned Start = I->start, End = I->end;
+    IP = std::upper_bound(IP, end(), Start);
+    // If the start of this range overlaps with an existing liverange, trim it.
+    if (IP != begin() && IP[-1].end > Start) {
+      if (IP[-1].valno != LHSValNo) {
+        ReplacedValNos.push_back(IP[-1].valno);
+        IP[-1].valno = LHSValNo; // Update val#.
+      }
+      Start = IP[-1].end;
+      // Trimmed away the whole range?
+      if (Start >= End) continue;
+    }
+    // If the end of this range overlaps with an existing liverange, trim it.
+    if (IP != end() && End > IP->start) {
+      if (IP->valno != LHSValNo) {
+        ReplacedValNos.push_back(IP->valno);
+        IP->valno = LHSValNo;  // Update val#.
+      }
+      End = IP->start;
+      // If this trimmed away the whole range, ignore it.
+      if (Start == End) continue;
+    }
+
+    // Map the valno in the other live range to the current live range.
+    IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+  }
+
+
+  SmallSet<VNInfo*, 4> Seen;
+  for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+    VNInfo *V1 = ReplacedValNos[i];
+    if (Seen.insert(V1)) {
+      bool isDead = true;
+      for (const_iterator I = begin(), E = end(); I != E; ++I)
+        if (I->valno == V1) {
+          isDead = false;
+          break;
+        }
+      if (isDead) {
+        // Now that V1 is dead, remove it.  If it is the largest value number,
+        // just nuke it (and any other deleted values neighboring it),
+        // otherwise mark it as ~1U so it can be nuked later.
+        if (V1->id == getNumValNums()-1) {
+          do {
+            VNInfo *VNI = valnos.back();
+            valnos.pop_back();
+            VNI->~VNInfo();
+          } while (!valnos.empty() && valnos.back()->def == ~1U);
+        } else {
+          V1->def = ~1U;
+        }
+      }
+    }
+  }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
+                                        BumpPtrAllocator &VNInfoAllocator) {
+  if (Clobbers.empty()) return;
+
+  DenseMap<VNInfo*, VNInfo*> ValNoMaps;
+  VNInfo *UnusedValNo = 0;
+  iterator IP = begin();
+  for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+    // For every val# in the Clobbers interval, create a new "unknown" val#.
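+    // (Editor's note: the "unknown" val# is allocated with def index ~0U,
+    // i.e. no defining instruction is recorded. For a hypothetical clobber
+    // of [6,10) over an interval already covering [0,8), the loop below
+    // inserts only the uncovered piece [8,10) under that val#.)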
+    VNInfo *ClobberValNo = 0;
+    DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
+    if (VI != ValNoMaps.end())
+      ClobberValNo = VI->second;
+    else if (UnusedValNo)
+      ClobberValNo = UnusedValNo;
+    else {
+      UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+      ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
+    }
+
+    bool Done = false;
+    unsigned Start = I->start, End = I->end;
+    // If a clobber range starts before an existing range and ends after
+    // it, the clobber range will need to be split into multiple ranges.
+    // Loop until the entire clobber range is handled.
+    while (!Done) {
+      Done = true;
+      IP = std::upper_bound(IP, end(), Start);
+      unsigned SubRangeStart = Start;
+      unsigned SubRangeEnd = End;
+
+      // If the start of this range overlaps with an existing liverange,
+      // trim it.
+      if (IP != begin() && IP[-1].end > SubRangeStart) {
+        SubRangeStart = IP[-1].end;
+        // Trimmed away the whole range?
+        if (SubRangeStart >= SubRangeEnd) continue;
+      }
+      // If the end of this range overlaps with an existing liverange, trim it.
+      if (IP != end() && SubRangeEnd > IP->start) {
+        // If the clobber live range extends beyond the existing live range,
+        // it'll need at least another live range, so set the flag to keep
+        // iterating.
+        if (SubRangeEnd > IP->end) {
+          Start = IP->end;
+          Done = false;
+        }
+        SubRangeEnd = IP->start;
+        // If this trimmed away the whole range, ignore it.
+        if (SubRangeStart == SubRangeEnd) continue;
+      }
+
+      // Insert the clobber interval.
+      IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+                        IP);
+      UnusedValNo = 0;
+    }
+  }
+
+  if (UnusedValNo) {
+    // Delete the last unused val#.
+    valnos.pop_back();
+    UnusedValNo->~VNInfo();
+  }
+}
+
+void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
+                                       BumpPtrAllocator &VNInfoAllocator) {
+  // Find a value # to use for the clobber ranges.  If there is already a
+  // value# for unknown values, use it.
+  VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+
+  iterator IP = begin();
+  IP = std::upper_bound(IP, end(), Start);
+
+  // If the start of this range overlaps with an existing liverange, trim it.
+  if (IP != begin() && IP[-1].end > Start) {
+    Start = IP[-1].end;
+    // Trimmed away the whole range?
+    if (Start >= End) return;
+  }
+  // If the end of this range overlaps with an existing liverange, trim it.
+  if (IP != end() && End > IP->start) {
+    End = IP->start;
+    // If this trimmed away the whole range, ignore it.
+    if (Start == End) return;
+  }
+
+  // Insert the clobber interval.
+  addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent.  This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number.  This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+  assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+  // This code actually merges the (numerically) larger value number into the
+  // smaller value number, which is likely to allow us to compactify the value
+  // space.  The only thing we have to be careful of is to preserve the
+  // instruction that defines the result value.
+
+  // Make sure V2 is smaller than V1.
+  if (V1->id < V2->id) {
+    copyValNumInfo(V1, V2);
+    std::swap(V1, V2);
+  }
+
+  // Merge V1 live ranges into V2.
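+  // (Editor's sketch, hypothetical values: given ranges [0,4:2)[4,8:1),
+  // merging value #2 into value #1 re-tags [0,4) below and, since it now
+  // touches a neighbor carrying the same value number, the loop collapses
+  // the two entries into the single range [0,8:1).)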
+ for (iterator I = begin(); I != end(); ) { + iterator LR = I++; + if (LR->valno != V1) continue; // Not a V1 LiveRange. + + // Okay, we found a V1 live range. If it had a previous, touching, V2 live + // range, extend it. + if (LR != begin()) { + iterator Prev = LR-1; + if (Prev->valno == V2 && Prev->end == LR->start) { + Prev->end = LR->end; + + // Erase this live-range. + ranges.erase(LR); + I = Prev+1; + LR = Prev; + } + } + + // Okay, now we have a V1 or V2 live range that is maximally merged forward. + // Ensure that it is a V2 live-range. + LR->valno = V2; + + // If we can merge it into later V2 live ranges, do so now. We ignore any + // following V1 live ranges, as they will be merged in subsequent iterations + // of the loop. + if (I != end()) { + if (I->start == LR->end && I->valno == V2) { + LR->end = I->end; + ranges.erase(I); + I = LR+1; + } + } + } + + // Now that V1 is dead, remove it. If it is the largest value number, just + // nuke it (and any other deleted values neighboring it), otherwise mark it as + // ~1U so it can be nuked later. + if (V1->id == getNumValNums()-1) { + do { + VNInfo *VNI = valnos.back(); + valnos.pop_back(); + VNI->~VNInfo(); + } while (valnos.back()->def == ~1U); + } else { + V1->def = ~1U; + } + + return V2; +} + +void LiveInterval::Copy(const LiveInterval &RHS, + BumpPtrAllocator &VNInfoAllocator) { + ranges.clear(); + valnos.clear(); + preference = RHS.preference; + weight = RHS.weight; + for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { + const VNInfo *VNI = RHS.getValNumInfo(i); + VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator); + copyValNumInfo(NewVNI, VNI); + } + for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { + const LiveRange &LR = RHS.ranges[i]; + addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); + } +} + +unsigned LiveInterval::getSize() const { + unsigned Sum = 0; + for (const_iterator I = begin(), E = end(); I != E; ++I) + Sum += I->end - I->start; + return Sum; +} + +std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { + return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +} + +void LiveRange::dump() const { + cerr << *this << "\n"; +} + +void LiveInterval::print(std::ostream &OS, + const TargetRegisterInfo *TRI) const { + if (isStackSlot()) + OS << "SS#" << getStackSlotIndex(); + else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) + OS << TRI->getName(reg); + else + OS << "%reg" << reg; + + OS << ',' << weight; + + if (empty()) + OS << " EMPTY"; + else { + OS << " = "; + for (LiveInterval::Ranges::const_iterator I = ranges.begin(), + E = ranges.end(); I != E; ++I) + OS << *I; + } + + // Print value number info. 
+  if (getNumValNums()) {
+    OS << " ";
+    unsigned vnum = 0;
+    for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+         ++i, ++vnum) {
+      const VNInfo *vni = *i;
+      if (vnum) OS << " ";
+      OS << vnum << "@";
+      if (vni->def == ~1U) {
+        OS << "x";
+      } else {
+        if (vni->def == ~0U)
+          OS << "?";
+        else
+          OS << vni->def;
+        unsigned ee = vni->kills.size();
+        if (ee || vni->hasPHIKill) {
+          OS << "-(";
+          for (unsigned j = 0; j != ee; ++j) {
+            OS << vni->kills[j];
+            if (j != ee-1)
+              OS << " ";
+          }
+          if (vni->hasPHIKill) {
+            if (ee)
+              OS << " ";
+            OS << "phi";
+          }
+          OS << ")";
+        }
+      }
+    }
+  }
+}
+
+void LiveInterval::dump() const {
+  cerr << *this << "\n";
+}
+
+
+void LiveRange::print(std::ostream &os) const {
+  os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 000000000000..cf0a648b629d
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2298 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
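+// (Editor's note: the cl::opt flags below are hidden debugging knobs; for
+// example, passing -disable-rematerialization to llc turns rematerialization
+// off during spilling. Illustrative usage only, not part of the import.)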
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+                                  cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SplitAtBB("split-intervals-at-bb",
+                               cl::init(true), cl::Hidden);
+static cl::opt<int> SplitLimit("split-limit",
+                               cl::init(-1), cl::Hidden);
+
+static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden);
+
+static cl::opt<bool> EnableFastSpilling("fast-spill",
+                                        cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numFolds    , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits   , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addPreserved<LiveVariables>();
+  AU.addRequired<LiveVariables>();
+  AU.addPreservedID(MachineLoopInfoID);
+  AU.addPreservedID(MachineDominatorsID);
+
+  if (!StrongPHIElim) {
+    AU.addPreservedID(PHIEliminationID);
+    AU.addRequiredID(PHIEliminationID);
+  }
+
+  AU.addRequiredID(TwoAddressInstructionPassID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+  // Free the live intervals themselves.
+  for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+       E = r2iMap_.end(); I != E; ++I)
+    delete I->second;
+
+  MBB2IdxMap.clear();
+  Idx2MBBMap.clear();
+  mi2iMap_.clear();
+  i2miMap_.clear();
+  r2iMap_.clear();
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+  VNInfoAllocator.Reset();
+  while (!ClonedMIs.empty()) {
+    MachineInstr *MI = ClonedMIs.back();
+    ClonedMIs.pop_back();
+    mf_->DeleteMachineInstr(MI);
+  }
+}
+
+void LiveIntervals::computeNumbering() {
+  Index2MiMap OldI2MI = i2miMap_;
+  std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
+
+  Idx2MBBMap.clear();
+  MBB2IdxMap.clear();
+  mi2iMap_.clear();
+  i2miMap_.clear();
+
+  FunctionSize = 0;
+
+  // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+  MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+       MBB != E; ++MBB) {
+    unsigned StartIdx = MIIndex;
+
+    // Insert an empty slot at the beginning of each block.
+    MIIndex += InstrSlots::NUM;
+    i2miMap_.push_back(0);
+
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+      assert(inserted && "multiple MachineInstr -> index mappings");
+      inserted = true;
+      i2miMap_.push_back(I);
+      MIIndex += InstrSlots::NUM;
+      FunctionSize++;
+
+      // Insert max(1, numdefs) empty slots after every instruction.
+      unsigned Slots = I->getDesc().getNumDefs();
+      if (Slots == 0)
+        Slots = 1;
+      MIIndex += InstrSlots::NUM * Slots;
+      while (Slots--)
+        i2miMap_.push_back(0);
+    }
+
+    // Set the MBB2IdxMap entry for this MBB.
+    MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1);
+    Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB));
+  }
+  std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+  if (!OldI2MI.empty())
+    for (iterator OI = begin(), OE = end(); OI != OE; ++OI) {
+      for (LiveInterval::iterator LI = OI->second->begin(),
+           LE = OI->second->end(); LI != LE; ++LI) {
+
+        // Remap the start index of the live range to the corresponding new
+        // number, or our best guess at what it _should_ correspond to if the
+        // original instruction has been erased.  This is either the following
+        // instruction or its predecessor.
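+        // (Editor's sketch: instruction numbering advances by
+        // InstrSlots::NUM per instruction, with fixed sub-slot offsets such
+        // as LOAD; assuming NUM == 4, instruction n occupies indices
+        // [4n, 4n+3], so index/NUM recovers the instruction and index%NUM
+        // the sub-slot -- the decoding used by the remapping below.)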
+        unsigned index = LI->start / InstrSlots::NUM;
+        unsigned offset = LI->start % InstrSlots::NUM;
+        if (offset == InstrSlots::LOAD) {
+          std::vector<IdxMBBPair>::const_iterator I =
+                  std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start);
+          // Take the pair containing the index
+          std::vector<IdxMBBPair>::const_iterator J =
+                    (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+          LI->start = getMBBStartIdx(J->second);
+        } else {
+          LI->start = mi2iMap_[OldI2MI[index]] + offset;
+        }
+
+        // Remap the ending index in the same way that we remapped the start,
+        // except for the final step where we always map to the immediately
+        // following instruction.
+        index = (LI->end - 1) / InstrSlots::NUM;
+        offset = LI->end % InstrSlots::NUM;
+        if (offset == InstrSlots::LOAD) {
+          // VReg dies at end of block.
+          std::vector<IdxMBBPair>::const_iterator I =
+                  std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end);
+          --I;
+
+          LI->end = getMBBEndIdx(I->second) + 1;
+        } else {
+          unsigned idx = index;
+          while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+          if (index != OldI2MI.size())
+            LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0);
+          else
+            LI->end = InstrSlots::NUM * i2miMap_.size();
+        }
+      }
+
+      for (LiveInterval::vni_iterator VNI = OI->second->vni_begin(),
+           VNE = OI->second->vni_end(); VNI != VNE; ++VNI) {
+        VNInfo* vni = *VNI;
+
+        // Remap the VNInfo def index, which works the same as the
+        // start indices above. VN's with special sentinel defs
+        // don't need to be remapped.
+        if (vni->def != ~0U && vni->def != ~1U) {
+          unsigned index = vni->def / InstrSlots::NUM;
+          unsigned offset = vni->def % InstrSlots::NUM;
+          if (offset == InstrSlots::LOAD) {
+            std::vector<IdxMBBPair>::const_iterator I =
+                  std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
+            // Take the pair containing the index
+            std::vector<IdxMBBPair>::const_iterator J =
+                    (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+            vni->def = getMBBStartIdx(J->second);
+          } else {
+            vni->def = mi2iMap_[OldI2MI[index]] + offset;
+          }
+        }
+
+        // Remap the VNInfo kill indices, which works the same as
+        // the end indices above.
+        for (size_t i = 0; i < vni->kills.size(); ++i) {
+          // PHI kills don't need to be remapped.
+          if (!vni->kills[i]) continue;
+
+          unsigned index = (vni->kills[i]-1) / InstrSlots::NUM;
+          unsigned offset = vni->kills[i] % InstrSlots::NUM;
+          if (offset == InstrSlots::LOAD) {
+            std::vector<IdxMBBPair>::const_iterator I =
+             std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
+            --I;
+
+            vni->kills[i] = getMBBEndIdx(I->second);
+          } else {
+            unsigned idx = index;
+            while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+            if (index != OldI2MI.size())
+              vni->kills[i] = mi2iMap_[OldI2MI[index]] +
+                              (idx == index ? offset : 0);
+            else
+              vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+          }
+        }
+      }
+    }
+}
+
+void LiveIntervals::scaleNumbering(int factor) {
+  // Need to
+  //  * scale MBB begin and end points
+  //  * scale all ranges.
+  //  * Update VNI structures.
+  //  * Scale instruction numberings
+
+  // Scale the MBB indices.
+  Idx2MBBMap.clear();
+  for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
+       MBB != MBBE; ++MBB) {
+    std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+    mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
+    mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+    Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
+  }
+  std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+  // Scale the intervals.
+  for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+    LI->second->scaleNumbering(factor);
+  }
+
+  // Scale MachineInstrs.
+  Mi2IndexMap oldmi2iMap = mi2iMap_;
+  unsigned highestSlot = 0;
+  for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
+       MI != ME; ++MI) {
+    unsigned newSlot = InstrSlots::scale(MI->second, factor);
+    mi2iMap_[MI->first] = newSlot;
+    highestSlot = std::max(highestSlot, newSlot);
+  }
+
+  i2miMap_.clear();
+  i2miMap_.resize(highestSlot + 1);
+  for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
+       MI != ME; ++MI) {
+    i2miMap_[MI->second] = MI->first;
+  }
+
+}
+
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &mf_->getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  aa_ = &getAnalysis<AliasAnalysis>();
+  lv_ = &getAnalysis<LiveVariables>();
+  allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+  computeNumbering();
+  computeIntervals();
+
+  numIntervals += getNumIntervals();
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second->print(O, tri_);
+    O << "\n";
+  }
+
+  O << "********** MACHINEINSTRS **********\n";
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+    for (MachineBasicBlock::iterator mii = mbbi->begin(),
+           mie = mbbi->end(); mii != mie; ++mii) {
+      O << getInstructionIndex(mii) << '\t' << *mii;
+    }
+  }
+}
+
+/// conflictsWithPhysRegDef - Returns true if the specified register
+/// is defined during the duration of the specified interval.
+bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
+                                            VirtRegMap &vrm, unsigned reg) {
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    for (unsigned index = getBaseIndex(I->start),
+           end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+         index += InstrSlots::NUM) {
+      // skip deleted instructions
+      while (index != end && !getInstructionFromIndex(index))
+        index += InstrSlots::NUM;
+      if (index == end) break;
+
+      MachineInstr *MI = getInstructionFromIndex(index);
+      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+      if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+        if (SrcReg == li.reg || DstReg == li.reg)
+          continue;
+      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+        MachineOperand& mop = MI->getOperand(i);
+        if (!mop.isReg())
+          continue;
+        unsigned PhysReg = mop.getReg();
+        if (PhysReg == 0 || PhysReg == li.reg)
+          continue;
+        if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+          if (!vrm.hasPhys(PhysReg))
+            continue;
+          PhysReg = vrm.getPhys(PhysReg);
+        }
+        if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegDef except
+/// it can check use as well.
+bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+                                            unsigned Reg, bool CheckUse,
+                                  SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    for (unsigned index = getBaseIndex(I->start),
+           end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+         index += InstrSlots::NUM) {
+      // Skip deleted instructions.
+      MachineInstr *MI = 0;
+      while (index != end) {
+        MI = getInstructionFromIndex(index);
+        if (MI)
+          break;
+        index += InstrSlots::NUM;
+      }
+      if (index == end) break;
+
+      if (JoinedCopies.count(MI))
+        continue;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand& MO = MI->getOperand(i);
+        if (!MO.isReg())
+          continue;
+        if (MO.isUse() && !CheckUse)
+          continue;
+        unsigned PhysReg = MO.getReg();
+        if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+          continue;
+        if (tri_->isSubRegister(Reg, PhysReg))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+
+void LiveIntervals::printRegName(unsigned reg) const {
+  if (TargetRegisterInfo::isPhysicalRegister(reg))
+    cerr << tri_->getName(reg);
+  else
+    cerr << "%reg" << reg;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+                                             MachineBasicBlock::iterator mi,
+                                             unsigned MIIdx, MachineOperand& MO,
+                                             unsigned MOIdx,
+                                             LiveInterval &interval) {
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+  if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+    DOUT << "is an implicit_def\n";
+    return;
+  }
+
+  // Virtual registers may be defined multiple times (due to phi
+  // elimination and 2-addr elimination).  Much of what we do only has to be
+  // done once for the vreg.  We use an empty interval to detect the first
+  // time we see a vreg.
+  if (interval.empty()) {
+    // Get the Idx of the defining instructions.
+    unsigned defIndex = getDefIndex(MIIdx);
+    // Earlyclobbers move back one.
+    if (MO.isEarlyClobber())
+      defIndex = getUseIndex(MIIdx);
+    VNInfo *ValNo;
+    MachineInstr *CopyMI = NULL;
+    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+    if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+        mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+        tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+      CopyMI = mi;
+    // Earlyclobbers move back one.
+    ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+    assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+    // Loop over all of the blocks that the vreg is defined in.  There are
+    // two cases we have to handle here.  The most common case is a vreg
+    // whose lifetime is contained within a basic block.  In this case there
+    // will be a single kill, in MBB, which comes after the definition.
+    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+      // FIXME: what about dead vars?
+      unsigned killIdx;
+      if (vi.Kills[0] != mi)
+        killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+      else
+        killIdx = defIndex+1;
+
+      // If the kill happens after the definition, we have an intra-block
+      // live range.
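+      // (Editor's sketch, hypothetical indices: a def at slot 6 whose single
+      // kill sits at use slot 14 in the same block yields killIdx = 15 and
+      // the intra-block interval [6, 15) added below -- half-open, ending
+      // just past the killing use.)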
+ if (killIdx > defIndex) { + assert(vi.AliveBlocks.empty() && + "Shouldn't be alive across any blocks!"); + LiveRange LR(defIndex, killIdx, ValNo); + interval.addRange(LR); + DOUT << " +" << LR << "\n"; + interval.addKill(ValNo, killIdx); + return; + } + } + + // The other case we handle is when a virtual register lives to the end + // of the defining block, potentially live across some blocks, then is + // live into some number of blocks, but gets killed. Start by adding a + // range that goes from this definition to the end of the defining block. + LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo); + DOUT << " +" << NewLR; + interval.addRange(NewLR); + + // Iterate over all of the blocks that the variable is completely + // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the + // live interval. + for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), + E = vi.AliveBlocks.end(); I != E; ++I) { + LiveRange LR(getMBBStartIdx(*I), + getMBBEndIdx(*I)+1, // MBB ends at -1. + ValNo); + interval.addRange(LR); + DOUT << " +" << LR; + } + + // Finally, this virtual register is live from the start of any killing + // block to the 'use' slot of the killing instruction. + for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { + MachineInstr *Kill = vi.Kills[i]; + unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1; + LiveRange LR(getMBBStartIdx(Kill->getParent()), + killIdx, ValNo); + interval.addRange(LR); + interval.addKill(ValNo, killIdx); + DOUT << " +" << LR; + } + + } else { + // If this is the second time we see a virtual register definition, it + // must be due to phi elimination or two addr elimination. If this is + // the result of two address elimination, then the vreg is one of the + // def-and-use register operand. + if (mi->isRegTiedToUseOperand(MOIdx)) { + // If this is a two-address definition, then we have already processed + // the live range. The only problem is that we didn't realize there + // are actually two values in the live interval. Because of this we + // need to take the LiveRegion that defines this register and split it + // into two values. + assert(interval.containsOneValue()); + unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); + unsigned RedefIndex = getDefIndex(MIIdx); + if (MO.isEarlyClobber()) + RedefIndex = getUseIndex(MIIdx); + + const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); + VNInfo *OldValNo = OldLR->valno; + + // Delete the initial value, which should be short and continuous, + // because the 2-addr copy must be in the same MBB as the redef. + interval.removeRange(DefIndex, RedefIndex); + + // Two-address vregs should always only be redefined once. This means + // that at this point, there should be exactly one value number in it. + assert(interval.containsOneValue() && "Unexpected 2-addr liveint!"); + + // The new value number (#1) is defined by the instruction we claimed + // defined value #0. + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + VNInfoAllocator); + + // Value#0 is now defined by the 2-addr instruction. + OldValNo->def = RedefIndex; + OldValNo->copy = 0; + if (MO.isEarlyClobber()) + OldValNo->redefByEC = true; + + // Add the new live interval which replaces the range for the input copy. + LiveRange LR(DefIndex, RedefIndex, ValNo); + DOUT << " replace range with " << LR; + interval.addRange(LR); + interval.addKill(ValNo, RedefIndex); + + // If this redefinition is dead, we need to add a dummy unit live + // range covering the def slot. 
+ if (MO.isDead()) + interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo)); + + DOUT << " RESULT: "; + interval.print(DOUT, tri_); + + } else { + // Otherwise, this must be because of phi elimination. If this is the + // first redefinition of the vreg that we have seen, go back and change + // the live range in the PHI block to be a different value number. + if (interval.containsOneValue()) { + assert(vi.Kills.size() == 1 && + "PHI elimination vreg should have one kill, the PHI itself!"); + + // Remove the old range that we now know has an incorrect number. + VNInfo *VNI = interval.getValNumInfo(0); + MachineInstr *Killer = vi.Kills[0]; + unsigned Start = getMBBStartIdx(Killer->getParent()); + unsigned End = getUseIndex(getInstructionIndex(Killer))+1; + DOUT << " Removing [" << Start << "," << End << "] from: "; + interval.print(DOUT, tri_); DOUT << "\n"; + interval.removeRange(Start, End); + VNI->hasPHIKill = true; + DOUT << " RESULT: "; interval.print(DOUT, tri_); + + // Replace the interval with one of a NEW value number. Note that this + // value number isn't actually defined by an instruction, weird huh? :) + LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator)); + DOUT << " replace range with " << LR; + interval.addRange(LR); + interval.addKill(LR.valno, End); + DOUT << " RESULT: "; interval.print(DOUT, tri_); + } + + // In the case of PHI elimination, each variable definition is only + // live until the end of the block. We've already taken care of the + // rest of the live range. + unsigned defIndex = getDefIndex(MIIdx); + if (MO.isEarlyClobber()) + defIndex = getUseIndex(MIIdx); + + VNInfo *ValNo; + MachineInstr *CopyMI = NULL; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + CopyMI = mi; + ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + + unsigned killIndex = getMBBEndIdx(mbb) + 1; + LiveRange LR(defIndex, killIndex, ValNo); + interval.addRange(LR); + interval.addKill(ValNo, killIndex); + ValNo->hasPHIKill = true; + DOUT << " +" << LR; + } + } + + DOUT << '\n'; +} + +void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator mi, + unsigned MIIdx, + MachineOperand& MO, + LiveInterval &interval, + MachineInstr *CopyMI) { + // A physical register cannot be live across basic block, so its + // lifetime must end somewhere in its defining basic block. + DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); + + unsigned baseIndex = MIIdx; + unsigned start = getDefIndex(baseIndex); + // Earlyclobbers move back one. + if (MO.isEarlyClobber()) + start = getUseIndex(MIIdx); + unsigned end = start; + + // If it is not used after definition, it is considered dead at + // the instruction defining it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + if (MO.isDead()) { + DOUT << " dead"; + end = start + 1; + goto exit; + } + + // If it is not dead on definition, it must be killed by a + // subsequent instruction. 
Hence its interval is: + // [defSlot(def), useSlot(kill)+1) + baseIndex += InstrSlots::NUM; + while (++mi != MBB->end()) { + while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(baseIndex) == 0) + baseIndex += InstrSlots::NUM; + if (mi->killsRegister(interval.reg, tri_)) { + DOUT << " killed"; + end = getUseIndex(baseIndex) + 1; + goto exit; + } else { + int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); + if (DefIdx != -1) { + if (mi->isRegTiedToUseOperand(DefIdx)) { + // Two-address instruction. + end = getDefIndex(baseIndex); + if (mi->getOperand(DefIdx).isEarlyClobber()) + end = getUseIndex(baseIndex); + } else { + // Another instruction redefines the register before it is ever read. + // Then the register is essentially dead at the instruction that defines + // it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + DOUT << " dead"; + end = start + 1; + } + goto exit; + } + } + + baseIndex += InstrSlots::NUM; + } + + // The only case we should have a dead physreg here without a killing or + // instruction where we know it's dead is if it is live-in to the function + // and never used. Another possible case is the implicit use of the + // physical register has been deleted by two-address pass. + end = start + 1; + +exit: + assert(start < end && "did not find end of interval?"); + + // Already exists? Extend old live interval. + LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start); + bool Extend = OldLR != interval.end(); + VNInfo *ValNo = Extend + ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator); + if (MO.isEarlyClobber() && Extend) + ValNo->redefByEC = true; + LiveRange LR(start, end, ValNo); + interval.addRange(LR); + interval.addKill(LR.valno, end); + DOUT << " +" << LR << '\n'; +} + +void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI, + unsigned MIIdx, + MachineOperand& MO, + unsigned MOIdx) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, + getOrCreateInterval(MO.getReg())); + else if (allocatableRegs_[MO.getReg()]) { + MachineInstr *CopyMI = NULL; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + CopyMI = MI; + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + getOrCreateInterval(MO.getReg()), CopyMI); + // Def of a register also defines its sub-registers. + for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) + // If MI also modifies the sub-register explicitly, avoid processing it + // more than once. Do not pass in TRI here so it checks for exact match. + if (!MI->modifiesRegister(*AS)) + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + getOrCreateInterval(*AS), 0); + } +} + +void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, + unsigned MIIdx, + LiveInterval &interval, bool isAlias) { + DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg)); + + // Look for kills, if it reaches a def before it's killed, then it shouldn't + // be considered a livein. 
+ MachineBasicBlock::iterator mi = MBB->begin(); + unsigned baseIndex = MIIdx; + unsigned start = baseIndex; + while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(baseIndex) == 0) + baseIndex += InstrSlots::NUM; + unsigned end = baseIndex; + bool SeenDefUse = false; + + while (mi != MBB->end()) { + if (mi->killsRegister(interval.reg, tri_)) { + DOUT << " killed"; + end = getUseIndex(baseIndex) + 1; + SeenDefUse = true; + goto exit; + } else if (mi->modifiesRegister(interval.reg, tri_)) { + // Another instruction redefines the register before it is ever read. + // Then the register is essentially dead at the instruction that defines + // it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + DOUT << " dead"; + end = getDefIndex(start) + 1; + SeenDefUse = true; + goto exit; + } + + baseIndex += InstrSlots::NUM; + ++mi; + if (mi != MBB->end()) { + while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(baseIndex) == 0) + baseIndex += InstrSlots::NUM; + } + } + +exit: + // Live-in register might not be used at all. + if (!SeenDefUse) { + if (isAlias) { + DOUT << " dead"; + end = getDefIndex(MIIdx) + 1; + } else { + DOUT << " live through"; + end = baseIndex; + } + } + + LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator)); + interval.addRange(LR); + interval.addKill(LR.valno, end); + DOUT << " +" << LR << '\n'; +} + +/// computeIntervals - computes the live intervals for virtual +/// registers. for some ordering of the machine instructions [1,N] a +/// live interval is an interval [i, j) where 1 <= i <= j < N for +/// which a variable is live +void LiveIntervals::computeIntervals() { + + DOUT << "********** COMPUTING LIVE INTERVALS **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'; + + for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + // Track the index of the current machine instr. + unsigned MIIndex = getMBBStartIdx(MBB); + DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + + MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); + + // Create intervals for live-ins to this BB first. + for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); LI != LE; ++LI) { + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); + // Multiple live-ins can alias the same register. + for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) + if (!hasInterval(*AS)) + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), + true); + } + + // Skip over empty initial indices. + while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(MIIndex) == 0) + MIIndex += InstrSlots::NUM; + + for (; MI != miEnd; ++MI) { + DOUT << MIIndex << "\t" << *MI; + + // Handle defs. + for (int i = MI->getNumOperands() - 1; i >= 0; --i) { + MachineOperand &MO = MI->getOperand(i); + // handle register defs - build intervals + if (MO.isReg() && MO.getReg() && MO.isDef()) { + handleRegisterDef(MBB, MI, MIIndex, MO, i); + } + } + + // Skip over the empty slots after each instruction. + unsigned Slots = MI->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + MIIndex += InstrSlots::NUM * Slots; + + // Skip over empty indices. 
+      while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+             getInstructionFromIndex(MIIndex) == 0)
+        MIIndex += InstrSlots::NUM;
+    }
+  }
+}
+
+bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End,
+                              SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+  std::vector<IdxMBBPair>::const_iterator I =
+    std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+  bool ResVal = false;
+  while (I != Idx2MBBMap.end()) {
+    if (I->first >= End)
+      break;
+    MBBs.push_back(I->second);
+    ResVal = true;
+    ++I;
+  }
+  return ResVal;
+}
+
+bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End,
+                              SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+  std::vector<IdxMBBPair>::const_iterator I =
+    std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+  bool ResVal = false;
+  while (I != Idx2MBBMap.end()) {
+    if (I->first > End)
+      break;
+    MachineBasicBlock *MBB = I->second;
+    if (getMBBEndIdx(MBB) > End)
+      break;
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           SE = MBB->succ_end(); SI != SE; ++SI)
+      MBBs.push_back(*SI);
+    ResVal = true;
+    ++I;
+  }
+  return ResVal;
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+  float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+  return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval.  The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+  LiveInterval *NewLI = createInterval(li->reg);
+  NewLI->Copy(*li, getVNInfoAllocator());
+  return NewLI;
+}
+
+/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
+/// copy field and returns the source register that defines it.
+unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
+  if (!VNI->copy)
+    return 0;
+
+  if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+    // If it's extracting out of a physical register, return the sub-register.
+    unsigned Reg = VNI->copy->getOperand(1).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
+    return Reg;
+  } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+             VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+    return VNI->copy->getOperand(2).getReg();
+
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+    return SrcReg;
+  assert(0 && "Unrecognized copy instruction!");
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+                                            MachineInstr *MI) const {
+  unsigned RegOp = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == li.reg)
+      continue;
+    // FIXME: For now, only remat MI with at most one register operand.
+    assert(!RegOp &&
+           "Can't rematerialize instruction with multiple register operands!");
+    RegOp = MO.getReg();
+#ifndef NDEBUG
+    break;
+#endif
+  }
+  return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
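+/// (Editor's sketch, hypothetical values: if value #1 covers [8,20) and is
+/// the value reaching the remat candidate at index 10, then a use at index
+/// 16 still falls in a range carrying value #1, so the query returns true.)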
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+                                       unsigned UseIdx) const {
+  unsigned Index = getInstructionIndex(MI);
+  VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+  LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+  return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       const VNInfo *ValNo, MachineInstr *MI,
+                                       SmallVectorImpl<LiveInterval*> &SpillIs,
+                                       bool &isLoad) {
+  if (DisableReMat)
+    return false;
+
+  if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+    return true;
+
+  int FrameIdx = 0;
+  if (tii_->isLoadFromStackSlot(MI, FrameIdx) &&
+      mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+    // FIXME: Let target specific isReallyTriviallyReMaterializable determine
+    // this but remember this is not safe to fold into a two-address
+    // instruction.
+    // This is a load from fixed stack slot. It can be rematerialized.
+    return true;
+
+  // If the target-specific rules don't identify an instruction as
+  // being trivially rematerializable, use some target-independent
+  // rules.
+  if (!MI->getDesc().isRematerializable() ||
+      !tii_->isTriviallyReMaterializable(MI)) {
+    if (!EnableAggressiveRemat)
+      return false;
+
+    // If the instruction accesses memory but the memoperands have been lost,
+    // we can't analyze it.
+    const TargetInstrDesc &TID = MI->getDesc();
+    if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty())
+      return false;
+
+    // Avoid instructions obviously unsafe for remat.
+    if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable())
+      return false;
+
+    // If the instruction accesses memory and the memory could be non-constant,
+    // assume the instruction is not rematerializable.
+    for (std::list<MachineMemOperand>::const_iterator
+           I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){
+      const MachineMemOperand &MMO = *I;
+      if (MMO.isVolatile() || MMO.isStore())
+        return false;
+      const Value *V = MMO.getValue();
+      if (!V)
+        return false;
+      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+        if (!PSV->isConstant(mf_->getFrameInfo()))
+          return false;
+      } else if (!aa_->pointsToConstantMemory(V))
+        return false;
+    }
+
+    // If any of the registers accessed are non-constant, conservatively assume
+    // the instruction is not rematerializable.
+    unsigned ImpUse = 0;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg()) {
+        unsigned Reg = MO.getReg();
+        if (Reg == 0)
+          continue;
+        if (TargetRegisterInfo::isPhysicalRegister(Reg))
+          return false;
+
+        // Only allow one def, and that in the first operand.
+        if (MO.isDef() != (i == 0))
+          return false;
+
+        // Only allow constant-valued registers.
+        bool IsLiveIn = mri_->isLiveIn(Reg);
+        MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg),
+                                          E = mri_->def_end();
+
+        // For the def, it should be the only def of that register.
+        if (MO.isDef() && (next(I) != E || IsLiveIn))
+          return false;
+
+        if (MO.isUse()) {
+          // Only allow one other register use, as that's all the
+          // remat mechanisms support currently.
+          if (Reg != li.reg) {
+            if (ImpUse == 0)
+              ImpUse = Reg;
+            else if (Reg != ImpUse)
+              return false;
+          }
+          // For the use, there should be only one associated def.
+          if (I != E && (next(I) != E || IsLiveIn))
+            return false;
+        }
+      }
+    }
+  }
+
+  unsigned ImpUse = getReMatImplicitUse(li, MI);
+  if (ImpUse) {
+    const LiveInterval &ImpLi = getInterval(ImpUse);
+    for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
+           re = mri_->use_end(); ri != re; ++ri) {
+      MachineInstr *UseMI = &*ri;
+      unsigned UseIdx = getInstructionIndex(UseMI);
+      if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+        continue;
+      if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+        return false;
+    }
+
+    // If a register operand of the re-materialized instruction is going to
+    // be spilled next, then it's not legal to re-materialize this instruction.
+    for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+      if (ImpUse == SpillIs[i]->reg)
+        return false;
+  }
+  return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       const VNInfo *ValNo, MachineInstr *MI) {
+  SmallVector<LiveInterval*, 4> Dummy1;
+  bool Dummy2;
+  return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       SmallVectorImpl<LiveInterval*> &SpillIs,
+                                       bool &isLoad) {
+  isLoad = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned DefIdx = VNI->def;
+    if (DefIdx == ~1U)
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    if (DefIdx == ~0u)
+      return false;
+    MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+    bool DefIsLoad = false;
+    if (!ReMatDefMI ||
+        !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+      return false;
+    isLoad |= DefIsLoad;
+  }
+  return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+                            SmallVector<unsigned, 2> &Ops,
+                            unsigned &MRInfo,
+                            SmallVector<unsigned, 2> &FoldOps) {
+  MRInfo = 0;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    unsigned OpIdx = Ops[i];
+    MachineOperand &MO = MI->getOperand(OpIdx);
+    // FIXME: fold subreg use.
+    if (MO.getSubReg())
+      return true;
+    if (MO.isDef())
+      MRInfo |= (unsigned)VirtRegMap::isMod;
+    else {
+      // Filter out two-address use operand(s).
+      if (MI->isRegTiedToDefOperand(OpIdx)) {
+        MRInfo = VirtRegMap::isModRef;
+        continue;
+      }
+      MRInfo |= (unsigned)VirtRegMap::isRef;
+    }
+    FoldOps.push_back(OpIdx);
+  }
+  return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into ith operand of specified
+/// MI. If it is successful, MI is updated with the newly created MI and
+/// returns true.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+                                         VirtRegMap &vrm, MachineInstr *DefMI,
+                                         unsigned InstrIdx,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool isSS, int Slot, unsigned Reg) {
+  // If it is an implicit def instruction, just delete it.
+  if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+    RemoveMachineInstrFromMaps(MI);
+    vrm.RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+    ++numFolds;
+    return true;
+  }
+
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
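+  // (Editor's note: besides filtering, FilterFoldedOps summarizes how the
+  // instruction touches the register in MRInfo as a VirtRegMap::ModRef
+  // bitmask -- isRef for plain uses, isMod for defs, isModRef for tied
+  // two-address operands -- which the checks below consult.)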
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // The only time it's safe to fold into a two address instruction is when
+  // it's folding reload and spill from / into a spill stack slot.
+  if (DefMI && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
+                           : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+  if (fmi) {
+    // Remember this instruction uses the spill slot.
+    if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+    // Attempt to fold the memory reference into the instruction. If
+    // we can do this, we don't need to insert spill code.
+    MachineBasicBlock &MBB = *MI->getParent();
+    if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+      vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+    vrm.transferSpillPts(MI, fmi);
+    vrm.transferRestorePts(MI, fmi);
+    vrm.transferEmergencySpills(MI, fmi);
+    mi2iMap_.erase(MI);
+    i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+    mi2iMap_[fmi] = InstrIdx;
+    MI = MBB.insert(MBB.erase(MI), fmi);
+    ++numFolds;
+    return true;
+  }
+  return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool ReMat) const {
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // It's only legal to remat for a use, not a def.
+  if (ReMat && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+  SmallPtrSet<MachineBasicBlock*, 4> MBBs;
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    std::vector<IdxMBBPair>::const_iterator II =
+      std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), I->start);
+    if (II == Idx2MBBMap.end())
+      continue;
+    if (I->end > II->first)  // crossing a MBB.
+      return false;
+    MBBs.insert(II->second);
+    if (MBBs.size() > 1)
+      return false;
+  }
+  return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+                                       MachineInstr *MI, unsigned NewVReg,
+                                       VirtRegMap &vrm) {
+  // There is an implicit use. That means one of the other operands is
+  // being remat'ed and the remat'ed instruction has li.reg as an
+  // use operand. Make sure we rewrite that as well.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (!vrm.isReMaterialized(Reg))
+      continue;
+    MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+    MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+    if (UseMO)
+      UseMO->setReg(NewVReg);
+  }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+                 bool TrySplit, unsigned index, unsigned end, MachineInstr *MI,
+                 MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                 unsigned Slot, int LdSlot,
+                 bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                 VirtRegMap &vrm,
+                 const TargetRegisterClass* rc,
+                 SmallVector<int, 4> &ReMatIds,
+                 const MachineLoopInfo *loopInfo,
+                 unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+                 DenseMap<unsigned, unsigned> &MBBVRegsMap,
+                 std::vector<LiveInterval*> &NewLIs) {
+  bool CanFold = false;
+ RestartInstruction:
+  for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+    MachineOperand& mop = MI->getOperand(i);
+    if (!mop.isReg())
+      continue;
+    unsigned Reg = mop.getReg();
+    unsigned RegI = Reg;
+    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (Reg != li.reg)
+      continue;
+
+    bool TryFold = !DefIsReMat;
+    bool FoldSS = true; // Default behavior unless it's a remat.
+    int FoldSlot = Slot;
+    if (DefIsReMat) {
+      // If this is the rematerializable definition MI itself and
+      // all of its uses are rematerialized, simply delete it.
+      if (MI == ReMatOrigDefMI && CanDelete) {
+        DOUT << "\t\t\t\tErasing re-materializable def: ";
+        DOUT << MI << '\n';
+        RemoveMachineInstrFromMaps(MI);
+        vrm.RemoveMachineInstrFromMaps(MI);
+        MI->eraseFromParent();
+        break;
+      }
+
+      // If def for this use can't be rematerialized, then try folding.
+      // If def is rematerializable and it's a load, also try folding.
+      TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+      if (isLoad) {
+        // Try fold loads (from stack slot, constant pool, etc.) into uses.
+        FoldSS = isLoadSS;
+        FoldSlot = LdSlot;
+      }
+    }
+
+    // Scan all of the operands of this instruction rewriting operands
+    // to use NewVReg instead of li.reg as appropriate. We do this for
+    // two reasons:
+    //
+    //   1. If the instr reads the same spilled vreg multiple times, we
+    //      want to reuse the NewVReg.
+    //   2. If the instr is a two-addr instruction, we are required to
+    //      keep the src/dst regs pinned.
+    //
+    // Keep track of whether we replace a use and/or def so that we can
+    // create the spill interval with the appropriate range.
+
+    HasUse = mop.isUse();
+    HasDef = mop.isDef();
+    SmallVector<unsigned, 2> Ops;
+    Ops.push_back(i);
+    for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+      const MachineOperand &MOj = MI->getOperand(j);
+      if (!MOj.isReg())
+        continue;
+      unsigned RegJ = MOj.getReg();
+      if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
+        continue;
+      if (RegJ == RegI) {
+        Ops.push_back(j);
+        HasUse |= MOj.isUse();
+        HasDef |= MOj.isDef();
+      }
+    }
+
+    if (HasUse && !li.liveAt(getUseIndex(index)))
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons, e.g.
+      //   8   %reg1024 = IMPLICIT_DEF
+      //   12  %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled: you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
+      HasUse = false;
+
+    // Create a new virtual register for the spill interval.
+    // Create the new register now so we can map the fold instruction
+    // to the new register so when it is unfolded we get the correct
+    // answer.
+    bool CreatedNewVReg = false;
+    if (NewVReg == 0) {
+      NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      CreatedNewVReg = true;
+    }
+
+    if (!TryFold)
+      CanFold = false;
+    else {
+      // Do not fold load / store here if we are splitting. We'll find an
+      // optimal point to insert a load / store later.
+      if (!TrySplit) {
+        if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+                                 Ops, FoldSS, FoldSlot, NewVReg)) {
+          // Folding the load/store can completely change the instruction in
+          // unpredictable ways, rescan it from the beginning.
+
+          if (FoldSS) {
+            // We need to give the new vreg the same stack slot as the
+            // spilled interval.
+            vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+          }
+
+          HasUse = false;
+          HasDef = false;
+          CanFold = false;
+          if (isNotInMIMap(MI))
+            break;
+          goto RestartInstruction;
+        }
+      } else {
+        // We'll try to fold it later if it's profitable.
+        CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+      }
+    }
+
+    mop.setReg(NewVReg);
+    if (mop.isImplicit())
+      rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+    // Reuse NewVReg for other reads.
+    for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+      MachineOperand &mopj = MI->getOperand(Ops[j]);
+      mopj.setReg(NewVReg);
+      if (mopj.isImplicit())
+        rewriteImplicitOps(li, MI, NewVReg, vrm);
+    }
+
+    if (CreatedNewVReg) {
+      if (DefIsReMat) {
+        vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
+        if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+          // Each valnum may have its own remat id.
+          ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+        } else {
+          vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+        }
+        if (!CanDelete || (HasUse && HasDef)) {
+          // If this is a two-addr instruction then its use operands are
+          // rematerializable but its def is not. It should be assigned a
+          // stack slot.
+          vrm.assignVirt2StackSlot(NewVReg, Slot);
+        }
+      } else {
+        vrm.assignVirt2StackSlot(NewVReg, Slot);
+      }
+    } else if (HasUse && HasDef &&
+               vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+      // If this interval hasn't been assigned a stack slot (because earlier
+      // def is a deleted remat def), do it now.
+      assert(Slot != VirtRegMap::NO_STACK_SLOT);
+      vrm.assignVirt2StackSlot(NewVReg, Slot);
+    }
+
+    // Re-matting an instruction with virtual register use. Add the
+    // register as an implicit use on the use MI.
+    if (DefIsReMat && ImpUse)
+      MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+    // Create a new register interval for this spill / remat.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (CreatedNewVReg) {
+      NewLIs.push_back(&nI);
+      MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+      if (TrySplit)
+        vrm.setIsSplitFromReg(NewVReg, li.reg);
+    }
+
+    if (HasUse) {
+      if (CreatedNewVReg) {
+        LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
+                     nI.getNextValue(~0U, 0, VNInfoAllocator));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+      } else {
+        // Extend the split live interval to this def / use.
+        unsigned End = getUseIndex(index)+1;
+        LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+                     nI.getValNumInfo(nI.getNumValNums()-1));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+      }
+    }
+    if (HasDef) {
+      LiveRange LR(getDefIndex(index), getStoreIndex(index),
+                   nI.getNextValue(~0U, 0, VNInfoAllocator));
+      DOUT << " +" << LR;
+      nI.addRange(LR);
+    }
+
+    DOUT << "\t\t\t\tAdded new interval: ";
+    nI.print(DOUT, tri_);
+    DOUT << '\n';
+  }
+  return CanFold;
+}
+
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+                                         const VNInfo *VNI,
+                                         MachineBasicBlock *MBB,
+                                         unsigned Idx) const {
+  unsigned End = getMBBEndIdx(MBB);
+  for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
+    unsigned KillIdx = VNI->kills[j];
+    if (KillIdx > Idx && KillIdx < End)
+      return true;
+  }
+  return false;
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+  struct RewriteInfo {
+    unsigned Index;
+    MachineInstr *MI;
+    bool HasUse;
+    bool HasDef;
+    RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d)
+      : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+  };
+
+  struct RewriteInfoCompare {
+    bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+      return LHS.Index < RHS.Index;
+    }
+  };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+                    LiveInterval::Ranges::const_iterator &I,
+                    MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                    unsigned Slot, int LdSlot,
+                    bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                    VirtRegMap &vrm,
+                    const TargetRegisterClass* rc,
+                    SmallVector<int, 4> &ReMatIds,
+                    const MachineLoopInfo *loopInfo,
+                    BitVector &SpillMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+                    BitVector &RestoreMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+                    DenseMap<unsigned, unsigned> &MBBVRegsMap,
+                    std::vector<LiveInterval*> &NewLIs) {
+  bool AllCanFold = true;
+  unsigned NewVReg = 0;
+  unsigned start = getBaseIndex(I->start);
+  unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM;
+
+  // First collect all the def / use in this live range that will be rewritten.
+  // Make sure they are sorted according to instruction index.
+  std::vector<RewriteInfo> RewriteMIs;
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineInstr *MI = &*ri;
+    MachineOperand &O = ri.getOperand();
+    ++ri;
+    assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+    unsigned index = getInstructionIndex(MI);
+    if (index < start || index >= end)
+      continue;
+    if (O.isUse() && !li.liveAt(getUseIndex(index)))
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons, e.g.
+      //   8   %reg1024 = IMPLICIT_DEF
+      //   12  %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled: you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
+      continue;
+    RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+  }
+  std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+  unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
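+  // The collect-then-sort step above, in miniature (hypothetical instructions
+  // and indices; not code from this patch): entries arrive in use-list order
+  // and leave in instruction-index order, so rewriting proceeds top-down.
+#if 0
+  std::vector<RewriteInfo> Ex;
+  Ex.push_back(RewriteInfo(12, SomeUseMI, true, false)); // use at index 12
+  Ex.push_back(RewriteInfo(4, SomeDefMI, false, true));  // def at index 4
+  std::sort(Ex.begin(), Ex.end(), RewriteInfoCompare());
+  // Ex[0] is now the def at index 4; the rewrite loop below sees defs first.
+#endif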
+  // Now rewrite the defs and uses.
+  for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+    RewriteInfo &rwi = RewriteMIs[i];
+    ++i;
+    unsigned index = rwi.Index;
+    bool MIHasUse = rwi.HasUse;
+    bool MIHasDef = rwi.HasDef;
+    MachineInstr *MI = rwi.MI;
+    // If MI defs and/or uses the same register multiple times, then there
+    // are multiple entries.
+    unsigned NumUses = MIHasUse;
+    while (i != e && RewriteMIs[i].MI == MI) {
+      assert(RewriteMIs[i].Index == index);
+      bool isUse = RewriteMIs[i].HasUse;
+      if (isUse) ++NumUses;
+      MIHasUse |= isUse;
+      MIHasDef |= RewriteMIs[i].HasDef;
+      ++i;
+    }
+    MachineBasicBlock *MBB = MI->getParent();
+
+    if (ImpUse && MI != ReMatDefMI) {
+      // Re-matting an instruction with virtual register use. Update the
+      // register interval's spill weight to HUGE_VALF to prevent it from
+      // being spilled.
+      LiveInterval &ImpLi = getInterval(ImpUse);
+      ImpLi.weight = HUGE_VALF;
+    }
+
+    unsigned MBBId = MBB->getNumber();
+    unsigned ThisVReg = 0;
+    if (TrySplit) {
+      DenseMap<unsigned, unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+      if (NVI != MBBVRegsMap.end()) {
+        ThisVReg = NVI->second;
+        // One common case:
+        //   x = use
+        //   ...
+        //   ...
+        //   def = ...
+        //       = use
+        // It's better to start a new interval to avoid artificially
+        // extending the new interval.
+        if (MIHasDef && !MIHasUse) {
+          MBBVRegsMap.erase(MBB->getNumber());
+          ThisVReg = 0;
+        }
+      }
+    }
+
+    bool IsNew = ThisVReg == 0;
+    if (IsNew) {
+      // This ends the previous live interval. If all of its def / use
+      // can be folded, give it a low spill weight.
+      if (NewVReg && TrySplit && AllCanFold) {
+        LiveInterval &nI = getOrCreateInterval(NewVReg);
+        nI.weight /= 10.0F;
+      }
+      AllCanFold = true;
+    }
+    NewVReg = ThisVReg;
+
+    bool HasDef = false;
+    bool HasUse = false;
+    bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+                                index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+                                Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                                CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+                                ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+    if (!HasDef && !HasUse)
+      continue;
+
+    AllCanFold &= CanFold;
+
+    // Update weight of spill interval.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (!TrySplit) {
+      // The spill weight is now infinity as it cannot be spilled again.
+      nI.weight = HUGE_VALF;
+      continue;
+    }
+
+    // Keep track of the last def and first use in each MBB.
+    if (HasDef) {
+      if (MI != ReMatOrigDefMI || !CanDelete) {
+        bool HasKill = false;
+        if (!HasUse)
+          HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
+        else {
+          // If this is a two-address instruction, then this index starts a
+          // new VNInfo.
+          const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index));
+          if (VNI)
+            HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
+        }
+        DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+          SpillIdxes.find(MBBId);
+        if (!HasKill) {
+          if (SII == SpillIdxes.end()) {
+            std::vector<SRInfo> S;
+            S.push_back(SRInfo(index, NewVReg, true));
+            SpillIdxes.insert(std::make_pair(MBBId, S));
+          } else if (SII->second.back().vreg != NewVReg) {
+            SII->second.push_back(SRInfo(index, NewVReg, true));
+          } else if ((int)index > SII->second.back().index) {
+            // If there is an earlier def and this is a two-address
+            // instruction, then it's not possible to fold the store (which
+            // would also fold the load).
+            SRInfo &Info = SII->second.back();
+            Info.index = index;
+            Info.canFold = !HasUse;
+          }
+          SpillMBBs.set(MBBId);
+        } else if (SII != SpillIdxes.end() &&
+                   SII->second.back().vreg == NewVReg &&
+                   (int)index > SII->second.back().index) {
+          // There is an earlier def that's not killed (must be two-address).
+          // The spill is no longer needed.
+          SII->second.pop_back();
+          if (SII->second.empty()) {
+            SpillIdxes.erase(MBBId);
+            SpillMBBs.reset(MBBId);
+          }
+        }
+      }
+    }
+
+    if (HasUse) {
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+        SpillIdxes.find(MBBId);
+      if (SII != SpillIdxes.end() &&
+          SII->second.back().vreg == NewVReg &&
+          (int)index > SII->second.back().index)
+        // Use(s) following the last def; it's not safe to fold the spill.
+        SII->second.back().canFold = false;
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+        RestoreIdxes.find(MBBId);
+      if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+        // If we are splitting live intervals, only fold if it's the first
+        // use and there isn't another use later in the MBB.
+        RII->second.back().canFold = false;
+      else if (IsNew) {
+        // Only need a reload if there isn't an earlier def / use.
+        if (RII == RestoreIdxes.end()) {
+          std::vector<SRInfo> Infos;
+          Infos.push_back(SRInfo(index, NewVReg, true));
+          RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+        } else {
+          RII->second.push_back(SRInfo(index, NewVReg, true));
+        }
+        RestoreMBBs.set(MBBId);
+      }
+    }
+
+    // Update spill weight.
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+  }
+
+  if (NewVReg && TrySplit && AllCanFold) {
+    // If all of its def / use can be folded, give it a low spill weight.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    nI.weight /= 10.0F;
+  }
+}
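+// A numeric sketch of the spill-weight update used in the loop above. The
+// 10^loopDepth scaling mirrors my reading of LiveIntervals::getSpillWeight;
+// treat the exact formula as an assumption rather than part of this patch.
+#if 0
+static float spillWeightSketch(bool HasDef, bool HasUse, unsigned loopDepth) {
+  // One point per def and per use, multiplied by 10 per enclosing loop:
+  // a def+use at loop depth 2 contributes (1+1) * 10^2 == 200.
+  return (HasDef + HasUse) * powf(10.0F, (float)loopDepth);
+}
+#endif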
+
+bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
+                        BitVector &RestoreMBBs,
+                        DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return false;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index &&
+        Restores[i].vreg == vr &&
+        Restores[i].canFold)
+      return true;
+  return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr,
+                        BitVector &RestoreMBBs,
+                        DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index && Restores[i].vreg == vr)
+      Restores[i].index = -1;
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+                                    const TargetRegisterClass* rc,
+                                    std::vector<LiveInterval*> &NewLIs) {
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineOperand &O = ri.getOperand();
+    MachineInstr *MI = &*ri;
+    ++ri;
+    if (O.isDef()) {
+      assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF &&
+             "Register def was not rewritten?");
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+    } else {
+      // This must be a use of an implicit_def so it's not part of the live
+      // interval. Create a new empty live interval for it.
+      // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.setIsImplicitlyDefined(NewVReg);
+      NewLIs.push_back(&getOrCreateInterval(NewVReg));
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg)
+          MO.setReg(NewVReg);
+      }
+    }
+  }
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpillsFast(const LiveInterval &li,
+                          const MachineLoopInfo *loopInfo,
+                          VirtRegMap &vrm) {
+  unsigned slot = vrm.assignVirt2StackSlot(li.reg);
+
+  std::vector<LiveInterval*> added;
+
+  assert(li.weight != HUGE_VALF &&
+         "attempt to spill already spilled interval!");
+
+  DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+  DEBUG(li.dump());
+  DOUT << '\n';
+
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
+  while (RI != mri_->reg_end()) {
+    MachineInstr* MI = &*RI;
+
+    SmallVector<unsigned, 2> Indices;
+    bool HasUse = false;
+    bool HasDef = false;
+
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& mop = MI->getOperand(i);
+      if (!mop.isReg() || mop.getReg() != li.reg) continue;
+
+      HasUse |= MI->getOperand(i).isUse();
+      HasDef |= MI->getOperand(i).isDef();
+
+      Indices.push_back(i);
+    }
+
+    if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
+                              Indices, true, slot, li.reg)) {
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.assignVirt2StackSlot(NewVReg, slot);
+
+      // create a new register for this spill
+      LiveInterval &nI = getOrCreateInterval(NewVReg);
+
+      // the spill weight is now infinity as it
+      // cannot be spilled again
+      nI.weight = HUGE_VALF;
+
+      // Rewrite register operands to use the new vreg.
+      for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
+             E = Indices.end(); I != E; ++I) {
+        MI->getOperand(*I).setReg(NewVReg);
+
+        if (MI->getOperand(*I).isUse())
+          MI->getOperand(*I).setIsKill(true);
+      }
+
+      // Fill in the new live interval.
+      unsigned index = getInstructionIndex(MI);
+      if (HasUse) {
+        LiveRange LR(getLoadIndex(index), getUseIndex(index),
+                     nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+        vrm.addRestorePoint(NewVReg, MI);
+      }
+      if (HasDef) {
+        LiveRange LR(getDefIndex(index), getStoreIndex(index),
+                     nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+        vrm.addSpillPoint(NewVReg, true, MI);
+      }
+
+      added.push_back(&nI);
+
+      DOUT << "\t\t\t\tadded new interval: ";
+      DEBUG(nI.dump());
+      DOUT << '\n';
+    }
+
+    RI = mri_->reg_begin(li.reg);
+  }
+
+  return added;
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+                      SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+
+  if (EnableFastSpilling)
+    return addIntervalsForSpillsFast(li, loopInfo, vrm);
+
+  assert(li.weight != HUGE_VALF &&
+         "attempt to spill already spilled interval!");
+
+  DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+  li.print(DOUT, tri_);
+  DOUT << '\n';
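+  // The per-block bookkeeping declared just below, in miniature (hypothetical
+  // values; not code from this patch): the BitVectors record *which* blocks
+  // need spills / restores, while the maps record *where* within each block.
+#if 0
+  BitVector Spills(4);
+  DenseMap<unsigned, std::vector<SRInfo> > Where;
+  Spills.set(2);                              // block #2 needs a spill...
+  Where[2].push_back(SRInfo(40, 1024, true)); // ...at index 40, for vreg 1024
+  for (int Id = Spills.find_first(); Id != -1; Id = Spills.find_next(Id))
+    ; // the final pass walks only the set bits, not every block
+#endif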
+  // Each bit specifies whether a spill is required in the MBB.
+  BitVector SpillMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+  BitVector RestoreMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+  DenseMap<unsigned, unsigned> MBBVRegsMap;
+  std::vector<LiveInterval*> NewLIs;
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  unsigned NumValNums = li.getNumValNums();
+  SmallVector<MachineInstr*, 4> ReMatDefs;
+  ReMatDefs.resize(NumValNums, NULL);
+  SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+  ReMatOrigDefs.resize(NumValNums, NULL);
+  SmallVector<int, 4> ReMatIds;
+  ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+  BitVector ReMatDelete(NumValNums);
+  unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+  // Spilling a split live interval. It cannot be split any further. Also,
+  // it's guaranteed to be a single val# / range interval.
+  if (vrm.getPreSplitReg(li.reg)) {
+    vrm.setIsSplitFromReg(li.reg, 0);
+    // Unset the split kill marker on the last use.
+    unsigned KillIdx = vrm.getKillPoint(li.reg);
+    if (KillIdx) {
+      MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+      assert(KillMI && "Last use disappeared?");
+      int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+      assert(KillOp != -1 && "Last use disappeared?");
+      KillMI->getOperand(KillOp).setIsKill(false);
+    }
+    vrm.removeKillPoint(li.reg);
+    bool DefIsReMat = vrm.isReMaterialized(li.reg);
+    Slot = vrm.getStackSlot(li.reg);
+    assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+    MachineInstr *ReMatDefMI = DefIsReMat ?
+      vrm.getReMaterializedMI(li.reg) : NULL;
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+    bool IsFirstRange = true;
+    for (LiveInterval::Ranges::const_iterator
+           I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+      // If this is a split live interval with multiple ranges, it means there
+      // are two-address instructions that re-defined the value. Only the
+      // first def can be rematerialized!
+      if (IsFirstRange) {
+        // Note ReMatOrigDefMI has already been deleted.
+        rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+                             Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      } else {
+        rewriteInstructionsForSpills(li, false, I, NULL, 0,
+                             Slot, 0, false, false, false,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      }
+      IsFirstRange = false;
+    }
+
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    return NewLIs;
+  }
+
+  bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li);
+  if (SplitLimit != -1 && (int)numSplits >= SplitLimit)
+    TrySplit = false;
+  if (TrySplit)
+    ++numSplits;
+  bool NeedStackSlot = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    unsigned DefIdx = VNI->def;
+    if (DefIdx == ~1U)
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    MachineInstr *ReMatDefMI = (DefIdx == ~0u)
+      ? 0 : getInstructionFromIndex(DefIdx);
+    bool dummy;
+    if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+      // Remember how to remat the def of this val#.
+      ReMatOrigDefs[VN] = ReMatDefMI;
+      // Original def may be modified so we have to make a copy here.
+      MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+      ClonedMIs.push_back(Clone);
+      ReMatDefs[VN] = Clone;
+
+      bool CanDelete = true;
+      if (VNI->hasPHIKill) {
+        // A kill is a phi node: not all of its uses can be rematerialized.
+        // It must not be deleted.
+        CanDelete = false;
+        // Need a stack slot if there is any live range where uses cannot be
+        // rematerialized.
+        NeedStackSlot = true;
+      }
+      if (CanDelete)
+        ReMatDelete.set(VN);
+    } else {
+      // Need a stack slot if there is any live range where uses cannot be
+      // rematerialized.
+      NeedStackSlot = true;
+    }
+  }
+
+  // One stack slot per live interval.
+  if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+    if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+      Slot = vrm.assignVirt2StackSlot(li.reg);
+
+    // This case only occurs when the prealloc splitter has already assigned
+    // a stack slot to this vreg.
+    else
+      Slot = vrm.getStackSlot(li.reg);
+  }
+
+  // Create new intervals and rewrite defs and uses.
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+    MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+    bool DefIsReMat = ReMatDefMI != NULL;
+    bool CanDelete = ReMatDelete[I->valno->id];
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+    rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+                                 Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                                 CanDelete, vrm, rc, ReMatIds, loopInfo,
+                                 SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                                 MBBVRegsMap, NewLIs);
+  }
+
+  // Insert spills / restores if we are splitting.
+  if (!TrySplit) {
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    return NewLIs;
+  }
+
+  SmallPtrSet<LiveInterval*, 4> AddedKill;
+  SmallVector<unsigned, 2> Ops;
+  if (NeedStackSlot) {
+    int Id = SpillMBBs.find_first();
+    while (Id != -1) {
+      std::vector<SRInfo> &spills = SpillIdxes[Id];
+      for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+        int index = spills[i].index;
+        unsigned VReg = spills[i].vreg;
+        LiveInterval &nI = getOrCreateInterval(VReg);
+        bool isReMat = vrm.isReMaterialized(VReg);
+        MachineInstr *MI = getInstructionFromIndex(index);
+        bool CanFold = false;
+        bool FoundUse = false;
+        Ops.clear();
+        if (spills[i].canFold) {
+          CanFold = true;
+          for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &MO = MI->getOperand(j);
+            if (!MO.isReg() || MO.getReg() != VReg)
+              continue;
+
+            Ops.push_back(j);
+            if (MO.isDef())
+              continue;
+            if (isReMat ||
+                (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+                                                RestoreMBBs, RestoreIdxes))) {
+              // MI has two-address uses of the same register. If the use
+              // isn't the first and only use in the BB, then we can't fold
+              // it. FIXME: Move this to rewriteInstructionsForSpills.
+              CanFold = false;
+              break;
+            }
+            FoundUse = true;
+          }
+        }
+        // Fold the store into the def if possible.
+        bool Folded = false;
+        if (CanFold && !Ops.empty()) {
+          if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot, VReg)) {
+            Folded = true;
+            if (FoundUse) {
+              // Also folded uses, do not issue a load.
+              eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+              nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+            }
+            nI.removeRange(getDefIndex(index), getStoreIndex(index));
+          }
+        }
+
+        // Otherwise tell the spiller to issue a spill.
+        if (!Folded) {
+          LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+          bool isKill = LR->end == getStoreIndex(index);
+          if (!MI->registerDefIsDead(nI.reg))
+            // No need to spill a dead def.
+            vrm.addSpillPoint(VReg, isKill, MI);
+          if (isKill)
+            AddedKill.insert(&nI);
+        }
+      }
+      Id = SpillMBBs.find_next(Id);
+    }
+  }
+
+  int Id = RestoreMBBs.find_first();
+  while (Id != -1) {
+    std::vector<SRInfo> &restores = RestoreIdxes[Id];
+    for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+      int index = restores[i].index;
+      if (index == -1)
+        continue;
+      unsigned VReg = restores[i].vreg;
+      LiveInterval &nI = getOrCreateInterval(VReg);
+      bool isReMat = vrm.isReMaterialized(VReg);
+      MachineInstr *MI = getInstructionFromIndex(index);
+      bool CanFold = false;
+      Ops.clear();
+      if (restores[i].canFold) {
+        CanFold = true;
+        for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+          MachineOperand &MO = MI->getOperand(j);
+          if (!MO.isReg() || MO.getReg() != VReg)
+            continue;
+
+          if (MO.isDef()) {
+            // If this restore were to be folded, it would have been folded
+            // already.
+            CanFold = false;
+            break;
+          }
+          Ops.push_back(j);
+        }
+      }
+
+      // Fold the load into the use if possible.
+      bool Folded = false;
+      if (CanFold && !Ops.empty()) {
+        if (!isReMat)
+          Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true,
+                                        Slot, VReg);
+        else {
+          MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+          int LdSlot = 0;
+          bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+          // If the rematerializable def is a load, also try to fold it.
+          if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+            Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+                                          Ops, isLoadSS, LdSlot, VReg);
+          if (!Folded) {
+            unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+            if (ImpUse) {
+              // Re-matting an instruction with virtual register use. Add the
+              // register as an implicit use on the use MI and update the
+              // register interval's spill weight to HUGE_VALF to prevent it
+              // from being spilled.
+              LiveInterval &ImpLi = getInterval(ImpUse);
+              ImpLi.weight = HUGE_VALF;
+              MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+            }
+          }
+        }
+      }
+      // If folding is not possible / failed, then tell the spiller to issue a
+      // load / rematerialization for us.
+      if (Folded)
+        nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+      else
+        vrm.addRestorePoint(VReg, MI);
+    }
+    Id = RestoreMBBs.find_next(Id);
+  }
+
+  // Finalize intervals: add kills, finalize spill weights, and filter out
+  // dead intervals.
+  std::vector<LiveInterval*> RetNewLIs;
+  for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+    LiveInterval *LI = NewLIs[i];
+    if (!LI->empty()) {
+      LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI);
+      if (!AddedKill.count(LI)) {
+        LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+        unsigned LastUseIdx = getBaseIndex(LR->end);
+        MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+        int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+        assert(UseIdx != -1);
+        if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+          LastUse->getOperand(UseIdx).setIsKill();
+          vrm.addKillPoint(LI->reg, LastUseIdx);
+        }
+      }
+      RetNewLIs.push_back(LI);
+    }
+  }
+
+  handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+  return RetNewLIs;
+}
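+// The weight normalization above, as plain arithmetic (hypothetical numbers):
+// dividing the accumulated weight by the interval's approximate size keeps
+// short intervals from looking artificially cheap to spill.
+#if 0
+float weight = 200.0F;       // accumulated from getSpillWeight calls
+unsigned approxInstrs = 5;   // getApproximateInstructionCount(*LI)
+weight /= InstrSlots::NUM * approxInstrs; // e.g. 200 / (4 * 5) == 10 if NUM is 4
+#endif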
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+    if (allocatableRegs_[*AS] && hasInterval(*AS))
+      return true;
+  return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+  // Find the largest super-register that is allocatable.
+  unsigned BestReg = Reg;
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+    unsigned SuperReg = *AS;
+    if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+      BestReg = SuperReg;
+      break;
+    }
+  }
+  return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+                                                   unsigned PhysReg) const {
+  unsigned NumConflicts = 0;
+  const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    unsigned Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index))
+      ++NumConflicts;
+  }
+  return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+                                                  unsigned PhysReg,
+                                                  VirtRegMap &vrm) {
+  unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+  for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+    // If there is a register that aliases PhysReg but is not a sub-register
+    // of the chosen representative super-register, assert, since we can't
+    // handle it yet.
+    assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+           tri_->isSuperRegister(*AS, SpillReg));
+
+  bool Cut = false;
+  LiveInterval &pli = getInterval(SpillReg);
+  SmallPtrSet<MachineInstr*, 4> SeenMIs;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (SeenMIs.count(MI))
+      continue;
+    SeenMIs.insert(MI);
+    unsigned Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index)) {
+      vrm.addEmergencySpill(SpillReg, MI);
+      unsigned StartIdx = getLoadIndex(Index);
+      unsigned EndIdx = getStoreIndex(Index)+1;
+      if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+        pli.removeRange(StartIdx, EndIdx);
+        Cut = true;
+      } else {
+        cerr << "Ran out of registers during register allocation!\n";
+        if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+          cerr << "Please check your inline asm statement for invalid "
+               << "constraints:\n";
+          MI->print(cerr.stream(), tm_);
+        }
+        exit(1);
+      }
+      for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) {
+        if (!hasInterval(*AS))
+          continue;
+        LiveInterval &spli = getInterval(*AS);
+        if (spli.liveAt(Index))
+          spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1);
+      }
+    }
+  }
+  return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+                                                  MachineInstr* startInst) {
+  LiveInterval& Interval = getOrCreateInterval(reg);
+  VNInfo* VN = Interval.getNextValue(
+    getInstructionIndex(startInst) + InstrSlots::DEF,
+    startInst, getVNInfoAllocator());
+  VN->hasPHIKill = true;
+  VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
+  LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
+               getMBBEndIdx(startInst->getParent()) + 1, VN);
+  Interval.addRange(LR);
+
+  return LR;
+}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 000000000000..86f7ea20c9be
--- /dev/null
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,66 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+
+void LiveStacks::scaleNumbering(int factor) {
+  // Scale the intervals.
+  for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+    LI->second.scaleNumbering(factor);
+  }
+}
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
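+// What scaleNumbering above does to one interval, numerically. This is an
+// illustrative sketch; the per-interval arithmetic actually lives in
+// LiveInterval::scaleNumbering, and the endpoints here are made up.
+#if 0
+void rescaleAfterRenumbering(LiveStacks &LS) {
+  // Doubling every instruction index: an interval covering [10, 30) in the
+  // old numbering becomes [20, 60), matching the renumbered instructions.
+  LS.scaleNumbering(2);
+}
+#endif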
+void LiveStacks::releaseMemory() {
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+  VNInfoAllocator.Reset();
+  S2IMap.clear();
+  S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+  // FIXME: No analysis is being done right now. We are relying on the
+  // register allocators to provide the information.
+  return false;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(std::ostream &O, const Module*) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(O);
+    int Slot = I->first;
+    const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+    if (RC)
+      O << " [" << RC->getName() << "]\n";
+    else
+      O << " [Unknown]\n";
+  }
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..c33d81e8a875
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,695 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are
+// only live within a single basic block (allowing it to do a single local
+// analysis to resolve physical register lifetimes in each basic block). If a
+// physical register is not register allocatable, it is not tracked. This is
+// useful for things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
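+// The per-vreg record this pass builds, on a tiny example (hypothetical block
+// numbers and instructions): for a vreg defined in BB#0, live straight
+// through BB#1, and last read in BB#2, the pass ends up with
+#if 0
+//   VarInfo.AliveBlocks == { 1 }   // alive completely through BB#1 only
+//   VarInfo.Kills       == { the last use in BB#2 }
+//   // Neither the defining block nor the killing block is in AliveBlocks.
+#endif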
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/alloca.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredID(UnreachableMachineBlockElimID);
+  AU.setPreservesAll();
+}
+
+void LiveVariables::VarInfo::dump() const {
+  cerr << "  Alive in blocks: ";
+  for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+         E = AliveBlocks.end(); I != E; ++I)
+    cerr << *I << ", ";
+  cerr << "\n  Killed by:";
+  if (Kills.empty())
+    cerr << " No instructions.\n";
+  else {
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+      cerr << "\n    #" << i << ": " << *Kills[i];
+    cerr << "\n";
+  }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+  assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+         "getVarInfo: not a virtual register!");
+  RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
+  if (RegIdx >= VirtRegInfo.size()) {
+    if (RegIdx >= 2*VirtRegInfo.size())
+      VirtRegInfo.resize(RegIdx*2);
+    else
+      VirtRegInfo.resize(2*VirtRegInfo.size());
+  }
+  return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB,
+                                    std::vector<MachineBasicBlock*> &WorkList) {
+  unsigned BBNum = MBB->getNumber();
+
+  // Check to see if this basic block is one of the killing blocks. If so,
+  // remove it.
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i);  // Erase entry
+      break;
+    }
+
+  if (MBB == DefBlock) return;  // Terminate recursion
+
+  if (VRInfo.AliveBlocks.test(BBNum))
+    return;  // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks.set(BBNum);
+
+  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+         E = MBB->pred_rend(); PI != E; ++PI)
+    WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+  }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+  unsigned BBNum = MBB->getNumber();
+
+  VarInfo& VRInfo = getVarInfo(reg);
+  VRInfo.NumUses++;
+
+  // Check to see if this basic block is already a kill block.
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already. Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  // This situation can occur:
+  //
+  //     ,------.
+  //     |      |
+  //     |      v
+  //     |   t2 = phi ... t1 ...
+  //     |      |
+  //     |      v
+  //     |   t1 = ...
+  //     |  ... = ... t1 ...
+  //     |      |
+  //     `------'
+  //
+  // where there is a use in a PHI node that's a predecessor to the defining
+  // block. We don't want to mark all predecessors as having the value "alive"
+  // in this case.
+  if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+  // Add a new kill entry for this basic block. If this virtual register is
+  // already marked as alive in this basic block, that means it is alive in at
+  // least one of the successor blocks, so it's not a kill.
+  if (!VRInfo.AliveBlocks.test(BBNum))
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them as "known live".
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+  VarInfo &VRInfo = getVarInfo(Reg);
+
+  if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then it defaults to dead.
+    VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-register that's defined.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+                                                unsigned &PartDefReg) {
+  unsigned LastDefReg = 0;
+  unsigned LastDefDist = 0;
+  MachineInstr *LastDef = NULL;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (!Def)
+      continue;
+    unsigned Dist = DistanceMap[Def];
+    if (Dist > LastDefDist) {
+      LastDefReg = SubReg;
+      LastDef = Def;
+      LastDefDist = Dist;
+    }
+  }
+  PartDefReg = LastDefReg;
+  return LastDef;
+}
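+// FindLastPartialDef on a concrete x86 shape, as a sketch. The instructions
+// and distances are hypothetical; the register names are real x86 enums used
+// only for illustration.
+#if 0
+// With PhysRegDef[X86::AL] at distance 3 and PhysRegDef[X86::AH] at
+// distance 7, FindLastPartialDef(X86::EAX, PartDefReg) returns the AH def
+// and sets PartDefReg = X86::AH: the most recent partial definition wins.
+#endif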
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  // If there was a previous use or a "full" def all is well.
+  if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+    // Otherwise, the last sub-register def implicitly defines this register.
+    // e.g.
+    // AH =
+    // AL = ... <imp-def EAX>, <imp-kill AH>
+    //    = AH
+    // ...
+    //    = EAX
+    // All of the sub-registers must have been defined before the use of Reg!
+    unsigned PartDefReg = 0;
+    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg);
+    // If LastPartialDef is NULL, it must be using a livein register.
+    if (LastPartialDef) {
+      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+                                                           true/*IsImp*/));
+      PhysRegDef[Reg] = LastPartialDef;
+      SmallSet<unsigned, 8> Processed;
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           unsigned SubReg = *SubRegs; ++SubRegs) {
+        if (Processed.count(SubReg))
+          continue;
+        if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg))
+          continue;
+        // This part of Reg was defined before the last partial def. It's
+        // killed here.
+        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+                                                             false/*IsDef*/,
+                                                             true/*IsImp*/));
+        PhysRegDef[SubReg] = LastPartialDef;
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Processed.insert(*SS);
+      }
+    }
+  }
+
+  // There was an earlier def of a super-register. Add implicit def to that MI.
+  //
+  //   A: EAX = ...
+  //   B: ... = AX
+  //
+  // Add implicit def to A if there isn't a use of AX (or EAX) before B.
+  if (!PhysRegUse[Reg]) {
+    MachineInstr *Def = PhysRegDef[Reg];
+    if (Def && !Def->modifiesRegister(Reg))
+      Def->addOperand(MachineOperand::CreateReg(Reg,
+                                                true/*IsDef*/,
+                                                true/*IsImp*/));
+  }
+
+  // Remember this use.
+  PhysRegUse[Reg] = MI;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs)
+    PhysRegUse[SubReg] = MI;
+}
+
+/// hasRegisterUseBelow - Return true if the specified register is used after
+/// the current instruction and before its next definition.
+bool LiveVariables::hasRegisterUseBelow(unsigned Reg,
+                                        MachineBasicBlock::iterator I,
+                                        MachineBasicBlock *MBB) {
+  if (I == MBB->end())
+    return false;
+
+  // First find out if there are any uses / defs below.
+  bool hasDistInfo = true;
+  unsigned CurDist = DistanceMap[I];
+  SmallVector<MachineInstr*, 4> Uses;
+  SmallVector<MachineInstr*, 4> Defs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineOperand &UDO = RI.getOperand();
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    bool isBelow = false;
+    if (DI == DistanceMap.end()) {
+      // Must be below if it hasn't been assigned a distance yet.
+      isBelow = true;
+      hasDistInfo = false;
+    } else if (DI->second > CurDist)
+      isBelow = true;
+    if (isBelow) {
+      if (UDO.isUse())
+        Uses.push_back(UDMI);
+      if (UDO.isDef())
+        Defs.push_back(UDMI);
+    }
+  }
+
+  if (Uses.empty())
+    // No uses below.
+    return false;
+  else if (!Uses.empty() && Defs.empty())
+    // There are uses below but no defs below.
+    return true;
+  // There are both uses and defs below. We need to know which comes first.
+  if (!hasDistInfo) {
+    // Complete DistanceMap for this MBB. This information is computed only
+    // once per MBB.
+    ++I;
+    ++CurDist;
+    for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist)
+      DistanceMap.insert(std::make_pair(I, CurDist));
+  }
+
+  unsigned EarliestUse = DistanceMap[Uses[0]];
+  for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+    unsigned Dist = DistanceMap[Uses[i]];
+    if (Dist < EarliestUse)
+      EarliestUse = Dist;
+  }
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    unsigned Dist = DistanceMap[Defs[i]];
+    if (Dist < EarliestUse)
+      // The register is defined before its first use below.
+      return false;
+  }
+  return true;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+  if (!PhysRegUse[Reg] && !PhysRegDef[Reg])
+    return false;
+
+  MachineInstr *LastRefOrPartRef = PhysRegUse[Reg]
+    ? PhysRegUse[Reg] : PhysRegDef[Reg];
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  // The whole register is used.
+  // AL =
+  // AH =
+  //
+  //    = AX
+  //    = AL, AX<imp-use, kill>
+  // AX =
+  //
+  // Or whole register is defined, but not used at all.
+  // AX<dead> =
+  // ...
+  // AX =
+  //
+  // Or whole register is defined, but only partly used.
+  // AX<dead> = AL<imp-def>
+  //    = AL<kill>
+  // AX =
+  SmallSet<unsigned, 8> PartUses;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      PartUses.insert(SubReg);
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.insert(*SS);
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI)
+    // If the last reference is the last def, then it's not used at all.
+    // That is, unless we are currently processing the last reference itself.
+    LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+
+  /* Partial uses. Mark register def dead and add implicit def of
+     sub-registers which are used.
+     FIXME: LiveIntervalAnalysis can't handle this yet!
+     EAX<dead> = op AL<imp-def>
+     That is, EAX def is dead but AL def extends past it.
+     Enable this after live interval analysis is fixed to improve codegen!
+  else if (!PhysRegUse[Reg]) {
+    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (PartUses.count(SubReg)) {
+        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+                                                              true, true));
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          PartUses.erase(*SS);
+      }
+    }
+  } */
+  else
+    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+  return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+  // What parts of the register are previously defined?
+  SmallSet<unsigned, 8> Live;
+  if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+    Live.insert(Reg);
+    for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+      Live.insert(*SS);
+  } else {
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      // If a register isn't itself defined, but all parts that make it up
+      // are defined, then consider it also defined.
+      // e.g.
+      // AL =
+      // AH =
+      //    = AX
+      if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+        Live.insert(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.insert(*SS);
+      }
+    }
+  }
+
+  // Start from the largest piece, find the last time any part of the register
+  // is referenced.
+  if (!HandlePhysRegKill(Reg, MI)) {
+    // Only some of the sub-registers are used.
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (!Live.count(SubReg))
+        // Skip if this sub-register isn't defined.
+        continue;
+      if (HandlePhysRegKill(SubReg, MI)) {
+        Live.erase(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.erase(*SS);
+      }
+    }
+    assert(Live.empty() && "Not all defined registers are killed / dead?");
+  }
+
+  if (MI) {
+    // Does this extend the live range of a super-register?
+    SmallSet<unsigned, 8> Processed;
+    for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg);
+         unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+      if (Processed.count(SuperReg))
+        continue;
+      MachineInstr *LastRef = PhysRegUse[SuperReg]
+        ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg];
+      if (LastRef && LastRef != MI) {
+        // The larger register is previously defined. Now a smaller part is
+        // being re-defined. Treat it as read/mod/write if there are uses
+        // below.
+        // EAX =
+        // AX  = EAX<imp-use, kill>, EAX<imp-def>
+        // ...
+        //    = EAX
+        if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) {
+          MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/,
+                                                   true/*IsImp*/,
+                                                   true/*IsKill*/));
+          MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/,
+                                                   true/*IsImp*/));
+          PhysRegDef[SuperReg] = MI;
+          PhysRegUse[SuperReg] = NULL;
+          Processed.insert(SuperReg);
+          for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
+            PhysRegDef[*SS] = MI;
+            PhysRegUse[*SS] = NULL;
+            Processed.insert(*SS);
+          }
+        } else {
+          // Otherwise, the super register is killed.
+          if (HandlePhysRegKill(SuperReg, MI)) {
+            PhysRegDef[SuperReg] = NULL;
+            PhysRegUse[SuperReg] = NULL;
+            for (const unsigned *SS = TRI->getSubRegisters(SuperReg);
+                 *SS; ++SS) {
+              PhysRegDef[*SS] = NULL;
+              PhysRegUse[*SS] = NULL;
+              Processed.insert(*SS);
+            }
+          }
+        }
+      }
+    }
+
+    // Remember this def.
+    PhysRegDef[Reg] = MI;
+    PhysRegUse[Reg] = NULL;
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      PhysRegDef[SubReg] = MI;
+      PhysRegUse[SubReg] = NULL;
+    }
+  }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  MRI = &mf.getRegInfo();
+  TRI = MF->getTarget().getRegisterInfo();
+
+  ReservedRegisters = TRI->getReservedRegs(mf);
+
+  unsigned NumRegs = TRI->getNumRegs();
+  PhysRegDef = new MachineInstr*[NumRegs];
+  PhysRegUse = new MachineInstr*[NumRegs];
+  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+  std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+  std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+
+  /// Get some space for a respectable number of registers.
+  VirtRegInfo.resize(64);
+
+  analyzePHINodes(mf);
+
+  // Calculate live variable information in depth first order on the CFG of the
+  // function. This guarantees that we will see the definition of a virtual
+  // register before its uses due to dominance properties of SSA (except for
+  // PHI nodes, which are treated as a special case).
+  MachineBasicBlock *Entry = MF->begin();
+  SmallPtrSet<MachineBasicBlock*, 16> Visited;
+
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MachineBasicBlock *MBB = *DFI;
+
+    // Mark live-in registers as live-in.
+    for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+           EE = MBB->livein_end(); II != EE; ++II) {
+      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+             "Cannot have a live-in virtual register!");
+      HandlePhysRegDef(*II, 0);
+    }
+
+    // Loop over all of the instructions, processing them.
+    DistanceMap.clear();
+    unsigned Dist = 0;
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      MachineInstr *MI = I;
+      DistanceMap.insert(std::make_pair(MI, Dist++));
+
+      // Process all of the operands of the instruction...
+      unsigned NumOperandsToProcess = MI->getNumOperands();
+
+      // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+      // of the uses. They will be handled in other basic blocks.
+      if (MI->getOpcode() == TargetInstrInfo::PHI)
+        NumOperandsToProcess = 1;
+
+      SmallVector<unsigned, 4> UseRegs;
+      SmallVector<unsigned, 4> DefRegs;
+      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+        const MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || MO.getReg() == 0)
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MO.isUse())
+          UseRegs.push_back(MOReg);
+        if (MO.isDef())
+          DefRegs.push_back(MOReg);
+      }
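+      // PHI operand layout assumed by the special case above (a sketch; the
+      // vregs and blocks are hypothetical): operand 0 is the def, then
+      // (value, predecessor-block) pairs follow, which is why only the def
+      // is processed here and why analyzePHINodes steps by two.
+#if 0
+      //   %reg1026 = PHI %reg1024, <BB#0>, %reg1025, <BB#1>
+      //   operand 0:    %reg1026 (def; the only operand processed here)
+      //   operands 1,2: %reg1024 coming from BB#0
+      //   operands 3,4: %reg1025 coming from BB#1
+#endif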
+      // Process all uses.
+      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+        unsigned MOReg = UseRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegUse(MOReg, MBB, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegUse(MOReg, MI);
+      }
+
+      // Process all defs.
+      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+        unsigned MOReg = DefRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegDef(MOReg, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegDef(MOReg, MI);
+      }
+    }
+
+    // Handle any virtual assignments from PHI nodes which might be at the
+    // bottom of this basic block. We check all of our successor blocks to see
+    // if they have PHI nodes, and if so, we simulate an assignment at the end
+    // of the current block.
+    if (!PHIVarInfo[MBB->getNumber()].empty()) {
+      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+             E = VarInfoVec.end(); I != E; ++I)
+        // Mark it alive only in the block we are representing.
+        MarkVirtRegAliveInBlock(getVarInfo(*I),
+                                MRI->getVRegDef(*I)->getParent(), MBB);
+    }
+
+    // Finally, if the last instruction in the block is a return, make sure to
+    // mark it as using all of the live-out values in the function.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+      MachineInstr *Ret = &MBB->back();
+
+      for (MachineRegisterInfo::liveout_iterator
+             I = MF->getRegInfo().liveout_begin(),
+             E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+               "Cannot have a live-out virtual register!");
+        HandlePhysRegUse(*I, Ret);
+
+        // Add live-out registers as implicit uses.
+        if (!Ret->readsRegister(*I))
+          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+      }
+    }
+
+    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+    // available at the end of the basic block.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (PhysRegDef[i] || PhysRegUse[i])
+        HandlePhysRegDef(i, 0);
+
+    std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+    std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MI's.
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+      if (VirtRegInfo[i].Kills[j] ==
+          MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterDead(i +
+                                     TargetRegisterInfo::FirstVirtualRegister,
+                                     TRI);
+      else
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterKilled(i +
+                                       TargetRegisterInfo::FirstVirtualRegister,
+                                       TRI);
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If there are, it is due to a bug in the instruction selector
+  // or some other part of the code generator.
+#ifndef NDEBUG
+  for (MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegDef;
+  delete[] PhysRegUse;
+  delete[] PHIVarInfo;
+
+  return false;
+}
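+// The dead-vs-kill conversion in the final loop above, on one vreg
+// (hypothetical instructions): when the recorded kill *is* the defining
+// instruction, the value was never read, so the def operand is marked dead;
+// otherwise the recorded instruction is the last read and is marked kill.
+#if 0
+//   %reg1024 = ADD ...         ; Kills[j] == this ADD  => operand made <dead>
+//   ...      = SUB %reg1024    ; Kills[j] == this SUB  => operand made <kill>
+#endif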
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI, + MachineInstr *NewMI) { + VarInfo &VI = getVarInfo(Reg); + std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI); +} + +/// removeVirtualRegistersKilled - Remove all killed info for the specified +/// instruction. +void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + MO.setIsKill(false); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + bool removed = getVarInfo(Reg).removeKill(MI); + assert(removed && "kill not in register's VarInfo?"); + removed = true; + } + } + } +} + +/// analyzePHINodes - Gather information about the PHI nodes in here. In +/// particular, we want to map the variable information of a virtual register +/// which is used in a PHI node. We map that to the BB the vreg is coming from. +/// +void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { + for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI->getOperand(i).getReg()); +} diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp new file mode 100644 index 000000000000..14acb71eeb40 --- /dev/null +++ b/lib/CodeGen/LowerSubregs.cpp @@ -0,0 +1,292 @@ +//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineFunction pass which runs after register +// allocation that turns subreg insert/extract instructions into register +// copies, as needed. This ensures correct codegen even if the coalescer +// isn't able to remove all subreg instructions. 
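+// [Editorial note, illustrative only: for example, on x86-64 an extract of
+// the low 32 bits of a 64-bit register (index 3, per the SUBREG_TO_REG
+// comment later in this file) lowers to an ordinary 32-bit register-to-
+// register copy produced by copyRegToReg.]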
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersubregs" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN LowerSubregsInstructionPass + : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {} + + const char *getPassName() const { + return "Subregister lowering instruction pass"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); + + bool LowerExtract(MachineInstr *MI); + bool LowerInsert(MachineInstr *MI); + bool LowerSubregToReg(MachineInstr *MI); + + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo &TRI); + void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, + const TargetRegisterInfo &TRI); + }; + + char LowerSubregsInstructionPass::ID = 0; +} + +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); +} + +/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the dead flag. +void +LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, + unsigned DstReg, + const TargetRegisterInfo &TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterDead(DstReg, &TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyRegToReg output doesn't reference destination register!"); + } +} + +/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the kill flag. 
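+/// [Editorial note: walking backwards from MI is safe here because the
+/// caller's copyRegToReg emitted the replacement copies immediately before
+/// the pseudo-instruction; the assert below fires if none of them mention
+/// the register in question.]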
+void +LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, + unsigned SrcReg, + const TargetRegisterInfo &TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterKilled(SrcReg, &TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyRegToReg output doesn't reference source register!"); + } +} + +bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && + MI->getOperand(2).isImm() && "Malformed extract_subreg"); + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SuperReg = MI->getOperand(1).getReg(); + unsigned SubIdx = MI->getOperand(2).getImm(); + unsigned SrcReg = TRI.getSubReg(SuperReg, SubIdx); + + assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) && + "Extract supperg source must be a physical register"); + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Extract destination must be in a physical register"); + + DOUT << "subreg: CONVERTING: " << *MI; + + if (SrcReg == DstReg) { + // No need to insert an identify copy instruction. + DOUT << "subreg: eliminated!"; + // Find the kill of the destination register's live range, and insert + // a kill of the source register at that point. + if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead()) + for (MachineBasicBlock::iterator MII = + next(MachineBasicBlock::iterator(MI)); + MII != MBB->end(); ++MII) + if (MII->killsRegister(DstReg, &TRI)) { + MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true); + break; + } + } else { + // Insert copy + const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg); + assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) && + "Extract subreg and Dst must be of same register class"); + TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC); + // Transfer the kill/dead flags, if needed. 
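+    // [Editorial note: post-register-allocation passes rely on accurate
+    // kill/dead flags, so flags carried by the erased pseudo-instruction
+    // must be re-attached to the real copy that replaces it.]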
+ if (MI->getOperand(0).isDead()) + TransferDeadFlag(MI, DstReg, TRI); + if (MI->getOperand(1).isKill()) + TransferKillFlag(MI, SrcReg, TRI); + +#ifndef NDEBUG + MachineBasicBlock::iterator dMI = MI; + DOUT << "subreg: " << *(--dMI); +#endif + } + + DOUT << "\n"; + MBB->erase(MI); + return true; +} + +bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && + MI->getOperand(1).isImm() && + (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && + MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned InsReg = MI->getOperand(2).getReg(); + unsigned InsSIdx = MI->getOperand(2).getSubReg(); + unsigned SubIdx = MI->getOperand(3).getImm(); + + assert(SubIdx != 0 && "Invalid index for insert_subreg"); + unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); + + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Insert destination must be in a physical register"); + assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + "Inserted value must be in a physical register"); + + DOUT << "subreg: CONVERTING: " << *MI; + + if (DstSubReg == InsReg && InsSIdx == 0) { + // No need to insert an identify copy instruction. + // Watch out for case like this: + // %RAX = ... + // %RAX = SUBREG_TO_REG 0, %EAX:3, 3 + // The first def is defining RAX, not EAX so the top bits were not + // zero extended. + DOUT << "subreg: eliminated!"; + } else { + // Insert sub-register copy + const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); + const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + // Transfer the kill/dead flags, if needed. 
+ if (MI->getOperand(0).isDead()) + TransferDeadFlag(MI, DstSubReg, TRI); + if (MI->getOperand(2).isKill()) + TransferKillFlag(MI, InsReg, TRI); + +#ifndef NDEBUG + MachineBasicBlock::iterator dMI = MI; + DOUT << "subreg: " << *(--dMI); +#endif + } + + DOUT << "\n"; + MBB->erase(MI); + return true; +} + +bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && + (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) && + (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && + MI->getOperand(3).isImm() && "Invalid insert_subreg"); + + unsigned DstReg = MI->getOperand(0).getReg(); +#ifndef NDEBUG + unsigned SrcReg = MI->getOperand(1).getReg(); +#endif + unsigned InsReg = MI->getOperand(2).getReg(); + unsigned SubIdx = MI->getOperand(3).getImm(); + + assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); + assert(SubIdx != 0 && "Invalid index for insert_subreg"); + unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); + + assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && + "Insert superreg source must be in a physical register"); + assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + "Inserted value must be in a physical register"); + + DOUT << "subreg: CONVERTING: " << *MI; + + if (DstSubReg == InsReg) { + // No need to insert an identify copy instruction. + DOUT << "subreg: eliminated!"; + } else { + // Insert sub-register copy + const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); + const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + // Transfer the kill/dead flags, if needed. + if (MI->getOperand(0).isDead()) + TransferDeadFlag(MI, DstSubReg, TRI); + if (MI->getOperand(1).isKill()) + TransferKillFlag(MI, InsReg, TRI); + +#ifndef NDEBUG + MachineBasicBlock::iterator dMI = MI; + DOUT << "subreg: " << *(--dMI); +#endif + } + + DOUT << "\n"; + MBB->erase(MI); + return true; +} + +/// runOnMachineFunction - Reduce subregister inserts and extracts to register +/// copies. 
+/// +bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DOUT << "Machine Function\n"; + + bool MadeChange = false; + + DOUT << "********** LOWERING SUBREG INSTRS **********\n"; + DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineInstr *MI = mi++; + + if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + MadeChange |= LowerExtract(MI); + } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + MadeChange |= LowerInsert(MI); + } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + MadeChange |= LowerSubregToReg(MI); + } + } + } + + return MadeChange; +} diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp new file mode 100644 index 000000000000..43326272c1b0 --- /dev/null +++ b/lib/CodeGen/MachOWriter.cpp @@ -0,0 +1,976 @@ +//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent Mach-O writer. This file writes +// out the Mach-O file in the following order: +// +// #1 FatHeader (universal-only) +// #2 FatArch (universal-only, 1 per universal arch) +// Per arch: +// #3 Header +// #4 Load Commands +// #5 Sections +// #6 Relocations +// #7 Symbols +// #8 Strings +// +//===----------------------------------------------------------------------===// + +#include "MachOWriter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +using namespace llvm; + +/// AddMachOWriter - Concrete function to add the Mach-O writer to the function +/// pass manager. +MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM, + raw_ostream &O, + TargetMachine &TM) { + MachOWriter *MOW = new MachOWriter(O, TM); + PM.add(MOW); + return &MOW->getMachineCodeEmitter(); +} + +//===----------------------------------------------------------------------===// +// MachOCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code + /// for functions to the Mach-O file. + class MachOCodeEmitter : public MachineCodeEmitter { + MachOWriter &MOW; + + /// Target machine description. + TargetMachine &TM; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. 
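+    /// [Editorial note: these are collected via addRelocation() during code
+    /// emission and rewritten in finishFunction() below, once the offsets of
+    /// basic blocks, constant pool entries and jump tables are known.]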
+    std::vector<MachineRelocation> Relocations;
+
+    /// CPLocations - This is a map of constant pool indices to offsets from the
+    /// start of the section for that constant pool index.
+    std::vector<uintptr_t> CPLocations;
+
+    /// CPSections - This is a map of constant pool indices to the MachOSection
+    /// containing the constant pool entry for that index.
+    std::vector<unsigned> CPSections;
+
+    /// JTLocations - This is a map of jump table indices to offsets from the
+    /// start of the section for that jump table index.
+    std::vector<uintptr_t> JTLocations;
+
+    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+    /// It is filled in by the StartMachineBasicBlock callback and queried by
+    /// the getMachineBasicBlockAddress callback.
+    std::vector<uintptr_t> MBBLocations;
+
+  public:
+    MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+      is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+      isLittleEndian = TM.getTargetData()->isLittleEndian();
+    }
+
+    virtual void startFunction(MachineFunction &MF);
+    virtual bool finishFunction(MachineFunction &MF);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    void emitConstantPool(MachineConstantPool *MCP);
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
+    }
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(JTLocations.size() > Index && "JT not emitted!");
+      return JTLocations[Index];
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+    }
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+      assert(0 && "get Label not implemented");
+      abort();
+      return 0;
+    }
+
+    virtual void emitLabel(uint64_t LabelID) {
+      assert(0 && "emit Label not implemented");
+      abort();
+    }
+
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
+                             unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void startGVStub(const GlobalValue* F, void *Buffer,
+                             unsigned StubSize) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void *finishGVStub(const GlobalValue* F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer to the appropriate alignment, power of 2.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned Align = Log2_32(std::max(FnAlign, TDAlign));
+  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
+  // Get the Mach-O Section that this function belongs in.
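+  // [Editorial note, a worked example of the size rounding just below:
+  // Mach-O stores alignment as log2, so Align == 4 means 16 bytes; a
+  // section size of 10 with Align == 2 becomes (10 + 4) & ~3 == 12.]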
+ MachOWriter::MachOSection *MOS = MOW.getTextSection(); + + // FIXME: better memory management + MOS->SectionData.reserve(4096); + BufferBegin = &MOS->SectionData[0]; + BufferEnd = BufferBegin + MOS->SectionData.capacity(); + + // Upgrade the section alignment if required. + if (MOS->align < Align) MOS->align = Align; + + // Round the size up to the correct alignment for starting the new function. + if ((MOS->size & ((1 << Align) - 1)) != 0) { + MOS->size += (1 << Align); + MOS->size &= ~((1 << Align) - 1); + } + + // FIXME: Using MOS->size directly here instead of calculating it from the + // output buffer size (impossible because the code emitter deals only in raw + // bytes) forces us to manually synchronize size and write padding zero bytes + // to the output buffer for all non-text sections. For text sections, we do + // not synchonize the output buffer, and we just blow up if anyone tries to + // write non-code to it. An assert should probably be added to + // AddSymbolToSection to prevent calling it on the text section. + CurBufferPtr = BufferBegin + MOS->size; + + // Clear per-function data structures. + CPLocations.clear(); + CPSections.clear(); + JTLocations.clear(); + MBBLocations.clear(); +} + +/// finishFunction - This callback is invoked after the function is completely +/// finished. +bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { + // Get the Mach-O Section that this function belongs in. + MachOWriter::MachOSection *MOS = MOW.getTextSection(); + + // Get a symbol for the function to add to the symbol table + // FIXME: it seems like we should call something like AddSymbolToSection + // in startFunction rather than changing the section size and symbol n_value + // here. + const GlobalValue *FuncV = MF.getFunction(); + MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM); + FnSym.n_value = MOS->size; + MOS->size = CurBufferPtr - BufferBegin; + + // Emit constant pool to appropriate section(s) + emitConstantPool(MF.getConstantPool()); + + // Emit jump tables to appropriate section + emitJumpTables(MF.getJumpTableInfo()); + + // If we have emitted any relocations to function-specific objects such as + // basic blocks, constant pools entries, or jump tables, record their + // addresses now so that we can rewrite them with the correct addresses + // later. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + intptr_t Addr; + + if (MR.isBasicBlock()) { + Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setConstantVal(MOS->Index); + MR.setResultPointer((void*)Addr); + } else if (MR.isJumpTableIndex()) { + Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setConstantVal(MOW.getJumpTableSection()->Index); + MR.setResultPointer((void*)Addr); + } else if (MR.isConstantPoolIndex()) { + Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); + MR.setResultPointer((void*)Addr); + } else if (MR.isGlobalValue()) { + // FIXME: This should be a set or something that uniques + MOW.PendingGlobals.push_back(MR.getGlobalValue()); + } else { + assert(0 && "Unhandled relocation type"); + } + MOS->Relocations.push_back(MR); + } + Relocations.clear(); + + // Finally, add it to the symtab. + MOW.SymbolTable.push_back(FnSym); + return false; +} + +/// emitConstantPool - For each constant pool entry, figure out which section +/// the constant should live in, allocate space for it, and emit it to the +/// Section data buffer. 
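+/// [Editorial note: concretely, per the comment below this mirrors gcc on
+/// OS X, e.g. a float constant lands in __TEXT,__literal4, a double in
+/// __TEXT,__literal8, and a 16-byte value in __TEXT,__literal16; anything
+/// else falls back to __TEXT,__const (see getConstSection in MachOWriter.h).]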
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { + const std::vector &CP = MCP->getConstants(); + if (CP.empty()) return; + + // FIXME: handle PIC codegen + assert(TM.getRelocationModel() != Reloc::PIC_ && + "PIC codegen not yet handled for mach-o jump tables!"); + + // Although there is no strict necessity that I am aware of, we will do what + // gcc for OS X does and put each constant pool entry in a section of constant + // objects of a certain size. That means that float constants go in the + // literal4 section, and double objects go in literal8, etc. + // + // FIXME: revisit this decision if we ever do the "stick everything into one + // "giant object for PIC" optimization. + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + const Type *Ty = CP[i].getType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + + MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + CPLocations.push_back(Sec->SectionData.size()); + CPSections.push_back(Sec->Index); + + // FIXME: remove when we have unified size + output buffer + Sec->size += Size; + + // Allocate space in the section for the global. + // FIXME: need alignment? + // FIXME: share between here and AddSymbolToSection? + for (unsigned j = 0; j < Size; ++j) + SecDataOut.outbyte(0); + + MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], + TM.getTargetData(), Sec->Relocations); + } +} + +/// emitJumpTables - Emit all the jump tables for a given jump table info +/// record to the appropriate section. +void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + // FIXME: handle PIC codegen + assert(TM.getRelocationModel() != Reloc::PIC_ && + "PIC codegen not yet handled for mach-o jump tables!"); + + MachOWriter::MachOSection *Sec = MOW.getJumpTableSection(); + unsigned TextSecIndex = MOW.getTextSection()->Index; + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + // For each jump table, record its offset from the start of the section, + // reserve space for the relocations to the MBBs, and add the relocations. + const std::vector &MBBs = JT[i].MBBs; + JTLocations.push_back(Sec->SectionData.size()); + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), + MBBs[mi])); + MR.setResultPointer((void *)JTLocations[i]); + MR.setConstantVal(TextSecIndex); + Sec->Relocations.push_back(MR); + SecDataOut.outaddr(0); + } + } + // FIXME: remove when we have unified size + output buffer + Sec->size = Sec->SectionData.size(); +} + +//===----------------------------------------------------------------------===// +// MachOWriter Implementation +//===----------------------------------------------------------------------===// + +char MachOWriter::ID = 0; +MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) + : MachineFunctionPass(&ID), O(o), TM(tm) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. 
+ MCE = new MachOCodeEmitter(*this); +} + +MachOWriter::~MachOWriter() { + delete MCE; +} + +void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { + const Type *Ty = GV->getType()->getElementType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = TM.getTargetData()->getPreferredAlignment(GV); + + // Reserve space in the .bss section for this symbol while maintaining the + // desired section alignment, which must be at least as much as required by + // this symbol. + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + if (Align) { + uint64_t OrigSize = Sec->size; + Align = Log2_32(Align); + Sec->align = std::max(unsigned(Sec->align), Align); + Sec->size = (Sec->size + Align - 1) & ~(Align-1); + + // Add alignment padding to buffer as well. + // FIXME: remove when we have unified size + output buffer + unsigned AlignedSize = Sec->size - OrigSize; + for (unsigned i = 0; i < AlignedSize; ++i) + SecDataOut.outbyte(0); + } + // Globals without external linkage apparently do not go in the symbol table. + if (!GV->hasLocalLinkage()) { + MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM); + Sym.n_value = Sec->size; + SymbolTable.push_back(Sym); + } + + // Record the offset of the symbol, and then allocate space for it. + // FIXME: remove when we have unified size + output buffer + Sec->size += Size; + + // Now that we know what section the GlovalVariable is going to be emitted + // into, update our mappings. + // FIXME: We may also need to update this when outputting non-GlobalVariable + // GlobalValues such as functions. + GVSection[GV] = Sec; + GVOffset[GV] = Sec->SectionData.size(); + + // Allocate space in the section for the global. + for (unsigned i = 0; i < Size; ++i) + SecDataOut.outbyte(0); +} + +void MachOWriter::EmitGlobal(GlobalVariable *GV) { + const Type *Ty = GV->getType()->getElementType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + bool NoInit = !GV->hasInitializer(); + + // If this global has a zero initializer, it is part of the .bss or common + // section. + if (NoInit || GV->getInitializer()->isNullValue()) { + // If this global is part of the common block, add it now. Variables are + // part of the common block if they are zero initialized and allowed to be + // merged with other symbols. + if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasCommonLinkage()) { + MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM); + // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in + // bytes of the symbol. + ExtOrCommonSym.n_value = Size; + SymbolTable.push_back(ExtOrCommonSym); + // Remember that we've seen this symbol + GVOffset[GV] = Size; + return; + } + // Otherwise, this symbol is part of the .bss section. + MachOSection *BSS = getBSSSection(); + AddSymbolToSection(BSS, GV); + return; + } + + // Scalar read-only data goes in a literal section if the scalar is 4, 8, or + // 16 bytes, or a cstring. Other read only data goes into a regular const + // section. Read-write data goes in the data section. + MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : + getDataSection(); + AddSymbolToSection(Sec, GV); + InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], + TM.getTargetData(), Sec->Relocations); +} + + +bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object. 
+ return false; +} + +bool MachOWriter::doInitialization(Module &M) { + // Set the magic value, now that we know the pointer size and endianness + Header.setMagic(isLittleEndian, is64Bit); + + // Set the file type + // FIXME: this only works for object files, we do not support the creation + // of dynamic libraries or executables at this time. + Header.filetype = MachOHeader::MH_OBJECT; + + Mang = new Mangler(M); + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the Mach-O file to 'O'. +bool MachOWriter::doFinalization(Module &M) { + // FIXME: we don't handle debug info yet, we should probably do that. + + // Okay, the.text section has been completed, build the .data, .bss, and + // "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the header and load commands. + EmitHeaderAndLoadCommands(); + + // Emit the various sections and their relocation info. + EmitSections(); + + // Write the symbol table and the string table to the end of the file. + O.write((char*)&SymT[0], SymT.size()); + O.write((char*)&StrT[0], StrT.size()); + + // We are done with the abstract symbols. + SectionList.clear(); + SymbolTable.clear(); + DynamicSymbolTable.clear(); + + // Release the name mangler object. + delete Mang; Mang = 0; + return false; +} + +void MachOWriter::EmitHeaderAndLoadCommands() { + // Step #0: Fill in the segment load command size, since we need it to figure + // out the rest of the header fields + MachOSegment SEG("", is64Bit); + SEG.nsects = SectionList.size(); + SEG.cmdsize = SEG.cmdSize(is64Bit) + + SEG.nsects * SectionList[0]->cmdSize(is64Bit); + + // Step #1: calculate the number of load commands. We always have at least + // one, for the LC_SEGMENT load command, plus two for the normal + // and dynamic symbol tables, if there are any symbols. + Header.ncmds = SymbolTable.empty() ? 1 : 3; + + // Step #2: calculate the size of the load commands + Header.sizeofcmds = SEG.cmdsize; + if (!SymbolTable.empty()) + Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize; + + // Step #3: write the header to the file + // Local alias to shortenify coming code. 
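+  // [Editorial note, a sketch of what the writes below produce: the header
+  // is seven 32-bit words (magic, cputype, cpusubtype, filetype, ncmds,
+  // sizeofcmds, flags), plus one reserved word for 64-bit files, matching
+  // MachOHeader::cmdSize() of 28 or 32 bytes.]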
+ DataBuffer &FH = Header.HeaderData; + OutputBuffer FHOut(FH, is64Bit, isLittleEndian); + + FHOut.outword(Header.magic); + FHOut.outword(TM.getMachOWriterInfo()->getCPUType()); + FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType()); + FHOut.outword(Header.filetype); + FHOut.outword(Header.ncmds); + FHOut.outword(Header.sizeofcmds); + FHOut.outword(Header.flags); + if (is64Bit) + FHOut.outword(Header.reserved); + + // Step #4: Finish filling in the segment load command and write it out + for (std::vector::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) + SEG.filesize += (*I)->size; + + SEG.vmsize = SEG.filesize; + SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; + + FHOut.outword(SEG.cmd); + FHOut.outword(SEG.cmdsize); + FHOut.outstring(SEG.segname, 16); + FHOut.outaddr(SEG.vmaddr); + FHOut.outaddr(SEG.vmsize); + FHOut.outaddr(SEG.fileoff); + FHOut.outaddr(SEG.filesize); + FHOut.outword(SEG.maxprot); + FHOut.outword(SEG.initprot); + FHOut.outword(SEG.nsects); + FHOut.outword(SEG.flags); + + // Step #5: Finish filling in the fields of the MachOSections + uint64_t currentAddr = 0; + for (std::vector::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + MachOSection *MOS = *I; + MOS->addr = currentAddr; + MOS->offset = currentAddr + SEG.fileoff; + + // FIXME: do we need to do something with alignment here? + currentAddr += MOS->size; + } + + // Step #6: Emit the symbol table to temporary buffers, so that we know the + // size of the string table when we write the next load command. This also + // sorts and assigns indices to each of the symbols, which is necessary for + // emitting relocations to externally-defined objects. + BufferSymbolAndStringTable(); + + // Step #7: Calculate the number of relocations for each section and write out + // the section commands for each section + currentAddr += SEG.fileoff; + for (std::vector::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + MachOSection *MOS = *I; + // Convert the relocations to target-specific relocations, and fill in the + // relocation offset for this section. + CalculateRelocations(*MOS); + MOS->reloff = MOS->nreloc ? currentAddr : 0; + currentAddr += MOS->nreloc * 8; + + // write the finalized section command to the output buffer + FHOut.outstring(MOS->sectname, 16); + FHOut.outstring(MOS->segname, 16); + FHOut.outaddr(MOS->addr); + FHOut.outaddr(MOS->size); + FHOut.outword(MOS->offset); + FHOut.outword(MOS->align); + FHOut.outword(MOS->reloff); + FHOut.outword(MOS->nreloc); + FHOut.outword(MOS->flags); + FHOut.outword(MOS->reserved1); + FHOut.outword(MOS->reserved2); + if (is64Bit) + FHOut.outword(MOS->reserved3); + } + + // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands + SymTab.symoff = currentAddr; + SymTab.nsyms = SymbolTable.size(); + SymTab.stroff = SymTab.symoff + SymT.size(); + SymTab.strsize = StrT.size(); + FHOut.outword(SymTab.cmd); + FHOut.outword(SymTab.cmdsize); + FHOut.outword(SymTab.symoff); + FHOut.outword(SymTab.nsyms); + FHOut.outword(SymTab.stroff); + FHOut.outword(SymTab.strsize); + + // FIXME: set DySymTab fields appropriately + // We should probably just update these in BufferSymbolAndStringTable since + // thats where we're partitioning up the different kinds of symbols. 
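+  // [Editorial note, a worked example of the partition indices emitted
+  // below, assuming the counters start at zero: with 2 local, 3
+  // defined-external and 1 undefined symbol, the counting loop in
+  // BufferSymbolAndStringTable yields ilocalsym=0/nlocalsym=2,
+  // iextdefsym=2/nextdefsym=3, iundefsym=5/nundefsym=1.]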
+ FHOut.outword(DySymTab.cmd); + FHOut.outword(DySymTab.cmdsize); + FHOut.outword(DySymTab.ilocalsym); + FHOut.outword(DySymTab.nlocalsym); + FHOut.outword(DySymTab.iextdefsym); + FHOut.outword(DySymTab.nextdefsym); + FHOut.outword(DySymTab.iundefsym); + FHOut.outword(DySymTab.nundefsym); + FHOut.outword(DySymTab.tocoff); + FHOut.outword(DySymTab.ntoc); + FHOut.outword(DySymTab.modtaboff); + FHOut.outword(DySymTab.nmodtab); + FHOut.outword(DySymTab.extrefsymoff); + FHOut.outword(DySymTab.nextrefsyms); + FHOut.outword(DySymTab.indirectsymoff); + FHOut.outword(DySymTab.nindirectsyms); + FHOut.outword(DySymTab.extreloff); + FHOut.outword(DySymTab.nextrel); + FHOut.outword(DySymTab.locreloff); + FHOut.outword(DySymTab.nlocrel); + + O.write((char*)&FH[0], FH.size()); +} + +/// EmitSections - Now that we have constructed the file header and load +/// commands, emit the data for each section to the file. +void MachOWriter::EmitSections() { + for (std::vector::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) + // Emit the contents of each section + O.write((char*)&(*I)->SectionData[0], (*I)->size); + for (std::vector::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) + // Emit the relocation entry data for each section. + O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); +} + +/// PartitionByLocal - Simple boolean predicate that returns true if Sym is +/// a local symbol rather than an external symbol. +bool MachOWriter::PartitionByLocal(const MachOSym &Sym) { + return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0; +} + +/// PartitionByDefined - Simple boolean predicate that returns true if Sym is +/// defined in this module. +bool MachOWriter::PartitionByDefined(const MachOSym &Sym) { + // FIXME: Do N_ABS or N_INDR count as defined? + return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT; +} + +/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them +/// each a string table index so that they appear in the correct order in the +/// output file. +void MachOWriter::BufferSymbolAndStringTable() { + // The order of the symbol table is: + // 1. local symbols + // 2. defined external symbols (sorted by name) + // 3. undefined external symbols (sorted by name) + + // Before sorting the symbols, check the PendingGlobals for any undefined + // globals that need to be put in the symbol table. + for (std::vector::iterator I = PendingGlobals.begin(), + E = PendingGlobals.end(); I != E; ++I) { + if (GVOffset[*I] == 0 && GVSection[*I] == 0) { + MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM); + SymbolTable.push_back(UndfSym); + GVOffset[*I] = -1; + } + } + + // Sort the symbols by name, so that when we partition the symbols by scope + // of definition, we won't have to sort by name within each partition. + std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp()); + + // Parition the symbol table entries so that all local symbols come before + // all symbols with external linkage. { 1 | 2 3 } + std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal); + + // Advance iterator to beginning of external symbols and partition so that + // all external symbols defined in this module come before all external + // symbols defined elsewhere. 
{ 1 | 2 | 3 }
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (!PartitionByLocal(*I)) {
+      std::partition(I, E, PartitionByDefined);
+      break;
+    }
+  }
+
+  // Calculate the starting index for each of the local, extern defined, and
+  // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+  // load command.
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (PartitionByLocal(*I)) {
+      ++DySymTab.nlocalsym;
+      ++DySymTab.iextdefsym;
+      ++DySymTab.iundefsym;
+    } else if (PartitionByDefined(*I)) {
+      ++DySymTab.nextdefsym;
+      ++DySymTab.iundefsym;
+    } else {
+      ++DySymTab.nundefsym;
+    }
+  }
+
+  // Write out a leading zero byte when emitting string table, for n_strx == 0
+  // which means an empty string.
+  OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+  StrTOut.outbyte(0);
+
+  // The order of the string table is:
+  //   1. strings for external symbols
+  //   2. strings for local symbols
+  // Since this is the opposite order from the symbol table, which we have just
+  // sorted, we can walk the symbol table backwards to output the string table.
+  for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+         E = SymbolTable.rend(); I != E; ++I) {
+    if (I->GVName == "") {
+      I->n_strx = 0;
+    } else {
+      I->n_strx = StrT.size();
+      StrTOut.outstring(I->GVName, I->GVName.length()+1);
+    }
+  }
+
+  OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+  unsigned index = 0;
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I, ++index) {
+    // Add the section base address to the section offset in the n_value field
+    // to calculate the full address.
+    // FIXME: handle symbols where the n_value field is not the address
+    GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+    if (GV && GVSection[GV])
+      I->n_value += GVSection[GV]->addr;
+    if (GV && (GVOffset[GV] == -1))
+      GVOffset[GV] = index;
+
+    // Emit nlist to buffer
+    SymTOut.outword(I->n_strx);
+    SymTOut.outbyte(I->n_type);
+    SymTOut.outbyte(I->n_sect);
+    SymTOut.outhalf(I->n_desc);
+    SymTOut.outaddr(I->n_value);
+  }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section.  From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+  for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = MOS.Relocations[i];
+    unsigned TargetSection = MR.getConstantVal();
+    unsigned TargetAddr = 0;
+    unsigned TargetIndex = 0;
+
+    // This is a scattered relocation entry if it points to a global value with
+    // a non-zero offset.
+    bool Scattered = false;
+    bool Extern = false;
+
+    // Since we may not have seen the GlobalValue we were interested in yet at
+    // the time we emitted the relocation for it, fix it up now so that it
+    // points to the offset into the correct section.
+    if (MR.isGlobalValue()) {
+      GlobalValue *GV = MR.getGlobalValue();
+      MachOSection *MOSPtr = GVSection[GV];
+      intptr_t Offset = GVOffset[GV];
+
+      // If we have never seen the global before, it must be to a symbol
+      // defined in another module (N_UNDF).
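+      // [Editorial note: for such external symbols the relocation is emitted
+      // against a symbol-table index rather than a section; GVOffset[GV] was
+      // repurposed in BufferSymbolAndStringTable above to hold that index.]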
+      if (!MOSPtr) {
+        // FIXME: need to append stub suffix
+        Extern = true;
+        TargetAddr = 0;
+        TargetIndex = GVOffset[GV];
+      } else {
+        Scattered = TargetSection != 0;
+        TargetSection = MOSPtr->Index;
+      }
+      MR.setResultPointer((void*)Offset);
+    }
+
+    // If the symbol is locally defined, pass in the address of the section and
+    // the section index to the code which will generate the target relocation.
+    if (!Extern) {
+      MachOSection &To = *SectionList[TargetSection - 1];
+      TargetAddr = To.addr;
+      TargetIndex = To.Index;
+    }
+
+    OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+    OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+
+    MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+                                      RelocOut, SecOut, Scattered, Extern);
+  }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                          const TargetData *TD,
+                          std::vector<MachineRelocation> &MRs) {
+  typedef std::pair<const Constant*, intptr_t> CPair;
+  std::vector<CPair> WorkList;
+
+  WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+
+  intptr_t ScatteredOffset = 0;
+
+  while (!WorkList.empty()) {
+    const Constant *PC = WorkList.back().first;
+    intptr_t PA = WorkList.back().second;
+    WorkList.pop_back();
+
+    if (isa<UndefValue>(PC)) {
+      continue;
+    } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+      unsigned ElementSize =
+        TD->getTypeAllocSize(CP->getType()->getElementType());
+      for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+      //
+      // FIXME: Handle ConstantExpression.  See EE::getConstantValue()
+      //
+      switch (CE->getOpcode()) {
+      case Instruction::GetElementPtr: {
+        SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+        ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+                                               &Indices[0], Indices.size());
+        WorkList.push_back(CPair(CE->getOperand(0), PA));
+        break;
+      }
+      case Instruction::Add:
+      default:
+        cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+        abort();
+        break;
+      }
+    } else if (PC->getType()->isSingleValueType()) {
+      uint8_t *ptr = (uint8_t *)PA;
+      switch (PC->getType()->getTypeID()) {
+      case Type::IntegerTyID: {
+        unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+        uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
+        if (NumBits <= 8)
+          ptr[0] = val;
+        else if (NumBits <= 16) {
+          if (TD->isBigEndian())
+            val = ByteSwap_16(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+        } else if (NumBits <= 32) {
+          if (TD->isBigEndian())
+            val = ByteSwap_32(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+        } else if (NumBits <= 64) {
+          if (TD->isBigEndian())
+            val = ByteSwap_64(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+          ptr[4] = val >> 32;
+          ptr[5] = val >> 40;
+          ptr[6] = val >> 48;
+          ptr[7] = val >> 56;
+        } else {
+          assert(0 && "Not implemented: bit widths > 64");
+        }
+        break;
+      }
+      case Type::FloatTyID: {
+        uint32_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+                       getZExtValue();
+        if (TD->isBigEndian())
+          val = ByteSwap_32(val);
+        ptr[0] = val;
+        ptr[1] = val >> 8;
+        ptr[2] = val >> 16;
+        ptr[3] = val >> 24;
+        break;
+      }
+      case Type::DoubleTyID: {
+        uint64_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+ getZExtValue(); + if (TD->isBigEndian()) + val = ByteSwap_64(val); + ptr[0] = val; + ptr[1] = val >> 8; + ptr[2] = val >> 16; + ptr[3] = val >> 24; + ptr[4] = val >> 32; + ptr[5] = val >> 40; + ptr[6] = val >> 48; + ptr[7] = val >> 56; + break; + } + case Type::PointerTyID: + if (isa(PC)) + memset(ptr, 0, TD->getPointerSize()); + else if (const GlobalValue* GV = dyn_cast(PC)) { + // FIXME: what about function stubs? + MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, + MachineRelocation::VANILLA, + const_cast(GV), + ScatteredOffset)); + ScatteredOffset = 0; + } else + assert(0 && "Unknown constant pointer type!"); + break; + default: + cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n"; + abort(); + } + } else if (isa(PC)) { + memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType())); + } else if (const ConstantArray *CPA = dyn_cast(PC)) { + unsigned ElementSize = + TD->getTypeAllocSize(CPA->getType()->getElementType()); + for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) + WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize)); + } else if (const ConstantStruct *CPS = dyn_cast(PC)) { + const StructLayout *SL = + TD->getStructLayout(cast(CPS->getType())); + for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) + WorkList.push_back(CPair(CPS->getOperand(i), + PA+SL->getElementOffset(i))); + } else { + cerr << "Bad Type: " << *PC->getType() << "\n"; + assert(0 && "Unknown constant type to initialize memory with!"); + } + } +} + +MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, + TargetMachine &TM) : + GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), + n_desc(0), n_value(0) { + + const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + + switch (GV->getLinkage()) { + default: + assert(0 && "Unexpected linkage type!"); + break; + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::CommonLinkage: + assert(!isa(gv) && "Unexpected linkage type for Function!"); + case GlobalValue::ExternalLinkage: + GVName = TAI->getGlobalPrefix() + name; + n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT; + break; + case GlobalValue::PrivateLinkage: + GVName = TAI->getPrivateGlobalPrefix() + name; + break; + case GlobalValue::InternalLinkage: + GVName = TAI->getGlobalPrefix() + name; + break; + } +} diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h new file mode 100644 index 000000000000..6ab66eee926b --- /dev/null +++ b/lib/CodeGen/MachOWriter.h @@ -0,0 +1,629 @@ +//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOWriter class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MACHOWRITER_H +#define MACHOWRITER_H + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachOWriterInfo.h" +#include + +namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class MachOCodeEmitter; + class OutputBuffer; + class raw_ostream; + + /// MachOSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct MachOSym { + const GlobalValue *GV; // The global value this corresponds to. + std::string GVName; // The mangled name of the global value. + uint32_t n_strx; // index into the string table + uint8_t n_type; // type flag + uint8_t n_sect; // section number or NO_SECT + int16_t n_desc; // see + uint64_t n_value; // value for this symbol (or stab offset) + + // Constants for the n_sect field + // see + enum { NO_SECT = 0 }; // symbol is not in any section + + // Constants for the n_type field + // see + enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT + N_ABS = 0x2, // absolute, n_sect == NO_SECT + N_SECT = 0xe, // defined in section number n_sect + N_PBUD = 0xc, // prebound undefined (defined in a dylib) + N_INDR = 0xa // indirect + }; + // The following bits are OR'd into the types above. For example, a type + // of 0x0f would be an external N_SECT symbol (0x0e | 0x01). + enum { N_EXT = 0x01, // external symbol bit + N_PEXT = 0x10 // private external symbol bit + }; + + // Constants for the n_desc field + // see + enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 + }; + enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped + N_WEAK_REF = 0x0040, // symbol is weak referenced + N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition + }; + + MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, + TargetMachine &TM); + }; + + /// MachOWriter - This class implements the common target-independent code for + /// writing Mach-O files. Targets should derive a class from this to + /// parameterize the output format. + /// + class MachOWriter : public MachineFunctionPass { + friend class MachOCodeEmitter; + public: + static char ID; + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + MachOWriter(raw_ostream &O, TargetMachine &TM); + virtual ~MachOWriter(); + + virtual const char *getPassName() const { + return "Mach-O Writer"; + } + + typedef std::vector DataBuffer; + protected: + /// Output stream to send the resultant object file to. + /// + raw_ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. + MachOCodeEmitter *MCE; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. 
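+    /// [Editorial note: e.g. a little-endian 64-bit target selects the
+    /// 0xcffaedfe magic and a big-endian 32-bit target gets 0xfeedface;
+    /// these members feed Header.setMagic() from doInitialization().]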
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the Mach-O file.
+    bool doInitialization(Module &M);
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// doFinalization - Now that the module has been completely processed, emit
+    /// the Mach-O file to 'O'.
+    bool doFinalization(Module &M);
+
+    /// MachOHeader - This struct contains the header information about a
+    /// specific architecture type/subtype pair that is emitted to the file.
+    struct MachOHeader {
+      uint32_t magic;      // mach magic number identifier
+      uint32_t filetype;   // type of file
+      uint32_t ncmds;      // number of load commands
+      uint32_t sizeofcmds; // the size of all the load commands
+      uint32_t flags;      // flags
+      uint32_t reserved;   // 64-bit only
+
+      /// HeaderData - The actual data for the header which we are building
+      /// up for emission to the file.
+      DataBuffer HeaderData;
+
+      // Constants for the filetype field
+      // see <mach-o/loader.h> for additional info on the various types
+      enum { MH_OBJECT     = 1,  // relocatable object file
+             MH_EXECUTE    = 2,  // demand paged executable file
+             MH_FVMLIB     = 3,  // fixed VM shared library file
+             MH_CORE       = 4,  // core file
+             MH_PRELOAD    = 5,  // preloaded executable file
+             MH_DYLIB      = 6,  // dynamically bound shared library
+             MH_DYLINKER   = 7,  // dynamic link editor
+             MH_BUNDLE     = 8,  // dynamically bound bundle file
+             MH_DYLIB_STUB = 9,  // shared library stub for static linking only
+             MH_DSYM       = 10  // companion file with only debug sections
+      };
+
+      // Constants for the flags field
+      enum { MH_NOUNDEFS  = 1 << 0,
+             // the object file has no undefined references
+             MH_INCRLINK  = 1 << 1,
+             // the object file is the output of an incremental link against
+             // a base file and cannot be link edited again
+             MH_DYLDLINK  = 1 << 2,
+             // the object file is input for the dynamic linker and cannot be
+             // statically link edited again.
+             MH_BINDATLOAD = 1 << 3,
+             // the object file's undefined references are bound by the
+             // dynamic linker when loaded.
+             MH_PREBOUND  = 1 << 4,
+             // the file has its dynamic undefined references prebound
+             MH_SPLIT_SEGS = 1 << 5,
+             // the file has its read-only and read-write segments split
+             // see <mach/shared_memory_server.h>
+             MH_LAZY_INIT = 1 << 6,
+             // the shared library init routine is to be run lazily via
+             // catching memory faults to its writable segments (obsolete)
+             MH_TWOLEVEL  = 1 << 7,
+             // the image is using two-level namespace bindings
+             MH_FORCE_FLAT = 1 << 8,
+             // the executable is forcing all images to use flat namespace
+             // bindings.
+             MH_NOMULTIDEFS = 1 << 9,
+             // this umbrella guarantees no multiple definitions of symbols
+             // in its sub-images so the two-level namespace hints can
+             // always be used.
+             MH_NOFIXPREBINDING = 1 << 10,
+             // do not have dyld notify the prebinding agent about this
+             // executable.
+             MH_PREBINDABLE = 1 << 11,
+             // the binary is not prebound but can have its prebinding
+             // redone.  only used when MH_PREBOUND is not set.
+             MH_ALLMODSBOUND = 1 << 12,
+             // indicates that this binary binds to all two-level namespace
+             // modules of its dependent libraries.  Only used when
+             // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+             MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+             // safe to divide up the sections into sub-sections via symbols
+             // for dead code stripping.
+           MH_CANONICAL = 1 << 14,
+           // the binary has been canonicalized via the unprebind operation
+           MH_WEAK_DEFINES = 1 << 15,
+           // the final linked image contains external weak symbols
+           MH_BINDS_TO_WEAK = 1 << 16,
+           // the final linked image uses weak symbols
+           MH_ALLOW_STACK_EXECUTION = 1 << 17
+           // When this bit is set, all stacks in the task will be given
+           // stack execution privilege. Only used in MH_EXECUTE filetype
+    };
+
+    MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+                    reserved(0) { }
+
+    /// cmdSize - This routine returns the size of the MachOHeader as written
+    /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+    unsigned cmdSize(bool is64Bit) const {
+      if (is64Bit)
+        return 8 * sizeof(uint32_t);
+      else
+        return 7 * sizeof(uint32_t);
+    }
+
+    /// setMagic - This routine sets the appropriate value for the 'magic'
+    /// field based on pointer size and endianness.
+    void setMagic(bool isLittleEndian, bool is64Bit) {
+      if (isLittleEndian)
+        if (is64Bit) magic = 0xcffaedfe;
+        else         magic = 0xcefaedfe;
+      else
+        if (is64Bit) magic = 0xfeedfacf;
+        else         magic = 0xfeedface;
+    }
+  };
+
+  /// Header - An instance of MachOHeader that we will update while we build
+  /// the file, and then emit during finalization.
+  MachOHeader Header;
+
+  /// MachOSegment - This struct contains the necessary information to
+  /// emit the load commands for each section in the file.
+  struct MachOSegment {
+    uint32_t cmd;        // LC_SEGMENT or LC_SEGMENT_64
+    uint32_t cmdsize;    // Total size of this struct and section commands
+    std::string segname; // segment name
+    uint64_t vmaddr;     // address of this segment
+    uint64_t vmsize;     // size of this segment, may be larger than filesize
+    uint64_t fileoff;    // offset in file
+    uint64_t filesize;   // amount to read from file
+    uint32_t maxprot;    // maximum VM protection
+    uint32_t initprot;   // initial VM protection
+    uint32_t nsects;     // number of sections in this segment
+    uint32_t flags;      // flags
+
+    // The following constants are getting pulled in by one of the
+    // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE    0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ    0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE   0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL     0x07
+#endif
+
+    // Constants for the vm protection fields
+    // see <mach/vm_prot.h>
+    enum { SEG_VM_PROT_NONE    = VM_PROT_NONE,
+           SEG_VM_PROT_READ    = VM_PROT_READ,  // read permission
+           SEG_VM_PROT_WRITE   = VM_PROT_WRITE, // write permission
+           SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+           SEG_VM_PROT_ALL     = VM_PROT_ALL
+    };
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_SEGMENT    = 0x01, // segment of this file to be mapped
+           LC_SEGMENT_64 = 0x19  // 64-bit segment of this file to be mapped
+    };
+
+    /// cmdSize - This routine returns the size of the segment load command as
+    /// written to disk, depending on whether the destination is a 64 bit
+    /// Mach-O file.
+    unsigned cmdSize(bool is64Bit) const {
+      if (is64Bit)
+        return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+      else
+        return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
+    }
+
+    MachOSegment(const std::string &seg, bool is64Bit)
+      : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+        vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+        initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+  };
+
+  /// MachOSection - This struct contains information about each section in a
+  /// particular segment that is emitted to the file.  This is eventually
+  /// turned into the SectionCommand in the load command for a particular
+  /// segment.
+  struct MachOSection {
+    std::string sectname; // name of this section
+    std::string segname;  // segment this section goes in
+    uint64_t addr;        // memory address of this section
+    uint64_t size;        // size in bytes of this section
+    uint32_t offset;      // file offset of this section
+    uint32_t align;       // section alignment (power of 2)
+    uint32_t reloff;      // file offset of relocation entries
+    uint32_t nreloc;      // number of relocation entries
+    uint32_t flags;       // flags (section type and attributes)
+    uint32_t reserved1;   // reserved (for offset or index)
+    uint32_t reserved2;   // reserved (for count or sizeof)
+    uint32_t reserved3;   // reserved (64 bit only)
+
+    /// A unique number for this section, which will be used to match symbols
+    /// to the correct section.
+    uint32_t Index;
+
+    /// SectionData - The actual data for this section which we are building
+    /// up for emission to the file.
+    DataBuffer SectionData;
+
+    /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+    /// them out at the appropriate location in the file.
+    DataBuffer RelocBuffer;
+
+    /// Relocations - The relocations that we have encountered so far in this
+    /// section that we will need to convert to MachORelocation entries when
+    /// the file is written.
+    std::vector<MachineRelocation> Relocations;
+
+    // Constants for the section types (low 8 bits of flags field)
+    // see <mach-o/loader.h>
+    enum { S_REGULAR = 0,
+           // regular section
+           S_ZEROFILL = 1,
+           // zero fill on demand section
+           S_CSTRING_LITERALS = 2,
+           // section with only literal C strings
+           S_4BYTE_LITERALS = 3,
+           // section with only 4 byte literals
+           S_8BYTE_LITERALS = 4,
+           // section with only 8 byte literals
+           S_LITERAL_POINTERS = 5,
+           // section with only pointers to literals
+           S_NON_LAZY_SYMBOL_POINTERS = 6,
+           // section with only non-lazy symbol pointers
+           S_LAZY_SYMBOL_POINTERS = 7,
+           // section with only lazy symbol pointers
+           S_SYMBOL_STUBS = 8,
+           // section with only symbol stubs
+           // byte size of stub in the reserved2 field
+           S_MOD_INIT_FUNC_POINTERS = 9,
+           // section with only function pointers for initialization
+           S_MOD_TERM_FUNC_POINTERS = 10,
+           // section with only function pointers for termination
+           S_COALESCED = 11,
+           // section contains symbols that are coalesced
+           S_GB_ZEROFILL = 12,
+           // zero fill on demand section (that can be larger than 4GB)
+           S_INTERPOSING = 13,
+           // section with only pairs of function pointers for interposing
+           S_16BYTE_LITERALS = 14
+           // section with only 16 byte literals
+    };
+
+    // Constants for the section flags (high 24 bits of flags field)
+    // see <mach-o/loader.h>
+    enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+           // section contains only true machine instructions
+           S_ATTR_NO_TOC = 1 << 30,
+           // section contains coalesced symbols that are not to be in a
+           // ranlib table of contents
+           S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+           // ok to strip static symbols in this section in files with the
+           // MH_DYLDLINK flag
+           S_ATTR_NO_DEAD_STRIP = 1 << 28,
+           // no dead stripping
+           S_ATTR_LIVE_SUPPORT = 1 << 27,
+           // blocks are live if they reference live blocks
+           S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+           // used with i386 code stubs written on by dyld
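+           // (As a concrete illustration of how the type and attribute bits
+           // combine: getTextSection() below builds __TEXT,__text with
+           // flags = S_REGULAR | S_ATTR_PURE_INSTRUCTIONS |
+           // S_ATTR_SOME_INSTRUCTIONS, i.e. 0x80000400.)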
+           S_ATTR_DEBUG = 1 << 25,
+           // a debug section
+           S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+           // section contains some machine instructions
+           S_ATTR_EXT_RELOC = 1 << 9,
+           // section has external relocation entries
+           S_ATTR_LOC_RELOC = 1 << 8
+           // section has local relocation entries
+    };
+
+    /// cmdSize - This routine returns the size of the MachOSection as written
+    /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+    unsigned cmdSize(bool is64Bit) const {
+      if (is64Bit)
+        return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+      else
+        return 9 * sizeof(uint32_t) + 32;  // addresses only 32 bits
+    }
+
+    MachOSection(const std::string &seg, const std::string &sect)
+      : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+        reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+        reserved3(0) { }
+  };
+
+private:
+
+  /// SectionList - This is the list of sections that we have emitted to the
+  /// file.  Once the file has been completely built, the segment load command
+  /// SectionCommands are constructed from this info.
+  std::vector<MachOSection*> SectionList;
+
+  /// SectionLookup - This is a mapping from section name to SectionList entry
+  std::map<std::string, MachOSection*> SectionLookup;
+
+  /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+  /// to aid in emitting relocations.
+  std::map<GlobalValue*, MachOSection*> GVSection;
+
+  /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+  /// start of the section in which the GV resides, to aid in emitting
+  /// relocations.
+  std::map<GlobalValue*, intptr_t> GVOffset;
+
+  /// getSection - Return the section with the specified name, creating a new
+  /// section if one does not already exist.
+  MachOSection *getSection(const std::string &seg, const std::string &sect,
+                           unsigned Flags = 0) {
+    MachOSection *MOS = SectionLookup[seg+sect];
+    if (MOS) return MOS;
+
+    MOS = new MachOSection(seg, sect);
+    SectionList.push_back(MOS);
+    MOS->Index = SectionList.size();
+    MOS->flags = MachOSection::S_REGULAR | Flags;
+    SectionLookup[seg+sect] = MOS;
+    return MOS;
+  }
+  MachOSection *getTextSection(bool isCode = true) {
+    if (isCode)
+      return getSection("__TEXT", "__text",
+                        MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+                        MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+    else
+      return getSection("__TEXT", "__text");
+  }
+  MachOSection *getBSSSection() {
+    return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+  }
+  MachOSection *getDataSection() {
+    return getSection("__DATA", "__data");
+  }
+  MachOSection *getConstSection(Constant *C) {
+    const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+    if (CVA && CVA->isCString())
+      return getSection("__TEXT", "__cstring",
+                        MachOSection::S_CSTRING_LITERALS);
+
+    const Type *Ty = C->getType();
+    if (Ty->isPrimitiveType() || Ty->isInteger()) {
+      unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+      switch (Size) {
+      default: break; // Fall through to __TEXT,__const
+      case 4:
+        return getSection("__TEXT", "__literal4",
+                          MachOSection::S_4BYTE_LITERALS);
+      case 8:
+        return getSection("__TEXT", "__literal8",
+                          MachOSection::S_8BYTE_LITERALS);
+      case 16:
+        return getSection("__TEXT", "__literal16",
+                          MachOSection::S_16BYTE_LITERALS);
+      }
+    }
+    return getSection("__TEXT", "__const");
+  }
+  MachOSection *getJumpTableSection() {
+    if (TM.getRelocationModel() == Reloc::PIC_)
+      return getTextSection(false);
+    else
+      return getSection("__TEXT", "__const");
+  }
+
+  /// MachOSymTab - This struct contains information about the offsets and
+  /// size of symbol table information.
+  struct MachOSymTab {
+    uint32_t cmd;     // LC_SYMTAB
+    uint32_t cmdsize; // sizeof( MachOSymTab )
+    uint32_t symoff;  // symbol table offset
+    uint32_t nsyms;   // number of symbol table entries
+    uint32_t stroff;  // string table offset
+    uint32_t strsize; // string table size in bytes
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
+    };
+
+    MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+                    nsyms(0), stroff(0), strsize(0) { }
+  };
+
+  /// MachODySymTab - This struct contains information about the offsets and
+  /// size of the dynamic link-edit symbol table information.
+  struct MachODySymTab {
+    uint32_t cmd;            // LC_DYSYMTAB
+    uint32_t cmdsize;        // sizeof( MachODySymTab )
+    uint32_t ilocalsym;      // index to local symbols
+    uint32_t nlocalsym;      // number of local symbols
+    uint32_t iextdefsym;     // index to externally defined symbols
+    uint32_t nextdefsym;     // number of externally defined symbols
+    uint32_t iundefsym;      // index to undefined symbols
+    uint32_t nundefsym;      // number of undefined symbols
+    uint32_t tocoff;         // file offset to table of contents
+    uint32_t ntoc;           // number of entries in table of contents
+    uint32_t modtaboff;      // file offset to module table
+    uint32_t nmodtab;        // number of module table entries
+    uint32_t extrefsymoff;   // offset to referenced symbol table
+    uint32_t nextrefsyms;    // number of referenced symbol table entries
+    uint32_t indirectsymoff; // file offset to the indirect symbol table
+    uint32_t nindirectsyms;  // number of indirect symbol table entries
+    uint32_t extreloff;      // offset to external relocation entries
+    uint32_t nextrel;        // number of external relocation entries
+    uint32_t locreloff;      // offset to local relocation entries
+    uint32_t nlocrel;        // number of local relocation entries
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
+    };
+
+    MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+      ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+      iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+      nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+      nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+  };
+
+  /// SymTab - The "stab" style symbol table information
+  MachOSymTab   SymTab;
+  /// DySymTab - symbol table info for the dynamic link editor
+  MachODySymTab DySymTab;
+
+  struct MachOSymCmp {
+    // FIXME: this does not appear to be sorting 'f' after 'F'
+    bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+      return LHS.GVName < RHS.GVName;
+    }
+  };
+
+  /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+  /// a local symbol rather than an external symbol.
+  static bool PartitionByLocal(const MachOSym &Sym);
+
+  /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+  /// is defined in this module.
+  static bool PartitionByDefined(const MachOSym &Sym);
+
+protected:
+
+  /// SymbolTable - This is the list of symbols we have emitted to the file.
+  /// This actually gets rearranged before emission to the file (to put the
+  /// local symbols first in the list).
+  std::vector<MachOSym> SymbolTable;
+
+  /// SymT - A buffer to hold the symbol table before we write it out at the
+  /// appropriate location in the file.
+  DataBuffer SymT;
+
+  /// StrT - A buffer to hold the string table before we write it out at the
+  /// appropriate location in the file.
+  DataBuffer StrT;
+
+  /// PendingGlobals - This is a list of externally defined symbols that we
+  /// have been asked to emit, but have not seen a reference to.  When a
+  /// reference is seen, the symbol will move from this list to the
+  /// SymbolTable.
+  std::vector<GlobalValue*> PendingGlobals;
+
+  /// DynamicSymbolTable - This is just a vector of indices into
+  /// SymbolTable to aid in emitting the DYSYMTAB load command.
+  std::vector<unsigned> DynamicSymbolTable;
+
+  static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                      const TargetData *TD,
+                      std::vector<MachineRelocation> &MRs);
+
+private:
+  void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+  void EmitGlobal(GlobalVariable *GV);
+  void EmitHeaderAndLoadCommands();
+  void EmitSections();
+  void BufferSymbolAndStringTable();
+  void CalculateRelocations(MachOSection &MOS);
+
+  MachineRelocation GetJTRelocation(unsigned Offset,
+                                    MachineBasicBlock *MBB) const {
+    return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+  }
+
+  /// GetTargetRelocation - Returns the number of relocations.
+  unsigned GetTargetRelocation(MachineRelocation &MR,
+                               unsigned FromIdx,
+                               unsigned ToAddr,
+                               unsigned ToIndex,
+                               OutputBuffer &RelocOut,
+                               OutputBuffer &SecOut,
+                               bool Scattered,
+                               bool Extern) {
+    return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+                                                        ToIndex, RelocOut,
+                                                        SecOut, Scattered,
+                                                        Extern);
+  }
+};
+}
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..71e6b3e4d0f8
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,372 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+  : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false) {
+  Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+  LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+  MBB.print(OS);
+  return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1.  When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number.  If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+  MachineFunction &MF = *N->getParent();
+  N->Number = MF.addToMBBNumbering(N);
+
+  // Make sure the instructions have their operands in the reginfo lists.
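+  // (AddRegOperandsToUseLists, defined in MachineInstr.cpp, hooks each
+  // register operand of an instruction into the use/def chains kept by
+  // MachineRegisterInfo.)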
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+    I->AddRegOperandsToUseLists(RegInfo);
+
+  LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+  N->getParent()->removeFromMBBNumbering(N->Number);
+  N->Number = -1;
+  LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands to reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+  assert(N->getParent() == 0 && "machine instruction already in a basic block");
+  N->setParent(Parent);
+
+  // Add the instruction's register operands to their corresponding
+  // use/def lists.
+  MachineFunction *MF = Parent->getParent();
+  N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+  LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+  assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+  // Remove from the use/def lists.
+  N->RemoveRegOperandsFromUseLists();
+
+  N->setParent(0);
+
+  LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::transferNodesFromList(
+      ilist_traits<MachineInstr> &fromList,
+      MachineBasicBlock::iterator first,
+      MachineBasicBlock::iterator last) {
+  assert(Parent->getParent() == fromList.Parent->getParent() &&
+         "MachineInstr parent mismatch!");
+
+  // Splice within the same MBB -> no change.
+  if (Parent == fromList.Parent) return;
+
+  // If splicing between two blocks within the same function, just update the
+  // parent pointers.
+  for (; first != last; ++first)
+    first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
+  assert(!MI->getParent() && "MI is still in a block!");
+  Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+  iterator I = end();
+  while (I != begin() && (--I)->getDesc().isTerminator())
+    ; /* noop */
+  if (I != end() && !I->getDesc().isTerminator()) ++I;
+  return I;
+}
+
+bool
+MachineBasicBlock::isOnlyReachableByFallthrough() const {
+  return !isLandingPad() &&
+         !pred_empty() &&
+         next(pred_begin()) == pred_end() &&
+         (*pred_begin())->isLayoutSuccessor(this) &&
+         ((*pred_begin())->empty() ||
+          !(*pred_begin())->back().getDesc().isBarrier());
+}
+
+void MachineBasicBlock::dump() const {
+  print(*cerr.stream());
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+                             const TargetRegisterInfo *TRI = 0) {
+  if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) {
+    if (TRI)
+      os << " %" << TRI->get(RegNo).Name;
+    else
+      os << " %mreg(" << RegNo << ")";
+  } else
+    os << " %reg" << RegNo;
+}
+
+void MachineBasicBlock::print(std::ostream &OS) const {
+  const MachineFunction *MF = getParent();
+  if (!MF) {
+    OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+       << " is null\n";
+    return;
+  }
+
+  const BasicBlock *LBB = getBasicBlock();
+  OS << "\n";
+  if (LBB) OS << LBB->getName() << ": ";
+  OS << (const void*)this
+     << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+  if (Alignment) OS << ", Alignment " << Alignment;
+  if (isLandingPad()) OS << ", EH LANDING PAD";
+  OS << ":\n";
+
+  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+  if (!livein_empty()) {
+    OS << "Live Ins:";
+    for (const_livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+      OutputReg(OS, *I, TRI);
+    OS << "\n";
+  }
+  // Print the preds of this block according to the CFG.
+  if (!pred_empty()) {
+    OS << "    Predecessors according to CFG:";
+    for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+      OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
+    OS << "\n";
+  }
+
+  for (const_iterator I = begin(); I != end(); ++I) {
+    OS << "\t";
+    I->print(OS, &getParent()->getTarget());
+  }
+
+  // Print the successors of this block according to the CFG.
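+  // (Each successor prints as its address followed by its number, e.g.
+  // " 0x2a0b930 (#2)", mirroring the predecessor line above; the address
+  // shown is just the MachineBasicBlock pointer value.)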
+  if (!succ_empty()) {
+    OS << "    Successors according to CFG:";
+    for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+      OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+    OS << "\n";
+  }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  assert(I != livein_end() && "Not a live in!");
+  LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+  const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+  getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+  MachineFunction::iterator BBI = NewBefore;
+  getParent()->splice(++BBI, this);
+}
+
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+  Successors.push_back(succ);
+  succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+  succ->removePredecessor(this);
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  assert(I != Successors.end() && "Not a current successor!");
+  Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+  assert(I != Successors.end() && "Not a current successor!");
+  (*I)->removePredecessor(this);
+  return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+  Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+  std::vector<MachineBasicBlock *>::iterator I =
+    std::find(Predecessors.begin(), Predecessors.end(), pred);
+  assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+  Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
+{
+  if (this == fromMBB)
+    return;
+
+  for (MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
+       end = fromMBB->succ_end(); iter != end; ++iter) {
+    addSuccessor(*iter);
+  }
+  while (!fromMBB->succ_empty())
+    fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+  std::vector<MachineBasicBlock *>::const_iterator I =
+    std::find(Successors.begin(), Successors.end(), MBB);
+  return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+  MachineFunction::const_iterator I(this);
+  return next(I) == MachineFunction::const_iterator(MBB);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->remove(this);
+  return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
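+/// For illustration: a caller that has redirected the edge to 'OldSucc'
+/// through a hypothetical 'NewSucc' block would write
+///   MBB->ReplaceUsesOfBlockWith(OldSucc, NewSucc);
+/// and get the branch operands and the successor list rewritten together.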
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+                                               MachineBasicBlock *New) {
+  assert(Old != New && "Cannot replace self with self!");
+
+  MachineBasicBlock::iterator I = end();
+  while (I != begin()) {
+    --I;
+    if (!I->getDesc().isTerminator()) break;
+
+    // Scan the operands of this machine instruction, replacing any uses of Old
+    // with New.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      if (I->getOperand(i).isMBB() &&
+          I->getOperand(i).getMBB() == Old)
+        I->getOperand(i).setMBB(New);
+  }
+
+  // Update the successor information.
+  removeSuccessor(Old);
+  addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted.  If we have proven that MBB can only branch to DestA
+/// and DestB, remove any other MBB successors from the CFG.  DestA and DestB
+/// can be null.
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                                             MachineBasicBlock *DestB,
+                                             bool isCond) {
+  bool MadeChange = false;
+  bool AddedFallThrough = false;
+
+  MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+
+  // If this block ends with a conditional branch that falls through to its
+  // successor, set DestB as the successor.
+  if (isCond) {
+    if (DestB == 0 && FallThru != getParent()->end()) {
+      DestB = FallThru;
+      AddedFallThrough = true;
+    }
+  } else {
+    // If this is an unconditional branch with no explicit dest, it must just
+    // be a fallthrough into DestA.
+    if (DestA == 0 && FallThru != getParent()->end()) {
+      DestA = FallThru;
+      AddedFallThrough = true;
+    }
+  }
+
+  MachineBasicBlock::succ_iterator SI = succ_begin();
+  MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+  while (SI != succ_end()) {
+    if (*SI == DestA && DestA == DestB) {
+      DestA = DestB = 0;
+      ++SI;
+    } else if (*SI == DestA) {
+      DestA = 0;
+      ++SI;
+    } else if (*SI == DestB) {
+      DestB = 0;
+      ++SI;
+    } else if ((*SI)->isLandingPad() &&
+               *SI != OrigDestA && *SI != OrigDestB) {
+      ++SI;
+    } else {
+      // Otherwise, this is a superfluous edge, remove it.
+      SI = removeSuccessor(SI);
+      MadeChange = true;
+    }
+  }
+  if (!AddedFallThrough) {
+    assert(DestA == 0 && DestB == 0 &&
+           "MachineCFG is missing edges!");
+  } else if (isCond) {
+    assert(DestA == 0 && "MachineCFG is missing edges!");
+  }
+  return MadeChange;
+}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..37c86019d4a2
--- /dev/null
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,53 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
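+//
+// (A block A dominates a block B when every path from the function's entry
+// block to B passes through A; MachineDominatorTree below caches the
+// immediate-dominator relation for MachineBasicBlocks.)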
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+
+char MachineDominatorTree::ID = 0;
+
+static RegisterPass<MachineDominatorTree>
+E("machinedomtree", "MachineDominator Tree Construction", true);
+
+const PassInfo *const llvm::MachineDominatorsID = &E;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  DT->recalculate(F);
+
+  return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+  : MachineFunctionPass(&ID) {
+  DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+  DT->releaseMemory();
+  delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+  DT->releaseMemory();
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..cacfed1d9f7b
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,598 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function.  This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+  // Do not codegen any 'available_externally' functions at all, they have
+  // definitions outside the translation unit.
+  if (F.hasAvailableExternallyLinkage())
+    return false;
+
+  return runOnMachineFunction(MachineFunction::get(&F));
+}
+
+namespace {
+  struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+    static char ID;
+
+    std::ostream *OS;
+    const std::string Banner;
+
+    Printer(std::ostream *os, const std::string &banner)
+      : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+
+    const char *getPassName() const { return "MachineFunction Printer"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) {
+      (*OS) << Banner;
+      MF.print(*OS);
+      return false;
+    }
+  };
+  char Printer::ID = 0;
+}
+
+/// Returns a newly-created MachineFunction Printer pass.  The default output
+/// stream is std::cerr; the default banner is empty.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+                                                     const std::string &Banner){
+  return new Printer(OS, Banner);
+}
+
+namespace {
+  struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+    static char ID;
+    Deleter() : MachineFunctionPass(&ID) {}
+
+    const char *getPassName() const { return "Machine Code Deleter"; }
+
+    bool runOnMachineFunction(MachineFunction &MF) {
+      // Delete the annotation from the function now.
+      MachineFunction::destruct(MF.getFunction());
+      return true;
+    }
+  };
+  char Deleter::ID = 0;
+}
+
+/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
+/// the current function, which should happen after the function has been
+/// emitted to a .s file or to memory.
+FunctionPass *llvm::createMachineCodeDeleter() {
+  return new Deleter();
+}
+
+
+
+//===---------------------------------------------------------------------===//
+// MachineFunction implementation
+//===---------------------------------------------------------------------===//
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+  MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F,
+                                 const TargetMachine &TM)
+  : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")),
+    Fn(F), Target(TM) {
+  if (TM.getRegisterInfo())
+    RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
+                  MachineRegisterInfo(*TM.getRegisterInfo());
+  else
+    RegInfo = 0;
+  MFInfo = 0;
+  FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
+                  MachineFrameInfo(*TM.getFrameInfo());
+  ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
+                     MachineConstantPool(TM.getTargetData());
+
+  // Set up jump table.
+  const TargetData &TD = *TM.getTargetData();
+  bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+  unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
+  unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+                             : TD.getPointerABIAlignment();
+  JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+                      MachineJumpTableInfo(EntrySize, Alignment);
+}
+
+MachineFunction::~MachineFunction() {
+  BasicBlocks.clear();
+  InstructionRecycler.clear(Allocator);
+  BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+  if (MFInfo) {
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
+  }
+  FrameInfo->~MachineFrameInfo();         Allocator.Deallocate(FrameInfo);
+  ConstantPool->~MachineConstantPool();   Allocator.Deallocate(ConstantPool);
+  JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo);
+}
+
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them.  This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function.  If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
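+      // (e.g. erasing block #2 of a five-block function leaves the numbers
+      // {0, 1, 3, 4}; this pass over the remaining blocks reassigns them to
+      // the dense sequence {0, 1, 2, 3}.)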
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }
+
+  // Okay, all the blocks are renumbered.  If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr.  Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+                                    DebugLoc DL, bool NoImp) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+             MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except that the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+             MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Clear the instruction's memoperands.  This must be done manually because
+  // the instruction's parent pointer is now null, so it can't properly
+  // deallocate them on its own.
+  MI->clearMemOperands(*this);
+
+  MI->~MachineInstr();
+  InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock.  Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+  return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+             MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
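+/// (The storage is handed back to BasicBlockRecycler rather than freed, so a
+/// later CreateMachineBasicBlock call may reuse it.)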
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+  assert(MBB->getParent() == this && "MBB parent mismatch!");
+  MBB->~MachineBasicBlock();
+  BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+void MachineFunction::dump() const {
+  print(*cerr.stream());
+}
+
+void MachineFunction::print(std::ostream &OS) const {
+  OS << "# Machine code for " << Fn->getName() << "():\n";
+
+  // Print Frame Information
+  FrameInfo->print(*this, OS);
+
+  // Print JumpTable Information
+  JumpTableInfo->print(OS);
+
+  // Print Constant Pool
+  {
+    raw_os_ostream OSS(OS);
+    ConstantPool->print(OSS);
+  }
+
+  const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+  if (RegInfo && !RegInfo->livein_empty()) {
+    OS << "Live Ins:";
+    for (MachineRegisterInfo::livein_iterator
+         I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+      if (TRI)
+        OS << " " << TRI->getName(I->first);
+      else
+        OS << " Reg #" << I->first;
+
+      if (I->second)
+        OS << " in VR#" << I->second << " ";
+    }
+    OS << "\n";
+  }
+  if (RegInfo && !RegInfo->liveout_empty()) {
+    OS << "Live Outs:";
+    for (MachineRegisterInfo::liveout_iterator
+         I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+      if (TRI)
+        OS << " " << TRI->getName(*I);
+      else
+        OS << " Reg #" << *I;
+    OS << "\n";
+  }
+
+  for (const_iterator BB = begin(); BB != end(); ++BB)
+    BB->print(OS);
+
+  OS << "\n# End machine code for " << Fn->getName() << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not.  This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const MachineFunction *F) {
+      return "CFG for '" + F->getFunction()->getName() + "' function";
+    }
+
+    static std::string getNodeLabel(const MachineBasicBlock *Node,
+                                    const MachineFunction *Graph) {
+      if (CFGOnly && Node->getBasicBlock() &&
+          !Node->getBasicBlock()->getName().empty())
+        return Node->getBasicBlock()->getName() + ":";
+
+      std::ostringstream Out;
+      if (CFGOnly) {
+        Out << Node->getNumber() << ':';
+        return Out.str();
+      }
+
+      Node->print(Out);
+
+      std::string OutStr = Out.str();
+      if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+      // Process string output to make it nicer...
+      for (unsigned i = 0; i != OutStr.length(); ++i)
+        if (OutStr[i] == '\n') {  // Left justify
+          OutStr[i] = '\\';
+          OutStr.insert(OutStr.begin()+i+1, 'l');
+        }
+      return OutStr;
+    }
+  };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName());
+#else
+  cerr << "MachineFunction::viewCFG is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+  CFGOnly = true;
+  viewCFG();
+  CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get()       -- Returns a handle to the object.
+//                This should not be called before "construct()"
+//                for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+  AnnotationID MF_AID =
+    AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+  assert(Fn->getAnnotation(MF_AID) == 0 &&
+         "Object already exists for this function!");
+  MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+  Fn->addAnnotation(mcInfo);
+  return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+  AnnotationID MF_AID =
+    AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+  bool Deleted = Fn->deleteAnnotation(MF_AID);
+  assert(Deleted && "Machine code did not exist for function!");
+  Deleted = Deleted;  // silence warning when no assertions.
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+  AnnotationID MF_AID =
+    AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+  MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+  assert(mc && "Call construct() method first to allocate the object");
+  return *mc;
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+                                    const TargetRegisterClass *RC) {
+  assert(RC->contains(PReg) && "Not the correct regclass!");
+  unsigned VReg = getRegInfo().createVirtualRegister(RC);
+  getRegInfo().addLiveIn(PReg, VReg);
+  return VReg;
+}
+
+/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given
+/// source file, line, and column.  If none currently exists, create a new
+/// DebugLocTuple, and insert it into the DebugIdMap.
+unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit,
+                                                unsigned Line, unsigned Col) {
+  DebugLocTuple Tuple(CompileUnit, Line, Col);
+  DenseMap<DebugLocTuple, unsigned>::iterator II
+    = DebugLocInfo.DebugIdMap.find(Tuple);
+  if (II != DebugLocInfo.DebugIdMap.end())
+    return II->second;
+  // Add a new tuple.
+  unsigned Id = DebugLocInfo.DebugLocations.size();
+  DebugLocInfo.DebugLocations.push_back(Tuple);
+  DebugLocInfo.DebugIdMap[Tuple] = Id;
+  return Id;
+}
+
+/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object.
+DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
+  unsigned Idx = DL.getIndex();
+  assert(Idx < DebugLocInfo.DebugLocations.size() &&
+         "Invalid index into debug locations!");
+  return DebugLocInfo.DebugLocations[Idx];
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency.  By default, fixed objects are immutable.  This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+                                        bool Immutable) {
+  assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+  Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+  return -++NumFixedObjects;
+}
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+  const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+  int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  <fi#" << (int)(i-NumFixedObjects) << ">: ";
+    if (SO.Size == ~0ULL) {
+      OS << "dead\n";
+      continue;
+    }
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+    OS << " alignment is " << SO.Alignment << " byte"
+       << (SO.Alignment != 1 ? "s," : ",");
+
+    if (i < NumFixedObjects)
+      OS << " fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << " at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+
+  if (HasVarSizedObjects)
+    OS << "  Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+                              const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+    if (JumpTables[i].MBBs == DestBBs)
+      return i;
+
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+                                             MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
+    MachineJumpTableEntry &JTE = JumpTables[i];
+    for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+      if (JTE.MBBs[j] == Old) {
+        JTE.MBBs[j] = New;
+        MadeChange = true;
+      }
+  }
+  return MadeChange;
+}
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+  // FIXME: this is lame, maybe we could print out the MBB numbers or something
+  // like {1, 2, 4, 5, 3, 0}
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  <jt#" << i << "> has " << JumpTables[i].MBBs.size()
+       << " entries\n";
+  }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one.  User must specify the log2 of the minimum required
+/// alignment for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].Val.ConstVal == C &&
+        (Constants[i].getAlignment() & (Alignment - 1)) == 0)
+      return i;
+
+  Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+  return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  int Idx = V->getExistingMachineCPValue(this, Alignment);
+  if (Idx != -1)
+    return (unsigned)Idx;
+
+  Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+  return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    OS << "  <cp#" << i << "> is";
+    if (Constants[i].isMachineConstantPoolEntry())
+      Constants[i].Val.MachineCPVal->print(OS);
+    else
+      OS << *(Value*)Constants[i].Val.ConstVal;
+    OS << " , alignment=" << Constants[i].getAlignment();
+    OS << "\n";
+  }
+}
+
+void MachineConstantPool::dump() const { print(errs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..b8c8563eab45
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1105 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo.  If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+  assert(isReg() && "Can only add reg operand to use lists");
+
+  // If the reginfo pointer is null, just explicitly null out our next/prev
+  // pointers, to ensure they are not garbage.
+  if (RegInfo == 0) {
+    Contents.Reg.Prev = 0;
+    Contents.Reg.Next = 0;
+    return;
+  }
+
+  // Otherwise, add this operand to the head of the register's use/def list.
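+  // (Contents.Reg.Prev holds the address of whichever pointer currently
+  // points at this operand, either the register's head pointer in
+  // MachineRegisterInfo or the previous node's Next field, so unlinking in
+  // RemoveRegOperandFromRegInfo needs no special case for the list head.)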
+  MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+  // For SSA values, we prefer to keep the definition at the start of the list.
+  // We do this by skipping over the definition if it is at the head of the
+  // list.
+  if (*Head && (*Head)->isDef())
+    Head = &(*Head)->Contents.Reg.Next;
+
+  Contents.Reg.Next = *Head;
+  if (Contents.Reg.Next) {
+    assert(getReg() == Contents.Reg.Next->getReg() &&
+           "Different regs on the same list!");
+    Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+  }
+
+  Contents.Reg.Prev = Head;
+  *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+  assert(isOnRegUseList() && "Reg operand is not on a use list");
+  // Unlink this from the doubly linked list of operands.
+  MachineOperand *NextOp = Contents.Reg.Next;
+  *Contents.Reg.Prev = NextOp;
+  if (NextOp) {
+    assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+    NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+  }
+  Contents.Reg.Prev = 0;
+  Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+  if (getReg() == Reg) return; // No change.
+
+  // Otherwise, we have to change the register.  If this operand is embedded
+  // into a machine function, we need to update the old and new register's
+  // use/def lists.
+  if (MachineInstr *MI = getParent())
+    if (MachineBasicBlock *MBB = MI->getParent())
+      if (MachineFunction *MF = MBB->getParent()) {
+        RemoveRegOperandFromRegInfo();
+        Contents.Reg.RegNo = Reg;
+        AddRegOperandToRegInfo(&MF->getRegInfo());
+        return;
+      }
+
+  // Otherwise, just change the register, no problem.  :)
+  Contents.Reg.RegNo = Reg;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value.  If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+  // If this operand is currently a register operand, and if this is in a
+  // function, deregister the operand from the register's use/def list.
+  if (isReg() && getParent() && getParent()->getParent() &&
+      getParent()->getParent()->getParent())
+    RemoveRegOperandFromRegInfo();
+
+  OpKind = MO_Immediate;
+  Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value.  If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+                                      bool isKill, bool isDead) {
+  // If this operand is already a register operand, use setReg to update the
+  // register's use/def lists.
+  if (isReg()) {
+    assert(!isEarlyClobber());
+    setReg(Reg);
+  } else {
+    // Otherwise, change this to a register and set the reg#.
+    OpKind = MO_Register;
+    Contents.Reg.RegNo = Reg;
+
+    // If this operand is embedded in a function, add the operand to the
+    // register's use/def list.
+    if (MachineInstr *MI = getParent())
+      if (MachineBasicBlock *MBB = MI->getParent())
+        if (MachineFunction *MF = MBB->getParent())
+          AddRegOperandToRegInfo(&MF->getRegInfo());
+  }
+
+  IsDef = isDef;
+  IsImp = isImp;
+  IsKill = isKill;
+  IsDead = isDead;
+  IsEarlyClobber = false;
+  SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+  if (getType() != Other.getType()) return false;
+
+  switch (getType()) {
+  default: assert(0 && "Unrecognized operand type");
+  case MachineOperand::MO_Register:
+    return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+           getSubReg() == Other.getSubReg();
+  case MachineOperand::MO_Immediate:
+    return getImm() == Other.getImm();
+  case MachineOperand::MO_FPImmediate:
+    return getFPImm() == Other.getFPImm();
+  case MachineOperand::MO_MachineBasicBlock:
+    return getMBB() == Other.getMBB();
+  case MachineOperand::MO_FrameIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_ConstantPoolIndex:
+    return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_JumpTableIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_GlobalAddress:
+    return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_ExternalSymbol:
+    return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+           getOffset() == Other.getOffset();
+  }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const {
+  raw_os_ostream RawOS(OS);
+  print(RawOS, TM);
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+  switch (getType()) {
+  case MachineOperand::MO_Register:
+    if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
+      OS << "%reg" << getReg();
+    } else {
+      // If the instruction is embedded into a basic block, we can find the
+      // target info for the instruction.
+      if (TM == 0)
+        if (const MachineInstr *MI = getParent())
+          if (const MachineBasicBlock *MBB = MI->getParent())
+            if (const MachineFunction *MF = MBB->getParent())
+              TM = &MF->getTarget();
+
+      if (TM)
+        OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
+      else
+        OS << "%mreg" << getReg();
+    }
+
+    if (getSubReg() != 0) {
+      OS << ":" << getSubReg();
+    }
+
+    if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
+      OS << "<";
+      bool NeedComma = false;
+      if (isImplicit()) {
+        if (NeedComma) OS << ",";
+        OS << (isDef() ? "imp-def" : "imp-use");
+        NeedComma = true;
+      } else if (isDef()) {
+        if (NeedComma) OS << ",";
+        if (isEarlyClobber())
+          OS << "earlyclobber,";
+        OS << "def";
+        NeedComma = true;
+      }
+      if (isKill() || isDead()) {
+        if (NeedComma) OS << ",";
+        if (isKill()) OS << "kill";
+        if (isDead()) OS << "dead";
+      }
+      OS << ">";
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << getImm();
+    break;
+  case MachineOperand::MO_FPImmediate:
+    if (getFPImm()->getType() == Type::FloatTy) {
+      OS << getFPImm()->getValueAPF().convertToFloat();
+    } else {
+      OS << getFPImm()->getValueAPF().convertToDouble();
+    }
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "mbb<"
+       << ((Value*)getMBB()->getBasicBlock())->getName()
+       << "," << (void*)getMBB() << ">";
+    break;
+  case MachineOperand::MO_FrameIndex:
+    OS << "<fi#" << getIndex() << ">";
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "<cp#" << getIndex();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "<jt#" << getIndex() << ">";
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    OS << "<ga:" << ((Value*)getGlobal())->getName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << "<es:" << getSymbolName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  default:
+    assert(0 && "Unrecognized operand type");
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
+                                     int64_t o, uint64_t s, unsigned int a)
+  : Offset(o), Size(s), V(v),
+    Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+  assert(isPowerOf2_32(a) && "Alignment is not a power of 2!");
+  assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(Offset);
+  ID.AddInteger(Size);
+  ID.AddPointer(V);
+  ID.AddInteger(Flags);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+  : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands.  It reserves space for the number of operands specified
+/// by the TargetInstrDesc (or for numOperands if that is not zero, for
+/// instructions with a variable number of operands).
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
+  : TID(&tid), NumImplicitOps(0), Parent(0),
+    debugLoc(DebugLoc::getUnknownLoc()) {
+  if (!NoImp && TID->getImplicitDefs())
+    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (!NoImp && TID->getImplicitUses())
+    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  if (!NoImp)
+    addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+                           bool NoImp)
+  : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+  if (!NoImp && TID->getImplicitDefs())
+    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (!NoImp && TID->getImplicitUses())
+    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  if (!NoImp)
+    addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
+  : TID(&tid), NumImplicitOps(0), Parent(0),
+    debugLoc(DebugLoc::getUnknownLoc()) {
+  assert(MBB && "Cannot use inserting ctor with null basic block!");
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+  MBB->push_back(this);  // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+                           const TargetInstrDesc &tid)
+  : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+  assert(MBB && "Cannot use inserting ctor with null basic block!");
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+  MBB->push_back(this);  // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+  : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0),
+    debugLoc(MI.getDebugLoc()) {
+  Operands.reserve(MI.getNumOperands());
+
+  // Add operands
+  for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+    addOperand(MI.getOperand(i));
+  NumImplicitOps = MI.NumImplicitOps;
+
+  // Add memory operands.
+ for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(), + j = MI.memoperands_end(); i != j; ++i) + addMemOperand(MF, *i); + + // Set parent to null. + Parent = 0; + + LeakDetector::addGarbageObject(this); +} + +MachineInstr::~MachineInstr() { + LeakDetector::removeGarbageObject(this); + assert(MemOperands.empty() && + "MachineInstr being deleted with live memoperands!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); + assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) && + "Reg operand def/use list corrupted"); + } +#endif +} + +/// getRegInfo - If this instruction is embedded into a MachineFunction, +/// return the MachineRegisterInfo object for the current function, otherwise +/// return null. +MachineRegisterInfo *MachineInstr::getRegInfo() { + if (MachineBasicBlock *MBB = getParent()) + return &MBB->getParent()->getRegInfo(); + return 0; +} + +/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands already be on their use lists. +void MachineInstr::RemoveRegOperandsFromUseLists() { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } +} + +/// AddRegOperandsToUseLists - Add all of the register operands in +/// this instruction to their respective use lists. This requires that the +/// operands not be on their use lists yet. +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(&RegInfo); + } +} + + +/// addOperand - Add the specified operand to the instruction. If it is an +/// implicit operand, it is added to the end of the operand list. If it is +/// an explicit operand it is added at the end of the explicit operand list +/// (before the first implicit operand). +void MachineInstr::addOperand(const MachineOperand &Op) { + bool isImpReg = Op.isReg() && Op.isImplicit(); + assert((isImpReg || !OperandsComplete()) && + "Trying to add an operand to a machine instr that is already done!"); + + MachineRegisterInfo *RegInfo = getRegInfo(); + + // If we are adding the operand to the end of the list, our job is simpler. + // This is true most of the time, so this is a reasonable optimization. + if (isImpReg || NumImplicitOps == 0) { + // We can only do this optimization if we know that the operand list won't + // reallocate. + if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) { + Operands.push_back(Op); + + // Set the parent of the operand. + Operands.back().ParentMI = this; + + // If the operand is a register, update the operand's use list. + if (Op.isReg()) + Operands.back().AddRegOperandToRegInfo(RegInfo); + return; + } + } + + // Otherwise, we have to insert a real operand before any implicit ones. + unsigned OpNo = Operands.size()-NumImplicitOps; + + // If this instruction isn't embedded into a function, then we don't need to + // update any operand lists. + if (RegInfo == 0) { + // Simple insertion, no reginfo update needed for other register operands. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Do explicitly set the reginfo for this operand though, to ensure the + // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg()) + Operands[OpNo].AddRegOperandToRegInfo(0); + + } else if (Operands.size()+1 <= Operands.capacity()) { + // Otherwise, we have to remove register operands from their register use + // list, add the operand, then add the register operands back to their use + // list. This also must handle the case when the operand list reallocates + // to somewhere else. + + // If insertion of this operand won't cause reallocation of the operand + // list, just remove the implicit operands, add the operand, then re-add all + // the rest of the operands. + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].RemoveRegOperandFromRegInfo(); + } + + // Add the operand. If it is a register, add it to the reg list. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + if (Operands[OpNo].isReg()) + Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + + // Re-add all the implicit ops. + for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } else { + // Otherwise, we will be reallocating the operand list. Remove all reg + // operands from their list, then re-add them after the operand list is + // reallocated. + RemoveRegOperandsFromUseLists(); + + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Re-add all the operands. + AddRegOperandsToUseLists(*RegInfo); + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. +/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < Operands.size() && "Invalid operand number"); + + // Special case removing the last one. + if (OpNo == Operands.size()-1) { + // If needed, remove from the reg def/use list. + if (Operands.back().isReg() && Operands.back().isOnRegUseList()) + Operands.back().RemoveRegOperandFromRegInfo(); + + Operands.pop_back(); + return; + } + + // Otherwise, we are removing an interior operand. If we have reginfo to + // update, remove all operands that will be shifted down from their reg lists, + // move everything down, then re-add them. + MachineRegisterInfo *RegInfo = getRegInfo(); + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } + } + + Operands.erase(Operands.begin()+OpNo); + + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } +} + +/// addMemOperand - Add a MachineMemOperand to the machine instruction, +/// referencing arbitrary storage. +void MachineInstr::addMemOperand(MachineFunction &MF, + const MachineMemOperand &MO) { + MemOperands.push_back(MO); +} + +/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. +void MachineInstr::clearMemOperands(MachineFunction &MF) { + MemOperands.clear(); +} + + +/// removeFromParent - This method unlinks 'this' from the containing basic +/// block, and returns it, but does not delete it. +MachineInstr *MachineInstr::removeFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->remove(this); + return this; +} + + +/// eraseFromParent - This method unlinks 'this' from the containing basic +/// block, and deletes it.
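The addOperand logic above goes out of its way to avoid invalidating the register use-lists, which hold raw pointers into the Operands vector; the capacity test is what makes the fast path safe. A self-contained sketch of the hazard (made-up operand type, illustrative only):

    #include <cstdio>
    #include <vector>

    // Sketch of the hazard addOperand above defends against: register
    // use-lists store raw pointers into the Operands vector, so a push_back
    // that reallocates would leave them dangling.
    struct Op { int Reg; };

    int main() {
      std::vector<Op> Operands;
      Operands.reserve(2);
      Operands.push_back(Op{1});
      Op *UseListEntry = &Operands.back(); // a "use list" pointer into the vector

      // Fast path: size()+1 <= capacity() proves push_back cannot reallocate,
      // so existing pointers stay valid, the same test addOperand performs.
      if (Operands.size() + 1 <= Operands.capacity())
        Operands.push_back(Op{2});

      // A third push_back could reallocate; addOperand handles that case by
      // unlinking all register operands first and relinking them afterwards.
      printf("still valid: reg %d\n", UseListEntry->Reg); // reg 1
      return 0;
    }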
+void MachineInstr::eraseFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase(this); +} + + +/// OperandsComplete - Return true if it's illegal to add a new operand. +/// +bool MachineInstr::OperandsComplete() const { + unsigned short NumOperands = TID->getNumOperands(); + if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) + return true; // Broken: we have all the operands of this instruction! + return false; +} + +/// getNumExplicitOperands - Returns the number of non-implicit operands. +/// +unsigned MachineInstr::getNumExplicitOperands() const { + unsigned NumOperands = TID->getNumOperands(); + if (!TID->isVariadic()) + return NumOperands; + + for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isImplicit()) + NumOperands++; + } + return NumOperands; +} + + +/// isLabel - Returns true if the MachineInstr represents a label. +/// +bool MachineInstr::isLabel() const { + return getOpcode() == TargetInstrInfo::DBG_LABEL || + getOpcode() == TargetInstrInfo::EH_LABEL || + getOpcode() == TargetInstrInfo::GC_LABEL; +} + +/// isDebugLabel - Returns true if the MachineInstr represents a debug label. +/// +bool MachineInstr::isDebugLabel() const { + return getOpcode() == TargetInstrInfo::DBG_LABEL; +} + +/// findRegisterUseOperandIdx() - Returns the operand index that is a use of +/// the specified register or -1 if it is not found. It further tightens the +/// search criteria to a use that kills the register if isKill is true. +int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, + const TargetRegisterInfo *TRI) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TRI && + TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(MOReg, Reg))) + if (!isKill || MO.isKill()) + return i; + } + return -1; +} + +/// findRegisterDefOperandIdx() - Returns the operand index that is a def of +/// the specified register or -1 if it is not found. If isDead is true, defs +/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it +/// also checks if there is a def of a super-register. +int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, + const TargetRegisterInfo *TRI) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg == Reg || + (TRI && + TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(MOReg, Reg))) + if (!isDead || MO.isDead()) + return i; + } + return -1; +} + +/// findFirstPredOperandIdx() - Find the index of the first operand in the +/// operand list that is used to represent the predicate. It returns -1 if +/// none is found.
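The two operand searches above match a physical-register operand not only exactly but also when the queried register is one of its sub-registers. A toy standalone model of that containment test (made-up alias table; the real one is generated into TargetRegisterInfo):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>

    // Toy containment test standing in for TRI->isSubRegister(Super, Sub).
    // An operand holding EAX should also satisfy a query for AX or AL, which
    // is exactly what the searches above arrange.
    static bool isSubRegister(const std::string &Super, const std::string &Sub) {
      static const std::map<std::string, std::set<std::string> > Subs = {
          {"EAX", {"AX", "AL", "AH"}}, {"AX", {"AL", "AH"}}};
      std::map<std::string, std::set<std::string> >::const_iterator It =
          Subs.find(Super);
      return It != Subs.end() && It->second.count(Sub) != 0;
    }

    int main() {
      const std::string MOReg = "EAX"; // register actually on the operand
      const char *Queries[] = {"EAX", "AX", "BX"};
      for (const char *Q : Queries)
        printf("use of %s found: %d\n", Q, MOReg == Q || isSubRegister(MOReg, Q));
      return 0; // prints 1, 1, 0
    }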
+int MachineInstr::findFirstPredOperandIdx() const { + const TargetInstrDesc &TID = getDesc(); + if (TID.isPredicable()) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (TID.OpInfo[i].isPredicate()) + return i; + } + + return -1; +} + +/// isRegTiedToUseOperand - Given the index of a register def operand, +/// check if the register def is tied to a source operand, due to either +/// two-address elimination or inline assembly constraints. Returns the +/// first tied use operand index by reference if UseOpIdx is not null. +bool MachineInstr:: +isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { + if (getOpcode() == TargetInstrInfo::INLINEASM) { + assert(DefOpIdx >= 2); + const MachineOperand &MO = getOperand(DefOpIdx); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + return false; + // Determine the actual operand number corresponding to this index. + unsigned DefNo = 0; + for (unsigned i = 1, e = getNumOperands(); i < e; ) { + const MachineOperand &FMO = getOperand(i); + assert(FMO.isImm()); + // Skip over this def. + i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; + if (i > DefOpIdx) + break; + ++DefNo; + } + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &FMO = getOperand(i); + if (!FMO.isImm()) + continue; + if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) + continue; + unsigned Idx; + if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && + Idx == DefNo) { + if (UseOpIdx) + *UseOpIdx = (unsigned)i + 1; + return true; + } + } + } + + assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); + const TargetInstrDesc &TID = getDesc(); + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse() && + TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) { + if (UseOpIdx) + *UseOpIdx = (unsigned)i; + return true; + } + } + return false; +} + +/// isRegTiedToDefOperand - Return true if the operand of the specified index +/// is a register use and it is tied to a def operand. It also returns the def +/// operand index by reference. +bool MachineInstr:: +isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { + if (getOpcode() == TargetInstrInfo::INLINEASM) { + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) + return false; + assert(UseOpIdx > 0); + const MachineOperand &UFMO = getOperand(UseOpIdx-1); + if (!UFMO.isImm()) + return false; // Must be physreg uses. + unsigned DefNo; + if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { + if (!DefOpIdx) + return true; + + unsigned DefIdx = 1; + // Remember to adjust the index. First operand is asm string, then there + // is a flag for each. + while (DefNo) { + const MachineOperand &FMO = getOperand(DefIdx); + assert(FMO.isImm()); + // Skip over this def. + DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; + --DefNo; + } + *DefOpIdx = DefIdx+1; + return true; + } + return false; + } + + const TargetInstrDesc &TID = getDesc(); + if (UseOpIdx >= TID.getNumOperands()) + return false; + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse()) + return false; + int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO); + if (DefIdx == -1) + return false; + if (DefOpIdx) + *DefOpIdx = (unsigned)DefIdx; + return true; +} + +/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
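The INLINEASM handling above relies on the operand layout of inline-asm machine instructions: operand 0 is the asm string, followed by groups consisting of one flag immediate and the registers it covers, which is why the walk advances by getNumOperandRegisters(flag) + 1 each step. A standalone sketch of walking that layout (toy encoding in which the flag's value is simply the register count, whereas the real flag packs a kind plus the count):

    #include <cstdio>
    #include <vector>

    struct AsmOp { bool IsImm; unsigned Val; };

    int main() {
      // Operand 0 is the asm string; each following group is one flag
      // immediate and then that many register operands.
      std::vector<AsmOp> Ops = {
          {true, 0},                             // operand 0: asm string (placeholder)
          {true, 2}, {false, 100}, {false, 101}, // group 0: flag + 2 registers
          {true, 1}, {false, 102},               // group 1: flag + 1 register
      };
      unsigned GroupNo = 0;
      for (size_t i = 1; i < Ops.size();) {
        printf("group %u: flag at %zu, %u register(s)\n", GroupNo, i, Ops[i].Val);
        i += Ops[i].Val + 1; // skip the flag and its registers, as above
        ++GroupNo;
      }
      return 0;
    }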
+/// +void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (!MO.isKill() && !MO.isDead())) + continue; + for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) { + MachineOperand &MOp = getOperand(j); + if (!MOp.isIdenticalTo(MO)) + continue; + if (MO.isKill()) + MOp.setIsKill(); + else + MOp.setIsDead(); + break; + } + } +} + +/// copyPredicates - Copies predicate operand(s) from MI. +void MachineInstr::copyPredicates(const MachineInstr *MI) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (TID.OpInfo[i].isPredicate()) { + // Predicated operands must be last operands. + addOperand(MI->getOperand(i)); + } + } +} + +/// isSafeToMove - Return true if it is safe to move this instruction. If +/// SawStore is set to true, it means that there is a store (or call) between +/// the instruction's location and its intended destination. +bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, + bool &SawStore) const { + // Ignore stuff that we obviously can't move. + if (TID->mayStore() || TID->isCall()) { + SawStore = true; + return false; + } + if (TID->isTerminator() || TID->hasUnmodeledSideEffects()) + return false; + + // See if this instruction does a load. If so, we have to guarantee that the + // loaded value doesn't change between the load and its intended + // destination. The check for isInvariantLoad gives the target the chance to + // classify the load as always returning a constant, e.g. a constant pool + // load. + if (TID->mayLoad() && !TII->isInvariantLoad(this)) + // Otherwise, this is a real load. If there is a store between the load and + // end of block, or if the load is volatile, we can't move it. + return !SawStore && !hasVolatileMemoryRef(); + + return true; +} + +/// isSafeToReMat - Return true if it's safe to rematerialize the specified +/// instruction which defined the specified register instead of copying it. +bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, + unsigned DstReg) const { + bool SawStore = false; + if (!getDesc().isRematerializable() || + !TII->isTriviallyReMaterializable(this) || + !isSafeToMove(TII, SawStore)) + return false; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg()) + continue; + // FIXME: For now, do not remat any instruction with register operands. + // Later on, we can loosen the restriction if the register operands have + // not been modified between the def and use. Note, this is different from + // MachineSink because the code is no longer in two-address form (at least + // partially). + if (MO.isUse()) + return false; + else if (!MO.isDead() && MO.getReg() != DstReg) + return false; + } + return true; +} + +/// hasVolatileMemoryRef - Return true if this instruction may have a +/// volatile memory reference, or if the information describing the +/// memory reference is not available. Return false if it is known to +/// have no volatile memory references. +bool MachineInstr::hasVolatileMemoryRef() const { + // An instruction known never to access memory won't have a volatile access.
+ if (!TID->mayStore() && + !TID->mayLoad() && + !TID->isCall() && + !TID->hasUnmodeledSideEffects()) + return false; + + // Otherwise, if the instruction has no memory reference information, + // conservatively assume it wasn't preserved. + if (memoperands_empty()) + return true; + + // Check the memory reference information for volatile references. + for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) + if (I->isVolatile()) + return true; + + return false; +} + +void MachineInstr::dump() const { + cerr << " " << *this; +} + +void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { + raw_os_ostream RawOS(OS); + print(RawOS, TM); +} + +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { + // Specialize printing if op#0 is definition + unsigned StartOp = 0; + if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) { + getOperand(0).print(OS, TM); + OS << " = "; + ++StartOp; // Don't print this operand again! + } + + OS << getDesc().getName(); + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + if (i != StartOp) + OS << ","; + OS << " "; + getOperand(i).print(OS, TM); + } + + if (!memoperands_empty()) { + OS << ", Mem:"; + for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(), + e = memoperands_end(); i != e; ++i) { + const MachineMemOperand &MRO = *i; + const Value *V = MRO.getValue(); + + assert((MRO.isLoad() || MRO.isStore()) && + "SV has to be a load, store or both."); + + if (MRO.isVolatile()) + OS << "Volatile "; + + if (MRO.isLoad()) + OS << "LD"; + if (MRO.isStore()) + OS << "ST"; + + OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") ["; + + if (!V) + OS << "<unknown>"; + else if (!V->getName().empty()) + OS << V->getName(); + else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + PSV->print(OS); + } else + OS << V; + + OS << " + " << MRO.getOffset() << "]"; + } + } + + if (!debugLoc.isUnknown()) { + const MachineFunction *MF = getParent()->getParent(); + DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); + DICompileUnit CU(DLT.CompileUnit); + std::string Dir, Fn; + OS << " [dbg: " + << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << "," + << DLT.Line << "," + << DLT.Col << "]"; + } + + OS << "\n"; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,8> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed.
Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +bool MachineInstr::addRegisterDead(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,8> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isDead()) + // The register is already marked dead. + return true; + MO.setIsDead(); + Found = true; + } + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // There exists a super-register that's marked dead. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->getSubRegisters(IncomingReg) && + RegInfo->getSuperRegisters(Reg) && + RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + true /*IsDead*/)); + return true; + } + return Found; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp new file mode 100644 index 000000000000..aaa4de4b2c15 --- /dev/null +++ b/lib/CodeGen/MachineLICM.cpp @@ -0,0 +1,406 @@ +//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion on machine instructions. We +// attempt to remove as much code from the body of a loop as possible. +// +// This pass does not attempt to throttle itself to limit register pressure. +// The register allocation phases are expected to perform rematerialization +// to recover when register pressure is high. +// +// This pass is not intended to be a replacement or a complete alternative +// for the LLVM-IR-level LICM pass. It is only designed to hoist simple +// constructs that are not exposed before lowering and instruction selection.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-licm" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); +STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); + +namespace { + class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + + // Various analyses that we use... + MachineLoopInfo *LI; // Current MachineLoopInfo + MachineDominatorTree *DT; // Machine dominator tree for the cur loop + MachineRegisterInfo *RegInfo; // Machine register information + + // State that is updated as we process loops + bool Changed; // True if a loop is changed. + MachineLoop *CurLoop; // The current loop we are working on. + MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + + // For each BB and opcode pair, keep a list of hoisted instructions. + DenseMap<std::pair<unsigned, unsigned>, + std::vector<const MachineInstr*> > CSEMap; + public: + static char ID; // Pass identification, replacement for typeid + MachineLICM() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Machine Instruction LICM"; } + + // FIXME: Loop preheaders? + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual void releaseMemory() { + CSEMap.clear(); + } + + private: + /// IsLoopInvariantInst - Returns true if the instruction is loop + /// invariant. I.e., all virtual register operands are defined outside of + /// the loop, physical registers aren't accessed (explicitly or implicitly), + /// and the instruction is hoistable. + /// + bool IsLoopInvariantInst(MachineInstr &I); + + /// IsProfitableToHoist - Return true if it is potentially profitable to + /// hoist the given loop invariant. + bool IsProfitableToHoist(MachineInstr &MI); + + /// HoistRegion - Walk the specified region of the CFG (defined by all + /// blocks dominated by the specified block, and that are in the current + /// loop) in depth first order w.r.t the DominatorTree. This allows us to + /// visit definitions before uses, allowing us to hoist a loop body in one + /// pass without iteration. + /// + void HoistRegion(MachineDomTreeNode *N); + + /// Hoist - When an instruction is found to use only loop invariant operands + /// that are safe to hoist, this instruction is called to do the dirty work. + /// + void Hoist(MachineInstr &MI); + }; +} // end anonymous namespace + +char MachineLICM::ID = 0; +static RegisterPass<MachineLICM> +X("machinelicm", "Machine Loop Invariant Code Motion"); + +FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } + +/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most +/// loop that has a preheader.
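HoistRegion, declared above, walks the dominator subtree in depth-first preorder, so in SSA-form machine code every virtual register definition is visited before its uses and one pass suffices. A tiny standalone illustration of that traversal shape (toy node type, not the real MachineDomTreeNode) before the implementation continues below:

    #include <cstdio>
    #include <vector>

    // Toy dominator-tree node. A preorder walk of the dominator tree visits
    // every block before all blocks it dominates, hence defs before uses.
    struct DomNode {
      int BB;
      std::vector<DomNode*> Children;
    };

    static void HoistRegionSketch(DomNode *N) {
      printf("visiting BB#%d\n", N->BB); // process hoisting candidates here first
      for (DomNode *Child : N->Children)
        HoistRegionSketch(Child);        // then every block this one dominates
    }

    int main() {
      DomNode Latch{2, {}};
      DomNode Body{1, {&Latch}};
      DomNode Header{0, {&Body}};
      HoistRegionSketch(&Header); // BB#0, BB#1, BB#2: defs seen before uses
      return 0;
    }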
+static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { + for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) + if (L->getLoopPreheader()) + return false; + return true; +} + +/// Hoist expressions out of the specified loop. Note, alias info for inner +/// loops is not preserved, so it is not a good idea to run LICM multiple +/// times on one loop. +/// +bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + const Function *F = MF.getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + DOUT << "******** Machine LICM ********\n"; + + Changed = false; + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + RegInfo = &MF.getRegInfo(); + + // Get our Loop information... + LI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + + for (MachineLoopInfo::iterator + I = LI->begin(), E = LI->end(); I != E; ++I) { + CurLoop = *I; + + // Only visit outer-most preheader-sporting loops. + if (!LoopIsOuterMostWithPreheader(CurLoop)) + continue; + + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop preheader, we can't do any hoisting. + // + // FIXME: We are only hoisting if the basic block coming into this loop + // has only one successor. This isn't the case in general because we haven't + // broken critical edges or added preheaders. + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) + continue; + + HoistRegion(DT->getNode(CurLoop->getHeader())); + } + + return Changed; +} + +/// HoistRegion - Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in depth +/// first order w.r.t the DominatorTree. This allows us to visit definitions +/// before uses, allowing us to hoist a loop body in one pass without iteration. +/// +void MachineLICM::HoistRegion(MachineDomTreeNode *N) { + assert(N != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = N->getBlock(); + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) return; + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr &MI = *MII; + + Hoist(MI); + + MII = NextMII; + } + + const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); + + for (unsigned I = 0, E = Children.size(); I != E; ++I) + HoistRegion(Children[I]); +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + const TargetInstrDesc &TID = I.getDesc(); + + // Ignore stuff that we obviously can't hoist. + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!TII->isInvariantLoad(&I)) + // FIXME: we should be able to sink loads with no other side effects if + // there is nothing that can change memory from here until the end of + // block. This is a trivial form of alias analysis.
+ return false; + } + + DEBUG({ + DOUT << "--- Checking if we can hoist " << I; + if (I.getDesc().getImplicitUses()) { + DOUT << " * Instruction has implicit uses:\n"; + + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); + *ImpUses; ++ImpUses) + DOUT << " -> " << TRI->getName(*ImpUses) << "\n"; + } + + if (I.getDesc().getImplicitDefs()) { + DOUT << " * Instruction has implicit defines:\n"; + + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); + *ImpDefs; ++ImpDefs) + DOUT << " -> " << TRI->getName(*ImpDefs) << "\n"; + } + }); + + if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { + DOUT << "Cannot hoist with implicit defines or uses\n"; + return false; + } + + // The instruction is loop invariant if all of its operands are. + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I.getOperand(i); + + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + // Don't hoist an instruction that uses or defines a physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + + if (!MO.isUse()) + continue; + + assert(RegInfo->getVRegDef(Reg) && + "Machine instr not mapped for this vreg?!"); + + // If the loop contains the definition of an operand, then the instruction + // isn't loop invariant. + if (CurLoop->contains(RegInfo->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is loop invariant! + return true; +} + + +/// HasPHIUses - Return true if the specified register has any PHI use. +static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { + for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), + UE = RegInfo->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getOpcode() == TargetInstrInfo::PHI) + return true; + } + return false; +} + +/// IsProfitableToHoist - Return true if it is potentially profitable to hoist +/// the given loop invariant. +bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { + if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + return false; + + const TargetInstrDesc &TID = MI.getDesc(); + + // FIXME: For now, only hoist re-materializable instructions. LICM will + // increase register pressure. We want to make sure it doesn't increase + // spilling. + if (!TID.mayLoad() && (!TID.isRematerializable() || + !TII->isTriviallyReMaterializable(&MI))) + return false; + + // If the result(s) of this instruction are used by PHIs, then don't hoist + // it. The presence of joins makes it difficult for the current register + // allocator implementation to perform remat.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + if (HasPHIUses(MO.getReg(), RegInfo)) + return false; + } + + return true; +} + +static const MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs, + MachineRegisterInfo *RegInfo) { + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { + const MachineInstr *PrevMI = PrevMIs[i]; + unsigned NumOps2 = PrevMI->getNumOperands(); + if (NumOps != NumOps2) + continue; + bool IsSame = true; + for (unsigned j = 0; j != NumOps; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (MO.isReg() && MO.isDef()) { + if (RegInfo->getRegClass(MO.getReg()) != + RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) { + IsSame = false; + break; + } + continue; + } + if (!MO.isIdenticalTo(PrevMI->getOperand(j))) { + IsSame = false; + break; + } + } + if (IsSame) + return PrevMI; + } + return 0; +} + +/// Hoist - When an instruction is found to use only loop invariant operands +/// that are safe to hoist, this instruction is called to do the dirty work. +/// +void MachineLICM::Hoist(MachineInstr &MI) { + if (!IsLoopInvariantInst(MI)) return; + if (!IsProfitableToHoist(MI)) return; + + // Now move the instruction to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + DOUT << "Hoisting " << MI; + if (CurPreheader->getBasicBlock()) + DOUT << " to MachineBasicBlock " + << CurPreheader->getBasicBlock()->getName(); + if (MI.getParent()->getBasicBlock()) + DOUT << " from MachineBasicBlock " + << MI.getParent()->getBasicBlock()->getName(); + DOUT << "\n"; + }); + + // Look for opportunity to CSE the hoisted instruction. + std::pair<unsigned, unsigned> BBOpcPair = + std::make_pair(CurPreheader->getNumber(), MI.getOpcode()); + DenseMap<std::pair<unsigned, unsigned>, + std::vector<const MachineInstr*> >::iterator CI = CSEMap.find(BBOpcPair); + bool DoneCSE = false; + if (CI != CSEMap.end()) { + const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo); + if (Dup) { + DOUT << "CSEing " << MI; + DOUT << " with " << *Dup; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && MO.isDef()) + RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + } + MI.eraseFromParent(); + DoneCSE = true; + ++NumCSEed; + } + } + + // Otherwise, splice the instruction to the preheader. + if (!DoneCSE) { + CurPreheader->splice(CurPreheader->getFirstTerminator(), + MI.getParent(), &MI); + // Add to the CSE map. + if (CI != CSEMap.end()) + CI->second.push_back(&MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(&MI); + CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs)); + } + } + + ++NumHoisted; + Changed = true; +} diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp new file mode 100644 index 000000000000..68ddb7b3f473 --- /dev/null +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -0,0 +1,40 @@ +//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachineLoopInfo class that is used to identify natural +// loops and determine the loop depth of various nodes of the CFG.
Note that +// the loops identified may actually be several natural loops that share the +// same header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +using namespace llvm; + +TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock, MachineLoop>); +TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock, MachineLoop>); + +char MachineLoopInfo::ID = 0; +static RegisterPass<MachineLoopInfo> +X("machine-loops", "Machine Natural Loop Construction", true); + +const PassInfo *const llvm::MachineLoopInfoID = &X; + +bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + LI->Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + return false; +} + +void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 000000000000..1d8109eb8d99 --- /dev/null +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,368 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" + +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Streams.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use TargetData's. +static RegisterPass<MachineModuleInfo> +X("machinemoduleinfo", "Module Information"); +char MachineModuleInfo::ID = 0; + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo() +: ImmutablePass(&ID) +, LabelIDList() +, FrameMoves() +, LandingPads() +, Personalities() +, CallsEHReturn(0) +, CallsUnwindInit(0) +, DbgInfoAvailable(false) +{ + // Always emit "no personality" info + Personalities.push_back(NULL); +} + +MachineModuleInfo::~MachineModuleInfo() { +} + +/// doInitialization - Initialize the state for a new module. +/// +bool MachineModuleInfo::doInitialization() { + return false; +} + +/// doFinalization - Tear down the state after completion of a module. +/// +bool MachineModuleInfo::doFinalization() { + return false; +} + +/// BeginFunction - Begin gathering function meta information. +/// +void MachineModuleInfo::BeginFunction(MachineFunction *MF) { + // Coming soon. +} + +/// EndFunction - Discard function meta information. +/// +void MachineModuleInfo::EndFunction() { + // Clean up frame info. + FrameMoves.clear(); + + // Clean up exception info. + LandingPads.clear(); + TypeInfos.clear(); + FilterIds.clear(); + FilterEnds.clear(); + CallsEHReturn = 0; + CallsUnwindInit = 0; +} + +/// AnalyzeModule - Scan the module for global debug information.
+/// +void MachineModuleInfo::AnalyzeModule(Module &M) { + // Insert functions in the llvm.used array into UsedFunctions. + GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i))) + if (CE->getOpcode() == Instruction::BitCast) + if (Function *F = dyn_cast<Function>(CE->getOperand(0))) + UsedFunctions.insert(F); + } +} + +//===-EH-------------------------------------------------------------------===// + +/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the +/// specified MachineBasicBlock. +LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo + (MachineBasicBlock *LandingPad) { + unsigned N = LandingPads.size(); + for (unsigned i = 0; i < N; ++i) { + LandingPadInfo &LP = LandingPads[i]; + if (LP.LandingPadBlock == LandingPad) + return LP; + } + + LandingPads.push_back(LandingPadInfo(LandingPad)); + return LandingPads[N]; +} + +/// addInvoke - Provide the begin and end labels of an invoke style call and +/// associate it with a try landing pad block. +void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, + unsigned BeginLabel, unsigned EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +/// addLandingPad - Provide the label of a try LandingPad block. +/// +unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { + unsigned LandingPadLabel = NextLabelID(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +/// addPersonality - Provide the personality function for the exception +/// information. +void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, + Function *Personality) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.Personality = Personality; + + for (unsigned i = 0; i < Personalities.size(); ++i) + if (Personalities[i] == Personality) + return; + + Personalities.push_back(Personality); +} + +/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. +/// +void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. +/// +void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter(TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +/// addCleanup - Add a cleanup action for a landing pad. +/// +void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.TypeIds.push_back(0); +} + +/// TidyLandingPads - Remap landing pad labels and remove any deleted landing +/// pads.
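getOrCreateLandingPadInfo above is a plain linear find-or-create; the per-function landing-pad list is small enough that no map is warranted. A standalone sketch of the pattern (made-up types; the real key is a MachineBasicBlock pointer):

    #include <cstdio>
    #include <vector>

    // Toy version of the find-or-create pattern used above.
    struct PadInfo { int Block; unsigned Label; };

    static PadInfo &getOrCreate(std::vector<PadInfo> &Pads, int Block) {
      for (size_t i = 0, N = Pads.size(); i < N; ++i)
        if (Pads[i].Block == Block)
          return Pads[i];           // found an existing entry
      Pads.push_back(PadInfo{Block, 0});
      return Pads.back();           // otherwise create one
    }

    int main() {
      std::vector<PadInfo> Pads;
      getOrCreate(Pads, 3).Label = 7;       // creates the entry
      printf("pads=%zu label=%u\n", Pads.size(),
             getOrCreate(Pads, 3).Label);   // finds it again: pads=1 label=7
      return 0;
    }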
+void MachineModuleInfo::TidyLandingPads() { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel); + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "nounwind" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) { + unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]); + unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]); + + if (!BeginLabel || !EndLabel) { + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + continue; + } + + LandingPad.BeginLabels[j] = BeginLabel; + LandingPad.EndLabels[j] = EndLabel; + ++j; + } + + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + // If there is no landing pad, ensure that the list of typeids is empty. + // If the only typeid is a cleanup, this is the same as having no typeids. + if (!LandingPad.LandingPadBlock || + (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) + LandingPad.TypeIds.clear(); + + ++i; + } +} + +/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// function wide. +unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +/// getFilterIDFor - Return the filter id for the specified typeinfos. This is +/// function wide. +int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + for (unsigned I = 0, N = TyIds.size(); I != N; ++I) + FilterIds.push_back(TyIds[I]); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} + +/// getPersonality - Return the personality function for the current function. +Function *MachineModuleInfo::getPersonality() const { + // FIXME: Until PR1414 is fixed, we're using 1 personality function per + // function. + return !LandingPads.empty() ? LandingPads[0].Personality : NULL; +} + +/// getPersonalityIndex - Return unique index for current personality +/// function. NULL personality function should always get zero index. +unsigned MachineModuleInfo::getPersonalityIndex() const { + const Function* Personality = NULL; + + // Scan landing pads. If there is at least one non-NULL personality - use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i) + if (LandingPads[i].Personality) { + Personality = LandingPads[i].Personality; + break; + } + + for (unsigned i = 0; i < Personalities.size(); ++i) { + if (Personalities[i] == Personality) + return i; + } + + // This should never happen + assert(0 && "Personality function should be set!"); + return 0; +} + +//===----------------------------------------------------------------------===// +/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows +/// an info consumer to determine if the range of two labels is empty, by seeing +/// if the labels map to the same reduced label. + +namespace llvm { + +struct DebugLabelFolder : public MachineFunctionPass { + static char ID; + DebugLabelFolder() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Label Folder"; } +}; + +char DebugLabelFolder::ID = 0; + +bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { + // Get machine module info. + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (!MMI) return false; + + // Track if change is made. + bool MadeChange = false; + // No prior label to begin. + unsigned PriorLabel = 0; + + // Iterate through basic blocks. + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + // Iterate through instructions. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + // Is it a label? + if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){ + // The label ID # is always operand #0, an immediate. + unsigned NextLabel = I->getOperand(0).getImm(); + + // If there was an immediate prior label. + if (PriorLabel) { + // Remap the current label to prior label. + MMI->RemapLabel(NextLabel, PriorLabel); + // Delete the current label. + I = BB->erase(I); + // Indicate a change has been made. + MadeChange = true; + continue; + } else { + // Start a new round. + PriorLabel = NextLabel; + } + } else { + // No consecutive labels. + PriorLabel = 0; + } + + ++I; + } + } + + return MadeChange; +} + +FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); } + +} + diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp new file mode 100644 index 000000000000..9f4ef1287803 --- /dev/null +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -0,0 +1,41 @@ +//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the machine function pass registry for register allocators +// and instruction schedulers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePassRegistry.h" + +using namespace llvm; + + +/// Add - Adds a function pass to the registration list.
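The Add and Remove routines below maintain an intrusive singly-linked list of registered nodes and notify an optional listener; Remove unlinks through a pointer-to-pointer walk so no predecessor bookkeeping is needed. A self-contained sketch of the same structure (made-up node and listener types):

    #include <cstdio>

    // Toy model of the registry list manipulated by Add/Remove below.
    struct Node {
      const char *Name;
      Node *Next;
    };

    struct Registry {
      Node *List = nullptr;
      void (*Listener)(const char *) = nullptr;

      void Add(Node *N) {          // push at the head, like Add below
        N->Next = List;
        List = N;
        if (Listener) Listener(N->Name);
      }
      void Remove(Node *N) {       // unlink via a pointer-to-pointer walk
        for (Node **I = &List; *I; I = &(*I)->Next)
          if (*I == N) { *I = N->Next; break; }
      }
    };

    int main() {
      Registry R;
      R.Listener = [](const char *Name) { printf("added %s\n", Name); };
      Node A{"regalloc-linearscan", nullptr}, B{"regalloc-local", nullptr};
      R.Add(&A); R.Add(&B);
      R.Remove(&A);
      printf("head: %s\n", R.List->Name); // regalloc-local
      return 0;
    }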
+/// +void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { + Node->setNext(List); + List = Node; + if (Listener) Listener->NotifyAdd(Node->getName(), + Node->getCtor(), + Node->getDescription()); +} + + +/// Remove - Removes a function pass from the registration list. +/// +void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) { + for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) { + if (*I == Node) { + if (Listener) Listener->NotifyRemove(Node->getName()); + *I = (*I)->getNext(); + break; + } + } +} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp new file mode 100644 index 000000000000..4f5ab1f5860e --- /dev/null +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -0,0 +1,125 @@ +//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the MachineRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + +MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { + VRegInfo.reserve(256); + RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1. + UsedPhysRegs.resize(TRI.getNumRegs()); + + // Create the physreg use/def lists. + PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; + memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); +} + +MachineRegisterInfo::~MachineRegisterInfo() { +#ifndef NDEBUG + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) + assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?"); + for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) + assert(!PhysRegUseDefLists[i] && + "PhysRegUseDefLists has entries after all instructions are deleted"); +#endif + delete [] PhysRegUseDefLists; +} + +/// setRegClass - Set the register class of the specified virtual register. +/// +void +MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + unsigned VR = Reg; + Reg -= TargetRegisterInfo::FirstVirtualRegister; + assert(Reg < VRegInfo.size() && "Invalid vreg!"); + const TargetRegisterClass *OldRC = VRegInfo[Reg].first; + VRegInfo[Reg].first = RC; + + // Remove from old register class's vregs list. This may be slow but + // fortunately this operation is rarely needed. + std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; + std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); + VRegs.erase(I); + + // Add to new register class's vregs list. + RegClass2VRegMap[RC->getID()].push_back(VR); +} + +/// createVirtualRegister - Create and return a new virtual register in the +/// function with the specified register class. +/// +unsigned +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ + assert(RegClass && "Cannot create register without RegClass!"); + // Add a reg, but keep track of whether the vector reallocated or not. + void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; + VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); + + if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) + // The vector reallocated, handle this now.
+ HandleVRegListReallocation(); + unsigned VR = getLastVirtReg(); + RegClass2VRegMap[RegClass->getID()].push_back(VR); + return VR; +} + +/// HandleVRegListReallocation - We just added a virtual register to the +/// VRegInfo info list and it reallocated. Update the use/def lists info +/// pointers. +void MachineRegisterInfo::HandleVRegListReallocation() { + // The back pointers for the vreg lists point into the previous vector. + // Update them to point to their correct slots. + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) { + MachineOperand *List = VRegInfo[i].second; + if (!List) continue; + // Update the back-pointer to be accurate once more. + List->Contents.Reg.Prev = &VRegInfo[i].second; + } +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. +void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + + // TODO: This could be more efficient by bulk changing the operands. + for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { + MachineOperand &O = I.getOperand(); + ++I; + O.setReg(ToReg); + } +} + + +/// getVRegDef - Return the machine instr that defines the specified virtual +/// register or null if none is found. This assumes that the code is in SSA +/// form, so there should only be one definition. +MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { + assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && + "Invalid vreg!"); + for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { + // Since we are in SSA form, we can stop at the first definition. + if (I.getOperand().isDef()) + return &*I; + } + return 0; +} + + +#ifndef NDEBUG +void MachineRegisterInfo::dumpUses(unsigned Reg) const { + for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) + I.getOperand().getParent()->dump(); +} +#endif diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp new file mode 100644 index 000000000000..0e18fa742f5b --- /dev/null +++ b/lib/CodeGen/MachineSink.cpp @@ -0,0 +1,257 @@ +//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-sink" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +STATISTIC(NumSunk, "Number of machine instructions sunk"); + +namespace { + class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + MachineFunction *CurMF; // Current MachineFunction + MachineRegisterInfo *RegInfo; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree for the current Loop + + public: + static char ID; // Pass identification + MachineSinking() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + private: + bool ProcessBlock(MachineBasicBlock &MBB); + bool SinkInstruction(MachineInstr *MI, bool &SawStore); + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const; + }; +} // end anonymous namespace + +char MachineSinking::ID = 0; +static RegisterPass<MachineSinking> +X("machine-sink", "Machine code sinking"); + +FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified register +/// occur in blocks dominated by the specified block. +bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, + MachineBasicBlock *MBB) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Only makes sense for vregs"); + for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg), + E = RegInfo->reg_end(); I != E; ++I) { + if (I.getOperand().isDef()) continue; // ignore def. + + // Determine the block of the use. + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseInst->getOpcode() == TargetInstrInfo::PHI) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + } + // Check that it dominates. + if (!DT->dominates(MBB, UseBlock)) + return false; + } + return true; +} + + + +bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { + DOUT << "******** Machine Sinking ********\n"; + + CurMF = &MF; + TM = &CurMF->getTarget(); + TII = TM->getInstrInfo(); + RegInfo = &CurMF->getRegInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (MachineFunction::iterator I = CurMF->begin(), E = CurMF->end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { + // Can't sink anything out of a block that has less than two successors. + if (MBB.succ_size() <= 1 || MBB.empty()) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up.
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { + // Can't sink anything out of a block that has fewer than two successors. + if (MBB.succ_size() <= 1 || MBB.empty()) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up. Remember if we saw a store. + MachineBasicBlock::iterator I = MBB.end(); + --I; + bool ProcessedBegin, SawStore = false; + do { + MachineInstr *MI = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == MBB.begin(); + if (!ProcessedBegin) + --I; + + if (SinkInstruction(MI, SawStore)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified +/// machine instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(TII, SawStore)) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. + MachineBasicBlock *ParentBlock = MI->getParent(); + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + MachineBasicBlock *SuccToSinkTo = 0; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; // Ignore non-register operands. + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // If this is a physical register use, we can't move it. If it is a def, + // we can move it, but only if the def is dead. + if (MO.isUse() || !MO.isDead()) + return false; + } else { + // Virtual register uses are always safe to sink. + if (MO.isUse()) continue; + + // If it's not safe to move defs of the register class, then abort. + if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) + return false; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // x = computation + // if () {} else {} + // use x + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Virtual register defs can only be sunk if all their uses are in blocks + // dominated by one of the successors. + if (SuccToSinkTo) { + // If a previous operand picked a block to sink to, then this operand + // must be sinkable to the same block. + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + return false; + continue; + } + + // Otherwise, we should look at all the successors and decide which one + // we should sink to. + for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), + E = ParentBlock->succ_end(); SI != E; ++SI) { + if (AllUsesDominatedByBlock(Reg, *SI)) { + SuccToSinkTo = *SI; + break; + } + } + + // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0) + return false; + } + } + + // If there are no outputs, it must have side-effects. + if (SuccToSinkTo == 0) + return false; + + // It's not safe to sink instructions to an EH landing pad. Control flow into + // a landing pad is implicitly defined. + if (SuccToSinkTo->isLandingPad()) + return false; + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MI->getParent() == SuccToSinkTo) + return false; + + DEBUG(cerr << "Sink instr " << *MI); + DEBUG(cerr << "to block " << *SuccToSinkTo); + + // If the block has multiple predecessors, this would introduce computation + // on a path where it doesn't already exist. We could split the critical + // edge, but for now we just punt. + // FIXME: Split critical edges if not backedges. + if (SuccToSinkTo->pred_size() > 1) { + DEBUG(cerr << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Determine where to insert into. Skip phi nodes. + MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && + InsertPos->getOpcode() == TargetInstrInfo::PHI) + ++InsertPos; + + // Move the instruction. + SuccToSinkTo->splice(InsertPos, ParentBlock, MI, + ++MachineBasicBlock::iterator(MI)); + return true; +}
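MachineBasicBlock::splice at the end of SinkInstruction moves the instruction between blocks without copying or re-creating it, in the same way std::list::splice re-links a node between lists. A small self-contained analogue (toy integers standing in for instructions; the leading element plays the role of the PHI that the insertion point skips):

#include <cstdio>
#include <list>

int main() {
  std::list<int> From, To;
  From.push_back(1);                    // the "instruction" we want to sink
  From.push_back(2);
  To.push_back(9);                      // a "PHI" already at the head of To

  std::list<int>::iterator MI = From.begin();        // points at 1
  std::list<int>::iterator InsertPos = To.begin();
  ++InsertPos;                          // skip the leading "PHI", as above

  // Move the half-open range [MI, next(MI)) from From into To.
  std::list<int>::iterator Next = MI;
  ++Next;
  To.splice(InsertPos, From, MI, Next);

  for (std::list<int>::iterator I = To.begin(); I != To.end(); ++I)
    std::printf("%d ", *I);             // prints: 9 1
  std::printf("\n");
  return 0;
}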
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp new file mode 100644 index 000000000000..be1396c7a810 --- /dev/null +++ b/lib/CodeGen/MachineVerifier.cpp @@ -0,0 +1,690 @@ +//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass to verify generated machine code. The following is checked: +// +// Operand counts: All explicit operands must be present. +// +// Register classes: All physical and virtual register operands must be +// compatible with the register class required by the instruction descriptor. +// +// Register live intervals: Registers must be defined only once, and must be +// defined before use. +// +// The machine code verifier is enabled from LLVMTargetMachine.cpp with the +// command-line option -verify-machineinstrs, or by defining the environment +// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive +// the verifier errors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include <fstream> + +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN MachineVerifier : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + + MachineVerifier(bool allowDoubleDefs = false) : + MachineFunctionPass(&ID), + allowVirtDoubleDefs(allowDoubleDefs), + allowPhysDoubleDefs(allowDoubleDefs), + OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnMachineFunction(MachineFunction &MF); + + const bool allowVirtDoubleDefs; + const bool allowPhysDoubleDefs; + + const char *const OutFileName; + std::ostream *OS; + const MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + + unsigned foundErrors; + + typedef SmallVector<unsigned, 16> RegVector; + typedef DenseSet<unsigned> RegSet; + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + + BitVector regsReserved; + RegSet regsLive; + RegVector regsDefined, regsImpDefined, regsDead, regsKilled; + + // Add Reg and any sub-registers to RV + void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + RV.push_back(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + RV.push_back(*R); + } + + // Does RS contain any super-registers of Reg? + bool anySuperRegisters(const RegSet &RS, unsigned Reg) { + for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++) + if (RS.count(*R)) + return true; + return false; + } + + struct BBInfo { + // Is this MBB reachable from the MF entry point? + bool reachable; + + // Vregs that must be live in because they are used without being + // defined. Map value is the user. + RegMap vregsLiveIn; + + // Vregs that must be dead in because they are defined without being + // killed first. Map value is the defining instruction. + RegMap vregsDeadIn; + + // Regs killed in MBB. They may be defined again, and will then be in both + // regsKilled and regsLiveOut. + RegSet regsKilled; + + // Regs defined in MBB and live out. Note that vregs passing through may + // be live out without being mentioned here. + RegSet regsLiveOut; + + // Vregs that pass through MBB untouched. This set is disjoint from + // regsKilled and regsLiveOut. + RegSet vregsPassed; + + BBInfo() : reachable(false) {} + + // Add register to vregsPassed if it belongs there. Return true if + // anything changed. + bool addPassed(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) + return false; + return vregsPassed.insert(Reg).second; + } + + // Same for a full set. + bool addPassed(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addPassed(*I)) + changed = true; + return changed; + } + + // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const { + return regsLiveOut.count(Reg) || vregsPassed.count(Reg); + } + }; + + // Extra register info per MBB. + DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; + + bool isReserved(unsigned Reg) { + return Reg < regsReserved.size() && regsReserved[Reg]; + } + + void visitMachineFunctionBefore(); + void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineInstrBefore(const MachineInstr *MI); + void visitMachineOperand(const MachineOperand *MO, unsigned MONum); + void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); + void visitMachineFunctionAfter(); + + void report(const char *msg, const MachineFunction *MF); + void report(const char *msg, const MachineBasicBlock *MBB); + void report(const char *msg, const MachineInstr *MI); + void report(const char *msg, const MachineOperand *MO, unsigned MONum); + + void markReachable(const MachineBasicBlock *MBB); + void calcMaxRegsPassed(); + void calcMinRegsPassed(); + void checkPHIOps(const MachineBasicBlock *MBB); + }; +} + +char MachineVerifier::ID = 0; +static RegisterPass<MachineVerifier> +MachineVer("machineverifier", "Verify generated machine code"); +static const PassInfo *const MachineVerifyID = &MachineVer; + +FunctionPass * +llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) +{ + return new MachineVerifier(allowPhysDoubleDefs); +} + +bool +MachineVerifier::runOnMachineFunction(MachineFunction &MF) +{ + std::ofstream OutFile; + if (OutFileName) { + OutFile.open(OutFileName, std::ios::out | std::ios::app); + OS = &OutFile; + } else { + OS = cerr.stream(); + } + + foundErrors = 0; + + this->MF = &MF; + TM = &MF.getTarget(); + TRI = TM->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + visitMachineFunctionBefore(); + for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); + MFI!=MFE; ++MFI) { + visitMachineBasicBlockBefore(MFI); + for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), + MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + visitMachineInstrBefore(MBBI); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) + visitMachineOperand(&MBBI->getOperand(I), I); + visitMachineInstrAfter(MBBI); + } + visitMachineBasicBlockAfter(MFI); + } + visitMachineFunctionAfter(); + + if (OutFileName) + OutFile.close(); + + if (foundErrors) { + cerr << "\nStopping with " << foundErrors << " machine code errors.\n"; + exit(1); + } + + return false; // no changes +} + +void +MachineVerifier::report(const char *msg, const MachineFunction *MF) +{ + assert(MF); + *OS << "\n"; + if (!foundErrors++) + MF->print(OS); + *OS << "*** Bad machine code: " << msg << " ***\n" + << "- function: " << MF->getFunction()->getName() << "\n"; +} + +void +MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) +{ + assert(MBB); + report(msg, MBB->getParent()); + *OS << "- basic block: " << MBB->getBasicBlock()->getName() + << " " << (void*)MBB + << " (#" << MBB->getNumber() << ")\n"; +} + +void +MachineVerifier::report(const char *msg, const MachineInstr *MI) +{ + assert(MI); + report(msg, MI->getParent()); + *OS << "- instruction: "; + MI->print(OS, TM); +} + +void +MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) +{ + assert(MO); + report(msg, MO->getParent()); + *OS << "- operand " << MONum << ": "; + MO->print(*OS, TM); + *OS << "\n"; +} + +void +MachineVerifier::markReachable(const MachineBasicBlock *MBB) +{ + BBInfo &MInfo = MBBInfoMap[MBB]; + if (!MInfo.reachable) { + MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) + markReachable(*SuI); + } +} + +void +MachineVerifier::visitMachineFunctionBefore() +{ + regsReserved = TRI->getReservedRegs(*MF); + markReachable(&MF->front()); +} + +void +MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) +{ + regsLive.clear(); + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) { + if (!TargetRegisterInfo::isPhysicalRegister(*I)) { + report("MBB live-in list contains non-physical register", MBB); + continue; + } + regsLive.insert(*I); + for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) + regsLive.insert(*R); + } + regsKilled.clear(); + regsDefined.clear(); + regsImpDefined.clear(); +} + +void +MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) +{ + const TargetInstrDesc &TI = MI->getDesc(); + if (MI->getNumExplicitOperands() < TI.getNumOperands()) { + report("Too few operands", MI); + *OS << TI.getNumOperands() << " operands expected, but " + << MI->getNumExplicitOperands() << " given.\n"; + } + if (!TI.isVariadic()) { + if (MI->getNumExplicitOperands() > TI.getNumOperands()) { + report("Too many operands", MI); + *OS << TI.getNumOperands() << " operands expected, but " + << MI->getNumExplicitOperands() << " given.\n"; + } + } +} + +void +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) +{ + const MachineInstr *MI = MO->getParent(); + const TargetInstrDesc &TI = MI->getDesc(); + + // The first TI.NumDefs operands must be explicit register defines + if (MONum < TI.getNumDefs()) { + if (!MO->isReg()) + report("Explicit definition must be a register", MO, MONum); + else if (!MO->isDef()) + report("Explicit definition marked as use", MO, MONum); + else if (MO->isImplicit()) + report("Explicit definition marked as implicit", MO, MONum); + } + + switch (MO->getType()) { + case MachineOperand::MO_Register: { + const unsigned Reg = MO->getReg(); + if (!Reg) + return; + + // Check Live Variables. + if (MO->isUse()) { + if (MO->isKill()) { + addRegWithSubRegs(regsKilled, Reg); + } else { + // TwoAddress instr modifying a reg is treated as kill+def. + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx) && + MI->getOperand(defIdx).getReg() == Reg) + addRegWithSubRegs(regsKilled, Reg); + } + // Explicit use of a dead register. + if (!MO->isImplicit() && !regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (MI->getOpcode() != TargetInstrInfo::PHI) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } else { + // Register defined. + // TODO: verify that earlyclobber ops are not used. + if (MO->isImplicit()) + addRegWithSubRegs(regsImpDefined, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + } + + // Check register classes.
+ if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + unsigned SubIdx = MO->getSubReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + unsigned sr = Reg; + if (SubIdx) { + unsigned s = TRI->getSubReg(Reg, SubIdx); + if (!s) { + report("Invalid subregister index for physical register", + MO, MONum); + return; + } + sr = s; + } + if (TOI.RegClass) { + const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (!DRC->contains(sr)) { + report("Illegal physical register for instruction", MO, MONum); + *OS << TRI->getName(sr) << " is not a " + << DRC->getName() << " register.\n"; + } + } + } else { + // Virtual register. + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SubIdx) { + if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) { + report("Invalid subregister index for virtual register", MO, MONum); + return; + } + RC = *(RC->subregclasses_begin()+SubIdx); + } + if (TOI.RegClass) { + const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (RC != DRC && !RC->hasSuperClass(DRC)) { + report("Illegal virtual register for instruction", MO, MONum); + *OS << "Expected a " << DRC->getName() << " register, but got a " + << RC->getName() << " register\n"; + } + } + } + } + break; + } + // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do. + // case MachineOperand::MO_MachineBasicBlock: + // if (MI->getOpcode() == TargetInstrInfo::PHI) { + // if (!MO->getMBB()->isSuccessor(MI->getParent())) + // report("PHI operand is not in the CFG", MO, MONum); + // } + // break; + default: + break; + } +} + +void +MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) +{ + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + set_union(MInfo.regsKilled, regsKilled); + set_subtract(regsLive, regsKilled); + regsKilled.clear(); + + for (RegVector::const_iterator I = regsDefined.begin(), + E = regsDefined.end(); I != E; ++I) { + if (regsLive.count(*I)) { + if (TargetRegisterInfo::isPhysicalRegister(*I)) { + // We allow double defines to physical registers with live + // super-registers. + if (!allowPhysDoubleDefs && !isReserved(*I) && + !anySuperRegisters(regsLive, *I)) { + report("Redefining a live physical register", MI); + *OS << "Register " << TRI->getName(*I) + << " was defined but already live.\n"; + } + } else { + if (!allowVirtDoubleDefs) { + report("Redefining a live virtual register", MI); + *OS << "Virtual register %reg" << *I + << " was defined but already live.\n"; + } + } + } else if (TargetRegisterInfo::isVirtualRegister(*I) && + !MInfo.regsKilled.count(*I)) { + // Virtual register defined without being killed first must be dead on + // entry. + MInfo.vregsDeadIn.insert(std::make_pair(*I, MI)); + } + } + + set_union(regsLive, regsDefined); regsDefined.clear(); + set_union(regsLive, regsImpDefined); regsImpDefined.clear(); + set_subtract(regsLive, regsDead); regsDead.clear(); +} + +void +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) +{ + MBBInfoMap[MBB].regsLiveOut = regsLive; + regsLive.clear(); +} + +// Calculate the largest possible vregsPassed sets. These are the registers that +// can pass through an MBB live, but may not be live every time. It is assumed +// that all vregsPassed sets are empty before the call. +void +MachineVerifier::calcMaxRegsPassed() +{ + // First push live-out regs to successors' vregsPassed. Remember the MBBs that + // have any vregsPassed. 
+ DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), + SuE = MBB.succ_end(); SuI != SuE; ++SuI) { + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.regsLiveOut)) + todo.insert(*SuI); + } + } + + // Iteratively push vregsPassed to successors. This will converge to the same + // final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) { + if (*SuI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.vregsPassed)) + todo.insert(*SuI); + } + } +} + +// Calculate the minimum vregsPassed set. These are the registers that always +// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has +// been called earlier. +void +MachineVerifier::calcMinRegsPassed() +{ + DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) + todo.insert(MFI); + + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + + // Remove entries from vregsPassed that are not live out from all + // reachable predecessors. + RegSet dead; + for (RegSet::iterator I = MInfo.vregsPassed.begin(), + E = MInfo.vregsPassed.end(); I != E; ++I) { + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) { + dead.insert(*I); + break; + } + } + } + // If any regs removed, we need to recheck successors. + if (!dead.empty()) { + set_subtract(MInfo.vregsPassed, dead); + todo.insert(MBB->succ_begin(), MBB->succ_end()); + } + } +} + +// Check PHI instructions at the beginning of MBB. It is assumed that +// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. +void +MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) +{ + for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + DenseSet<const MachineBasicBlock*> seen; + + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + if (!Pre->isSuccessor(MBB)) + continue; + seen.insert(Pre); + BBInfo &PrInfo = MBBInfoMap[Pre]; + if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) + report("PHI operand is not live-out from predecessor", + &BBI->getOperand(i), i); + } + + // Did we see all predecessors? + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (!seen.count(*PrI)) { + report("Missing PHI operand", BBI); + *OS << "MBB #" << (*PrI)->getNumber() + << " is a predecessor according to the CFG.\n"; + } + } + } +} + +void +MachineVerifier::visitMachineFunctionAfter() +{ + calcMaxRegsPassed(); + + // With the maximal set of vregsPassed we can verify dead-in registers. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs.
+ if (!MInfo.reachable) + continue; + + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; + + // Verify physical live-ins. EH landing pads have magic live-ins so we + // ignore them. + if (!MFI->isLandingPad()) { + for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(), + E = MFI->livein_end(); I != E; ++I) { + if (TargetRegisterInfo::isPhysicalRegister(*I) && + !isReserved(*I) && !PrInfo.isLiveOut(*I)) { + report("Live-in physical register is not live-out from predecessor", + MFI); + *OS << "Register " << TRI->getName(*I) + << " is not live-out from MBB #" << (*PrI)->getNumber() + << ".\n"; + } + } + } + + // Verify dead-in virtual registers. + if (!allowVirtDoubleDefs) { + for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), + E = MInfo.vregsDeadIn.end(); I != E; ++I) { + // DeadIn register must be in neither regsLiveOut nor vregsPassed of + // any predecessor. + if (PrInfo.isLiveOut(I->first)) { + report("Live-in virtual register redefined", I->second); + *OS << "Register %reg" << I->first + << " was live-out from predecessor MBB #" + << (*PrI)->getNumber() << ".\n"; + } + } + } + } + } + + calcMinRegsPassed(); + + // With the minimal set of vregsPassed we can verify live-in virtual + // registers, including PHI instructions. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + checkPHIOps(MFI); + + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; + + for (RegMap::iterator I = MInfo.vregsLiveIn.begin(), + E = MInfo.vregsLiveIn.end(); I != E; ++I) { + if (!PrInfo.isLiveOut(I->first)) { + report("Used virtual register is not live-in", I->second); + *OS << "Register %reg" << I->first + << " is not live-out from predecessor MBB #" + << (*PrI)->getNumber() + << ".\n"; + } + } + } + } +}
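calcMaxRegsPassed and calcMinRegsPassed above are classic worklist fixpoints over the CFG. A compact standalone sketch of the forward direction, with hypothetical names and kill information omitted for brevity: live-out registers are pushed into successors' passed-through sets until nothing changes.

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Successor lists for a 4-block CFG: 0->1, 1->2, 2->1, 2->3.
  std::vector<std::vector<int> > Succ(4);
  Succ[0].push_back(1); Succ[1].push_back(2);
  Succ[2].push_back(1); Succ[2].push_back(3);

  std::set<int> LiveOut[4], Passed[4];
  LiveOut[0].insert(100);               // vreg 100 is live out of block 0

  std::vector<int> Todo(1, 0);          // worklist, seeded with the entry
  while (!Todo.empty()) {
    int B = Todo.back(); Todo.pop_back();
    for (unsigned i = 0; i != Succ[B].size(); ++i) {
      int S = Succ[B][i];
      // Everything live out of B or passing through B may pass through S.
      std::set<int> Out(LiveOut[B]);
      Out.insert(Passed[B].begin(), Passed[B].end());
      size_t Before = Passed[S].size();
      Passed[S].insert(Out.begin(), Out.end());
      if (Passed[S].size() != Before)
        Todo.push_back(S);              // S changed: revisit its successors
    }
  }
  for (int B = 0; B != 4; ++B)
    std::printf("block %d: %u vreg(s) may pass through\n",
                B, (unsigned)Passed[B].size());
  return 0;
}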
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile new file mode 100644 index 000000000000..4ab3e3c0013e --- /dev/null +++ b/lib/CodeGen/Makefile @@ -0,0 +1,22 @@ +##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMCodeGen +PARALLEL_DIRS = SelectionDAG AsmPrinter +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + +# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL +# in this directory. Disable -pedantic for this broken compiler. +ifneq ($(HUGE_VAL_SANITY),yes) +CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts)) +endif + diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 000000000000..f7bc9f3d31dd --- /dev/null +++ b/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,38 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. +// +// The frametable emitter is in OcamlGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN OcamlGC : public GCStrategy { + public: + OcamlGC(); + }; +} + +static GCRegistry::Add<OcamlGC> +X("ocaml", "ocaml 3.10-compatible GC"); + +void llvm::linkOcamlGC() { } + +OcamlGC::OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; +} diff --git a/lib/CodeGen/PBQP.cpp b/lib/CodeGen/PBQP.cpp new file mode 100644 index 000000000000..562300f94e1f --- /dev/null +++ b/lib/CodeGen/PBQP.cpp @@ -0,0 +1,1395 @@ +//===---------------- PBQP.cpp --------- PBQP Solver ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Developed by: Bernhard Scholz +// The University of Sydney +// http://www.it.usyd.edu.au/~scholz +//===----------------------------------------------------------------------===// + +#include "PBQP.h" +#include "llvm/Config/alloca.h" +#include <limits> +#include <cassert> +#include <cstring> + +namespace llvm { + +/************************************************************************** + * Data Structures + **************************************************************************/ + +/* edge of PBQP graph */ +typedef struct adjnode { + struct adjnode *prev, /* doubly chained list */ + *succ, + *reverse; /* reverse edge */ + int adj; /* adj. node */ + PBQPMatrix *costs; /* cost matrix of edge */ + + bool tc_valid; /* flag whether following fields are valid */ + int *tc_safe_regs; /* safe registers */ + int tc_impact; /* impact */ +} adjnode; + +/* bucket node */ +typedef struct bucketnode { + struct bucketnode *prev; /* doubly chained list */ + struct bucketnode *succ; + int u; /* node */ +} bucketnode; + +/* data structure of partitioned boolean quadratic problem */ +struct pbqp { + int num_nodes; /* number of nodes */ + int max_deg; /* maximal degree of a node */ + bool solved; /* flag that indicates whether PBQP has been solved yet */ + bool optimal; /* flag that indicates whether PBQP is optimal */ + PBQPNum min; + bool changed; /* flag whether graph has changed in simplification */ + + /* node fields */ + PBQPVector **node_costs; /* cost vectors of nodes */ + int *node_deg; /* node degree of nodes */ + int *solution; /* solution for node */ + adjnode **adj_list; /* adj.
list */ + bucketnode **bucket_ptr; /* bucket pointer of a node */ + + /* node stack */ + int *stack; /* stack of nodes */ + int stack_ptr; /* stack pointer */ + + /* bucket fields */ + bucketnode **bucket_list; /* bucket list */ + + int num_r0; /* counters for number statistics */ + int num_ri; + int num_rii; + int num_rn; + int num_rn_special; +}; + +bool isInf(PBQPNum n) { return n == std::numeric_limits<PBQPNum>::infinity(); } + +/***************************************************************************** + * allocation/de-allocation of pbqp problem + ****************************************************************************/ + +/* allocate new partitioned boolean quadratic program problem */ +pbqp *alloc_pbqp(int num_nodes) +{ + pbqp *this_; + int u; + + assert(num_nodes > 0); + + /* allocate memory for pbqp data structure */ + this_ = (pbqp *)malloc(sizeof(pbqp)); + + /* Initialize pbqp fields */ + this_->num_nodes = num_nodes; + this_->solved = false; + this_->optimal = true; + this_->min = 0.0; + this_->max_deg = 0; + this_->changed = false; + this_->num_r0 = 0; + this_->num_ri = 0; + this_->num_rii = 0; + this_->num_rn = 0; + this_->num_rn_special = 0; + + /* initialize/allocate stack fields of pbqp */ + this_->stack = (int *) malloc(sizeof(int)*num_nodes); + this_->stack_ptr = 0; + + /* initialize/allocate node fields of pbqp */ + this_->adj_list = (adjnode **) malloc(sizeof(adjnode *)*num_nodes); + this_->node_deg = (int *) malloc(sizeof(int)*num_nodes); + this_->solution = (int *) malloc(sizeof(int)*num_nodes); + this_->bucket_ptr = (bucketnode **) malloc(sizeof(bucketnode **)*num_nodes); + this_->node_costs = (PBQPVector**) malloc(sizeof(PBQPVector*) * num_nodes); + for(u=0;u<num_nodes;u++) { + this_->solution[u]=-1; + this_->adj_list[u]=NULL; + this_->node_deg[u]=0; + this_->bucket_ptr[u]=NULL; + this_->node_costs[u]=NULL; + } + + /* initialize bucket list */ + this_->bucket_list = NULL; + + return this_; +} + +/* free pbqp problem */ +void free_pbqp(pbqp *this_) +{ + int u; + int deg; + adjnode *adj_ptr,*adj_next; + bucketnode *bucket,*bucket_next; + + assert(this_ != NULL); + + /* free node cost fields */ + for(u=0;u < this_->num_nodes;u++) { + delete this_->node_costs[u]; + } + free(this_->node_costs); + + /* free bucket list */ + for(deg=0;deg<=this_->max_deg;deg++) { + for(bucket=this_->bucket_list[deg];bucket!=NULL;bucket=bucket_next) { + this_->bucket_ptr[bucket->u] = NULL; + bucket_next = bucket-> succ; + free(bucket); + } + } + free(this_->bucket_list); + + /* free adj. list */ + assert(this_->adj_list != NULL); + for(u=0;u < this_->num_nodes; u++) { + for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = adj_next) { + adj_next = adj_ptr -> succ; + if (u < adj_ptr->adj) { + assert(adj_ptr != NULL); + delete adj_ptr->costs; + } + if (adj_ptr -> tc_safe_regs != NULL) { + free(adj_ptr -> tc_safe_regs); + } + free(adj_ptr); + } + } + free(this_->adj_list); + + /* free other node fields */ + free(this_->node_deg); + free(this_->solution); + free(this_->bucket_ptr); + + /* free stack */ + free(this_->stack); + + /* free pbqp data structure itself */ + free(this_); +} + + +/**************************************************************************** + * adj. node routines + ****************************************************************************/ + +/* find data structure of adj.
node of a given node */ +static +adjnode *find_adjnode(pbqp *this_,int u,int v) +{ + adjnode *adj_ptr; + + assert (this_ != NULL); + assert (u >= 0 && u < this_->num_nodes); + assert (v >= 0 && v < this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + if (adj_ptr->adj == v) { + return adj_ptr; + } + } + return NULL; +} + +/* allocate a new data structure for adj. node */ +static +adjnode *alloc_adjnode(pbqp *this_,int u, PBQPMatrix *costs) +{ + adjnode *p; + + assert(this_ != NULL); + assert(costs != NULL); + assert(u >= 0 && u < this_->num_nodes); + + p = (adjnode *)malloc(sizeof(adjnode)); + assert(p != NULL); + + p->adj = u; + p->costs = costs; + + p->tc_valid= false; + p->tc_safe_regs = NULL; + p->tc_impact = 0; + + return p; +} + +/* insert adjacence node to adj. list */ +static +void insert_adjnode(pbqp *this_, int u, adjnode *adj_ptr) +{ + + assert(this_ != NULL); + assert(adj_ptr != NULL); + assert(u >= 0 && u < this_->num_nodes); + + /* if adjacency list of node is not empty -> update + first node of the list */ + if (this_ -> adj_list[u] != NULL) { + assert(this_->adj_list[u]->prev == NULL); + this_->adj_list[u] -> prev = adj_ptr; + } + + /* update doubly chained list pointers of pointers */ + adj_ptr -> succ = this_->adj_list[u]; + adj_ptr -> prev = NULL; + + /* update adjacency list pointer of node u */ + this_->adj_list[u] = adj_ptr; +} + +/* remove entry in an adj. list */ +static +void remove_adjnode(pbqp *this_, int u, adjnode *adj_ptr) +{ + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + assert(adj_ptr != NULL); + + if (adj_ptr -> prev == NULL) { + this_->adj_list[u] = adj_ptr -> succ; + } else { + adj_ptr -> prev -> succ = adj_ptr -> succ; + } + + if (adj_ptr -> succ != NULL) { + adj_ptr -> succ -> prev = adj_ptr -> prev; + } + + if(adj_ptr->reverse != NULL) { + adjnode *rev = adj_ptr->reverse; + rev->reverse = NULL; + } + + if (adj_ptr -> tc_safe_regs != NULL) { + free(adj_ptr -> tc_safe_regs); + } + + free(adj_ptr); +} + +/***************************************************************************** + * node functions + ****************************************************************************/ + +/* get degree of a node */ +static +int get_deg(pbqp *this_,int u) +{ + adjnode *adj_ptr; + int deg = 0; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + deg ++; + } + return deg; +} + +/* reinsert node */ +static +void reinsert_node(pbqp *this_,int u) +{ + adjnode *adj_u, + *adj_v; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_u = this_ -> adj_list[u]; adj_u != NULL; adj_u = adj_u -> succ) { + int v = adj_u -> adj; + adj_v = alloc_adjnode(this_,u,adj_u->costs); + insert_adjnode(this_,v,adj_v); + } +} + +/* remove node */ +static +void remove_node(pbqp *this_,int u) +{ + adjnode *adj_ptr; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + remove_adjnode(this_,adj_ptr->adj,adj_ptr -> reverse); + } +} + +/***************************************************************************** + * edge functions + ****************************************************************************/ + +/* insert 
edge to graph */ +/* (does not check whether edge exists in graph */ +static +void insert_edge(pbqp *this_, int u, int v, PBQPMatrix *costs) +{ + adjnode *adj_u, + *adj_v; + + /* create adjanceny entry for u */ + adj_u = alloc_adjnode(this_,v,costs); + insert_adjnode(this_,u,adj_u); + + + /* create adjanceny entry for v */ + adj_v = alloc_adjnode(this_,u,costs); + insert_adjnode(this_,v,adj_v); + + /* create link for reverse edge */ + adj_u -> reverse = adj_v; + adj_v -> reverse = adj_u; +} + +/* delete edge */ +static +void delete_edge(pbqp *this_,int u,int v) +{ + adjnode *adj_ptr; + adjnode *rev; + + assert(this_ != NULL); + assert( u >= 0 && u < this_->num_nodes); + assert( v >= 0 && v < this_->num_nodes); + + adj_ptr=find_adjnode(this_,u,v); + assert(adj_ptr != NULL); + assert(adj_ptr->reverse != NULL); + + delete adj_ptr -> costs; + + rev = adj_ptr->reverse; + remove_adjnode(this_,u,adj_ptr); + remove_adjnode(this_,v,rev); +} + +/***************************************************************************** + * cost functions + ****************************************************************************/ + +/* Note: Since cost(u,v) = transpose(cost(v,u)), it would be necessary to store + two matrices for both edges (u,v) and (v,u). However, we only store the + matrix for the case u < v. For the other case we transpose the stored matrix + if required. +*/ + +/* add costs to cost vector of a node */ +void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs) +{ + assert(this_ != NULL); + assert(costs != NULL); + assert(u >= 0 && u <= this_->num_nodes); + + if (!this_->node_costs[u]) { + this_->node_costs[u] = new PBQPVector(*costs); + } else { + *this_->node_costs[u] += *costs; + } +} + +/* get cost matrix ptr */ +static +PBQPMatrix *get_costmatrix_ptr(pbqp *this_, int u, int v) +{ + adjnode *adj_ptr; + PBQPMatrix *m = NULL; + + assert (this_ != NULL); + assert (u >= 0 && u < this_->num_nodes); + assert (v >= 0 && v < this_->num_nodes); + + adj_ptr = find_adjnode(this_,u,v); + + if (adj_ptr != NULL) { + m = adj_ptr -> costs; + } + + return m; +} + +/* get cost matrix ptr */ +/* Note: only the pointer is returned for + cost(u,v), if u < v. +*/ +static +PBQPMatrix *pbqp_get_costmatrix(pbqp *this_, int u, int v) +{ + adjnode *adj_ptr = find_adjnode(this_,u,v); + + if (adj_ptr != NULL) { + if ( u < v) { + return new PBQPMatrix(*adj_ptr->costs); + } else { + return new PBQPMatrix(adj_ptr->costs->transpose()); + } + } else { + return NULL; + } +} + +/* add costs to cost matrix of an edge */ +void add_pbqp_edgecosts(pbqp *this_,int u,int v, PBQPMatrix *costs) +{ + PBQPMatrix *adj_costs; + + assert(this_!= NULL); + assert(costs != NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(v >= 0 && v <= this_->num_nodes); + + /* does the edge u-v exists ? */ + if (u == v) { + PBQPVector *diag = new PBQPVector(costs->diagonalize()); + add_pbqp_nodecosts(this_,v,diag); + delete diag; + } else if ((adj_costs = get_costmatrix_ptr(this_,u,v))!=NULL) { + if ( u < v) { + *adj_costs += *costs; + } else { + *adj_costs += costs->transpose(); + } + } else { + adj_costs = new PBQPMatrix((u < v) ? 
*costs : costs->transpose()); + insert_edge(this_,u,v,adj_costs); + } +} + +/* remove bucket from bucket list */ +static +void pbqp_remove_bucket(pbqp *this_, bucketnode *bucket) +{ + int u = bucket->u; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + assert(this_->bucket_ptr[u] != NULL); + + /* update predecessor node in bucket list + (if no preceeding bucket exists, then + the bucket_list pointer needs to be + updated.) + */ + if (bucket->prev != NULL) { + bucket->prev-> succ = bucket->succ; + } else { + this_->bucket_list[this_->node_deg[u]] = bucket -> succ; + } + + /* update successor node in bucket list */ + if (bucket->succ != NULL) { + bucket->succ-> prev = bucket->prev; + } +} + +/********************************************************************************** + * pop functions + **********************************************************************************/ + +/* pop node of given degree */ +static +int pop_node(pbqp *this_,int deg) +{ + bucketnode *bucket; + int u; + + assert(this_ != NULL); + assert(deg >= 0 && deg <= this_->max_deg); + assert(this_->bucket_list != NULL); + + /* get first bucket of bucket list */ + bucket = this_->bucket_list[deg]; + assert(bucket != NULL); + + /* remove bucket */ + pbqp_remove_bucket(this_,bucket); + u = bucket->u; + free(bucket); + return u; +} + +/********************************************************************************** + * reorder functions + **********************************************************************************/ + +/* add bucket to bucketlist */ +static +void add_to_bucketlist(pbqp *this_,bucketnode *bucket, int deg) +{ + bucketnode *old_head; + + assert(bucket != NULL); + assert(this_ != NULL); + assert(deg >= 0 && deg <= this_->max_deg); + assert(this_->bucket_list != NULL); + + /* store node degree (for re-ordering purposes)*/ + this_->node_deg[bucket->u] = deg; + + /* put bucket to front of doubly chained list */ + old_head = this_->bucket_list[deg]; + bucket -> prev = NULL; + bucket -> succ = old_head; + this_ -> bucket_list[deg] = bucket; + if (bucket -> succ != NULL ) { + assert ( old_head -> prev == NULL); + old_head -> prev = bucket; + } +} + + +/* reorder node in bucket list according to + current node degree */ +static +void reorder_node(pbqp *this_, int u) +{ + int deg; + + assert(this_ != NULL); + assert(u>= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + assert(this_->bucket_ptr[u] != NULL); + + /* get current node degree */ + deg = get_deg(this_,u); + + /* remove bucket from old bucket list only + if degree of node has changed. */ + if (deg != this_->node_deg[u]) { + pbqp_remove_bucket(this_,this_->bucket_ptr[u]); + add_to_bucketlist(this_,this_->bucket_ptr[u],deg); + } +} + +/* reorder adj. nodes of a node */ +static +void reorder_adjnodes(pbqp *this_,int u) +{ + adjnode *adj_ptr; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + reorder_node(this_,adj_ptr->adj); + } +} + +/********************************************************************************** + * creation functions + **********************************************************************************/ + +/* create new bucket entry */ +/* consistency of the bucket list is not checked! 
*/ +static +void create_bucket(pbqp *this_,int u,int deg) +{ + bucketnode *bucket; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + + bucket = (bucketnode *)malloc(sizeof(bucketnode)); + assert(bucket != NULL); + + bucket -> u = u; + this_->bucket_ptr[u] = bucket; + + add_to_bucketlist(this_,bucket,deg); +} + +/* create bucket list */ +static +void create_bucketlist(pbqp *this_) +{ + int u; + int max_deg; + int deg; + + assert(this_ != NULL); + assert(this_->bucket_list == NULL); + + /* determine max. degree of the nodes */ + max_deg = 2; /* at least of degree two! */ + for(u=0;u<this_->num_nodes;u++) { + deg = this_->node_deg[u] = get_deg(this_,u); + if (deg > max_deg) { + max_deg = deg; + } + } + this_->max_deg = max_deg; + + /* allocate bucket list */ + this_ -> bucket_list = (bucketnode **)malloc(sizeof(bucketnode *)*(max_deg + 1)); + memset(this_->bucket_list,0,sizeof(bucketnode *)*(max_deg + 1)); + assert(this_->bucket_list != NULL); + + /* insert nodes to the list */ + for(u=0;u<this_->num_nodes;u++) { + create_bucket(this_,u,this_->node_deg[u]); + } +} + +/***************************************************************************** + * PBQP simplification for trivial nodes + ****************************************************************************/ + +/* remove trivial node with cost vector length of one */ +static +void disconnect_trivialnode(pbqp *this_,int u) +{ + int v; + adjnode *adj_ptr, + *next; + PBQPMatrix *c_uv; + PBQPVector *c_v; + + assert(this_ != NULL); + assert(this_->node_costs != NULL); + assert(u >= 0 && u < this_ -> num_nodes); + assert(this_->node_costs[u]->getLength() == 1); + + /* add edge costs to node costs of adj. nodes */ + for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = next){ + next = adj_ptr -> succ; + v = adj_ptr -> adj; + assert(v >= 0 && v < this_ -> num_nodes); + + /* convert matrix to cost vector offset for adj. node */ + c_uv = pbqp_get_costmatrix(this_,u,v); + c_v = new PBQPVector(c_uv->getRowAsVector(0)); + *this_->node_costs[v] += *c_v; + + /* delete edge & free vec/mat */ + delete c_v; + delete c_uv; + delete_edge(this_,u,v); + } +} + +/* find all trivial nodes and disconnect them */ +static +void eliminate_trivial_nodes(pbqp *this_) +{ + int u; + + assert(this_ != NULL); + assert(this_ -> node_costs != NULL); + + for(u=0;u < this_ -> num_nodes; u++) { + if (this_->node_costs[u]->getLength() == 1) { + disconnect_trivialnode(this_,u); + } + } +} + +/***************************************************************************** + * Normal form for PBQP + ****************************************************************************/ + +/* simplify a cost matrix. If the matrix + is independent, then normalize_matrix + returns true - otherwise false. In + vectors u and v the offset values of + the decomposition are stored.
+*/ + +static +bool normalize_matrix(PBQPMatrix *m, PBQPVector *u, PBQPVector *v) +{ + assert( m != NULL); + assert( u != NULL); + assert( v != NULL); + assert( u->getLength() > 0); + assert( v->getLength() > 0); + + assert(m->getRows() == u->getLength()); + assert(m->getCols() == v->getLength()); + + /* determine u vector */ + for(unsigned r = 0; r < m->getRows(); ++r) { + PBQPNum min = m->getRowMin(r); + (*u)[r] += min; + if (!isInf(min)) { + m->subFromRow(r, min); + } else { + m->setRow(r, 0); + } + } + + /* determine v vector */ + for(unsigned c = 0; c < m->getCols(); ++c) { + PBQPNum min = m->getColMin(c); + (*v)[c] += min; + if (!isInf(min)) { + m->subFromCol(c, min); + } else { + m->setCol(c, 0); + } + } + + /* determine whether matrix is + independent or not. + */ + return m->isZero(); +} + +/* simplify single edge */ +static +void simplify_edge(pbqp *this_,int u,int v) +{ + PBQPMatrix *costs; + bool is_zero; + + assert (this_ != NULL); + assert (u >= 0 && u < this_->num_nodes); + assert (v >= 0 && v < this_->num_nodes); + assert (u != v); + + /* swap u and v if u > v in order to avoid unnecessary + transpositions of the cost matrix */ + + if (u > v) { + int swap = u; + u = v; + v = swap; + } + + /* get cost matrix and simplify it */ + costs = get_costmatrix_ptr(this_,u,v); + is_zero=normalize_matrix(costs,this_->node_costs[u],this_->node_costs[v]); + + /* delete edge */ + if(is_zero){ + delete_edge(this_,u,v); + this_->changed = true; + } +} + +/* normalize cost matrices and remove + edges in PBQP if they are independent, + i.e. can be decomposed into two + cost vectors. +*/ +static +void eliminate_independent_edges(pbqp *this_) +{ + int u,v; + adjnode *adj_ptr,*next; + + assert(this_ != NULL); + assert(this_ -> adj_list != NULL); + + this_->changed = false; + for(u=0;u < this_->num_nodes;u++) { + for (adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = next) { + next = adj_ptr -> succ; + v = adj_ptr -> adj; + assert(v >= 0 && v < this_->num_nodes); + if (u < v) { + simplify_edge(this_,u,v); + } + } + } +} + + +/***************************************************************************** + * PBQP reduction rules + ****************************************************************************/ + +/* RI reduction + This reduction rule is applied for nodes + of degree one. */ + +static +void apply_RI(pbqp *this_,int x) +{ + int y; + unsigned xlen, + ylen; + PBQPMatrix *c_yx; + PBQPVector *c_x, *delta; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> adj_list[x] != NULL); + assert(this_ -> adj_list[x] -> succ == NULL); + + /* get adjacency matrix */ + y = this_ -> adj_list[x] -> adj; + assert(y >= 0 && y < this_->num_nodes); + + /* determine length of cost vectors for node x and y */ + xlen = this_ -> node_costs[x]->getLength(); + ylen = this_ -> node_costs[y]->getLength(); + + /* get cost vector c_x and matrix c_yx */ + c_x = this_ -> node_costs[x]; + c_yx = pbqp_get_costmatrix(this_,y,x); + assert (c_yx != NULL); + + /* allocate delta vector */ + delta = new PBQPVector(ylen); + + /* compute delta vector */ + for(unsigned i = 0; i < ylen; ++i) { + PBQPNum min = (*c_yx)[i][0] + (*c_x)[0]; + for(unsigned j = 1; j < xlen; ++j) { + PBQPNum c = (*c_yx)[i][j] + (*c_x)[j]; + if ( c < min ) + min = c; + } + (*delta)[i] = min; + } + + /* add delta vector */ + *this_ -> node_costs[y] += *delta; + + /* delete node x */ + remove_node(this_,x); + + /* reorder adj. nodes of node x */ + reorder_adjnodes(this_,x); + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* free vec/mat */ + delete c_yx; + delete delta; + + /* increment counter for number statistic */ + this_->num_ri++; +}
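A worked numeric instance of the RI rule above, using plain arrays instead of PBQPVector/PBQPMatrix: for a degree-one node x with neighbour y, row i of the edge matrix folds into delta[i] = min_j (c_yx[i][j] + c_x[j]), which is added to y's cost vector before x is removed.

#include <cstdio>

int main() {
  double c_x[2]     = { 1, 4 };          // cost vector of the degree-one node x
  double c_yx[2][2] = { { 0, 2 },        // edge cost matrix (rows: y, cols: x)
                        { 5, 0 } };
  double delta[2];

  for (int i = 0; i < 2; ++i) {          // delta[i] = min_j c_yx[i][j] + c_x[j]
    double min = c_yx[i][0] + c_x[0];
    for (int j = 1; j < 2; ++j) {
      double c = c_yx[i][j] + c_x[j];
      if (c < min) min = c;
    }
    delta[i] = min;
  }
  // delta is added to y's cost vector; x's own choice is recovered later
  // during back-propagation.  Prints: delta = {1, 4}.
  std::printf("delta = {%g, %g}\n", delta[0], delta[1]);
  return 0;
}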
+/* RII reduction + This reduction rule is applied for nodes + of degree two. */ + +static +void apply_RII(pbqp *this_,int x) +{ + int y,z; + unsigned xlen,ylen,zlen; + adjnode *adj_yz; + + PBQPMatrix *c_yx, *c_zx; + PBQPVector *cx; + PBQPMatrix *delta; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> adj_list[x] != NULL); + assert(this_ -> adj_list[x] -> succ != NULL); + assert(this_ -> adj_list[x] -> succ -> succ == NULL); + + /* get adjacency matrix */ + y = this_ -> adj_list[x] -> adj; + z = this_ -> adj_list[x] -> succ -> adj; + assert(y >= 0 && y < this_->num_nodes); + assert(z >= 0 && z < this_->num_nodes); + + /* determine length of cost vectors for nodes x, y and z */ + xlen = this_ -> node_costs[x]->getLength(); + ylen = this_ -> node_costs[y]->getLength(); + zlen = this_ -> node_costs[z]->getLength(); + + /* get cost vector c_x and matrices c_yx, c_zx */ + cx = this_ -> node_costs[x]; + c_yx = pbqp_get_costmatrix(this_,y,x); + c_zx = pbqp_get_costmatrix(this_,z,x); + assert(c_yx != NULL); + assert(c_zx != NULL); + + /* Colour Heuristic */ + if ( (adj_yz = find_adjnode(this_,y,z)) != NULL) { + adj_yz->tc_valid = false; + adj_yz->reverse->tc_valid = false; + } + + /* allocate delta matrix */ + delta = new PBQPMatrix(ylen, zlen); + + /* compute delta matrix */ + for(unsigned i=0;i<ylen;i++) { + for(unsigned j=0;j<zlen;j++) { + PBQPNum min = (*c_yx)[i][0] + (*c_zx)[j][0] + (*cx)[0]; + for(unsigned k=1;k<xlen;k++) { + PBQPNum c = (*c_yx)[i][k] + (*c_zx)[j][k] + (*cx)[k]; + if ( c < min ) { + min = c; + } + } + (*delta)[i][j] = min; + } + } + + /* add delta matrix */ + add_pbqp_edgecosts(this_,y,z,delta); + + /* delete node x */ + remove_node(this_,x); + + /* reorder adj. nodes of node x */ + reorder_adjnodes(this_,x); + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* free vec/mat */ + delete c_yx; + delete c_zx; + delete delta; + + /* increment counter for number statistic */ + this_->num_rii++; + +} + +/* RN reduction */ +static +void apply_RN(pbqp *this_,int x) +{ + unsigned xlen; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> node_costs[x] != NULL); + + xlen = this_ -> node_costs[x] -> getLength(); + + /* after application of RN rule no optimality + can be guaranteed! */ + this_ -> optimal = false; + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* delete node x */ + remove_node(this_,x); + + /* reorder adj.
nodes of node x */ + reorder_adjnodes(this_,x); + + /* increment counter for number statistic */ + this_->num_rn++; +} + + +static +void compute_tc_info(pbqp *this_, adjnode *p) +{ + adjnode *r; + PBQPMatrix *m; + int x,y; + PBQPVector *c_x, *c_y; + int *row_inf_counts; + + assert(p->reverse != NULL); + + /* set flags */ + r = p->reverse; + p->tc_valid = true; + r->tc_valid = true; + + /* get edge */ + x = r->adj; + y = p->adj; + + /* get cost vectors */ + c_x = this_ -> node_costs[x]; + c_y = this_ -> node_costs[y]; + + /* get cost matrix */ + m = pbqp_get_costmatrix(this_, x, y); + + + /* allocate allowed set for edge (x,y) and (y,x) */ + if (p->tc_safe_regs == NULL) { + p->tc_safe_regs = (int *) malloc(sizeof(int) * c_x->getLength()); + } + + if (r->tc_safe_regs == NULL ) { + r->tc_safe_regs = (int *) malloc(sizeof(int) * c_y->getLength()); + } + + p->tc_impact = r->tc_impact = 0; + + row_inf_counts = (int *) alloca(sizeof(int) * c_x->getLength()); + + /* init arrays */ + p->tc_safe_regs[0] = 0; + row_inf_counts[0] = 0; + for(unsigned i = 1; i < c_x->getLength(); ++i){ + p->tc_safe_regs[i] = 1; + row_inf_counts[i] = 0; + } + + r->tc_safe_regs[0] = 0; + for(unsigned j = 1; j < c_y->getLength(); ++j){ + r->tc_safe_regs[j] = 1; + } + + for(unsigned j = 0; j < c_y->getLength(); ++j) { + int col_inf_counts = 0; + for (unsigned i = 0; i < c_x->getLength(); ++i) { + if (isInf((*m)[i][j])) { + ++col_inf_counts; + ++row_inf_counts[i]; + + p->tc_safe_regs[i] = 0; + r->tc_safe_regs[j] = 0; + } + } + if (col_inf_counts > p->tc_impact) { + p->tc_impact = col_inf_counts; + } + } + + for(unsigned i = 0; i < c_x->getLength(); ++i){ + if (row_inf_counts[i] > r->tc_impact) + { + r->tc_impact = row_inf_counts[i]; + } + } + + delete m; +} + +/* + * Checks whether node x can be locally coloured. + */ +static +int is_colorable(pbqp *this_,int x) +{ + adjnode *adj_ptr; + PBQPVector *c_x; + int result = 1; + int *allowed; + int num_allowed = 0; + unsigned total_impact = 0; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> node_costs[x] != NULL); + + c_x = this_ -> node_costs[x]; + + /* allocate allowed set */ + allowed = (int *)malloc(sizeof(int) * c_x->getLength()); + for(unsigned i = 0; i < c_x->getLength(); ++i){ + if (!isInf((*c_x)[i]) && i > 0) { + allowed[i] = 1; + ++num_allowed; + } else { + allowed[i] = 0; + } + } + + /* determine local minimum */ + for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + if (!adj_ptr -> tc_valid) { + compute_tc_info(this_, adj_ptr); + } + + total_impact += adj_ptr->tc_impact; + + if (num_allowed > 0) { + for (unsigned i = 1; i < c_x->getLength(); ++i){ + if (allowed[i]){ + if (!adj_ptr->tc_safe_regs[i]){ + allowed[i] = 0; + --num_allowed; + if (num_allowed == 0) + break; + } + } + } + } + + if ( total_impact >= c_x->getLength() - 1 && num_allowed == 0 ) { + result = 0; + break; + } + } + free(allowed); + + return result; +} + +/* use briggs heuristic + note: this_ is not a general heuristic. it only is useful for + interference graphs. 
+ */ +int pop_colorablenode(pbqp *this_) +{ + int deg; + bucketnode *min_bucket=NULL; + PBQPNum min = std::numeric_limits<PBQPNum>::infinity(); + + /* select node where the number of colors is less than the node degree */ + for(deg=this_->max_deg;deg > 2;deg--) { + bucketnode *bucket; + for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) { + int u = bucket->u; + if (is_colorable(this_,u)) { + pbqp_remove_bucket(this_,bucket); + this_->num_rn_special++; + free(bucket); + return u; + } + } + } + + /* select node with minimal ratio between average node costs and degree of node */ + for(deg=this_->max_deg;deg >2; deg--) { + bucketnode *bucket; + for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) { + PBQPNum h; + int u; + + u = bucket->u; + assert(u>=0 && u < this_->num_nodes); + h = (*this_->node_costs[u])[0] / (PBQPNum) deg; + if (h < min) { + min_bucket = bucket; + min = h; + } + } + } + + /* return node and free bucket */ + if (min_bucket != NULL) { + int u; + + pbqp_remove_bucket(this_,min_bucket); + u = min_bucket->u; + free(min_bucket); + return u; + } else { + return -1; + } +} + + +/***************************************************************************** + * PBQP graph parsing + ****************************************************************************/ + +/* reduce pbqp problem (first phase) */ +static +void reduce_pbqp(pbqp *this_) +{ + int u; + + assert(this_ != NULL); + assert(this_->bucket_list != NULL); + + for(;;){ + + if (this_->bucket_list[1] != NULL) { + u = pop_node(this_,1); + apply_RI(this_,u); + } else if (this_->bucket_list[2] != NULL) { + u = pop_node(this_,2); + apply_RII(this_,u); + } else if ((u = pop_colorablenode(this_)) != -1) { + apply_RN(this_,u); + } else { + break; + } + } +} + +/***************************************************************************** + * PBQP back propagation + ****************************************************************************/ + +/* determine solution of a reduced node. Either + RI or RII was applied for this node.
+static
+void determine_solution(pbqp *this_, int x)
+{
+  PBQPVector *v = new PBQPVector(*this_->node_costs[x]);
+  adjnode *adj_ptr;
+
+  assert(this_ != NULL);
+  assert(x >= 0 && x < this_->num_nodes);
+  assert(this_->adj_list != NULL);
+  assert(this_->solution != NULL);
+
+  for (adj_ptr = this_->adj_list[x]; adj_ptr != NULL; adj_ptr = adj_ptr->succ) {
+    int y = adj_ptr->adj;
+    int y_sol = this_->solution[y];
+
+    PBQPMatrix *c_yx = pbqp_get_costmatrix(this_, y, x);
+    assert(y_sol >= 0 && y_sol < (int)this_->node_costs[y]->getLength());
+    (*v) += c_yx->getRowAsVector(y_sol);
+    delete c_yx;
+  }
+  this_->solution[x] = v->minIndex();
+
+  delete v;
+}
+
+/* back propagation phase of PBQP */
+static
+void back_propagate(pbqp *this_)
+{
+  int i;
+
+  assert(this_ != NULL);
+  assert(this_->stack != NULL);
+  assert(this_->stack_ptr < this_->num_nodes);
+
+  for (i = this_->stack_ptr - 1; i >= 0; i--) {
+    int x = this_->stack[i];
+    assert(x >= 0 && x < this_->num_nodes);
+    reinsert_node(this_, x);
+    determine_solution(this_, x);
+  }
+}
+
+/* solve trivial nodes of degree zero */
+static
+void determine_trivialsolution(pbqp *this_)
+{
+  int u;
+  PBQPNum delta;
+
+  assert(this_ != NULL);
+  assert(this_->bucket_list != NULL);
+
+  /* determine trivial solution */
+  while (this_->bucket_list[0] != NULL) {
+    u = pop_node(this_, 0);
+
+    assert(u >= 0 && u < this_->num_nodes);
+
+    this_->solution[u] = this_->node_costs[u]->minIndex();
+    delta = (*this_->node_costs[u])[this_->solution[u]];
+    this_->min = this_->min + delta;
+
+    /* increment counter for number statistic */
+    this_->num_r0++;
+  }
+}
+
+/*****************************************************************************
+ * debug facilities
+ ****************************************************************************/
+static
+void check_pbqp(pbqp *this_)
+{
+  int u, v;
+  PBQPMatrix *costs;
+  adjnode *adj_ptr;
+
+  assert(this_ != NULL);
+
+  for (u = 0; u < this_->num_nodes; u++) {
+    assert(this_->node_costs[u] != NULL);
+    for (adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr->succ) {
+      v = adj_ptr->adj;
+      assert(v >= 0 && v < this_->num_nodes);
+      if (u < v) {
+        costs = adj_ptr->costs;
+        assert(costs->getRows() == this_->node_costs[u]->getLength() &&
+               costs->getCols() == this_->node_costs[v]->getLength());
+      }
+    }
+  }
+}
+
+/*****************************************************************************
+ * PBQP solve routines
+ ****************************************************************************/
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_)
+{
+  assert(this_ != NULL);
+  assert(!this_->solved);
+
+  /* check vector & matrix dimensions */
+  check_pbqp(this_);
+
+  /* simplify PBQP problem */
+
+  /* eliminate trivial nodes, i.e.
+     nodes with cost vectors of length one. */
+  eliminate_trivial_nodes(this_);
+
+  /* eliminate edges with independent
+     cost matrices and normalize matrices */
+  eliminate_independent_edges(this_);
+
+  /* create bucket list for graph parsing */
+  create_bucketlist(this_);
+
+  /* reduce phase */
+  reduce_pbqp(this_);
+
+  /* solve trivial nodes */
+  determine_trivialsolution(this_);
+
+  /* back propagation phase */
+  back_propagate(this_);
+
+  this_->solved = true;
+}
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_, int x)
+{
+  assert(this_ != NULL);
+  assert(this_->solution != NULL);
+  assert(this_->solved);
+
+  return this_->solution[x];
+}
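/* Editorial sketch (not part of the imported source): a worked instance of
   what determine_solution() above computes, using the PBQPVector and
   PBQPMatrix classes declared in PBQP.h later in this patch. All cost
   values are invented for illustration.

     PBQPVector cx(2);               // node costs of x: (3, 1)
     cx[0] = 3; cx[1] = 1;
     PBQPMatrix cyx(2, 2);           // (y,x) edge cost matrix
     cyx[1][0] = 0; cyx[1][1] = 5;   // row 1: costs given y's solution == 1
     cx += cyx.getRowAsVector(1);    // cx becomes (3, 6)
     unsigned xSol = cx.minIndex();  // 0 -- the index stored in solution[x]

   With several neighbours, one such row is accumulated per adjacent,
   already-solved node before taking the minimum. */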
+
+/* is the solution optimal? */
+bool is_pbqp_optimal(pbqp *this_)
+{
+  assert(this_->solved);
+  return this_->optimal;
+}
+
+}
+
+/* end of pbqp.c */
diff --git a/lib/CodeGen/PBQP.h b/lib/CodeGen/PBQP.h
new file mode 100644
index 000000000000..5fd2c06c335e
--- /dev/null
+++ b/lib/CodeGen/PBQP.h
@@ -0,0 +1,284 @@
+//===------------------ PBQP.h --------- PBQP Solver -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Developed by: Bernhard Scholz
+//               The University of Sydney
+//               http://www.it.usyd.edu.au/~scholz
+//===----------------------------------------------------------------------===//
+
+// TODO:
+//
+// * Default to null costs on vector initialisation?
+// * C++-ify the rest of the solver.
+
+#ifndef LLVM_CODEGEN_PBQPSOLVER_H
+#define LLVM_CODEGEN_PBQPSOLVER_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace llvm {
+
+//! \brief Floating point type to use in PBQP solver.
+typedef double PBQPNum;
+
+//! \brief PBQP Vector class.
+class PBQPVector {
+public:
+
+  //! \brief Construct a PBQP vector of the given size.
+  explicit PBQPVector(unsigned length) :
+    length(length), data(new PBQPNum[length]) {
+    std::fill(data, data + length, 0);
+  }
+
+  //! \brief Copy construct a PBQP vector.
+  PBQPVector(const PBQPVector &v) :
+    length(v.length), data(new PBQPNum[length]) {
+    std::copy(v.data, v.data + length, data);
+  }
+
+  ~PBQPVector() { delete[] data; }
+
+  //! \brief Assignment operator.
+  PBQPVector& operator=(const PBQPVector &v) {
+    delete[] data;
+    length = v.length;
+    data = new PBQPNum[length];
+    std::copy(v.data, v.data + length, data);
+    return *this;
+  }
+
+  //! \brief Return the length of the vector.
+  unsigned getLength() const throw () {
+    return length;
+  }
+
+  //! \brief Element access.
+  PBQPNum& operator[](unsigned index) {
+    assert(index < length && "PBQPVector element access out of bounds.");
+    return data[index];
+  }
+
+  //! \brief Const element access.
+  const PBQPNum& operator[](unsigned index) const {
+    assert(index < length && "PBQPVector element access out of bounds.");
+    return data[index];
+  }
+
+  //! \brief Add another vector to this one.
+  PBQPVector& operator+=(const PBQPVector &v) {
+    assert(length == v.length && "PBQPVector length mismatch.");
+    std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+    return *this;
+  }
+
+  //! \brief Subtract another vector from this one.
+  PBQPVector& operator-=(const PBQPVector &v) {
+    assert(length == v.length && "PBQPVector length mismatch.");
+    std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+    return *this;
+  }
+
+  //! \brief Returns the index of the minimum value in this vector.
+  unsigned minIndex() const {
+    return std::min_element(data, data + length) - data;
+  }
+
+private:
+  unsigned length;
+  PBQPNum *data;
+};
+
+
+//! \brief PBQP Matrix class.
+class PBQPMatrix {
+public:
+
+  //! \brief Construct a PBQP Matrix with the given dimensions.
+  PBQPMatrix(unsigned rows, unsigned cols) :
+    rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+    std::fill(data, data + (rows * cols), 0);
+  }
+
+  //! \brief Copy construct a PBQP matrix.
+  PBQPMatrix(const PBQPMatrix &m) :
+    rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+    std::copy(m.data, m.data + (rows * cols), data);
+  }
+
+  ~PBQPMatrix() { delete[] data; }
+
+  //! \brief Assignment operator.
+  PBQPMatrix& operator=(const PBQPMatrix &m) {
+    delete[] data;
+    rows = m.rows; cols = m.cols;
+    data = new PBQPNum[rows * cols];
+    std::copy(m.data, m.data + (rows * cols), data);
+    return *this;
+  }
+
+  //! \brief Return the number of rows in this matrix.
+  unsigned getRows() const throw () { return rows; }
+
+  //! \brief Return the number of cols in this matrix.
+  unsigned getCols() const throw () { return cols; }
+
+  //! \brief Matrix element access.
+  PBQPNum* operator[](unsigned r) {
+    assert(r < rows && "Row out of bounds.");
+    return data + (r * cols);
+  }
+
+  //! \brief Matrix element access.
+  const PBQPNum* operator[](unsigned r) const {
+    assert(r < rows && "Row out of bounds.");
+    return data + (r * cols);
+  }
+
+  //! \brief Returns the given row as a vector.
+  PBQPVector getRowAsVector(unsigned r) const {
+    PBQPVector v(cols);
+    for (unsigned c = 0; c < cols; ++c)
+      v[c] = (*this)[r][c];
+    return v;
+  }
+
+  //! \brief Reset the matrix to the given value.
+  PBQPMatrix& reset(PBQPNum val = 0) {
+    std::fill(data, data + (rows * cols), val);
+    return *this;
+  }
+
+  //! \brief Set a single row of this matrix to the given value.
+  PBQPMatrix& setRow(unsigned r, PBQPNum val) {
+    assert(r < rows && "Row out of bounds.");
+    std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+    return *this;
+  }
+
+  //! \brief Set a single column of this matrix to the given value.
+  PBQPMatrix& setCol(unsigned c, PBQPNum val) {
+    assert(c < cols && "Column out of bounds.");
+    for (unsigned r = 0; r < rows; ++r)
+      (*this)[r][c] = val;
+    return *this;
+  }
+
+  //! \brief Matrix transpose.
+  PBQPMatrix transpose() const {
+    PBQPMatrix m(cols, rows);
+    for (unsigned r = 0; r < rows; ++r)
+      for (unsigned c = 0; c < cols; ++c)
+        m[c][r] = (*this)[r][c];
+    return m;
+  }
+
+  //! \brief Returns the diagonal of the matrix as a vector.
+  //!
+  //! Matrix must be square.
+  PBQPVector diagonalize() const {
+    assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+    PBQPVector v(rows);
+    for (unsigned r = 0; r < rows; ++r)
+      v[r] = (*this)[r][r];
+    return v;
+  }
+
+  //! \brief Add the given matrix to this one.
+  PBQPMatrix& operator+=(const PBQPMatrix &m) {
+    assert(rows == m.rows && cols == m.cols &&
+           "Matrix dimensions mismatch.");
+    std::transform(data, data + (rows * cols), m.data, data,
+                   std::plus<PBQPNum>());
+    return *this;
+  }
+
+  //! \brief Returns the minimum of the given row.
+  PBQPNum getRowMin(unsigned r) const {
+    assert(r < rows && "Row out of bounds");
+    return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+  }
+
+  //! \brief Returns the minimum of the given column.
+  PBQPNum getColMin(unsigned c) const {
+    PBQPNum minElem = (*this)[0][c];
+    for (unsigned r = 1; r < rows; ++r)
+      if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+    return minElem;
+  }
+
+  //! \brief Subtracts the given scalar from the elements of the given row.
+  PBQPMatrix& subFromRow(unsigned r, PBQPNum val) {
+    assert(r < rows && "Row out of bounds");
+    std::transform(data + (r * cols), data + ((r + 1) * cols),
+                   data + (r * cols),
+                   std::bind2nd(std::minus<PBQPNum>(), val));
+    return *this;
+  }
+
+  //! \brief Subtracts the given scalar from the elements of the given column.
+  PBQPMatrix& subFromCol(unsigned c, PBQPNum val) {
+    for (unsigned r = 0; r < rows; ++r)
+      (*this)[r][c] -= val;
+    return *this;
+  }
+
+  //! \brief Returns true if this is a zero matrix.
+  bool isZero() const {
+    return std::find_if(data, data + (rows * cols),
+                        std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+      data + (rows * cols);
+  }
+
+private:
+  unsigned rows, cols;
+  PBQPNum *data;
+};
+
+#define EPS (1E-8)
+
+#ifndef PBQP_TYPE
+#define PBQP_TYPE
+struct pbqp;
+typedef struct pbqp pbqp;
+#endif
+
+/*****************
+ * PBQP routines *
+ *****************/
+
+/* allocate PBQP problem */
+pbqp *alloc_pbqp(int num);
+
+/* free PBQP problem */
+void free_pbqp(pbqp *this_);
+
+/* add node costs */
+void add_pbqp_nodecosts(pbqp *this_, int u, PBQPVector *costs);
+
+/* add edge costs */
+void add_pbqp_edgecosts(pbqp *this_, int u, int v, PBQPMatrix *costs);
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_);
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_, int u);
+
+/* is the solution optimal? */
+bool is_pbqp_optimal(pbqp *this_);
+
+}
+#endif
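// Editorial sketch (not part of the imported source): how a client might
// drive the C-style interface declared above. The two-node problem and its
// costs are invented; whether the solver copies or retains the cost objects
// is defined by the PBQP.cpp implementation, so the vectors and matrix here
// are kept alive for the whole call sequence.
//
//   #include "PBQP.h"
//   using namespace llvm;
//
//   void solveTinyProblem() {
//     pbqp *p = alloc_pbqp(2);            // a problem with 2 nodes
//     PBQPVector c0(2), c1(2);            // per-node alternative costs
//     PBQPMatrix e01(2, 2);               // pairwise costs for edge (0,1)
//     e01[0][0] = 10;                     // penalize choosing (0,0)
//     add_pbqp_nodecosts(p, 0, &c0);
//     add_pbqp_nodecosts(p, 1, &c1);
//     add_pbqp_edgecosts(p, 0, 1, &e01);
//     solve_pbqp(p);
//     int s0 = get_pbqp_solution(p, 0);   // chosen alternative for node 0
//     int s1 = get_pbqp_solution(p, 1);
//     free_pbqp(p);
//   }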
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..c5c76fc79467
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,431 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+
+namespace {
+  class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PNE() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<LiveVariables>();
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in
+    /// here. In particular, we want to map the number of uses of a virtual
+    /// register which is used in a PHI node. We map that to the BB the
+    /// vreg is coming from. This is used later to determine when the vreg
+    /// is killed in the BB.
+    ///
+    void analyzePHINodes(const MachineFunction& Fn);
+
+    // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+    // SrcReg. This needs to be after any def or uses of SrcReg, but before
+    // any subsequent point where control flow might jump out of the basic
+    // block.
+    MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+                                                    unsigned SrcReg);
+
+    // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+    // also after any exception handling labels: in landing pads execution
+    // starts at the label, so any copies placed before it won't be executed!
+    MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator I) {
+      // Rather than assuming that EH labels come before other kinds of labels,
+      // just skip all labels.
+      while (I != MBB.end() &&
+             (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
+        ++I;
+      return I;
+    }
+
+    typedef std::pair<MachineBasicBlock*, unsigned> BBVRegPair;
+    typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+    VRegPHIUse VRegPHIUseCount;
+
+    // Defs of PHI sources which are implicit_def.
+    SmallPtrSet<MachineInstr*, 4> ImpDefs;
+  };
+}
+
+char PNE::ID = 0;
+static RegisterPass<PNE>
+X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+
+const PassInfo *const llvm::PHIEliminationID = &X;
+
+bool PNE::runOnMachineFunction(MachineFunction &Fn) {
+  MRI = &Fn.getRegInfo();
+
+  analyzePHINodes(Fn);
+
+  bool Changed = false;
+
+  // Eliminate PHI instructions by inserting copies into predecessor blocks.
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    Changed |= EliminatePHINodes(Fn, *I);
+
+  // Remove dead IMPLICIT_DEF instructions.
+  for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+         E = ImpDefs.end(); I != E; ++I) {
+    MachineInstr *DefMI = *I;
+    unsigned DefReg = DefMI->getOperand(0).getReg();
+    if (MRI->use_empty(DefReg))
+      DefMI->eraseFromParent();
+  }
+
+  ImpDefs.clear();
+  VRegPHIUseCount.clear();
+  return Changed;
+}
+
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+  if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+    return false;   // Quick exit for basic blocks without PHIs.
+
+  // Get an iterator to the first instruction after the last PHI node (this may
+  // also be the end of the basic block).
+  MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+
+  while (MBB.front().getOpcode() == TargetInstrInfo::PHI)
+    LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+  return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+                                         const MachineRegisterInfo *MRI) {
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+    unsigned SrcReg = MPhi->getOperand(i).getReg();
+    const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+      return false;
+  }
+  return true;
+}
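// Editorial sketch (not part of the imported source): the machine-level PHI
// operand layout that the stride-2 loops above and below rely on. Operand 0
// is the destination; the remaining operands come in (value register,
// predecessor block) pairs:
//
//   %dst = PHI %reg1, <bb1>, %reg2, <bb2>, ...
//
//   for (unsigned i = 1, e = MPhi->getNumOperands(); i != e; i += 2) {
//     unsigned SrcReg         = MPhi->getOperand(i).getReg();
//     MachineBasicBlock *Pred = MPhi->getOperand(i + 1).getMBB();
//     // ... one incoming (SrcReg, Pred) pair ...
//   }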
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
+// This needs to be after any def or uses of SrcReg, but before any subsequent
+// point where control flow might jump out of the basic block.
+MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
+                                                     unsigned SrcReg) {
+  // Handle the trivial case trivially.
+  if (MBB.empty())
+    return MBB.begin();
+
+  // If this basic block does not contain an invoke, then control flow always
+  // reaches the end of it, so place the copy there. The logic below works in
+  // this case too, but is more expensive.
+  if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+    return MBB.getFirstTerminator();
+
+  // Discover any definition/uses in this basic block.
+  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *DefUseMI = &*RI;
+    if (DefUseMI->getParent() == &MBB)
+      DefUsesInMBB.insert(DefUseMI);
+  }
+
+  MachineBasicBlock::iterator InsertPoint;
+  if (DefUsesInMBB.empty()) {
+    // No def/uses. Insert the copy at the start of the basic block.
+    InsertPoint = MBB.begin();
+  } else if (DefUsesInMBB.size() == 1) {
+    // Insert the copy immediately after the definition/use.
+    InsertPoint = *DefUsesInMBB.begin();
+    ++InsertPoint;
+  } else {
+    // Insert the copy immediately after the last definition/use.
+    InsertPoint = MBB.end();
+    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+    ++InsertPoint;
+  }
+
+  // Make sure the copy goes after any phi nodes, however.
+  return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator AfterPHIsIt) {
+  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+  MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+  unsigned DestReg = MPhi->getOperand(0).getReg();
+  bool isDead = MPhi->getOperand(0).isDead();
+
+  // Create a new register for the incoming PHI arguments.
+  MachineFunction &MF = *MBB.getParent();
+  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+  unsigned IncomingReg = 0;
+
+  // Insert a register to register copy at the top of the current block (but
+  // after any remaining phi nodes) which copies the new incoming register
+  // into the phi node destination.
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  if (isSourceDefinedByImplicitDef(MPhi, MRI))
+    // If all sources of a PHI node are implicit_def, just emit an
+    // implicit_def instead of a copy.
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg);
+  else {
+    IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+  }
+
+  // Update live variable information if there is any.
+  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+  if (LV) {
+    MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+    if (IncomingReg) {
+      // Increment use count of the newly created virtual register.
+      LV->getVarInfo(IncomingReg).NumUses++;
+
+      // Add information to LiveVariables to know that the incoming value is
+      // killed. Note that because the value is defined in several places
+      // (once for each incoming block), the "def" block and instruction
+      // fields for the VarInfo are not filled in.
+      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+    }
+
+    // Since we are going to be deleting the PHI node, if it is the last use
+    // of any registers, or if the value itself is dead, we need to move this
+    // information over to the new copy we just inserted.
+    LV->removeVirtualRegistersKilled(MPhi);
+
+    // If the result is dead, update LV.
+    if (isDead) {
+      LV->addVirtualRegisterDead(DestReg, PHICopy);
+      LV->removeVirtualRegisterDead(DestReg, MPhi);
+    }
+  }
+
+  // Adjust the VRegPHIUseCount map to account for the removal of this PHI
+  // node.
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(),
+                                 MPhi->getOperand(i).getReg())];
+
+  // Now loop over all of the incoming arguments, changing them to copy into
+  // the IncomingReg register in the corresponding predecessor basic block.
+  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+  for (int i = NumSrcs - 1; i >= 0; --i) {
+    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+           "Machine PHI Operands must all be virtual registers!");
+
+    // If source is defined by an implicit def, there is no need to insert a
+    // copy.
+    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+      ImpDefs.insert(DefMI);
+      continue;
+    }
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+    // Check to make sure we haven't already emitted the copy for this block.
+    // This can happen because PHI nodes may have multiple entries for the same
+    // basic block.
+    if (!MBBsInsertedInto.insert(&opBlock))
+      continue;  // If the copy has already been emitted, we're done.
+
+    // Find a safe location to insert the copy; this may be the first
+    // terminator in the block (or end()).
+    MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+
+    // Insert the copy.
+    TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+
+    // Now update live variable information if we have it. Otherwise we're
+    // done.
+    if (!LV) continue;
+
+    // We want to be able to insert a kill of the register if this PHI (aka,
+    // the copy we just inserted) is the last use of the source value. Live
+    // variable analysis conservatively handles this by saying that the value
+    // is live until the end of the block the PHI entry lives in. If the value
+    // really is dead at the PHI copy, there will be no successor blocks which
+    // have the value live-in.
+    //
+    // Check to see if the copy is the last use, and if so, update the live
+    // variables information so that it knows the copy source instruction
+    // kills the incoming value.
+    LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
+
+    // Loop over all of the successors of the basic block, checking to see if
+    // the value is either live in the block, or if it is killed in the block.
+    // Also check to see if this register is in use by another PHI node which
+    // has not yet been eliminated. If so, it will be killed at an appropriate
+    // point later.
+
+    // Is it used by any PHI instructions in this block?
+    bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+
+    std::vector<MachineBasicBlock*> OpSuccBlocks;
+
+    // Otherwise, scan successors, including the BB the PHI node lives in.
+    for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+           E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
+      MachineBasicBlock *SuccMBB = *SI;
+
+      // Is it alive in this successor?
+      unsigned SuccIdx = SuccMBB->getNumber();
+      if (InRegVI.AliveBlocks.test(SuccIdx)) {
+        ValueIsLive = true;
+        break;
+      }
+
+      OpSuccBlocks.push_back(SuccMBB);
+    }
+
+    // Check to see if this value is live because there is a use in a
+    // successor that kills it.
+    if (!ValueIsLive) {
+      switch (OpSuccBlocks.size()) {
+      case 1: {
+        MachineBasicBlock *MBB = OpSuccBlocks[0];
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (InRegVI.Kills[i]->getParent() == MBB) {
+            ValueIsLive = true;
+            break;
+          }
+        break;
+      }
+      case 2: {
+        MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (InRegVI.Kills[i]->getParent() == MBB1 ||
+              InRegVI.Kills[i]->getParent() == MBB2) {
+            ValueIsLive = true;
+            break;
+          }
+        break;
+      }
+      default:
+        std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+        for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+          if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                                 InRegVI.Kills[i]->getParent())) {
+            ValueIsLive = true;
+            break;
+          }
+      }
+    }
+
+    // Okay, if we now know that the value is not live out of the block, we
+    // can add a kill marker in this block saying that it kills the incoming
+    // value!
+    if (!ValueIsLive) {
+      // In our final twist, we have to decide which instruction kills the
+      // register. In most cases this is the copy, however, the first
+      // terminator instruction at the end of the block may also use the
+      // value. In this case, we should mark *it* as the killing instruction,
+      // not the copy.
+      MachineBasicBlock::iterator KillInst = prior(InsertPos);
+      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+      if (Term != opBlock.end()) {
+        if (Term->readsRegister(SrcReg))
+          KillInst = Term;
+
+        // Check that no other terminators use the value.
+#ifndef NDEBUG
+        for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
+             ++TI) {
+          assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+                 "they are the first terminator in a block!");
+        }
+#endif
+      }
+
+      // Finally, mark it killed.
+      LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+      // This vreg no longer lives all of the way through opBlock.
+      unsigned opBlockNum = opBlock.getNumber();
+      InRegVI.AliveBlocks.reset(opBlockNum);
+    }
+  }
+
+  // Really delete the PHI instruction now!
+  MF.DeleteMachineInstr(MPhi);
+  ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the number of uses of a virtual register which
+/// is used in a PHI node. We map that to the BB the vreg is coming from. This
+/// is used later to determine when the vreg is killed in the BB.
+///
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
+                                     BBI->getOperand(i).getReg())];
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 000000000000..f67eb79be3e1
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,54 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+               RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+         cl::init(&createLinearScanRegisterAllocator),
+         cl::desc("Register allocator to use: (default = linearscan)"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+  if (!Ctor) {
+    Ctor = RegAlloc;
+    RegisterRegAlloc::setDefault(RegAlloc);
+  }
+
+  return Ctor();
+}
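// Editorial sketch (not part of the imported source): how an allocator plugs
// into this registry. The in-tree allocators register themselves with
// file-scope objects of this shape (the "myalloc" name and factory function
// below are hypothetical):
//
//   static RegisterRegAlloc
//   myRegAlloc("myalloc", "my experimental register allocator",
//              createMyRegisterAllocator);
//
// Once such an object is constructed, -regalloc=myalloc makes the cl::opt
// parser above hand that factory to createRegisterAllocator().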
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..de7746855b3f
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,941 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction
+// has not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static cl::opt<bool>
+EnableAntiDepBreaking("break-anti-dependencies",
+                      cl::desc("Break post-RA scheduling anti-dependencies"),
+                      cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnablePostRAHazardAvoidance("avoid-hazards",
+                            cl::desc("Enable simple hazard-avoidance"),
+                            cl::init(true), cl::Hidden);
+
+namespace {
+  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+  public:
+    static char ID;
+    PostRAScheduler() : MachineFunctionPass(&ID) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    const char *getPassName() const {
+      return "Post RA top-down list latency scheduler";
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+  };
+  char PostRAScheduler::ID = 0;
+
+  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+    /// AvailableQueue - The priority queue to use for the available SUnits.
+    ///
+    LatencyPriorityQueue AvailableQueue;
+
+    /// PendingQueue - This contains all of the instructions whose operands
+    /// have been issued, but their results are not ready yet (due to the
+    /// latency of the operation). Once the operands become available, the
+    /// instruction is added to the AvailableQueue.
+    std::vector<SUnit*> PendingQueue;
+
+    /// Topo - A topological ordering for SUnits.
+    ScheduleDAGTopologicalSort Topo;
+
+    /// AllocatableSet - The set of allocatable registers.
+    /// We'll be ignoring anti-dependencies on non-allocatable registers,
+    /// because they may not be safe to break.
+    const BitVector AllocatableSet;
+
+    /// HazardRec - The hazard recognizer to use.
+    ScheduleHazardRecognizer *HazardRec;
+
+    /// Classes - For live regs that are only used in one register class in a
+    /// live range, the register class. If the register is not live, the
+    /// corresponding value is null. If the register is live but used in
+    /// multiple register classes, the corresponding value is -1 casted to a
+    /// pointer.
+    const TargetRegisterClass *
+      Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, MachineOperand *> RegRefs;
+
+    /// The index of the most recent kill (proceeding bottom-up), or ~0u if
+    /// the register is not live.
+    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// The index of the most recent complete def (proceeding bottom up), or
+    /// ~0u if the register is live.
+    unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+  public:
+    SchedulePostRATDList(MachineFunction &MF,
+                         const MachineLoopInfo &MLI,
+                         const MachineDominatorTree &MDT,
+                         ScheduleHazardRecognizer *HR)
+      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
+        AllocatableSet(TRI->getAllocatableSet(MF)),
+        HazardRec(HR) {}
+
+    ~SchedulePostRATDList() {
+      delete HazardRec;
+    }
+
+    /// StartBlock - Initialize register live-range state for scheduling in
+    /// this block.
+    ///
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Schedule the instruction range using list scheduling.
+    ///
+    void Schedule();
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count);
+
+    /// FinishBlock - Clean up register live-range state.
+    ///
+    void FinishBlock();
+
+  private:
+    void PrescanInstruction(MachineInstr *MI);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+    void ReleaseSuccessors(SUnit *SU);
+    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+    void ListScheduleTopDown();
+    bool BreakAntiDependencies();
+  };
+
+  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
+  /// a coarse classification and attempts to avoid grouping instructions
+  /// of a given class too densely together.
+  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
+    /// Class - A simple classification for SUnits.
+    enum Class {
+      Other, Load, Store
+    };
+
+    /// Window - The Class values of the most recently issued
+    /// instructions.
+    Class Window[8];
+
+    /// getClass - Classify the given SUnit.
+    Class getClass(const SUnit *SU) {
+      const MachineInstr *MI = SU->getInstr();
+      const TargetInstrDesc &TID = MI->getDesc();
+      if (TID.mayLoad())
+        return Load;
+      if (TID.mayStore())
+        return Store;
+      return Other;
+    }
+
+    /// Step - Rotate the existing entries in Window and insert the
+    /// given class value in position as the most recent.
+    void Step(Class C) {
+      std::copy(Window+1, array_endof(Window), Window);
+      Window[array_lengthof(Window)-1] = C;
+    }
+
+  public:
+    SimpleHazardRecognizer() : Window() {}
+
+    virtual HazardType getHazardType(SUnit *SU) {
+      Class C = getClass(SU);
+      if (C == Other)
+        return NoHazard;
+      unsigned Score = 0;
+      for (unsigned i = 0; i != array_lengthof(Window); ++i)
+        if (Window[i] == C)
+          Score += i + 1;
+      if (Score > array_lengthof(Window) * 2)
+        return Hazard;
+      return NoHazard;
+    }
+
+    virtual void EmitInstruction(SUnit *SU) {
+      Step(getClass(SU));
+    }
+
+    virtual void AdvanceCycle() {
+      Step(Other);
+    }
+  };
+}
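// Editorial note (not part of the imported source): the scoring rule above,
// worked through. A candidate of class C scores the sum of (i + 1) over the
// 8 window slots holding C, so recently issued instructions weigh more, and
// the candidate is reported as a Hazard when the score exceeds 2 * 8 = 16.
// For example, if the five most recently issued instructions were loads
// (slots 3..7), another load scores 4 + 5 + 6 + 7 + 8 = 30 > 16 and is
// deferred, while a store scores 0 and issues freely.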
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+///
+static bool isSchedulingBoundary(const MachineInstr *MI,
+                                 const MachineFunction &MF) {
+  // Terminators and labels can't be scheduled around.
+  if (MI->getDesc().isTerminator() || MI->isLabel())
+    return true;
+
+  // Don't attempt to schedule around any instruction that modifies
+  // a stack-oriented pointer, as it's unlikely to be profitable. This
+  // saves compile time, because it doesn't require every single
+  // stack slot reference to depend on the instruction that does the
+  // modification.
+  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+    return true;
+
+  return false;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "PostRAScheduler\n";
+
+  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
+    new SimpleHazardRecognizer() :
+    new ScheduleHazardRecognizer();
+
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // Initialize register live-range state for scheduling in this block.
+    Scheduler.StartBlock(MBB);
+
+    // Schedule each sequence of instructions not interrupted by a label
+    // or anything else that effectively needs to shut down scheduling.
+    MachineBasicBlock::iterator Current = MBB->end();
+    unsigned Count = MBB->size(), CurrentCount = Count;
+    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+      MachineInstr *MI = prior(I);
+      if (isSchedulingBoundary(MI, Fn)) {
+        Scheduler.Run(MBB, I, Current, CurrentCount);
+        Scheduler.EmitSchedule();
+        Current = MI;
+        CurrentCount = Count - 1;
+        Scheduler.Observe(MI, CurrentCount);
+      }
+      I = MI;
+      --Count;
+    }
+    assert(Count == 0 && "Instruction count mismatch!");
+    assert((MBB->begin() == Current || CurrentCount != 0) &&
+           "Instruction count mismatch!");
+    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+    Scheduler.EmitSchedule();
+
+    // Clean up register live-range state.
+    Scheduler.FinishBlock();
+  }
+
+  return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+  // Call the superclass.
+  ScheduleDAGInstrs::StartBlock(BB);
+
+  // Clear out the register class data.
+  std::fill(Classes, array_endof(Classes),
+            static_cast<const TargetRegisterClass *>(0));
+
+  // Initialize the indices to indicate that no registers are live.
+  std::fill(KillIndices, array_endof(KillIndices), ~0u);
+  std::fill(DefIndices, array_endof(DefIndices), BB->size());
+
+  // Determine the live-out physregs for this block.
+  if (!BB->empty() && BB->back().getDesc().isReturn())
+    // In a return block, examine the function live-out regs.
+    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+           E = MRI.liveout_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[Reg] = BB->size();
+      DefIndices[Reg] = ~0u;
+      // Repeat, for all aliases.
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        unsigned AliasReg = *Alias;
+        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        KillIndices[AliasReg] = BB->size();
+        DefIndices[AliasReg] = ~0u;
+      }
+    }
+  else
+    // In a non-return block, examine the live-in regs of all successors.
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI)
+      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+             E = (*SI)->livein_end(); I != E; ++I) {
+        unsigned Reg = *I;
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        KillIndices[Reg] = BB->size();
+        DefIndices[Reg] = ~0u;
+        // Repeat, for all aliases.
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+          KillIndices[AliasReg] = BB->size();
+          DefIndices[AliasReg] = ~0u;
+        }
+      }
+
+  // Consider callee-saved registers as live-out, since we're running after
+  // prologue/epilogue insertion so there's no way to add additional
+  // saved registers.
+  //
+  // TODO: If the callee saves and restores these, then we can potentially
+  // use them between the save and the restore. To do that, we could scan
+  // the exit blocks to see which of these registers are defined.
+  // Alternatively, callee-saved registers that aren't saved and restored
+  // could be marked live-in in every block.
+  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+    unsigned Reg = *I;
+    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+    KillIndices[Reg] = BB->size();
+    DefIndices[Reg] = ~0u;
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[AliasReg] = BB->size();
+      DefIndices[AliasReg] = ~0u;
+    }
+  }
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+
+  // Build the scheduling graph.
+  BuildSchedGraph();
+
+  if (EnableAntiDepBreaking) {
+    if (BreakAntiDependencies()) {
+      // We made changes. Update the dependency graph.
+      // Theoretically we could update the graph in place:
+      // When a live range is changed to use a different register, remove
+      // the def's anti-dependence *and* output-dependence edges due to
+      // that register, and add new anti-dependence and output-dependence
+      // edges based on the next live range of the register.
+      SUnits.clear();
+      EntrySU = SUnit();
+      ExitSU = SUnit();
+      BuildSchedGraph();
+    }
+  }
+
+  AvailableQueue.initNodes(SUnits);
+
+  ListScheduleTopDown();
+
+  AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+  assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+  // Any register which was defined within the previous scheduling region
+  // may have been rescheduled and its lifetime may overlap with registers
+  // in ways not reflected in our current liveness state. For each such
+  // register, adjust the liveness state to be conservatively correct.
+  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+      // Mark this register to be non-renamable.
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      // Move the def index to the end of the previous region, to reflect
+      // that the def could theoretically have been scheduled at the end.
+      DefIndices[Reg] = InsertPosIndex;
+    }
+
+  PrescanInstruction(MI);
+  ScanInstruction(MI, Count);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+  RegRefs.clear();
+
+  // Call the superclass.
+  ScheduleDAGInstrs::FinishBlock();
+}
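// Editorial note (not part of the imported source): the liveness encoding
// maintained by StartBlock/Observe and by ScanInstruction below, restated.
// Instruction indices grow toward the bottom of the block, and for every
// physical register exactly one of the two entries is ~0u:
//
//   live register:  KillIndices[Reg] = index of its most recently seen kill
//                   (the use that ends the range), DefIndices[Reg] = ~0u
//   dead register:  KillIndices[Reg] = ~0u, DefIndices[Reg] = index of its
//                   most recently seen complete def
//
// which is exactly the invariant asserted throughout the code below:
//
//   assert((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u));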
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+  SDep *Next = 0;
+  unsigned NextDepth = 0;
+  // Find the predecessor edge with the greatest depth.
+  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+       P != PE; ++P) {
+    SUnit *PredSU = P->getSUnit();
+    unsigned PredLatency = P->getLatency();
+    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+    // In the case of a latency tie, prefer an anti-dependency edge over
+    // other types of edges.
+    if (NextDepth < PredTotalLatency ||
+        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+      NextDepth = PredTotalLatency;
+      Next = &*P;
+    }
+  }
+  return Next;
+}
+
+void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
+  // Scan the register operands for this instruction and update
+  // Classes and RegRefs.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    const TargetRegisterClass *NewRC =
+      getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    // Now check for aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      // If an alias of the reg is used during the live range, give up.
+      // Note that this allows us to skip checking if AntiDepReg
+      // overlaps with any of the aliases, among other things.
+      unsigned AliasReg = *Alias;
+      if (Classes[AliasReg]) {
+        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
+    }
+
+    // If we're still willing to consider this register, note the reference.
+    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+      RegRefs.insert(std::make_pair(Reg, &MO));
+  }
+}
+
+void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
+                                           unsigned Count) {
+  // Update liveness.
+  // Proceeding upwards, registers that are defined but not used in this
+  // instruction are now dead.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    if (!MO.isDef()) continue;
+    // Ignore two-addr defs.
+    if (MI->isRegTiedToUseOperand(i)) continue;
+
+    DefIndices[Reg] = Count;
+    KillIndices[Reg] = ~0u;
+    assert(((KillIndices[Reg] == ~0u) !=
+            (DefIndices[Reg] == ~0u)) &&
+           "Kill and Def maps aren't consistent for Reg!");
+    Classes[Reg] = 0;
+    RegRefs.erase(Reg);
+    // Repeat, for all subregs.
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg) {
+      unsigned SubregReg = *Subreg;
+      DefIndices[SubregReg] = Count;
+      KillIndices[SubregReg] = ~0u;
+      Classes[SubregReg] = 0;
+      RegRefs.erase(SubregReg);
+    }
+    // Conservatively mark super-registers as unusable.
+    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+         *Super; ++Super) {
+      unsigned SuperReg = *Super;
+      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+    }
+  }
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    if (!MO.isUse()) continue;
+
+    const TargetRegisterClass *NewRC =
+      getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is, this is a kill.
+    if (KillIndices[Reg] == ~0u) {
+      KillIndices[Reg] = Count;
+      DefIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+    }
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      if (KillIndices[AliasReg] == ~0u) {
+        KillIndices[AliasReg] = Count;
+        DefIndices[AliasReg] = ~0u;
+      }
+    }
+  }
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies along the critical path
+/// of the ScheduleDAG and break them by renaming registers.
+///
+bool SchedulePostRATDList::BreakAntiDependencies() {
+  // The code below assumes that there is at least one instruction,
+  // so just duck out immediately if the block is empty.
+  if (SUnits.empty()) return false;
+
+  // Find the node at the bottom of the critical path.
+  SUnit *Max = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+      Max = SU;
+  }
+
+  DOUT << "Critical path has total latency "
+       << (Max->getDepth() + Max->Latency) << "\n";
+
+  // Track progress along the critical path through the SUnit graph as we
+  // walk the instructions.
+  SUnit *CriticalPathSU = Max;
+  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+  // Consider this pattern:
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  // There are three anti-dependencies here, and without special care,
+  // we'd break all of them using the same register:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  // because at each anti-dependence, B is the first register that
+  // isn't A which is free. This re-introduces anti-dependencies
+  // at all but one of the original anti-dependencies that we were
+  // trying to break. To avoid this, keep track of the most recent
+  // register that each register was replaced with, and avoid
+  // using it to repair an anti-dependence on the same register.
+  // This lets us produce this:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   C = ...
+  //   ... = C
+  //   B = ...
+  //   ... = B
+  // This still has an anti-dependence on B, but at least it isn't on the
+  // original critical path.
+  //
+  // TODO: If we tracked more than one register here, we could potentially
+  // fix that remaining critical edge too. This is a little more involved,
+  // because unlike the most recent register, less recent registers should
+  // still be considered, though only if no other registers are available.
+  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+
+  // Attempt to break anti-dependence edges on the critical path. Walk the
+  // instructions from the bottom up, tracking information about liveness
+  // as we go to help determine which registers are available.
+  bool Changed = false;
+  unsigned Count = InsertPosIndex - 1;
+  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+
+    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
+    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
+    // is left behind appearing to clobber the super-register, while the
+    // subregister needs to remain live. So we just ignore them.
+    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+      continue;
+
+    // Check if this instruction has a dependence on the critical path that
+    // is an anti-dependence that we may be able to break. If it is, set
+    // AntiDepReg to the non-zero register associated with the anti-dependence.
+    //
+    // We limit our attention to the critical path as a heuristic to avoid
+    // breaking anti-dependence edges that aren't going to significantly
+    // impact the overall schedule. There are a limited number of registers
+    // and we want to save them for the important edges.
+    //
+    // TODO: Instructions with multiple defs could have multiple
+    // anti-dependencies. The current code here only knows how to break one
+    // edge per instruction. Note that we'd have to be able to break all of
+    // the anti-dependencies in an instruction in order to be effective.
+    unsigned AntiDepReg = 0;
+    if (MI == CriticalPathMI) {
+      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+        SUnit *NextSU = Edge->getSUnit();
+
+        // Only consider anti-dependence edges.
+        if (Edge->getKind() == SDep::Anti) {
+          AntiDepReg = Edge->getReg();
+          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+          // Don't break anti-dependencies on non-allocatable registers.
+          if (!AllocatableSet.test(AntiDepReg))
+            AntiDepReg = 0;
+          else {
+            // If the SUnit has other dependencies on the SUnit that it
+            // anti-depends on, don't bother breaking the anti-dependency
+            // since those edges would prevent such units from being
+            // scheduled past each other regardless.
+            //
+            // Also, if there are dependencies on other SUnits with the
+            // same register as the anti-dependency, don't attempt to
+            // break it.
+            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+                   PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+              if (P->getSUnit() == NextSU ?
+                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+                AntiDepReg = 0;
+                break;
+              }
+          }
+        }
+        CriticalPathSU = NextSU;
+        CriticalPathMI = CriticalPathSU->getInstr();
+      } else {
+        // We've reached the end of the critical path.
+        CriticalPathSU = 0;
+        CriticalPathMI = 0;
+      }
+    }
+
+    PrescanInstruction(MI);
+
+    // If this instruction has a use of AntiDepReg, breaking it
+    // is invalid.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (MO.isUse() && AntiDepReg == Reg) {
+        AntiDepReg = 0;
+        break;
+      }
+    }
+
+    // Determine AntiDepReg's register class, if it is live and is
+    // consistently used within a single class.
+    const TargetRegisterClass *RC = AntiDepReg != 0 ?
+      Classes[AntiDepReg] : 0;
+    assert((AntiDepReg == 0 || RC != NULL) &&
+           "Register should be live if it's causing an anti-dependence!");
+    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+      AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+    //
+    // TODO: Instead of picking the first free register, consider which might
+    // be the best.
+    if (AntiDepReg != 0) {
+      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+             RE = RC->allocation_order_end(MF); R != RE; ++R) {
+        unsigned NewReg = *R;
+        // Don't replace a register with itself.
+        if (NewReg == AntiDepReg) continue;
+        // Don't replace a register with one that was recently used to repair
+        // an anti-dependence with this AntiDepReg, because that would
+        // re-introduce that anti-dependence.
+        if (NewReg == LastNewReg[AntiDepReg]) continue;
+        // If NewReg is dead and NewReg's most recent def is not before
+        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+        assert(((KillIndices[AntiDepReg] == ~0u) !=
+                (DefIndices[AntiDepReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for AntiDepReg!");
+        assert(((KillIndices[NewReg] == ~0u) !=
+                (DefIndices[NewReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for NewReg!");
+        if (KillIndices[NewReg] == ~0u &&
+            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
+            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
+          DOUT << "Breaking anti-dependence edge on "
+               << TRI->getName(AntiDepReg)
+               << " with " << RegRefs.count(AntiDepReg) << " references"
+               << " using " << TRI->getName(NewReg) << "!\n";
+
+          // Update the references to the old register to refer to the new
+          // register.
+          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                    std::multimap<unsigned, MachineOperand *>::iterator>
+            Range = RegRefs.equal_range(AntiDepReg);
+          for (std::multimap<unsigned, MachineOperand *>::iterator
+                 Q = Range.first, QE = Range.second; Q != QE; ++Q)
+            Q->second->setReg(NewReg);
+
+          // We just went back in time and modified history; the
+          // liveness information for the anti-dependence reg is now
+          // inconsistent. Set the state as if it were dead.
+          Classes[NewReg] = Classes[AntiDepReg];
+          DefIndices[NewReg] = DefIndices[AntiDepReg];
+          KillIndices[NewReg] = KillIndices[AntiDepReg];
+          assert(((KillIndices[NewReg] == ~0u) !=
+                  (DefIndices[NewReg] == ~0u)) &&
+                 "Kill and Def maps aren't consistent for NewReg!");
+
+          Classes[AntiDepReg] = 0;
+          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+          KillIndices[AntiDepReg] = ~0u;
+          assert(((KillIndices[AntiDepReg] == ~0u) !=
+                  (DefIndices[AntiDepReg] == ~0u)) &&
+                 "Kill and Def maps aren't consistent for AntiDepReg!");
+
+          RegRefs.erase(AntiDepReg);
+          Changed = true;
+          LastNewReg[AntiDepReg] = NewReg;
+          break;
+        }
+      }
+    }
+
+    ScanInstruction(MI, Count);
+  }
+
+  return Changed;
+}
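// Editorial note (not part of the imported source): the renaming test above
// with concrete, invented indices. Indices grow toward the bottom of the
// block. Suppose AntiDepReg R1 is killed at index 10 (KillIndices[R1] == 10)
// and candidate R2 is dead here (KillIndices[R2] == ~0u) with its next def
// further down at index 25 (DefIndices[R2] == 25). Then
// KillIndices[R1] <= DefIndices[R2] holds (10 <= 25): the renamed live range
// ends at index 10, above R2's next definition at 25, so rewriting R1's
// range to use R2 cannot interfere with R2's existing uses.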
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+    PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(SU, &*I);
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+
+  Sequence.push_back(SU);
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue.push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue. If
+    // so, add them to the available queue.
+    unsigned MinDepth = ~0u;
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
+        AvailableQueue.push(PendingQueue[i]);
+        PendingQueue[i]->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
+    }
+
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue.empty()) {
+      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+
+    bool HasNoopHazards = false;
+    while (!AvailableQueue.empty()) {
+      SUnit *CurSUnit = AvailableQueue.pop();
+
+      ScheduleHazardRecognizer::HazardType HT =
+        HazardRec->getHazardType(CurSUnit);
+      if (HT == ScheduleHazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+      NotReady.push_back(CurSUnit);
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue.push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundSUnit);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;
+    } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall, but no other problem; just
+      // advance the current cycle and try again.
+      DOUT << "*** Advancing cycle, no work to do\n";
+      HazardRec->AdvanceCycle();
+      ++NumStalls;
+      ++CurCycle;
+    } else {
+      // Otherwise, we have no instructions to issue and we have instructions
+      // that will fault if we don't do this right. This is the case for
+      // processors without pipeline interlocks and other cases.
+      DOUT << "*** Emitting noop\n";
+      HazardRec->EmitNoop();
+      Sequence.push_back(0);   // NULL here means noop
+      ++NumNoops;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//  Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler() {
+  return new PostRAScheduler();
+}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 000000000000..97d4728348e5
--- /dev/null
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1485 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and splits all live intervals which cross the barrier.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+  class VISIBILITY_HIDDEN PreAllocSplitting : public MachineFunctionPass {
+    MachineFunction *CurrMF;
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo* TRI;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    LiveIntervals *LIs;
+    LiveStacks *LSs;
+    VirtRegMap *VRM;
+
+    // Barrier - Current barrier being processed.
+    MachineInstr *Barrier;
+
+    // BarrierMBB - Basic block where the barrier resides in.
+    MachineBasicBlock *BarrierMBB;
+
+    // BarrierIdx - Current barrier index.
+    unsigned BarrierIdx;
+
+    // CurrLI - Current live interval being split.
+    LiveInterval *CurrLI;
+
+    // CurrSLI - Current stack slot live interval.
+    LiveInterval *CurrSLI;
+
+    // CurrSValNo - Current val# for the stack slot live interval.
+    VNInfo *CurrSValNo;
+
+    // IntervalSSMap - A map from live interval to spill slots.
+    DenseMap<unsigned, int> IntervalSSMap;
+
+    // Def2SpillMap - A map from a def instruction index to spill index.
+    DenseMap<unsigned, unsigned> Def2SpillMap;
+
+  public:
+    static char ID;
+    PreAllocSplitting() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveIntervals>();
+      AU.addPreserved<LiveIntervals>();
+      AU.addRequired<LiveStacks>();
+      AU.addPreserved<LiveStacks>();
+      AU.addPreserved<RegisterCoalescer>();
+      if (StrongPHIElim)
+        AU.addPreservedID(StrongPHIEliminationID);
+      else
+        AU.addPreservedID(PHIEliminationID);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<VirtRegMap>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      IntervalSSMap.clear();
+      Def2SpillMap.clear();
+    }
+
+    virtual const char *getPassName() const {
+      return "Pre-Register Allocation Live Interval Splitting";
+    }
+
+    /// print - Implement the dump method.
+ virtual void print(std::ostream &O, const Module* M = 0) const { + LIs->print(O, M); + } + + void print(std::ostream *O, const Module* M = 0) const { + if (O) print(*O, M); + } + + private: + MachineBasicBlock::iterator + findNextEmptySlot(MachineBasicBlock*, MachineInstr*, + unsigned&); + + MachineBasicBlock::iterator + findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, + SmallPtrSet&, unsigned&); + + MachineBasicBlock::iterator + findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned, + SmallPtrSet&, unsigned&); + + int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); + + bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned, + unsigned&, int&) const; + + void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned); + + bool SplitRegLiveInterval(LiveInterval*); + + bool SplitRegLiveIntervals(const TargetRegisterClass **, + SmallPtrSet&); + + bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB, + MachineBasicBlock* BarrierMBB); + bool Rematerialize(unsigned vreg, VNInfo* ValNo, + MachineInstr* DefMI, + MachineBasicBlock::iterator RestorePt, + unsigned RestoreIdx, + SmallPtrSet& RefsInMBB); + MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, + MachineInstr* DefMI, + MachineInstr* Barrier, + MachineBasicBlock* MBB, + int& SS, + SmallPtrSet& RefsInMBB); + MachineInstr* FoldRestore(unsigned vreg, + const TargetRegisterClass* RC, + MachineInstr* Barrier, + MachineBasicBlock* MBB, + int SS, + SmallPtrSet& RefsInMBB); + void RenumberValno(VNInfo* VN); + void ReconstructLiveInterval(LiveInterval* LI); + bool removeDeadSpills(SmallPtrSet& split); + unsigned getNumberOfNonSpills(SmallPtrSet& MIs, + unsigned Reg, int FrameIndex, bool& TwoAddr); + VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use, + MachineBasicBlock* MBB, LiveInterval* LI, + SmallPtrSet& Visited, + DenseMap >& Defs, + DenseMap >& Uses, + DenseMap& NewVNs, + DenseMap& LiveOut, + DenseMap& Phis, + bool IsTopLevel, bool IsIntraBlock); + VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use, + MachineBasicBlock* MBB, LiveInterval* LI, + SmallPtrSet& Visited, + DenseMap >& Defs, + DenseMap >& Uses, + DenseMap& NewVNs, + DenseMap& LiveOut, + DenseMap& Phis, + bool IsTopLevel, bool IsIntraBlock); +}; +} // end anonymous namespace + +char PreAllocSplitting::ID = 0; + +static RegisterPass +X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting"); + +const PassInfo *const llvm::PreAllocSplittingID = &X; + + +/// findNextEmptySlot - Find a gap after the given machine instruction in the +/// instruction index map. If there isn't one, return end(). +MachineBasicBlock::iterator +PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, + unsigned &SpotIndex) { + MachineBasicBlock::iterator MII = MI; + if (++MII != MBB->end()) { + unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); + if (Index) { + SpotIndex = Index; + return MII; + } + } + return MBB->end(); +} + +/// findSpillPoint - Find a gap as far away from the given MI that's suitable +/// for spilling the current live interval. The index must be before any +/// defs and uses of the live interval register in the mbb. Return begin() if +/// none is found. 
+MachineBasicBlock::iterator +PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, + MachineInstr *DefMI, + SmallPtrSet &RefsInMBB, + unsigned &SpillIndex) { + MachineBasicBlock::iterator Pt = MBB->begin(); + + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock::iterator EndPt = DefMI + ? MachineBasicBlock::iterator(DefMI) : MBB->begin(); + + while (MII != EndPt && !RefsInMBB.count(MII) && + MII->getOpcode() != TRI->getCallFrameSetupOpcode()) + --MII; + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + + while (MII != EndPt && !RefsInMBB.count(MII)) { + unsigned Index = LIs->getInstructionIndex(MII); + + // We can't insert the spill between the barrier (a call), and its + // corresponding call frame setup. + if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { + while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) { + --MII; + if (MII == EndPt) { + return Pt; + } + } + continue; + } else if (LIs->hasGapBeforeInstr(Index)) { + Pt = MII; + SpillIndex = LIs->findGapBeforeInstr(Index, true); + } + + if (RefsInMBB.count(MII)) + return Pt; + + + --MII; + } + + return Pt; +} + +/// findRestorePoint - Find a gap in the instruction index map that's suitable +/// for restoring the current live interval value. The index must be before any +/// uses of the live interval register in the mbb. Return end() if none is +/// found. +MachineBasicBlock::iterator +PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, + unsigned LastIdx, + SmallPtrSet &RefsInMBB, + unsigned &RestoreIndex) { + // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb + // begin index accordingly. + MachineBasicBlock::iterator Pt = MBB->end(); + MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator(); + + // We start at the call, so walk forward until we find the call frame teardown + // since we can't insert restores before that. Bail if we encounter a use + // during this time. + MachineBasicBlock::iterator MII = MI; + if (MII == EndPt) return Pt; + + while (MII != EndPt && !RefsInMBB.count(MII) && + MII->getOpcode() != TRI->getCallFrameDestroyOpcode()) + ++MII; + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + ++MII; + + // FIXME: Limit the number of instructions to examine to reduce + // compile time? + while (MII != EndPt) { + unsigned Index = LIs->getInstructionIndex(MII); + if (Index > LastIdx) + break; + unsigned Gap = LIs->findGapBeforeInstr(Index); + + // We can't insert a restore between the barrier (a call) and its + // corresponding call frame teardown. + if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) { + do { + if (MII == EndPt || RefsInMBB.count(MII)) return Pt; + ++MII; + } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); + } else if (Gap) { + Pt = MII; + RestoreIndex = Gap; + } + + if (RefsInMBB.count(MII)) + return Pt; + + ++MII; + } + + return Pt; +} + +/// CreateSpillStackSlot - Create a stack slot for the live interval being +/// split. If the live interval was previously split, just reuse the same +/// slot. +int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, + const TargetRegisterClass *RC) { + int SS; + DenseMap::iterator I = IntervalSSMap.find(Reg); + if (I != IntervalSSMap.end()) { + SS = I->second; + } else { + SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + IntervalSSMap[Reg] = SS; + } + + // Create live interval for stack slot. 
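CreateSpillStackSlot's slot lookup above is a straightforward get-or-create memoization. Distilled into a self-contained sketch, with std::map and an integer counter standing in for the pass's DenseMap and MachineFrameInfo (both substitutions are assumptions of this illustration):

  #include <map>

  // Reuse the slot assigned by an earlier split of the same register;
  // otherwise allocate a fresh one and remember it for next time.
  int GetOrCreateSpillSlot(std::map<unsigned, int> &SlotMap, unsigned Reg,
                           int &NextSlot) {
    std::map<unsigned, int>::iterator I = SlotMap.find(Reg);
    if (I != SlotMap.end())
      return I->second;
    int SS = NextSlot++;  // Stand-in for MFI->CreateStackObject(...).
    SlotMap[Reg] = SS;
    return SS;
  }

The pass then builds the live interval for that stack slot, as the code below continues.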
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC); + if (CurrSLI->hasAtLeastOneValue()) + CurrSValNo = CurrSLI->getValNumInfo(0); + else + CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator()); + return SS; +} + +/// IsAvailableInStack - Return true if register is available in a split stack +/// slot at the specified index. +bool +PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, + unsigned Reg, unsigned DefIndex, + unsigned RestoreIndex, unsigned &SpillIndex, + int& SS) const { + if (!DefMBB) + return false; + + DenseMap::iterator I = IntervalSSMap.find(Reg); + if (I == IntervalSSMap.end()) + return false; + DenseMap::iterator II = Def2SpillMap.find(DefIndex); + if (II == Def2SpillMap.end()) + return false; + + // If last spill of def is in the same mbb as barrier mbb (where restore will + // be), make sure it's not below the intended restore index. + // FIXME: Undo the previous spill? + assert(LIs->getMBBFromIndex(II->second) == DefMBB); + if (DefMBB == BarrierMBB && II->second >= RestoreIndex) + return false; + + SS = I->second; + SpillIndex = II->second; + return true; +} + +/// UpdateSpillSlotInterval - Given the specified val# of the register live +/// interval being split, and the spill and restore indicies, update the live +/// interval of the spill stack slot. +void +PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, + unsigned RestoreIndex) { + assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && + "Expect restore in the barrier mbb"); + + MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex); + if (MBB == BarrierMBB) { + // Intra-block spill + restore. We are done. + LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo); + CurrSLI->addRange(SLR); + return; + } + + SmallPtrSet Processed; + unsigned EndIdx = LIs->getMBBEndIdx(MBB); + LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo); + CurrSLI->addRange(SLR); + Processed.insert(MBB); + + // Start from the spill mbb, figure out the extend of the spill slot's + // live interval. + SmallVector WorkList; + const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex); + if (LR->end > EndIdx) + // If live range extend beyond end of mbb, add successors to work list. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + WorkList.push_back(*SI); + + while (!WorkList.empty()) { + MachineBasicBlock *MBB = WorkList.back(); + WorkList.pop_back(); + if (Processed.count(MBB)) + continue; + unsigned Idx = LIs->getMBBStartIdx(MBB); + LR = CurrLI->getLiveRangeContaining(Idx); + if (LR && LR->valno == ValNo) { + EndIdx = LIs->getMBBEndIdx(MBB); + if (Idx <= RestoreIndex && RestoreIndex < EndIdx) { + // Spill slot live interval stops at the restore. + LiveRange SLR(Idx, RestoreIndex, CurrSValNo); + CurrSLI->addRange(SLR); + } else if (LR->end > EndIdx) { + // Live range extends beyond end of mbb, process successors. + LiveRange SLR(Idx, EndIdx+1, CurrSValNo); + CurrSLI->addRange(SLR); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + WorkList.push_back(*SI); + } else { + LiveRange SLR(Idx, LR->end, CurrSValNo); + CurrSLI->addRange(SLR); + } + Processed.insert(MBB); + } + } +} + +/// PerformPHIConstruction - From properly set up use and def lists, use a PHI +/// construction algorithm to compute the ranges and valnos for an interval. 
+VNInfo* +PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, + MachineBasicBlock* MBB, LiveInterval* LI, + SmallPtrSet& Visited, + DenseMap >& Defs, + DenseMap >& Uses, + DenseMap& NewVNs, + DenseMap& LiveOut, + DenseMap& Phis, + bool IsTopLevel, bool IsIntraBlock) { + // Return memoized result if it's available. + if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI)) + return NewVNs[UseI]; + else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI)) + return NewVNs[UseI]; + else if (!IsIntraBlock && LiveOut.count(MBB)) + return LiveOut[MBB]; + + // Check if our block contains any uses or defs. + bool ContainsDefs = Defs.count(MBB); + bool ContainsUses = Uses.count(MBB); + + VNInfo* RetVNI = 0; + + // Enumerate the cases of use/def contaning blocks. + if (!ContainsDefs && !ContainsUses) { + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, + NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + } else if (ContainsDefs && !ContainsUses) { + SmallPtrSet& BlockDefs = Defs[MBB]; + + // Search for the def in this block. If we don't find it before the + // instruction we care about, go to the fallback case. Note that that + // should never happen: this cannot be intrablock, so use should + // always be an end() iterator. + assert(UseI == MBB->end() && "No use marked in intrablock"); + + MachineBasicBlock::iterator Walker = UseI; + --Walker; + while (Walker != MBB->begin()) { + if (BlockDefs.count(Walker)) + break; + --Walker; + } + + // Once we've found it, extend its VNInfo to our instruction. + unsigned DefIndex = LIs->getInstructionIndex(Walker); + DefIndex = LiveIntervals::getDefIndex(DefIndex); + unsigned EndIndex = LIs->getMBBEndIdx(MBB); + + RetVNI = NewVNs[Walker]; + LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI)); + } else if (!ContainsDefs && ContainsUses) { + SmallPtrSet& BlockUses = Uses[MBB]; + + // Search for the use in this block that precedes the instruction we care + // about, going to the fallback case if we don't find it. + if (UseI == MBB->begin()) + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + + MachineBasicBlock::iterator Walker = UseI; + --Walker; + bool found = false; + while (Walker != MBB->begin()) { + if (BlockUses.count(Walker)) { + found = true; + break; + } + --Walker; + } + + // Must check begin() too. + if (!found) { + if (BlockUses.count(Walker)) + found = true; + else + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + } + + unsigned UseIndex = LIs->getInstructionIndex(Walker); + UseIndex = LiveIntervals::getUseIndex(UseIndex); + unsigned EndIndex = 0; + if (IsIntraBlock) { + EndIndex = LIs->getInstructionIndex(UseI); + EndIndex = LiveIntervals::getUseIndex(EndIndex); + } else + EndIndex = LIs->getMBBEndIdx(MBB); + + // Now, recursively phi construct the VNInfo for the use we found, + // and then extend it to include the instruction we care about + RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, + NewVNs, LiveOut, Phis, false, true); + + LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI)); + + // FIXME: Need to set kills properly for inter-block stuff. 
+    if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+    if (IsIntraBlock)
+      LI->addKill(RetVNI, EndIndex);
+  } else if (ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // This case is basically a merging of the two preceding cases, with the
+    // special note that checking for defs must take precedence over checking
+    // for uses, because of two-address instructions.
+
+    if (UseI == MBB->begin())
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+                                            NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    bool foundDef = false;
+    bool foundUse = false;
+    while (Walker != MBB->begin()) {
+      if (BlockDefs.count(Walker)) {
+        foundDef = true;
+        break;
+      } else if (BlockUses.count(Walker)) {
+        foundUse = true;
+        break;
+      }
+      --Walker;
+    }
+
+    // Must check begin() too.
+    if (!foundDef && !foundUse) {
+      if (BlockDefs.count(Walker))
+        foundDef = true;
+      else if (BlockUses.count(Walker))
+        foundUse = true;
+      else
+        return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                              Uses, NewVNs, LiveOut, Phis,
+                                              IsTopLevel, IsIntraBlock);
+    }
+
+    unsigned StartIndex = LIs->getInstructionIndex(Walker);
+    StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
+                            LiveIntervals::getUseIndex(StartIndex);
+    unsigned EndIndex = 0;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI);
+      EndIndex = LiveIntervals::getUseIndex(EndIndex);
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    if (foundDef)
+      RetVNI = NewVNs[Walker];
+    else
+      RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                      NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+
+    if (foundUse && LI->isKill(RetVNI, StartIndex))
+      LI->removeKill(RetVNI, StartIndex);
+    if (IsIntraBlock) {
+      LI->addKill(RetVNI, EndIndex);
+    }
+  }
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+                                       MachineBasicBlock* MBB, LiveInterval* LI,
+                                       SmallPtrSet<MachineInstr*, 4>& Visited,
+             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                   DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                       bool IsTopLevel, bool IsIntraBlock) {
+  // NOTE: Because this is the fallback case from other cases, we do NOT
+  // assume that we are not intrablock here.
+  if (Phis.count(MBB)) return Phis[MBB];
+
+  unsigned StartIndex = LIs->getMBBStartIdx(MBB);
+  VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
+                                                LIs->getVNInfoAllocator());
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+  // If there are no uses or defs between our starting point and the
+  // beginning of the block, then recursively perform phi construction
+  // on our predecessors.
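The recursion described in the comment above is easier to see with the live-interval bookkeeping stripped away. A simplified sketch of the same memoized walk over predecessor blocks (Block, the int value numbers, and the memo map are all hypothetical reductions of MachineBasicBlock, VNInfo, and the Phis/LiveOut maps):

  #include <cstddef>
  #include <map>
  #include <vector>

  struct Block {
    std::vector<Block*> Preds;
    bool HasLocalDef;
    int LocalValue;
  };

  int ReachingValue(Block *B, std::map<Block*, int> &Memo) {
    std::map<Block*, int>::iterator I = Memo.find(B);
    if (I != Memo.end())
      return I->second;
    if (B->HasLocalDef)
      return Memo[B] = B->LocalValue;
    // Seed the memo entry before recursing so cycles in the CFG terminate,
    // just as Phis[MBB] is assigned above before predecessors are visited.
    Memo[B] = -1;
    int V = -1;
    for (size_t i = 0, e = B->Preds.size(); i != e; ++i)
      V = ReachingValue(B->Preds[i], Memo);  // The real code PHI-joins these.
    return Memo[B] = V;
  }

The implementation, continuing below, collects one incoming VNInfo per predecessor and then either merges into a single value or records a phi join.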
+ DenseMap IncomingVNs; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI, + Visited, Defs, Uses, NewVNs, + LiveOut, Phis, false, false); + if (Incoming != 0) + IncomingVNs[*PI] = Incoming; + } + + if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) { + VNInfo* OldVN = RetVNI; + VNInfo* NewVN = IncomingVNs.begin()->second; + VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); + if (MergedVN == OldVN) std::swap(OldVN, NewVN); + + for (DenseMap::iterator LOI = LiveOut.begin(), + LOE = LiveOut.end(); LOI != LOE; ++LOI) + if (LOI->second == OldVN) + LOI->second = MergedVN; + for (DenseMap::iterator NVI = NewVNs.begin(), + NVE = NewVNs.end(); NVI != NVE; ++NVI) + if (NVI->second == OldVN) + NVI->second = MergedVN; + for (DenseMap::iterator PI = Phis.begin(), + PE = Phis.end(); PI != PE; ++PI) + if (PI->second == OldVN) + PI->second = MergedVN; + RetVNI = MergedVN; + } else { + // Otherwise, merge the incoming VNInfos with a phi join. Create a new + // VNInfo to represent the joined value. + for (DenseMap::iterator I = + IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { + I->second->hasPHIKill = true; + unsigned KillIndex = LIs->getMBBEndIdx(I->first); + if (!LiveInterval::isKill(I->second, KillIndex)) + LI->addKill(I->second, KillIndex); + } + } + + unsigned EndIndex = 0; + if (IsIntraBlock) { + EndIndex = LIs->getInstructionIndex(UseI); + EndIndex = LiveIntervals::getUseIndex(EndIndex); + } else + EndIndex = LIs->getMBBEndIdx(MBB); + LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + if (IsIntraBlock) + LI->addKill(RetVNI, EndIndex); + + // Memoize results so we don't have to recompute them. + if (!IsIntraBlock) + LiveOut[MBB] = RetVNI; + else { + if (!NewVNs.count(UseI)) + NewVNs[UseI] = RetVNI; + Visited.insert(UseI); + } + + return RetVNI; +} + +/// ReconstructLiveInterval - Recompute a live interval from scratch. +void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { + BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator(); + + // Clear the old ranges and valnos; + LI->clear(); + + // Cache the uses and defs of the register + typedef DenseMap > RegMap; + RegMap Defs, Uses; + + // Keep track of the new VNs we're creating. + DenseMap NewVNs; + SmallPtrSet PhiVNs; + + // Cache defs, and create a new VNInfo for each def. + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + Defs[(*DI).getParent()].insert(&*DI); + + unsigned DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LiveIntervals::getDefIndex(DefIdx); + + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); + + // If the def is a move, set the copy field. + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (DstReg == LI->reg) + NewVN->copy = &*DI; + + NewVNs[&*DI] = NewVN; + } + + // Cache uses as a separate pass from actually processing them. + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) + Uses[(*UI).getParent()].insert(&*UI); + + // Now, actually process every use and use a phi construction algorithm + // to walk from it to its reaching definitions, building VNInfos along + // the way. 
+ DenseMap LiveOut; + DenseMap Phis; + SmallPtrSet Visited; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) { + PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, true, true); + } + + // Add ranges for dead defs + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + unsigned DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LiveIntervals::getDefIndex(DefIdx); + + if (LI->liveAt(DefIdx)) continue; + + VNInfo* DeadVN = NewVNs[&*DI]; + LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN)); + LI->addKill(DeadVN, DefIdx); + } +} + +/// RenumberValno - Split the given valno out into a new vreg, allowing it to +/// be allocated to a different register. This function creates a new vreg, +/// copies the valno and its live ranges over to the new vreg's interval, +/// removes them from the old interval, and rewrites all uses and defs of +/// the original reg to the new vreg within those ranges. +void PreAllocSplitting::RenumberValno(VNInfo* VN) { + SmallVector Stack; + SmallVector VNsToCopy; + Stack.push_back(VN); + + // Walk through and copy the valno we care about, and any other valnos + // that are two-address redefinitions of the one we care about. These + // will need to be rewritten as well. We also check for safety of the + // renumbering here, by making sure that none of the valno involved has + // phi kills. + while (!Stack.empty()) { + VNInfo* OldVN = Stack.back(); + Stack.pop_back(); + + // Bail out if we ever encounter a valno that has a PHI kill. We can't + // renumber these. + if (OldVN->hasPHIKill) return; + + VNsToCopy.push_back(OldVN); + + // Locate two-address redefinitions + for (SmallVector::iterator KI = OldVN->kills.begin(), + KE = OldVN->kills.end(); KI != KE; ++KI) { + MachineInstr* MI = LIs->getInstructionFromIndex(*KI); + unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); + if (DefIdx == ~0U) continue; + if (MI->isRegTiedToUseOperand(DefIdx)) { + VNInfo* NextVN = + CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI)); + if (NextVN == OldVN) continue; + Stack.push_back(NextVN); + } + } + } + + // Create the new vreg + unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg)); + + // Create the new live interval + LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg); + + for (SmallVector::iterator OI = VNsToCopy.begin(), OE = + VNsToCopy.end(); OI != OE; ++OI) { + VNInfo* OldVN = *OI; + + // Copy the valno over + VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy, + LIs->getVNInfoAllocator()); + NewLI.copyValNumInfo(NewVN, OldVN); + NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN); + + // Remove the valno from the old interval + CurrLI->removeValNo(OldVN); + } + + // Rewrite defs and uses. This is done in two stages to avoid invalidating + // the reg_iterator. 
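The two stages promised in the comment above are the usual collect-then-mutate idiom for containers whose iterators cannot survive modification. In isolation, with a plain vector standing in for the register use/def chains (an assumption of this sketch):

  #include <vector>

  // Stage 1 records what to change while iterating; stage 2 applies the
  // changes after iteration, so the traversal never invalidates itself.
  void RewriteAll(std::vector<unsigned> &Operands, unsigned OldReg,
                  unsigned NewReg) {
    std::vector<size_t> ToChange;
    for (size_t i = 0, e = Operands.size(); i != e; ++i)
      if (Operands[i] == OldReg)
        ToChange.push_back(i);
    for (size_t i = 0, e = ToChange.size(); i != e; ++i)
      Operands[ToChange[i]] = NewReg;
  }

The pass's version of this, driven by MachineRegisterInfo's reg_iterator, follows.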
+  SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand& MO = I.getOperand();
+    unsigned InstrIdx = LIs->getInstructionIndex(&*I);
+
+    if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) ||
+        (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx))))
+      OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+  }
+
+  for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+         OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+    MachineInstr* Inst = I->first;
+    unsigned OpIdx = I->second;
+    MachineOperand& MO = Inst->getOperand(OpIdx);
+    MO.setReg(NewVReg);
+  }
+
+  // Grow the VirtRegMap, since we've created a new vreg.
+  VRM->grow();
+
+  // The renumbered vreg shares a stack slot with the old register.
+  if (IntervalSSMap.count(CurrLI->reg))
+    IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+  NumRenumbers++;
+}
+
+bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
+                                      MachineInstr* DefMI,
+                                      MachineBasicBlock::iterator RestorePt,
+                                      unsigned RestoreIdx,
+                                      SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  MachineBasicBlock& MBB = *RestorePt->getParent();
+
+  MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+  unsigned KillIdx = 0;
+  if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+    KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+  else
+    KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+
+  if (KillPt == DefMI->getParent()->end())
+    return false;
+
+  TII->reMaterialize(MBB, RestorePt, vreg, DefMI);
+  LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+
+  ReconstructLiveInterval(CurrLI);
+  unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+  RematIdx = LiveIntervals::getDefIndex(RematIdx);
+  RenumberValno(CurrLI->findDefinedVNInfo(RematIdx));
+
+  ++NumSplits;
+  ++NumRemats;
+  return true;
+}
+
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+                                           const TargetRegisterClass* RC,
+                                           MachineInstr* DefMI,
+                                           MachineInstr* Barrier,
+                                           MachineBasicBlock* MBB,
+                                           int& SS,
+                                           SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  MachineBasicBlock::iterator Pt = MBB->begin();
+
+  // Nothing to fold if the register has no references in this block.
+ if (RefsInMBB.empty()) + return 0; + + MachineBasicBlock::iterator FoldPt = Barrier; + while (&*FoldPt != DefMI && FoldPt != MBB->begin() && + !RefsInMBB.count(FoldPt)) + --FoldPt; + + int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false); + if (OpIdx == -1) + return 0; + + SmallVector Ops; + Ops.push_back(OpIdx); + + if (!TII->canFoldMemoryOperand(FoldPt, Ops)) + return 0; + + DenseMap::iterator I = IntervalSSMap.find(vreg); + if (I != IntervalSSMap.end()) { + SS = I->second; + } else { + SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + } + + MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), + FoldPt, Ops, SS); + + if (FMI) { + LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); + FMI = MBB->insert(MBB->erase(FoldPt), FMI); + ++NumFolds; + + IntervalSSMap[vreg] = SS; + CurrSLI = &LSs->getOrCreateInterval(SS, RC); + if (CurrSLI->hasAtLeastOneValue()) + CurrSValNo = CurrSLI->getValNumInfo(0); + else + CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator()); + } + + return FMI; +} + +MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, + const TargetRegisterClass* RC, + MachineInstr* Barrier, + MachineBasicBlock* MBB, + int SS, + SmallPtrSet& RefsInMBB) { + if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds) + return 0; + + // Go top down if RefsInMBB is empty. + if (RefsInMBB.empty()) + return 0; + + // Can't fold a restore between a call stack setup and teardown. + MachineBasicBlock::iterator FoldPt = Barrier; + + // Advance from barrier to call frame teardown. + while (FoldPt != MBB->getFirstTerminator() && + FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { + if (RefsInMBB.count(FoldPt)) + return 0; + + ++FoldPt; + } + + if (FoldPt == MBB->getFirstTerminator()) + return 0; + else + ++FoldPt; + + // Now find the restore point. + while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) { + if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) { + while (FoldPt != MBB->getFirstTerminator() && + FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { + if (RefsInMBB.count(FoldPt)) + return 0; + + ++FoldPt; + } + + if (FoldPt == MBB->getFirstTerminator()) + return 0; + } + + ++FoldPt; + } + + if (FoldPt == MBB->getFirstTerminator()) + return 0; + + int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true); + if (OpIdx == -1) + return 0; + + SmallVector Ops; + Ops.push_back(OpIdx); + + if (!TII->canFoldMemoryOperand(FoldPt, Ops)) + return 0; + + MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), + FoldPt, Ops, SS); + + if (FMI) { + LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); + FMI = MBB->insert(MBB->erase(FoldPt), FMI); + ++NumRestoreFolds; + } + + return FMI; +} + +/// SplitRegLiveInterval - Split (spill and restore) the given live interval +/// so it would not cross the barrier that's being processed. Shrink wrap +/// (minimize) the live interval to the last uses. +bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { + CurrLI = LI; + + // Find live range where current interval cross the barrier. + LiveInterval::iterator LR = + CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx)); + VNInfo *ValNo = LR->valno; + + if (ValNo->def == ~1U) { + // Defined by a dead def? How can this be? + assert(0 && "Val# is defined by a dead def?"); + abort(); + } + + MachineInstr *DefMI = (ValNo->def != ~0U) + ? LIs->getInstructionFromIndex(ValNo->def) : NULL; + + // If this would create a new join point, do not split. 
+ if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) + return false; + + // Find all references in the barrier mbb. + SmallPtrSet RefsInMBB; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), + E = MRI->reg_end(); I != E; ++I) { + MachineInstr *RefMI = &*I; + if (RefMI->getParent() == BarrierMBB) + RefsInMBB.insert(RefMI); + } + + // Find a point to restore the value after the barrier. + unsigned RestoreIndex = 0; + MachineBasicBlock::iterator RestorePt = + findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); + if (RestorePt == BarrierMBB->end()) + return false; + + if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) + if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, + RestoreIndex, RefsInMBB)) + return true; + + // Add a spill either before the barrier or after the definition. + MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; + const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); + unsigned SpillIndex = 0; + MachineInstr *SpillMI = NULL; + int SS = -1; + if (ValNo->def == ~0U) { + // If it's defined by a phi, we must split just before the barrier. + if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, + BarrierMBB, SS, RefsInMBB))) { + SpillIndex = LIs->getInstructionIndex(SpillMI); + } else { + MachineBasicBlock::iterator SpillPt = + findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex); + if (SpillPt == BarrierMBB->begin()) + return false; // No gap to insert spill. + // Add spill. + + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC); + SpillMI = prior(SpillPt); + LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + } + } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, + RestoreIndex, SpillIndex, SS)) { + // If it's already split, just restore the value. There is no need to spill + // the def again. + if (!DefMI) + return false; // Def is dead. Do nothing. + + if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, + BarrierMBB, SS, RefsInMBB))) { + SpillIndex = LIs->getInstructionIndex(SpillMI); + } else { + // Check if it's possible to insert a spill after the def MI. + MachineBasicBlock::iterator SpillPt; + if (DefMBB == BarrierMBB) { + // Add spill after the def and the last use before the barrier. + SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, + RefsInMBB, SpillIndex); + if (SpillPt == DefMBB->begin()) + return false; // No gap to insert spill. + } else { + SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex); + if (SpillPt == DefMBB->end()) + return false; // No gap to insert spill. + } + // Add spill. The store instruction kills the register if def is before + // the barrier in the barrier block. + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, + DefMBB == BarrierMBB, SS, RC); + SpillMI = prior(SpillPt); + LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + } + } + + // Remember def instruction index to spill index mapping. + if (DefMI && SpillMI) + Def2SpillMap[ValNo->def] = SpillIndex; + + // Add restore. 
+ bool FoldedRestore = false; + if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, + BarrierMBB, SS, RefsInMBB)) { + RestorePt = LMI; + RestoreIndex = LIs->getInstructionIndex(RestorePt); + FoldedRestore = true; + } else { + TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC); + MachineInstr *LoadMI = prior(RestorePt); + LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex); + } + + // Update spill stack slot live interval. + UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1, + LIs->getDefIndex(RestoreIndex)); + + ReconstructLiveInterval(CurrLI); + + if (!FoldedRestore) { + unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx); + RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx)); + } + + ++NumSplits; + return true; +} + +/// SplitRegLiveIntervals - Split all register live intervals that cross the +/// barrier that's being processed. +bool +PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, + SmallPtrSet& Split) { + // First find all the virtual registers whose live intervals are intercepted + // by the current barrier. + SmallVector Intervals; + for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { + // FIXME: If it's not safe to move any instruction that defines the barrier + // register class, then it means there are some special dependencies which + // codegen is not modelling. Ignore these barriers for now. + if (!TII->isSafeToMoveRegClassDefs(*RC)) + continue; + std::vector &VRs = MRI->getRegClassVirtRegs(*RC); + for (unsigned i = 0, e = VRs.size(); i != e; ++i) { + unsigned Reg = VRs[i]; + if (!LIs->hasInterval(Reg)) + continue; + LiveInterval *LI = &LIs->getInterval(Reg); + if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) + // Virtual register live interval is intercepted by the barrier. We + // should split and shrink wrap its interval if possible. + Intervals.push_back(LI); + } + } + + // Process the affected live intervals. + bool Change = false; + while (!Intervals.empty()) { + if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) + break; + else if (NumSplits == 4) + Change |= Change; + LiveInterval *LI = Intervals.back(); + Intervals.pop_back(); + bool result = SplitRegLiveInterval(LI); + if (result) Split.insert(LI); + Change |= result; + } + + return Change; +} + +unsigned PreAllocSplitting::getNumberOfNonSpills( + SmallPtrSet& MIs, + unsigned Reg, int FrameIndex, + bool& FeedsTwoAddr) { + unsigned NonSpills = 0; + for (SmallPtrSet::iterator UI = MIs.begin(), UE = MIs.end(); + UI != UE; ++UI) { + int StoreFrameIndex; + unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) + NonSpills++; + + int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) + FeedsTwoAddr = true; + } + + return NonSpills; +} + +/// removeDeadSpills - After doing splitting, filter through all intervals we've +/// split, and see if any of the spills are unnecessary. If so, remove them. +bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { + bool changed = false; + + // Walk over all of the live intervals that were touched by the splitter, + // and see if we can do any DCE and/or folding. + for (SmallPtrSet::iterator LI = split.begin(), + LE = split.end(); LI != LE; ++LI) { + DenseMap > VNUseCount; + + // First, collect all the uses of the vreg, and sort them by their + // reaching definition (VNInfo). 
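The collection step named in the comment above is a bucket-by-key pass. Its shape, with standard containers in place of DenseMap and SmallPtrSet (Use and the int value numbers are hypothetical simplifications):

  #include <map>
  #include <set>
  #include <vector>

  struct Use { int ValueNum; };  // A use tagged with its reaching definition.

  // Bucket every use under the value number that reaches it, mirroring how
  // VNUseCount maps each VNInfo to the set of instructions using it.
  std::map<int, std::set<const Use*> >
  GroupByReachingDef(const std::vector<Use> &Uses) {
    std::map<int, std::set<const Use*> > Buckets;
    for (size_t i = 0, e = Uses.size(); i != e; ++i)
      Buckets[Uses[i].ValueNum].insert(&Uses[i]);
    return Buckets;
  }

The loop below does the same thing keyed on the live range containing each use's instruction index.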
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), + UE = MRI->use_end(); UI != UE; ++UI) { + unsigned index = LIs->getInstructionIndex(&*UI); + index = LiveIntervals::getUseIndex(index); + + const LiveRange* LR = (*LI)->getLiveRangeContaining(index); + VNUseCount[LR->valno].insert(&*UI); + } + + // Now, take the definitions (VNInfo's) one at a time and try to DCE + // and/or fold them away. + for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(), + VE = (*LI)->vni_end(); VI != VE; ++VI) { + + if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) + return changed; + + VNInfo* CurrVN = *VI; + + // We don't currently try to handle definitions with PHI kills, because + // it would involve processing more than one VNInfo at once. + if (CurrVN->hasPHIKill) continue; + + // We also don't try to handle the results of PHI joins, since there's + // no defining instruction to analyze. + unsigned DefIdx = CurrVN->def; + if (DefIdx == ~0U || DefIdx == ~1U) continue; + + // We're only interested in eliminating cruft introduced by the splitter, + // is of the form load-use or load-use-store. First, check that the + // definition is a load, and remember what stack slot we loaded it from. + MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx); + int FrameIndex; + if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; + + // If the definition has no uses at all, just DCE it. + if (VNUseCount[CurrVN].size() == 0) { + LIs->RemoveMachineInstrFromMaps(DefMI); + (*LI)->removeValNo(CurrVN); + DefMI->eraseFromParent(); + VNUseCount.erase(CurrVN); + NumDeadSpills++; + changed = true; + continue; + } + + // Second, get the number of non-store uses of the definition, as well as + // a flag indicating whether it feeds into a later two-address definition. + bool FeedsTwoAddr = false; + unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN], + (*LI)->reg, FrameIndex, + FeedsTwoAddr); + + // If there's one non-store use and it doesn't feed a two-addr, then + // this is a load-use-store case that we can try to fold. + if (NonSpillCount == 1 && !FeedsTwoAddr) { + // Start by finding the non-store use MachineInstr. + SmallPtrSet::iterator UI = VNUseCount[CurrVN].begin(); + int StoreFrameIndex; + unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + while (UI != VNUseCount[CurrVN].end() && + (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) { + ++UI; + if (UI != VNUseCount[CurrVN].end()) + StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + } + if (UI == VNUseCount[CurrVN].end()) continue; + + MachineInstr* use = *UI; + + // Attempt to fold it away! + int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false); + if (OpIdx == -1) continue; + SmallVector Ops; + Ops.push_back(OpIdx); + if (!TII->canFoldMemoryOperand(use, Ops)) continue; + + MachineInstr* NewMI = + TII->foldMemoryOperand(*use->getParent()->getParent(), + use, Ops, FrameIndex); + + if (!NewMI) continue; + + // Update relevant analyses. + LIs->RemoveMachineInstrFromMaps(DefMI); + LIs->ReplaceMachineInstrInMaps(use, NewMI); + (*LI)->removeValNo(CurrVN); + + DefMI->eraseFromParent(); + MachineBasicBlock* MBB = use->getParent(); + NewMI = MBB->insert(MBB->erase(use), NewMI); + VNUseCount[CurrVN].erase(use); + + // Remove deleted instructions. Note that we need to remove them from + // the VNInfo->use map as well, just to be safe. 
+      for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+             VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+           II != IE; ++II) {
+        for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+               VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+             ++VNI)
+          if (VNI->first != CurrVN)
+            VNI->second.erase(*II);
+        LIs->RemoveMachineInstrFromMaps(*II);
+        (*II)->eraseFromParent();
+      }
+
+      VNUseCount.erase(CurrVN);
+
+      for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+             VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+        if (VI->second.erase(use))
+          VI->second.insert(NewMI);
+
+      NumDeadSpills++;
+      changed = true;
+      continue;
+    }
+
+    // If there's more than one non-store instruction, we can't profitably
+    // fold it, so bail.
+    if (NonSpillCount) continue;
+
+    // Otherwise, this is a load-store case, so DCE them.
+    for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+           VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+         UI != UE; ++UI) {
+      LIs->RemoveMachineInstrFromMaps(*UI);
+      (*UI)->eraseFromParent();
+    }
+
+    VNUseCount.erase(CurrVN);
+
+    LIs->RemoveMachineInstrFromMaps(DefMI);
+    (*LI)->removeValNo(CurrVN);
+    DefMI->eraseFromParent();
+    NumDeadSpills++;
+    changed = true;
+    }
+  }
+
+  return changed;
+}
+
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+                                       MachineBasicBlock* DefMBB,
+                                       MachineBasicBlock* BarrierMBB) {
+  if (DefMBB == BarrierMBB)
+    return false;
+
+  if (LR->valno->hasPHIKill)
+    return false;
+
+  unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+  if (LR->end < MBBEnd)
+    return false;
+
+  MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+  if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+    return true;
+
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+  SmallPtrSet<MachineBasicBlock*, 4> Visited;
+  typedef std::pair<MachineBasicBlock*,
+                    MachineBasicBlock::succ_iterator> ItPair;
+  SmallVector<ItPair, 4> Stack;
+  Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
+
+  while (!Stack.empty()) {
+    ItPair P = Stack.back();
+    Stack.pop_back();
+
+    MachineBasicBlock* PredMBB = P.first;
+    MachineBasicBlock::succ_iterator S = P.second;
+
+    if (S == PredMBB->succ_end())
+      continue;
+    else if (Visited.count(*S)) {
+      Stack.push_back(std::make_pair(PredMBB, ++S));
+      continue;
+    } else
+      Stack.push_back(std::make_pair(PredMBB, S+1));
+
+    MachineBasicBlock* MBB = *S;
+    Visited.insert(MBB);
+
+    if (MBB == BarrierMBB)
+      return true;
+
+    MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+    MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+    MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
+    while (MDTN) {
+      if (MDTN == DefMDTN)
+        return true;
+      else if (MDTN == BarrierMDTN)
+        break;
+      MDTN = MDTN->getIDom();
+    }
+
+    MBBEnd = LIs->getMBBEndIdx(MBB);
+    if (LR->end > MBBEnd)
+      Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+  }
+
+  return false;
+}
+
+
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+  CurrMF = &MF;
+  TM = &MF.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  LIs = &getAnalysis<LiveIntervals>();
+  LSs = &getAnalysis<LiveStacks>();
+  VRM = &getAnalysis<VirtRegMap>();
+
+  bool MadeChange = false;
+
+  // Make sure blocks are numbered in order.
+ MF.RenumberBlocks(); + + MachineBasicBlock *Entry = MF.begin(); + SmallPtrSet Visited; + + SmallPtrSet Split; + + for (df_ext_iterator > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + BarrierMBB = *DFI; + for (MachineBasicBlock::iterator I = BarrierMBB->begin(), + E = BarrierMBB->end(); I != E; ++I) { + Barrier = &*I; + const TargetRegisterClass **BarrierRCs = + Barrier->getDesc().getRegClassBarriers(); + if (!BarrierRCs) + continue; + BarrierIdx = LIs->getInstructionIndex(Barrier); + MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); + } + } + + MadeChange |= removeDeadSpills(Split); + + return MadeChange; +} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp new file mode 100644 index 000000000000..9e7ad6752a73 --- /dev/null +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -0,0 +1,679 @@ +//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +// This pass provides an optional shrink wrapping variant of prolog/epilog +// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. +// +//===----------------------------------------------------------------------===// + +#include "PrologEpilogInserter.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/STLExtras.h" +#include + +using namespace llvm; + +char PEI::ID = 0; + +static RegisterPass +X("prologepilog", "Prologue/Epilogue Insertion"); + +/// createPrologEpilogCodeInserter - This function returns a pass that inserts +/// prolog and epilog code, and eliminates abstract frame references. +/// +FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } + +/// runOnMachineFunction - Insert prolog/epilog code and replace abstract +/// frame indexes with appropriate references. +/// +bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + + // Get MachineModuleInfo so that we can track the construction of the + // frame. + if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) + Fn.getFrameInfo()->setMachineModuleInfo(MMI); + + // Allow the target machine to make some adjustments to the function + // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. + TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + + // Scan the function for modified callee saved registers and insert spill + // code for any callee saved registers that are modified. 
Also calculate
+  // the MaxCallFrameSize and HasCalls variables for the function's frame
+  // information and eliminate call frame pseudo instructions.
+  calculateCalleeSavedRegisters(Fn);
+
+  // Determine placement of CSR spill/restore code:
+  //  - with shrink wrapping, place spills and restores to tightly
+  //    enclose regions in the Machine CFG of the function where
+  //    they are used.
+  //  - without shrink wrapping (the default), place all spills in the
+  //    entry block, all restores in return blocks.
+  placeCSRSpillsAndRestores(Fn);
+
+  // Add the code to save and restore the callee saved registers.
+  insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+  TRI->processFunctionBeforeFrameFinalized(Fn);
+
+  // Calculate actual frame offsets for all abstract stack objects...
+  calculateFrameObjectOffsets(Fn);
+
+  // Add prolog and epilog code to the function. This function is required
+  // to align the stack frame as necessary for any stack variables or
+  // called functions. Because of this, calculateCalleeSavedRegisters
+  // must be called before this function in order to set the HasCalls
+  // and MaxCallFrameSize variables.
+  insertPrologEpilogCode(Fn);
+
+  // Replace all MO_FrameIndex operands with physical register references
+  // and actual offsets.
+  //
+  replaceFrameIndices(Fn);
+
+  delete RS;
+  clearAllSets();
+  return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  if (ShrinkWrapping || ShrinkWrapFunc != "") {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<MachineDominatorTree>();
+  }
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+  // Get the callee saved register list...
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+  // Get the function call frame set-up and tear-down instruction opcodes.
+  int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+  // These are used to keep track of the callee-save area. Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  // Early exit for targets which have no callee saved registers and no call
+  // frame setup/destroy pseudo instructions.
+  if ((CSRegs == 0 || CSRegs[0] == 0) &&
+      FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+    return;
+
+  unsigned MaxCallFrameSize = 0;
+  bool HasCalls = false;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImm();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        HasCalls = true;
+        FrameSDOps.push_back(I);
+      }
+
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  FFI->setHasCalls(HasCalls);
+  FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+    MachineBasicBlock::iterator I = FrameSDOps[i];
+    // If call frames are not being included as part of the stack frame,
+    // and there is no dynamic allocation (therefore referencing frame slots
+    // off sp), leave the pseudo ops alone. We'll eliminate them later.
+    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+
+  // Now figure out which *callee saved* registers are modified by the current
+  // function, thus needing to be saved and restored in the prolog/epilog.
+  const TargetRegisterClass* const *CSRegClasses =
+    RegInfo->getCalleeSavedRegClasses(&Fn);
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+      // If the reg is modified, save it!
+      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+    } else {
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+           *AliasSet; ++AliasSet) {  // Check alias registers too.
+        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+          break;
+        }
+      }
+    }
+  }
+
+  if (CSI.empty())
+    return;   // Early exit if no callee saved registers are modified!
+
+  unsigned NumFixedSpillSlots;
+  const std::pair<unsigned,int> *FixedSpillSlots =
+    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+  // Now that we know which registers need to be saved and restored, allocate
+  // stack slots for them.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+    // Check to see if this physreg must be spilled to a particular stack slot
+    // on this target.
+    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+           FixedSlot->first != Reg)
+      ++FixedSlot;
+
+    int FrameIdx;
+    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = RC->getAlignment();
+      unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller.
+      // Use the min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+    }
+    CSI[i].setFrameIdx(FrameIdx);
+  }
+
+  FFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  MachineBasicBlock::iterator I;
+
+  if (! ShrinkWrapThisFunction) {
+    // Spill using target interface.
+    I = EntryBlock->begin();
+    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        // Add the callee-saved register as live-in.
+        // It's killed at the spill.
+        EntryBlock->addLiveIn(CSI[i].getReg());
+
+        // Insert the spill to the stack frame.
+        TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
+                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
+      }
+    }
+
+    // Restore using target interface.
+    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+      MachineBasicBlock* MBB = ReturnBlocks[ri];
+      I = MBB->end(); --I;
+
+      // Skip over all terminator instructions, which are part of the return
+      // sequence.
+      MachineBasicBlock::iterator I2 = I;
+      while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+        I = I2;
+
+      bool AtStart = I == MBB->begin();
+      MachineBasicBlock::iterator BeforeI = I;
+      if (!AtStart)
+        --BeforeI;
+
+      // Restore all registers immediately before the return and any
+      // terminators that precede it.
+      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+                                   CSI[i].getFrameIdx(),
+                                   CSI[i].getRegClass());
+          assert(I != MBB->begin() &&
+                 "loadRegFromStackSlot didn't insert any code!");
+          // Insert in reverse order.  loadRegFromStackSlot can insert
+          // multiple instructions.
+          if (AtStart)
+            I = MBB->begin();
+          else {
+            I = BeforeI;
+            ++I;
+          }
+        }
+      }
+    }
+    return;
+  }
+
+  // Insert spills.
+  std::vector<CalleeSavedInfo> blockCSI;
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet save = BI->second;
+
+    if (save.empty())
+      continue;
+
+    blockCSI.clear();
+    for (CSRegSet::iterator RI = save.begin(),
+           RE = save.end(); RI != RE; ++RI) {
+      blockCSI.push_back(CSI[*RI]);
+    }
+    assert(blockCSI.size() > 0 &&
+           "Could not collect callee saved register info");
+
+    I = MBB->begin();
+
+    // When shrink wrapping, use stack slot stores/loads.
+    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+      // Add the callee-saved register as live-in.
+      // It's killed at the spill.
+      MBB->addLiveIn(blockCSI[i].getReg());
+
+      // Insert the spill to the stack frame.
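+      // As in the non-shrink-wrapped path above, storeRegToStackSlot may
+      // expand to more than one machine instruction; the iterator I stays
+      // put, so successive spills are emitted in order at the block top.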
+ TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + true, + blockCSI[i].getFrameIdx(), + blockCSI[i].getRegClass()); + } + } + + for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restore = BI->second; + + if (restore.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = restore.begin(), + RE = restore.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not find callee saved register info"); + + // If MBB is empty and needs restores, insert at the _beginning_. + if (MBB->empty()) { + I = MBB->begin(); + } else { + I = MBB->end(); + --I; + + // Skip over all terminator instructions, which are part of the + // return sequence. + if (! I->getDesc().isTerminator()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + } + } + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that preceed it. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + blockCSI[i].getFrameIdx(), + blockCSI[i].getRegClass()); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + // If stack grows down, we need to add size of find the lowest address of the + // object. + if (StackGrowsDown) + Offset += FFI->getObjectSize(FrameIdx); + + unsigned Align = FFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + if (StackGrowsDown) { + FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + FFI->setObjectOffset(FrameIdx, Offset); + Offset += FFI->getObjectSize(FrameIdx); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *FFI = Fn.getFrameInfo(); + + unsigned MaxAlign = FFI->getMaxAlignment(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. + int64_t Offset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + Offset = -Offset; + assert(Offset >= 0 + && "Local area offset should be in direction of stack growth"); + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. 
+ // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -FFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If stack grows down, we need to add size of find the lowest + // address of the object. + Offset += FFI->getObjectSize(i); + + unsigned Align = FFI->getObjectAlignment(i); + // If the alignment of this object is greater than that of the stack, + // then increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + FFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = FFI->getObjectAlignment(i); + // If the alignment of this object is greater than that of the stack, + // then increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + FFI->setObjectOffset(i, Offset); + Offset += FFI->getObjectSize(i); + } + } + + // Make sure the special register scavenging spill slot is closest to the + // frame pointer if a frame pointer is required. + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + if (RS && RegInfo->hasFP(Fn)) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + if (FFI->getStackProtectorIndex() >= 0) + AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign); + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (FFI->isDeadObjectIndex(i)) + continue; + if (FFI->getStackProtectorIndex() == (int)i) + continue; + + AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && !RegInfo->hasFP(Fn)) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + // Round up the size to a multiple of the alignment, but only if there are + // calls or alloca's in the function. This ensures that any calls to + // subroutines have their stack frames suitable aligned. + // Also do this if we need runtime alignment of the stack. 
In this case + // offsets will be relative to SP not FP; round up the stack size so this + // works. + if (!RegInfo->targetHandlesStackFrameRounding() && + (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && + FFI->getObjectIndexEnd() != 0))) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (RegInfo->hasReservedCallFrame(Fn)) + Offset += FFI->getMaxCallFrameSize(); + + unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + } + + // Update frame info to pretend that this is part of the stack... + FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); + + // Remember the required stack alignment in case targets need it to perform + // dynamic stack alignment. + FFI->setMaxAlignment(MaxAlign); +} + + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. +/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + // Add prologue to the function... + TRI->emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) + TRI->emitEpilogue(Fn, *I); + } +} + + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameInfo *TFI = TM.getFrameInfo(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + int SPAdj = 0; // SP offset due to call frame setup / destroy. + if (RS) RS->enterBasicBlock(BB); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + if (I->getOpcode() == TargetInstrInfo::DECLARE) { + // Ignore it. + ++I; + continue; + } + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; + + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. 
+ else + I = next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isFI()) { + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + + TRI.eliminateFrameIndex(MI, SPAdj, RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = 0; + break; + } + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && MI) RS->forward(MI); + } + + assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); + } +} + diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h new file mode 100644 index 000000000000..c158dd8ac232 --- /dev/null +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -0,0 +1,167 @@ +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +// This pass also implements a shrink wrapping variant of prolog/epilog +// insertion. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PEI_H +#define LLVM_CODEGEN_PEI_H + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class RegScavenger; + class MachineBasicBlock; + + class PEI : public MachineFunctionPass { + public: + static char ID; + PEI() : MachineFunctionPass(&ID) {} + + const char *getPassName() const { + return "Prolog/Epilog Insertion & Frame Finalization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn); + + private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Analysis info for spill/restore placement. + // "CSR": "callee saved register". + + // CSRegSet contains indices into the Callee Saved Register Info + // vector built by calculateCalleeSavedRegisters() and accessed + // via MF.getFrameInfo()->getCalleeSavedInfo(). + typedef SparseBitVector<> CSRegSet; + + // CSRegBlockMap maps MachineBasicBlocks to sets of callee + // saved register indices. 
+    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+    // Set and maps for computing CSR spill/restore placement:
+    //  used in function                            (UsedCSRegs)
+    //  used in a basic block                       (CSRUsed)
+    //  anticipatable in a basic block              (Antic{In,Out})
+    //  available in a basic block                  (Avail{In,Out})
+    //  to be spilled at the entry to a basic block (CSRSave)
+    //  to be restored at the end of a basic block  (CSRRestore)
+    CSRegSet UsedCSRegs;
+    CSRegBlockMap CSRUsed;
+    CSRegBlockMap AnticIn, AnticOut;
+    CSRegBlockMap AvailIn, AvailOut;
+    CSRegBlockMap CSRSave;
+    CSRegBlockMap CSRRestore;
+
+    // Entry and return blocks of the current function.
+    MachineBasicBlock* EntryBlock;
+    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+    // Map of MBBs to top level MachineLoops.
+    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+    // Flag to control shrink wrapping per-function:
+    // may choose to skip shrink wrapping for certain
+    // functions.
+    bool ShrinkWrapThisFunction;
+
+#ifndef NDEBUG
+    // Machine function handle.
+    MachineFunction* MF;
+
+    // Flag indicating that the current function
+    // has at least one "short" path in the machine
+    // CFG from the entry block to an exit block.
+    bool HasFastExitPath;
+#endif
+
+    bool calculateSets(MachineFunction &Fn);
+    bool calcAnticInOut(MachineBasicBlock* MBB);
+    bool calcAvailInOut(MachineBasicBlock* MBB);
+    void calculateAnticAvail(MachineFunction &Fn);
+    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4>& blks);
+    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+    bool calcSpillPlacements(MachineBasicBlock* MBB,
+                             SmallVector<MachineBasicBlock*, 4> &blks,
+                             CSRegBlockMap &prevSpills);
+    bool calcRestorePlacements(MachineBasicBlock* MBB,
+                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               CSRegBlockMap &prevRestores);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize DFA sets, called before iterations.
+    void clearAnticAvailSets();
+    // Clear all sets constructed by shrink wrapping.
+    void clearAllSets();
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+    // Convenience for recognizing return blocks.
+    bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+    // Debugging methods.
+
+    // Mark this function as having fast exit paths.
+    void findFastExitPath();
+
+    // Verify placement of spills/restores.
+    void verifySpillRestorePlacement();
+
+    std::string getBasicBlockName(const MachineBasicBlock* MBB);
+    std::string stringifyCSRegSet(const CSRegSet& s);
+    void dumpSet(const CSRegSet& s);
+    void dumpUsed(MachineBasicBlock* MBB);
+    void dumpAllUsed();
+    void dumpSets(MachineBasicBlock* MBB);
+    void dumpSets1(MachineBasicBlock* MBB);
+    void dumpAllSets();
+    void dumpSRSets();
+#endif
+
+  };
+} // End llvm namespace
+#endif
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..b4c20e6bfd31
--- /dev/null
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,92 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+static ManagedStatic<PseudoSourceValue[4]> PSVs;
+
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &(*PSVs)[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &(*PSVs)[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &(*PSVs)[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &(*PSVs)[3]; }
+
+static const char *const PSVNames[] = {
+  "Stack",
+  "GOT",
+  "JumpTable",
+  "ConstantPool"
+};
+
+PseudoSourceValue::PseudoSourceValue() :
+  Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {}
+
+void PseudoSourceValue::dump() const {
+  print(errs()); errs() << '\n';
+}
+
+void PseudoSourceValue::print(raw_ostream &OS) const {
+  OS << PSVNames[this - *PSVs];
+}
+
+namespace {
+  /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
+  /// for holding FixedStack values, which must include a frame
+  /// index.
+  class VISIBILITY_HIDDEN FixedStackPseudoSourceValue
+    : public PseudoSourceValue {
+    const int FI;
+  public:
+    explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
+
+    virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+    virtual void print(raw_ostream &OS) const {
+      OS << "FixedStack" << FI;
+    }
+  };
+}
+
+static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+  const PseudoSourceValue *&V = (*FSValues)[FI];
+  if (!V)
+    V = new FixedStackPseudoSourceValue(FI);
+  return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+  if (this == getStack())
+    return false;
+  if (this == getGOT() ||
+      this == getConstantPool() ||
+      this == getJumpTable())
+    return true;
+  assert(0 && "Unknown PseudoSourceValue!");
+  return false;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+  return MFI && MFI->isImmutableObjectIndex(FI);
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 000000000000..64374ce137fd
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,208 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+   algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to change the .td
+   files.
+3. it is easier to explain what the flag means in the .td file, because you
+   don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor: all
+   of the PC relative instructions and condition code setting instructions.
+   We could mark them as hasSideEffects, but that's not quite right. PC
+   relative loads from constant pools can be remat'ed, for example, but it
+   requires more than just cloning the instruction. Some instructions can be
+   remat'ed but expand to more than one instruction, and the allocator will
+   have to make a decision.
+
+2. As stated in 1, remat is not always as simple as cloning. The target will
+   have to decide how to remat an instruction. For example, an ARM 2-piece
+   constant generation instruction is remat'ed as a load from the constant
+   pool.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb, %reg1037, mbb
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as a two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+        ldr r3, [sp, #+4]
+        add r3, r3, #3
+        ldr r2, [sp, #+8]
+        add r2, r2, #2
+        ldr r1, [sp, #+4]  <==
+        add r1, r1, #1
+        ldr r0, [sp, #+4]
+        add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, R3 can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7  @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+        %array = load { i32, [0 x %obj] }** %array_addr
+        %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+        %old = load %obj** %nth_el
+        %z = div i64 %x, %y
+        store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
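+
+//===---------------------------------------------------------------------===//
+
+For the derived-pointer problem above, the described fix would look roughly
+like this. This is an illustrative sketch only; it assumes there is no
+intervening store between the original load of %array and the safe point,
+per the caveat noted above:
+
+        %z = div i64 %x, %y            ; safe point: no derived pointers live
+        %array = load { i32, [0 x %obj] }** %array_addr
+        %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+        %old = load %obj** %nth_el
+        store %obj* %new, %obj** %nth_el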
+ +//===---------------------------------------------------------------------===// + +The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be +revisited. The check is there to work around a misuse of directives in inline +assembly. + +//===---------------------------------------------------------------------===// + +It would be good to detect collector/target compatibility instead of silently +doing the wrong thing. + +//===---------------------------------------------------------------------===// + +It would be really nice to be able to write patterns in .td files for copies, +which would eliminate a bunch of explicit predicates on them (e.g. no side +effects). Once this is in place, it would be even better to have tblgen +synthesize the various copy insertion/inspection methods in TargetInstrInfo. + +//===---------------------------------------------------------------------===// + +Stack coloring improvments: + +1. Do proper LiveStackAnalysis on all stack objects including those which are + not spill slots. +2. Reorder objects to fill in gaps between objects. + e.g. 4, 1, , 4, 1, 1, 1, , 4 => 4, 1, 1, 1, 1, 4, 4 diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp new file mode 100644 index 000000000000..91e4099d0c45 --- /dev/null +++ b/lib/CodeGen/RegAllocBigBlock.cpp @@ -0,0 +1,892 @@ +//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RABigBlock class +// +//===----------------------------------------------------------------------===// + +// This register allocator is derived from RegAllocLocal.cpp. Like it, this +// allocator works on one basic block at a time, oblivious to others. +// However, the algorithm used here is suited for long blocks of +// instructions - registers are spilled by greedily choosing those holding +// values that will not be needed for the longest amount of time. This works +// particularly well for blocks with 10 or more times as many instructions +// as machine registers, but can be used for general code. +// +//===----------------------------------------------------------------------===// +// +// TODO: - automagically invoke linearscan for (groups of) small BBs? +// - break ties when picking regs? 
(probably not worth it in a
+//         JIT context)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+static RegisterRegAlloc
+  bigBlockRegAlloc("bigblock", "Big-block register allocator",
+                   createBigBlockRegisterAllocator);
+
+namespace {
+/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap
+/// keys.
+  struct VRegKeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+  };
+
+
+/// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+/// allocator works on one basic block at a time, oblivious to others.
+/// However, the algorithm used here is suited for long blocks of
+/// instructions - registers are spilled by greedily choosing those holding
+/// values that will not be needed for the longest amount of time. This works
+/// particularly well for blocks with 10 or more times as many instructions
+/// as machine registers, but can be used for general code.
+///
+/// TODO: - automagically invoke linearscan for (groups of) small BBs?
+///       - break ties when picking regs? (probably not worth it in a
+///         JIT context)
+///
+  class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass {
+  public:
+    static char ID;
+    RABigBlock() : MachineFunctionPass(&ID) {}
+  private:
+    /// TM - For getting at TargetMachine info
+    ///
+    const TargetMachine *TM;
+
+    /// MF - Our generic MachineFunction pointer
+    ///
+    MachineFunction *MF;
+
+    /// RegInfo - For dealing with machine register info (aliases, folds
+    /// etc)
+    const TargetRegisterInfo *RegInfo;
+
+    typedef SmallVector<unsigned, 2> VRegTimes;
+
+    /// VRegReadTable - maps VRegs in a BB to the set of times they are read
+    ///
+    DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable;
+
+    /// VRegReadIdx - keeps track of the "current time" in terms of
+    /// positions in VRegReadTable
+    DenseMap<unsigned, unsigned, VRegKeyInfo> VRegReadIdx;
+
+    /// StackSlotForVirtReg - Maps virtual regs to the frame index where these
+    /// values are spilled.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+    /// Virt2PhysRegMap - This map contains entries for each virtual register
+    /// that is currently available in a physical register.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+    /// PhysRegsUsed - This array is effectively a map, containing entries for
+    /// each physical register that currently has a value (ie, it is in
+    /// Virt2PhysRegMap). The value mapped to is the virtual register
+    /// corresponding to the physical register (the inverse of the
+    /// Virt2PhysRegMap), or 0.
The value is set to 0 if this register is pinned
+    /// because it is used by a future instruction, and to -2 if it is not
+    /// allocatable. If the entry for a physical register is -1, then the
+    /// physical register is "not in the map".
+    ///
+    std::vector<int> PhysRegsUsed;
+
+    /// VirtRegModified - This bitset contains information about which virtual
+    /// registers need to be spilled back to memory when their registers are
+    /// scavenged. If a virtual register has simply been rematerialized, there
+    /// is no reason to spill it to memory when we need the register back.
+    ///
+    std::vector<bool> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in MBB
+    ///
+    int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction being currently processed
+    ///
+    int MBBCurTime;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    unsigned &getVirt2StackSlot(unsigned VirtReg) {
+      return StackSlotForVirtReg[VirtReg];
+    }
+
+    /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+    ///
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= TargetRegisterInfo::FirstVirtualRegister;
+      if (VirtRegModified.size() <= Reg)
+        VirtRegModified.resize(Reg+1);
+      VirtRegModified[Reg] = Val;
+    }
+
+    /// isVirtRegModified - Lets us query the VirtRegModified bitset
+    ///
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+             && "Illegal virtual register!");
+      return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+  public:
+    /// getPassName - returns the BigBlock allocator's name
+    ///
+    virtual const char *getPassName() const {
+      return "BigBlock Register Allocator";
+    }
+
+    /// getAnalysisUsage - declares the required analyses
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    ///
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+    ///
+    void FillVRegReadTable(MachineBasicBlock &MBB);
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other. To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified
+    /// virtual register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+ /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// chooseReg - Pick a physical register to hold the specified + /// virtual register by choosing the one which will be read furthest + /// in the future. + /// + unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + + /// reloadVirtReg - This method transforms the specified specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register. If register pressure is high, and it is + /// possible, it tries to fold the load of the virtual register into the + /// instruction itself. It avoids doing this if register pressure is low to + /// improve the chance that subsequent instructions can use the reloaded + /// value. This method returns the modified instruction. + /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum); + + }; + char RABigBlock::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. +int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int FrameIdx = getVirt2StackSlot(VirtReg); + + if (FrameIdx) + return FrameIdx - 1; // Already has space allocated? + + // Allocate a new stack object for this spill location... + FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot... + getVirt2StackSlot(VirtReg) = FrameIdx + 1; + return FrameIdx; +} + + +/// removePhysReg - This method marks the specified physical register as no +/// longer being in use. 
+/// +void RABigBlock::removePhysReg(unsigned PhysReg) { + PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used +} + + +/// spillVirtReg - This method spills the value specified by PhysReg into the +/// virtual register slot specified by VirtReg. It then updates the RA data +/// structures to indicate the fact that PhysReg is now available. +/// +void RABigBlock::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + assert(VirtReg && "Spilling a physical register is illegal!" + " Must not have appropriate kill for the register or use exists beyond" + " the intended one."); + DOUT << " Spilling register " << RegInfo->getName(PhysReg) + << " containing %reg" << VirtReg; + + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + + if (!isVirtRegModified(VirtReg)) + DOUT << " which has not been modified, so no store necessary!"; + + // Otherwise, there is a virtual register corresponding to this physical + // register. We only need to spill it into its stack slot if it has been + // modified. + if (isVirtRegModified(VirtReg)) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DOUT << " to stack slot #" << FrameIndex; + TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIndex, RC); + ++NumStores; // Update statistics + } + + getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + + DOUT << "\n"; + removePhysReg(PhysReg); +} + + +/// spillPhysReg - This method spills the specified physical register into the +/// virtual register slot associated with it. If OnlyVirtRegs is set to true, +/// then the request is ignored if the physical register does not contain a +/// virtual register. +/// +void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs) { + if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! + assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); + if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) + spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); + } else { + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register. + PhysRegsUsed[*AliasSet] != -2) // If allocatable. + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! 
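+  // (For example, on X86 the 16-bit AX and the 32-bit EAX occupy the same
+  //  physical register, so EAX is not available while AX holds a value.)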
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// chooseReg - Pick a physical register to hold the specified +/// virtual register by choosing the one whose value will be read +/// furthest in the future. +/// +unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // First check to see if we have a free register of the requested type... + unsigned PhysReg = getFreeReg(RC); + + // If we didn't find an unused register, find the one which will be + // read at the most distant point in time. + if (PhysReg == 0) { + unsigned delay=0, longest_delay=0; + VRegTimes* ReadTimes; + + unsigned curTime = MBBCurTime; + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // how long until they're read? + if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs + ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]]; + if(ReadTimes && !ReadTimes->empty()) { + unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]]; + while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) { + ++pt; + } + + if(pt < ReadTimes->size()) + delay = (*ReadTimes)[pt] - curTime; + else + delay = MBBLastInsnTime + 1 - curTime; + } else { + // This register is only defined, but never + // read in this MBB. Therefore the next read + // happens after the end of this MBB + delay = MBBLastInsnTime + 1 - curTime; + } + + + if(delay > longest_delay) { + longest_delay = delay; + PhysReg = *pReg; + } + } + } + + if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose + // a register to spill by looking through the + // read timetable, so now we just spill the + // first allocatable register we find. + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // if we find a register we can spill + if(PhysRegsUsed[*pReg]>=-1) + PhysReg = *pReg; // choose it to be spilled + } + } + + assert(PhysReg && "couldn't choose a register to spill :( "); + // TODO: assert that RC->contains(PhysReg) / handle aliased registers? + + // since we needed to look in the table we need to spill this register. + spillPhysReg(MBB, I, PhysReg); + } + + // assign the vreg to our chosen physical register + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; // and return it +} + + +/// reloadVirtReg - This method transforms an instruction with a virtual +/// register use to one that references a physical register. It does this as +/// follows: +/// +/// 1) If the register is already in a physical register, it uses it. 
+/// 2) Otherwise, if there is a free physical register, it uses that. +/// 3) Otherwise, it calls chooseReg() to get the physical register +/// holding the most distantly needed value, generating a spill in +/// the process. +/// +/// This method returns the modified instruction. +MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + + // If the virtual register is already available in a physical register, + // just update the instruction and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); + return MI; + } + + // Otherwise, if we have free physical registers available to hold the + // value, use them. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + unsigned PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // we have a free register, so use it. + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // no free registers available. + // try to fold the spill into the instruction + SmallVector Ops; + Ops.push_back(OpNum); + if(MachineInstr* FMI = TII->foldMemoryOperand(*MF, MI, Ops, FrameIndex)) { + ++NumFolded; + FMI->copyKillDeadInfo(MI); + return MBB.insert(MBB.erase(MI), FMI); + } + + // determine which of the physical registers we'll kill off, since we + // couldn't fold. + PhysReg = chooseReg(MBB, MI, VirtReg); + } + + // this virtual register is now unmodified (since we just reloaded it) + markVirtRegModified(VirtReg, false); + + DOUT << " Reloading %reg" << VirtReg << " into " + << RegInfo->getName(PhysReg) << "\n"; + + // Add move instruction(s) + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->getRegInfo().setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + return MI; +} + +/// Fill out the vreg read timetable. Since ReadTime increases +/// monotonically, the individual readtime sets will be sorted +/// in ascending order. +void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII; + unsigned ReadTime; + + for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) { + MachineInstr *MI = MII; + + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // look for vreg reads.. + if (MO.isReg() && !MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + // ..and add them to the read table. + VRegTimes* &Times = VRegReadTable[MO.getReg()]; + if(!VRegReadTable[MO.getReg()]) { + Times = new VRegTimes; + VRegReadIdx[MO.getReg()] = 0; + } + Times->push_back(ReadTime); + } + } + + } + + MBBLastInsnTime = ReadTime; + + for(DenseMap::iterator Reads = VRegReadTable.begin(); + Reads != VRegReadTable.end(); ++Reads) { + if(Reads->second) { + DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n"; + } + } +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. 
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + + +void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + const TargetInstrInfo &TII = *TM->getInstrInfo(); + + DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + + // If this is the first basic block in the machine function, add live-in + // registers as active. + if (&MBB == &*MF->begin()) { + for (MachineRegisterInfo::livein_iterator + I = MF->getRegInfo().livein_begin(), + E = MF->getRegInfo().livein_end(); I != E; ++I) { + unsigned Reg = I->first; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + MBBCurTime = -1; + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + MBBCurTime++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI; + DOUT << " Regs have values: "; + for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + DOUT << "[" << RegInfo->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + DOUT << "\n"); + + SmallVector Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. 
+ // + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (PhysReg) { + DOUT << " Last use of " << RegInfo->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Last use of " + << RegInfo->getName(*AliasSet) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + // Loop over the implicit defs, spilling them as well. 
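+    // Implicit defs come from the instruction's static description rather
+    // than its operand list (e.g. a register the instruction always
+    // clobbers), so they must be freed and reserved here just like the
+    // explicit physical register defs handled above.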
+ if (TID.getImplicitDefs()) { + for (const unsigned *ImplicitDefs = TID.getImplicitDefs(); + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + SmallVector DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = chooseReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + MI->getOperand(i).setReg(DestPhysReg); // Assign the output register + } + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. + // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } + + if (PhysReg) { + DOUT << " Register " << RegInfo->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Register " << RegInfo->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Finally, if this is a noop copy instruction, zap it. + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (TII.isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + SrcReg == DstReg) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. 
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "Machine Function " << "\n"; + MF = &Fn; + TM = &Fn.getTarget(); + RegInfo = TM->getRegisterInfo(); + + PhysRegsUsed.assign(RegInfo->getNumRegs(), -1); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = RegInfo->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. + } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + Virt2PhysRegMap.grow(MF->getRegInfo().getLastVirtReg()); + StackSlotForVirtReg.grow(MF->getRegInfo().getLastVirtReg()); + VirtRegModified.resize(MF->getRegInfo().getLastVirtReg() - + TargetRegisterInfo::FirstVirtualRegister + 1, 0); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // fill out the read timetable + FillVRegReadTable(*MBB); + // use it to allocate the BB + AllocateBasicBlock(*MBB); + // clear it + VRegReadTable.clear(); + } + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + Virt2PhysRegMap.clear(); + return true; +} + +FunctionPass *llvm::createBigBlockRegisterAllocator() { + return new RABigBlock(); +} + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp new file mode 100644 index 000000000000..ee118de4f496 --- /dev/null +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -0,0 +1,1535 @@ +//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a linear scan register allocator. 
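The header above announces the algorithm; as orientation before the real implementation, here is a compact standalone sketch of textbook linear scan in the Poletto-Sarkar style, heavily simplified (one register class, no holes in intervals, spill the interval that ends furthest away). It models the expire/allocate/spill cycle only; the pass below layers aliases, fixed intervals, spill weights, and backtracking on top of this skeleton.

#include <algorithm>
#include <iostream>
#include <vector>

struct Interval { int Start, End, Reg; };          // Reg: assigned reg or -1
static bool byStart(const Interval *A, const Interval *B) { return A->Start < B->Start; }
static bool byEnd(const Interval *A, const Interval *B) { return A->End < B->End; }

static void linearScan(std::vector<Interval*> &Ivs, int NumRegs) {
  std::sort(Ivs.begin(), Ivs.end(), byStart);
  std::vector<Interval*> Active;                   // kept sorted by End
  std::vector<bool> Free(NumRegs, true);
  for (size_t i = 0; i < Ivs.size(); ++i) {
    Interval *Cur = Ivs[i];
    // Expire intervals that ended before Cur starts, freeing their regs.
    while (!Active.empty() && Active.front()->End < Cur->Start) {
      Free[Active.front()->Reg] = true;
      Active.erase(Active.begin());
    }
    if ((int)Active.size() == NumRegs) {
      // No register free: spill whichever of Cur and the furthest-ending
      // active interval lives longest.
      Interval *Last = Active.back();
      if (Last->End > Cur->End) {
        Cur->Reg = Last->Reg;                      // steal its register
        Last->Reg = -1;                            // Last is spilled
        Active.pop_back();
        Active.push_back(Cur);
        std::sort(Active.begin(), Active.end(), byEnd);
      } else {
        Cur->Reg = -1;                             // spill Cur itself
      }
    } else {
      for (int r = 0; r < NumRegs; ++r)
        if (Free[r]) { Cur->Reg = r; Free[r] = false; break; }
      Active.push_back(Cur);
      std::sort(Active.begin(), Active.end(), byEnd);
    }
  }
}

int main() {
  Interval A = {0, 10, -1}, B = {2, 4, -1}, C = {5, 12, -1};
  Interval *Arr[] = { &A, &B, &C };
  std::vector<Interval*> V(Arr, Arr + 3);
  linearScan(V, 2);
  std::cout << A.Reg << " " << B.Reg << " " << C.Reg << "\n"; // 0 1 1
  return 0;
}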
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <memory>
+#include <queue>
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(NumIters     , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+             cl::desc("Use new spilling heuristic"),
+             cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+                  cl::desc("Pre-register allocation live interval splitting"),
+                  cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+NewSpillFramework("new-spill-framework",
+                  cl::desc("New spilling framework"),
+                  cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+                   createLinearScanRegisterAllocator);
+
+namespace {
+  struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+    static char ID;
+    RALinScan() : MachineFunctionPass(&ID) {}
+
+    typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+    typedef SmallVector IntervalPtrs;
+  private:
+    /// RelatedRegClasses - This structure is built the first time a function is
+    /// compiled, and keeps track of which register classes have registers that
+    /// belong to multiple classes or have aliases that are in other classes.
+    EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+    DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+    // NextReloadMap - For each register in the map, it maps to another
+    // register which is defined by a reload from the same stack slot and
+    // both reloads are in the same basic block.
+    DenseMap<unsigned, unsigned> NextReloadMap;
+
+    // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+    // un-favored for allocation.
+    SmallSet DowngradedRegs;
+
+    // DowngradeMap - A map from virtual registers to physical registers being
+    // downgraded for the virtual registers.
+    DenseMap<unsigned, unsigned> DowngradeMap;
+
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    BitVector allocatableRegs_;
+    LiveIntervals* li_;
+    LiveStacks* ls_;
+    const MachineLoopInfo *loopInfo;
+
+    /// handled_ - Intervals are added to the handled_ set in the order of their
+    /// start value. This is used for backtracking.
+    std::vector<LiveInterval*> handled_;
+
+    /// fixed_ - Intervals that correspond to machine registers.
+    ///
+    IntervalPtrs fixed_;
+
+    /// active_ - Intervals that are currently being processed, and which have a
+    /// live range active for the current point.
+ IntervalPtrs active_; + + /// inactive_ - Intervals that are currently being processed, but which have + /// a hold at the current point. + IntervalPtrs inactive_; + + typedef std::priority_queue, + greater_ptr > IntervalHeap; + IntervalHeap unhandled_; + + /// regUse_ - Tracks register usage. + SmallVector regUse_; + SmallVector regUseBackUp_; + + /// vrm_ - Tracks register assignments. + VirtRegMap* vrm_; + + std::auto_ptr rewriter_; + + std::auto_ptr spiller_; + + public: + virtual const char* getPassName() const { + return "Linear Scan Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + // Make sure PassManager knows which analyses to make available + // to coalescing and which analyses coalescing invalidates. + AU.addRequiredTransitive(); + if (PreSplitIntervals) + AU.addRequiredID(PreAllocSplittingID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - register allocate the whole function + bool runOnMachineFunction(MachineFunction&); + + private: + /// linearScan - the linear scan algorithm + void linearScan(); + + /// initIntervalSets - initialize the interval sets. + /// + void initIntervalSets(); + + /// processActiveIntervals - expire old intervals and move non-overlapping + /// ones to the inactive list. + void processActiveIntervals(unsigned CurPoint); + + /// processInactiveIntervals - expire old intervals and move overlapping + /// ones to the active list. + void processInactiveIntervals(unsigned CurPoint); + + /// hasNextReloadInterval - Return the next liveinterval that's being + /// defined by a reload from the same SS as the specified one. + LiveInterval *hasNextReloadInterval(LiveInterval *cur); + + /// DowngradeRegister - Downgrade a register for allocation. + void DowngradeRegister(LiveInterval *li, unsigned Reg); + + /// UpgradeRegister - Upgrade a register for allocation. + void UpgradeRegister(unsigned Reg); + + /// assignRegOrStackSlotAtInterval - assign a register if one + /// is available, or spill. + void assignRegOrStackSlotAtInterval(LiveInterval* cur); + + void updateSpillWeights(std::vector &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC); + + /// findIntervalsToSpill - Determine the intervals to spill for the + /// specified interval. It's passed the physical registers whose spill + /// weight is the lowest among all the registers whose live intervals + /// conflict with the interval. + void findIntervalsToSpill(LiveInterval *cur, + std::vector > &Candidates, + unsigned NumCands, + SmallVector &SpillIntervals); + + /// attemptTrivialCoalescing - If a simple interval is defined by a copy, + /// try allocate the definition the same register as the source register + /// if the register is not defined during live time of the interval. This + /// eliminate a copy. This is used to coalesce copies which were not + /// coalesced away before allocation either due to dest and src being in + /// different register classes or because the coalescer was overly + /// conservative. + unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); + + /// + /// Register usage / availability tracking helpers. 
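The helpers that follow maintain regUse_ so that a physical register counts as available only when it and every register aliasing it are unused. As a standalone illustration of that invariant, here is a minimal sketch with a hypothetical AliasTable standing in for TargetRegisterInfo's alias queries; the names are illustrative, not LLVM API.

#include <cassert>
#include <vector>

// Alias[R] lists the registers that overlap R (e.g. AX overlaps EAX).
typedef std::vector<std::vector<unsigned> > AliasTable;

struct RegUseTracker {
  std::vector<unsigned> Use;
  const AliasTable &Alias;
  RegUseTracker(unsigned N, const AliasTable &A) : Use(N, 0), Alias(A) {}

  // Bump the count on R and on every register aliasing R, so an overlapping
  // register can never look free while R is occupied.
  void add(unsigned R) {
    ++Use[R];
    for (size_t i = 0; i < Alias[R].size(); ++i) ++Use[Alias[R][i]];
  }
  void del(unsigned R) {
    assert(Use[R] && "freeing an unused register");
    --Use[R];
    for (size_t i = 0; i < Alias[R].size(); ++i) --Use[Alias[R][i]];
  }
  bool isAvail(unsigned R) const { return Use[R] == 0; }
};

int main() {
  // 0 = EAX, 1 = AX; the two alias each other.
  AliasTable A(2);
  A[0].push_back(1);
  A[1].push_back(0);
  RegUseTracker T(2, A);
  T.add(1);                 // allocating AX...
  assert(!T.isAvail(0));    // ...makes EAX unavailable too
  T.del(1);
  assert(T.isAvail(0));
  return 0;
}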
+ /// + + void initRegUses() { + regUse_.resize(tri_->getNumRegs(), 0); + regUseBackUp_.resize(tri_->getNumRegs(), 0); + } + + void finalizeRegUses() { +#ifndef NDEBUG + // Verify all the registers are "freed". + bool Error = false; + for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { + if (regUse_[i] != 0) { + cerr << tri_->getName(i) << " is still in use!\n"; + Error = true; + } + } + if (Error) + abort(); +#endif + regUse_.clear(); + regUseBackUp_.clear(); + } + + void addRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + ++regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) + ++regUse_[*as]; + } + + void delRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + assert(regUse_[physReg] != 0); + --regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { + assert(regUse_[*as] != 0); + --regUse_[*as]; + } + } + + bool isRegAvail(unsigned physReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + return regUse_[physReg] == 0; + } + + void backUpRegUses() { + regUseBackUp_ = regUse_; + } + + void restoreRegUses() { + regUse_ = regUseBackUp_; + } + + /// + /// Register handling helpers. + /// + + /// getFreePhysReg - return a free physical register for this virtual + /// register interval if we have one, otherwise return 0. + unsigned getFreePhysReg(LiveInterval* cur); + unsigned getFreePhysReg(const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector &inactiveCounts, + bool SkipDGRegs); + + /// assignVirt2StackSlot - assigns this virtual register to a + /// stack slot. returns the stack slot + int assignVirt2StackSlot(unsigned virtReg); + + void ComputeRelatedRegClasses(); + + template + void printIntervals(const char* const str, ItTy i, ItTy e) const { + if (str) DOUT << str << " intervals:\n"; + for (; i != e; ++i) { + DOUT << "\t" << *i->first << " -> "; + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) { + reg = vrm_->getPhys(reg); + } + DOUT << tri_->getName(reg) << '\n'; + } + } + }; + char RALinScan::ID = 0; +} + +static RegisterPass +X("linearscan-regalloc", "Linear Scan Register Allocator"); + +bool validateRegAlloc(MachineFunction *mf, LiveIntervals *lis, + VirtRegMap *vrm) { + + MachineRegisterInfo *mri = &mf->getRegInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + bool allocationValid = true; + + + for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { + + LiveInterval *li = itr->second; + + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { + continue; + } + + if (vrm->hasPhys(li->reg)) { + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + + if (lis->hasInterval(vrm->getPhys(li->reg))) { + if (li->overlaps(lis->getInterval(vrm->getPhys(li->reg)))) { + std::cerr << "vreg " << li->reg << " overlaps its assigned preg " + << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n"; + } + } + + TargetRegisterClass::iterator fReg = + std::find(trc->allocation_order_begin(*mf), trc->allocation_order_end(*mf), + vrm->getPhys(li->reg)); + + if (fReg == trc->allocation_order_end(*mf)) { + std::cerr << "preg " << vrm->getPhys(li->reg) + << "(" << tri->getName(vrm->getPhys(li->reg)) << ") is not in the allocation set for vreg " + << li->reg << "\n"; + allocationValid &= false; + } 
+ } + else { + std::cerr << "No preg for vreg " << li->reg << "\n"; + // What about conflicting loads/stores? + continue; + } + + for (LiveIntervals::iterator itr2 = next(itr); itr2 != end; ++itr2) { + + LiveInterval *li2 = itr2->second; + + if (li2->empty()) + continue; + + if (TargetRegisterInfo::isPhysicalRegister(li2->reg)) { + if (li->overlaps(*li2)) { + if (vrm->getPhys(li->reg) == li2->reg || + tri->areAliases(vrm->getPhys(li->reg), li2->reg)) { + std::cerr << "vreg " << li->reg << " overlaps preg " + << li2->reg << "(" << tri->getName(li2->reg) << ") which aliases " + << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n"; + allocationValid &= false; + } + } + } + else { + + if (!vrm->hasPhys(li2->reg)) { + continue; + } + + if (li->overlaps(*li2)) { + if (vrm->getPhys(li->reg) == vrm->getPhys(li2->reg) || + tri->areAliases(vrm->getPhys(li->reg), vrm->getPhys(li2->reg))) { + std::cerr << "vreg " << li->reg << " (preg " << vrm->getPhys(li->reg) + << ") overlaps vreg " << li2->reg << " (preg " << vrm->getPhys(li2->reg) + << ") and " << vrm->getPhys(li->reg) << " aliases " << vrm->getPhys(li2->reg) << "\n"; + allocationValid &= false; + } + } + } + } + + } + + return allocationValid; + +} + + +void RALinScan::ComputeRelatedRegClasses() { + // First pass, add all reg classes to the union, and determine at least one + // reg class that each register is in. + bool HasAliases = false; + for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), + E = tri_->regclass_end(); RCI != E; ++RCI) { + RelatedRegClasses.insert(*RCI); + for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); + I != E; ++I) { + HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; + + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; + if (PRC) { + // Already processed this register. Just make sure we know that + // multiple register classes share a register. + RelatedRegClasses.unionSets(PRC, *RCI); + } else { + PRC = *RCI; + } + } + } + + // Second pass, now that we know conservatively what register classes each reg + // belongs to, add info about aliases. We don't need to do this for targets + // without register aliases. + if (HasAliases) + for (DenseMap::iterator + I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); + I != E; ++I) + for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) + RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); +} + +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, +/// try allocate the definition the same register as the source register +/// if the register is not defined during live time of the interval. This +/// eliminate a copy. This is used to coalesce copies which were not +/// coalesced away before allocation either due to dest and src being in +/// different register classes or because the coalescer was overly +/// conservative. 
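Backing up a step: validateRegAlloc above leans entirely on interval overlap tests. A toy version of that test, under the assumption that each interval is a sorted list of disjoint half-open ranges (this is the core of LiveInterval::overlaps in miniature, not the actual implementation):

#include <cassert>
#include <vector>

struct Range { int Start, End; };   // half-open [Start, End)

// Sweep two sorted range lists; report whether any pair intersects.
static bool overlaps(const std::vector<Range> &A, const std::vector<Range> &B) {
  size_t i = 0, j = 0;
  while (i < A.size() && j < B.size()) {
    if (A[i].End <= B[j].Start) ++i;        // A's range ends first: advance A
    else if (B[j].End <= A[i].Start) ++j;   // B's range ends first: advance B
    else return true;                       // neither is fully before the other
  }
  return false;
}

int main() {
  std::vector<Range> A, B;
  Range a1 = {0, 4}, a2 = {10, 12}, b1 = {4, 10}, b2 = {11, 14};
  A.push_back(a1); A.push_back(a2);
  B.push_back(b1); B.push_back(b2);
  assert(overlaps(A, B));        // [10,12) intersects [11,14)
  B.pop_back();
  assert(!overlaps(A, B));       // [0,4),[10,12) vs [4,10): disjoint
  return 0;
}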
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { + if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue()) + return Reg; + + VNInfo *vni = cur.begin()->valno; + if (!vni->def || vni->def == ~1U || vni->def == ~0U) + return Reg; + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; + if (!CopyMI || + !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return Reg; + PhysReg = SrcReg; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!vrm_->isAssignedReg(SrcReg)) + return Reg; + PhysReg = vrm_->getPhys(SrcReg); + } + if (Reg == PhysReg) + return Reg; + + const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); + if (!RC->contains(PhysReg)) + return Reg; + + // Try to coalesce. + if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { + DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) + << '\n'; + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, PhysReg); + + // Remove unnecessary kills since a copy does not clobber the register. + if (li_->hasInterval(SrcReg)) { + LiveInterval &SrcLI = li_->getInterval(SrcReg); + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg), + E = mri_->reg_end(); I != E; ++I) { + MachineOperand &O = I.getOperand(); + if (!O.isUse() || !O.isKill()) + continue; + MachineInstr *MI = &*I; + if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI)))) + O.setIsKill(false); + } + } + + ++NumCoalesce; + return SrcReg; + } + + return Reg; +} + +bool RALinScan::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + allocatableRegs_ = tri_->getAllocatableSet(fn); + li_ = &getAnalysis(); + ls_ = &getAnalysis(); + loopInfo = &getAnalysis(); + + // We don't run the coalescer here because we have no reason to + // interact with it. If the coalescer requires interaction, it + // won't do anything. If it doesn't require interaction, we assume + // it was run as a separate pass. + + // If this is the first function compiled, compute the related reg classes. + if (RelatedRegClasses.empty()) + ComputeRelatedRegClasses(); + + // Also resize register usage trackers. + initRegUses(); + + vrm_ = &getAnalysis(); + if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); + + if (NewSpillFramework) { + spiller_.reset(createSpiller(mf_, li_, ls_, vrm_)); + } + + initIntervalSets(); + + linearScan(); + + if (NewSpillFramework) { + bool allocValid = validateRegAlloc(mf_, li_, vrm_); + } + + // Rewrite spill code and update the PhysRegsUsed set. + rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); + + assert(unhandled_.empty() && "Unhandled live intervals remain!"); + + finalizeRegUses(); + + fixed_.clear(); + active_.clear(); + inactive_.clear(); + handled_.clear(); + NextReloadMap.clear(); + DowngradedRegs.clear(); + DowngradeMap.clear(); + spiller_.reset(0); + + return true; +} + +/// initIntervalSets - initialize the interval sets. 
+/// +void RALinScan::initIntervalSets() +{ + assert(unhandled_.empty() && fixed_.empty() && + active_.empty() && inactive_.empty() && + "interval sets should be empty on initialization"); + + handled_.reserve(li_->getNumIntervals()); + + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { + if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { + mri_->setPhysRegUsed(i->second->reg); + fixed_.push_back(std::make_pair(i->second, i->second->begin())); + } else + unhandled_.push(i->second); + } +} + +void RALinScan::linearScan() +{ + // linear scan algorithm + DOUT << "********** LINEAR SCAN **********\n"; + DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; + + DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + + while (!unhandled_.empty()) { + // pick the interval with the earliest start point + LiveInterval* cur = unhandled_.top(); + unhandled_.pop(); + ++NumIters; + DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + + if (!cur->empty()) { + processActiveIntervals(cur->beginNumber()); + processInactiveIntervals(cur->beginNumber()); + + assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && + "Can only allocate virtual registers!"); + } + + // Allocating a virtual register. try to find a free + // physical register or spill an interval (possibly this one) in order to + // assign it one. + assignRegOrStackSlotAtInterval(cur); + + DEBUG(printIntervals("active", active_.begin(), active_.end())); + DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + } + + // Expire any remaining active intervals + while (!active_.empty()) { + IntervalPtr &IP = active_.back(); + unsigned reg = IP.first->reg; + DOUT << "\tinterval " << *IP.first << " expired\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + delRegUse(reg); + active_.pop_back(); + } + + // Expire any remaining inactive intervals + DEBUG(for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + DOUT << "\tinterval " << *i->first << " expired\n"); + inactive_.clear(); + + // Add live-ins to every BB except for entry. Also perform trivial coalescing. + MachineFunction::iterator EntryMBB = mf_->begin(); + SmallVector LiveInMBBs; + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { + LiveInterval &cur = *i->second; + unsigned Reg = 0; + bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); + if (isPhys) + Reg = cur.reg; + else if (vrm_->isAssignedReg(cur.reg)) + Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); + if (!Reg) + continue; + // Ignore splited live intervals. + if (!isPhys && vrm_->getPreSplitReg(cur.reg)) + continue; + for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); + I != E; ++I) { + const LiveRange &LR = *I; + if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { + for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) + if (LiveInMBBs[i] != EntryMBB) + LiveInMBBs[i]->addLiveIn(Reg); + LiveInMBBs.clear(); + } + } + } + + DOUT << *vrm_; + + // Look for physical registers that end up not being allocated even though + // register allocator had to spill other registers in its register class. + if (ls_->getNumIntervals() == 0) + return; + if (!vrm_->FindUnusedRegisters(tri_, li_)) + return; +} + +/// processActiveIntervals - expire old intervals and move non-overlapping ones +/// to the inactive list. 
+void RALinScan::processActiveIntervals(unsigned CurPoint) +{ + DOUT << "\tprocessing active intervals:\n"; + + for (unsigned i = 0, e = active_.size(); i != e; ++i) { + LiveInterval *Interval = active_[i].first; + LiveInterval::iterator IntervalPos = active_[i].second; + unsigned reg = Interval->reg; + + IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); + + if (IntervalPos == Interval->end()) { // Remove expired intervals. + DOUT << "\t\tinterval " << *Interval << " expired\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + delRegUse(reg); + + // Pop off the end of the list. + active_[i] = active_.back(); + active_.pop_back(); + --i; --e; + + } else if (IntervalPos->start > CurPoint) { + // Move inactive intervals to inactive list. + DOUT << "\t\tinterval " << *Interval << " inactive\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + delRegUse(reg); + // add to inactive. + inactive_.push_back(std::make_pair(Interval, IntervalPos)); + + // Pop off the end of the list. + active_[i] = active_.back(); + active_.pop_back(); + --i; --e; + } else { + // Otherwise, just update the iterator position. + active_[i].second = IntervalPos; + } + } +} + +/// processInactiveIntervals - expire old intervals and move overlapping +/// ones to the active list. +void RALinScan::processInactiveIntervals(unsigned CurPoint) +{ + DOUT << "\tprocessing inactive intervals:\n"; + + for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { + LiveInterval *Interval = inactive_[i].first; + LiveInterval::iterator IntervalPos = inactive_[i].second; + unsigned reg = Interval->reg; + + IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); + + if (IntervalPos == Interval->end()) { // remove expired intervals. + DOUT << "\t\tinterval " << *Interval << " expired\n"; + + // Pop off the end of the list. + inactive_[i] = inactive_.back(); + inactive_.pop_back(); + --i; --e; + } else if (IntervalPos->start <= CurPoint) { + // move re-activated intervals in active list + DOUT << "\t\tinterval " << *Interval << " active\n"; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + addRegUse(reg); + // add to active + active_.push_back(std::make_pair(Interval, IntervalPos)); + + // Pop off the end of the list. + inactive_[i] = inactive_.back(); + inactive_.pop_back(); + --i; --e; + } else { + // Otherwise, just update the iterator position. + inactive_[i].second = IntervalPos; + } + } +} + +/// updateSpillWeights - updates the spill weights of the specifed physical +/// register and its weight. +void RALinScan::updateSpillWeights(std::vector &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC) { + SmallSet Processed; + SmallSet SuperAdded; + SmallVector Supers; + Weights[reg] += weight; + Processed.insert(reg); + for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { + Weights[*as] += weight; + Processed.insert(*as); + if (tri_->isSubRegister(*as, reg) && + SuperAdded.insert(*as) && + RC->contains(*as)) { + Supers.push_back(*as); + } + } + + // If the alias is a super-register, and the super-register is in the + // register class we are trying to allocate. Then add the weight to all + // sub-registers of the super-register even if they are not aliases. + // e.g. allocating for GR32, bh is not used, updating bl spill weight. 
+  // bl should get the same spill weight otherwise it will be chosen
+  // as a spill candidate since spilling bh doesn't make ebx available.
+  for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+    for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+      if (!Processed.count(*sr))
+        Weights[*sr] += weight;
+  }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+  for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+       I != E; ++I)
+    if (I->first == LI) return I;
+  return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+  for (unsigned i = 0, e = V.size(); i != e; ++i) {
+    RALinScan::IntervalPtr &IP = V[i];
+    LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+                                                IP.second, Point);
+    if (I != IP.first->begin()) --I;
+    IP.second = I;
+  }
+}
+
+/// addStackInterval - Create a LiveInterval for stack if the specified live
+/// interval has been spilled.
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+                             LiveIntervals *li_,
+                             MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+  int SS = vrm_.getStackSlot(cur->reg);
+  if (SS == VirtRegMap::NO_STACK_SLOT)
+    return;
+
+  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+  LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+  VNInfo *VNI;
+  if (SI.hasAtLeastOneValue())
+    VNI = SI.getValNumInfo(0);
+  else
+    VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+
+  LiveInterval &RI = li_->getInterval(cur->reg);
+  // FIXME: This may be overly conservative.
+  SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and defs and uses of Reg weighted by loop depths.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+                        MachineRegisterInfo *mri_,
+                        const MachineLoopInfo *loopInfo) {
+  float Conflicts = 0;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineInstr *MI = &*I;
+    if (cur->liveAt(li_->getInstructionIndex(MI))) {
+      unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+      Conflicts += powf(10.0f, (float)loopDepth);
+    }
+  }
+  return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+                      std::vector<std::pair<unsigned,float> > &Candidates,
+                      unsigned NumCands,
+                      SmallVector &SpillIntervals) {
+  // We have figured out the *best* register to spill. But there are other
+  // registers that are pretty good as well (spill weight within 3%). Spill
+  // the one that has fewest defs and uses that conflict with cur.
+  float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+  SmallVector SLIs[3];
+
+  DOUT << "\tConsidering " << NumCands << " candidates: ";
+  DEBUG(for (unsigned i = 0; i != NumCands; ++i)
+          DOUT << tri_->getName(Candidates[i].first) << " ";
+        DOUT << "\n";);
+
+  // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ + unsigned Reg = i->first->reg; + unsigned PhysReg = vrm_->getPhys(Reg); + if (!cur->overlapsFrom(*i->first, i->second-1)) + continue; + for (unsigned j = 0; j < NumCands; ++j) { + unsigned Candidate = Candidates[j].first; + if (tri_->regsOverlap(PhysReg, Candidate)) { + if (NumCands > 1) + Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); + SLIs[j].push_back(i->first); + } + } + } + + // Which is the best candidate? + unsigned BestCandidate = 0; + float MinConflicts = Conflicts[0]; + for (unsigned i = 1; i != NumCands; ++i) { + if (Conflicts[i] < MinConflicts) { + BestCandidate = i; + MinConflicts = Conflicts[i]; + } + } + + std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), + std::back_inserter(SpillIntervals)); +} + +namespace { + struct WeightCompare { + typedef std::pair RegWeightPair; + bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { + return LHS.second < RHS.second; + } + }; +} + +static bool weightsAreClose(float w1, float w2) { + if (!NewHeuristic) + return false; + + float diff = w1 - w2; + if (diff <= 0.02f) // Within 0.02f + return true; + return (diff / w2) <= 0.05f; // Within 5%. +} + +LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { + DenseMap::iterator I = NextReloadMap.find(cur->reg); + if (I == NextReloadMap.end()) + return 0; + return &li_->getInterval(I->second); +} + +void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { + bool isNew = DowngradedRegs.insert(Reg); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, Reg)); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { + isNew = DowngradedRegs.insert(*AS); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, *AS)); + } + ++NumDowngrade; +} + +void RALinScan::UpgradeRegister(unsigned Reg) { + if (Reg) { + DowngradedRegs.erase(Reg); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) + DowngradedRegs.erase(*AS); + } +} + +namespace { + struct LISorter { + bool operator()(LiveInterval* A, LiveInterval* B) { + return A->beginNumber() < B->beginNumber(); + } + }; +} + +/// assignRegOrStackSlotAtInterval - assign a register if one is available, or +/// spill. +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) +{ + DOUT << "\tallocating current interval: "; + + // This is an implicitly defined live interval, just assign any register. + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + if (cur->empty()) { + unsigned physReg = cur->preference; + if (!physReg) + physReg = *RC->allocation_order_begin(*mf_); + DOUT << tri_->getName(physReg) << '\n'; + // Note the register is not really in use. 
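weightsAreClose above accepts a spill candidate whose weight is within an absolute 0.02 of the minimum, or within 5% of it. A quick standalone check of both branches (same arithmetic, with the NewHeuristic flag gate dropped):

#include <cassert>

static bool weightsAreClose(float w1, float w2) {
  float diff = w1 - w2;
  if (diff <= 0.02f) return true;       // within 0.02 absolutely
  return (diff / w2) <= 0.05f;          // within 5% relatively
}

int main() {
  assert(weightsAreClose(1.01f, 1.0f));     // 0.01 <= 0.02
  assert(weightsAreClose(104.0f, 100.0f));  // diff 4, but 4% <= 5%
  assert(!weightsAreClose(110.0f, 100.0f)); // diff 10 and 10% > 5%
  return 0;
}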
+ vrm_->assignVirt2Phys(cur->reg, physReg); + return; + } + + backUpRegUses(); + + std::vector > SpillWeightsToAdd; + unsigned StartPosition = cur->beginNumber(); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + // If start of this live interval is defined by a move instruction and its + // source is assigned a physical register that is compatible with the target + // register class, then we should try to assign it the same register. + // This can happen when the move is from a larger register class to a smaller + // one, e.g. X86::mov32to32_. These move instructions are not coalescable. + if (!cur->preference && cur->hasAtLeastOneValue()) { + VNInfo *vni = cur->begin()->valno; + if (vni->def && vni->def != ~1U && vni->def != ~0U) { + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (CopyMI && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + cur->preference = Reg; + } + } + } + } + + // For every interval in inactive we overlap with, mark the + // register as not free and update spill weights. + for (IntervalPtrs::const_iterator i = inactive_.begin(), + e = inactive_.end(); i != e; ++i) { + unsigned Reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only allocate virtual registers!"); + const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); + // If this is not in a related reg class to the register we're allocating, + // don't check it. + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + cur->overlapsFrom(*i->first, i->second-1)) { + Reg = vrm_->getPhys(Reg); + addRegUse(Reg); + SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); + } + } + + // Speculatively check to see if we can get a register right now. If not, + // we know we won't be able to by adding more constraints. If so, we can + // check to see if it is valid. Doing an exhaustive search of the fixed_ list + // is very bad (it contains all callee clobbered registers for any functions + // with a call), so we want to avoid doing that if possible. + unsigned physReg = getFreePhysReg(cur); + unsigned BestPhysReg = physReg; + if (physReg) { + // We got a register. However, if it's in the fixed_ list, we might + // conflict with it. Check to see if we conflict with it or any of its + // aliases. + SmallSet RegAliases; + for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) + RegAliases.insert(*AS); + + bool ConflictsWithFixed = false; + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { + // Okay, this reg is on the fixed list. Check to see if we actually + // conflict. + LiveInterval *I = IP.first; + if (I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + ConflictsWithFixed = true; + break; + } + } + } + } + + // Okay, the register picked by our speculative getFreePhysReg call turned + // out to be in use. 
Actually add all of the conflicting fixed registers to + // regUse_ so we can do an accurate query. + if (ConflictsWithFixed) { + // For every interval in fixed we overlap with, mark the register as not + // free and update spill weights. + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + LiveInterval *I = IP.first; + + const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + unsigned reg = I->reg; + addRegUse(reg); + SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); + } + } + } + + // Using the newly updated regUse_ object, which includes conflicts in the + // future, see if there are any registers available. + physReg = getFreePhysReg(cur); + } + } + + // Restore the physical register tracker, removing information about the + // future. + restoreRegUses(); + + // If we find a free register, we are done: assign this virtual to + // the free physical register and add this interval to the active + // list. + if (physReg) { + DOUT << tri_->getName(physReg) << '\n'; + vrm_->assignVirt2Phys(cur->reg, physReg); + addRegUse(physReg); + active_.push_back(std::make_pair(cur, cur->begin())); + handled_.push_back(cur); + + // "Upgrade" the physical register since it has been allocated. + UpgradeRegister(physReg); + if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { + // "Downgrade" physReg to try to keep physReg from being allocated until + // the next reload from the same SS is allocated. + NextReloadLI->preference = physReg; + DowngradeRegister(cur, physReg); + } + return; + } + DOUT << "no free registers\n"; + + // Compile the spill weights into an array that is better for scanning. + std::vector SpillWeights(tri_->getNumRegs(), 0.0f); + for (std::vector >::iterator + I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) + updateSpillWeights(SpillWeights, I->first, I->second, RC); + + // for each interval in active, update spill weights. + for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); + i != e; ++i) { + unsigned reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + updateSpillWeights(SpillWeights, reg, i->first->weight, RC); + } + + DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + + // Find a register to spill. + float minWeight = HUGE_VALF; + unsigned minReg = 0; /*cur->preference*/; // Try the pref register first. + + bool Found = false; + std::vector > RegsWeights; + if (!minReg || SpillWeights[minReg] == HUGE_VALF) + for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), + e = RC->allocation_order_end(*mf_); i != e; ++i) { + unsigned reg = *i; + float regWeight = SpillWeights[reg]; + if (minWeight > regWeight) + Found = true; + RegsWeights.push_back(std::make_pair(reg, regWeight)); + } + + // If we didn't find a register that is spillable, try aliases? + if (!Found) { + for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), + e = RC->allocation_order_end(*mf_); i != e; ++i) { + unsigned reg = *i; + // No need to worry about if the alias register size < regsize of RC. + // We are going to spill all registers that alias it anyway. 
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) + RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); + } + } + + // Sort all potential spill candidates by weight. + std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare()); + minReg = RegsWeights[0].first; + minWeight = RegsWeights[0].second; + if (minWeight == HUGE_VALF) { + // All registers must have inf weight. Just grab one! + minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_); + if (cur->weight == HUGE_VALF || + li_->getApproximateInstructionCount(*cur) == 0) { + // Spill a physical register around defs and uses. + if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { + // spillPhysRegAroundRegDefsUses may have invalidated iterator stored + // in fixed_. Reset them. + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + LiveInterval *I = IP.first; + if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) + IP.second = I->advanceTo(I->begin(), StartPosition); + } + + DowngradedRegs.clear(); + assignRegOrStackSlotAtInterval(cur); + } else { + cerr << "Ran out of registers during register allocation!\n"; + exit(1); + } + return; + } + } + + // Find up to 3 registers to consider as spill candidates. + unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; + while (LastCandidate > 1) { + if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) + break; + --LastCandidate; + } + + DOUT << "\t\tregister(s) with min weight(s): "; + DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) + DOUT << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"); + + // If the current has the minimum weight, we need to spill it and + // add any added intervals back to unhandled, and restart + // linearscan. + if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { + DOUT << "\t\t\tspilling(c): " << *cur << '\n'; + SmallVector spillIs; + std::vector added; + + if (!NewSpillFramework) { + added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_); + } else { + added = spiller_->spill(cur); + } + + std::sort(added.begin(), added.end(), LISorter()); + addStackInterval(cur, ls_, li_, mri_, *vrm_); + if (added.empty()) + return; // Early exit if all spills were folded. + + // Merge added with unhandled. Note that we have already sorted + // intervals returned by addIntervalsForSpills by their starting + // point. + // This also update the NextReloadMap. That is, it adds mapping from a + // register defined by a reload from SS to the next reload from SS in the + // same basic block. + MachineBasicBlock *LastReloadMBB = 0; + LiveInterval *LastReload = 0; + int LastReloadSS = VirtRegMap::NO_STACK_SLOT; + for (unsigned i = 0, e = added.size(); i != e; ++i) { + LiveInterval *ReloadLi = added[i]; + if (ReloadLi->weight == HUGE_VALF && + li_->getApproximateInstructionCount(*ReloadLi) == 0) { + unsigned ReloadIdx = ReloadLi->beginNumber(); + MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); + int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); + if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { + // Last reload of same SS is in the same MBB. We want to try to + // allocate both reloads the same register and make sure the reg + // isn't clobbered in between if at all possible. 
+ assert(LastReload->beginNumber() < ReloadIdx); + NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); + } + LastReloadMBB = ReloadMBB; + LastReload = ReloadLi; + LastReloadSS = ReloadSS; + } + unhandled_.push(ReloadLi); + } + return; + } + + ++NumBacktracks; + + // Push the current interval back to unhandled since we are going + // to re-run at least this iteration. Since we didn't modify it it + // should go back right in the front of the list + unhandled_.push(cur); + + assert(TargetRegisterInfo::isPhysicalRegister(minReg) && + "did not choose a register to spill?"); + + // We spill all intervals aliasing the register with + // minimum weight, rollback to the interval with the earliest + // start point and let the linear scan algorithm run again + SmallVector spillIs; + + // Determine which intervals have to be spilled. + findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); + + // Set of spilled vregs (used later to rollback properly) + SmallSet spilled; + + // The earliest start of a Spilled interval indicates up to where + // in handled we need to roll back + + unsigned earliestStart = cur->beginNumber(); + LiveInterval *earliestStartInterval = cur; + + // Spill live intervals of virtual regs mapped to the physical register we + // want to clear (and its aliases). We only spill those that overlap with the + // current interval as the rest do not affect its allocation. we also keep + // track of the earliest start of all spilled live intervals since this will + // mark our rollback point. + std::vector added; + while (!spillIs.empty()) { + bool epicFail = false; + LiveInterval *sli = spillIs.back(); + spillIs.pop_back(); + DOUT << "\t\t\tspilling(a): " << *sli << '\n'; + earliestStart = std::min(earliestStart, sli->beginNumber()); + earliestStartInterval = + (earliestStartInterval->beginNumber() < sli->beginNumber()) ? + earliestStartInterval : sli; + + if (earliestStartInterval->beginNumber()!=earliestStart) { + epicFail |= true; + std::cerr << "What the 1 - " + << "earliestStart = " << earliestStart + << "earliestStartInterval = " << earliestStartInterval->beginNumber() + << "\n"; + } + + std::vector newIs; + if (!NewSpillFramework) { + newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_); + } else { + newIs = spiller_->spill(sli); + } + addStackInterval(sli, ls_, li_, mri_, *vrm_); + std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); + spilled.insert(sli->reg); + + if (earliestStartInterval->beginNumber()!=earliestStart) { + epicFail |= true; + std::cerr << "What the 2 - " + << "earliestStart = " << earliestStart + << "earliestStartInterval = " << earliestStartInterval->beginNumber() + << "\n"; + } + + if (epicFail) { + //abort(); + } + } + + earliestStart = earliestStartInterval->beginNumber(); + + DOUT << "\t\trolling back to: " << earliestStart << '\n'; + + // Scan handled in reverse order up to the earliest start of a + // spilled live interval and undo each one, restoring the state of + // unhandled. + while (!handled_.empty()) { + LiveInterval* i = handled_.back(); + // If this interval starts before t we are done. + if (i->beginNumber() < earliestStart) + break; + DOUT << "\t\t\tundo changes for: " << *i << '\n'; + handled_.pop_back(); + + // When undoing a live interval allocation we must know if it is active or + // inactive to properly update regUse_ and the VirtRegMap. 
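The undo loop that follows pops handled_ intervals whose start is at or after the earliest spilled start and returns them to the worklist. The same rollback discipline in miniature, assuming a toy interval type and a single undo rule (the real code must also distinguish active from inactive intervals):

#include <iostream>
#include <vector>

struct Iv { int Start; bool Assigned; };

// Undo every processed interval that begins at or after RollbackPoint,
// pushing it back onto the worklist so the scan can redo it.
static void rollback(std::vector<Iv*> &Handled, std::vector<Iv*> &Unhandled,
                     int RollbackPoint) {
  while (!Handled.empty() && Handled.back()->Start >= RollbackPoint) {
    Iv *I = Handled.back();
    Handled.pop_back();
    I->Assigned = false;          // forget its register assignment
    Unhandled.push_back(I);       // it will be re-processed
  }
}

int main() {
  Iv A = {0, true}, B = {4, true}, C = {9, true};
  std::vector<Iv*> Handled, Unhandled;
  Handled.push_back(&A); Handled.push_back(&B); Handled.push_back(&C);
  rollback(Handled, Unhandled, 4);     // a spill forced a redo from t=4
  std::cout << Handled.size() << " kept, "
            << Unhandled.size() << " requeued\n"; // 1 kept, 2 requeued
  return 0;
}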
+    IntervalPtrs::iterator it;
+    if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+      active_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      delRegUse(vrm_->getPhys(i->reg));
+      vrm_->clearVirt(i->reg);
+    } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+      inactive_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      vrm_->clearVirt(i->reg);
+    } else {
+      assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+             "Can only allocate virtual registers!");
+      vrm_->clearVirt(i->reg);
+      unhandled_.push(i);
+    }
+
+    DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+    if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy. Clear
+      // the preference now since the source interval allocation may have been
+      // undone as well.
+      i->preference = 0;
+    else {
+      UpgradeRegister(ii->second);
+    }
+  }
+
+  // Rewind the iterators in the active, inactive, and fixed lists back to the
+  // point we reverted to.
+  RevertVectorIteratorsTo(active_, earliestStart);
+  RevertVectorIteratorsTo(inactive_, earliestStart);
+  RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after t and
+  // insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required)
+  for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+    LiveInterval *HI = handled_[i];
+    if (!HI->expiredAt(earliestStart) &&
+        HI->expiredAt(cur->beginNumber())) {
+      DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+      active_.push_back(std::make_pair(HI, HI->begin()));
+      assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+      addRegUse(vrm_->getPhys(HI->reg));
+    }
+  }
+
+  // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds mapping from a
+  // register defined by a reload from SS to the next reload from SS in the
+  // same basic block.
+  MachineBasicBlock *LastReloadMBB = 0;
+  LiveInterval *LastReload = 0;
+  int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+  std::sort(added.begin(), added.end(), LISorter());
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    LiveInterval *ReloadLi = added[i];
+    if (ReloadLi->weight == HUGE_VALF &&
+        li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+      unsigned ReloadIdx = ReloadLi->beginNumber();
+      MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+      int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+      if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+        // Last reload of same SS is in the same MBB. We want to try to
+        // allocate both reloads the same register and make sure the reg
+        // isn't clobbered in between if at all possible.
+        assert(LastReload->beginNumber() < ReloadIdx);
+        NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+      }
+      LastReloadMBB = ReloadMBB;
+      LastReload = ReloadLi;
+      LastReloadSS = ReloadSS;
+    }
+    unhandled_.push(ReloadLi);
+  }
+}
+
+unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+                                   unsigned MaxInactiveCount,
+                                   SmallVector &inactiveCounts,
+                                   bool SkipDGRegs) {
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+  assert(I != E && "No allocatable register in this register class!");
+
+  // Scan for the first available register.
+ for (; I != E; ++I) { + unsigned Reg = *I; + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + if (isRegAvail(Reg)) { + FreeReg = Reg; + if (FreeReg < inactiveCounts.size()) + FreeRegInactiveCount = inactiveCounts[FreeReg]; + else + FreeRegInactiveCount = 0; + break; + } + } + + // If there are no free regs, or if this reg has the max inactive count, + // return this register. + if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) + return FreeReg; + + // Continue scanning the registers, looking for the one with the highest + // inactive count. Alkis found that this reduced register pressure very + // slightly on X86 (in rev 1.94 of this file), though this should probably be + // reevaluated now. + for (; I != E; ++I) { + unsigned Reg = *I; + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + if (isRegAvail(Reg) && Reg < inactiveCounts.size() && + FreeRegInactiveCount < inactiveCounts[Reg]) { + FreeReg = Reg; + FreeRegInactiveCount = inactiveCounts[Reg]; + if (FreeRegInactiveCount == MaxInactiveCount) + break; // We found the one with the max inactive count. + } + } + + return FreeReg; +} + +/// getFreePhysReg - return a free physical register for this virtual register +/// interval if we have one, otherwise return 0. +unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { + SmallVector inactiveCounts; + unsigned MaxInactiveCount = 0; + + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); + i != e; ++i) { + unsigned reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + + // If this is not in a related reg class to the register we're allocating, + // don't check it. + const TargetRegisterClass *RegRC = mri_->getRegClass(reg); + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { + reg = vrm_->getPhys(reg); + if (inactiveCounts.size() <= reg) + inactiveCounts.resize(reg+1); + ++inactiveCounts[reg]; + MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); + } + } + + // If copy coalescer has assigned a "preferred" register, check if it's + // available first. + if (cur->preference) { + DOUT << "(preferred: " << tri_->getName(cur->preference) << ") "; + if (isRegAvail(cur->preference) && + RC->contains(cur->preference)) + return cur->preference; + } + + if (!DowngradedRegs.empty()) { + unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; + } + return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false); +} + +FunctionPass* llvm::createLinearScanRegisterAllocator() { + return new RALinScan(); +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp new file mode 100644 index 000000000000..e1cc20cf4fb1 --- /dev/null +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -0,0 +1,1068 @@ +//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. 
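getFreePhysReg above prefers, among the free registers, the one that the most inactive intervals are already assigned to, so that re-activated intervals are less likely to collide with the new assignment. A condensed standalone version that folds the two scan phases into one pass (toy data structures, not the allocation-order API):

#include <cstdio>
#include <vector>

// Pick a free register, preferring the one with the highest inactive count.
static unsigned pickFreeReg(const std::vector<bool> &Free,
                            const std::vector<unsigned> &InactiveCounts) {
  unsigned Best = 0, BestCount = 0;
  bool Found = false;
  for (unsigned R = 1; R < Free.size(); ++R) {  // 0 means "no register"
    if (!Free[R]) continue;
    unsigned C = R < InactiveCounts.size() ? InactiveCounts[R] : 0;
    if (!Found || C > BestCount) { Best = R; BestCount = C; Found = true; }
  }
  return Best;                                  // 0 if nothing is free
}

int main() {
  std::vector<bool> Free(4, true);
  Free[1] = false;                  // r1 occupied
  std::vector<unsigned> Inactive(4, 0);
  Inactive[3] = 2;                  // two inactive intervals already use r3
  std::printf("chose r%u\n", pickFreeReg(Free, Inactive)); // chose r3
  return 0;
}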
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +static RegisterRegAlloc + localRegAlloc("local", "local register allocator", + createLocalRegisterAllocator); + +namespace { + class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass { + public: + static char ID; + RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. + IndexedMap Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector PhysRegsUsed; + + // PhysRegsUseOrder - This contains a list of the physical registers that + // currently have a virtual register value in them. This list provides an + // ordering of registers, imposing a reallocation order. This list is only + // used if all registers are allocated and we have to spill one, in which + // case we spill the least recently used register. Entries at the front of + // the list are the least recently used registers, entries at the back are + // the most recently used. + // + std::vector PhysRegsUseOrder; + + // Virt2LastUseMap - This maps each virtual register to its last use + // (MachineInstr*, operand index pair). + IndexedMap, VirtReg2IndexFunctor> + Virt2LastUseMap; + + std::pair& getVirtRegLastUse(unsigned Reg) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + return Virt2LastUseMap[Reg]; + } + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. 
+    //
+    BitVector VirtRegModified;
+
+    // UsedInMultipleBlocks - Tracks whether a particular register is used in
+    // more than one block.
+    BitVector UsedInMultipleBlocks;
+
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= TargetRegisterInfo::FirstVirtualRegister;
+      if (Val)
+        VirtRegModified.set(Reg);
+      else
+        VirtRegModified.reset(Reg);
+    }
+
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+             && "Illegal virtual register!");
+      return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+    void AddToPhysRegsUseOrder(unsigned Reg) {
+      std::vector<unsigned>::iterator It =
+        std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+      if (It != PhysRegsUseOrder.end())
+        PhysRegsUseOrder.erase(It);
+      PhysRegsUseOrder.push_back(Reg);
+    }
+
+    void MarkPhysRegRecentlyUsed(unsigned Reg) {
+      if (PhysRegsUseOrder.empty() ||
+          PhysRegsUseOrder.back() == Reg) return;  // Already most recently used
+
+      for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+        if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+          unsigned RegMatch = PhysRegsUseOrder[i-1];       // remove from middle
+          PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+          // Add it to the end of the list
+          PhysRegsUseOrder.push_back(RegMatch);
+          if (RegMatch == Reg)
+            return;    // Found an exact match, exit early
+        }
+    }
+
+  public:
+    virtual const char *getPassName() const {
+      return "Local Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other.  To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg.  It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now.  The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use.  This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class.  If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// getReg - Find a physical register to hold the specified virtual
+    /// register.  If all compatible physical registers are used, this method
+    /// spills the last used virtual register to the stack, and uses that
+    /// register.  If NoFree is true, that means the caller knows there isn't
+    /// a free register, do not call getFreeReg().
+    unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                    unsigned VirtReg, bool NoFree = false);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register.  This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register.  If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register.  If register pressure is high, and it is
+    /// possible, it tries to fold the load of the virtual register into the
+    /// instruction itself.  It avoids doing this if register pressure is low to
+    /// improve the chance that subsequent instructions can use the reloaded
+    /// value.  This method returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum, SmallSet<unsigned, 4> &RRegs);
+
+    /// ComputeLocalLiveness - Computes liveness of registers within a basic
+    /// block, setting the killed/dead flags as appropriate.
+    void ComputeLocalLiveness(MachineBasicBlock& MBB);
+
+    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+                       unsigned PhysReg);
+  };
+  char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location Reg would belong...
+  int SS = StackSlotForVirtReg[VirtReg];
+  if (SS != -1)
+    return SS;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhyReg no longer used
+
+  std::vector<unsigned>::iterator It =
+    std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+  if (It != PhysRegsUseOrder.end())
+    PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg.  It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DOUT << "  Spilling register " << TRI->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+
+  if (!isVirtRegModified(VirtReg)) {
+    DOUT << " which has not been modified, so no store necessary!";
+    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+    if (LastUse.first)
+      LastUse.first->getOperand(LastUse.second).setIsKill();
+  } else {
+    // Otherwise, there is a virtual register corresponding to this physical
+    // register.  We only need to spill it into its stack slot if it has been
+    // modified.
+    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    // If the instruction reads the register that's spilled, (e.g. this can
+    // happen if it is a move to a physical register), then the spill
+    // instruction is not a kill.
+    bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+    TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {            // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that one of the aliases isn't alive.
+    for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&     // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)       // If allocatable.
+        if (PhysRegsUsed[*AliasSet])
+          spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.  The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+  AddToPhysRegsUseOrder(PhysReg);   // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use.  This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+      return false;                   // Can't use this reg then.
+  return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class.  If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+  // Get iterators defining the range of registers that are valid to allocate in
+  // this class, which also specifies the preferred allocation order.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegAvailable(*RI)) {       // Is reg unused?
+      assert(*RI != 0 && "Cannot use register!");
+      return *RI; // Found an unused register!
+    }
+  return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register.  If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+                         unsigned VirtReg, bool NoFree) {
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+  // First check to see if we have a free register of the requested type...
+  unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+  // If we didn't find an unused register, scavenge one now!
+  if (PhysReg == 0) {
+    assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+    // Loop over all of the preallocated registers from the least recently used
+    // to the most recently used.  When we find one that is capable of holding
+    // our register, use it.
+    for (unsigned i = 0; PhysReg == 0; ++i) {
+      assert(i != PhysRegsUseOrder.size() &&
+             "Couldn't find a register of the appropriate class!");
+
+      unsigned R = PhysRegsUseOrder[i];
+
+      // We can only use this register if it holds a virtual register (ie, it
+      // can be spilled).  Do not use it if it is an explicitly allocated
+      // physical register!
+      assert(PhysRegsUsed[R] != -1 &&
+             "PhysReg in PhysRegsUseOrder, but is not allocated?");
+      if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+        // If the current register is compatible, use it.
+        if (RC->contains(R)) {
+          PhysReg = R;
+          break;
+        } else {
+          // If one of the registers aliased to the current register is
+          // compatible, use it.
+          for (const unsigned *AliasIt = TRI->getAliasSet(R);
+               *AliasIt; ++AliasIt) {
+            if (RC->contains(*AliasIt) &&
+                // If this is pinned down for some reason, don't use it.  For
+                // example, if CL is pinned, and we run across CH, don't use
+                // CH as justification for scavenging ECX (which will
+                // fail).
+                PhysRegsUsed[*AliasIt] != 0 &&
+
+                // Make sure the register is allocatable.  Don't allocate SIL on
+                // x86-32.
+                PhysRegsUsed[*AliasIt] != -2) {
+              PhysReg = *AliasIt;    // Take an aliased register
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    assert(PhysReg && "Physical register not assigned!?!?");
+
+    // At this point PhysRegsUseOrder[i] is the least recently used register of
+    // compatible register class.  Spill it to memory and reap its remains.
+    spillPhysReg(MBB, I, PhysReg);
+  }
+
+  // Now that we know which register we need to assign this to, do it now!
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register.  This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register.  If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register.  If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself.  It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value.  This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum,
+                                     SmallSet<unsigned, 4> &ReloadedRegs) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available, just update the instruction
+  // and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MarkPhysRegRecentlyUsed(PR);          // Already have this value available!
+    MI->getOperand(OpNum).setReg(PR);     // Assign the input register
+    getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+    return MI;
+  }
+
+  // Otherwise, we need to fold it into the current instruction, or reload it.
+  // If we have registers available to hold the value, use them.
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+  unsigned PhysReg = getFreeReg(RC);
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // Register is available, allocate it!
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {         // No registers available.
+    // Force some poor hapless value out of the register file to
+    // make room for the new register, and reload it.
+    PhysReg = getReg(MBB, MI, VirtReg, true);
+  }
+
+  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+
+  DOUT << "  Reloading %reg" << VirtReg << " into "
+       << TRI->getName(PhysReg) << "\n";
+
+  // Add move instruction(s)
+  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->getRegInfo().setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+  if (!ReloadedRegs.insert(PhysReg)) {
+    cerr << "Ran out of registers during register allocation!\n";
+    if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+      cerr << "Please check your inline asm statement for invalid "
+           << "constraints:\n";
+      MI->print(cerr.stream(), TM);
+    }
+    exit(1);
+  }
+  for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+       *SubRegs; ++SubRegs) {
+    if (!ReloadedRegs.insert(*SubRegs)) {
+      cerr << "Ran out of registers during register allocation!\n";
+      if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+        cerr << "Please check your inline asm statement for invalid "
+             << "constraints:\n";
+        MI->print(cerr.stream(), TM);
+      }
+      exit(1);
+    }
+  }
+
+  return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+// precedes - Helper function to determine whether MachineInstr A
+// precedes MachineInstr B within the same MBB.
+static bool precedes(MachineBasicBlock::iterator A,
+                     MachineBasicBlock::iterator B) {
+  if (A == B)
+    return false;
+
+  MachineBasicBlock::iterator I = A->getParent()->begin();
+  while (I != A->getParent()->end()) {
+    if (I == A)
+      return true;
+    else if (I == B)
+      return false;
+
+    ++I;
+  }
+
+  return false;
+}
+
+/// ComputeLocalLiveness - Computes liveness of registers within a basic
+/// block, setting the killed/dead flags as appropriate.
+void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
+  MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+  // Keep track of the most recently seen previous use or def of each reg,
+  // so that we can update them with dead/kill markers.
+  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
+  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+       I != E; ++I) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = I->getOperand(i);
+      // Uses don't trigger any flags, but we need to save
+      // them for later.  Also, we have to process these
+      // _before_ processing the defs, since an instr
+      // uses regs before it defs them.
+      if (MO.isReg() && MO.getReg() && MO.isUse()) {
+        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+
+        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+        const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
+        if (Aliases) {
+          while (*Aliases) {
+            DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+              alias = LastUseDef.find(*Aliases);
+
+            if (alias != LastUseDef.end() && alias->second.first != I)
+              LastUseDef[*Aliases] = std::make_pair(I, i);
+
+            ++Aliases;
+          }
+        }
+      }
+    }
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = I->getOperand(i);
+      // Defs other than 2-addr redefs _do_ trigger flag changes:
+      //   - A def followed by a def is dead
+      //   - A use followed by a def is a kill
+      if (MO.isReg() && MO.getReg() && MO.isDef()) {
+        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+          last = LastUseDef.find(MO.getReg());
+        if (last != LastUseDef.end()) {
+          // Check if this is a two address instruction.  If so, then
+          // the def does not kill the use.
+          if (last->second.first == I &&
+              I->isRegTiedToUseOperand(i))
+            continue;
+
+          MachineOperand& lastUD =
+                      last->second.first->getOperand(last->second.second);
+          if (lastUD.isDef())
+            lastUD.setIsDead(true);
+          else
+            lastUD.setIsKill(true);
+        }
+
+        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+      }
+    }
+  }
+
+  // Live-out (of the function) registers contain return values of the function,
+  // so we need to make sure they are alive at return time.
+  if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
+    MachineInstr* Ret = &MBB.back();
+    for (MachineRegisterInfo::liveout_iterator
+         I = MF->getRegInfo().liveout_begin(),
+         E = MF->getRegInfo().liveout_end(); I != E; ++I)
+      if (!Ret->readsRegister(*I)) {
+        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
+      }
+  }
+
+  // Finally, loop over the final use/def of each reg
+  // in the block and determine if it is dead.
+  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
+    MachineInstr* MI = I->second.first;
+    unsigned idx = I->second.second;
+    MachineOperand& MO = MI->getOperand(idx);
+
+    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
+
+    // A crude approximation of "live-out" calculation
+    bool usedOutsideBlock = isPhysReg ? false :
+          UsedInMultipleBlocks.test(MO.getReg() -
+                                    TargetRegisterInfo::FirstVirtualRegister);
+    if (!isPhysReg && !usedOutsideBlock)
+      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+           UE = MRI.reg_end(); UI != UE; ++UI)
+        // Two cases:
+        // - used in another block
+        // - used in the same block before it is defined (loop)
+        if (UI->getParent() != &MBB ||
+            (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+          UsedInMultipleBlocks.set(MO.getReg() -
+                                   TargetRegisterInfo::FirstVirtualRegister);
+          usedOutsideBlock = true;
+          break;
+        }
+
+    // Physical registers and those that are not live-out of the block
+    // are killed/dead at their last use/def within this block.
+    if (isPhysReg || !usedOutsideBlock) {
+      if (MO.isUse()) {
+        // Don't mark uses that are tied to defs as kills.
+        if (!MI->isRegTiedToDefOperand(idx))
+          MO.setIsKill(true);
+      } else
+        MO.setIsDead(true);
+    }
+  }
+}
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII = MBB.begin();
+
+  DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+        if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+  // Add live-in registers as active.
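+  // (For instance, an argument register that is live into the block is pinned
+  // by the PhysRegsUsed[Reg] = 0 entries below, so it cannot be handed out to
+  // a virtual register before its last use.)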
+  for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+         E = MBB.livein_end(); I != E; ++I) {
+    unsigned Reg = *I;
+    MF->getRegInfo().setPhysRegUsed(Reg);
+    PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+    AddToPhysRegsUseOrder(Reg);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         *SubRegs; ++SubRegs) {
+      if (PhysRegsUsed[*SubRegs] != -2) {
+        AddToPhysRegsUseOrder(*SubRegs);
+        PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+        MF->getRegInfo().setPhysRegUsed(*SubRegs);
+      }
+    }
+  }
+
+  ComputeLocalLiveness(MBB);
+
+  // Otherwise, sequentially allocate each instruction in the MBB.
+  while (MII != MBB.end()) {
+    MachineInstr *MI = MII++;
+    const TargetInstrDesc &TID = MI->getDesc();
+    DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+          DOUT << "  Regs have values: ";
+          for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+            if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+              DOUT << "[" << TRI->getName(i)
+                   << ",%reg" << PhysRegsUsed[i] << "] ";
+          DOUT << "\n");
+
+    // Loop over the implicit uses, making sure that they are at the head of the
+    // use order list, so they don't get reallocated.
+    if (TID.ImplicitUses) {
+      for (const unsigned *ImplicitUses = TID.ImplicitUses;
+           *ImplicitUses; ++ImplicitUses)
+        MarkPhysRegRecentlyUsed(*ImplicitUses);
+    }
+
+    SmallVector<unsigned, 8> Kills;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isKill()) {
+        if (!MO.isImplicit())
+          Kills.push_back(MO.getReg());
+        else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+          // These are extra physical register kills when a sub-register
+          // is defined (def of a sub-register is a read/mod/write of the
+          // larger registers).  Ignore.
+          Kills.push_back(MO.getReg());
+      }
+    }
+
+    // If any physical regs are earlyclobber, spill any value they might
+    // have in them, then mark them unallocatable.
+    // If any virtual regs are earlyclobber, allocate them now (before
+    // freeing inputs that are killed).
+    if (MI->getOpcode()==TargetInstrInfo::INLINEASM) {
+      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+        MachineOperand& MO = MI->getOperand(i);
+        if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
+            MO.getReg()) {
+          if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+            unsigned DestVirtReg = MO.getReg();
+            unsigned DestPhysReg;
+
+            // If DestVirtReg already has a value, use it.
+            if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+              DestPhysReg = getReg(MBB, MI, DestVirtReg);
+            MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+            markVirtRegModified(DestVirtReg);
+            getVirtRegLastUse(DestVirtReg) =
+                   std::make_pair((MachineInstr*)0, 0);
+            DOUT << "  Assigning " << TRI->getName(DestPhysReg)
+                 << " to %reg" << DestVirtReg << "\n";
+            MO.setReg(DestPhysReg);  // Assign the earlyclobber register
+          } else {
+            unsigned Reg = MO.getReg();
+            if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+            // These are extra physical register defs when a sub-register
+            // is defined (def of a sub-register is a read/mod/write of the
+            // larger registers).  Ignore.
+            if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+            MF->getRegInfo().setPhysRegUsed(Reg);
+            spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+            PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+            AddToPhysRegsUseOrder(Reg);
+
+            for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+                 *SubRegs; ++SubRegs) {
+              if (PhysRegsUsed[*SubRegs] != -2) {
+                MF->getRegInfo().setPhysRegUsed(*SubRegs);
+                PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+                AddToPhysRegsUseOrder(*SubRegs);
+              }
+            }
+          }
+        }
+      }
+    }
+
+    // Get the used operands into registers.  This has the potential to spill
+    // incoming values if we are out of registers.  Note that we completely
+    // ignore physical register uses here.  We assume that if an explicit
+    // physical register is referenced by the instruction, that it is guaranteed
+    // to be live-in, or the input is badly hosed.
+    //
+    SmallSet<unsigned, 4> ReloadedRegs;
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      // here we are looking for only used operands (never def&use)
+      if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        MI = reloadVirtReg(MBB, MI, i, ReloadedRegs);
+    }
+
+    // If this instruction is the last user of this register, kill the
+    // value, freeing the register being used, so it doesn't need to be
+    // spilled to memory.
+    //
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned VirtReg = Kills[i];
+      unsigned PhysReg = VirtReg;
+      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        // If the virtual register was never materialized into a register, it
+        // might not be in the map, but it won't hurt to zero it out anyway.
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+      }
+
+      if (PhysReg) {
+        DOUT << "  Last use of " << TRI->getName(PhysReg)
+             << "[%reg" << VirtReg <<"], removing it from live set\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+             *SubRegs; ++SubRegs) {
+          if (PhysRegsUsed[*SubRegs] != -2) {
+            DOUT  << "  Last use of "
+                  << TRI->getName(*SubRegs)
+                  << "[%reg" << VirtReg <<"], removing it from live set\n";
+            removePhysReg(*SubRegs);
+          }
+        }
+      }
+    }
+
+    // Loop over all of the operands of the instruction, spilling registers that
+    // are defined, and marking explicit destinations in the PhysRegsUsed map.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+          !MO.isEarlyClobber() &&
+          TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        unsigned Reg = MO.getReg();
+        if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+        // These are extra physical register defs when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers).  Ignore.
+        if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+        MF->getRegInfo().setPhysRegUsed(Reg);
+        spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+        PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        AddToPhysRegsUseOrder(Reg);
+
+        for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+             *SubRegs; ++SubRegs) {
+          if (PhysRegsUsed[*SubRegs] != -2) {
+            MF->getRegInfo().setPhysRegUsed(*SubRegs);
+            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+            AddToPhysRegsUseOrder(*SubRegs);
+          }
+        }
+      }
+    }
+
+    // Loop over the implicit defs, spilling them as well.
+    if (TID.ImplicitDefs) {
+      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+           *ImplicitDefs; ++ImplicitDefs) {
+        unsigned Reg = *ImplicitDefs;
+        if (PhysRegsUsed[Reg] != -2) {
+          spillPhysReg(MBB, MI, Reg, true);
+          AddToPhysRegsUseOrder(Reg);
+          PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        }
+        MF->getRegInfo().setPhysRegUsed(Reg);
+        for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+             *SubRegs; ++SubRegs) {
+          if (PhysRegsUsed[*SubRegs] != -2) {
+            AddToPhysRegsUseOrder(*SubRegs);
+            PhysRegsUsed[*SubRegs] = 0;  // It is free and reserved now
+            MF->getRegInfo().setPhysRegUsed(*SubRegs);
+          }
+        }
+      }
+    }
+
+    SmallVector<unsigned, 8> DeadDefs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDead())
+        DeadDefs.push_back(MO.getReg());
+    }
+
+    // Okay, we have allocated all of the source operands and spilled any values
+    // that would be destroyed by defs of this instruction.  Loop over the
+    // explicit defs and assign them to a register, spilling incoming values if
+    // we need to scavenge a register.
+    //
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && MO.getReg() &&
+          !MO.isEarlyClobber() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+        unsigned DestVirtReg = MO.getReg();
+        unsigned DestPhysReg;
+
+        // If DestVirtReg already has a value, use it.
+        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+          DestPhysReg = getReg(MBB, MI, DestVirtReg);
+        MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+        markVirtRegModified(DestVirtReg);
+        getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+        DOUT << "  Assigning " << TRI->getName(DestPhysReg)
+             << " to %reg" << DestVirtReg << "\n";
+        MO.setReg(DestPhysReg);  // Assign the output register
+      }
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      }
+
+      if (PhysReg) {
+        DOUT  << "  Register " << TRI->getName(PhysReg)
+              << " [%reg" << VirtReg
+              << "] is never used, removing it from live set\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT  << "  Register " << TRI->getName(*AliasSet)
+                  << " [%reg" << *AliasSet
+                  << "] is never used, removing it from live set\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Finally, if this is a noop copy instruction, zap it.  (Except that if
+    // the copy is dead, it must be kept to avoid messing up liveness info for
+    // the register scavenger.  See pr4100.)
+    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+    if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+        SrcReg == DstReg && DeadDefs.empty())
+      MBB.erase(MI);
+  }
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+    }
+
+#if 0
+  // This checking code is very expensive.
+  bool AllOk = true;
+  for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+           e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+    if (unsigned PR = Virt2PhysRegMap[i]) {
+      cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+      AllOk = false;
+    }
+  assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block, but which do not hold any virtual registers.  e.g., the stack
+  // pointer.
+  PhysRegsUseOrder.clear();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function " << "\n";
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+
+  PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable.  To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = TRI->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+  }
+
+  // initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers
+  unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+  StackSlotForVirtReg.grow(LastVirtReg);
+  Virt2PhysRegMap.grow(LastVirtReg);
+  Virt2LastUseMap.grow(LastVirtReg);
+  VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+  UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  PhysRegsUsed.clear();
+  VirtRegModified.clear();
+  UsedInMultipleBlocks.clear();
+  Virt2PhysRegMap.clear();
+  Virt2LastUseMap.clear();
+  return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+  return new RALocal();
+}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..61450a7cca7c
--- /dev/null
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,871 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+//   architectures. In Proceedings of the Joint Conference on Languages,
+//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+//   NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "PBQP.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+                     createPBQPRegisterAllocator);
+
+namespace {
+
+  //!
+  //! PBQP based allocators solve the register allocation problem by mapping
+  //! register allocation problems to Partitioned Boolean Quadratic
+  //! Programming problems.
+  class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass {
+  public:
+
+    static char ID;
+
+    //! Construct a PBQP register allocator.
+    PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {}
+
+    //! Return the pass name.
+    virtual const char* getPassName() const throw() {
+      return "PBQP Register Allocator";
+    }
+
+    //! PBQP analysis usage.
+    virtual void getAnalysisUsage(AnalysisUsage &au) const {
+      au.addRequired<LiveIntervals>();
+      au.addRequiredTransitive<RegisterCoalescer>();
+      au.addRequired<LiveStacks>();
+      au.addPreserved<LiveStacks>();
+      au.addRequired<MachineLoopInfo>();
+      au.addPreserved<MachineLoopInfo>();
+      au.addRequired<VirtRegMap>();
+      MachineFunctionPass::getAnalysisUsage(au);
+    }
+
+    //! Perform register allocation
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+    typedef std::vector<const LiveInterval*> Node2LIMap;
+    typedef std::vector<unsigned> AllowedSet;
+    typedef std::vector<AllowedSet> AllowedSetMap;
+    typedef std::set<unsigned> RegSet;
+    typedef std::pair<unsigned, unsigned> RegPair;
+    typedef std::map<RegPair, PBQPNum> CoalesceMap;
+
+    typedef std::set<LiveInterval*> LiveIntervalSet;
+
+    MachineFunction *mf;
+    const TargetMachine *tm;
+    const TargetRegisterInfo *tri;
+    const TargetInstrInfo *tii;
+    const MachineLoopInfo *loopInfo;
+    MachineRegisterInfo *mri;
+
+    LiveIntervals *lis;
+    LiveStacks *lss;
+    VirtRegMap *vrm;
+
+    LI2NodeMap li2Node;
+    Node2LIMap node2LI;
+    AllowedSetMap allowedSets;
+    LiveIntervalSet vregIntervalsToAlloc,
+                    emptyVRegIntervals;
+
+
+    //! Builds a PBQP cost vector.
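+    //! For illustration (with hypothetical registers R1 and R2): given
+    //! allowed = {R1, R2}, spill cost s, and a recorded coalescing benefit b
+    //! for (vReg, R2), the vector built below is [s, 0, -b], element 0 being
+    //! the spill option.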
+    template <typename RegContainer>
+    PBQPVector* buildCostVector(unsigned vReg,
+                                const RegContainer &allowed,
+                                const CoalesceMap &coalesces,
+                                PBQPNum spillCost) const;
+
+    //! \brief Builds a PBQP interference matrix.
+    //!
+    //! @return Either a pointer to a non-zero PBQP matrix representing the
+    //!         allocation option costs, or a null pointer for a zero matrix.
+    //!
+    //! Expects allowed sets for two interfering LiveIntervals. These allowed
+    //! sets should contain only allocable registers from the LiveInterval's
+    //! register class, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1,
+                                        const RegContainer &allowed2) const;
+
+    //! \brief Builds a PBQP coalescing matrix.
+    //!
+    //! Expects allowed sets for two potentially coalescable LiveIntervals,
+    //! and an estimated benefit due to coalescing. The allowed sets should
+    //! contain only allocable registers from the LiveInterval's register
+    //! classes, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1,
+                                      const RegContainer &allowed2,
+                                      PBQPNum cBenefit) const;
+
+    //! \brief Finds coalescing opportunities and returns them as a map.
+    //!
+    //! Any entries in the map are guaranteed coalescable, even if their
+    //! corresponding live intervals overlap.
+    CoalesceMap findCoalesces();
+
+    //! \brief Finds the initial set of vreg intervals to allocate.
+    void findVRegIntervalsToAlloc();
+
+    //! \brief Constructs a PBQP problem representation of the register
+    //! allocation problem for this function.
+    //!
+    //! @return a PBQP solver object for the register allocation problem.
+    pbqp* constructPBQPProblem();
+
+    //! \brief Adds a stack interval if the given live interval has been
+    //! spilled. Used to support stack slot coloring.
+    void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
+
+    //! \brief Given a solved PBQP problem maps this solution back to a register
+    //! assignment.
+    bool mapPBQPToRegAlloc(pbqp *problem);
+
+    //! \brief Postprocessing before final spilling. Sets basic block "live in"
+    //! variables.
+    void finalizeAlloc() const;
+
+  };
+
+  char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
+                                          const RegContainer &allowed,
+                                          const CoalesceMap &coalesces,
+                                          PBQPNum spillCost) const {
+
+  typedef typename RegContainer::const_iterator AllowedItr;
+
+  // Allocate vector. Additional element (0th) used for spill option
+  PBQPVector *v = new PBQPVector(allowed.size() + 1);
+
+  (*v)[0] = spillCost;
+
+  // Iterate over the allowed registers inserting coalesce benefits if there
+  // are any.
+  unsigned ai = 0;
+  for (AllowedItr itr = allowed.begin(), end = allowed.end();
+       itr != end; ++itr, ++ai) {
+
+    unsigned pReg = *itr;
+
+    CoalesceMap::const_iterator cmItr =
+      coalesces.find(RegPair(vReg, pReg));
+
+    // No coalesce - on to the next preg.
+    if (cmItr == coalesces.end())
+      continue;
+
+    // We have a coalesce - insert the benefit.
+    (*v)[ai + 1] = -cmItr->second;
+  }
+
+  return v;
+}
+
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
+      const RegContainer &allowed1, const RegContainer &allowed2) const {
+
+  typedef typename RegContainer::const_iterator RegContainerIterator;
+
+  // Construct a PBQP matrix representing the cost of allocation options. The
+  // rows and columns correspond to the allocation options for the two live
+  // intervals. Elements will be infinite where corresponding registers alias,
+  // since we cannot allocate aliasing registers to interfering live intervals.
+  // All other elements (non-aliasing combinations) will have zero cost. Note
+  // that the spill option (element 0,0) has zero cost, since we can allocate
+  // both intervals to memory safely (the cost for each individual allocation
+  // to memory is accounted for by the cost vectors for each live interval).
+  PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+  // Assume this is a zero matrix until proven otherwise. Zero matrices occur
+  // between interfering live ranges with non-overlapping register sets (e.g.
+  // non-overlapping reg classes, or disjoint sets of allowed regs within the
+  // same class). The term "overlapping" is used advisedly: sets which do not
+  // intersect, but contain registers which alias, will have non-zero matrices.
+  // We optimize zero matrices away to improve solver speed.
+  bool isZeroMatrix = true;
+
+
+  // Row index. Starts at 1, since the 0th row is for the spill option, which
+  // is always zero.
+  unsigned ri = 1;
+
+  // Iterate over allowed sets, insert infinities where required.
+  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+       a1Itr != a1End; ++a1Itr) {
+
+    // Column index, starts at 1 as for row index.
+    unsigned ci = 1;
+    unsigned reg1 = *a1Itr;
+
+    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+         a2Itr != a2End; ++a2Itr) {
+
+      unsigned reg2 = *a2Itr;
+
+      // If the row/column regs are identical or alias insert an infinity.
+      if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) {
+        (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity();
+        isZeroMatrix = false;
+      }
+
+      ++ci;
+    }
+
+    ++ri;
+  }
+
+  // If this turns out to be a zero matrix...
+  if (isZeroMatrix) {
+    // free it and return null.
+    delete m;
+    return 0;
+  }
+
+  // ...otherwise return the cost matrix.
+  return m;
+}
+
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix(
+      const RegContainer &allowed1, const RegContainer &allowed2,
+      PBQPNum cBenefit) const {
+
+  typedef typename RegContainer::const_iterator RegContainerIterator;
+
+  // Construct a PBQP Matrix representing the benefits of coalescing. As with
+  // interference matrices the rows and columns represent allowed registers
+  // for the LiveIntervals which are (potentially) to be coalesced. The amount
+  // -cBenefit will be placed in any element representing the same register
+  // for both intervals.
+  PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+  // Reset costs to zero.
+  m->reset(0);
+
+  // Assume the matrix is zero till proven otherwise. Zero matrices will be
+  // optimized away as in the interference case.
+  bool isZeroMatrix = true;
+
+  // Row index. Starts at 1, since the 0th row is for the spill option, which
+  // is always zero.
+  unsigned ri = 1;
+
+  // Iterate over the allowed sets, insert coalescing benefits where
+  // appropriate.
+  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+       a1Itr != a1End; ++a1Itr) {
+
+    // Column index, starts at 1 as for row index.
+    unsigned ci = 1;
+    unsigned reg1 = *a1Itr;
+
+    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+         a2Itr != a2End; ++a2Itr) {
+
+      // If the row and column represent the same register insert a beneficial
+      // cost to preference this allocation - it would allow us to eliminate a
+      // move instruction.
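+      // (Illustration with hypothetical registers: for allowed1 = allowed2 =
+      // {R1, R2} and benefit b, the matrix built here is zero everywhere
+      // except the (R1,R1) and (R2,R2) entries, which hold -b; row and column
+      // 0 remain the zero-cost spill options.)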
+      if (reg1 == *a2Itr) {
+        (*m)[ri][ci] = -cBenefit;
+        isZeroMatrix = false;
+      }
+
+      ++ci;
+    }
+
+    ++ri;
+  }
+
+  // If this turns out to be a zero matrix...
+  if (isZeroMatrix) {
+    // ...free it and return null.
+    delete m;
+    return 0;
+  }
+
+  return m;
+}
+
+PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
+
+  typedef MachineFunction::const_iterator MFIterator;
+  typedef MachineBasicBlock::const_iterator MBBIterator;
+  typedef LiveInterval::const_vni_iterator VNIIterator;
+
+  CoalesceMap coalescesFound;
+
+  // To find coalesces we need to iterate over the function looking for
+  // copy instructions.
+  for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
+       bbItr != bbEnd; ++bbItr) {
+
+    const MachineBasicBlock *mbb = &*bbItr;
+
+    for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
+         iItr != iEnd; ++iItr) {
+
+      const MachineInstr *instr = &*iItr;
+      unsigned srcReg, dstReg, srcSubReg, dstSubReg;
+
+      // If this isn't a copy then continue to the next instruction.
+      if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+        continue;
+
+      // If the registers are already the same our job is nice and easy.
+      if (dstReg == srcReg)
+        continue;
+
+      bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
+           dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+
+      // If both registers are physical then we can't coalesce.
+      if (srcRegIsPhysical && dstRegIsPhysical)
+        continue;
+
+      // If it's a copy that includes a virtual register but the source and
+      // destination classes differ then we can't coalesce, so continue with
+      // the next instruction.
+      const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
+          tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
+
+      const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
+          tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
+
+      if (srcRegClass != dstRegClass)
+        continue;
+
+      // We also need any physical regs to be allocable, coalescing with
+      // a non-allocable register is invalid.
+      if (srcRegIsPhysical) {
+        if (std::find(srcRegClass->allocation_order_begin(*mf),
+                      srcRegClass->allocation_order_end(*mf), srcReg) ==
+            srcRegClass->allocation_order_end(*mf))
+          continue;
+      }
+
+      if (dstRegIsPhysical) {
+        if (std::find(dstRegClass->allocation_order_begin(*mf),
+                      dstRegClass->allocation_order_end(*mf), dstReg) ==
+            dstRegClass->allocation_order_end(*mf))
+          continue;
+      }
+
+      // If we've made it here we have a copy with compatible register classes.
+      // We can probably coalesce, but we need to consider overlap.
+      const LiveInterval *srcLI = &lis->getInterval(srcReg),
+                         *dstLI = &lis->getInterval(dstReg);
+
+      if (srcLI->overlaps(*dstLI)) {
+        // Even in the case of an overlap we might still be able to coalesce,
+        // but we need to make sure that no definition of either range occurs
+        // while the other range is live.
+
+        // Otherwise start by assuming we're ok.
+        bool badDef = false;
+
+        // Test all defs of the source range.
+        for (VNIIterator
+               vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
+               vniItr != vniEnd; ++vniItr) {
+
+          // If we find a def that kills the coalescing opportunity then
+          // record it and break from the loop.
+          if (dstLI->liveAt((*vniItr)->def)) {
+            badDef = true;
+            break;
+          }
+        }
+
+        // If we have a bad def give up, continue to the next instruction.
+        if (badDef)
+          continue;
+
+        // Otherwise test definitions of the destination range.
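+        // (This mirrors the source-range check above: a def of the
+        // destination while the source is live would equally invalidate the
+        // coalesce.)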
+        for (VNIIterator
+               vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
+               vniItr != vniEnd; ++vniItr) {
+
+          // We want to make sure we skip the copy instruction itself.
+          if ((*vniItr)->copy == instr)
+            continue;
+
+          if (srcLI->liveAt((*vniItr)->def)) {
+            badDef = true;
+            break;
+          }
+        }
+
+        // As before, if we find a bad def we give up and continue to the
+        // next instr.
+        if (badDef)
+          continue;
+      }
+
+      // If we make it to here then either the ranges didn't overlap, or they
+      // did, but none of their definitions would prevent us from coalescing.
+      // We're good to go with the coalesce.
+
+      float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+
+      coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
+      coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
+    }
+
+  }
+
+  return coalescesFound;
+}
+
+void PBQPRegAlloc::findVRegIntervalsToAlloc() {
+
+  // Iterate over all live ranges.
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+
+    // Ignore physical ones.
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+      continue;
+
+    LiveInterval *li = itr->second;
+
+    // If this live interval is non-empty we will use pbqp to allocate it.
+    // Empty intervals we allocate in a simple post-processing stage in
+    // finalizeAlloc.
+    if (!li->empty()) {
+      vregIntervalsToAlloc.insert(li);
+    }
+    else {
+      emptyVRegIntervals.insert(li);
+    }
+  }
+}
+
+pbqp* PBQPRegAlloc::constructPBQPProblem() {
+
+  typedef std::vector<const LiveInterval*> LIVector;
+  typedef std::vector<unsigned> RegVector;
+
+  // This will store the physical intervals for easy reference.
+  LIVector physIntervals;
+
+  // Start by clearing the old node <-> live interval mappings & allowed sets
+  li2Node.clear();
+  node2LI.clear();
+  allowedSets.clear();
+
+  // Populate physIntervals, update preg use:
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+      physIntervals.push_back(itr->second);
+      mri->setPhysRegUsed(itr->second->reg);
+    }
+  }
+
+  // Iterate over vreg intervals, construct live interval <-> node number
+  // mappings.
+  for (LiveIntervalSet::const_iterator
+       itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
+       itr != end; ++itr) {
+    const LiveInterval *li = *itr;
+
+    li2Node[li] = node2LI.size();
+    node2LI.push_back(li);
+  }
+
+  // Get the set of potential coalesces.
+  CoalesceMap coalesces(findCoalesces());
+
+  // Construct a PBQP solver for this problem
+  pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size());
+
+  // Resize allowedSets container appropriately.
+  allowedSets.resize(vregIntervalsToAlloc.size());
+
+  // Iterate over virtual register intervals to compute allowed sets...
+  for (unsigned node = 0; node < node2LI.size(); ++node) {
+
+    // Grab pointers to the interval and its register class.
+    const LiveInterval *li = node2LI[node];
+    const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+
+    // Start by assuming all allocable registers in the class are allowed...
+    RegVector liAllowed(liRC->allocation_order_begin(*mf),
+                        liRC->allocation_order_end(*mf));
+
+    // Eliminate the physical registers which overlap with this range, along
+    // with all their aliases.
+    for (LIVector::iterator pItr = physIntervals.begin(),
+         pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+
+      if (!li->overlaps(**pItr))
+        continue;
+
+      unsigned pReg = (*pItr)->reg;
+
+      // If we get here then the live intervals overlap, but we're still ok
+      // if they're coalescable.
+      if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
+        continue;
+
+      // If we get here then we have a genuine exclusion.
+
+      // Remove the overlapping reg...
+      RegVector::iterator eraseItr =
+        std::find(liAllowed.begin(), liAllowed.end(), pReg);
+
+      if (eraseItr != liAllowed.end())
+        liAllowed.erase(eraseItr);
+
+      const unsigned *aliasItr = tri->getAliasSet(pReg);
+
+      if (aliasItr != 0) {
+        // ...and its aliases.
+        for (; *aliasItr != 0; ++aliasItr) {
+          RegVector::iterator eraseItr =
+            std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+
+          if (eraseItr != liAllowed.end()) {
+            liAllowed.erase(eraseItr);
+          }
+        }
+      }
+    }
+
+    // Copy the allowed set into a member vector for use when constructing cost
+    // vectors & matrices, and mapping PBQP solutions back to assignments.
+    allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+
+    // Set the spill cost to the interval weight, or epsilon if the
+    // interval weight is zero
+    PBQPNum spillCost = (li->weight != 0.0) ?
+        li->weight : std::numeric_limits<PBQPNum>::min();
+
+    // Build a cost vector for this interval.
+    add_pbqp_nodecosts(solver, node,
+                       buildCostVector(li->reg, allowedSets[node], coalesces,
+                                       spillCost));
+
+  }
+
+
+  // Now add the cost matrices...
+  for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
+    const LiveInterval *li = node2LI[node1];
+
+    // Test for live range overlaps and insert interference matrices.
+    for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
+      const LiveInterval *li2 = node2LI[node2];
+
+      CoalesceMap::const_iterator cmItr =
+        coalesces.find(RegPair(li->reg, li2->reg));
+
+      PBQPMatrix *m = 0;
+
+      if (cmItr != coalesces.end()) {
+        m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
+                                  cmItr->second);
+      }
+      else if (li->overlaps(*li2)) {
+        m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
+      }
+
+      if (m != 0) {
+        add_pbqp_edgecosts(solver, node1, node2, m);
+        delete m;
+      }
+    }
+  }
+
+  // We're done, PBQP problem constructed - return it.
+  return solver;
+}
+
+void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+                                    MachineRegisterInfo* mri) {
+  int stackSlot = vrm->getStackSlot(spilled->reg);
+
+  if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+    return;
+
+  const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+  LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+  VNInfo *vni;
+  if (stackInterval.getNumValNums() != 0)
+    vni = stackInterval.getValNumInfo(0);
+  else
+    vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+
+  LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+  stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
+
+  // Set to true if we have any spills
+  bool anotherRoundNeeded = false;
+
+  // Clear the existing allocation.
+  vrm->clearAllVirt();
+
+  // Iterate over the nodes mapping the PBQP solution to a register assignment.
+  for (unsigned node = 0; node < node2LI.size(); ++node) {
+    unsigned virtReg = node2LI[node]->reg,
+             allocSelection = get_pbqp_solution(problem, node);
+
+    // If the PBQP solution is non-zero it's a physical register...
+    if (allocSelection != 0) {
+      // Get the physical reg, subtracting 1 to account for the spill option.
+      unsigned physReg = allowedSets[node][allocSelection - 1];
+
+      DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n";
+
+      assert(physReg != 0);
+
+      // Add to the virt reg map and update the used phys regs.
+      vrm->assignVirt2Phys(virtReg, physReg);
+    }
+    // ...Otherwise it's a spill.
+    else {
+
+      // Make sure we ignore this virtual reg on the next round
+      // of allocation
+      vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
+
+      // Insert spill ranges for this live range
+      const LiveInterval *spillInterval = node2LI[node];
+      double oldSpillWeight = spillInterval->weight;
+      SmallVector<LiveInterval*, 8> spillIs;
+      std::vector<LiveInterval*> newSpills =
+        lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+      addStackInterval(spillInterval, mri);
+
+      DOUT << "VREG " << virtReg << " -> SPILLED (Cost: "
+           << oldSpillWeight << ", New vregs: ";
+
+      // Copy any newly inserted live intervals into the list of regs to
+      // allocate.
+      for (std::vector<LiveInterval*>::const_iterator
+           itr = newSpills.begin(), end = newSpills.end();
+           itr != end; ++itr) {
+
+        assert(!(*itr)->empty() && "Empty spill range.");
+
+        DOUT << (*itr)->reg << " ";
+
+        vregIntervalsToAlloc.insert(*itr);
+      }
+
+      DOUT << ")\n";
+
+      // We need another round if spill intervals were added.
+      anotherRoundNeeded |= !newSpills.empty();
+    }
+  }
+
+  return !anotherRoundNeeded;
+}
+
+void PBQPRegAlloc::finalizeAlloc() const {
+  typedef LiveIntervals::iterator LIIterator;
+  typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+  // First allocate registers for the empty intervals.
+  for (LiveIntervalSet::const_iterator
+       itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+       itr != end; ++itr) {
+    LiveInterval *li = *itr;
+
+    unsigned physReg = li->preference;
+
+    if (physReg == 0) {
+      const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+      physReg = *liRC->allocation_order_begin(*mf);
+    }
+
+    vrm->assignVirt2Phys(li->reg, physReg);
+  }
+
+  // Finally iterate over the basic blocks to compute and set the live-in sets.
+  SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+  MachineBasicBlock *entryMBB = &*mf->begin();
+
+  for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+       liItr != liEnd; ++liItr) {
+
+    const LiveInterval *li = liItr->second;
+    unsigned reg = 0;
+
+    // Get the physical register for this interval
+    if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+      reg = li->reg;
+    }
+    else if (vrm->isAssignedReg(li->reg)) {
+      reg = vrm->getPhys(li->reg);
+    }
+    else {
+      // Ranges which are assigned a stack slot only are ignored.
+      continue;
+    }
+
+    // Ignore unallocated vregs:
+    if (reg == 0) {
+      continue;
+    }
+
+    // Iterate over the ranges of the current interval...
+    for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+         lrItr != lrEnd; ++lrItr) {
+
+      // Find the set of basic blocks which this range is live into...
+      if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+        // And add the physreg for this interval to their live-in sets.
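+        // (The entry block is deliberately skipped below: its live-ins are
+        // the function's incoming argument registers, which are established
+        // by the calling convention rather than by this allocator.)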
+        for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+          if (liveInMBBs[i] != entryMBB) {
+            if (!liveInMBBs[i]->isLiveIn(reg)) {
+              liveInMBBs[i]->addLiveIn(reg);
+            }
+          }
+        }
+        liveInMBBs.clear();
+      }
+    }
+  }
+
+}
+
+bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+
+  mf = &MF;
+  tm = &mf->getTarget();
+  tri = tm->getRegisterInfo();
+  tii = tm->getInstrInfo();
+  mri = &mf->getRegInfo();
+
+  lis = &getAnalysis<LiveIntervals>();
+  lss = &getAnalysis<LiveStacks>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  vrm = &getAnalysis<VirtRegMap>();
+
+  DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n";
+
+  // Allocator main loop:
+  //
+  // * Map current regalloc problem to a PBQP problem
+  // * Solve the PBQP problem
+  // * Map the solution back to a register allocation
+  // * Spill if necessary
+  //
+  // This process is continued until no more spills are generated.
+
+  // Find the vreg intervals in need of allocation.
+  findVRegIntervalsToAlloc();
+
+  // If there aren't any then we're done here.
+  if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
+    return true;
+
+  // If there are non-empty intervals allocate them using pbqp.
+  if (!vregIntervalsToAlloc.empty()) {
+
+    bool pbqpAllocComplete = false;
+    unsigned round = 0;
+
+    while (!pbqpAllocComplete) {
+      DOUT << "  PBQP Regalloc round " << round << ":\n";
+
+      pbqp *problem = constructPBQPProblem();
+
+      solve_pbqp(problem);
+
+      pbqpAllocComplete = mapPBQPToRegAlloc(problem);
+
+      free_pbqp(problem);
+
+      ++round;
+    }
+  }
+
+  // Finalise allocation, allocate empty ranges.
+  finalizeAlloc();
+
+  vregIntervalsToAlloc.clear();
+  emptyVRegIntervals.clear();
+  li2Node.clear();
+  node2LI.clear();
+  allowedSets.clear();
+
+  DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n";
+
+  // Run rewriter
+  std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+  rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+  return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator() {
+  return new PBQPRegAlloc();
+}
+
+
+#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp
new file mode 100644
index 000000000000..447e54cf790b
--- /dev/null
+++ b/lib/CodeGen/RegAllocSimple.cpp
@@ -0,0 +1,257 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: It
+// immediately spills every value right after it is computed, and it reloads
+// all used operands from the spill area to temporary registers before each
+// instruction. It does not keep values in registers across instructions.
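+// For example (illustrative), for a three-address instruction r2 = add r0, r1
+// it emits reloads of r0 and r1 from their stack slots into scratch physical
+// registers, performs the add, and immediately spills the result to r2's
+// slot.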
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+namespace {
+  static RegisterRegAlloc
+    simpleRegAlloc("simple", "simple register allocator",
+                   createSimpleRegisterAllocator);
+
+  class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass {
+  public:
+    static char ID;
+    RegAllocSimple() : MachineFunctionPass(&ID) {}
+  private:
+    MachineFunction *MF;
+    const TargetMachine *TM;
+    const TargetRegisterInfo *TRI;
+    const TargetInstrInfo *TII;
+
+    // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where
+    // these values are spilled
+    std::map<unsigned, int> StackSlotForVirtReg;
+
+    // RegsUsed - Keep track of what registers are currently in use. This is a
+    // bitset.
+    std::vector<bool> RegsUsed;
+
+    // RegClassIdx - Maps RegClass => which index we can take a register
+    // from. Since this is a simple register allocator, when we need a register
+    // of a certain class, we just take the next available one.
+    std::map<const TargetRegisterClass*, unsigned> RegClassIdx;
+
+  public:
+    virtual const char *getPassName() const {
+      return "Simple Register Allocator";
+    }
+
+    /// runOnMachineFunction - Register allocate the whole function
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(PHIEliminationID);           // Eliminate PHI nodes
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  private:
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// getStackSpaceFor - This returns the offset of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// Given a virtual register, return a compatible physical register that is
+    /// currently unused.
+    ///
+    /// Side effect: marks that register as being used until manually cleared
+    ///
+    unsigned getFreeReg(unsigned virtualReg);
+
+    /// Moves value from memory into that register
+    unsigned reloadVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, unsigned VirtReg);
+
+    /// Saves reg value on the stack (maps virtual register to stack value)
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                      unsigned VirtReg, unsigned PhysReg);
+  };
+  char RegAllocSimple::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual
+/// register to be held on the stack.
+int RegAllocSimple::getStackSpaceFor(unsigned VirtReg,
+                                     const TargetRegisterClass *RC) {
+  // Find the location VirtReg would belong...
+  std::map<unsigned, int>::iterator I = StackSlotForVirtReg.find(VirtReg);
+
+  if (I != StackSlotForVirtReg.end())
+    return I->second;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+
+  return FrameIdx;
+}
+
+unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) {
+  const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtualReg);
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+#ifndef NDEBUG
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+#endif
+
+  while (1) {
+    unsigned regIdx = RegClassIdx[RC]++;
+    assert(RI+regIdx != RE && "Not enough registers!");
+    unsigned PhysReg = *(RI+regIdx);
+
+    if (!RegsUsed[PhysReg]) {
+      MF->getRegInfo().setPhysRegUsed(PhysReg);
+      return PhysReg;
+    }
+  }
+}
+
+unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned VirtReg) {
+  const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+  int FrameIdx = getStackSpaceFor(VirtReg, RC);
+  unsigned PhysReg = getFreeReg(VirtReg);
+
+  // Add move instruction(s)
+  ++NumLoads;
+  TII->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+  return PhysReg;
+}
+
+void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned VirtReg, unsigned PhysReg) {
+  const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+
+  int FrameIdx = getStackSpaceFor(VirtReg, RC);
+
+  // Add move instruction(s)
+  ++NumStores;
+  TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIdx, RC);
+}
+
+
+void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+    // Made to combat the incorrect allocation of r2 = add r1, r1
+    std::map<unsigned, unsigned> Virt2PhysRegMap;
+
+    RegsUsed.resize(TRI->getNumRegs());
+
+    // This is a preliminary pass that will invalidate any registers that are
+    // used by the instruction (including implicit uses).
+    const TargetInstrDesc &Desc = MI->getDesc();
+    const unsigned *Regs;
+    if (Desc.ImplicitUses) {
+      for (Regs = Desc.ImplicitUses; *Regs; ++Regs)
+        RegsUsed[*Regs] = true;
+    }
+
+    if (Desc.ImplicitDefs) {
+      for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) {
+        RegsUsed[*Regs] = true;
+        MF->getRegInfo().setPhysRegUsed(*Regs);
+      }
+    }
+
+    // Loop over uses, move from memory into registers.
+    for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+      MachineOperand &MO = MI->getOperand(i);
+
+      if (MO.isReg() && MO.getReg() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+        unsigned virtualReg = (unsigned) MO.getReg();
+        DOUT << "op: " << MO << "\n";
+        DOUT << "\t inst[" << i << "]: ";
+        DEBUG(MI->print(*cerr.stream(), TM));
+
+        // make sure the same virtual register maps to the same physical
+        // register in any given instruction
+        unsigned physReg = Virt2PhysRegMap[virtualReg];
+        if (physReg == 0) {
+          if (MO.isDef()) {
+            unsigned TiedOp;
+            if (!MI->isRegTiedToUseOperand(i, &TiedOp)) {
+              physReg = getFreeReg(virtualReg);
+            } else {
+              // must be same register number as the source operand that is
+              // tied to. This maps a = b + c into b = b + c, and saves b into
+              // a's spot.
+              assert(MI->getOperand(TiedOp).isReg() &&
+                     MI->getOperand(TiedOp).getReg() &&
+                     MI->getOperand(TiedOp).isUse() &&
+                     "Two address instruction invalid!");
+
+              physReg = MI->getOperand(TiedOp).getReg();
+            }
+            spillVirtReg(MBB, next(MI), virtualReg, physReg);
+          } else {
+            physReg = reloadVirtReg(MBB, MI, virtualReg);
+            Virt2PhysRegMap[virtualReg] = physReg;
+          }
+        }
+        MO.setReg(physReg);
+        DOUT << "virt: " << virtualReg << ", phys: " << MO.getReg() << "\n";
+      }
+    }
+    RegClassIdx.clear();
+    RegsUsed.clear();
+  }
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function\n";
+  MF = &Fn;
+  TM = &MF->getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+
+  // Loop over all of the basic blocks, eliminating virtual register references
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+  return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..1131e3db4e7d
--- /dev/null
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,41 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface used by all clients and
+// implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+// Register the RegisterCoalescer interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+char RegisterCoalescer::ID = 0;
+
+// RegisterCoalescer destructor: DO NOT move this to the header file
+// for RegisterCoalescer or else clients of the RegisterCoalescer
+// class may not depend on the RegisterCoalescer.o file in the current
+// .a file, causing register coalescing support to not be included in
+// the tool correctly!
+//
+RegisterCoalescer::~RegisterCoalescer() {}
+
+// Because of the way .a files work, we must force the SimpleRC
+// implementation to be pulled in if the RegisterCoalescer classes are
+// pulled in. Otherwise we run the risk of RegisterCoalescer being
+// used, but the default implementation not being linked into the tool
+// that uses it.
+DEFINING_FILE_FOR(RegisterCoalescer)
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..944468ea11b8
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,480 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg-scavenging" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// RedefinesSuperRegPart - Return true if the specified register is redefining +/// part of a super-register. +static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, + const TargetRegisterInfo *TRI) { + bool SeenSuperUse = false; + bool SeenSuperDef = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (TRI->isSuperRegister(SubReg, MO.getReg())) { + if (MO.isUse()) + SeenSuperUse = true; + else if (MO.isImplicit()) + SeenSuperDef = true; + } + } + + return SeenSuperDef && SeenSuperUse; +} + +static bool RedefinesSuperRegPart(const MachineInstr *MI, + const MachineOperand &MO, + const TargetRegisterInfo *TRI) { + assert(MO.isReg() && MO.isDef() && "Not a register def!"); + return RedefinesSuperRegPart(MI, MO.getReg(), TRI); +} + +/// setUsed - Set the register and its sub-registers as being used. +void RegScavenger::setUsed(unsigned Reg, bool ImpDef) { + RegsAvailable.reset(Reg); + ImplicitDefed[Reg] = ImpDef; + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + RegsAvailable.reset(SubReg); + ImplicitDefed[SubReg] = ImpDef; + } +} + +/// setUnused - Set the register and its sub-registers as being unused. +void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { + RegsAvailable.set(Reg); + ImplicitDefed.reset(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + if (!RedefinesSuperRegPart(MI, Reg, TRI)) { + RegsAvailable.set(SubReg); + ImplicitDefed.reset(SubReg); + } +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + "Target changed?"); + + if (!MBB) { + NumPhysRegs = TRI->getNumRegs(); + RegsAvailable.resize(NumPhysRegs); + ImplicitDefed.resize(NumPhysRegs); + + // Create reserved registers bitvector. + ReservedRegs = TRI->getReservedRegs(MF); + + // Create callee-saved registers bitvector. + CalleeSavedRegs.resize(NumPhysRegs); + const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + if (CSRegs != NULL) + for (unsigned i = 0; CSRegs[i]; ++i) + CalleeSavedRegs.set(CSRegs[i]); + } + + MBB = mbb; + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + CurrDist = 0; + DistanceMap.clear(); + ImplicitDefed.reset(); + + // All registers started out unused. 
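+  // (Note: RegsAvailable tracks *available* registers, so a set bit means
+  // the register is unused. Setting all bits first makes the XOR with
+  // ReservedRegs below equivalent to clearing exactly the reserved bits.)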
+  RegsAvailable.set();
+
+  // Reserved registers are always used.
+  RegsAvailable ^= ReservedRegs;
+
+  // Live-in registers are in use.
+  if (!MBB->livein_empty())
+    for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+         E = MBB->livein_end(); I != E; ++I)
+      setUsed(*I);
+
+  Tracking = false;
+}
+
+void RegScavenger::restoreScavengedReg() {
+  TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+                            ScavengingFrameIndex, ScavengedRC);
+  MachineBasicBlock::iterator II = prior(MBBI);
+  TRI->eliminateFrameIndex(II, 0, this);
+  setUsed(ScavengedReg);
+  ScavengedReg = 0;
+  ScavengedRC = NULL;
+}
+
+#ifndef NDEBUG
+/// isLiveInButUnusedBefore - Return true if the register is live-in to the
+/// MBB and not used before it reaches the MI that defines the register.
+static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
+                                    MachineBasicBlock *MBB,
+                                    const TargetRegisterInfo *TRI,
+                                    MachineRegisterInfo* MRI) {
+  // First check if register is livein.
+  bool isLiveIn = false;
+  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+       E = MBB->livein_end(); I != E; ++I)
+    if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
+      isLiveIn = true;
+      break;
+    }
+  if (!isLiveIn)
+    return false;
+
+  // Is there any use of it before the specified MI?
+  SmallPtrSet<MachineInstr*, 8> UsesInMBB;
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+       UE = MRI->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI->getParent() == MBB)
+      UsesInMBB.insert(UseMI);
+  }
+  if (UsesInMBB.empty())
+    return true;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
+    if (UsesInMBB.count(&*I))
+      return false;
+  return true;
+}
+#endif
+
+void RegScavenger::forward() {
+  // Move ptr forward.
+  if (!Tracking) {
+    MBBI = MBB->begin();
+    Tracking = true;
+  } else {
+    assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+    MBBI = next(MBBI);
+  }
+
+  MachineInstr *MI = MBBI;
+  DistanceMap.insert(std::make_pair(MI, CurrDist++));
+
+  if (MI == ScavengeRestore) {
+    ScavengedReg = 0;
+    ScavengedRC = NULL;
+    ScavengeRestore = NULL;
+  }
+
+  bool IsImpDef = MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF;
+
+  // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> UseMOs;
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> DefMOs;
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> EarlyClobberMOs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    if (MO.isUse())
+      UseMOs.push_back(std::make_pair(&MO,i));
+    else if (MO.isEarlyClobber())
+      EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+    else
+      DefMOs.push_back(std::make_pair(&MO,i));
+  }
+
+  // Process uses first.
+  BitVector UseRegs(NumPhysRegs);
+  for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+    const MachineOperand MO = *UseMOs[i].first;
+    unsigned Reg = MO.getReg();
+
+    assert(isUsed(Reg) && "Using an undefined register!");
+
+    if (MO.isKill() && !isReserved(Reg)) {
+      UseRegs.set(Reg);
+
+      // Mark sub-registers as used.
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           unsigned SubReg = *SubRegs; ++SubRegs)
+        UseRegs.set(SubReg);
+    }
+  }
+
+  // Change states of all registers after all the uses are processed to guard
+  // against multiple uses.
+  setUnused(UseRegs);
+
+  // Process early clobber defs then process defs. We can have an early
+  // clobber that is dead, it should not conflict with a def that happens
+  // one "slot" (see InstrSlots in LiveIntervalAnalysis.h) later.
+  unsigned NumECs = EarlyClobberMOs.size();
+  unsigned NumDefs = DefMOs.size();
+
+  for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+    const MachineOperand &MO = (i < NumECs)
+      ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first;
+    unsigned Idx = (i < NumECs)
+      ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second;
+    unsigned Reg = MO.getReg();
+
+    // If it's dead upon def, then it is now free.
+    if (MO.isDead()) {
+      setUnused(Reg, MI);
+      continue;
+    }
+
+    // Skip two-address destination operand.
+    if (MI->isRegTiedToUseOperand(Idx)) {
+      assert(isUsed(Reg) && "Using an undefined register!");
+      continue;
+    }
+
+    // Skip if this is merely redefining part of a super-register.
+    if (RedefinesSuperRegPart(MI, MO, TRI))
+      continue;
+
+    // Implicit def is allowed to "re-define" any register. Similarly,
+    // implicitly defined registers can be clobbered.
+    assert((isReserved(Reg) || isUnused(Reg) ||
+            IsImpDef || isImplicitlyDefined(Reg) ||
+            isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+           "Re-defining a live register!");
+    setUsed(Reg, IsImpDef);
+  }
+}
+
+void RegScavenger::backward() {
+  assert(Tracking && "Not tracking states!");
+  assert(MBBI != MBB->begin() && "Already at start of basic block!");
+  // Move ptr backward.
+  MBBI = prior(MBBI);
+
+  MachineInstr *MI = MBBI;
+  DistanceMap.erase(MI);
+  --CurrDist;
+
+  // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> UseMOs;
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> DefMOs;
+  SmallVector<std::pair<const MachineOperand*, unsigned>, 4> EarlyClobberMOs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    if (MO.isUse())
+      UseMOs.push_back(std::make_pair(&MO,i));
+    else if (MO.isEarlyClobber())
+      EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+    else
+      DefMOs.push_back(std::make_pair(&MO,i));
+  }
+
+
+  // Process defs first.
+  unsigned NumECs = EarlyClobberMOs.size();
+  unsigned NumDefs = DefMOs.size();
+  for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+    const MachineOperand &MO = (i < NumDefs)
+      ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first;
+    // The index must be selected with the same condition as the operand
+    // above, otherwise the two go out of sync.
+    unsigned Idx = (i < NumDefs)
+      ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second;
+
+    // Skip two-address destination operand.
+    if (MI->isRegTiedToUseOperand(Idx))
+      continue;
+
+    unsigned Reg = MO.getReg();
+    assert(isUsed(Reg));
+    if (!isReserved(Reg))
+      setUnused(Reg, MI);
+  }
+
+  // Process uses.
+  BitVector UseRegs(NumPhysRegs);
+  for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+    const MachineOperand MO = *UseMOs[i].first;
+    unsigned Reg = MO.getReg();
+    assert(isUnused(Reg) || isReserved(Reg));
+    UseRegs.set(Reg);
+
+    // Set the sub-registers as "used".
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      UseRegs.set(SubReg);
+  }
+  setUsed(UseRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+  if (includeReserved)
+    used = ~RegsAvailable;
+  else
+    used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+       ++I)
+    Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     const BitVector &Candidates) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // Restrict the search to candidates.
+  RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     bool ExCalleeSaved) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // If looking for a non-callee-saved register, mask off all the callee-saved
+  // registers.
+  if (ExCalleeSaved)
+    RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+/// findFirstUse - Calculate the distance to the first use of the
+/// specified register.
+MachineInstr*
+RegScavenger::findFirstUse(MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator I, unsigned Reg,
+                           unsigned &Dist) {
+  MachineInstr *UseMI = 0;
+  Dist = ~0U;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+       RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    if (DI == DistanceMap.end()) {
+      // If it's not in the map, it's below the current MI, so initialize
+      // the map.
+      I = next(I);
+      unsigned Dist = CurrDist + 1;
+      while (I != MBB->end()) {
+        DistanceMap.insert(std::make_pair(I, Dist++));
+        I = next(I);
+      }
+    }
+    DI = DistanceMap.find(UDMI);
+    if (DI->second > CurrDist && DI->second < Dist) {
+      Dist = DI->second;
+      UseMI = UDMI;
+    }
+  }
+  return UseMI;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+                                        MachineBasicBlock::iterator I,
+                                        int SPAdj) {
+  assert(ScavengingFrameIndex >= 0 &&
+         "Cannot scavenge a register without an emergency spill slot!");
+
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector Candidates(NumPhysRegs, false);
+  CreateRegClassMask(RC, Candidates);
+  Candidates ^= ReservedRegs;  // Do not include reserved registers.
+
+  // Exclude all the registers being used by the instruction.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = I->getOperand(i);
+    if (MO.isReg())
+      Candidates.reset(MO.getReg());
+  }
+
+  // Find the register whose use is furthest away.
+  unsigned SReg = 0;
+  unsigned MaxDist = 0;
+  MachineInstr *MaxUseMI = 0;
+  int Reg = Candidates.find_first();
+  while (Reg != -1) {
+    unsigned Dist;
+    MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist);
+    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+      unsigned AsDist;
+      MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist);
+      if (AsDist < Dist) {
+        Dist = AsDist;
+        UseMI = AsUseMI;
+      }
+    }
+    if (Dist >= MaxDist) {
+      MaxDist = Dist;
+      MaxUseMI = UseMI;
+      SReg = Reg;
+    }
+    Reg = Candidates.find_next(Reg);
+  }
+
+  if (ScavengedReg != 0) {
+    assert(0 && "Scavenger slot is live, unable to scavenge another register!");
+    abort();
+  }
+
+  // Spill the scavenged register before I.
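+  // (The matching reload is inserted below, just before the first in-block
+  // use found above, or before the first terminator if there is no such
+  // use, so SReg's old value only lives in the emergency slot in between.)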
+  TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+  MachineBasicBlock::iterator II = prior(I);
+  TRI->eliminateFrameIndex(II, SPAdj, this);
+
+  // Restore the scavenged register before its use (or first terminator).
+  II = MaxUseMI
+    ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator();
+  TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC);
+  ScavengeRestore = prior(II);
+  ScavengedReg = SReg;
+  ScavengedRC = RC;
+
+  return SReg;
+}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..a8452dff272b
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,572 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include <climits>
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+  : TM(mf.getTarget()),
+    TII(TM.getInstrInfo()),
+    TRI(TM.getRegisterInfo()),
+    TLI(TM.getTargetLowering()),
+    MF(mf), MRI(mf.getRegInfo()),
+    ConstPool(MF.getConstantPool()),
+    EntrySU(), ExitSU() {
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(this);
+    else
+      cerr << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+                      MachineBasicBlock::iterator insertPos) {
+  BB = bb;
+  InsertPos = insertPos;
+
+  SUnits.clear();
+  Sequence.clear();
+  EntrySU = SUnit();
+  ExitSU = SUnit();
+
+  Schedule();
+
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+void SUnit::addPred(const SDep &D) {
+  // If this node already has this dependence, don't add a redundant one.
+  for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I)
+    if (*I == D)
+      return;
+  // Now add a corresponding succ to N.
+  SDep P = D;
+  P.setSUnit(this);
+  SUnit *N = D.getSUnit();
+  // Update the bookkeeping.
+  if (D.getKind() == SDep::Data) {
+    ++NumPreds;
+    ++N->NumSuccs;
+  }
+  if (!N->isScheduled)
+    ++NumPredsLeft;
+  if (!isScheduled)
+    ++N->NumSuccsLeft;
+  Preds.push_back(D);
+  N->Succs.push_back(P);
+  if (P.getLatency() != 0) {
+    this->setDepthDirty();
+    N->setHeightDirty();
+  }
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+  // Find the matching predecessor.
+  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I)
+    if (*I == D) {
+      bool FoundSucc = false;
+      // Find the corresponding successor in N.
+      SDep P = D;
+      P.setSUnit(this);
+      SUnit *N = D.getSUnit();
+      for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+             EE = N->Succs.end(); II != EE; ++II)
+        if (*II == P) {
+          FoundSucc = true;
+          N->Succs.erase(II);
+          break;
+        }
+      assert(FoundSucc && "Mismatching preds / succs lists!");
+      Preds.erase(I);
+      // Update the bookkeeping.
+      if (P.getKind() == SDep::Data) {
+        --NumPreds;
+        --N->NumSuccs;
+      }
+      if (!N->isScheduled)
+        --NumPredsLeft;
+      if (!isScheduled)
+        --N->NumSuccsLeft;
+      if (P.getLatency() != 0) {
+        this->setDepthDirty();
+        N->setHeightDirty();
+      }
+      return;
+    }
+}
+
+void SUnit::setDepthDirty() {
+  if (!isDepthCurrent) return;
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  do {
+    SUnit *SU = WorkList.pop_back_val();
+    SU->isDepthCurrent = false;
+    for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+         E = SU->Succs.end(); I != E; ++I) {
+      SUnit *SuccSU = I->getSUnit();
+      if (SuccSU->isDepthCurrent)
+        WorkList.push_back(SuccSU);
+    }
+  } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+  if (!isHeightCurrent) return;
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  do {
+    SUnit *SU = WorkList.pop_back_val();
+    SU->isHeightCurrent = false;
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+         E = SU->Preds.end(); I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (PredSU->isHeightCurrent)
+        WorkList.push_back(PredSU);
+    }
+  } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+  if (NewDepth <= getDepth())
+    return;
+  setDepthDirty();
+  Depth = NewDepth;
+  isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+  if (NewHeight <= getHeight())
+    return;
+  setHeightDirty();
+  Height = NewHeight;
+  isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the entry to the node.
+///
+void SUnit::ComputeDepth() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  do {
+    SUnit *Cur = WorkList.back();
+
+    bool Done = true;
+    unsigned MaxPredDepth = 0;
+    for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+         E = Cur->Preds.end(); I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (PredSU->isDepthCurrent)
+        MaxPredDepth = std::max(MaxPredDepth,
+                                PredSU->Depth + I->getLatency());
+      else {
+        Done = false;
+        WorkList.push_back(PredSU);
+      }
+    }
+
+    if (Done) {
+      WorkList.pop_back();
+      if (MaxPredDepth != Cur->Depth) {
+        Cur->setDepthDirty();
+        Cur->Depth = MaxPredDepth;
+      }
+      Cur->isDepthCurrent = true;
+    }
+  } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  do {
+    SUnit *Cur = WorkList.back();
+
+    bool Done = true;
+    unsigned MaxSuccHeight = 0;
+    for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+         E = Cur->Succs.end(); I != E; ++I) {
+      SUnit *SuccSU = I->getSUnit();
+      if (SuccSU->isHeightCurrent)
+        MaxSuccHeight = std::max(MaxSuccHeight,
+                                 SuccSU->Height + I->getLatency());
+      else {
+        Done = false;
+        WorkList.push_back(SuccSU);
+      }
+    }
+
+    if (Done) {
+      WorkList.pop_back();
+      if (MaxSuccHeight != Cur->Height) {
+        Cur->setHeightDirty();
+        Cur->Height = MaxSuccHeight;
+      }
+      Cur->isHeightCurrent = true;
+    }
+  } while (!WorkList.empty());
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      cerr << "   ";
+      switch (I->getKind()) {
+      case SDep::Data:   cerr << "val "; break;
+      case SDep::Anti:   cerr << "anti"; break;
+      case SDep::Output: cerr << "out "; break;
+      case SDep::Order:  cerr << "ch  "; break;
+      }
+      cerr << "#";
+      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      if (I->isArtificial())
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      cerr << "   ";
+      switch (I->getKind()) {
+      case SDep::Data:   cerr << "val "; break;
+      case SDep::Anti:   cerr << "anti"; break;
+      case SDep::Output: cerr << "out "; break;
+      case SDep::Order:  cerr << "ch  "; break;
+      }
+      cerr << "#";
+      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      if (I->isArtificial())
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  cerr << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+  bool AnyNotSched = false;
+  unsigned DeadNodes = 0;
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (!SUnits[i].isScheduled) {
+      if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+        ++DeadNodes;
+        continue;
+      }
+      if (!AnyNotSched)
+        cerr << "*** Scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+    if (SUnits[i].isScheduled &&
+        (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+          unsigned(INT_MAX)) {
+      if (!AnyNotSched)
+        cerr << "*** Scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has an unexpected "
+           << (isBottomUp ? "Height" : "Depth") << " value!\n";
+      AnyNotSched = true;
+    }
+    if (isBottomUp) {
+      if (SUnits[i].NumSuccsLeft != 0) {
+        if (!AnyNotSched)
+          cerr << "*** Scheduling failed! ***\n";
+        SUnits[i].dump(this);
+        cerr << "has successors left!\n";
+        AnyNotSched = true;
+      }
+    } else {
+      if (SUnits[i].NumPredsLeft != 0) {
+        if (!AnyNotSched)
+          cerr << "*** Scheduling failed! ***\n";
+        SUnits[i].dump(this);
+        cerr << "has predecessors left!\n";
+        AnyNotSched = true;
+      }
+    }
+  }
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
+  assert(!AnyNotSched);
+  assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+         "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+  unsigned DAGSize = SUnits.size();
+  std::vector<SUnit*> WorkList;
+  WorkList.reserve(DAGSize);
+
+  Index2Node.resize(DAGSize);
+  Node2Index.resize(DAGSize);
+
+  // Initialize the data structures.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    int NodeNum = SU->NodeNum;
+    unsigned Degree = SU->Succs.size();
+    // Temporarily use the Node2Index array as scratch space for degree counts.
+    Node2Index[NodeNum] = Degree;
+
+    // Is it a node without dependencies?
+    if (Degree == 0) {
+      assert(SU->Succs.empty() && "SUnit should have no successors");
+      // Collect leaf nodes.
+      WorkList.push_back(SU);
+    }
+  }
+
+  int Id = DAGSize;
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    Allocate(SU->NodeNum, --Id);
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      SUnit *SU = I->getSUnit();
+      if (!--Node2Index[SU->NodeNum])
+        // If all dependencies of the node are processed already,
+        // then the node can be computed now.
+        WorkList.push_back(SU);
+    }
+  }
+
+  Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+  // Check correctness of the ordering
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+             "Wrong topological sorting");
+    }
+  }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
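+/// Illustrative example: if Node2Index[Y] == 2 and Node2Index[X] == 5, the
+/// new edge X->Y violates the ordering, so DFS marks everything reachable
+/// from Y with index below 5, and Shift renumbers the window [2, 5] so the
+/// marked nodes come after X.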
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+  int UpperBound, LowerBound;
+  LowerBound = Node2Index[Y->NodeNum];
+  UpperBound = Node2Index[X->NodeNum];
+  bool HasLoop = false;
+  // Is Ord(X) < Ord(Y) ?
+  if (LowerBound < UpperBound) {
+    // Update the topological order.
+    Visited.reset();
+    DFS(Y, UpperBound, HasLoop);
+    assert(!HasLoop && "Inserted edge creates a loop!");
+    // Recompute topological indexes.
+    Shift(Visited, LowerBound, UpperBound);
+  }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an
+/// edge to be removed from the specified node N from the predecessors
+/// of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+  // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+                                     bool& HasLoop) {
+  std::vector<const SUnit*> WorkList;
+  WorkList.reserve(SUnits.size());
+
+  WorkList.push_back(SU);
+  do {
+    SU = WorkList.back();
+    WorkList.pop_back();
+    Visited.set(SU->NodeNum);
+    for (int I = SU->Succs.size()-1; I >= 0; --I) {
+      int s = SU->Succs[I].getSUnit()->NodeNum;
+      if (Node2Index[s] == UpperBound) {
+        HasLoop = true;
+        return;
+      }
+      // Visit successors if not already and in affected region.
+      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+        WorkList.push_back(SU->Succs[I].getSUnit());
+      }
+    }
+  } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+                                       int UpperBound) {
+  std::vector<int> L;
+  int shift = 0;
+  int i;
+
+  for (i = LowerBound; i <= UpperBound; ++i) {
+    // w is node at topological index i.
+    int w = Index2Node[i];
+    if (Visited.test(w)) {
+      // Unmark.
+      Visited.reset(w);
+      L.push_back(w);
+      shift = shift + 1;
+    } else {
+      Allocate(w, i - shift);
+    }
+  }
+
+  for (unsigned j = 0; j < L.size(); ++j) {
+    Allocate(L[j], i - shift);
+    i = i + 1;
+  }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+  if (IsReachable(TargetSU, SU))
+    return true;
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    if (I->isAssignedRegDep() &&
+        IsReachable(TargetSU, I->getSUnit()))
+      return true;
+  return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+                                             const SUnit *TargetSU) {
+  // If insertion of the edge SU->TargetSU would create a cycle
+  // then there is a path from TargetSU to SU.
+  int UpperBound, LowerBound;
+  LowerBound = Node2Index[TargetSU->NodeNum];
+  UpperBound = Node2Index[SU->NodeNum];
+  bool HasLoop = false;
+  // Is Ord(TargetSU) < Ord(SU) ?
+  if (LowerBound < UpperBound) {
+    Visited.reset();
+    // There may be a path from TargetSU to SU. Check for it.
+    DFS(TargetSU, UpperBound, HasLoop);
+  }
+  return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+  Node2Index[n] = index;
+  Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
+                                                     std::vector<SUnit> &sunits)
+ : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 000000000000..770f5bbbdbb1
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,71 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
+  MI->addMemOperand(MF, MO);
+}
+
+void ScheduleDAG::EmitNoop() {
+  TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (I->getSUnit()->CopyDstRC) {
+      // Copy to physical register.
+      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+      // Find the destination physical register.
+      unsigned Reg = 0;
+      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+           EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->getReg()) {
+          Reg = II->getReg();
+          break;
+        }
+      }
+      TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
+                        SU->CopyDstRC, SU->CopySrcRC);
+    } else {
+      // Copy from physical register.
+      assert(I->getReg() && "Unknown physical register!");
+      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+      isNew = isNew; // Silence compiler warning.
+      assert(isNew && "Node emitted out of order - early");
+      TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
+                        SU->CopyDstRC, SU->CopySrcRC);
+    }
+    break;
+  }
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..8e18b3d17fda
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,468 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+                                     const MachineLoopInfo &mli,
+                                     const MachineDominatorTree &mdt)
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+                            MachineBasicBlock::iterator begin,
+                            MachineBasicBlock::iterator end,
+                            unsigned endcount) {
+  BB = bb;
+  Begin = begin;
+  InsertPosIndex = endcount;
+
+  ScheduleDAG::Run(bb, end);
+}
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getOpcode();
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    return CE->getOpcode();
+  // Use UserOp1 to mean there's no opcode.
+  return Instruction::UserOp1;
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+  do {
+    if (const User *U = dyn_cast<User>(V)) {
+      // If we find a ptrtoint, we can transfer control back to the
+      // regular getUnderlyingObjectFromInt.
+      if (getOpcode(U) == Instruction::PtrToInt)
+        return U->getOperand(0);
+      // If we find an add of a constant or a multiplied value, it's
+      // likely that the other operand will lead us to the base
+      // object. We don't have to worry about the case where the
+      // object address is somehow being computed by the multiply,
+      // because our callers only care when the result is an
+      // identifiable object.
+      if (getOpcode(U) != Instruction::Add ||
+          (!isa<ConstantInt>(U->getOperand(1)) &&
+           getOpcode(U->getOperand(1)) != Instruction::Mul))
+        return V;
+      V = U->getOperand(0);
+    } else {
+      return V;
+    }
+    assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+  } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+  // First just call Value::getUnderlyingObject to let it do what it does.
+  do {
+    V = V->getUnderlyingObject();
+    // If it found an inttoptr, use special code to continue climbing.
+    if (getOpcode(V) != Instruction::IntToPtr)
+      break;
+    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+    // If that succeeded in finding a pointer, continue the search.
+    if (!isa<PointerType>(O->getType()))
+      break;
+    V = O;
+  } while (1);
+  return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
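+/// (For instance, a load whose single memoperand refers to a fixed stack
+/// slot or a global resolves here to that PseudoSourceValue or global,
+/// which is what enables the precise MemDefs/MemUses tracking below.)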
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
+  if (!MI->hasOneMemOperand() ||
+      !MI->memoperands_begin()->getValue() ||
+      MI->memoperands_begin()->isVolatile())
+    return 0;
+
+  const Value *V = MI->memoperands_begin()->getValue();
+  if (!V)
+    return 0;
+
+  V = getUnderlyingObject(V);
+  if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V))
+    return 0;
+
+  return V;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+  if (MachineLoop *ML = MLI.getLoopFor(BB))
+    if (BB == ML->getLoopLatch()) {
+      MachineBasicBlock *Header = ML->getHeader();
+      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+           E = Header->livein_end(); I != E; ++I)
+        LoopLiveInRegs.insert(*I);
+      LoopRegs.VisitLoop(ML);
+    }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph() {
+  // We'll be allocating one SUnit for each instruction, plus one for
+  // the region exit node.
+  SUnits.reserve(BB->size());
+
+  // We build scheduling units by walking a block's instruction list from
+  // bottom to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed. If
+  // ChainMMO is null, this is assumed to have arbitrary side-effects. If
+  // ChainMMO is non-null, then Chain makes only a single memory reference.
+  SUnit *Chain = 0;
+  MachineMemOperand *ChainMMO = 0;
+
+  // Memory references to specific known memory locations are tracked so that
+  // they can be given more precise dependencies.
+  std::map<const Value *, SUnit *> MemDefs;
+  std::map<const Value *, std::vector<SUnit *> > MemUses;
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  // Ask the target if address-backscheduling is desirable, and if so how much.
+  unsigned SpecialAddressLatency =
+    TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+
+  // Walk the list of instructions, from bottom moving up.
+  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+       MII != MIE; --MII) {
+    MachineInstr *MI = prior(MII);
+    const TargetInstrDesc &TID = MI->getDesc();
+    assert(!TID.isTerminator() && !MI->isLabel() &&
+           "Cannot schedule terminators or labels!");
+    // Create the SUnit for this MI.
+    SUnit *SU = NewSUnit(MI);
+
+    // Assign the Latency field of SU using target-provided information.
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
+
+    // Add register-based dependencies (data, anti, and output).
+    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+      const MachineOperand &MO = MI->getOperand(j);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+
+      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+      std::vector<SUnit *> &UseList = Uses[Reg];
+      std::vector<SUnit *> &DefList = Defs[Reg];
+      // Optionally add output and anti dependencies.
+      // TODO: Using a latency of 1 here assumes there's no cost for
+      //       reusing registers.
+      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+        SUnit *DefSU = DefList[i];
+        if (DefSU != SU &&
+            (Kind != SDep::Output || !MO.isDead() ||
+             !DefSU->getInstr()->registerDefIsDead(Reg)))
+          DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        std::vector<SUnit *> &DefList = Defs[*Alias];
+        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+          SUnit *DefSU = DefList[i];
+          if (DefSU != SU &&
+              (Kind != SDep::Output || !MO.isDead() ||
+               !DefSU->getInstr()->registerDefIsDead(Reg)))
+            DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+        }
+      }
+
+      if (MO.isDef()) {
+        // Add any data dependencies.
+        unsigned DataLatency = SU->Latency;
+        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+          SUnit *UseSU = UseList[i];
+          if (UseSU != SU) {
+            unsigned LDataLatency = DataLatency;
+            // Optionally add in a special extra latency for nodes that
+            // feed addresses.
+            // TODO: Do this for register aliases too.
+            if (SpecialAddressLatency != 0 && !UnitLatencies) {
+              MachineInstr *UseMI = UseSU->getInstr();
+              const TargetInstrDesc &UseTID = UseMI->getDesc();
+              int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+              assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+              if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+                  (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+                LDataLatency += SpecialAddressLatency;
+            }
+            UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
+          }
+        }
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          std::vector<SUnit *> &UseList = Uses[*Alias];
+          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+            SUnit *UseSU = UseList[i];
+            if (UseSU != SU)
+              UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+          }
+        }
+
+        // If a def is going to wrap back around to the top of the loop,
+        // backschedule it.
+        if (!UnitLatencies && DefList.empty()) {
+          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+          if (I != LoopRegs.Deps.end()) {
+            const MachineOperand *UseMO = I->second.first;
+            unsigned Count = I->second.second;
+            const MachineInstr *UseMI = UseMO->getParent();
+            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            // TODO: If we knew the total depth of the region here, we could
+            // handle the case where the whole loop is inside the region but
+            // is large enough that the isScheduleHigh trick isn't needed.
+            if (UseMOIdx < UseTID.getNumOperands()) {
+              // Currently, we only support scheduling regions consisting of
+              // single basic blocks. Check to see if the instruction is in
+              // the same region by checking to see if it has the same parent.
+              if (UseMI->getParent() != MI->getParent()) {
+                unsigned Latency = SU->Latency;
+                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+                  Latency += SpecialAddressLatency;
+                // This is a wild guess as to the portion of the latency which
+                // will be overlapped by work done outside the current
+                // scheduling region.
+                Latency -= std::min(Latency, Count);
+                // Add the artificial edge.
+                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+                                    /*Reg=*/0, /*isNormalMemory=*/false,
+                                    /*isMustAlias=*/false,
+                                    /*isArtificial=*/true));
+              } else if (SpecialAddressLatency > 0 &&
+                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+                // The entire loop body is within the current scheduling region
+                // and the latency of this operation is assumed to be greater
+                // than the latency of the loop.
+                // TODO: Recursively mark data-edge predecessors as
+                //       isScheduleHigh too.
+                SU->isScheduleHigh = true;
+              }
+            }
+            LoopRegs.Deps.erase(I);
+          }
+        }
+
+        UseList.clear();
+        if (!MO.isDead())
+          DefList.clear();
+        DefList.push_back(SU);
+      } else {
+        UseList.push_back(SU);
+      }
+    }
+
+    // Add chain dependencies.
+    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+    // after stack slots are lowered to actual addresses.
+    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+    // produce more precise dependence information.
+    if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
+    new_chain:
+      // This is the conservative case. Add dependencies on all memory
+      // references.
+      if (Chain)
+        Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      Chain = SU;
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+      PendingLoads.clear();
+      for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+           E = MemDefs.end(); I != E; ++I) {
+        I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+        I->second = SU;
+      }
+      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+           MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
+        I->second.clear();
+      }
+      // See if it is known to just have a single memory reference.
+      MachineInstr *ChainMI = Chain->getInstr();
+      const TargetInstrDesc &ChainTID = ChainMI->getDesc();
+      if (!ChainTID.isCall() &&
+          !ChainTID.hasUnmodeledSideEffects() &&
+          ChainMI->hasOneMemOperand() &&
+          !ChainMI->memoperands_begin()->isVolatile() &&
+          ChainMI->memoperands_begin()->getValue())
+        // We know that the Chain accesses one specific memory location.
+        ChainMMO = &*ChainMI->memoperands_begin();
+      else
+        // Unknown memory accesses. Assume the worst.
+        ChainMMO = 0;
+    } else if (TID.mayStore()) {
+      if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+        // A store to a specific PseudoSourceValue. Add precise dependencies.
+        // Handle the def in MemDefs, if there is one.
+        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+        if (I != MemDefs.end()) {
+          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                  /*isNormalMemory=*/true));
+          I->second = SU;
+        } else {
+          MemDefs[V] = SU;
+        }
+        // Handle the uses in MemUses, if there are any.
+        std::map<const Value *, std::vector<SUnit *> >::iterator J =
+          MemUses.find(V);
+        if (J != MemUses.end()) {
+          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+            J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                       /*isNormalMemory=*/true));
+          J->second.clear();
+        }
+        // Add dependencies from all the PendingLoads, since without
+        // memoperands we must assume they alias anything.
+        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+          PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+        // Add a general dependence too, if needed.
+        if (Chain)
+          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      } else
+        // Treat all other stores conservatively.
+        goto new_chain;
+    } else if (TID.mayLoad()) {
+      if (TII->isInvariantLoad(MI)) {
+        // Invariant load, no chain dependencies needed!
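+        // Such loads (e.g. from constant pools) can never conflict with any
+        // store in the region, so they join the graph with register
+        // dependencies only.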
+      } else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+        // A load from a specific PseudoSourceValue. Add precise dependencies.
+        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+        if (I != MemDefs.end())
+          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                  /*isNormalMemory=*/true));
+        MemUses[V].push_back(SU);
+
+        // Add a general dependence too, if needed.
+        if (Chain && (!ChainMMO ||
+                      (ChainMMO->isStore() || ChainMMO->isVolatile())))
+          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      } else if (MI->hasVolatileMemoryRef()) {
+        // Treat volatile loads conservatively. Note that this includes
+        // cases where memoperand information is unavailable.
+        goto new_chain;
+      } else {
+        // A normal load. Depend on the general chain, as well as on
+        // all stores. In the absence of MachineMemOperand information,
+        // we can't even assume that the load doesn't alias well-behaved
+        // memory locations.
+        if (Chain)
+          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+        for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+             E = MemDefs.end(); I != E; ++I)
+          I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+        PendingLoads.push_back(SU);
+      }
+    }
+  }
+
+  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+    Defs[i].clear();
+    Uses[i].clear();
+  }
+  PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::FinishBlock() {
+  // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+  // Compute the latency for the node. We use the sum of the latencies for
+  // all nodes flagged together into this SUnit.
+  SU->Latency =
+    InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+
+  // Simplistic target-independent heuristic: assume that loads take
+  // extra time.
+  if (InstrItins.isEmpty())
+    if (SU->getInstr()->getDesc().mayLoad())
+      SU->Latency += 2;
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+  SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+  std::string s;
+  raw_string_ostream oss(s);
+  if (SU == &EntrySU)
+    oss << "<entry>";
+  else if (SU == &ExitSU)
+    oss << "<exit>";
+  else
+    SU->getInstr()->print(oss);
+  return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+  // For MachineInstr-based scheduling, we're rescheduling the instructions in
+  // the block, so start by removing them from the block.
+  while (Begin != InsertPos) {
+    MachineBasicBlock::iterator I = Begin;
+    ++Begin;
+    BB->remove(I);
+  }
+
+  // Then re-insert them according to the given schedule.
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    SUnit *SU = Sequence[i];
+    if (!SU) {
+      // Null SUnit* is a noop.
+      EmitNoop();
+      continue;
+    }
+
+    BB->insert(InsertPos, SU->getInstr());
+  }
+
+  // Update the Begin iterator, as the first instruction in the block
+  // may have been scheduled later.
+  if (!Sequence.empty())
+    Begin = Sequence[0]->getInstr();
+
+  return BB;
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 000000000000..00d6268d1a14
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,184 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+
+namespace llvm {
+  class MachineLoopInfo;
+  class MachineDominatorTree;
+
+  /// LoopDependencies - This class analyzes loop-oriented register
+  /// dependencies, which are used to guide scheduling decisions.
+  /// For example, loop induction variable increments should be
+  /// scheduled as soon as possible after the variable's last use.
+  ///
+  class VISIBILITY_HIDDEN LoopDependencies {
+    const MachineLoopInfo &MLI;
+    const MachineDominatorTree &MDT;
+
+  public:
+    typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+      LoopDeps;
+    LoopDeps Deps;
+
+    LoopDependencies(const MachineLoopInfo &mli,
+                     const MachineDominatorTree &mdt) :
+      MLI(mli), MDT(mdt) {}
+
+    /// VisitLoop - Clear out any previous state and analyze the given loop.
+    ///
+    void VisitLoop(const MachineLoop *Loop) {
+      Deps.clear();
+      MachineBasicBlock *Header = Loop->getHeader();
+      SmallSet<unsigned, 8> LoopLiveIns;
+      for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+           LE = Header->livein_end(); LI != LE; ++LI)
+        LoopLiveIns.insert(*LI);
+
+      const MachineDomTreeNode *Node = MDT.getNode(Header);
+      const MachineBasicBlock *MBB = Node->getBlock();
+      assert(Loop->contains(MBB) &&
+             "Loop does not contain header!");
+      VisitRegion(Node, MBB, Loop, LoopLiveIns);
+    }
+
+  private:
+    void VisitRegion(const MachineDomTreeNode *Node,
+                     const MachineBasicBlock *MBB,
+                     const MachineLoop *Loop,
+                     const SmallSet<unsigned, 8> &LoopLiveIns) {
+      unsigned Count = 0;
+      for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+           I != E; ++I, ++Count) {
+        const MachineInstr *MI = I;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &MO = MI->getOperand(i);
+          if (!MO.isReg() || !MO.isUse())
+            continue;
+          unsigned MOReg = MO.getReg();
+          if (LoopLiveIns.count(MOReg))
+            Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+        }
+      }
+
+      const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+      for (std::vector<MachineDomTreeNode*>::const_iterator I =
+           Children.begin(), E = Children.end(); I != E; ++I) {
+        const MachineDomTreeNode *ChildNode = *I;
+        MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+        if (Loop->contains(ChildBlock))
+          VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+      }
+    }
+  };
+
+  /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+  /// MachineInstrs.
+  class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+    const MachineLoopInfo &MLI;
+    const MachineDominatorTree &MDT;
+
+    /// Defs, Uses - Remember where defs and uses of each physical register
+    /// are as we iterate upward through the instructions. This is allocated
+    /// here instead of inside BuildSchedGraph to avoid the need for it to be
+    /// initialized and destructed for each block.
+    std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
+    std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// PendingLoads - Remember where unknown loads are after the most recent
+    /// unknown store, as we iterate. As with Defs and Uses, this is here
+    /// to minimize construction/destruction.
+    std::vector<SUnit *> PendingLoads;
+
+    /// LoopRegs - Track which registers are used for loop-carried
+    /// dependencies.
+    ///
+    LoopDependencies LoopRegs;
+
+    /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+    /// back-edge-aware scheduling.
+    ///
+    SmallSet<unsigned, 8> LoopLiveInRegs;
+
+  public:
+    MachineBasicBlock *BB;              // Current basic block
+    MachineBasicBlock::iterator Begin;  // The beginning of the range to
+                                        // be scheduled. The range extends
+                                        // to InsertPos.
+    unsigned InsertPosIndex;            // The index in BB of InsertPos.
+
+    explicit ScheduleDAGInstrs(MachineFunction &mf,
+                               const MachineLoopInfo &mli,
+                               const MachineDominatorTree &mdt);
+
+    virtual ~ScheduleDAGInstrs() {}
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+      const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Run - perform scheduling.
+    ///
+    void Run(MachineBasicBlock *bb,
+             MachineBasicBlock::iterator begin,
+             MachineBasicBlock::iterator end,
+             unsigned endindex);
+
+    /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
+    /// given as input.
+    virtual void BuildSchedGraph();
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    virtual MachineBasicBlock *EmitSchedule();
+
+    /// StartBlock - Prepare to perform scheduling in the given block.
+    ///
+    virtual void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    /// FinishBlock - Clean up after scheduling in the given block.
+    ///
+    virtual void FinishBlock();
+
+    virtual void dumpNode(const SUnit *SU) const;
+
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..594c24d11d1e
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const ScheduleDAG *G) {
+      return G->MF.getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+
+    static bool hasNodeAddressLabel(const SUnit *Node,
+                                    const ScheduleDAG *Graph) {
+      return true;
+    }
+
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    static std::string getEdgeAttributes(const SUnit *Node,
+                                         SUnitIterator EI) {
+      if (EI.isArtificialDep())
+        return "color=cyan,style=dashed";
+      if (EI.isCtrlDep())
+        return "color=blue,style=dashed";
+      return "";
+    }
+
+
+    static std::string getNodeLabel(const SUnit *Node,
+                                    const ScheduleDAG *Graph);
+    static std::string getNodeAttributes(const SUnit *N,
+                                         const ScheduleDAG *Graph) {
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(ScheduleDAG *G,
+                                       GraphWriter<ScheduleDAG*> &GW) {
+      return G->addCustomGraphFeatures(GW);
+    }
+  };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+                                                       const ScheduleDAG *G) {
+  return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+  if (BB->getBasicBlock())
+    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+              "Scheduling-Units Graph for " + MF.getFunction()->getName() +
+              ':' + BB->getBasicBlock()->getName());
+  else
+    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+              "Scheduling-Units Graph for " + MF.getFunction()->getName());
+#else
+  cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
new file mode 100644
index 000000000000..9ea59ea80c61
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -0,0 +1,22 @@
+add_llvm_library(LLVMSelectionDAG
+  CallingConvLower.cpp
+  DAGCombiner.cpp
+  FastISel.cpp
+  LegalizeDAG.cpp
+  LegalizeFloatTypes.cpp
+  LegalizeIntegerTypes.cpp
+  LegalizeTypes.cpp
+  LegalizeTypesGeneric.cpp
+  LegalizeVectorOps.cpp
+  LegalizeVectorTypes.cpp
+  ScheduleDAGSDNodes.cpp
+  ScheduleDAGSDNodesEmit.cpp
+  ScheduleDAGFast.cpp
+  ScheduleDAGList.cpp
+  ScheduleDAGRRList.cpp
+  SelectionDAGBuild.cpp
+  SelectionDAG.cpp
+  SelectionDAGISel.cpp
+  SelectionDAGPrinter.cpp
+  TargetLowering.cpp
+  )
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 000000000000..7cd2b73e8704
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,148 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+                 SmallVector<CCValAssign, 16> &locs)
+  : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+    TRI(*TM.getRegisterInfo()), Locs(locs) {
+  // No stack is used.
+  StackOffset = 0;
+
+  UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+                          MVT LocVT, CCValAssign::LocInfo LocInfo,
+                          int MinSize, int MinAlign,
+                          ISD::ArgFlagsTy ArgFlags) {
+  unsigned Align = ArgFlags.getByValAlign();
+  unsigned Size  = ArgFlags.getByValSize();
+  if (MinSize > (int)Size)
+    Size = MinSize;
+  if (MinAlign > (int)Align)
+    Align = MinAlign;
+  unsigned Offset = AllocateStack(Size, Align);
+
+  addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+  UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+  if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+    for (; (Reg = *RegAliases); ++RegAliases)
+      UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { + unsigned NumArgs = TheArgs->getNumValues()-1; + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheArgs->getValueType(i); + ISD::ArgFlagsTy ArgFlags = + cast(TheArgs->getOperand(3+i))->getArgFlags(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Formal argument #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { + MVT VT = TheRet->getOperand(i*2+1).getValueType(); + ISD::ArgFlagsTy ArgFlags = + cast(TheRet->getOperand(i*2+2))->getArgFlags(); + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ + cerr << "Return operand #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { + unsigned NumOps = TheCall->getNumArgs(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, + SmallVectorImpl &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { + for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { + MVT VT = TheCall->getRetValType(i); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (TheCall->isInreg()) + Flags.setInReg(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { + cerr << "Call result #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { + cerr << "Call result has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } +} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 000000000000..4c1710dd81fa --- /dev/null +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,6203 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+
+namespace {
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+                     cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class VISIBILITY_HIDDEN DAGCombiner {
+    SelectionDAG &DAG;
+    const TargetLowering &TLI;
+    CombineLevel Level;
+    CodeGenOpt::Level OptLevel;
+    bool LegalOperations;
+    bool LegalTypes;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users
+    /// of the instruction to the work list because they might get more
+    /// simplified now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDValue visit(SDNode *N);
+
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
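+    /// Uses the erase-remove idiom, so each call scans the entire worklist;
+    /// AddToWorkList's remove-then-push discipline keeps duplicates out, but
+    /// erase-remove would handle them regardless.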
+ /// + void removeFromWorkList(SDNode *N) { + WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), + WorkList.end()); + } + + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + + /// SimplifyDemandedBits - Check the specified integer node value to see if + /// it can be simplified or if things it uses can be simplified by bit + /// propagation. If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. + // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + 
SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + MVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + +public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), + TLI(D.getTargetLoweringInfo()), + Level(Unrestricted), + OptLevel(OL), + LegalOperations(false), + LegalTypes(false), + AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class VISIBILITY_HIDDEN WorkListRemover : + public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. 
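+    // Only deletions matter: the worklist holds raw SDNode pointers, so any
+    // node the DAG deletes during a replacement must be purged before it is
+    // popped and dereferenced.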
+ } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!UnsafeFPMath) return 0; + + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. 
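+  // GetNegatedExpression is only called on values that isNegatibleForFree
+  // accepted, so the one-use requirement is enforced as an assertion here
+  // rather than as a bailout.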
+ assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: assert(0 && "Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + assert(UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast(N.getOperand(2))->getAPIntValue() == 1 && + cast(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. 
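+// For example, (select_cc lhs, rhs, 1, 0, cc) behaves exactly like
+// (setcc lhs, rhs, cc); if it has a single use, its sense can be inverted
+// without penalizing any other user.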
+static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + MVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa(N0.getOperand(1))) { + if (isa(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast(N0.getOperand(1)), + cast(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } else if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa(N1.getOperand(1))) { + if (isa(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast(N1.getOperand(1)), + cast(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } else if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); + DOUT << " and " << NumTo-1 << " other values\n"; + DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + assert(N->getValueType(i) == To[i].getValueType() && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To, &DeadNodes); + + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void +DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & + TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. 
+ if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); + DOUT << '\n'; + + CommitTargetLoweringOpt(TLO); + return true; +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= NoIllegalOperations; + LegalTypes = Level >= NoIllegalTypes; + + // Add all the dag nodes to the worklist. + WorkList.reserve(DAG.allnodes_size()); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + DAG.DeleteNode(N); + continue; + } + + SDValue RV = combine(N); + + if (RV.getNode() == 0) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. 
+ if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.getNode()); + AddUsersToWorkList(RV.getNode()); + + // Add any uses of the old node to the worklist in case this node is the + // last one that uses them. They may become dead after this node is + // deleted. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch(N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case 
ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.getNode() == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level == Unrestricted, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (RV.getNode() == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa(N0) || !isa(N1)) { + SDValue Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// getInputChainForNode - Given a node, return its input chain if it has one, +/// otherwise return a null sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. + if (N->getNumOperands() == 2) { + if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) + return N->getOperand(0); + if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) + return N->getOperand(1); + } + + SmallVector TFs; // List of token factors to visit. + SmallVector Ops; // Ops for replacing token factor. + SmallPtrSet SeenOps; + bool Changed = false; // If we should replace this token factor. + + // Start out with this token factor. + TFs.push_back(N); + + // Iterate through token factors. The TFs grows when new token factors are + // encountered. + for (unsigned i = 0; i < TFs.size(); ++i) { + SDNode *TF = TFs[i]; + + // Check each of the operands. 
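+    // Each operand lands in one of three buckets: EntryToken is dropped as
+    // redundant, a nested TokenFactor may be merged into this one, and
+    // anything else is kept (once) as an operand of the replacement node.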
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if ((CombinerAA || Op.hasOneUse()) &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDValue Result;
+
+  // If we've changed things around then replace token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorkListRemover DeadNodes(*this);
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+                                  &DeadNodes);
+  removeFromWorkList(N);
+  DAG.DeleteNode(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+                              SelectionDAG &DAG) {
+  MVT VT = N0.getValueType();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+    N0 = DAG.getNode(ISD::ADD, DL, VT,
+                     DAG.getNode(ISD::SHL, DL, VT, N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, DL, VT, N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (add Sym, c) -> Sym+c
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+        GA->getOpcode() == ISD::GlobalAddress)
+      return DAG.getGlobalAddress(GA->getGlobal(), VT,
+                                  GA->getOffset() +
+                                  (uint64_t)N1C->getSExtValue());
+  // fold ((c1-A)+c2) -> (c1+c2)-A
+  if (N1C && N0.getOpcode() == ISD::SUB)
+    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getConstant(N1C->getAPIntValue()+
+                                         N0C->getAPIntValue(), VT),
+                         N0.getOperand(1));
+  // reassociate add
+  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+  if (RADD.getNode() != 0)
+    return RADD;
+  // fold ((0-A) + B) -> B-A
+  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+  // fold (A + (0-B)) -> A-B
+  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+  // fold (A+(B-A)) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+    return N1.getOperand(0);
+  // fold ((B-A)+A) -> B
+  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+    return N0.getOperand(0);
+  // fold (A+(B-(A+C))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(0))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(1));
+  // fold (A+(B-(C+A))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(1))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(0));
+  // fold (A+((B-A)+or-C)) to (B+or-C)
+  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+      N1.getOperand(0).getOpcode() == ISD::SUB &&
+      N0 == N1.getOperand(0).getOperand(1))
+    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+                       N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+
+    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+  }
+
+  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if (VT.isInteger() && !VT.isVector()) {
+    APInt LHSZero, LHSOne;
+    APInt RHSZero, RHSOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+    if (LHSZero.getBoolValue()) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+    }
+  }
+
+  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+    if (Result.getNode()) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+    if (Result.getNode()) return Result;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+
+  // If the flag result is dead, turn this into an ADD.
+  if (N->hasNUsesOfValue(0, 1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE,
+                                 N->getDebugLoc(), MVT::Flag));
+
+  // canonicalize constant to RHS.
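+  // Keeping the constant on the right means the remaining folds only have
+  // to check N1C, mirroring the canonicalization done in visitADD above.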
+ if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If 
the relocation model supports it, consider symbol offsets. + if (GlobalAddressSDNode *GA = dyn_cast(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + VT); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, getShiftAmountTy()))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
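+  // Sh/Y are filled in below; if neither side matches, Sh stays null and
+  // the transform is skipped.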
+ if (N0.getOpcode() == ISD::SHL && isa(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSExtValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSExtValue()) || + isPowerOf2_64(-N1C->getSExtValue()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + int64_t pow2 = N1C->getSExtValue(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy())); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy())); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. 
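+  // (BuildSDIV emits a multiply-by-magic-constant sequence; cf. Granlund &
+  // Montgomery, "Division by Invariant Integers using Multiplication".)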
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && + !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + MVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
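+  // (The expansion is only kept when combine() actually simplifies the
+  // generated SDIV; otherwise the speculative nodes are left to die unused.)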
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
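+  // (Same approach as in visitSREM above, but going through the
+  // unsigned-divide combines.)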
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N->getValueType(0); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + getShiftAmountTy())); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N->getValueType(0); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Return true if a simplification was made. +/// +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them. 
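+  // Each half is rebuilt as a single-result node and fed back through
+  // combine(); if either copy simplifies, it replaces this node entirely.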
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + MVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + N0.getOpcode() == ISD::SIGN_EXTEND || + (N0.getOpcode() == ISD::TRUNCATE && + !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N1.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast(CC0)->get(); + ISD::CondCode Op1 = cast(CC1)->get(); + + if (LR == RR && isa(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. 
+ unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + if (N1C && N0.getOpcode() == ISD::LOAD) { + LoadSDNode *LN0 = cast(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && + // Do not change the width of a volatile load. + !LN0->isVolatile()) { + MVT EVT = MVT::Other; + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) + EVT = MVT::getIntegerVT(ActiveBits); + + MVT LoadedVT = LN0->getMemoryVT(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to add an offset to the pointer to + // load the correct bytes. For little endian systems, we merely need to + // read fewer bytes from the same pointer. + unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; + unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + if (TLI.isBigEndian()) { + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), + NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), + EVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (or x, undef) -> -1 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(~0ULL, VT); + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + // reassociate or + SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + if (ROR.getNode() != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa(N0.getOperand(1))) { + ConstantSDNode *C1 = cast(N0.getOperand(1)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast(CC0)->get(); + ISD::CondCode Op1 = cast(CC1)->get(); + + if (LR == RR && isa(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 
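+  // Here C3 = C1 | C2. The transform is only safe when the bits X would
+  // contribute under C2 but not C1 are known zero (and symmetrically for Y),
+  // which the MaskedValueIsZero checks below establish.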
+ if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + return SDValue(Rot, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + MVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. 
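+  // After this swap LHSShift is always the SHL half, so the rotate matching
+  // below only has to reason about one orientation.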
+ if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast(RHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + } + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast(LHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTR) + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + } + } + } + + // Look for sign/zext/any-extended or truncate cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + SDValue LExtOp0 = LHSShiftAmt.getOperand(0); + SDValue RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + 
dyn_cast(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, + LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + } + + return 0; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), + isInt); + + if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + switch (N0.getOpcode()) { + default: + assert(0 && "Unhandled SetCC Equivalent!"); + abort(); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa(RHS) || isa(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. + ConstantSDNode *BinOpCst = dyn_cast(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. 
+  // If it is not a shift, it pessimizes some common cases like:
+  //
+  //    void foo(int *X, int i) { X[i & 1235] = 1; }
+  //    int bar(int *X, int i) { return X[i & 255]; }
+  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+       BinOpLHSVal->getOpcode() != ISD::SRA &&
+       BinOpLHSVal->getOpcode() != ISD::SRL) ||
+      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+    return SDValue();
+
+  MVT VT = N->getValueType(0);
+
+  // If this is a signed shift right, and the high bit is modified by the
+  // logical operation, do not perform the transformation. The highBitSet
+  // boolean indicates the value of the high bit of the constant which would
+  // cause it to be modified for this operation.
+  if (N->getOpcode() == ISD::SRA) {
+    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+    if (BinOpRHSSignSet != HighBitSet)
+      return SDValue();
+  }
+
+  // Fold the constants, shifting the binop RHS by the shift amount.
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+                               N->getValueType(0),
+                               LHS->getOperand(1), N->getOperand(1));
+
+  // Create the new shift.
+  SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+                                 VT, LHS->getOperand(0), N->getOperand(1));
+
+  // Create the new binop.
+  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getSizeInBits();
+
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDValue(N, 0),
+                            APInt::getAllOnesValue(VT.getSizeInBits())))
+    return DAG.getConstant(0, VT);
+  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
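+  // (The constant mask is truncated along with y, so no bits beyond the
+  // narrower shift-amount type survive.)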
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SHL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or + // (srl (and x, (shl -1, c1)), (sub c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << c1, VT)); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N0.getValueType(); + + // fold (sra c1, c2) -> (sra c1, c2) + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + // fold (sra 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (sra -1, x) -> -1 + if (N0C && N0C->isAllOnesValue()) + return N0; + // fold (sra x, (setge c, size(x))) -> undef + if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + return DAG.getUNDEF(VT); + // fold (sra x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports + // sext_inreg. 
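+  // (Shifting left and then arithmetic-right by the same amount is exactly
+  // a sign extension from the surviving low bits, provided a legal narrower
+  // integer type of that width exists.)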
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+    MVT EVT = MVT::getIntegerVT(LowBits);
+    if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         N0.getOperand(0), DAG.getValueType(EVT));
+  }
+
+  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+      if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  //   -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+  // result_size - n != m.
+  // If truncate is free for the target, sext(shl) is likely to result in
+  // better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine what the truncate's result bitsize and type would be.
+      unsigned VTValSize = VT.getSizeInBits();
+      MVT TruncVT =
+        MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+      // Determine the residual right-shift amount.
+      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncated-to type is legal, sign_extend is legal
+      // on that type, and the truncate to that type is both legal and free,
+      // perform the transform.
+      if ((ShiftAmt > 0) &&
+          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+          TLI.isTruncateFree(VT, TruncVT)) {
+
+        SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+                                    N0.getOperand(0), Amt);
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+                                    Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+                           N->getValueType(0), Trunc);
+      }
+    }
+  }
+
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      MVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // Simplify, based on bits shifted out of the LHS.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // If the sign bit is known to be zero, switch this to a SRL.
+  if (DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+  return N1C ?
visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (srl c1, c2) -> c1 >>u c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + // fold (srl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + MVT SmallVT = N0.getOperand(0).getValueType(); + if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + return DAG.getUNDEF(VT); + + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign + // bit, which is unmodified by sra. + if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N0.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + } + + // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). + if (N1C && N0.getOpcode() == ISD::CTLZ && + N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + + // If any of the input bits are KnownOne, then the input couldn't be all + // zeros, thus the result of the srl will always be zero. + if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + + // If all of the bits input the to ctlz node are known to be zero, then + // the result of the ctlz is "32" and the result of the shift is one. + APInt UnknownBits = ~KnownZero & Mask; + if (UnknownBits == 0) return DAG.getConstant(1, VT); + + // Otherwise, check to see if there is exactly one bit input to the ctlz. + if ((UnknownBits & (UnknownBits - 1)) == 0) { + // Okay, we know that only that the single bit specified by UnknownBits + // could be set on input to the CTLZ node. If this bit is set, the SRL + // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair + // to an SRL/XOR pair, which is likely to simplify more. 
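+  // (The SRL below moves the single candidate bit down to bit 0, and the
+  // XOR with 1 inverts it: bit set -> 0, bit clear -> 1, matching the
+  // original CTLZ/SRL result.)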
+ unsigned ShAmt = UnknownBits.countTrailingZeros(); + SDValue Op = N0.getOperand(0); + + if (ShAmt) { + Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + DAG.getConstant(ShAmt, getShiftAmountTy())); + AddToWorkList(Op.getNode()); + } + + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + Op, DAG.getConstant(1, VT)); + } + } + + // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N2C = dyn_cast(N2); + MVT VT = N->getValueType(0); + MVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + 
+    AddToWorkList(NOTNode.getNode());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+  }
+  // fold (select C, X, 1) -> (or (not C), X)
+  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+    AddToWorkList(NOTNode.getNode());
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+  }
+  // fold (select C, X, 0) -> (and C, X)
+  if (VT == MVT::i1 && N2C && N2C->isNullValue())
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+  // fold (select X, X, Y) -> (or X, Y)
+  // fold (select X, 1, Y) -> (or X, Y)
+  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+  // fold (select X, Y, X) -> (and X, Y)
+  // fold (select X, Y, 0) -> (and X, Y)
+  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold selects based on a setcc into other things, such as min/max/abs
+  if (N0.getOpcode() == ISD::SETCC) {
+    // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for
+    // targets having to say they don't support SELECT_CC on every type the
+    // DAG knows about, since there is no way to mark an opcode illegal at
+    // all value types.
+    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         N1, N2, N0.getOperand(2));
+    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  SDValue N3 = N->getOperand(3);
+  SDValue N4 = N->getOperand(4);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+  // fold select_cc lhs, rhs, x, x, cc -> x
+  if (N2 == N3)
+    return N2;
+
+  // Determine if the condition we're dealing with is constant.
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, N->getDebugLoc(), false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+    if (!SCCC->isNullValue())
+      return N2;    // cond always true -> true val
+    else
+      return N3;    // cond always false -> false val
+  }
+
+  // Fold to a simpler select_cc.
+  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+                       SCC.getOperand(2));
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold select_cc into other things, such as min/max/abs
+  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
+                       N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the above
+// mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+                                    unsigned ExtOpc,
+                                    SmallVector<SDNode*, 4> &ExtendNodes,
+                                    const TargetLowering &TLI) {
+  bool HasCopyToRegUses = false;
+  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+                            UE = N0.getNode()->use_end();
+       UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == N)
+      continue;
+    if (UI.getUse().getResNo() != N0.getResNo())
+      continue;
+    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+        // Sign bits will be lost after a zext.
+        return false;
+      bool Add = false;
+      for (unsigned i = 0; i != 2; ++i) {
+        SDValue UseOp = User->getOperand(i);
+        if (UseOp == N0)
+          continue;
+        if (!isa<ConstantSDNode>(UseOp))
+          return false;
+        Add = true;
+      }
+      if (Add)
+        ExtendNodes.push_back(User);
+      continue;
+    }
+    // If truncates aren't free and there are users we can't
+    // extend, it isn't worthwhile.
+    if (!isTruncFree)
+      return false;
+    // Remember if this value is live-out.
+    if (User->getOpcode() == ISD::CopyToReg)
+      HasCopyToRegUses = true;
+  }
+
+  if (HasCopyToRegUses) {
+    bool BothLiveOut = false;
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+        BothLiveOut = true;
+        break;
+      }
+    }
+    if (BothLiveOut)
+      // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
+      return !ExtendNodes.empty();
+  }
+  return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // fold (sext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // fold (sext (sext x)) -> (sext x)
+  // fold (sext (aext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    // fold (sext (truncate (load x))) -> (sext (smaller load x))
+    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+
+    // See if the value being truncated is already sign extended. If so, just
+    // eliminate the trunc/sext pair.
+    SDValue Op = N0.getOperand(0);
+    unsigned OpBits   = Op.getValueType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getSizeInBits();
+    unsigned DestBits = VT.getSizeInBits();
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already sign extended enough.
+      if (NumSignBits > DestBits-MidBits)
+        return Op;
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+      // bits, just sext from i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+    } else {
+      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+      // bits, just truncate to i32.
+      if (NumSignBits > OpBits-MidBits)
+        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+    }
+
+    // fold (sext (truncate x)) -> (sextinreg x).
+    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+                                                 N0.getValueType())) {
+      if (Op.getValueType().bitsLT(VT))
+        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+      else if (Op.getValueType().bitsGT(VT))
+        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+                         DAG.getValueType(N0.getValueType()));
+    }
+  }
+
+  // fold (sext (load x)) -> (sext (truncate (sextload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT EVT = LN0->getMemoryVT();
+    if ((!LegalOperations && !LN0->isVolatile()) ||
+        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                            N0.getValueType(), ExtLoad),
+                ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode()) return SCC;
+  }
+
+  // fold (sext x) -> (zext x) if the sign bit is known zero.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+      DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // fold (zext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+  // fold (zext (zext x)) -> (zext x)
+  // fold (zext (aext x)) -> (zext x)
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  // fold (zext (truncate (load x))) -> (zext (smaller load x))
+  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+    }
+  }
+
+  // fold (zext (truncate x)) -> (and x, mask)
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+    SDValue Op = N0.getOperand(0);
+    if (Op.getValueType().bitsLT(VT)) {
+      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+    } else if (Op.getValueType().bitsGT(VT)) {
+      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+    }
+    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+  }
+
+  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+  // if either of the casts is not free.
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
+    SDValue X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType().bitsLT(VT)) {
+      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+    } else if (X.getValueType().bitsGT(VT)) {
+      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+    }
+    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    Mask.zext(VT.getSizeInBits());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       X, DAG.getConstant(Mask, VT));
+  }
+
+  // fold (zext (load x)) -> (zext (truncate (zextload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT EVT = LN0->getMemoryVT();
+    if ((!LegalOperations && !LN0->isVolatile()) ||
+        TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(), EVT,
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+                            ExtLoad),
+                ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode()) return SCC;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+  // fold (aext (aext x)) -> (aext x)
+  // fold (aext (zext x)) -> (zext x)
+  // fold (aext (sext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
+      N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+  // fold (aext (truncate (load x))) -> (aext (smaller load x))
+  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+    }
+  }
+
+  // fold (aext (truncate x))
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    SDValue TruncOp = N0.getOperand(0);
+    if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == zext size.
+    if (TruncOp.getValueType().bitsGT(VT))
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+  }
+
+  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+  // if the trunc is not free.
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                          N0.getValueType())) {
+    SDValue X = N0.getOperand(0).getOperand(0);
+    if (X.getValueType().bitsLT(VT)) {
+      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+    } else if (X.getValueType().bitsGT(VT)) {
+      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+    }
+    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    Mask.zext(VT.getSizeInBits());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       X, DAG.getConstant(Mask, VT));
+  }
+
+  // fold (aext (load x)) -> (aext (truncate (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    bool DoXform = true;
+    SmallVector<SDNode*, 4> SetCCs;
+    if (!N0.hasOneUse())
+      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+    if (DoXform) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getSrcValue(),
+                                       LN0->getSrcValueOffset(),
+                                       N0.getValueType(),
+                                       LN0->isVolatile(), LN0->getAlignment());
+      CombineTo(N, ExtLoad);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                                  N0.getValueType(), ExtLoad);
+      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+      // Extend SetCC uses if necessary.
+      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+        SDNode *SetCC = SetCCs[i];
+        SmallVector<SDValue, 4> Ops;
+
+        for (unsigned j = 0; j != 2; ++j) {
+          SDValue SOp = SetCC->getOperand(j);
+          if (SOp == Trunc)
+            Ops.push_back(ExtLoad);
+          else
+            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+                                      N->getDebugLoc(), VT, SOp));
+        }
+
+        Ops.push_back(SetCC->getOperand(2));
+        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                     SetCC->getValueType(0),
+                                     &Ops[0], Ops.size()));
+      }
+
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
+  if (N0.getOpcode() == ISD::LOAD &&
+      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT EVT = LN0->getMemoryVT();
+    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+                                     VT, LN0->getChain(), LN0->getBasePtr(),
+                                     LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode())
+      return SCC;
+  }
+
+  return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the or, drop them.
+    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth()) break;
+      APInt NewMask = Mask << Amt;
+      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+      if (SimplifyLHS.getNode())
+        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+                           SimplifyLHS, V.getOperand(1));
+    }
+  }
+  return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
+/// and then truncated to a narrower type, where N is a multiple of the number
+/// of bits in the narrower type, transform it into a narrower load from
+/// address + N / (bits in the new type). If the result is to be extended,
+/// also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+  MVT EVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extended to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+      return SDValue();
+  }
+
+  unsigned EVTBits = EVT.getSizeInBits();
+  unsigned ShAmt = 0;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of EVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        if (N0.getValueType().getSizeInBits() <= EVTBits)
+          return SDValue();
+      }
+    }
+  }
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT PtrType = N0.getOperand(1).getValueType();
+
+    // For big endian targets, we need to adjust the offset to the pointer to
+    // load the correct bytes.
+    if (TLI.isBigEndian()) {
+      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+      unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+    }
+
+    uint64_t PtrOff = ShAmt / 8;
+    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+                                 PtrType, LN0->getBasePtr(),
+                                 DAG.getConstant(PtrOff, PtrType));
+    AddToWorkList(NewPtr.getNode());
+
+    SDValue Load = (ExtType == ISD::NON_EXTLOAD) ?
+      DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+                  LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+                  LN0->isVolatile(), NewAlign)
+      : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+                       EVT, LN0->isVolatile(), NewAlign);
+
+    // Replace the old load's chain with the new load's chain.
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+                                  &DeadNodes);
+
+    // Return the new loaded value.
+    return Load;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  MVT VT = N->getValueType(0);
+  MVT EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned VTBits = VT.getSizeInBits();
+  unsigned EVTBits = EVT.getSizeInBits();
+
+  // fold (sext_in_reg c1) -> c1
+  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+  // If the input is already sign extended, just drop the extension.
+  if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+    return N0;
+
+  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N1);
+  }
+
+  // fold (sext_in_reg (sext x)) -> (sext x)
+  // fold (sext_in_reg (aext x)) -> (sext x)
+  // if x is small enough.
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getValueType().getSizeInBits() < EVTBits)
+      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+  }
+
+  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+  // fold operands of sext_in_reg based on knowledge that the top bits are not
+  // demanded.
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (sext_in_reg (load x)) -> (smaller sextload x)
+  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+  SDValue NarrowLoad = ReduceLoadWidth(N);
+  if (NarrowLoad.getNode())
+    return NarrowLoad;
+
+  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+  if (N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+        // We can turn this into an SRA iff the input to the SRL is already sign
+        // extended enough.
+        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+        if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+                             N0.getOperand(0), N0.getOperand(1));
+      }
+  }
+
+  // fold (sext_inreg (extload x)) -> (sextload x)
+  if (ISD::isEXTLoad(N0.getNode()) &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse() &&
+      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType().bitsLT(VT))
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+    else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+    else
+      // if the source and dest are the same type, we can drop both the extend
+      // and the truncate
+      return N0.getOperand(0);
+  }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+  SDValue Shorter =
+    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                             VT.getSizeInBits()));
+  if (Shorter.getNode())
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  return ReduceLoadWidth(N);
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDValue Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.getNode();
+  return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+  assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+  SDNode *LD1 = getBuildPairElt(N, 0);
+  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+    return SDValue();
+  MVT LD1VT = LD1->getValueType(0);
+  SDNode *LD2 = getBuildPairElt(N, 1);
+  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  if (ISD::isNON_EXTLoad(LD2) &&
+      LD2->hasOneUse() &&
+      // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but be conservative and bail out.
+      !cast<LoadSDNode>(LD1)->isVolatile() &&
+      !cast<LoadSDNode>(LD2)->isVolatile() &&
+      TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+    LoadSDNode *LD = cast<LoadSDNode>(LD1);
+    unsigned Align = LD->getAlignment();
+    unsigned NewAlign = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForMVT());
+
+    if (NewAlign <= Align &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+      return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
+                         LD->getSrcValue(), LD->getSrcValueOffset(),
+                         false, Align);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+  // Only do this before legalize, since afterward the target may be depending
+  // on the bitconvert.
+  // First check to see if this is all constant.
+  if (!LegalTypes &&
+      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+      VT.isVector()) {
+    bool isSimple = true;
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+          N0.getOperand(i).getOpcode() != ISD::Constant &&
+          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+        isSimple = false;
+        break;
+      }
+
+    MVT DestEltVT = N->getValueType(0).getVectorElementType();
+    assert(!DestEltVT.isVector() &&
+           "Element type of vector ValueType must not be vector!");
+    if (isSimple)
+      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+  }
+
+  // If the input is a constant, let getNode fold it.
+  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+    if (Res.getNode() != N) return Res;
+  }
+
+  // (conv (conv x, t1), t2) -> (conv x, t2)
+  if (N0.getOpcode() == ISD::BIT_CONVERT)
+    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  // fold (conv (load x)) -> (load (conv*)x)
+  // If the resultant load doesn't need a higher alignment than the original!
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile() &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    unsigned Align = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForMVT());
+    unsigned OrigAlign = LN0->getAlignment();
+
+    if (Align <= OrigAlign) {
+      SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+                                 LN0->getBasePtr(),
+                                 LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                                 LN0->isVolatile(), OrigAlign);
+      AddToWorkList(N);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                            N0.getValueType(), Load),
+                Load.getValue(1));
+      return Load;
+    }
+  }
+
+  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+  // This often reduces constant pool loads.
+  if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+                                  N0.getOperand(0));
+    AddToWorkList(NewConv.getNode());
+
+    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+    if (N0.getOpcode() == ISD::FNEG)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                         NewConv, DAG.getConstant(SignBit, VT));
+    assert(N0.getOpcode() == ISD::FABS);
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       NewConv, DAG.getConstant(~SignBit, VT));
+  }
+
+  // fold (bitconvert (fcopysign cst, x)) ->
+  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
+  // Note that we don't handle (copysign x, cst) because this can always be
+  // folded to an fneg or fabs.
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+      VT.isInteger() && !VT.isVector()) {
+    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+    MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+    if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                              IntXVT, N0.getOperand(1));
+      AddToWorkList(X.getNode());
+
+      // If X has a different width than the result/lhs, sext it or truncate it.
+      unsigned VTWidth = VT.getSizeInBits();
+      if (OrigXWidth < VTWidth) {
+        X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+        AddToWorkList(X.getNode());
+      } else if (OrigXWidth > VTWidth) {
+        // To get the sign bit in the right place, we have to shift it right
+        // before truncating.
+        X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+                        X.getValueType(), X,
+                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+        AddToWorkList(X.getNode());
+        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+        AddToWorkList(X.getNode());
+      }
+
+      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+      X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+                      X, DAG.getConstant(SignBit, VT));
+      AddToWorkList(X.getNode());
+
+      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                                VT, N0.getOperand(0));
+      Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+                        Cst, DAG.getConstant(~SignBit, VT));
+      AddToWorkList(Cst.getNode());
+
+      return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+    }
+  }
+
+  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+  if (N0.getOpcode() == ISD::BUILD_PAIR) {
+    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+    if (CombineLD.getNode())
+      return CombineLD;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+  MVT VT = N->getValueType(0);
+  return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
+  MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+  unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element. This handles FP<->INT cases.
+  if (SrcBitSize == DstBitSize) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      SDValue Op = BV->getOperand(i);
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated. Make that explicit here.
+      if (Op.getValueType() != SrcEltVT)
+        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+                                DstEltVT, Op));
+      AddToWorkList(Ops.back().getNode());
+    }
+    MVT VT = MVT::getVectorVT(DstEltVT,
+                              BV->getValueType(0).getVectorNumElements());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Otherwise, we're growing or shrinking the elements. To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector where the elements are
+    // the same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+    SrcEltVT = IntVT;
+  }
+
+  // Now we know the input is an integer vector. If the output is an FP type,
+  // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  // Okay, we know the src/dst types are both integers of differing types.
+  // Handle the growing case first.
+  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = TLI.isLittleEndian();
+      APInt NewBits = APInt(DstBitSize, 0);
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+
+        NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                    zextOrTrunc(SrcBitSize).zext(DstBitSize));
+      }
+
+      if (EltIsUndef)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
+  bool isS2V = ISD::isScalarToVector(BV);
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+  SmallVector<SDValue, 8> Ops;
+
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      continue;
+    }
+
+    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+                        getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                           Ops[0]);
+      OpVal = OpVal.lshr(DstBitSize);
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if (isNegatibleForFree(N1, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if (isNegatibleForFree(N0, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP &&
+      VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+  // fold (fsub A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fsub 0, B) -> -B
+  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+    if (isNegatibleForFree(N1, LegalOperations))
+      return GetNegatedExpression(N1, DAG, LegalOperations);
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+  }
+  // fold (fsub A, (fneg B)) -> (fadd A, B)
+  if (isNegatibleForFree(N1, LegalOperations))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (fmul A, 0) -> 0
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N1;
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold (frem c1, c2) -> fmod(c1,c2)
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+  if (N1CFP) {
+    const APFloat& V = N1CFP->getValueAPF();
+    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
+    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+    if (!V.isNegative()) {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+    } else {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+    }
+  }
+
+  // copysign(fabs(x), y) -> copysign(x, y)
+  // copysign(fneg(x), y) -> copysign(x, y)
+  // copysign(copysign(x,z), y) -> copysign(x, y)
+  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+      N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N1);
+
+  // copysign(x, abs(y)) -> abs(x)
+  if (N1.getOpcode() == ISD::FABS)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+  // copysign(x, copysign(y,z)) -> copysign(x, z)
+  if (N1.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(1));
+
+  // copysign(x, fp_extend(y)) -> copysign(x, y)
+  // copysign(x, fp_round(y)) -> copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(0));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT VT = N->getValueType(0);
+  MVT OpVT = N0.getValueType();
+
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128)
+    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+  // but UINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  MVT VT = N->getValueType(0);
+  MVT OpVT = N0.getValueType();
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128)
+    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+  // but SINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+
+  // fold (fp_round (fp_round x)) -> (fp_round x)
+  if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value preserving truncation if both rounds are.
+    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+                   N0.getNode()->getConstantOperandVal(1) == 1;
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getIntPtrConstant(IsTrunc));
+  }
+
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1);
+    AddToWorkList(Tmp.getNode());
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       Tmp, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // If this is fp_round(fpextend), don't fold it, allow ourselves to be
+  // folded.
+  if (N->hasOneUse() &&
+      N->use_begin()->getOpcode() == ISD::FP_ROUND)
+    return SDValue();
+
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+  // value of X.
+  if (N0.getOpcode() == ISD::FP_ROUND
+      && N0.getNode()->getConstantOperandVal(1) == 1) {
+    SDValue In = N0.getOperand(0);
+    if (In.getValueType() == VT) return In;
+    if (VT.bitsLT(In.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+                         In, N0.getOperand(1));
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+  }
+
+  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(),
+                                     N0.getValueType(),
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+
+  if (isNegatibleForFree(N0, LegalOperations))
+    return GetNegatedExpression(N0, DAG, LegalOperations);
+
+  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger() &&
+      !N0.getOperand(0).getValueType().isVector()) {
+    SDValue Int = N0.getOperand(0);
+    MVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+                        DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()),
+                                        IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         N->getValueType(0), Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // fold (fabs c1) -> fabs(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+  // fold (fabs (fabs x)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FABS)
+    return N->getOperand(0);
+  // fold (fabs (fneg x)) -> (fabs x)
+  // fold (fabs (fcopysign x, y)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger() &&
+      !N0.getOperand(0).getValueType().isVector()) {
+    SDValue Int = N0.getOperand(0);
+    MVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+                        DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()),
+                                        IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         N->getValueType(0), Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+  // never taken branch, fold to chain
+  if (N1C && N1C->isNullValue())
+    return Chain;
+  // unconditional branch
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC &&
+      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
+    // Match this pattern so that we can generate simpler code:
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = srl i32 %b, 1
+    //   brcond i32 %c ...
+    //
+    // into
+    //
+    //   %a = ...
+    //   %b = and %a, 2
+    //   %c = setcc eq %b, 0
+    //   brcond %c ...
+    //
+    // This applies only when the AND constant value has one bit set and the
+    // SRL constant is equal to the log2 of the AND constant. The back-end is
+    // smart enough to convert the result into a TEST/JMP sequence.
+    SDValue Op0 = N1.getOperand(0);
+    SDValue Op1 = N1.getOperand(1);
+
+    if (Op0.getOpcode() == ISD::AND &&
+        Op0.hasOneUse() &&
+        Op1.getOpcode() == ISD::Constant) {
+      SDValue AndOp0 = Op0.getOperand(0);
+      SDValue AndOp1 = Op0.getOperand(1);
+
+      if (AndOp1.getOpcode() == ISD::Constant) {
+        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+        if (AndConst.isPowerOf2() &&
+            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
+          SDValue SetCC =
+            DAG.getSetCC(N->getDebugLoc(),
+                         TLI.getSetCCResultType(Op0.getValueType()),
+                         Op0, DAG.getConstant(0, Op0.getValueType()),
+                         ISD::SETNE);
+
+          // Replace the uses of SRL with SETCC
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+          removeFromWorkList(N1.getNode());
+          DAG.DeleteNode(N1.getNode());
+          return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                             MVT::Other, Chain, SetCC, N2);
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+  // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + ConstantSDNode *SCCC = dyn_cast_or_null(Simp.getNode()); + + // fold br_cc true, dest -> br dest (unconditional branch) + if (SCCC && !SCCC->isNullValue()) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, + N->getOperand(0), N->getOperand(4)); + // fold br_cc false, dest -> unconditional fall through + if (SCCC && SCCC->isNullValue()) + return N->getOperand(0); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + // Don't create a indexed load / store with zero offset. + if (isa(Offset) && + cast(Offset)->isNullValue()) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa(BasePtr) || isa(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // Now check for #3 and #4. 
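+  // For instance, if another use of the old base pointer feeds an address
+  // computation that N itself depends on, folding the add/sub into N would
+  // make N a predecessor of itself; the Use->isPredecessorOf(N) test below
+  // rejects exactly that shape.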
+ bool RealUse = false; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (Use->isPredecessorOf(N)) + return false; + + if (!((Use->getOpcode() == ISD::LOAD && + cast(Use)->getBasePtr() == Ptr) || + (Use->getOpcode() == ISD::STORE && + cast(Use)->getBasePtr() == Ptr))) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Ptr.getNode()); + DAG.DeleteNode(Ptr.getNode()); + + return true; +} + +/// CombineToPostIndexedLoadStore - Try to combine a load / store with a +/// add / sub of the base pointer node into a post-indexed load / store. +/// The transformation folded the add / subtract into the new indexed +/// load / store effectively and all of its uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + if (Ptr.getNode()->hasOneUse()) + return false; + + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Op = *I; + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + if (Ptr == Offset) + std::swap(BasePtr, Offset); + if (Ptr != BasePtr) + continue; + // Don't create a indexed load / store with zero offset. + if (isa(Offset) && + cast(Offset)->isNullValue()) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr. + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + if (isa(BasePtr) || isa(BasePtr)) + continue; + + // Check for #1. 
+ bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), + EE = BasePtr.getNode()->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!((UseUse->getOpcode() == ISD::LOAD && + cast(UseUse)->getBasePtr().getNode() == Use) || + (UseUse->getOpcode() == ISD::STORE && + cast(UseUse)->getBasePtr().getNode() == Use))) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +/// InferAlignment - If we can infer some alignment information from this +/// pointer, return it. +static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa(Ptr.getOperand(1)) && + isa(Ptr.getOperand(0))) { + FrameIdx = cast(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. 
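+      // Worked example: a fixed object at incoming-frame offset 40 on a
+      // 16-byte aligned stack yields Align = MinAlign(40, 16) = 8, which is
+      // then combined below with the object's own alignment via std::max.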
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return std::max(Align, FIInfoAlign); + } + } + + return 0; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), Align); + } + } + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. + DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); + DOUT << "\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); + + if (N->use_empty()) { + removeFromWorkList(N); + DAG.DeleteNode(N); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. + assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); + DOUT << " and 2 other values\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1)), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. + // TODO: Handle TRUNCSTORE/LOADEXT + if (LD->getExtensionType() == ISD::NON_EXTLOAD && + !LD->isVolatile()) { + if (ISD::isNON_TRUNCStore(Chain.getNode())) { + StoreSDNode *PrevST = cast(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplLoad; + + // Replace the chain to void dependency. 
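+      // For example, if the load's current chain threads through stores that
+      // provably do not alias its address, FindBetterChain has returned a
+      // chain above those stores; the TokenFactor created below keeps the old
+      // chain's ordering constraints alive for its other users.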
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), + BetterChain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), + LD->getAlignment()); + } + + // Create token factor to keep old chain connected. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplLoad.getValue(1)); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + return SDValue(); +} + + +/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is +/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some +/// of the loaded bits, try narrowing the load and store if it would end up +/// being a win for performance or code size. +SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + MVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LD = cast(N0); + if (LD->getBasePtr() != Ptr) + return SDValue(); + + // Find the type to narrow it the load / op / store to. + SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + MVT NewVT = MVT::getIntegerVT(NewBW); + while (NewBW < BitWidth && + !(TLI.isOperationLegalOrCustom(Opc, NewVT) && + TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = MVT::getIntegerVT(NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. 
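+      // Worked example: for an i32 "load; or 0x00FF0000; store" sequence only
+      // byte 2 changes, so Imm has ShAmt = 16 trailing zeros and NewBW = 8.
+      // On a little-endian target PtrOff = 16/8 = 2; the big-endian
+      // adjustment below instead addresses byte (32+7-8)/8 - 2 = 1, counted
+      // from the other end.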
+ if (TLI.isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + if (NewAlign < + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + LD->getChain(), NewPtr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), NewAlign); + SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + DAG.getConstant(NewImm, NewVT)); + SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + NewVal, NewPtr, + ST->getSrcValue(), ST->getSrcValueOffset(), + false, NewAlign); + + AddToWorkList(NewPtr.getNode()); + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewVal.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // Try to infer better alignment information than the store already has. + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), Align); + } + } + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + MVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(SVT.getTypeForMVT()); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
+ bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } + break; + case MVT::f64: + if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } else if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + // Replace the chain to avoid dependency. + SDValue ReplStore; + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. 
For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet(Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa(EltNo)) { + unsigned Elt = cast(EltNo)->getZExtValue(); + SmallVector Ops(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa(EltNo)) { + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector Ops(NElts, DAG.getUNDEF(EVT)); + + unsigned Elt = cast(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. 
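+    // E.g. (i16 (vextract (v8i16 (scalar_to_vector (i32 X))), 0)) becomes
+    // (i16 (truncate (i32 X))).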
+ MVT EltVT = InVec.getValueType().getVectorElementType(); + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + return InOp; + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. + if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + SDValue EltNo = N->getOperand(1); + + if (isa(EltNo)) { + unsigned Elt = cast(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + MVT LVT = EVT; + + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + EVT = BCVT.getVectorElementType(); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + LN0 = cast(InVec.getOperand(0)); + } else if ((SVN = dyn_cast(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BIT_CONVERT) + InVec = InVec.getOperand(0); + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + } + } + + if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + return SDValue(); + + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + if (Elt) { + unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + MVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + MVT VT = N->getValueType(0); + MVT EltType = VT.getVectorElementType(); + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. 
If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + SDValue VecIn1, VecIn2; + for (unsigned i = 0; i != NumInScalars; ++i) { + // Ignore undef inputs. + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + // If this input is something other than a EXTRACT_VECTOR_ELT with a + // constant index, bail out. + if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa(N->getOperand(i).getOperand(1))) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // If the input vector type disagrees with the result of the build_vector, + // we can't make a shuffle. + SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + if (ExtractedFromVec.getValueType() != VT) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // Otherwise, remember this. We allow up to two distinct input vectors. + if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) + continue; + + if (VecIn1.getNode() == 0) { + VecIn1 = ExtractedFromVec; + } else if (VecIn2.getNode() == 0) { + VecIn2 = ExtractedFromVec; + } else { + // Too many inputs. + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + } + + // If everything is good, we can make a shuffle operation. + if (VecIn1.getNode()) { + SmallVector Mask; + for (unsigned i = 0; i != NumInScalars; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); + SDValue ExtVal = Extract.getOperand(1); + if (Extract.getOperand(0) == VecIn1) { + unsigned ExtIndex = cast(ExtVal)->getZExtValue(); + if (ExtIndex > VT.getVectorNumElements()) + return SDValue(); + + Mask.push_back(ExtIndex); + continue; + } + + // Otherwise, use InIdx + VecSize + unsigned Idx = cast(ExtVal)->getZExtValue(); + Mask.push_back(Idx+NumInScalars); + } + + // Add count and size info. + if (!TLI.isTypeLegal(VT) && LegalTypes) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + SDValue Ops[2]; + Ops[0] = VecIn1; + Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of + // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector + // inputs come from at most two distinct vectors, turn this into a shuffle + // node. + + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + return SDValue(); +} + +SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { + return SDValue(); + + MVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType().getVectorNumElements() == NumElts && + "Vector shuffle must be normalized in DAG"); + + // FIXME: implement canonicalizations from DAG.getVectorShuffle() + + // If it is a splat, check if the argument vector is a build_vector with + // all scalar elements the same. + if (cast(N)->isSplat()) { + SDNode *V = N0.getNode(); + + + // If this is a bit convert that changes the element type of the vector but + // not the number of vector elements, look through it. Be careful not to + // look though conversions that change things like v4f32 to v2f64. 
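+    // E.g. a v4f32 <-> v4i32 bit_convert keeps four elements, so the splat
+    // index still names the same lane; a v4f32 <-> v2f64 conversion does not.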
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDValue ConvInput = V->getOperand(0);
+      if (ConvInput.getValueType().isVector() &&
+          ConvInput.getValueType().getVectorNumElements() == NumElts)
+        V = ConvInput.getNode();
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+      if (NumElems > BaseIdx) {
+        SDValue Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.getNode())
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+  return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND to a vector_shuffle with the destination vector and a
+/// zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<int, 8> Indices;
+      unsigned NumElts = RHS.getNumOperands();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDValue Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDValue();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          Indices.push_back(i);
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          Indices.push_back(NumElts);
+        else
+          return SDValue();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      MVT RVT = RHS.getValueType();
+      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+        return SDValue();
+
+      // Return the new VECTOR_SHUFFLE node.
+      MVT EVT = RVT.getVectorElementType();
+      SmallVector<SDValue, 8> ZeroOps(RVT.getVectorNumElements(),
+                                      DAG.getConstant(0, EVT));
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                 RVT, &ZeroOps[0], ZeroOps.size());
+      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  MVT VT = N->getValueType(0);
+  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+  MVT EltType = VT.getVectorElementType();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.getNode()) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDValue LHSOp = LHS.getOperand(i);
+      SDValue RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
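+      // (Foldable means both are UNDEF or build_vector constants; e.g.
+      // (add <i32 1, i32 2>, <i32 3, i32 4>) folds to <i32 4, i32 6>.)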
+ if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast(RHSOp.getNode())->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + cast(RHSOp.getNode())->getValueAPF().isZero())) + break; + } + + Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), + EltType, LHSOp, RHSOp)); + AddToWorkList(Ops.back().getNode()); + assert((Ops.back().getOpcode() == ISD::UNDEF || + Ops.back().getOpcode() == ISD::Constant || + Ops.back().getOpcode() == ISD::ConstantFP) && + "Scalar binop didn't fold!"); + } + + if (Ops.size() == LHS.getNumOperands()) { + MVT VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.getNode()); + return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); + } + + return SCC; + } + return SDValue(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD && + // Do not let this transformation reduce the number of volatile loads. + !cast(LHS)->isVolatile() && + !cast(RHS)->isVolatile() && + // Token chains must be identical. + LHS.getOperand(0) == RHS.getOperand(0)) { + LoadSDNode *LLD = cast(LHS); + LoadSDNode *RLD = cast(RHS); + + // If this is an EXTLOAD, the VT's must match. 
+ if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + // FIXME: this conflates two src values, discarding one. This is not + // the right thing to do, but nothing uses srcvalues now. When they do, + // turn SrcValue into a list of locations. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + + if (Addr.getNode()) { + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), + Addr,LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->isVolatile(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + LLD->getChain(), Addr, LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->getMemoryVT(), + LLD->isVolatile(), + LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. 
+  if (N2 == N3) return N2;
+
+  MVT VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+  // Determine if the condition we're dealing with is constant
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, DL, false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && !SCCC->isNullValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->isNullValue())
+    return N3;
+
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValueAPF().isZero()) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, DL, VT, N3);
+    }
+  }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+  // in it. This is a win when the constant is not otherwise available because
+  // it replaces two constant pool loads with one. We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before
+  // legalize types and we want the other legalization to happen first (e.g.
+  // to avoid messing with soft float) and if the ConstantFP is not legal,
+  // because if it is legal, we may not need to store the FP constant in a
+  // constant pool.
+  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+      if (TLI.isTypeLegal(N2.getValueType()) &&
+          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+           TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load, they are likely around in registers for other users.
+          (TV->hasOneUse() || FV->hasOneUse())) {
+        Constant *Elts[] = {
+          const_cast<ConstantFP*>(FV->getConstantFPValue()),
+          const_cast<ConstantFP*>(TV->getConstantFPValue())
+        };
+        const Type *FPTy = Elts[0]->getType();
+        const TargetData &TD = *TLI.getTargetData();
+
+        // Create a ConstantArray of the two constants.
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+                                            TD.getPrefTypeAlignment(FPTy));
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+        // Get the offsets to the 0 and 1 element of the array so that we can
+        // select between them.
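+        // For f32 constants EltSize is 4, so the select below yields an
+        // offset of 4 (the true value, at index 1) or 0 (the false value, at
+        // index 0) into the two-element constant pool array.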
+ SDValue Zero = DAG.getIntPtrConstant(0); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); + SDValue One = DAG.getIntPtrConstant(EltSize); + + SDValue Cond = DAG.getSetCC(DL, + TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), + Cond, One, Zero); + CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CstOffset); + return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, + Alignment); + + } + } + + // Check to see if we can perform the "gzip trick", transforming + // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) + if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && + N0.getValueType().isInteger() && + N2.getValueType().isInteger() && + (N1C->isNullValue() || // (a < 0) ? b : 0 + (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + MVT XType = N0.getValueType(); + MVT AType = N2.getValueType(); + if (XType.bitsGE(AType)) { + // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a + // single-bit constant. + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + unsigned ShCtV = N2C->getAPIntValue().logBase2(); + ShCtV = XType.getSizeInBits()-ShCtV-1; + SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy()); + SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + XType, N0, ShCt); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + } + + // fold select C, 16, 0 -> shl C, 4 + if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->getAPIntValue() == 1) + return SDValue(); + + // Get a SetCC of the condition + // FIXME: Should probably make sure that setcc is legal if we ever have a + // target where it isn't. + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } + + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); + + if (N2C->getAPIntValue() == 1) + return Temp; + + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy())); + } + + // Check to see if this is the equivalent of setcc + // FIXME: Turn all of these into setcc if setcc if setcc is legal + // otherwise, go ahead with the folds. 
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+    MVT XType = N0.getValueType();
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+      return Res;
+    }
+
+    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::CTLZ, XType))) {
+      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
+                                         getShiftAmountTy()));
+    }
+    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+                                  XType, DAG.getConstant(0, XType), N0);
+      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+      return DAG.getNode(ISD::SRL, DL, XType,
+                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+                         DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy()));
+    }
+    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+                                 DAG.getConstant(XType.getSizeInBits()-1,
+                                                 getShiftAmountTy()));
+      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+
+  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+      N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+    MVT XType = N0.getValueType();
+    SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
+                                DAG.getConstant(XType.getSizeInBits()-1,
+                                                getShiftAmountTy()));
+    SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
+                              N0, Shift);
+    AddToWorkList(Shift.getNode());
+    AddToWorkList(Add.getNode());
+    return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+  }
+  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+      MVT XType = N0.getValueType();
+      if (SubC->isNullValue() && XType.isInteger()) {
+        SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+                                    N0,
+                                    DAG.getConstant(XType.getSizeInBits()-1,
+                                                    getShiftAmountTy()));
+        SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+                                  XType, N0, Shift);
+        AddToWorkList(Shift.getNode());
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+                                   SDValue N1, ISD::CondCode Cond,
+                                   DebugLoc DL, bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo
+    DagCombineInfo(DAG, Level == Unrestricted, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself.  Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0;
+
+  // If it's adding a simple constant then integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getZExtValue();
+    }
+  }
+
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base) ||
+         isa<ConstantPoolSDNode>(Base) ||
+         isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          SDValue Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2) const {
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDValue Base1, Base2;
+  int64_t Offset1, Offset2;
+  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+  // If they have the same base address then...
+  if (Base1 == Base2)
+    // Check to see if the addresses overlap.
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // If we know both bases then they can't alias.
+  if (KnownBase1 && KnownBase2) return false;
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+    AliasAnalysis::AliasResult AAResult =
+      AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const { + if (LoadSDNode *LD = dyn_cast(N)) { + Ptr = LD->getBasePtr(); + Size = LD->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LD->getSrcValue(); + SrcValueOffset = LD->getSrcValueOffset(); + return true; + } else if (StoreSDNode *ST = dyn_cast(N)) { + Ptr = ST->getBasePtr(); + Size = ST->getMemoryVT().getSizeInBits() >> 3; + SrcValue = ST->getSrcValue(); + SrcValueOffset = ST->getSrcValueOffset(); + } else { + assert(0 && "FindAliasInfo expected a memory operand"); + } + + return false; +} + +/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector &Aliases) { + SmallVector Chains; // List of chains to visit. + std::set Visited; // Visited node set. + + // Get alias information for node. + SDValue Ptr; + int64_t Size = 0; + const Value *SrcValue = 0; + int SrcValueOffset = 0; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + + // Starting off. + Chains.push_back(OriginalChain); + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + + // Don't bother if we've been before. + if (Visited.find(Chain.getNode()) != Visited.end()) continue; + Visited.insert(Chain.getNode()); + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. + SDValue OpPtr; + int64_t OpSize = 0; + const Value *OpSrcValue = 0; + int OpSrcValueOffset = 0; + bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, + OpSrcValue, OpSrcValueOffset); + + // If chain is alias then stop here. + if (!(IsLoad && IsOpLoad) && + isAlias(Ptr, Size, SrcValue, SrcValueOffset, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + // Clean up old chain. + AddToWorkList(Chain.getNode()); + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor, so we queue + // then up. Adding the operands to the queue (stack) in reverse order + // maintains the original order and increases the likelihood that getNode + // will find a matching token factor (CSE.) + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + // Eliminate the token factor if we can. + AddToWorkList(Chain.getNode()); + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + if (Aliases.size() == 0) { + // If no operands then chain to entry token. + return DAG.getEntryNode(); + } else if (Aliases.size() == 1) { + // If a single operand then chain to it. We don't need to revisit it. 
+ return Aliases[0]; + } + + // Construct a custom tailored token factor. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); + + // Make sure the old chain gets cleaned up. + if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); + + return NewChain; +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 000000000000..6becff32176e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,1033 @@ +///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. +// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuild.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+  MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+  // Don't handle non-simple values in FastISel.
+  if (!RealVT.isSimple())
+    return 0;
+
+  // Ignore illegal types.  We must do this before looking up the value
+  // in ValueMap because Arguments are given virtual registers regardless
+  // of whether FastISel can handle them.
+  MVT::SimpleValueType VT = RealVT.getSimpleVT();
+  if (!TLI.isTypeLegal(VT)) {
+    // Promote MVT::i1 to a legal type though, because it's common and easy.
+    if (VT == MVT::i1)
+      VT = TLI.getTypeToTransformTo(VT).getSimpleVT();
+    else
+      return 0;
+  }
+
+  // Look up the value to see if we already have a register for it.  We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally.  This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  if (ValueMap.count(V))
+    return ValueMap[V];
+  unsigned Reg = LocalValueMap[V];
+  if (Reg != 0)
+    return Reg;
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    if (CI->getValue().getActiveBits() <= 64)
+      Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+  } else if (isa<AllocaInst>(V)) {
+    Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+  } else if (isa<ConstantPointerNull>(V)) {
+    // Translate this as an integer zero so that it can be
+    // local-CSE'd with actual integer zeros.
+    Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType()));
+  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+    if (!Reg) {
+      const APFloat &Flt = CF->getValueAPF();
+      MVT IntVT = TLI.getPointerTy();
+
+      uint64_t x[2];
+      uint32_t IntBitWidth = IntVT.getSizeInBits();
+      bool isExact;
+      (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                  APFloat::rmTowardZero, &isExact);
+      if (isExact) {
+        APInt IntVal(IntBitWidth, 2, x);
+
+        unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal));
+        if (IntegerReg != 0)
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+      }
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (!SelectOperator(CE, CE->getOpcode())) return 0;
+    Reg = LocalValueMap[CE];
+  } else if (isa<UndefValue>(V)) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg);
+  }
+
+  // If target-independent code couldn't handle the value, give target-specific
+  // code a try.
+  if (!Reg && isa<Constant>(V))
+    Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+  // Don't cache constant materializations in the general ValueMap.
+  // To do so would require tracking what uses they dominate.
+  if (Reg != 0)
+    LocalValueMap[V] = Reg;
+  return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+  // Look up the value to see if we already have a register for it.  We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally.  This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  if (ValueMap.count(V))
+    return ValueMap[V];
+  return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value.  It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+  if (!isa<Instruction>(I)) {
+    LocalValueMap[I] = Reg;
+    return Reg;
+  }
+
+  unsigned &AssignedReg = ValueMap[I];
+  if (AssignedReg == 0)
+    AssignedReg = Reg;
+  else if (Reg != AssignedReg) {
+    const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+    TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+                     Reg, RegClass, RegClass);
+  }
+  return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return 0;
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
+  MVT PtrVT = TLI.getPointerTy();
+  MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false);
+  if (IdxVT.bitsLT(PtrVT))
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+                      ISD::SIGN_EXTEND, IdxN);
+  else if (IdxVT.bitsGT(PtrVT))
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+                      ISD::TRUNCATE, IdxN);
+  return IdxN;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// whose opcode directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+  MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
+  if (VT == MVT::Other || !VT.isSimple())
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+
+  // We only handle legal types.  For example, on x86-32 the instruction
+  // selector contains all of the 64-bit instructions from x86-64,
+  // under the assumption that i64 won't be used if the target doesn't
+  // support it.
+  if (!TLI.isTypeLegal(VT)) {
+    // MVT::i1 is special.  Allow AND, OR, or XOR because they
+    // don't require additional zeroing, which makes them easy.
+    if (VT == MVT::i1 &&
+        (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+         ISDOpcode == ISD::XOR))
+      VT = TLI.getTypeToTransformTo(VT);
+    else
+      return false;
+  }
+
+  unsigned Op0 = getRegForValue(I->getOperand(0));
+  if (Op0 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  // Check if the second operand is a constant and handle it appropriately.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, CI->getZExtValue());
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  // Check if the second operand is a constant float.
+  if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, CF);
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op1); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + const Type *Ty = I->getOperand(0)->getType(); + MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); + // FIXME: This can be optimized by combining the add with a + // subsequent one. + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + unsigned IdxN = getRegForGEPIndex(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + + // We successfully emitted code for the given LLVM Instruction. 
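// A compact sketch of the address arithmetic SelectGetElementPtr above is
// emitting, using a hypothetical struct type: each struct index adds a fixed
// element offset and each array index adds Idx * ElementSize, matching the
// "N = N + Offset" and "N = N + Idx * ElementSize" steps in the code.
//
//   #include <cstddef>
//   #include <cstdint>
//   struct Elem { int32_t a; int64_t b; };   // hypothetical element type
//   uintptr_t gepAddr(uintptr_t base, uint64_t i) {
//     uintptr_t N = base;
//     N += i * sizeof(Elem);                 // array index step
//     N += offsetof(Elem, b);                // struct field step
//     return N;                              // address of p[i].b
//   }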
+ UpdateValueMap(I, N); + return true; +} + +bool FastISel::SelectCall(User *I) { + Function *F = cast(I)->getCalledFunction(); + if (!F) return false; + + unsigned IID = F->getIntrinsicID(); + switch (IID) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast(I); + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) { + DICompileUnit CU(cast(SPI->getContext())); + unsigned Line = SPI->getLine(); + unsigned Col = SPI->getColumn(); + unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col); + setCurDebugLoc(DebugLoc::get(Idx)); + } + return true; + } + case Intrinsic::dbg_region_start: { + DbgRegionStartInst *RSI = cast(I); + if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = + DW->RecordRegionStart(cast(RSI->getContext())); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(ID); + } + return true; + } + case Intrinsic::dbg_region_end: { + DbgRegionEndInst *REI = cast(I); + if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = 0; + DISubprogram Subprogram(cast(REI->getContext())); + if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) { + // This is end of an inlined function. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics + // or "beginning of inlined scope" is not recoginized due to + // missing location info. In such cases, do ignore this region.end. + BuildMI(MBB, DL, II).addImm(ID); + } else { + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordRegionEnd(cast(REI->getContext())); + BuildMI(MBB, DL, II).addImm(ID); + } + } + return true; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast(I); + Value *SP = FSI->getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None)) + return true; + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = DL; + DISubprogram Subprogram(cast(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return true; + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(LabelID); + } + } else { + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. 
+ DW->RecordRegionStart(cast(FSI->getSubprogram())); + } + } + + return true; + } + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast(I); + Value *Variable = DI->getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + // Determine the address of the declared object. + Value *Address = DI->getAddress(); + if (BitCastInst *BCI = dyn_cast(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) break; + DenseMap::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + + // Determine the debug globalvariable. + GlobalValue *GV = cast(Variable); + + // Build the DECLARE instruction. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); + MachineInstr *DeclareMI + = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); + DIVariable DV(cast(GV)); + if (!DV.isNull()) { + // This is a local variable + DW->RecordVariableScope(DV, DeclareMI); + } + } + return true; + } + case Intrinsic::eh_exception: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { + default: break; + case TargetLowering::Expand: { + assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + return true; + } + } + break; + } + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { + default: break; + case TargetLowering::Expand: { + MVT VT = (IID == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (MBB->isLandingPad()) + AddCatchInfo(*cast(I), MMI, MBB); + else { +#ifndef NDEBUG + CatchInfoLost.insert(cast(I)); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) MBB->addLiveIn(Reg); + } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + } else { + unsigned ResultReg = + getRegForValue(Constant::getNullValue(I->getType())); + UpdateValueMap(I, ResultReg); + } + return true; + } + } + break; + } + } + return false; +} + +bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. Or as a special case, + // it may be i1 if we're doing a truncate because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(DstVT)) + if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) + // Unhandled type. 
Halt "fast" selection and bail. + return false; + + // Check if the source operand is legal. Or as a special case, + // it may be i1 if we're doing zero-extension because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(SrcVT)) + if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // If the operand is i1, arrange for the high bits in the register to be zero. + if (SrcVT == MVT::i1) { + SrcVT = TLI.getTypeToTransformTo(SrcVT); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + if (!InputReg) + return false; + } + // If the result is i1, truncate to the target's type for i1 first. + if (DstVT == MVT::i1) + DstVT = TLI.getTypeToTransformTo(DstVT); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple() || + !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // First, try to perform the bitcast by inserting a reg-reg copy. + unsigned ResultReg = 0; + if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + ResultReg = createResultReg(DstClass); + + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Op0, DstClass, SrcClass); + if (!InsertedCopy) + ResultReg = 0; + } + + // If the reg-reg copy failed, select a BIT_CONVERT opcode. + if (!ResultReg) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + ISD::BIT_CONVERT, Op0); + + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectInstruction(Instruction *I) { + return SelectOperator(I, I->getOpcode()); +} + +/// FastEmitBranch - Emit an unconditional branch to the given block, +/// unless it is the immediate (fall-through) successor, and update +/// the CFG. +void +FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { + MachineFunction::iterator NextMBB = + next(MachineFunction::iterator(MBB)); + + if (MBB->isLayoutSuccessor(MSucc)) { + // The unconditional fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*MBB, MSucc, NULL, SmallVector()); + } + MBB->addSuccessor(MSucc); +} + +bool +FastISel::SelectOperator(User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? 
ISD::FADD : ISD::ADD;
+    return SelectBinaryOp(I, Opc);
+  }
+  case Instruction::Sub: {
+    ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
+    return SelectBinaryOp(I, Opc);
+  }
+  case Instruction::Mul: {
+    ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
+    return SelectBinaryOp(I, Opc);
+  }
+  case Instruction::SDiv:
+    return SelectBinaryOp(I, ISD::SDIV);
+  case Instruction::UDiv:
+    return SelectBinaryOp(I, ISD::UDIV);
+  case Instruction::FDiv:
+    return SelectBinaryOp(I, ISD::FDIV);
+  case Instruction::SRem:
+    return SelectBinaryOp(I, ISD::SREM);
+  case Instruction::URem:
+    return SelectBinaryOp(I, ISD::UREM);
+  case Instruction::FRem:
+    return SelectBinaryOp(I, ISD::FREM);
+  case Instruction::Shl:
+    return SelectBinaryOp(I, ISD::SHL);
+  case Instruction::LShr:
+    return SelectBinaryOp(I, ISD::SRL);
+  case Instruction::AShr:
+    return SelectBinaryOp(I, ISD::SRA);
+  case Instruction::And:
+    return SelectBinaryOp(I, ISD::AND);
+  case Instruction::Or:
+    return SelectBinaryOp(I, ISD::OR);
+  case Instruction::Xor:
+    return SelectBinaryOp(I, ISD::XOR);
+
+  case Instruction::GetElementPtr:
+    return SelectGetElementPtr(I);
+
+  case Instruction::Br: {
+    BranchInst *BI = cast<BranchInst>(I);
+
+    if (BI->isUnconditional()) {
+      BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+      FastEmitBranch(MSucc);
+      return true;
+    }
+
+    // Conditional branches are not handled yet.
+    // Halt "fast" selection and bail.
+    return false;
+  }
+
+  case Instruction::Unreachable:
+    // Nothing to emit.
+    return true;
+
+  case Instruction::PHI:
+    // PHI nodes are already emitted.
+    return true;
+
+  case Instruction::Alloca:
+    // FunctionLowering has the static-sized case covered.
+    if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+      return true;
+
+    // Dynamic-sized alloca is not handled yet.
+    return false;
+
+  case Instruction::Call:
+    return SelectCall(I);
+
+  case Instruction::BitCast:
+    return SelectBitCast(I);
+
+  case Instruction::FPToSI:
+    return SelectCast(I, ISD::FP_TO_SINT);
+  case Instruction::ZExt:
+    return SelectCast(I, ISD::ZERO_EXTEND);
+  case Instruction::SExt:
+    return SelectCast(I, ISD::SIGN_EXTEND);
+  case Instruction::Trunc:
+    return SelectCast(I, ISD::TRUNCATE);
+  case Instruction::SIToFP:
+    return SelectCast(I, ISD::SINT_TO_FP);
+
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    MVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return SelectCast(I, ISD::ZERO_EXTEND);
+    if (DstVT.bitsLT(SrcVT))
+      return SelectCast(I, ISD::TRUNCATE);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
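// The IntToPtr/PtrToInt case above reduces to a width comparison. A tiny
// sketch of that rule with hypothetical names (the real code compares MVT
// bit widths via bitsGT/bitsLT):
//
//   enum CastKind { ZeroExtend, Truncate, ReuseRegister };
//   CastKind pickIntPtrCast(unsigned SrcBits, unsigned DstBits) {
//     if (DstBits > SrcBits) return ZeroExtend;   // widen with zeros
//     if (DstBits < SrcBits) return Truncate;     // drop the high bits
//     return ReuseRegister;                       // same width: no code
//   }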
+  default:
+    // Unhandled instruction. Halt "fast" selection and bail.
+    return false;
+  }
+}
+
+FastISel::FastISel(MachineFunction &mf,
+                   MachineModuleInfo *mmi,
+                   DwarfWriter *dw,
+                   DenseMap<const Value*, unsigned> &vm,
+                   DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
+                   DenseMap<const AllocaInst*, int> &am
+#ifndef NDEBUG
+                   , SmallSet<Instruction*, 8> &cil
+#endif
+                   )
+  : MBB(0),
+    ValueMap(vm),
+    MBBMap(bm),
+    StaticAllocaMap(am),
+#ifndef NDEBUG
+    CatchInfoLost(cil),
+#endif
+    MF(mf),
+    MMI(mmi),
+    DW(dw),
+    MRI(MF.getRegInfo()),
+    MFI(*MF.getFrameInfo()),
+    MCP(*MF.getConstantPool()),
+    TM(MF.getTarget()),
+    TD(*TM.getTargetData()),
+    TII(*TM.getInstrInfo()),
+    TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType,
+                             ISD::NodeType) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType,
+                              ISD::NodeType, unsigned /*Op0*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType,
+                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned /*Op1*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType,
+                              ISD::NodeType, uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType,
+                              ISD::NodeType, ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType,
+                               ISD::NodeType, unsigned /*Op0*/,
+                               uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType,
+                               ISD::NodeType, unsigned /*Op0*/,
+                               ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
+                                ISD::NodeType,
+                                unsigned /*Op0*/, unsigned /*Op1*/,
+                                uint64_t /*Imm*/) {
+  return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper around FastEmit_ri.  It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+                                unsigned Op0, uint64_t Imm,
+                                MVT::SimpleValueType ImmType) {
+  // First check if the immediate type is legal.  If not, we can't use the
+  // ri form.
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+  if (ResultReg != 0)
+    return ResultReg;
+  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+  if (MaterialReg == 0)
+    return 0;
+  return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+/// FastEmit_rf_ - This method is a wrapper around FastEmit_rf.  It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf.  If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+                                unsigned Op0, ConstantFP *FPImm,
+                                MVT::SimpleValueType ImmType) {
+  // First check if the immediate type is legal.  If not, we can't use the
+  // rf form.
+  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+  if (ResultReg != 0)
+    return ResultReg;
+
+  // Materialize the constant in a register.
+  unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+  if (MaterialReg == 0) {
+    // If the target doesn't have a way to directly enter a floating-point
+    // value into a register, use an alternate approach.
+    // TODO: The current approach only supports floating-point constants
+    // that can be constructed by conversion from integer values.
This should + // be replaced by code that creates a load from a constant-pool entry, + // which will require some target-specific work. + const APFloat &Flt = FPImm->getValueAPF(); + MVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (!isExact) + return 0; + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::Constant, IntVal.getZExtValue()); + if (IntegerReg == 0) + return 0; + MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, + ISD::SINT_TO_FP, IntegerReg); + if (MaterialReg == 0) + return 0; + } + return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(MBB, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + else { + BuildMI(MBB, DL, II).addReg(Op0); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = 
TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(MBB, DL, II).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, + unsigned Op0, uint32_t Idx) { + const TargetRegisterClass* RC = MRI.getRegClass(Op0); + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { + return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 000000000000..2cd67e61907f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3091 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// hacks on it until the target machine can handle it. This involves +/// eliminating value sizes the machine cannot handle (promoting small sizes to +/// large sizes or splitting up large values into small values) as well as +/// eliminating operations the machine cannot handle. +/// +/// This code also does a small amount of optimization and recognition of idioms +/// as part of its processing. For example, if a target does not support a +/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this +/// will attempt merge setcc and brc instructions into brcc's. +/// +namespace { +class VISIBILITY_HIDDEN SelectionDAGLegalize { + TargetLowering &TLI; + SelectionDAG &DAG; + CodeGenOpt::Level OptLevel; + + // Libcall insertion helpers. + + /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// legalized. We use this to ensure that calls are properly serialized + /// against each other, including inserted libcalls. + SDValue LastCALLSEQ_END; + + /// IsLegalizingCall - This member is used *only* for purposes of providing + /// helpful assertions that a libcall isn't created while another call is + /// being legalized (which could lead to non-serialized call sequences). + bool IsLegalizingCall; + + enum LegalizeAction { + Legal, // The target natively supports this operation. + Promote, // This operation should be executed in a larger type. + Expand // Try to expand this to other ops, otherwise use a libcall. + }; + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// value type, where the two bits correspond to the LegalizeAction enum. + /// This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. + DenseMap LegalizedNodes; + + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. 
+ if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + +public: + SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol); + + /// getTypeAction - Return how we should legalize values of this type, either + /// it is already legal or we need to expand it into multiple registers of + /// smaller integer type, or we need to promote it to a larger type. + LegalizeAction getTypeAction(MVT VT) const { + return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + } + + /// isTypeLegal - Return true if this type is legal on this target. + /// + bool isTypeLegal(MVT VT) const { + return getTypeAction(VT) == Legal; + } + + void LegalizeDAG(); + +private: + /// LegalizeOp - We know that the specified value has a legal type. + /// Recursively ensure that the operands have legal types, then return the + /// result. + SDValue LegalizeOp(SDValue O); + + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable + /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it + /// is necessary to spill the vector being inserted into to memory, perform + /// the insert there, and then read the result back. + SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, + SDValue Idx, DebugLoc dl); + SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, + SDValue Idx, DebugLoc dl); + + /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which + /// performs the same shuffe in terms of order or result bytes, but on a type + /// whose vector element type is narrower than the original shuffle type. + /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> + SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + SmallVectorImpl &Mask) const; + + bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo); + + void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + DebugLoc dl); + + SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); + + SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl); + SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); + SDValue ExpandDBG_STOPPOINT(SDNode *Node); + void ExpandDYNAMIC_STACKALLOC(SDNode *Node, + SmallVectorImpl &Results); + SDValue ExpandFCOPYSIGN(SDNode *Node); + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT, + DebugLoc dl); + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned, + DebugLoc dl); + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned, + DebugLoc dl); + + SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); + SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); + + SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + + void ExpandNode(SDNode *Node, SmallVectorImpl &Results); + void PromoteNode(SDNode *Node, SmallVectorImpl &Results); +}; +} + +/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which +/// performs the same shuffe in terms of order or result bytes, but on a type +/// whose vector element type is narrower than the original shuffle type. +/// e.g. 
<0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + SmallVectorImpl &Mask) const { + MVT EltVT = NVT.getVectorElementType(); + unsigned NumMaskElts = VT.getVectorNumElements(); + unsigned NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + + SmallVector NewMask; + for (unsigned i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); + } + } + assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); +} + +SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, + CodeGenOpt::Level ol) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); +} + +void SelectionDAGLegalize::LegalizeDAG() { + LastCALLSEQ_END = DAG.getEntryNode(); + IsLegalizingCall = false; + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node) { + if (Node->getOpcode() == ISD::CALLSEQ_END) + return Node; + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; + } + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. 
+ SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheChain) + if (SDNode *Result = FindCallEndFromCallStart(User)) + return Result; + } + return 0; +} + +/// FindCallStartFromCallEnd - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_START node that initiates the call sequence. +static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + assert(Node && "Didn't find callseq_start for a call??"); + if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; + + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); +} + +/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to +/// see if any uses can reach Dest. If no dest operands can get to dest, +/// legalize them, legalize ourself, and return false, otherwise, return true. +/// +/// Keep track of the nodes we fine that actually do lead to Dest in +/// NodesLeadingTo. This avoids retraversing them exponential number of times. +/// +bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo) { + if (N == Dest) return true; // N certainly leads to Dest :) + + // If we've already processed this node and it does lead to Dest, there is no + // need to reprocess it. + if (NodesLeadingTo.count(N)) return true; + + // If the first result of this node has been already legalized, then it cannot + // reach N. + if (LegalizedNodes.count(SDValue(N, 0))) return false; + + // Okay, this node has not already been legalized. Check and legalize all + // operands. If none lead to Dest, then we can legalize this node. + bool OperandsLeadToDest = false; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OperandsLeadToDest |= // If an operand leads to Dest, so do we. + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + + if (OperandsLeadToDest) { + NodesLeadingTo.insert(N); + return true; + } + + // Okay, this node looks safe, legalize it and return false. + LegalizeOp(SDValue(N, 0)); + return false; +} + +/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, + SelectionDAG &DAG, const TargetLowering &TLI) { + bool Extend = false; + DebugLoc dl = CFP->getDebugLoc(); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). + MVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + MVT OrigVT = VT; + MVT SVT = VT; + while (SVT != MVT::f32) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1); + if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. 
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + const Type *SType = SVT.getTypeForMVT(); + LLVMC = cast(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + unsigned Alignment = cast(CPIdx)->getAlignment(); + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, + OrigVT, DAG.getEntryNode(), + CPIdx, PseudoSourceValue::getConstantPool(), + 0, VT, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, Alignment); +} + +/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + MVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + int SVOffset = ST->getSrcValueOffset(); + DebugLoc dl = ST->getDebugLoc(); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + MVT intVT = MVT::getIntegerVT(VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), + SVOffset, ST->isVolatile(), Alignment); + } else { + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + MVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, NULL, 0, StoredVT); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset)); + + // Load from the stack slot. 
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + NULL, 0, MemVT); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + MemVT, ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + } + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + MVT NewStoredVT = + (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. + SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, + ST->getSrcValue(), SVOffset, NewStoredVT, + ST->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, + ST->getSrcValue(), SVOffset + IncrementSize, + NewStoredVT, ST->isVolatile(), Alignment); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); +} + +/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { + int SVOffset = LD->getSrcValueOffset(); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + MVT VT = LD->getValueType(0); + MVT LoadedVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + if (VT.isFloatingPoint() || VT.isVector()) { + MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset, LD->isVolatile(), + LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + if (VT.isFloatingPoint() && LoadedVT != VT) + Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } else { + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = TLI.getRegisterType(intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. 
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset + Offset, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getSrcValue(), SVOffset + Offset, + MemVT, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0, MemVT)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + NULL, 0, LoadedVT); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); + } + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + MVT NewLoadedVT; + NewLoadedVT = MVT::getIntegerVT(NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 
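// A minimal portable illustration of the two-half expansion this function
// performs, for a 32-bit little-endian load (hypothetical helper, not the
// DAG form): load the two halves separately, then combine them with the same
// OR(SHL(Hi, NumBits), Lo) pattern used below.
//
//   #include <cstdint>
//   uint32_t loadUnaligned32LE(const unsigned char *p) {
//     uint32_t Lo = (uint32_t)p[0] | ((uint32_t)p[1] << 8);  // low half
//     uint32_t Hi = (uint32_t)p[2] | ((uint32_t)p[3] << 8);  // high half,
//                                     // IncrementSize (= 2) bytes further on
//     return Lo | (Hi << 16);        // OR(SHL(Hi, NumBits), Lo)
//   }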
+  if (HiExtType == ISD::NON_EXTLOAD)
+    HiExtType = ISD::ZEXTLOAD;
+
+  // Load the value in two parts
+  SDValue Lo, Hi;
+  if (TLI.isLittleEndian()) {
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        MinAlign(Alignment, IncrementSize));
+  } else {
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+                        MinAlign(Alignment, IncrementSize));
+  }
+
+  // Aggregate the two parts.
+  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+  SDValue Ops[] = { Result, TF };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction.  In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+                               DebugLoc dl) {
+  SDValue Tmp1 = Vec;
+  SDValue Tmp2 = Val;
+  SDValue Tmp3 = Idx;
+
+  // If the target doesn't support this, we have to spill the input vector
+  // to a temporary stack slot, update the element, then reload it.  This is
+  // badness.  We could also load the value into a vector register (either
+  // with a "move to register" or "extload into register" instruction), then
+  // permute it into place, if the idx is a constant and if the idx is
+  // supported by the target.
+  MVT VT = Tmp1.getValueType();
+  MVT EltVT = VT.getVectorElementType();
+  MVT IdxVT = Tmp3.getValueType();
+  MVT PtrVT = TLI.getPointerTy();
+  SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+  // Store the vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+                            PseudoSourceValue::getFixedStack(SPFI), 0);
+
+  // Truncate or zero extend offset to target pointer type.
+  unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+  // Add the offset to the index.
+  unsigned EltSize = EltVT.getSizeInBits()/8;
+  Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
+                     DAG.getConstant(EltSize, IdxVT));
+  SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+  // Store the scalar value.
+  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+  // Load the updated vector.
+  return DAG.getLoad(VT, dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+  if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+    // SCALAR_TO_VECTOR requires that the type of the value being inserted
+    // match the element type of the vector being created, except for
+    // integers in which case the inserted value can be over width.
+    MVT EltVT = Vec.getValueType().getVectorElementType();
+    if (Val.getValueType() == EltVT ||
+        (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+      SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                  Vec.getValueType(), Val);
+
+      unsigned NumElts = Vec.getValueType().getVectorNumElements();
+      // We generate a shuffle of InVec and ScVec, so the shuffle mask
+      // should be 0,1,2,3,4,5... with the appropriate element replaced with
+      // elt 0 of the RHS.
+      SmallVector<int, 8> ShufOps;
+      for (unsigned i = 0; i != NumElts; ++i)
+        ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+      return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+                                  &ShufOps[0]);
+    }
+  }
+  return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal.  Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+  if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+    return Op;
+
+  SDNode *Node = Op.getNode();
+  DebugLoc dl = Node->getDebugLoc();
+
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+    assert(getTypeAction(Node->getValueType(i)) == Legal &&
+           "Unexpected illegal type!");
+
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+            Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+           "Unexpected illegal type!");
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  SDValue Result = Op;
+  bool isCustom = false;
+
+  // Figure out the correct action; the way to query this varies by opcode.
+  TargetLowering::LegalizeAction Action;
+  bool SimpleFinishLegalizing = true;
+  switch (Node->getOpcode()) {
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::VAARG:
+  case ISD::STACKSAVE:
+    Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::EXTRACT_VECTOR_ELT:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    break;
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+    break;
+  }
+  case ISD::SELECT_CC:
+  case ISD::SETCC:
+  case ISD::BR_CC: {
+    unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+                         Node->getOpcode() == ISD::SETCC ? 2 : 1;
+    unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+    MVT OpVT = Node->getOperand(CompareOperand).getValueType();
+    ISD::CondCode CCCode =
+        cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+    Action = TLI.getCondCodeAction(CCCode, OpVT);
+    if (Action == TargetLowering::Legal) {
+      if (Node->getOpcode() == ISD::SELECT_CC)
+        Action = TLI.getOperationAction(Node->getOpcode(),
+                                        Node->getValueType(0));
+      else
+        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+    }
+    break;
+  }
+  case ISD::LOAD:
+  case ISD::STORE:
+    // FIXME: Model these properly.  LOAD and STORE are complicated, and
+    // STORE expects the unlegalized operand in some cases.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::CALLSEQ_START:
+  case ISD::CALLSEQ_END:
+    // FIXME: This shouldn't be necessary.  These nodes have special properties
+    // dealing with the recursive nature of legalization.  Removing this
+    // special case should be done as part of making LegalizeDAG non-recursive.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::CALL:
+    // FIXME: Legalization for calls requires custom-lowering the call before
+    // legalizing the operands!  (I haven't looked into precisely why.)
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::EXTRACT_ELEMENT:
+  case ISD::FLT_ROUNDS_:
+  case ISD::SADDO:
+  case ISD::SSUBO:
+  case ISD::UADDO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+  case ISD::FPOWI:
+  case ISD::MERGE_VALUES:
+  case ISD::EH_RETURN:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be expanded.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Expand;
+    break;
+  case ISD::TRAMPOLINE:
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FORMAL_ARGUMENTS:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be custom-lowered.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Custom;
+    break;
+  case ISD::BUILD_VECTOR:
+    // A weird case: legalization for BUILD_VECTOR never legalizes the
+    // operands!
+    // FIXME: This really sucks... changing it isn't semantically incorrect,
+    // but it massively pessimizes the code for floating-point BUILD_VECTORs
+    // because ConstantFP operands get legalized into constant pool loads
+    // before the BUILD_VECTOR code can see them.  It doesn't usually bite,
+    // though, because BUILD_VECTORs usually get lowered into other nodes
+    // which get legalized properly.
+    SimpleFinishLegalizing = false;
+    break;
+  default:
+    if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+      Action = TargetLowering::Legal;
+    } else {
+      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    }
+    break;
+  }
+
+  if (SimpleFinishLegalizing) {
+    SmallVector<SDValue, 8> Ops, ResultVals;
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+      Ops.push_back(LegalizeOp(Node->getOperand(i)));
+    switch (Node->getOpcode()) {
+    default: break;
+    case ISD::BR:
+    case ISD::BRIND:
+    case ISD::BR_JT:
+    case ISD::BR_CC:
+    case ISD::BRCOND:
+    case ISD::RET:
+      // Branches tweak the chain to include LastCALLSEQ_END.
+      Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+                           LastCALLSEQ_END);
+      Ops[0] = LegalizeOp(Ops[0]);
+      LastCALLSEQ_END = DAG.getEntryNode();
+      break;
+    case ISD::SHL:
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::ROTL:
+    case ISD::ROTR:
+      // Legalizing shifts/rotates requires adjusting the shift amount
+      // to the appropriate width.
+      if (!Ops[1].getValueType().isVector())
+        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+      break;
+    }
+
+    Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
+                                    Ops.size());
+    switch (Action) {
+    case TargetLowering::Legal:
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        ResultVals.push_back(Result.getValue(i));
+      break;
+    case TargetLowering::Custom:
+      // FIXME: The handling for custom lowering with multiple results is
+      // a complete mess.
+      Tmp1 = TLI.LowerOperation(Result, DAG);
+      if (Tmp1.getNode()) {
+        for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+          if (e == 1)
+            ResultVals.push_back(Tmp1);
+          else
+            ResultVals.push_back(Tmp1.getValue(i));
+        }
+        break;
+      }
+
+      // FALL THROUGH
+    case TargetLowering::Expand:
+      ExpandNode(Result.getNode(), ResultVals);
+      break;
+    case TargetLowering::Promote:
+      PromoteNode(Result.getNode(), ResultVals);
+      break;
+    }
+    if (!ResultVals.empty()) {
+      for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+        if (ResultVals[i] != SDValue(Node, i))
+          ResultVals[i] = LegalizeOp(ResultVals[i]);
+        AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+      }
+      return ResultVals[Op.getResNo()];
+    }
+  }
+
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to legalize this operator!");
+    abort();
+  case ISD::CALL:
+    // The only option for this is to custom lower it.
+    Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+    assert(Tmp3.getNode() && "Target didn't custom lower this node!");
+    // A call within a calling sequence must be legalized to something
+    // other than the normal CALLSEQ_END.  Violating this gets Legalize
+    // into an infinite loop.
+    assert((!IsLegalizingCall ||
+            Node->getOpcode() != ISD::CALL ||
+            Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) &&
+           "Nested CALLSEQ_START..CALLSEQ_END not supported.");
+
+    // The number of incoming and outgoing values should match, unless the
+    // final outgoing value is a flag.
+    assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() ||
+            (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 &&
+             Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) ==
+               MVT::Flag)) &&
+           "Lowering call/formal_arguments produced unexpected # results!");
+
+    // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+    // remember that we legalized all of them, so it doesn't get relegalized.
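+    // (Values of type MVT::Flag are skipped in the loop below; flag results
+    // are not cached in LegalizedNodes the way ordinary values are.)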
+    for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) {
+      if (Tmp3.getNode()->getValueType(i) == MVT::Flag)
+        continue;
+      Tmp1 = LegalizeOp(Tmp3.getValue(i));
+      if (Op.getResNo() == i)
+        Tmp2 = Tmp1;
+      AddLegalizedOperand(SDValue(Node, i), Tmp1);
+    }
+    return Tmp2;
+  case ISD::BUILD_VECTOR:
+    switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp3 = TLI.LowerOperation(Result, DAG);
+      if (Tmp3.getNode()) {
+        Result = Tmp3;
+        break;
+      }
+      // FALLTHROUGH
+    case TargetLowering::Expand:
+      Result = ExpandBUILD_VECTOR(Result.getNode());
+      break;
+    }
+    break;
+  case ISD::CALLSEQ_START: {
+    SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+    // Recursively Legalize all of the inputs of the call end that do not lead
+    // to this call start.  This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+    {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+    for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+      LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+                                   NodesLeadingTo);
+    }
+
+    // Now that we legalized all of the inputs (which may have inserted
+    // libcalls) create the new CALLSEQ_START node.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+
+    // Merge in the last call, to ensure that this call starts after the last
+    // call ended.
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+      Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                         Tmp1, LastCALLSEQ_END);
+      Tmp1 = LegalizeOp(Tmp1);
+    }
+
+    // Do not try to legalize the target-specific arguments (#1+).
+    if (Tmp1 != Node->getOperand(0)) {
+      SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+      Ops[0] = Tmp1;
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+    }
+
+    // Remember that the CALLSEQ_START is legalized.
+    AddLegalizedOperand(Op.getValue(0), Result);
+    if (Node->getNumValues() == 2)    // If this has a flag result, remember it.
+      AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+    // Now that the callseq_start and all of the non-call nodes above this call
+    // sequence have been legalized, legalize the call itself.  During this
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+    // Note that we are selecting this call!
+    LastCALLSEQ_END = SDValue(CallEnd, 0);
+    IsLegalizingCall = true;
+
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+    // This will cause this node to be legalized as well as handling libcalls
+    // right.
+    if (LastCALLSEQ_END.getNode() != Node) {
+      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+
+    // Otherwise, the call start has been legalized and everything is going
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
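+    // (If the node's last operand is a flag, it is relegalized below along
+    // with the chain; all other operands are left untouched.)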
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag) {
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+      }
+    }
+    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+    // This finishes up call legalization.
+    IsLegalizingCall = false;
+
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.getResNo());
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr());  // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    if (ExtType == ISD::NON_EXTLOAD) {
+      MVT VT = Node->getValueType(0);
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned load and the target doesn't support it,
+        // expand it.
+        if (!TLI.allowsUnalignedMemoryAccesses()) {
+          unsigned ABIAlignment = TLI.getTargetData()->
+            getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+          if (LD->getAlignment() < ABIAlignment) {
+            Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+                                         DAG, TLI);
+            Tmp3 = Result.getOperand(0);
+            Tmp4 = Result.getOperand(1);
+            Tmp3 = LegalizeOp(Tmp3);
+            Tmp4 = LegalizeOp(Tmp4);
+          }
+        }
+        break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+        if (Tmp1.getNode()) {
+          Tmp3 = LegalizeOp(Tmp1);
+          Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        }
+        break;
+      case TargetLowering::Promote: {
+        // Only promote a load of vector type to another.
+        assert(VT.isVector() && "Cannot promote this load!");
+        // Change base type to a different vector type.
+        MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+                           LD->getSrcValueOffset(),
+                           LD->isVolatile(), LD->getAlignment());
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+        Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        break;
+      }
+      }
+      // Since loads produce two values, make sure to remember that we
+      // legalized both of them.
+      AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+      AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+      return Op.getResNo() ? Tmp4 : Tmp3;
+    } else {
+      MVT SrcVT = LD->getMemoryVT();
+      unsigned SrcWidth = SrcVT.getSizeInBits();
+      int SVOffset = LD->getSrcValueOffset();
+      unsigned Alignment = LD->getAlignment();
+      bool isVolatile = LD->isVolatile();
+
+      if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+          // Some targets pretend to have an i1 loading operation, and actually
+          // load an i8.  This trick is correct for ZEXTLOAD because the top 7
+          // bits are guaranteed to be zero; it helps the optimizers understand
+          // that these bits are zero.
+          // It is also useful for EXTLOAD, since it tells the optimizers that
+          // those bits are undefined.  It would be nice to have an effective
+          // generic way of getting these benefits...  Until such a way is
+          // found, don't insist on promoting i1 here.
+          (SrcVT != MVT::i1 ||
+           TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+        // Promote to a byte-sized load if not loading an integral number of
+        // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+        unsigned NewWidth = SrcVT.getStoreSizeInBits();
+        MVT NVT = MVT::getIntegerVT(NewWidth);
+        SDValue Ch;
+
+        // The extra bits are guaranteed to be zero, since we stored them that
+        // way.  A zext load from NVT thus automatically gives zext from SrcVT.
+
+        ISD::LoadExtType NewExtType =
+          ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+        Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
+                                NVT, isVolatile, Alignment);
+
+        Ch = Result.getValue(1); // The chain.
+
+        if (ExtType == ISD::SEXTLOAD)
+          // Having the top bits zero doesn't help when sign extending.
+          Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                               Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+          // All the top bits are guaranteed to be zero - inform the optimizers.
+          Result = DAG.getNode(ISD::AssertZext, dl,
+                               Result.getValueType(), Result,
+                               DAG.getValueType(SrcVT));
+
+        Tmp1 = LegalizeOp(Result);
+        Tmp2 = LegalizeOp(Ch);
+      } else if (SrcWidth & (SrcWidth - 1)) {
+        // If not loading a power-of-2 number of bits, expand as two loads.
+        assert(SrcVT.isExtended() && !SrcVT.isVector() &&
+               "Unsupported extload!");
+        unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+        assert(RoundWidth < SrcWidth);
+        unsigned ExtraWidth = SrcWidth - RoundWidth;
+        assert(ExtraWidth < RoundWidth);
+        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+               "Load size not an integral number of bytes!");
+        MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+        MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+        SDValue Lo, Hi, Ch;
+        unsigned IncrementSize;
+
+        if (TLI.isLittleEndian()) {
+          // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+          // Load the bottom RoundWidth bits.
+          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+                              Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+                              Alignment);
+
+          // Load the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset + IncrementSize,
+                              ExtraVT, isVolatile,
+                              MinAlign(Alignment, IncrementSize));
+
+          // Build a factor node to remember that this load is independent of
+          // the other one.
+          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+          // Move the top bits to the right place.
+          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+          // Join the hi and lo parts.
+          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+        } else {
+          // Big endian - avoid unaligned loads.
+          // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+          // Load the top RoundWidth bits.
+          Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+                              Alignment);
+
+          // Load the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+                              Node->getValueType(0), Tmp1, Tmp2,
+                              LD->getSrcValue(), SVOffset + IncrementSize,
+                              ExtraVT, isVolatile,
+                              MinAlign(Alignment, IncrementSize));
+
+          // Build a factor node to remember that this load is independent of
+          // the other one.
+          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                           Hi.getValue(1));
+
+          // Move the top bits to the right place.
+          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+          // Join the hi and lo parts.
+          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+        }
+
+        Tmp1 = LegalizeOp(Result);
+        Tmp2 = LegalizeOp(Ch);
+      } else {
+        switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Custom:
+          isCustom = true;
+          // FALLTHROUGH
+        case TargetLowering::Legal:
+          Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+          Tmp1 = Result.getValue(0);
+          Tmp2 = Result.getValue(1);
+
+          if (isCustom) {
+            Tmp3 = TLI.LowerOperation(Result, DAG);
+            if (Tmp3.getNode()) {
+              Tmp1 = LegalizeOp(Tmp3);
+              Tmp2 = LegalizeOp(Tmp3.getValue(1));
+            }
+          } else {
+            // If this is an unaligned load and the target doesn't support it,
+            // expand it.
+            if (!TLI.allowsUnalignedMemoryAccesses()) {
+              unsigned ABIAlignment = TLI.getTargetData()->
+                getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+              if (LD->getAlignment() < ABIAlignment) {
+                Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+                                             DAG, TLI);
+                Tmp1 = Result.getOperand(0);
+                Tmp2 = Result.getOperand(1);
+                Tmp1 = LegalizeOp(Tmp1);
+                Tmp2 = LegalizeOp(Tmp2);
+              }
+            }
+          }
+          break;
+        case TargetLowering::Expand:
+          // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+          if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+            SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+                                       LD->getSrcValueOffset(),
+                                       LD->isVolatile(), LD->getAlignment());
+            Result = DAG.getNode(ISD::FP_EXTEND, dl,
+                                 Node->getValueType(0), Load);
+            Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+            Tmp2 = LegalizeOp(Load.getValue(1));
+            break;
+          }
+          assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!");
+          // Turn the unsupported load into an EXTLOAD followed by an explicit
+          // zero/sign extend inreg.
+          Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+                                  Tmp1, Tmp2, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(), SrcVT,
+                                  LD->isVolatile(), LD->getAlignment());
+          SDValue ValRes;
+          if (ExtType == ISD::SEXTLOAD)
+            ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                                 Result.getValueType(),
+                                 Result, DAG.getValueType(SrcVT));
+          else
+            ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+          break;
+        }
+      }
+
+      // Since loads produce two values, make sure to remember that we legalized
+      // both of them.
+      AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+      AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+      return Op.getResNo() ? Tmp2 : Tmp1;
+    }
+  }
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(Node);
+    Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
+    int SVOffset = ST->getSrcValueOffset();
+    unsigned Alignment = ST->getAlignment();
+    bool isVolatile = ST->isVolatile();
+
+    if (!ST->isTruncatingStore()) {
+      // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+      // FIXME: We shouldn't do this for TargetConstantFP's.
+      // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+      // to phase ordering between legalized code and the dag combiner.  This
+      // probably means that we need to integrate dag combiner and legalizer
+      // together.
+      // We generally can't do this one for long doubles.
+      if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+        if (CFP->getValueType(0) == MVT::f32 &&
+            getTypeAction(MVT::i32) == Legal) {
+          Tmp3 = DAG.getConstant(CFP->getValueAPF().
+                                   bitcastToAPInt().zextOrTrunc(32),
+                                 MVT::i32);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                SVOffset, isVolatile, Alignment);
+          break;
+        } else if (CFP->getValueType(0) == MVT::f64) {
+          // If this target supports 64-bit registers, do a single 64-bit store.
+          if (getTypeAction(MVT::i64) == Legal) {
+            Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                     zextOrTrunc(64), MVT::i64);
+            Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                  SVOffset, isVolatile, Alignment);
+            break;
+          } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+            // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+            // stores.  If the target supports neither 32- nor 64-bits, this
+            // xform is certainly not worth it.
+            const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+            SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+            SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+            if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+            Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+                              SVOffset, isVolatile, Alignment);
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                               DAG.getIntPtrConstant(4));
+            Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+                              isVolatile, MinAlign(Alignment, 4U));
+
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+            break;
+          }
+        }
+      }
+
+      {
+        Tmp3 = LegalizeOp(ST->getValue());
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                        ST->getOffset());
+
+        MVT VT = Tmp3.getValueType();
+        switch (TLI.getOperationAction(ISD::STORE, VT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:
+          // If this is an unaligned store and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses()) {
+            unsigned ABIAlignment = TLI.getTargetData()->
+              getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+            if (ST->getAlignment() < ABIAlignment)
+              Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                            DAG, TLI);
+          }
+          break;
+        case TargetLowering::Custom:
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.getNode()) Result = Tmp1;
+          break;
+        case TargetLowering::Promote:
+          assert(VT.isVector() && "Unknown legal promote case!");
+          Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+                                ST->getSrcValue(), SVOffset, isVolatile,
+                                Alignment);
+          break;
+        }
+        break;
+      }
+    } else {
+      Tmp3 = LegalizeOp(ST->getValue());
+
+      MVT StVT = ST->getMemoryVT();
+      unsigned StWidth = StVT.getSizeInBits();
+
+      if (StWidth != StVT.getStoreSizeInBits()) {
+        // Promote to a byte-sized store with upper bits zero if not
+        // storing an integral number of bytes.
+        // For example, promote TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+        MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits());
+        Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                   SVOffset, NVT, isVolatile, Alignment);
+      } else if (StWidth & (StWidth - 1)) {
+        // If not storing a power-of-2 number of bits, expand as two stores.
+        assert(StVT.isExtended() && !StVT.isVector() &&
+               "Unsupported truncstore!");
+        unsigned RoundWidth = 1 << Log2_32(StWidth);
+        assert(RoundWidth < StWidth);
+        unsigned ExtraWidth = StWidth - RoundWidth;
+        assert(ExtraWidth < RoundWidth);
+        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+               "Store size not an integral number of bytes!");
+        MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+        MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+        SDValue Lo, Hi;
+        unsigned IncrementSize;
+
+        if (TLI.isLittleEndian()) {
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+          // Store the bottom RoundWidth bits.
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                 SVOffset, RoundVT,
+                                 isVolatile, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 MinAlign(Alignment, IncrementSize));
+        } else {
+          // Big endian - avoid unaligned stores.
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+          // Store the top RoundWidth bits.
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+                                 SVOffset, RoundVT, isVolatile, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
+                                 MinAlign(Alignment, IncrementSize));
+        }
+
+        // The order of the stores doesn't matter.
+        Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+      } else {
+        if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+            Tmp2 != ST->getBasePtr())
+          Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+                                          ST->getOffset());
+
+        switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:
+          // If this is an unaligned store and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses()) {
+            unsigned ABIAlignment = TLI.getTargetData()->
+              getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+            if (ST->getAlignment() < ABIAlignment)
+              Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                            DAG, TLI);
+          }
+          break;
+        case TargetLowering::Custom:
+          Result = TLI.LowerOperation(Result, DAG);
+          break;
+        case Expand:
+          // TRUNCSTORE:i16 i32 -> STORE i16
+          assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+          Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                                SVOffset, isVolatile, Alignment);
+          break;
+        }
+      }
+    }
+    break;
+  }
+  }
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+  SDValue Vec = Op.getOperand(0);
+  SDValue Idx = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+  else
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1 = Node->getOperand(0);
+  SDValue Tmp2 = Node->getOperand(1);
+  assert((Tmp2.getValueType() == MVT::f32 ||
+          Tmp2.getValueType() == MVT::f64) &&
+         "Ugly special-cased code!");
+  // Get the sign bit of the RHS.
+  SDValue SignBit;
+  MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+  if (isTypeLegal(IVT)) {
+    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+  } else {
+    assert(isTypeLegal(TLI.getPointerTy()) &&
+           (TLI.getPointerTy() == MVT::i32 ||
+            TLI.getPointerTy() == MVT::i64) &&
+           "Legal type for load?!");
+    SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
+    SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+    SDValue Ch =
+        DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+    if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
+      LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
+                            LoadPtr, DAG.getIntPtrConstant(4));
+    SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
+                             Ch, LoadPtr, NULL, 0, MVT::i32);
+  }
+  SignBit =
+      DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+                   SignBit, DAG.getConstant(0, SignBit.getValueType()),
+                   ISD::SETLT);
+  // Get the absolute value of the result.
+  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+  // Select between the nabs and abs value based on the sign bit of
+  // the input.
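+  // (This computes copysign(Tmp1, Tmp2): SignBit is true exactly when Tmp2
+  // is negative, so the select below yields -|Tmp1| in that case and |Tmp1|
+  // otherwise.)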
+  return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+                     DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+                     AbsVal);
+}
+
+SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  DwarfWriter *DW = DAG.getDwarfWriter();
+  bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC,
+                                                   MVT::Other);
+  bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
+
+  const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
+  GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit());
+  if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) {
+    DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit()));
+
+    unsigned Line = DSP->getLine();
+    unsigned Col = DSP->getColumn();
+
+    if (OptLevel == CodeGenOpt::None) {
+      // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it
+      // won't hurt anything.
+      if (useDEBUG_LOC) {
+        return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
+                           DAG.getConstant(Line, MVT::i32),
+                           DAG.getConstant(Col, MVT::i32),
+                           DAG.getSrcValue(CU.getGV()));
+      } else {
+        unsigned ID = DW->RecordSourceLine(Line, Col, CU);
+        return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
+      }
+    }
+  }
+  return Node->getOperand(0);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+                                                    SmallVectorImpl<SDValue> &Results) {
+  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+         " not tell us which reg is the stack pointer!");
+  DebugLoc dl = Node->getDebugLoc();
+  MVT VT = Node->getValueType(0);
+  SDValue Tmp1 = SDValue(Node, 0);
+  SDValue Tmp2 = SDValue(Node, 1);
+  SDValue Tmp3 = Node->getOperand(2);
+  SDValue Chain = Tmp1.getOperand(0);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+  SDValue Size = Tmp2.getOperand(1);
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+  Chain = SP.getValue(1);
+  unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+  unsigned StackAlign =
+      TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align > StackAlign)
+    SP = DAG.getNode(ISD::AND, dl, VT, SP,
+                     DAG.getConstant(-(uint64_t)Align, VT));
+  Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size);       // Value
+  Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1);     // Output chain
+
+  Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                            DAG.getIntPtrConstant(0, true), SDValue());
+
+  Results.push_back(Tmp1);
+  Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target.  This routine assumes LHS and RHS
+/// have already been legalized by LegalizeSetCCOperands.  It expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+                                                 SDValue &LHS, SDValue &RHS,
+                                                 SDValue &CC,
+                                                 DebugLoc dl) {
+  MVT OpVT = LHS.getValueType();
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+  default: assert(0 && "Unknown condition code action!");
+  case TargetLowering::Legal:
+    // Nothing to do.
+    break;
+  case TargetLowering::Expand: {
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: assert(0 && "Don't know how to expand this condition!"); abort();
+    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    // FIXME: Implement more expansions.
+    }
+
+    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC = SDValue();
+    break;
+  }
+  }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+                                               MVT SlotVT,
+                                               MVT DestVT,
+                                               DebugLoc dl) {
+  // Create the stack frame object.
+  unsigned SrcAlign =
+      TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+                                                getTypeForMVT());
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  unsigned DestAlign =
+      TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than the slot.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              SV, 0, SlotVT, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         SV, 0, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+                        false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
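+  // (Only element #0's bytes are written by the truncating store below; the
+  // remaining lanes of the reloaded vector are undefined, which matches the
+  // semantics of SCALAR_TO_VECTOR.)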
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+                                 StackPtr,
+                                 PseudoSourceValue::getFixedStack(SPFI), 0,
+                                 Node->getValueType(0).getVectorElementType());
+  return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  unsigned NumElems = Node->getNumOperands();
+  SDValue SplatValue = Node->getOperand(0);
+  DebugLoc dl = Node->getDebugLoc();
+  MVT VT = Node->getValueType(0);
+  MVT OpVT = SplatValue.getValueType();
+  MVT EltVT = VT.getVectorElementType();
+
+  // If the only non-undef value is the low element, turn this into a
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  bool isOnlyLowElement = true;
+
+  // FIXME: it would be far nicer to change this into a map<SDValue,uint64_t>
+  // and use a bitmask instead of a list of elements.
+  // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
+  std::map<SDValue, std::vector<unsigned> > Values;
+  Values[SplatValue].push_back(0);
+  bool isConstant = true;
+  if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+      SplatValue.getOpcode() != ISD::UNDEF)
+    isConstant = false;
+
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDValue V = Node->getOperand(i);
+    Values[V].push_back(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      isOnlyLowElement = false;
+    if (SplatValue != V)
+      SplatValue = SDValue(0, 0);
+
+    // If this isn't a constant element or an undef, we can't use a constant
+    // pool load.
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+        V.getOpcode() != ISD::UNDEF)
+      isConstant = false;
+  }
+
+  if (isOnlyLowElement) {
+    // If the low element is an undef too, then this whole thing is an undef.
+    if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+      return DAG.getUNDEF(VT);
+    // Otherwise, turn this into a scalar_to_vector node.
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+  }
+
+  // If all elements are constants, create a load from the constant pool.
+  if (isConstant) {
+    std::vector<Constant*> CV;
+    for (unsigned i = 0, e = NumElems; i != e; ++i) {
+      if (ConstantFPSDNode *V =
+              dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+        CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+      } else if (ConstantSDNode *V =
+                     dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+        CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+      } else {
+        assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+        const Type *OpNTy = OpVT.getTypeForMVT();
+        CV.push_back(UndefValue::get(OpNTy));
+      }
+    }
+    Constant *CP = ConstantVector::get(CV);
+    SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                       PseudoSourceValue::getConstantPool(), 0,
+                       false, Alignment);
+  }
+
+  if (SplatValue.getNode()) {   // Splat of one value?
+    // Build the shuffle constant vector: <0, 0, 0, 0>
+    SmallVector<int, 8> ZeroVec(NumElems, 0);
+
+    // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+    if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
+      // Get the splatted value into the low element of a vector register.
+      SDValue LowValVec =
+          DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
+
+      // Return shuffle(LowValVec, undef, <0,0,0,0>)
+      return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
+                                  &ZeroVec[0]);
+    }
+  }
+
+  // If there are only two unique elements, we may be able to turn this into a
+  // vector shuffle.
+  if (Values.size() == 2) {
+    // Get the two values in deterministic order.
+    SDValue Val1 = Node->getOperand(1);
+    SDValue Val2;
+    std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin();
+    if (MI->first != Val1)
+      Val2 = MI->first;
+    else
+      Val2 = (++MI)->first;
+
+    // If Val1 is an undef, make sure it ends up as Val2, to ensure that our
+    // vector shuffle has the undef vector on the RHS.
+    if (Val1.getOpcode() == ISD::UNDEF)
+      std::swap(Val1, Val2);
+
+    // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+    SmallVector<int, 8> ShuffleMask(NumElems, -1);
+
+    // Set elements of the shuffle mask for Val1.
+    std::vector<unsigned> &Val1Elts = Values[Val1];
+    for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
+      ShuffleMask[Val1Elts[i]] = 0;
+
+    // Set elements of the shuffle mask for Val2.
+    std::vector<unsigned> &Val2Elts = Values[Val2];
+    for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
+      if (Val2.getOpcode() != ISD::UNDEF)
+        ShuffleMask[Val2Elts[i]] = NumElems;
+
+    // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
+        TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
+      Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
+      Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
+      return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
+    }
+  }
+
+  // Otherwise, we can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  // Create the stack frame object.
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    unsigned Offset = TypeByteSize*i;
+
+    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+                                  Idx, SV, Offset));
+  }
+
+  SDValue StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall.  If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument.  If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+                                            bool isSigned) {
+  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    MVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForMVT();
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForMVT();
+  std::pair<SDValue, SDValue> CallInfo =
+      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                      CallingConv::C, false, Callee, Args, DAG,
+                      Node->getDebugLoc());
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+                                              RTLIB::Libcall Call_F32,
+                                              RTLIB::Libcall Call_F64,
+                                              RTLIB::Libcall Call_F80,
+                                              RTLIB::Libcall Call_PPCF128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT()) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::f32:     LC = Call_F32; break;
+  case MVT::f64:     LC = Call_F64; break;
+  case MVT::f80:     LC = Call_F80; break;
+  case MVT::ppcf128: LC = Call_PPCF128; break;
+  }
+  return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+                                               RTLIB::Libcall Call_I16,
+                                               RTLIB::Libcall Call_I32,
+                                               RTLIB::Libcall Call_I64,
+                                               RTLIB::Libcall Call_I128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT()) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i16:  LC = Call_I16; break;
+  case MVT::i32:  LC = Call_I32; break;
+  case MVT::i64:  LC = Call_I64; break;
+  case MVT::i128: LC = Call_I128; break;
+  }
+  return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                   SDValue Op0,
+                                                   MVT DestVT,
+                                                   DebugLoc dl) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8 byte buffer.
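+    // (The expansion below constructs the IEEE-754 double 2^52 + x in the
+    // stack buffer: the high word 0x43300000 is the exponent of 2^52, and
+    // the low word holds the 32-bit integer.  The 52-bit mantissa can absorb
+    // any 32-bit value exactly, so subtracting the bias 2^52 (or, for signed
+    // inputs remapped by the XOR below, 2^52 + 2^31) recovers x exactly.)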
+    SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+    // word offset constant for Hi/Lo address computation
+    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDValue Hi = StackSlot;
+    SDValue Lo = DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(), StackSlot, WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+
+    // if signed map to unsigned space
+    SDValue Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+                                  Op0Mapped, Lo, NULL, 0);
+    // initial hi portion of constructed double
+    SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+    // load the constructed double
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+    // FP constant to bias correct the final result
+    SDValue Bias = DAG.getConstantFP(isSigned ?
+                                         BitsToDouble(0x4330000080000000ULL) :
+                                         BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    // subtract the bias
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    // final result
+    SDValue Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else if (DestVT.bitsLT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+                           DAG.getIntPtrConstant(0));
+    } else if (DestVT.bitsGT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+  SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+                                 Op0, DAG.getConstant(0, Op0.getValueType()),
+                                 ISD::SETLT);
+  SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+  SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+                                  SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number.  To counteract this, the dynamic code adds an
+  // offset depending on the data type.
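+  // (Each FF value below is the bit pattern of 2^N as an IEEE single.  The
+  // i64 constant pairs that float with 0.0f, and the shift places the
+  // nonzero half so that loading 4 bytes at CPIdx + CstOffset yields 2^N
+  // when the input's sign bit was set and 0.0f otherwise.  Adding that to
+  // the SINT_TO_FP result compensates for the value having been treated as
+  // negative.)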
+  uint64_t FF;
+  switch (Op0.getValueType().getSimpleVT()) {
+  default: assert(0 && "Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+  SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+  Alignment = std::min(Alignment, 4u);
+  SDValue FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+                             PseudoSourceValue::getConstantPool(), 0,
+                             false, Alignment);
+  else {
+    FudgeInReg =
+        LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+                                  DAG.getEntryNode(), CPIdx,
+                                  PseudoSourceValue::getConstantPool(), 0,
+                                  MVT::f32, false, Alignment));
+  }
+
+  return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+                                                    MVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  MVT NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+    assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::SINT_TO_FP;
+      break;
+    }
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::UINT_TO_FP;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Zero extend our input to the
+  // desired type then run the operation on it.
+  return DAG.getNode(OpToUse, dl, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+                                                    MVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate FP_TO*INT operation to use.
+  MVT NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
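+  // (FP_TO_SINT is checked first: a wider signed result can represent the
+  // entire unsigned range of the narrower DestVT, so truncating it also
+  // gives the correct FP_TO_UINT result.)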
+  while (1) {
+    NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1);
+    assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.
+  SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+  // Truncate the result of the extended FP_TO_*INT operation to the desired
+  // size.
+  return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+  MVT VT = Op.getValueType();
+  MVT SHVT = TLI.getShiftAmountTy();
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT.getSimpleVT()) {
+  default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+  case MVT::i32:
+    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+  }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+                                             DebugLoc dl) {
+  switch (Opc) {
+  default: assert(0 && "Cannot expand this yet!");
+  case ISD::CTPOP: {
+    static const uint64_t mask[6] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL,
+      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+    };
+    MVT VT = Op.getValueType();
+    MVT ShVT = TLI.getShiftAmountTy();
+    unsigned len = VT.getSizeInBits();
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
+      unsigned EltSize = VT.isVector() ?
+        VT.getVectorElementType().getSizeInBits() : len;
+      SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
+      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::ADD, dl, VT,
+                       DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
+                       DAG.getNode(ISD::AND, dl, VT,
+                                   DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
+                                   Tmp2));
+    }
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+    // x = x | (x >>16);
+    // x = x | (x >>32); // for 64-bit input
+    // return popcount(~x);
+    //
+    // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+    MVT VT = Op.getValueType();
+    MVT ShVT = TLI.getShiftAmountTy();
+    unsigned len = VT.getSizeInBits();
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::OR, dl, VT, Op,
+                       DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+    }
+    Op = DAG.getNOT(dl, Op, VT);
+    return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+  }
+  case ISD::CTTZ: {
+    // for now, we use: { return popcount(~x & (x - 1)); }
+    // unless the target has ctlz but not ctpop, in which case we use:
+    // { return 32 - nlz(~x & (x-1)); }
+    // see also http://www.hackersdelight.org/HDcode/ntz.cc
+    MVT VT = Op.getValueType();
+    SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+                               DAG.getNOT(dl, Op, VT),
+                               DAG.getNode(ISD::SUB, dl, VT, Op,
+                                           DAG.getConstant(1, VT)));
+    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
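+    // Worked example of the identity above (illustrative): for
+    // x = 0b01011000, x - 1 = 0b01010111 and ~x = 0b10100111, so
+    // ~x & (x - 1) = 0b00000111; its popcount, 3, is exactly the number of
+    // trailing zeros of x.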
+    if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+      return DAG.getNode(ISD::SUB, dl, VT,
+                         DAG.getConstant(VT.getSizeInBits(), VT),
+                         DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+    return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+  }
+  }
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+                                      SmallVectorImpl<SDValue> &Results) {
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  switch (Node->getOpcode()) {
+  case ISD::CTPOP:
+  case ISD::CTLZ:
+  case ISD::CTTZ:
+    Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BSWAP:
+    Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+    break;
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+    break;
+  case ISD::FLT_ROUNDS_:
+    Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+    break;
+  case ISD::EH_RETURN:
+  case ISD::DECLARE:
+  case ISD::DBG_LABEL:
+  case ISD::EH_LABEL:
+  case ISD::PREFETCH:
+  case ISD::MEMBARRIER:
+  case ISD::VAEND:
+    Results.push_back(Node->getOperand(0));
+    break;
+  case ISD::DBG_STOPPOINT:
+    Results.push_back(ExpandDBG_STOPPOINT(Node));
+    break;
+  case ISD::DYNAMIC_STACKALLOC:
+    ExpandDYNAMIC_STACKALLOC(Node, Results);
+    break;
+  case ISD::MERGE_VALUES:
+    for (unsigned i = 0; i < Node->getNumValues(); i++)
+      Results.push_back(Node->getOperand(i));
+    break;
+  case ISD::UNDEF: {
+    MVT VT = Node->getValueType(0);
+    if (VT.isInteger())
+      Results.push_back(DAG.getConstant(0, VT));
+    else if (VT.isFloatingPoint())
+      Results.push_back(DAG.getConstantFP(0, VT));
+    else
+      assert(0 && "Unknown value type!");
+    break;
+  }
+  case ISD::TRAP: {
+    // If this operation is not supported, lower it to 'abort()' call
+    TargetLowering::ArgListTy Args;
+    std::pair<SDValue, SDValue> CallResult =
+      TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy,
+                      false, false, false, false, CallingConv::C, false,
+                      DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+                      Args, DAG, dl);
+    Results.push_back(CallResult.second);
+    break;
+  }
+  case ISD::FP_ROUND:
+  case ISD::BIT_CONVERT:
+    Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_EXTEND:
+    Tmp1 = EmitStackConvert(Node->getOperand(0),
+                            Node->getOperand(0).getValueType(),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    // NOTE: we could fall back on load/store here too for targets without
+    // SAR.  However, it is doubtful that any exist.
+    MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+                        ExtraVT.getSizeInBits();
+    SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+    Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+                       Node->getOperand(0), ShiftCst);
+    Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_ROUND_INREG: {
+    // The only way we can lower this is to turn it into a TRUNCSTORE,
+    // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+    // NOTE: there is a choice here between constantly creating new stack
+    // slots and always reusing the same one.  We currently always create
+    // new ones, as reuse may inhibit scheduling.
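+    // In effect (a sketch of the helper's contract, not its exact code):
+    //   SDValue Slot = DAG.CreateStackTemporary(ExtraVT);
+    //   SDValue Ch   = DAG.getTruncStore(Chain, dl, Val, Slot, ...,
+    //                                    ExtraVT);   // rounds to ExtraVT
+    //   return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Ch, Slot, ...,
+    //                         ExtraVT, ...);         // widens back to DestVT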
+    MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+                                Node->getOperand(0), Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_TO_UINT: {
+    SDValue True, False;
+    MVT VT = Node->getOperand(0).getValueType();
+    MVT NVT = Node->getValueType(0);
+    const uint64_t zero[] = {0, 0};
+    APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+    APInt x = APInt::getSignBit(NVT.getSizeInBits());
+    (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+    Tmp1 = DAG.getConstantFP(apf, VT);
+    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+                        Node->getOperand(0),
+                        Tmp1, ISD::SETLT);
+    True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+    False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+                        DAG.getNode(ISD::FSUB, dl, VT,
+                                    Node->getOperand(0), Tmp1));
+    False = DAG.getNode(ISD::XOR, dl, NVT, False,
+                        DAG.getConstant(x, NVT));
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::VAARG: {
+    const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+    MVT VT = Node->getValueType(0);
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+    // Increment the pointer, VAList, to the next vaarg
+    Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+                       DAG.getConstant(TLI.getTargetData()->
+                                       getTypeAllocSize(VT.getTypeForMVT()),
+                                       TLI.getPointerTy()));
+    // Store the incremented VAList to the legalized pointer
+    Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+    // Load the actual argument out of the pointer VAList
+    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::VACOPY: {
+    // This defaults to loading a pointer from the input and storing it to the
+    // output, returning the chain.
+    const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+    const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+    Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+                       Node->getOperand(2), VS, 0);
+    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+      // This must be an access of the only element.  Return it.
+      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+                         Node->getOperand(0));
+    else
+      Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::CONCAT_VECTORS: {
+    // Use extract/insert/build vector for now. We might try to be
+    // more clever later.
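+    // For example (an illustrative case): concatenating two v2i32 operands
+    // becomes four EXTRACT_VECTOR_ELTs feeding a single v4i32 BUILD_VECTOR.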
+    SmallVector<SDValue, 8> Ops;
+    unsigned NumOperands = Node->getNumOperands();
+    for (unsigned i = 0; i < NumOperands; ++i) {
+      SDValue SubOp = Node->getOperand(i);
+      MVT VVT = SubOp.getNode()->getValueType(0);
+      MVT EltVT = VVT.getVectorElementType();
+      unsigned NumSubElem = VVT.getVectorNumElements();
+      for (unsigned j = 0; j < NumSubElem; ++j) {
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+                                  DAG.getIntPtrConstant(j)));
+      }
+    }
+    Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+                       &Ops[0], Ops.size());
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SCALAR_TO_VECTOR:
+    Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+                                              Node->getOperand(1),
+                                              Node->getOperand(2), dl));
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 8> Mask;
+    cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+    MVT VT = Node->getValueType(0);
+    MVT EltVT = VT.getVectorElementType();
+    unsigned NumElems = VT.getVectorNumElements();
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      if (Mask[i] < 0) {
+        Ops.push_back(DAG.getUNDEF(EltVT));
+        continue;
+      }
+      unsigned Idx = Mask[i];
+      if (Idx < NumElems)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Node->getOperand(0),
+                                  DAG.getIntPtrConstant(Idx)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Node->getOperand(1),
+                                  DAG.getIntPtrConstant(Idx - NumElems)));
+    }
+    Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::EXTRACT_ELEMENT: {
+    MVT OpTy = Node->getOperand(0).getValueType();
+    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+      // 1 -> Hi
+      Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+                         DAG.getConstant(OpTy.getSizeInBits()/2,
+                                         TLI.getShiftAmountTy()));
+      Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+    } else {
+      // 0 -> Lo
+      Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+                         Node->getOperand(0));
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::STACKSAVE:
+    // Expand to CopyFromReg if the target set
+    // StackPointerRegisterToSaveRestore.
+    if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+      Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+                                           Node->getValueType(0)));
+      Results.push_back(Results[0].getValue(1));
+    } else {
+      Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+      Results.push_back(Node->getOperand(0));
+    }
+    break;
+  case ISD::STACKRESTORE:
+    // Expand to CopyToReg if the target set
+    // StackPointerRegisterToSaveRestore.
+    if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+      Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+                                         Node->getOperand(1)));
+    } else {
+      Results.push_back(Node->getOperand(0));
+    }
+    break;
+  case ISD::FCOPYSIGN:
+    Results.push_back(ExpandFCOPYSIGN(Node));
+    break;
+  case ISD::FNEG:
+    // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+    Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+    Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+                       Node->getOperand(0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FABS: {
+    // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
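+    // SETUGT is unordered-or-greater-than, so a NaN input compares true and
+    // is returned unchanged, while -0.0 compares false and takes the FNEG
+    // path, yielding +0.0.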
+    MVT VT = Node->getValueType(0);
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = DAG.getConstantFP(0.0, VT);
+    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+                        Tmp1, Tmp2, ISD::SETUGT);
+    Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FSQRT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+                                      RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+    break;
+  case ISD::FSIN:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+                                      RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+    break;
+  case ISD::FCOS:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+                                      RTLIB::COS_F80, RTLIB::COS_PPCF128));
+    break;
+  case ISD::FLOG:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+                                      RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+    break;
+  case ISD::FLOG2:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+                                      RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+    break;
+  case ISD::FLOG10:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+                                      RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+    break;
+  case ISD::FEXP:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+                                      RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+    break;
+  case ISD::FEXP2:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+                                      RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+    break;
+  case ISD::FTRUNC:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+                                      RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+    break;
+  case ISD::FFLOOR:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+                                      RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+    break;
+  case ISD::FCEIL:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+                                      RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+    break;
+  case ISD::FRINT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+                                      RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+    break;
+  case ISD::FNEARBYINT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+                                      RTLIB::NEARBYINT_F64,
+                                      RTLIB::NEARBYINT_F80,
+                                      RTLIB::NEARBYINT_PPCF128));
+    break;
+  case ISD::FPOWI:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+                                      RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+    break;
+  case ISD::FPOW:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+                                      RTLIB::POW_F80, RTLIB::POW_PPCF128));
+    break;
+  case ISD::FDIV:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+                                      RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+    break;
+  case ISD::FREM:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+                                      RTLIB::REM_F80, RTLIB::REM_PPCF128));
+    break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+         E = TLI.legal_fpimm_end(); I != E; ++I) {
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    // If this is a legal constant, turn it into a TargetConstantFP node.
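+    // For instance (illustrative, target-dependent): targets commonly mark
+    // +0.0 as a legal immediate since it can be materialized with a register
+    // xor, so it stays a constant here, while an arbitrary value such as 0.1
+    // falls through to ExpandConstantFP and ends up in the constant pool.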
+    if (isLegal)
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    MVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    // Expand a - b as a + (~b + 1).
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X-X/Y*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+                         Node->getOperand(1));
+    else if (isSigned)
+      Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+                              RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+    else
+      Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+                              RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::MULHU:
+  case ISD::MULHS: {
+    unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+                                                              ISD::SMUL_LOHI;
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+           "If this wasn't legal, it shouldn't have been created!");
+    Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+                       Node->getOperand(1));
+    Results.push_back(Tmp1.getValue(1));
+    break;
+  }
+  case ISD::MUL: {
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    // See if multiply or divide can be lowered using two-result operations.
+    // We just need the low half of the multiply; try both the signed
+    // and unsigned forms.  If the target supports both SMUL_LOHI and
+    // UMUL_LOHI, form a preference by checking which forms of plain
+    // MULH it supports.
+    bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+    bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+    bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+    bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+    unsigned OpToUse = 0;
+    if (HasSMUL_LOHI && !HasMULHS) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI && !HasMULHU) {
+      OpToUse = ISD::UMUL_LOHI;
+    } else if (HasSMUL_LOHI) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI) {
+      OpToUse = ISD::UMUL_LOHI;
+    }
+    if (OpToUse) {
+      Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+                                    Node->getOperand(1)));
+      break;
+    }
+    Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+                            RTLIB::MUL_I64, RTLIB::MUL_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SADDO:
+  case ISD::SSUBO: {
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+                              LHS, RHS);
+    Results.push_back(Sum);
+    MVT OType = Node->getValueType(1);
+
+    SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+    //   LHSSign -> LHS >= 0
+    //   RHSSign -> RHS >= 0
+    //   SumSign -> Sum >= 0
+    //
+    //   Add:
+    //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+    //   Sub:
+    //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+    //
+    SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+    SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+    SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+                                      Node->getOpcode() == ISD::SADDO ?
+                                      ISD::SETEQ : ISD::SETNE);
+
+    SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+    SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+    SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+    Results.push_back(Cmp);
+    break;
+  }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+                              LHS, RHS);
+    Results.push_back(Sum);
+    Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+                                   Node->getOpcode() == ISD::UADDO ?
+                                   ISD::SETULT : ISD::SETUGT));
+    break;
+  }
+  case ISD::BUILD_PAIR: {
+    MVT PairTy = Node->getValueType(0);
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+    Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+                       DAG.getConstant(PairTy.getSizeInBits()/2,
+                                       TLI.getShiftAmountTy()));
+    Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+    break;
+  }
+  case ISD::SELECT:
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    if (Tmp1.getOpcode() == ISD::SETCC) {
+      Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+                             Tmp2, Tmp3,
+                             cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+    } else {
+      Tmp1 = DAG.getSelectCC(dl, Tmp1,
+                             DAG.getConstant(0, Tmp1.getValueType()),
+                             Tmp2, Tmp3, ISD::SETNE);
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BR_JT: {
+    SDValue Chain = Node->getOperand(0);
+    SDValue Table = Node->getOperand(1);
+    SDValue Index = Node->getOperand(2);
+
+    MVT PTy = TLI.getPointerTy();
+    MachineFunction &MF = DAG.getMachineFunction();
+    unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
+    Index = DAG.getNode(ISD::MUL, dl, PTy,
+                        Index, DAG.getConstant(EntrySize, PTy));
+    SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+    MVT MemVT = MVT::getIntegerVT(EntrySize * 8);
+    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+                                PseudoSourceValue::getJumpTable(), 0, MemVT);
+    Addr = LD;
+    if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      // For PIC, the sequence is:
+      // BRIND(load(Jumptable + index) + RelocBase)
+      // RelocBase can be JumpTable, GOT or some sort of global base.
+      Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+                         TLI.getPICJumpTableRelocBase(Table, DAG));
+    }
+    Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BRCOND:
+    // Expand brcond's setcc into its constituent parts and create a
+    // BR_CC Node.
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    if (Tmp2.getOpcode() == ISD::SETCC) {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+                         Tmp1, Tmp2.getOperand(2),
+                         Tmp2.getOperand(0), Tmp2.getOperand(1),
+                         Node->getOperand(2));
+    } else {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+                         DAG.getCondCode(ISD::SETNE), Tmp2,
+                         DAG.getConstant(0, Tmp2.getValueType()),
+                         Node->getOperand(2));
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::SETCC: {
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    Tmp3 = Node->getOperand(2);
+    LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+    // If we expanded the SETCC into an AND/OR, return the new node
+    if (Tmp2.getNode() == 0) {
+      Results.push_back(Tmp1);
+      break;
+    }
+
+    // Otherwise, SETCC for the given comparison type must be completely
+    // illegal; expand it into a SELECT_CC.
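+    // That is, the rewrite performed below is:
+    //   setcc lhs, rhs, cc  ->  select_cc lhs, rhs, 1, 0, cc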
+    MVT VT = Node->getValueType(0);
+    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SELECT_CC: {
+    Tmp1 = Node->getOperand(0);   // LHS
+    Tmp2 = Node->getOperand(1);   // RHS
+    Tmp3 = Node->getOperand(2);   // True
+    Tmp4 = Node->getOperand(3);   // False
+    SDValue CC = Node->getOperand(4);
+
+    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+                          Tmp1, Tmp2, CC, dl);
+
+    assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+    Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+    CC = DAG.getCondCode(ISD::SETNE);
+    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+                       Tmp3, Tmp4, CC);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BR_CC: {
+    Tmp1 = Node->getOperand(0);   // Chain
+    Tmp2 = Node->getOperand(2);   // LHS
+    Tmp3 = Node->getOperand(3);   // RHS
+    Tmp4 = Node->getOperand(1);   // CC
+
+    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+                          Tmp2, Tmp3, Tmp4, dl);
+    LastCALLSEQ_END = DAG.getEntryNode();
+
+    assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+    Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+    Tmp4 = DAG.getCondCode(ISD::SETNE);
+    Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+                       Tmp3, Node->getOperand(4));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::GLOBAL_OFFSET_TABLE:
+  case ISD::GlobalAddress:
+  case ISD::GlobalTLSAddress:
+  case ISD::ExternalSymbol:
+  case ISD::ConstantPool:
+  case ISD::JumpTable:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+    // FIXME: Custom lowering for these operations shouldn't return null!
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      Results.push_back(SDValue(Node, i));
+    break;
+  }
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+                                       SmallVectorImpl<SDValue> &Results) {
+  MVT OVT = Node->getValueType(0);
+  if (Node->getOpcode() == ISD::UINT_TO_FP ||
+      Node->getOpcode() == ISD::SINT_TO_FP) {
+    OVT = Node->getOperand(0).getValueType();
+  }
+  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1, Tmp2, Tmp3;
+  switch (Node->getOpcode()) {
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+    // Zero extend the argument.
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    // Perform the larger operation.
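+    // For example (illustrative): promoting an i8 CTTZ to i32, cttz(zext x)
+    // returns 32 when x is zero, so the SELECT below clamps that case to 8,
+    // the correct answer for an all-zero i8.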
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+    if (Node->getOpcode() == ISD::CTTZ) {
+      //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+      Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+                          Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+                          ISD::SETEQ);
+      Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+                         DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+    } else if (Node->getOpcode() == ISD::CTLZ) {
+      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+      Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+                         DAG.getConstant(NVT.getSizeInBits() -
+                                         OVT.getSizeInBits(), NVT));
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BSWAP: {
+    unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+                       DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::FP_TO_SINT, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    assert(OVT.isVector() && "Don't know how to promote scalar logic ops");
+    // Bit convert each of the values to the new type.
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+    // Bit convert the result back the original type.
+    Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1));
+    break;
+  case ISD::SELECT:
+    unsigned ExtOp, TruncOp;
+    if (Node->getValueType(0).isVector()) {
+      ExtOp   = ISD::BIT_CONVERT;
+      TruncOp = ISD::BIT_CONVERT;
+    } else if (Node->getValueType(0).isInteger()) {
+      ExtOp   = ISD::ANY_EXTEND;
+      TruncOp = ISD::TRUNCATE;
+    } else {
+      ExtOp   = ISD::FP_EXTEND;
+      TruncOp = ISD::FP_ROUND;
+    }
+    Tmp1 = Node->getOperand(0);
+    // Promote each of the values to the new type.
+    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+    // Perform the larger operation, then round down.
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+    if (TruncOp != ISD::FP_ROUND)
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+    else
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+                         DAG.getIntPtrConstant(0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 8> Mask;
+    cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+    // Cast the two input vectors.
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+    // Convert the shuffle mask to the right # elements.
+    Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SETCC: {
+    // First step, figure out the appropriate operation to use.
+    // Allow SETCC to not be supported for all legal data types
+    // Mostly this targets FP
+    MVT NewInTy = Node->getOperand(0).getValueType();
+    MVT OldVT = NewInTy; OldVT = OldVT;  // Silence an unused-variable warning.
+
+    // Scan for the appropriate larger type to use.
+    while (1) {
+      NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+
+      assert(NewInTy.isInteger() == OldVT.isInteger() &&
+             "Fell off of the edge of the integer world");
+      assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() &&
+             "Fell off of the edge of the floating point world");
+
+      // If the target supports SETCC of this type, use it.
+      if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy))
+        break;
+    }
+    if (NewInTy.isInteger())
+      assert(0 && "Cannot promote Legal Integer SETCC yet");
+    else {
+      Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(0));
+      Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(1));
+    }
+    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+                                  Tmp1, Tmp2, Node->getOperand(2)));
+    break;
+  }
+  }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(bool TypesNeedLegalizing,
+                            CodeGenOpt::Level OptLevel) {
+  /// run - This is the main entry point to this class.
+  ///
+  SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..c3c1beabd5f0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1388 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float".  For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.  For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(MVT VT,
+                                   RTLIB::Libcall Call_F32,
+                                   RTLIB::Libcall Call_F64,
+                                   RTLIB::Libcall Call_F80,
+                                   RTLIB::Libcall Call_PPCF128) {
+  return
+    VT == MVT::f32 ? Call_F32 :
+    VT == MVT::f64 ? Call_F64 :
+    VT == MVT::f80 ? Call_F80 :
+    VT == MVT::ppcf128 ? Call_PPCF128 :
+    RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+//  Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "SoftenFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to soften the result of this operator!");
+    abort();
+
+  case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
+  case ISD::ConstantFP:
+    R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::FABS:        R = SoftenFloatRes_FABS(N); break;
+  case ISD::FADD:        R = SoftenFloatRes_FADD(N); break;
+  case ISD::FCEIL:       R = SoftenFloatRes_FCEIL(N); break;
+  case ISD::FCOPYSIGN:   R = SoftenFloatRes_FCOPYSIGN(N); break;
+  case ISD::FCOS:        R = SoftenFloatRes_FCOS(N); break;
+  case ISD::FDIV:        R = SoftenFloatRes_FDIV(N); break;
+  case ISD::FEXP:        R = SoftenFloatRes_FEXP(N); break;
+  case ISD::FEXP2:       R = SoftenFloatRes_FEXP2(N); break;
+  case ISD::FFLOOR:      R = SoftenFloatRes_FFLOOR(N); break;
+  case ISD::FLOG:        R = SoftenFloatRes_FLOG(N); break;
+  case ISD::FLOG2:       R = SoftenFloatRes_FLOG2(N); break;
+  case ISD::FLOG10:      R = SoftenFloatRes_FLOG10(N); break;
+  case ISD::FMUL:        R = SoftenFloatRes_FMUL(N); break;
+  case ISD::FNEARBYINT:  R = SoftenFloatRes_FNEARBYINT(N); break;
+  case ISD::FNEG:        R = SoftenFloatRes_FNEG(N); break;
+  case ISD::FP_EXTEND:   R = SoftenFloatRes_FP_EXTEND(N); break;
+  case ISD::FP_ROUND:    R = SoftenFloatRes_FP_ROUND(N); break;
+  case ISD::FPOW:        R = SoftenFloatRes_FPOW(N); break;
+  case ISD::FPOWI:       R = SoftenFloatRes_FPOWI(N); break;
+  case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
+  case ISD::FRINT:       R = SoftenFloatRes_FRINT(N); break;
+  case ISD::FSIN:        R = SoftenFloatRes_FSIN(N); break;
+  case ISD::FSQRT:       R = SoftenFloatRes_FSQRT(N); break;
+  case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
+  case ISD::FTRUNC:      R = SoftenFloatRes_FTRUNC(N); break;
+  case ISD::LOAD:        R = SoftenFloatRes_LOAD(N); break;
+  case ISD::SELECT:      R = SoftenFloatRes_SELECT(N); break;
+  case ISD::SELECT_CC:   R = SoftenFloatRes_SELECT_CC(N); break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:  R = SoftenFloatRes_XINT_TO_FP(N); break;
+  case ISD::UNDEF:       R = SoftenFloatRes_UNDEF(N); break;
+  case ISD::VAARG:       R = SoftenFloatRes_VAARG(N); break;
+  }
+
+  // If R is null, the sub-method took care of registering the result.
+  if (R.getNode())
+    SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+  return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+  // Convert the inputs to integers, and build a new pair out of them.
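+  // For example (illustrative, on a target that softens this type): a
+  // ppcf128 BUILD_PAIR of two f64 halves becomes an i128 BUILD_PAIR of
+  // their i64 bit patterns.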
+  return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(N->getValueType(0)),
+                     BitConvertToInteger(N->getOperand(0)),
+                     BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+  return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+                         TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     NewOp.getValueType().getVectorElementType(),
+                     NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned Size = NVT.getSizeInBits();
+
+  // Mask = ~(1 << (Size-1))
+  SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
+                                 NVT);
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::ADD_F32,
+                                  RTLIB::ADD_F64,
+                                  RTLIB::ADD_F80,
+                                  RTLIB::ADD_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::CEIL_F32,
+                                  RTLIB::CEIL_F64,
+                                  RTLIB::CEIL_F80,
+                                  RTLIB::CEIL_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+  SDValue RHS = BitConvertToInteger(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT LVT = LHS.getValueType();
+  MVT RVT = RHS.getValueType();
+
+  unsigned LSize = LVT.getSizeInBits();
+  unsigned RSize = RVT.getSizeInBits();
+
+  // First get the sign bit of second operand.
+  SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+                                DAG.getConstant(RSize - 1,
+                                                TLI.getShiftAmountTy()));
+  SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+  // Shift right or sign-extend it if the two operands have different types.
+  int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+  if (SizeDiff > 0) {
+    SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+    SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+  } else if (SizeDiff < 0) {
+    SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+    SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+                          DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+  }
+
+  // Clear the sign bit of the first operand.
+  SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+                             DAG.getConstant(LSize - 1,
+                                             TLI.getShiftAmountTy()));
+  Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+  LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+  // Or the value with the sign bit.
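+  // Worked example in i32 (f32 softened, illustrative values): for
+  // copysign(1.0f, -2.0f), LHS = 0x3F800000 and RHS = 0xC0000000.  SignBit
+  // becomes 0x80000000, the mask clears the LHS sign bit (leaving
+  // 0x3F800000), and the OR below produces 0xBF800000, i.e. -1.0f.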
+  return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::COS_F32,
+                                  RTLIB::COS_F64,
+                                  RTLIB::COS_F80,
+                                  RTLIB::COS_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::DIV_F32,
+                                  RTLIB::DIV_F64,
+                                  RTLIB::DIV_F80,
+                                  RTLIB::DIV_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::EXP_F32,
+                                  RTLIB::EXP_F64,
+                                  RTLIB::EXP_F80,
+                                  RTLIB::EXP_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::EXP2_F32,
+                                  RTLIB::EXP2_F64,
+                                  RTLIB::EXP2_F80,
+                                  RTLIB::EXP2_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::FLOOR_F32,
+                                  RTLIB::FLOOR_F64,
+                                  RTLIB::FLOOR_F80,
+                                  RTLIB::FLOOR_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG_F32,
+                                  RTLIB::LOG_F64,
+                                  RTLIB::LOG_F80,
+                                  RTLIB::LOG_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG2_F32,
+                                  RTLIB::LOG2_F64,
+                                  RTLIB::LOG2_F80,
+                                  RTLIB::LOG2_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::LOG10_F32,
+                                  RTLIB::LOG10_F64,
+                                  RTLIB::LOG10_F80,
+                                  RTLIB::LOG10_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::MUL_F32,
+                                  RTLIB::MUL_F64,
+                                  RTLIB::MUL_F80,
+                                  RTLIB::MUL_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::NEARBYINT_F32,
+                                  RTLIB::NEARBYINT_F64,
+                                  RTLIB::NEARBYINT_F80,
+                                  RTLIB::NEARBYINT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+  SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+                     GetSoftenedFloat(N->getOperand(0)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SUB_F32,
+                                  RTLIB::SUB_F64,
+                                  RTLIB::SUB_F80,
+                                  RTLIB::SUB_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+  return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+  return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::POW_F32,
+                                  RTLIB::POW_F64,
+                                  RTLIB::POW_F80,
+                                  RTLIB::POW_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+  assert(N->getOperand(1).getValueType() == MVT::i32 &&
+         "Unsupported power type!");
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::POWI_F32,
+                                  RTLIB::POWI_F64,
+                                  RTLIB::POWI_F80,
+                                  RTLIB::POWI_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::REM_F32,
+                                  RTLIB::REM_F64,
+                                  RTLIB::REM_F80,
+                                  RTLIB::REM_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::RINT_F32,
+                                  RTLIB::RINT_F64,
+                                  RTLIB::RINT_F80,
+                                  RTLIB::RINT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SIN_F32,
+                                  RTLIB::SIN_F64,
+                                  RTLIB::SIN_F80,
+                                  RTLIB::SIN_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SQRT_F32,
+                                  RTLIB::SQRT_F64,
+                                  RTLIB::SQRT_F80,
+                                  RTLIB::SQRT_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+                     GetSoftenedFloat(N->getOperand(1)) };
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::SUB_F32,
+                                  RTLIB::SUB_F64,
+                                  RTLIB::SUB_F80,
+                                  RTLIB::SUB_PPCF128),
+                     NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                  RTLIB::TRUNC_F32,
+                                  RTLIB::TRUNC_F64,
+                                  RTLIB::TRUNC_F80,
+                                  RTLIB::TRUNC_PPCF128),
+                     NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  MVT VT = N->getValueType(0);
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewL;
+  if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+    NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
+                       NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+                       L->getSrcValue(), L->getSrcValueOffset(), NVT,
+                       L->isVolatile(), L->getAlignment());
+    // Legalized the chain result - switch anything that used the old chain to
+    // use the new one.
+    ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+    return NewL;
+  }
+
+  // Do a non-extending load followed by FP_EXTEND.
+  NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
+                     L->getMemoryVT(), L->getChain(),
+                     L->getBasePtr(), L->getOffset(),
+                     L->getSrcValue(), L->getSrcValueOffset(),
+                     L->getMemoryVT(),
+                     L->isVolatile(), L->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+  return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1);   // Get the pointer.
+  MVT VT = N->getValueType(0);
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewVAARG;
+  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+  return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+  MVT SVT = N->getOperand(0).getValueType();
+  MVT RVT = N->getValueType(0);
+  MVT NVT = MVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+  // a larger type, eg: i8 -> fp.  Even if it is legal, no libcall may exactly
+  // match.  Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                           NVT, N->getOperand(0));
+  return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to soften this operator's operand!");
+    abort();
+
+  case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
+  case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
+  case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
+  case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
+  case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results
+  // etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                           ISD::CondCode &CCCode, DebugLoc dl) {
+  SDValue LHSInt = GetSoftenedFloat(NewLHS);
+  SDValue RHSInt = GetSoftenedFloat(NewRHS);
+  MVT VT = NewLHS.getValueType();
+
+  assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+  // Expand into one or more soft-fp libcall(s).
+  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+  switch (CCCode) {
+  case ISD::SETEQ:
+  case ISD::SETOEQ:
+    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+    break;
+  case ISD::SETNE:
+  case ISD::SETUNE:
+    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+    break;
+  case ISD::SETGE:
+  case ISD::SETOGE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+    break;
+  case ISD::SETLT:
+  case ISD::SETOLT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+    break;
+  case ISD::SETLE:
+  case ISD::SETOLE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+    break;
+  case ISD::SETGT:
+  case ISD::SETOGT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+    break;
+  case ISD::SETUO:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+    break;
+  case ISD::SETO:
+    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+    break;
+  default:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+    switch (CCCode) {
+    case ISD::SETONE:
+      // SETONE = SETOLT | SETOGT
+      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+      // Fallthrough
+    case ISD::SETUGT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+      break;
+    case ISD::SETUGE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+      break;
+    case ISD::SETULT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+      break;
+    case ISD::SETULE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+      break;
+    case ISD::SETUEQ:
+      LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+      break;
+    default: assert(false && "Do not know how to soften this setcc!");
+    }
+  }
+
+  MVT RetVT = MVT::i32; // FIXME: is this the correct return type?
+  SDValue Ops[2] = { LHSInt, RHSInt };
+  NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = TLI.getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+                              NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  MVT SVT = N->getOperand(0).getValueType();
+  MVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  MVT SVT = N->getOperand(0).getValueType();
+  MVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only soften the stored value!");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  DebugLoc dl = N->getDebugLoc();
+
+  if (ST->isTruncatingStore())
+    // Do an FP_ROUND followed by a non-truncating store.
+    Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+                                          Val, DAG.getIntPtrConstant(0)));
+  else
+    Val = GetSoftenedFloat(Val);
+
+  return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+                      ST->getSrcValue(), ST->getSrcValueOffset(),
+                      ST->isVolatile(), ST->getAlignment());
+}
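+
+// Illustrative sketch of the truncating-store path above: storing an f64
+// value through an f32 memory type first rounds, then stores the raw bits
+// as an integer.  __truncdfsf2 is the conventional libgcc name, assumed
+// here for illustration:
+//
+//   extern "C" float __truncdfsf2(double);
+//   void store_f32(double v, uint32_t *p) {
+//     float r = __truncdfsf2(v);   // FP_ROUND by libcall
+//     memcpy(p, &r, sizeof r);     // non-truncating i32 store of the bits
+//   }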
+
+
+//===----------------------------------------------------------------------===//
+//  Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may
+/// also have invalid operands or may have other results that need promotion;
+/// we just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to expand the result of this operator!");
+    abort();
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
+  case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
+  case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
+  case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
+  case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
+  case ISD::FEXP2:      ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+  case ISD::FFLOOR:     ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+  case ISD::FLOG:       ExpandFloatRes_FLOG(N, Lo, Hi); break;
+  case ISD::FLOG2:      ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+  case ISD::FLOG10:     ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMUL:       ExpandFloatRes_FMUL(N, Lo, Hi); break;
+  case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+  case ISD::FNEG:       ExpandFloatRes_FNEG(N, Lo, Hi); break;
+  case ISD::FP_EXTEND:  ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+  case ISD::FPOW:       ExpandFloatRes_FPOW(N, Lo, Hi); break;
+  case ISD::FPOWI:      ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+  case ISD::FRINT:      ExpandFloatRes_FRINT(N, Lo, Hi); break;
+  case ISD::FSIN:       ExpandFloatRes_FSIN(N, Lo, Hi); break;
+  case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+  case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
+  case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+  case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  assert(NVT.getSizeInBits() == integerPartWidth &&
+         "Do not know how to expand this float constant!");
+  APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+  Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[1])), NVT);
+  Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  assert(N->getValueType(0) == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Tmp;
+  GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+  Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+  // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+                   DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+                   DAG.getCondCode(ISD::SETEQ));
+}
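+
+// Illustrative sketch: a ppcf128 is a pair of doubles whose sum is the
+// value, so fabs must flip the sign of the low half exactly when the high
+// half changed sign.  A minimal model (ignoring NaN edge cases), using the
+// GCC fabs builtin:
+//
+//   void fabs_ppcf128(double &hi, double &lo) {
+//     double h = __builtin_fabs(hi);
+//     lo = (h == hi) ? lo : -lo;   // the SELECT_CC above
+//     hi = h;
+//   }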
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::ADD_F32, RTLIB::ADD_F64,
+                                         RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+                                         RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::COS_F32, RTLIB::COS_F64,
+                                         RTLIB::COS_F80, RTLIB::COS_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::DIV_F32,
+                                          RTLIB::DIV_F64,
+                                          RTLIB::DIV_F80,
+                                          RTLIB::DIV_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::EXP_F32, RTLIB::EXP_F64,
+                                         RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+                                         RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+                                         RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG_F32, RTLIB::LOG_F64,
+                                         RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+                                         RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+                                         RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::MUL_F32,
+                                          RTLIB::MUL_F64,
+                                          RTLIB::MUL_F80,
+                                          RTLIB::MUL_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+                                                 SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::NEARBYINT_F32,
+                                         RTLIB::NEARBYINT_F64,
+                                         RTLIB::NEARBYINT_F80,
+                                         RTLIB::NEARBYINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+  Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POW_F32, RTLIB::POW_F64,
+                                         RTLIB::POW_F80, RTLIB::POW_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POWI_F32, RTLIB::POWI_F64,
+                                         RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::RINT_F32, RTLIB::RINT_F64,
+                                         RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SIN_F32, RTLIB::SIN_F64,
+                                         RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+                                         RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::SUB_F32,
+                                          RTLIB::SUB_F64,
+                                          RTLIB::SUB_F80,
+                                          RTLIB::SUB_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+                                         RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+  Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+                      LD->getSrcValue(), LD->getSrcValueOffset(),
+                      LD->getMemoryVT(),
+                      LD->isVolatile(), LD->getAlignment());
+
+  // Remember the chain.
+  Chain = Hi.getValue(1);
+
+  // The low part is zero.
+  Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+  // Modified the chain - switch anything that used the old chain to use the
+  // new one.
+  ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+  MVT VT = N->getValueType(0);
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  SDValue Src = N->getOperand(0);
+  MVT SrcVT = Src.getValueType();
+  bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+  DebugLoc dl = N->getDebugLoc();
+
+  // First do an SINT_TO_FP, whether the original was signed or unsigned.
+  // When promoting partial word types to i32 we must honor the signedness,
+  // though.
+  if (SrcVT.bitsLE(MVT::i32)) {
+    // The integer can be represented exactly in an f64.
+    Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                      MVT::i32, Src);
+    Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+    Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+  } else {
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    if (SrcVT.bitsLE(MVT::i64)) {
+      Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                        MVT::i64, Src);
+      LC = RTLIB::SINTTOFP_I64_PPCF128;
+    } else if (SrcVT.bitsLE(MVT::i128)) {
+      Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+      LC = RTLIB::SINTTOFP_I128_PPCF128;
+    }
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+    Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+    GetPairElements(Hi, Lo, Hi);
+  }
+
+  if (isSigned)
+    return;
+
+  // Unsigned - fix up the SINT_TO_FP value just calculated.
+  Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+  SrcVT = Src.getValueType();
+
+  // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+  static const uint64_t TwoE32[]  = { 0x41f0000000000000LL, 0 };
+  static const uint64_t TwoE64[]  = { 0x43f0000000000000LL, 0 };
+  static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+  const uint64_t *Parts = 0;
+
+  switch (SrcVT.getSimpleVT()) {
+  default:
+    assert(false && "Unsupported UINT_TO_FP!");
+  case MVT::i32:
+    Parts = TwoE32;
+    break;
+  case MVT::i64:
+    Parts = TwoE64;
+    break;
+  case MVT::i128:
+    Parts = TwoE128;
+    break;
+  }
+
+  Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+                   DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+                                     MVT::ppcf128));
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+                   Lo, Hi, DAG.getCondCode(ISD::SETLT));
+  GetPairElements(Lo, Lo, Hi);
+}
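+
+// Illustrative sketch of the unsigned fixup above, using plain double for
+// clarity: convert as signed, then add 2^N back when the sign bit was set
+// (the TwoE* constants above are exactly 2^32, 2^64 and 2^128 as doubles).
+//
+//   double u64_to_fp(uint64_t x) {
+//     double d = (double)(int64_t)x;     // SINT_TO_FP, wrong for x >= 2^63
+//     if ((int64_t)x < 0) d += 0x1p64;   // SETLT against 0: add 2^64 back
+//     return d;
+//   }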
+
+
+//===----------------------------------------------------------------------===//
+//  Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n");
+  SDValue Res = SDValue();
+
+  if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+      == TargetLowering::Custom)
+    Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      cerr << "ExpandFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); cerr << "\n";
+#endif
+      assert(0 && "Do not know how to expand this operator's operand!");
+      abort();
+
+    case ISD::BIT_CONVERT:     Res = ExpandOp_BIT_CONVERT(N); break;
+    case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
+    case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+    case ISD::BR_CC:      Res = ExpandFloatOp_BR_CC(N); break;
+    case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
+    case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+    case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+    case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
+    case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
+    case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+                                                    OpNo); break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison.  This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+                                                SDValue &NewRHS,
+                                                ISD::CondCode &CCCode,
+                                                DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+  GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+  MVT VT = NewLHS.getValueType();
+  assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+  // FIXME: This generated code sucks.  We want to generate
+  //   FCMPU crN, hi1, hi2
+  //   BNE crN, L:
+  //   FCMPU crN, lo1, lo2
+  // The following can be improved, but not that much.
+  SDValue Tmp1, Tmp2, Tmp3;
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETOEQ);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                      LHSLo, RHSLo, CCCode);
+  Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETUNE);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, CCCode);
+  Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+  NewRHS = SDValue();   // LHS is the result, not a compare.
+}
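+
+// Illustrative sketch of the ppcf128 compare expansion above, for ordered
+// values: equal high halves defer to the low halves, otherwise the high
+// halves alone decide.
+//
+//   bool lt_ppcf128(double hi1, double lo1, double hi2, double lo2) {
+//     return (hi1 == hi2 && lo1 < lo2) ||   // Tmp3: hi equal, compare lo
+//            (hi1 != hi2 && hi1 < hi2);     // Tmp1 & Tmp2: hi decides
+//   }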
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+  assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  SDValue Lo, Hi;
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  // Round it the rest of the way (e.g. to f32) if needed.
+  return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+                     N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+                              N->getOperand(0), DAG.getValueType(MVT::f64));
+    Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+                      DAG.getIntPtrConstant(1));
+    return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+    APFloat APF = APFloat(APInt(128, 2, TwoE31));
+    SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+    // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+    // FIXME: generated code sucks.
+    return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+                       DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+                                               DAG.getNode(ISD::FSUB, dl,
+                                                           MVT::ppcf128,
+                                                           N->getOperand(0),
+                                                           Tmp)),
+                                   DAG.getConstant(0x80000000, MVT::i32)),
+                       DAG.getNode(ISD::FP_TO_SINT, dl,
+                                   MVT::i32, N->getOperand(0)),
+                       DAG.getCondCode(ISD::SETGE));
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
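+
+// Illustrative sketch of the ppcf128 -> u32 trick above, with long double
+// standing in for ppcf128 (as on PPC targets where that is the long double
+// format): values below 2^31 convert directly; larger ones subtract 2^31
+// first and set the top bit of the result afterwards.
+//
+//   uint32_t fp_to_u32(long double x) {
+//     if (x < 0x1p31L)                                  // SETGE, inverted
+//       return (uint32_t)(int32_t)x;                    // plain FP_TO_SINT
+//     return (uint32_t)(int32_t)(x - 0x1p31L) + 0x80000000u;
+//   }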
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+
+  MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType());
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+  SDValue Lo, Hi;
+  GetExpandedOp(ST->getValue(), Lo, Hi);
+
+  return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+                           ST->getSrcValue(), ST->getSrcValueOffset(),
+                           ST->getMemoryVT(),
+                           ST->isVolatile(), ST->getAlignment());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..eb9342cc8b8e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2382 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type.  For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type.  For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
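+
+// Illustrative sketch of the two strategies described above, in C terms:
+//
+//   // Promotion: an i8 add carried out in i32; the extra high bits are
+//   // junk and only the low 8 bits of the result are meaningful.
+//   int32_t add_i8(int32_t a, int32_t b) { return a + b; }
+//
+//   // Expansion: an i64 add carried out in two i32 halves with a carry.
+//   void add_i64(uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh,
+//                uint32_t *rl, uint32_t *rh) {
+//     *rl = al + bl;
+//     *rh = ah + bh + (*rl < al);   // carry out of the low half
+//   }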
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type.  At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to promote this operator!");
+    abort();
+  case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
+  case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
+  case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+  case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
+  case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
+  case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
+  case ISD::CONVERT_RNDSAT:
+                         Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+  case ISD::CTLZ:        Res = PromoteIntRes_CTLZ(N); break;
+  case ISD::CTPOP:       Res = PromoteIntRes_CTPOP(N); break;
+  case ISD::CTTZ:        Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:
+                         Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::LOAD:        Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::SELECT:      Res = PromoteIntRes_SELECT(N); break;
+  case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
+  case ISD::SHL:         Res = PromoteIntRes_SHL(N); break;
+  case ISD::SIGN_EXTEND_INREG:
+                         Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SRA:         Res = PromoteIntRes_SRA(N); break;
+  case ISD::SRL:         Res = PromoteIntRes_SRL(N); break;
+  case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
+  case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
+  case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:         Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+  case ISD::SDIV:
+  case ISD::SREM:        Res = PromoteIntRes_SDIV(N); break;
+
+  case ISD::UDIV:
+  case ISD::UREM:        Res = PromoteIntRes_UDIV(N); break;
+
+  case ISD::SADDO:
+  case ISD::SSUBO:       Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::USUBO:       Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+  case ISD::SMULO:
+  case ISD::UMULO:       Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP:
+    Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+  case ISD::ATOMIC_CMP_SWAP:
+    Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+  }
+
+  // If the result is null then the sub-method took care of registering it.
+  if (Res.getNode())
+    SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+  // Sign-extend the new bits, and continue the assertion.
+  SDValue Op = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+  // Zero the new bits, and continue the assertion.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+  SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+  SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+                              N->getMemoryVT(),
+                              N->getChain(), N->getBasePtr(),
+                              Op2, N->getSrcValue(), N->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+  SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+  SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+  SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+                              N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+                              Op2, Op3, N->getSrcValue(), N->getAlignment());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  MVT InVT = InOp.getValueType();
+  MVT NInVT = TLI.getTypeToTransformTo(InVT);
+  MVT OutVT = N->getValueType(0);
+  MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    if (NOutVT.bitsEq(NInVT))
+      // The input promotes to the same size.  Convert the promoted value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl,
+                         NOutVT, GetPromotedInteger(InOp));
+    break;
+  case SoftenFloat:
+    // Promote the integer operand by hand.
+    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+  case ExpandInteger:
+  case ExpandFloat:
+    break;
+  case ScalarizeVector:
+    // Convert the element to an integer and promote it by hand.
+    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                       BitConvertToInteger(GetScalarizedVector(InOp)));
+  case SplitVector: {
+    // For example, i32 = BIT_CONVERT v2i16 on alpha.  Convert the split
+    // pieces of the input into integers and reassemble in the final type.
+    SDValue Lo, Hi;
+    GetSplitVector(N->getOperand(0), Lo, Hi);
+    Lo = BitConvertToInteger(Lo);
+    Hi = BitConvertToInteger(Hi);
+
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+
+    InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+                       MVT::getIntegerVT(NOutVT.getSizeInBits()),
+                       JoinIntegers(Lo, Hi));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+  }
+  case WidenVector:
+    if (OutVT.bitsEq(NInVT))
+      // The input is widened to the same size.  Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+  }
+
+  // Otherwise, lower the bit-convert to a store/load from the stack.
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and destination types.
+  SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+  // Result is an extending load from the stack slot.
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  MVT OVT = N->getValueType(0);
+  MVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+  return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+                     DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
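+
+// Illustrative sketch of the BSWAP adjustment above, using the GCC builtin:
+// byte-swapping an i16 inside an i32 register leaves the interesting bytes
+// at the top, so shift them back down by the size difference (32 - 16).
+//
+//   uint16_t bswap16_via32(uint16_t x) {
+//     return (uint16_t)(__builtin_bswap32(x) >> 16);   // SRL by DiffBits
+//   }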
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+  // The pair element type may be legal, or may not promote to the same type as
+  // the result, for example i14 = BUILD_PAIR (i7, i7).  Handle all cases.
+  return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(N->getValueType(0)),
+                     JoinIntegers(N->getOperand(0), N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+  MVT VT = N->getValueType(0);
+  // FIXME there is no actual debug info here
+  DebugLoc dl = N->getDebugLoc();
+  // Zero extend things like i1, sign extend everything else.  It shouldn't
+  // matter in theory which one we pick, but this tends to give better code?
+  unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT),
+                               SDValue(N, 0));
+  assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+         "can only promote integers");
+  MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  MVT OVT = N->getValueType(0);
+  MVT NVT = Op.getValueType();
+  Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+  // Subtract off the extra leading bits in the bigger type.
+  return DAG.getNode(ISD::SUB, dl, NVT, Op,
+                     DAG.getConstant(NVT.getSizeInBits() -
+                                     OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  MVT OVT = N->getValueType(0);
+  MVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+  // The count is the same in the promoted type except if the original
+  // value was zero.  This can be handled by setting the bit just off
+  // the top of the original type.
+  APInt TopBit(NVT.getSizeInBits(), 0);
+  TopBit.set(OVT.getSizeInBits());
+  Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+  return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
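+
+// Illustrative sketch of the CTLZ/CTTZ fixups above for an i16 counted in
+// an i32, using GCC-style builtins: ctlz must subtract the 16 extra leading
+// zeros; cttz plants a one just past the top so that a zero input stops the
+// count at bit 16 (for nonzero x the planted bit never wins).
+//
+//   unsigned ctlz16(uint16_t x) {            // x != 0, as with the node
+//     return __builtin_clz((uint32_t)x) - 16;
+//   }
+//   unsigned cttz16(uint16_t x) {
+//     return __builtin_ctz((uint32_t)x | 0x10000);   // the TopBit OR
+//   }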
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  MVT OldVT = N->getValueType(0);
+  SDValue OldVec = N->getOperand(0);
+  if (getTypeAction(OldVec.getValueType()) == WidenVector)
+    OldVec = GetWidenedVector(N->getOperand(0));
+  unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  if (OldElts == 1) {
+    assert(!isTypeLegal(OldVec.getValueType()) &&
+           "Legal one-element vector of a type needing promotion!");
+    // It is tempting to follow GetScalarizedVector by a call to
+    // GetPromotedInteger, but this would be wrong because the
+    // scalarized value may not yet have been processed.
+    return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT),
+                       GetScalarizedVector(OldVec));
+  }
+
+  // Convert to a vector half as long with an element type of twice the width,
+  // for example <4 x i16> -> <2 x i32>.
+  assert(!(OldElts & 1) && "Odd length vectors not supported!");
+  MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits());
+  assert(OldVT.isSimple() && NewVT.isSimple());
+
+  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+                               MVT::getVectorVT(NewVT, OldElts / 2),
+                               OldVec);
+
+  // Extract the element at OldIdx / 2 from the new vector.
+  SDValue OldIdx = N->getOperand(1);
+  SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx,
+                               DAG.getConstant(1, TLI.getPointerTy()));
+  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx);
+
+  // Select the appropriate half of the element: Lo if OldIdx was even,
+  // Hi if it was odd.
+  SDValue Lo = Elt;
+  SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt,
+                           DAG.getConstant(OldVT.getSizeInBits(),
+                                           TLI.getPointerTy()));
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Extend to the promoted type.
+  SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx);
+  SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo);
+  return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res);
+}
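+
+// Illustrative sketch of the vector-halving extract above: reading element
+// i of a <4 x i16> through a <2 x i32> view takes word i/2, then the low or
+// high half by the parity of i (little-endian layout shown; the halves swap
+// on big-endian, as in the std::swap above).
+//
+//   uint16_t extract(const uint32_t v[2], unsigned i) {
+//     uint32_t w = v[i >> 1];                  // NewIdx = OldIdx / 2
+//     return (uint16_t)(i & 1 ? w >> 16 : w);  // odd index -> high half
+//   }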
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned NewOpc = N->getOpcode();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If we're promoting a UINT to a larger size, check to see if the new node
+  // will be legal.  If it isn't, check to see if FP_TO_SINT is legal, since
+  // we can use that instead.  This allows us to generate better code for
+  // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+  // legal, such as PowerPC.
+  if (N->getOpcode() == ISD::FP_TO_UINT &&
+      !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) &&
+      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+    NewOpc = ISD::FP_TO_SINT;
+
+  SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+  // Assert that the converted value fits in the original type.  If it doesn't
+  // (eg: because the value being converted is too big), then the result of the
+  // original operation was undefined anyway, so the assert is still correct.
+  return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+                     ISD::AssertZext : ISD::AssertSext, dl,
+                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+
+  if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+    SDValue Res = GetPromotedInteger(N->getOperand(0));
+    assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+    // If the result and operand types are the same after promotion, simplify
+    // to an in-register extension.
+    if (NVT == Res.getValueType()) {
+      // The high bits are not guaranteed to be anything.  Insert an extend.
+      if (N->getOpcode() == ISD::SIGN_EXTEND)
+        return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+                           DAG.getValueType(N->getOperand(0).getValueType()));
+      if (N->getOpcode() == ISD::ZERO_EXTEND)
+        return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+      assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+      return Res;
+    }
+  }
+
+  // Otherwise, just extend the original operand all the way to the larger type.
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  ISD::LoadExtType ExtType =
+    ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+                               N->getSrcValue(), N->getSrcValueOffset(),
+                               N->getMemoryVT(), N->isVolatile(),
+                               N->getAlignment());
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+  // Simply change the return type of the boolean result.
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1));
+  MVT ValueVTs[] = { N->getValueType(0), NVT };
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                            DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+  // Modified the sum result - switch anything that used the old sum to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 0), Res);
+
+  return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  // The operation overflowed iff the result in the larger type is not the
+  // sign extension of its truncation to the original type.
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+  MVT OVT = N->getOperand(0).getValueType();
+  MVT NVT = LHS.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Do the arithmetic in the larger type.
+  unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+  SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+  // Calculate the overflow flag: sign extend the arithmetic result from
+  // the original type.
+  SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+                            DAG.getValueType(OVT));
+  // Overflowed if and only if this is not equal to Res.
+  Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+
+  return Res;
+}
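+
+// Illustrative sketch of the SADDO check above: do the add in the wider
+// type, then see whether the result survives a round trip through the
+// original width under sign extension.  Hypothetical helper for clarity:
+//
+//   bool sadd_overflow_i8(int32_t a /*sext of i8*/, int32_t b /*sext of i8*/,
+//                         int8_t *out) {
+//     int32_t res = a + b;            // ADD in the promoted type
+//     *out = (int8_t)res;
+//     return (int32_t)*out != res;    // SIGN_EXTEND_INREG != Res -> SETNE
+//   }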
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+  // Sign extend the input.
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+  SDValue LHS = GetPromotedInteger(N->getOperand(1));
+  SDValue RHS = GetPromotedInteger(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetPromotedInteger(N->getOperand(2));
+  SDValue RHS = GetPromotedInteger(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+  MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+  assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+  DebugLoc dl = N->getDebugLoc();
+
+  // Get the SETCC result using the canonical SETCC type.
+  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+                              N->getOperand(1), N->getOperand(2));
+
+  // Convert to the expected type.
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+  return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+  return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(N->getValueType(0)),
+                     GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+  // The input may have strange things in the top bits of the registers, but
+  // these operations don't care.  They may have weird bits going out, but
+  // that too is okay if they are integer operations.
+  SDValue LHS = GetPromotedInteger(N->getOperand(0));
+  SDValue RHS = GetPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+  // The input value must be properly sign extended.
+  SDValue Res = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+                     Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+  // The input value must be properly zero extended.
+  MVT VT = N->getValueType(0);
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
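+
+// Illustrative sketch of why SRA/SRL need properly extended inputs while
+// the "simple" binary ops above do not: the bits shifted in from the top
+// are observable in the result, so junk high bits must be cleaned first
+// (zero extension for SRL, sign extension for SRA).
+//
+//   uint8_t lshr_i8(uint32_t x /* i8 promoted, high bits junk */, unsigned n) {
+//     return (uint8_t)((x & 0xff) >> n);   // zext first, else junk bits
+//   }                                      // would shift into the result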
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Res;
+
+  switch (getTypeAction(N->getOperand(0).getValueType())) {
+  default: assert(0 && "Unknown type action!");
+  case Legal:
+  case ExpandInteger:
+    Res = N->getOperand(0);
+    break;
+  case PromoteInteger:
+    Res = GetPromotedInteger(N->getOperand(0));
+    break;
+  }
+
+  // Truncate to NVT instead of VT
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  // The operation overflowed iff the result in the larger type is not the
+  // zero extension of its truncation to the original type.
+  SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+  MVT OVT = N->getOperand(0).getValueType();
+  MVT NVT = LHS.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Do the arithmetic in the larger type.
+  unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+  SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+  // Calculate the overflow flag: zero extend the arithmetic result from
+  // the original type.
+  SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+  // Overflowed if and only if this is not equal to Res.
+  Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+
+  return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+  // Zero extend the input.
+  SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1);   // Get the pointer.
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT RegVT = TLI.getRegisterType(VT);
+  unsigned NumRegs = TLI.getNumRegisters(VT);
+  // The argument is passed as NumRegs registers of type RegVT.
+
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned i = 0; i < NumRegs; ++i) {
+    Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+    Chain = Parts[i].getValue(1);
+  }
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::reverse(Parts.begin(), Parts.end());
+
+  // Assemble the parts in the promoted type.
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+  for (unsigned i = 1; i < NumRegs; ++i) {
+    SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+    // Shift it to the right position and "or" it in.
+    Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+                       DAG.getConstant(i * RegVT.getSizeInBits(),
+                                       TLI.getPointerTy()));
+    Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+  }
+
+  // Modified the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  return Res;
+}
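+
+// Illustrative sketch of the reassembly loop above, for a value passed as
+// two 32-bit register pieces on a little-endian target (a minimal model of
+// the zext/shift/or sequence, not the full VAARG machinery):
+//
+//   uint64_t assemble(uint32_t parts[2]) {
+//     uint64_t res = parts[0];                   // ZERO_EXTEND Parts[0]
+//     res |= (uint64_t)parts[1] << 32;           // SHL by i*RegBits, then OR
+//     return res;
+//   }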
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+  assert(ResNo == 1 && "Only boolean result promotion currently supported!");
+  return PromoteIntRes_Overflow(N);
+}
+
+//===----------------------------------------------------------------------===//
+//  Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "PromoteIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to promote this operator's operand!");
+    abort();
+
+  case ISD::ANY_EXTEND:   Res = PromoteIntOp_ANY_EXTEND(N); break;
+  case ISD::BIT_CONVERT:  Res = PromoteIntOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:        Res = PromoteIntOp_BR_CC(N, OpNo); break;
+  case ISD::BRCOND:       Res = PromoteIntOp_BRCOND(N, OpNo); break;
+  case ISD::BUILD_PAIR:   Res = PromoteIntOp_BUILD_PAIR(N); break;
+  case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+  case ISD::CONVERT_RNDSAT:
+                          Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+  case ISD::INSERT_VECTOR_ELT:
+                          Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); break;
+  case ISD::MEMBARRIER:   Res = PromoteIntOp_MEMBARRIER(N); break;
+  case ISD::SCALAR_TO_VECTOR:
+                          Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:       Res = PromoteIntOp_SELECT(N, OpNo); break;
+  case ISD::SELECT_CC:    Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+  case ISD::SETCC:        Res = PromoteIntOp_SETCC(N, OpNo); break;
+  case ISD::SIGN_EXTEND:  Res = PromoteIntOp_SIGN_EXTEND(N); break;
+  case ISD::SINT_TO_FP:   Res = PromoteIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+                                                   OpNo); break;
+  case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
+  case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
+  case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:         Res = PromoteIntOp_Shift(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                            ISD::CondCode CCCode) {
+  // We have to insert explicit sign or zero extends.  Note that we could
+  // insert sign extends for ALL conditions, but zero extend is cheaper on
+  // many machines (an AND instead of two shifts), so prefer it.
+  switch (CCCode) {
+  default: assert(0 && "Unknown integer comparison!");
+  case ISD::SETEQ:
+  case ISD::SETNE:
+  case ISD::SETUGE:
+  case ISD::SETUGT:
+  case ISD::SETULE:
+  case ISD::SETULT:
+    // ALL of these operations will work if we either sign or zero extend
+    // the operands (including the unsigned comparisons!).  Zero extend is
+    // usually a simpler/cheaper operation, so prefer it.
+    NewLHS = ZExtPromotedInteger(NewLHS);
+    NewRHS = ZExtPromotedInteger(NewRHS);
+    break;
+  case ISD::SETGE:
+  case ISD::SETGT:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    NewLHS = SExtPromotedInteger(NewLHS);
+    NewRHS = SExtPromotedInteger(NewRHS);
+    break;
+  }
+}
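+
+// Illustrative sketch of the zero-extend preference above: an unsigned i8
+// compare done in an i32 register needs only a mask per operand, whereas
+// sign extension typically costs two shifts on machines without a
+// sign-extend instruction.  Equality works with either extension.
+//
+//   bool ult_i8(uint32_t a, uint32_t b) {   // i8 values, promoted to i32
+//     return (a & 0xff) < (b & 0xff);       // AND + AND + unsigned compare
+//   }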
+  // Check that any extra bits introduced will be truncated away.
+  assert(N->getOperand(0).getValueType().getSizeInBits() >=
+         N->getValueType(0).getVectorElementType().getSizeInBits() &&
+         "Type of inserted value narrower than vector element type!");
+
+  SmallVector<SDValue, 16> NewOps;
+  for (unsigned i = 0; i < NumElts; ++i)
+    NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+         "can only promote integer arguments");
+  SDValue InOp = GetPromotedInteger(N->getOperand(0));
+  return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                         unsigned OpNo) {
+  if (OpNo == 1) {
+    // Promote the inserted value. This is valid because the type does not
+    // have to match the vector element type.
+
+    // Check that any extra bits introduced will be truncated away.
+    assert(N->getOperand(1).getValueType().getSizeInBits() >=
+           N->getValueType(0).getVectorElementType().getSizeInBits() &&
+           "Type of inserted value narrower than vector element type!");
+    return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                  GetPromotedInteger(N->getOperand(1)),
+                                  N->getOperand(2));
+  }
+
+  assert(OpNo == 2 && "Different operand and result vector types?");
+
+  // Promote the index.
+  SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+  SDValue NewOps[6];
+  DebugLoc dl = N->getDebugLoc();
+  NewOps[0] = N->getOperand(0);
+  for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+    SDValue Flag = GetPromotedInteger(N->getOperand(i));
+    NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+  }
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewOps,
+                                array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+  // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+  // the operand in place.
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+                                N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+  // The CC (#4) and the possible return values (#2 and #3) have legal types.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
+                                N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+  // The CC (#2) is always legal.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                ZExtPromotedInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+                     Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                SExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.
+
+  // Truncate the value and store the result.
+  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
+                           SVOffset, N->getMemoryVT(),
+                           isVolatile, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                ZExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "ExpandIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to expand the result of this operator!");
+    abort();
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+  case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+  case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+  case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+  case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+  case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+  case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+  case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+  case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+  case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+  case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+  case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+  case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+  case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+  case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+  case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+  case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+  case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+  case ISD::ADD:
+  case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+  case ISD::ADDC:
+  case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+  case ISD::ADDE:
+  case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
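+/// For example, an i64 SHL by a constant 40, with i32 parts, produces
+/// Lo = 0 and Hi = InL << (40 - 32): the whole low word is shifted out of
+/// the low half, and the surviving bits land in the high half.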
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + MVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + MVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, dl, + NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, dl, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
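+/// For example, when shifting an i64 split into i32 parts by an amount of
+/// the form (X | 32), the high bit of the (modulo-64) shift amount is known
+/// set, so each half can be formed with a single shift by (Amt & 31).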
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue Lo1, Hi1, Lo2, Hi2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + // ShAmt < NVTBits + Lo1 = DAG.getConstant(0, NVT); // Low part is zero. + Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + + // ShAmt >= NVTBits + Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + Hi2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRL: + // ShAmt < NVTBits + Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. + Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRA: + // ShAmt < NVTBits + Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + MVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Flag, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + } else { + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. 
+  } else {
+    // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand. This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT NVT = Lo.getValueType();
+  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = EVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+                     DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part replicates the sign bit of Lo, make it explicit.
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT NVT = Lo.getValueType();
+  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = EVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+                     DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part must be zero, make it explicit.
+    Hi = DAG.getConstant(0, NVT);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Hi, Lo);  // Note swapped operands.
+  Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned NBitWidth = NVT.getSizeInBits();
+  const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+  Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+  Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctlz (HiLo) -> Hi != 0 ?
ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + MVT EVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + EVT, isVolatile, Alignment); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. 
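+      // With i32 parts this is Hi = SRA(Lo, 31), which copies the sign bit
+      // of the loaded value into every bit of the high word.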
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
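+  // On a typical 32-bit target an i64 multiply therefore becomes a call to
+  // the runtime routine __muldi3 (RTLIB::MUL_I64).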
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::MUL_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::MUL_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MUL_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MUL_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i32)
+    LC = RTLIB::SDIV_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SDIV_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SDIV_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // If we can emit an efficient shift operation, do so now. Check to see if
+  // the RHS is a constant.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+  // If we can determine that the high bit of the shift is zero or one, even if
+  // the low bits are variable, emit this shift in an optimized form.
+  if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+    return;
+
+  // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+  unsigned PartsOpc;
+  if (N->getOpcode() == ISD::SHL) {
+    PartsOpc = ISD::SHL_PARTS;
+  } else if (N->getOpcode() == ISD::SRL) {
+    PartsOpc = ISD::SRL_PARTS;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    PartsOpc = ISD::SRA_PARTS;
+  }
+
+  // Next check to see if the target supports this SHL_PARTS operation or if it
+  // will custom expand it.
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+  if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+      Action == TargetLowering::Custom) {
+    // Expand the subcomponents.
+    SDValue LHSL, LHSH;
+    GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+    SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+    MVT VT = LHSL.getValueType();
+    Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+    Hi = Lo.getValue(1);
+    return;
+  }
+
+  // Otherwise, emit a libcall.
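+  // On a typical 32-bit target a variable i64 shift then becomes a call to
+  // __ashldi3, __lshrdi3 or __ashrdi3.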
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  bool isSigned;
+  if (N->getOpcode() == ISD::SHL) {
+    isSigned = false; /*sign irrelevant*/
+    if (VT == MVT::i16)
+      LC = RTLIB::SHL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SHL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SHL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SHL_I128;
+  } else if (N->getOpcode() == ISD::SRL) {
+    isSigned = false;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRL_I128;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    isSigned = true;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRA_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRA_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRA_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRA_I128;
+  }
+
+  if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+    return;
+  }
+
+  if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+    assert(0 && "Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is sign extension of the input (degenerates to a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+    // The high part is obtained by SRA'ing all but one of the bits of low part.
+    unsigned LoSize = NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+  } else {
+    // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand. This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits =
+      Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+                     DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+  }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  if (EVT.bitsLE(Lo.getValueType())) {
+    // sext_inreg the low part if needed.
+    Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+                     N->getOperand(1));
+
+    // The high part gets the sign extension from the lo-part. This handles
+    // things like sextinreg V:i64 from i8.
+    Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+                     DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+                                     TLI.getPointerTy()));
+  } else {
+    // For example, extension of an i48 to an i64. Leave the low part alone,
+    // sext_inreg the high part.
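+    // With i32 parts, ExcessBits is 48 - 32 = 16: only the low 16 bits of
+    // the high half come from the operand, so sign extend the high half in
+    // register from bit 16.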
+ unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits =
+      Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits));
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "ExpandIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to expand this operator's operand!");
+    abort();
+
+  case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+  case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+  case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+  case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+  case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+  case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+  case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+  case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place. Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+                                                  SDValue &NewRHS,
+                                                  ISD::CondCode &CCCode,
+                                                  DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+  GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+  MVT VT = NewLHS.getValueType();
+
+  if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+    if (RHSLo == RHSHi) {
+      if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+        if (RHSCST->isAllOnesValue()) {
+          // Equality comparison to -1.
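+          // Both halves equal -1 exactly when their AND is all ones, so a
+          // single AND plus one comparison suffices.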
+          NewLHS = DAG.getNode(ISD::AND, dl,
+                               LHSLo.getValueType(), LHSLo, LHSHi);
+          NewRHS = RHSLo;
+          return;
+        }
+      }
+    }
+
+    NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+    NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+    NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    return;
+  }
+
+  // If this is a comparison of the sign bit, just look at the top part.
+  // X > -1,  x < 0
+  if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+    if ((CCCode == ISD::SETLT && CST->isNullValue()) ||     // X < 0
+        (CCCode == ISD::SETGT && CST->isAllOnesValue())) {  // X > -1
+      NewLHS = LHSHi;
+      NewRHS = RHSHi;
+      return;
+    }
+
+  // FIXME: This generated code sucks.
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: assert(0 && "Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, return the high part.
+    // For LE / GE, if high part is known false, ignore the low part.
+    // For LT / GT, if high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+  // The value being shifted is legal, but the shift amount is too big.
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero. Just use the lower half.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  MVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  MVT VT = N->getOperand(1).getValueType();
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+                             N->getMemoryVT(), isVolatile, Alignment);
+  } else if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
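+    // For example, a truncating i48 store with i32 parts becomes a normal
+    // i32 store of Lo at the base address plus an i16 truncating store of
+    // Hi at offset 4.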
+ GetExpandedInteger(N->getValue(), Lo, Hi); + + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } + + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset, HiVT, isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } +} + +SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + // Just truncate the low part of the source. + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT SrcVT = Op.getValueType(); + MVT DstVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // Do a signed conversion then adjust the result. + SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); + SignedConv = TLI.LowerOperation(SignedConv, DAG); + + // The result of the signed conversion needs adjusting if the 'sign bit' of + // the incoming integer was set. To handle this, we dynamically test to see + // if it is set, and, if so, add a fudge factor. + + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + assert(false && "Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. 
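+    // Only the high half needs testing: the expanded integer is negative as
+    // a signed value exactly when Hi < 0.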
+    SDValue Lo, Hi;
+    GetExpandedInteger(Op, Lo, Hi);
+    SDValue SignSet = DAG.getSetCC(dl,
+                                   TLI.getSetCCResultType(Hi.getValueType()),
+                                   Hi, DAG.getConstant(0, Hi.getValueType()),
+                                   ISD::SETLT);
+
+    // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+    SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)),
+                                           TLI.getPointerTy());
+
+    // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+    SDValue Zero = DAG.getIntPtrConstant(0);
+    SDValue Four = DAG.getIntPtrConstant(4);
+    if (TLI.isBigEndian()) std::swap(Zero, Four);
+    SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+                                 Zero, Four);
+    unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+    FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+    Alignment = std::min(Alignment, 4u);
+
+    // Load the value out, extending it from f32 to the destination float type.
+    // FIXME: Avoid the extend by constructing the right constant pool?
+    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+                                   FudgePtr, NULL, 0, MVT::f32,
+                                   false, Alignment);
+    return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+  }
+
+  // Otherwise, use a libcall.
+  RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this UINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..00d71e1a4fe9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1074 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+  // If a node is not processed, then none of its values should be mapped by any
+  // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+  // If a node is processed, then each value with an illegal type must be mapped
+  // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+  // Values with a legal type may be mapped by ReplacedValues, but not by any of
+  // the other maps.
+
+  // Note that these invariants may not hold momentarily when processing a node:
+  // the node being processed may be put in a map before being marked Processed.
+
+  // Note that it is possible to have nodes marked NewNode in the DAG. This can
+  // occur in two ways. Firstly, a node may be created during legalization but
+  // never passed to the legalization core.
This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes. By iterating + // over the DAG we only consider non-deleted nodes. + SmallVector NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (I->getNodeId() == NewNode) + NewNodes.push_back(I); + + for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { + SDValue Res(I, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
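The per-value checks just below give each map its own bit in a "Mapped" bitmask, so the invariant "mapped by exactly one map" reduces to "Mapped is a nonzero power of two", tested with the classic Mapped & (Mapped - 1) trick. A minimal compile-time sketch of that test (editor's illustration, standalone):

namespace mapped_bitmask_sketch {
constexpr bool InExactlyOneMap(unsigned Mapped) {
  return Mapped != 0 && (Mapped & (Mapped - 1)) == 0;
}
static_assert(InExactlyOneMap(16u), "bit 16 only: just ExpandedIntegers - OK");
static_assert(!InExactlyOneMap(0u), "processed but in no map - an error");
static_assert(!InExactlyOneMap(2u | 4u), "in two maps at once - an error");
} // namespace mapped_bitmask_sketch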
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+        while (I != ReplacedValues.end()) {
+          NewVal = I->second;
+          I = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        if (Mapped != 0) {
+          cerr << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          cerr << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          cerr << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          cerr << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          cerr << " ReplacedValues";
+        if (Mapped & 2)
+          cerr << " PromotedIntegers";
+        if (Mapped & 4)
+          cerr << " SoftenedFloats";
+        if (Mapped & 8)
+          cerr << " ScalarizedVectors";
+        if (Mapped & 16)
+          cerr << " ExpandedIntegers";
+        if (Mapped & 32)
+          cerr << " ExpandedFloats";
+        if (Mapped & 64)
+          cerr << " SplitVectors";
+        if (Mapped & 128)
+          cerr << " WidenedVectors";
+        cerr << "\n";
+        abort();
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may dangle to deleted nodes until the type legalizer is
+  // done. Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+  // non-leaves.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      I->setNodeId(ReadyToProcess);
+      Worklist.push_back(I);
+    } else {
+      I->setNodeId(Unanalyzed);
+    }
+  }
+
+  // Now that we have a set of nodes to process, handle them all.
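The loop that follows, together with the user-update code at its end, is a classic dependency-counting topological walk: a node's NodeId holds the number of operands not yet processed, and a node whose count reaches zero joins the worklist. A standalone model of just that scheduling discipline (editor's sketch; plain structs stand in for SDNodes):

#include <cstddef>
#include <vector>

namespace worklist_sketch {
struct Node {
  int PendingOperands;        // analogue of a positive NodeId
  std::vector<Node*> Users;
  bool Processed;
};

// Leaves start with PendingOperands == 0, exactly like ReadyToProcess.
inline void ProcessAll(std::vector<Node*> Worklist) {
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    N->Processed = true;      // every operand was already processed
    for (std::size_t i = 0, e = N->Users.size(); i != e; ++i)
      if (--N->Users[i]->PendingOperands == 0)
        Worklist.push_back(N->Users[i]);  // last operand it was waiting on
  }
}
} // namespace worklist_sketch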
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + MVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + MVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. However + // there is no need to remember the replacement - morphing will make sure + // it is never used non-trivially. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. Do the + // replacement without recording it in ReplacedValues. This does not + // expunge From but that is fine - it is not really a new node. + ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + cerr << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + cerr << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + cerr << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + cerr << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + cerr << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + cerr << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); cerr << "\n"; + abort(); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + for (unsigned j = 0; j < i; ++j) + NewOps.push_back(N->getOperand(j)); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], + NewOps.size()).getNode(); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. 
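The NewOps handling just above uses a copy-on-first-change pattern: the replacement array stays empty until some operand actually differs, and only then is the already-scanned prefix copied over, keeping the common nothing-changed case allocation-free. A standalone sketch of the pattern (editor's illustration; ints stand in for SDValues):

#include <cstddef>
#include <vector>

namespace copy_on_change_sketch {
// Returns the remapped operand list, or an empty vector if nothing changed.
inline std::vector<int> RemapOperands(const std::vector<int> &Ops,
                                      int (*Remap)(int)) {
  std::vector<int> NewOps;
  for (std::size_t i = 0, e = Ops.size(); i != e; ++i) {
    int Op = Remap(Ops[i]);
    if (!NewOps.empty()) {
      NewOps.push_back(Op);                        // already copying
    } else if (Op != Ops[i]) {
      NewOps.assign(Ops.begin(), Ops.begin() + i); // first change: copy prefix
      NewOps.push_back(Op);
    }
  }
  return NewOps;
}
} // namespace copy_on_change_sketch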
+  N->setNodeId(N->getNumOperands() - NumProcessed);
+  if (N->getNodeId() == ReadyToProcess)
+    Worklist.push_back(N);
+
+  return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+  Val.setNode(AnalyzeNewNode(Val.getNode()));
+  if (Val.getNode()->getNodeId() == Processed)
+    // We were passed a processed node, or it morphed into one - remap it.
+    RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+  if (N->getNodeId() != NewNode)
+    return;
+
+  // If N is not remapped by ReplacedValues then there is nothing to do.
+  unsigned i, e;
+  for (i = 0, e = N->getNumValues(); i != e; ++i)
+    if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+      break;
+
+  if (i == e)
+    return;
+
+  // Remove N from all maps - this is expensive but rare.
+
+  for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+       E = PromotedIntegers.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+       E = SoftenedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+       E = ScalarizedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+       E = WidenedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+       E = ReplacedValues.end(); I != E; ++I)
+    RemapValue(I->second);
+
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+  DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+  if (I != ReplacedValues.end()) {
+    // Use path compression to speed up future lookups if values get replaced
+    // with other values multiple times.
+    RemapValue(I->second);
+    N = I->second;
+    assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+  }
+}
+
+namespace {
+  /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+  /// updates to nodes and recomputes their ready state.
+  class VISIBILITY_HIDDEN NodeUpdateListener :
+    public SelectionDAG::DAGUpdateListener {
+    DAGTypeLegalizer &DTL;
+    SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+  public:
+    explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+                                SmallSetVector<SDNode*, 16> &nta)
+      : DTL(dtl), NodesToAnalyze(nta) {}
+
+    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      // It is possible, though rare, for the deleted node N to occur as a
+      // target in a map, so note the replacement N -> E in ReplacedValues.
+      assert(E && "Node not replaced?");
+      DTL.NoteDeletion(N, E);
+
+      // In theory the deleted node could also have been scheduled for analysis.
+      // So remove it from the set of nodes which will be analyzed.
+      NodesToAnalyze.remove(N);
+
+      // In general nothing needs to be done for E, since it didn't change but
+      // only gained new uses. However N -> E was just added to ReplacedValues,
+      // and the result of a ReplacedValues mapping is not allowed to be marked
+      // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (f.e.) in which case
+      // this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the
+/// DAG causing any uses of From to use To instead, but without expunging From
+/// or recording the replacement in ReplacedValues. Do not call directly unless
+/// you really know what you are doing!
+void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  AnalyzeNewValue(To); // Expunges To.
+
+  // Anything that used the old node should now use the new one. Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+  // Process the list of nodes that need to be reanalyzed.
+  while (!NodesToAnalyze.empty()) {
+    SDNode *N = NodesToAnalyze.back();
+    NodesToAnalyze.pop_back();
+    if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+      // The node was analyzed while reanalyzing an earlier node - it is safe to
+      // skip. Note that this is not a morphing node - otherwise it would still
+      // be marked NewNode.
+      continue;
+
+    // Analyze the node's operands and recalculate the node ID.
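RemapValue above chases a chain of replacements to its end and then rewrites every entry it visited to point directly at the final value, the same path compression used in union-find. A standalone model over std::map (editor's sketch, not part of the imported code):

#include <map>

namespace remap_sketch {
// Follow Replaced until V is no longer a key, compressing as we return.
inline int Remap(std::map<int, int> &Replaced, int V) {
  std::map<int, int>::iterator I = Replaced.find(V);
  if (I == Replaced.end())
    return V;                              // not replaced at all
  int Final = Remap(Replaced, I->second);  // recurse to the chain's end
  I->second = Final;                       // path compression
  return Final;
}
} // namespace remap_sketch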
+    SDNode *M = AnalyzeNewNode(N);
+    if (M != N) {
+      // The node morphed into a different node. Make everyone use the new node
+      // instead.
+      assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+      assert(N->getNumValues() == M->getNumValues() &&
+             "Node morphing changed the number of results!");
+      for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+        SDValue OldVal(N, i);
+        SDValue NewVal(M, i);
+        if (M->getNodeId() == Processed)
+          RemapValue(NewVal);
+        DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+      }
+      // The original node continues to exist in the DAG, marked NewNode.
+    }
+  }
+}
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode()->getNodeId() == ReadyToProcess &&
+         "Only the node being processed may be remapped!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
+  AnalyzeNewValue(To); // Expunges To.
+
+  // The old node may still be present in a map like ExpandedIntegers or
+  // PromotedIntegers. Inform maps about the replacement.
+  ReplacedValues[From] = To;
+
+  // Do the replacement.
+  ReplaceValueWithHelper(From, To);
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = PromotedIntegers[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = SoftenedFloats[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = ScalarizedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+  OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+                                          SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't expanded");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+                                          SDValue Hi) {
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+                                        SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't expanded");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+                                        SDValue Hi) {
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+                                      SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't split");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+                                      SDValue Hi) {
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  assert(Entry.first.getNode() == 0 && "Node already split");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = WidenedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node already widened!");
+  OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+  unsigned BitWidth = Op.getValueType().getSizeInBits();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     MVT::getIntegerVT(BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Only applies to vectors!");
+  unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+  MVT EltNVT = MVT::getIntegerVT(EltWidth);
+  unsigned NumElts = Op.getValueType().getVectorNumElements();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     MVT::getVectorVT(EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+                                               MVT DestVT) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Create the stack frame object. Make sure it is aligned for both
+  // the source and destination types.
+  SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is true we are dealing with a node with illegal result
+/// types, and VT is the type of the illegal ResNo; if it is false the node
+/// has legal result types but an illegal operand, and VT is the type of the
+/// illegal OperandNo.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+ return false; + + // Make everything that once used N's values now use those in Results instead. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + ReplaceValueWith(SDValue(N, i), Results[i]); + return true; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split into two not necessarily identical pieces. +void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { + if (!InVT.isVector()) { + LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + } else { + MVT NewEltVT = InVT.getVectorElementType(); + unsigned NumElements = InVT.getVectorNumElements(); + if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. + NumElements >>= 1; + LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); + } else { // Non-power-of-two vectors. + unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); + unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; + LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); + HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); + } + } +} + +/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and +/// high parts of the given value. +void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + MVT LVT = Lo.getValueType(); + MVT HVT = Hi.getValueType(); + MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. 
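GetSplitDestVTs above splits a power-of-two element count evenly, and otherwise gives the low part the largest power of two not exceeding the count (Log2_32 rounds down) and the high part the remainder. The non-power-of-two arithmetic, worked out at compile time (editor's sketch, standalone):

namespace split_vt_sketch {
constexpr unsigned FloorLog2(unsigned N) {
  return N < 2 ? 0 : 1 + FloorLog2(N / 2);
}
constexpr unsigned LoElts(unsigned N) { return 1u << FloorLog2(N); }
// Non-power-of-two counts, as in the else branch above:
static_assert(LoElts(7) == 4 && 7 - LoElts(7) == 3, "v7 -> v4 + v3");
static_assert(LoElts(5) == 4 && 5 - LoElts(5) == 1, "v5 -> v4 + v1");
} // namespace split_vt_sketch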
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); +} + +/// MakeLibCall - Generate a libcall taking the given operands as arguments and +/// returning a result of type RetVT. +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForMVT(); + std::pair CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, CallingConv::C, false, Callee, Args, DAG, dl); + return CallInfo.first; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode; + switch (TLI.getBooleanContents()) { + default: + assert(false && "Unknown BooleanContent!"); + case TargetLowering::UndefinedBooleanContent: + // Extend to VT by adding rubbish bits. + ExtendCode = ISD::ANY_EXTEND; + break; + case TargetLowering::ZeroOrOneBooleanContent: + // Extend to VT by adding zero bits. + ExtendCode = ISD::ZERO_EXTEND; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: { + // Extend to VT by copying the sign bit. + ExtendCode = ISD::SIGN_EXTEND; + break; + } + } + return DAG.getNode(ExtendCode, dl, VT, Bool); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. 
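JoinIntegers and SplitInteger above are inverse bit manipulations: join zero-extends the low half and ORs in the high half shifted up by the low half's width; split truncates for Lo and logically shifts right for Hi. On concrete 32-bit halves of a 64-bit value (editor's standalone sketch, not part of the imported code):

#include <cassert>
#include <cstdint>

namespace join_split_sketch {
// JoinIntegers: zero-extend Lo, shift Hi up by Lo's width, OR together.
inline std::uint64_t Join(std::uint32_t Lo, std::uint32_t Hi) {
  return static_cast<std::uint64_t>(Lo) |
         (static_cast<std::uint64_t>(Hi) << 32);
}
// SplitInteger: truncate for Lo; logical shift right then truncate for Hi.
inline void Split(std::uint64_t Op, std::uint32_t &Lo, std::uint32_t &Hi) {
  Lo = static_cast<std::uint32_t>(Op);
  Hi = static_cast<std::uint32_t>(Op >> 32);
}
} // namespace join_split_sketch

int main() {
  std::uint32_t Lo, Hi;
  join_split_sketch::Split(join_split_sketch::Join(0xDEADBEEFu, 0x1234u),
                           Lo, Hi);
  assert(Lo == 0xDEADBEEFu && Hi == 0x1234u);  // round trip
  return 0;
}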
+void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 000000000000..75c89246a31e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,736 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. +// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAG_LEGALIZETYPES_H +#define SELECTIONDAG_LEGALIZETYPES_H + +#define DEBUG_TYPE "legalize-types" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class VISIBILITY_HIDDEN DAGTypeLegalizer { + TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + enum LegalizeAction { + Legal, // The target natively supports this type. + PromoteInteger, // Replace this integer type with a larger one. + ExpandInteger, // Split this integer type into two of half the size. + SoftenFloat, // Convert this float type to a same size integer type. + ExpandFloat, // Split this float type into two of half the size. + ScalarizeVector, // Replace this one-element vector with its element type. 
+    SplitVector,     // This vector type should be split into smaller vectors.
+    WidenVector      // This vector type should be widened into a larger vector.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// simple value type, where the two bits correspond to the LegalizeAction
+  /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// getTypeAction - Return how we should legalize values of this type.
+  LegalizeAction getTypeAction(MVT VT) const {
+    switch (ValueTypeActions.getTypeAction(VT)) {
+    default:
+      assert(false && "Unknown legalize action!");
+    case TargetLowering::Legal:
+      return Legal;
+    case TargetLowering::Promote:
+      // Promote can mean
+      //   1) For integers, use a larger integer type (e.g. i8 -> i32).
+      //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+      if (!VT.isVector())
+        return PromoteInteger;
+      else
+        return WidenVector;
+    case TargetLowering::Expand:
+      // Expand can mean
+      //   1) split scalar in half, 2) convert a float to an integer,
+      //   3) scalarize a single-element vector, 4) split a vector in two.
+      if (!VT.isVector()) {
+        if (VT.isInteger())
+          return ExpandInteger;
+        else if (VT.getSizeInBits() ==
+                 TLI.getTypeToTransformTo(VT).getSizeInBits())
+          return SoftenFloat;
+        else
+          return ExpandFloat;
+      } else if (VT.getVectorNumElements() == 1) {
+        return ScalarizeVector;
+      } else {
+        return SplitVector;
+      }
+    }
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  bool isTypeLegal(MVT VT) const {
+    return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+  }
+
+  /// IgnoreNodeResults - Pretend all of this node's results are legal.
+  bool IgnoreNodeResults(SDNode *N) const {
+    return N->getOpcode() == ISD::TargetConstant;
+  }
+
+  /// PromotedIntegers - For integer nodes that are below legal width, this map
+  /// indicates what promoted value to use.
+  DenseMap<SDValue, SDValue> PromotedIntegers;
+
+  /// ExpandedIntegers - For integer nodes that need to be expanded this map
+  /// indicates which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+  /// SoftenedFloats - For floating point nodes converted to integers of
+  /// the same size, this map indicates the converted value to use.
+  DenseMap<SDValue, SDValue> SoftenedFloats;
+
+  /// ExpandedFloats - For float nodes that need to be expanded this map
+  /// indicates which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+  /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+  /// scalar value of type 'ty' to use.
+  DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+  /// SplitVectors - For nodes that need to be split this map indicates
+  /// which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+  /// WidenedVectors - For vector nodes that need to be widened, indicates
+  /// the widened value to use.
+  DenseMap<SDValue, SDValue> WidenedVectors;
+
+  /// ReplacedValues - For values that have been replaced with another,
+  /// indicates the replacement value to use.
+  DenseMap<SDValue, SDValue> ReplacedValues;
+
+  /// Worklist - This defines a worklist of nodes to process. In order to be
+  /// pushed onto this worklist, all operands of a node must have already been
+  /// processed.
+ SmallVector Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. + SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); + bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, bool isSigned, + DebugLoc dl); + SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void ReplaceValueWithHelper(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. 
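SExtPromotedInteger and ZExtPromotedInteger above clean up the rubbish high bits described in GetPromotedInteger's contract. For a hypothetical i8 promoted to i32 the two fixups come down to the following (editor's standalone sketch, assuming the usual two's-complement arithmetic right shift; the shift pair mirrors what SIGN_EXTEND_INREG lowers to):

#include <cstdint>

namespace promoted_int_sketch {
// Sign-extend from the low 8 bits: shift up, then arithmetic shift back down.
inline std::int32_t SExtFrom8(std::int32_t Promoted) {
  return static_cast<std::int32_t>(
      static_cast<std::uint32_t>(Promoted) << 24) >> 24;
}
// Zero-extend from the low 8 bits: mask the rubbish away.
inline std::int32_t ZExtFrom8(std::int32_t Promoted) {
  return Promoted & 0xFF;
}
} // namespace promoted_int_sketch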
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. + bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. 
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. + void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
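The softening contract above (GetSoftenedFloat) is purely a relabelling: the integer carries exactly the float's bits. Standalone, std::memcpy plays the role of ISD::BIT_CONVERT (editor's sketch, not part of the imported code):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.0f;
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);   // the BIT_CONVERT step
  assert(Bits == 0x3F800000u);           // 1.0f's IEEE-754 single pattern
  return 0;
}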
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_ShiftOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + + SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into smaller + /// vectors, this method returns the smaller vectors. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
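+  //
+  // Sketch of the common pattern (illustrative, hypothetical store node N):
+  //   SDValue Lo, Hi;
+  //   GetSplitVector(N->getOperand(1), Lo, Hi);
+  //   // ...emit one operation per half, e.g. a store of Lo at Ptr and a
+  //   // store of Hi at Ptr plus the size of Lo, joined by a TokenFactor.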
+  bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+  SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetWidenedVector - Given a processed vector Op which was widened into a
+  /// larger vector, this method returns the larger vector.  The elements of
+  /// the returned vector consist of the elements of Op followed by elements
+  /// containing rubbish.  For example, if Op is a v2i32 that was widened to a
+  /// v4i32, then this method returns a v4i32 for which the first two elements
+  /// are the same as those of Op, while the last two elements contain rubbish.
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads that
+  /// load a vector with a resulting wider type.  It takes:
+  ///   LdChain: list of chains for the loads we have generated.
+  ///   Chain: incoming chain for the load vector.
+  ///   BasePtr: base pointer to load from.
+  ///   SV: memory disambiguation source value.
+  ///   SVOffset: memory disambiguation offset.
+  ///   Alignment: alignment of the memory.
+  ///   isVolatile: volatile load.
+  ///   LdWidth: width of memory that we want to load.
+  ///   ResType: the wider result type for the resulting vector.
+  ///   dl: DebugLoc to be applied to new nodes.
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+                              SDValue BasePtr, const Value *SV,
+                              int SVOffset, unsigned Alignment,
+                              bool isVolatile, unsigned LdWidth,
+                              MVT ResType, DebugLoc dl);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores that
+  /// store a widened vector into non-widened memory.  It takes:
+  ///   StChain: list of chains for the stores we have generated.
+  ///   Chain: incoming chain for the store vector.
+  ///   BasePtr: base pointer to store to.
+  ///   SV: memory disambiguation source value.
+  ///   SVOffset: memory disambiguation offset.
+  ///   Alignment: alignment of the memory.
+  ///   isVolatile: volatile store.
+  ///   ValOp: value to store.
+  ///   StWidth: width of memory that we want to store.
+  ///   dl: DebugLoc to be applied to new nodes.
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+                            SDValue BasePtr, const Value *SV,
+                            int SVOffset, unsigned Alignment,
+                            bool isVolatile, SDValue ValOp,
+                            unsigned StWidth, DebugLoc dl);
+
+  /// Modifies a vector input (widens or narrows) to a vector of NVT.  The
+  /// input vector must have the same element type as NVT.
+  SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into
+  // two not necessarily identical types.  As such they can be used for
+  // splitting vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+  /// which is split (or expanded) into two not necessarily identical pieces.
+  void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+  /// high parts of the given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into
+  // two identical types of half the size, and that the Lo/Hi part is stored
+  // first in memory on little/big-endian machines, followed by the Hi/Lo part.
+  // As such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  // Generic Result Expansion.
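+  //
+  // Concretely (illustrative, little-endian target): expanding an i64 load
+  // produces
+  //   Lo = i32 load Ptr       // bits 0..31
+  //   Hi = i32 load Ptr + 4   // bits 32..63
+  // On big-endian targets the two halves are swapped, which is why the
+  // implementations check TLI.isBigEndian() before assigning Lo/Hi.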
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 000000000000..e8ff3fc9efb4 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,453 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case SplitVector: + // Convert the split parts of the input if it was split in two. 
+    GetSplitVector(InOp, Lo, Hi);
+    if (Lo.getValueType() == Hi.getValueType()) {
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      return;
+    }
+    break;
+  case ScalarizeVector:
+    // Convert the element instead.
+    SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+    return;
+  case WidenVector: {
+    assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+    InOp = GetWidenedVector(InOp);
+    MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+                                 InVT.getVectorNumElements()/2);
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+    return;
+  }
+  }
+
+  // Lower the bit-convert to a store/load from the stack.
+  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and expanded destination types.
+  unsigned Alignment =
+    TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
+  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+
+  // Load the first half from the stack slot.
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getIntPtrConstant(IncrementSize));
+
+  // Load the second half from the stack slot.
+  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+                   MinAlign(Alignment, IncrementSize));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  // Return the operands.
+  Lo = N->getOperand(0);
+  Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+                   Hi : Lo;
+
+  assert(Part.getValueType() == N->getValueType(0) &&
+         "Type twice as big as expanded type not itself expanded!");
+
+  GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue OldVec = N->getOperand(0);
+  unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Convert to a vector of the expanded element type, for example
+  // <3 x i64> -> <6 x i32>.
+  MVT OldVT = N->getValueType(0);
+  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+
+  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+                               MVT::getVectorVT(NewVT, 2*OldElts),
+                               OldVec);
+
+  // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+  SDValue Idx = N->getOperand(1);
+
+  // Make sure the type of Idx is big enough to hold the new values.
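+  // (Illustrative worked case: extracting element 1 of <3 x i64> becomes
+  // extracting elements 2*1 = 2 and 2*1+1 = 3 of the <6 x i32>, and the
+  // doubled index must still fit in the index type.)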
+  if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(1, Idx.getValueType()));
+  Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+  DebugLoc dl = N->getDebugLoc();
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  int SVOffset = LD->getSrcValueOffset();
+  unsigned Alignment = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+                   isVolatile, Alignment);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
+                   SVOffset+IncrementSize,
+                   isVolatile, MinAlign(Alignment, IncrementSize));
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                      Hi.getValue(1));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0).isVector()) {
+    // An illegal expanding type is being converted to a legal vector type.
+    // Make a two element vector out of the expanded parts and convert that
+    // instead, but only if the new vector type is legal (otherwise there
+    // is no point, and it might create expansion loops).  For example, on
+    // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+    MVT OVT = N->getOperand(0).getValueType();
+    MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2);
+
+    if (isTypeLegal(NVT)) {
+      SDValue Parts[2];
+      GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+      if (TLI.isBigEndian())
+        std::swap(Parts[0], Parts[1]);
+
+      SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+    }
+  }
+
+  // Otherwise, store to a temporary and load out again as the new type.
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  MVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  MVT OldVT = N->getOperand(0).getValueType();
+  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(OldVT == VecVT.getVectorElementType() &&
+         "BUILD_VECTOR operand type doesn't match vector element type!");
+
+  // Build a vector of twice the length out of the expanded elements.
+  // For example <3 x i64> -> <6 x i32>.
+  std::vector<SDValue> NewElts;
+  NewElts.reserve(NumElts*2);
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Lo, Hi;
+    GetExpandedOp(N->getOperand(i), Lo, Hi);
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+    NewElts.push_back(Lo);
+    NewElts.push_back(Hi);
+  }
+
+  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                               MVT::getVectorVT(NewVT, NewElts.size()),
+                               &NewElts[0], NewElts.size());
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+  SDValue Lo, Hi;
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  MVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = N->getOperand(1);
+  MVT OldEVT = Val.getValueType();
+  MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);
+
+  assert(OldEVT == VecVT.getVectorElementType() &&
+         "Inserted element type doesn't match vector element type!");
+
+  // Bitconvert to a vector of twice the length with elements of the expanded
+  // type, insert the expanded vector elements, and then convert back.
+  MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
+  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+                               NewVecVT, N->getOperand(0));
+
+  SDValue Lo, Hi;
+  GetExpandedOp(Val, Lo, Hi);
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  SDValue Idx = N->getOperand(2);
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+  Idx = DAG.getNode(ISD::ADD, dl,
+                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  MVT VT = N->getValueType(0);
+  assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+         "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  Ops[0] = N->getOperand(0);
+  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+  for (unsigned i = 1; i < NumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+  assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  DebugLoc dl = N->getDebugLoc();
+
+  StoreSDNode *St = cast<StoreSDNode>(N);
+  MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
+  SDValue Chain = St->getChain();
+  SDValue Ptr = St->getBasePtr();
+  int SVOffset = St->getSrcValueOffset();
+  unsigned Alignment = St->getAlignment();
+  bool isVolatile = St->isVolatile();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+  SDValue Lo, Hi;
+  GetExpandedOp(St->getValue(), Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+                    isVolatile, Alignment);
+
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
+                    SVOffset + IncrementSize,
+                    isVolatile, MinAlign(Alignment, IncrementSize));
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  // A MERGE_VALUES node can produce any number of values.  We know that the
+  // first illegal one needs to be expanded into Lo/Hi.
+  unsigned i;
+
+  // The string of legal results gets turned into input operands, which have
+  // the same type.
+  for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+    ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+  // The first illegal result must be the one that needs to be expanded.
+  GetSplitOp(N->getOperand(i), Lo, Hi);
+
+  // Legalize the rest of the results into the input operands whether they are
+  // legal or not.
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 000000000000..df9af2147ca5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,335 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered like +// ISD::CALL; the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + TargetLowering& TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  // Adds a node to the translation cache
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+  // Legalizes the given node
+  SDValue LegalizeOp(SDValue Op);
+  // Assuming the node is legal, "legalize" the results
+  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+  // Implements unrolling a generic vector operation, i.e. turning it into
+  // scalar operations.
+  SDValue UnrollVectorOp(SDValue Op);
+  // Implements unrolling a VSETCC.
+  SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+  // Implements vector promotion; this is essentially just bitcasting the
+  // operands to a different type and bitcasting the result back to the
+  // original type.
+  SDValue PromoteVectorOp(SDValue Op);
+
+  public:
+  bool Run();
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where
+  // each node is only legalized after all of its operands are legalized.
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != next(E); ++I)
+    LegalizeOp(SDValue(I, 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDValue OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  LegalizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+
+  return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+  // Generic legalization: just pass the operand through.
+  for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+    AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+  return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDNode* Node = Op.getNode();
+
+  // Legalize the operands
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+  SDValue Result =
+      DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+
+  bool HasVectorValue = false;
+  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+       J != E;
+       ++J)
+    HasVectorValue |= J->isVector();
+  if (!HasVectorValue)
+    return TranslateLegalizeResults(Op, Result);
+
+  switch (Op.getOpcode()) {
+  default:
+    return TranslateLegalizeResults(Op, Result);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::SELECT:
+  case ISD::SELECT_CC:
+  case ISD::VSETCC:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FPOWI:
+  case ISD::FPOW:
+  case ISD::FLOG:
+  case ISD::FLOG2:
+  case ISD::FLOG10:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FFLOOR:
+    break;
+  }
+
+  switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+  case TargetLowering::Promote:
+    // "Promote" the operation by bitcasting
+    Result = PromoteVectorOp(Op);
+    Changed = true;
+    break;
+  case TargetLowering::Legal: break;
+  case TargetLowering::Custom: {
+    SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.getNode()) {
+      Result = Tmp1;
+      break;
+    }
+    // FALL THROUGH
+  }
+  case TargetLowering::Expand:
+    if (Node->getOpcode() == ISD::FNEG)
+      Result = ExpandFNEG(Op);
+    else if (Node->getOpcode() == ISD::VSETCC)
+      Result = UnrollVSETCC(Op);
+    else
+      Result = UnrollVectorOp(Op);
+    break;
+  }
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op) {
+    Result = LegalizeOp(Result);
+    Changed = true;
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+  // Vector "promotion" is basically just bitcasting and doing the operation
+  // in a different type.  For example, x86 promotes ISD::AND on v2i32 to
+  // v1i64.
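+  // Illustrative sketch of the effect (not literal code emitted here):
+  //   and v2i32 %a, %b
+  // becomes
+  //   bitcast %a and %b to v1i64, perform the AND on v1i64, then
+  //   bitcast the v1i64 result back to v2i32.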
+  MVT VT = Op.getValueType();
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't promote a vector with multiple results!");
+  MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (Op.getOperand(j).getValueType().isVector())
+      Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+    else
+      Operands[j] = Op.getOperand(j);
+  }
+
+  Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+    SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Zero, Op.getOperand(0));
+  }
+  return UnrollVectorOp(Op);
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+  MVT VT = Op.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+  MVT EltVT = VT.getVectorElementType();
+  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+  MVT TmpEltVT = LHS.getValueType().getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 8> Ops(NumElems);
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+                                  DAG.getIntPtrConstant(i));
+    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+                                  DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+                         LHSElem, RHSElem, CC);
+    Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+                         DAG.getConstant(APInt::getAllOnesValue
+                                         (EltVT.getSizeInBits()), EltVT),
+                         DAG.getConstant(0, EltVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+/// UnrollVectorOp - We know that the given vector has a legal type, however
+/// the operation it performs is not legal, and the target has requested that
+/// the operation be expanded.  "Unroll" the vector, splitting out the scalars
+/// and operating on each element individually.
+SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
+  MVT VT = Op.getValueType();
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't unroll a vector with multiple results!");
+  unsigned NE = VT.getVectorNumElements();
+  MVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+  for (unsigned i = 0; i != NE; ++i) {
+    for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+      SDValue Operand = Op.getOperand(j);
+      MVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        // A vector operand; extract a single element.
+        MVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                  OperandEltVT,
+                                  Operand,
+                                  DAG.getConstant(i, MVT::i32));
+      } else {
+        // A scalar operand; just use it as is.
+ Operands[j] = Operand; + } + } + + switch (Op.getOpcode()) { + default: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0], + DAG.getShiftAmountOperand(Operands[1]))); + break; + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size()); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 000000000000..68967cc638fd --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,2151 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in multiple vectors of a smaller type. For example, +// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "ScalarizeVectorResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to scalarize the result of this operator!");
+    abort();
+
+  case ISD::BIT_CONVERT:       R = ScalarizeVecRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
+  case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
+  case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:
+    R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
+  case ISD::UNDEF:             R = ScalarizeVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+  case ISD::VSETCC:            R = ScalarizeVecRes_VSETCC(N); break;
+
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::FFLOOR:
+  case ISD::FCEIL:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::SUB:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::XOR:  R = ScalarizeVecRes_BinOp(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break;
+  }
+
+  // If R is null, the sub-method took care of registering the result.
+  if (R.getNode())
+    SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, ShiftAmt);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+  MVT NewVT = N->getValueType(0).getVectorElementType();
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                     NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+  MVT NewVT = N->getValueType(0).getVectorElementType();
+  SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+  return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+                              Op0, DAG.getValueType(NewVT),
+                              DAG.getValueType(Op0.getValueType()),
+                              N->getOperand(3),
+                              N->getOperand(4),
+                              cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     N->getValueType(0).getVectorElementType(),
+                     N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  // The value to insert may have a wider type than the vector element type,
+  // so be sure to truncate it to the element type if necessary.
+  SDValue Op = N->getOperand(1);
+  MVT EltVT = N->getValueType(0).getVectorElementType();
+  if (Op.getValueType() != EltVT)
+    // FIXME: Can this happen for floating point types?
+    Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+  return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+  assert(N->isUnindexed() && "Indexed vector load?");
+
+  SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+                               N->getExtensionType(),
+                               N->getValueType(0).getVectorElementType(),
+                               N->getChain(), N->getBasePtr(),
+                               DAG.getUNDEF(N->getBasePtr().getValueType()),
+                               N->getSrcValue(), N->getSrcValueOffset(),
+                               N->getMemoryVT().getVectorElementType(),
+                               N->isVolatile(), N->getAlignment());
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+  // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+  MVT DestVT = N->getValueType(0).getVectorElementType();
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  // If the operand is wider than the vector element type then it is implicitly
+  // truncated.  Make that explicit here.
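+  // (For instance, a <1 x i16> SCALAR_TO_VECTOR may be fed an i32 operand;
+  // the scalarized result is then an explicit TRUNCATE of that i32 to i16.)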
+  MVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue InOp = N->getOperand(0);
+  if (InOp.getValueType() != EltVT)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+  return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS,
+                     GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+                     N->getOperand(0), N->getOperand(1),
+                     LHS, GetScalarizedVector(N->getOperand(3)),
+                     N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+  // Figure out if the scalar is the LHS or RHS and return it.
+  SDValue Arg = N->getOperand(2).getOperand(0);
+  if (Arg.getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+  unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+  return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  MVT NVT = N->getValueType(0).getVectorElementType();
+  MVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+  DebugLoc dl = N->getDebugLoc();
+
+  // Turn it into a scalar SETCC.
+  SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2));
+
+  // VSETCC always returns a sign-extended value, while SETCC may not.  The
+  // SETCC result type may not match the vector element type.  Correct these.
+  if (NVT.bitsLE(SVT)) {
+    // The SETCC result type is bigger than the vector element type.
+    // Ensure the SETCC result is sign-extended.
+    if (TLI.getBooleanContents() !=
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res,
+                        DAG.getValueType(MVT::i1));
+    // Truncate to the final type.
+    return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+  } else {
+    // The SETCC result type is smaller than the vector element type.
+    // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+    if (TLI.getBooleanContents() !=
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res);
+    // Sign extend to the final type.
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res);
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      cerr << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); cerr << "\n";
+#endif
+      assert(0 && "Do not know how to scalarize this operator's operand!");
+      abort();
+
+    case ISD::BIT_CONVERT:
+      Res = ScalarizeVecOp_BIT_CONVERT(N); break;
+
+    case ISD::CONCAT_VECTORS:
+      Res = ScalarizeVecOp_CONCAT_VECTORS(N); break;
+
+    case ISD::EXTRACT_VECTOR_ELT:
+      Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break;
+
+    case ISD::STORE:
+      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>.  Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                     N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+  SmallVector<SDValue, 8> Ops(N->getNumOperands());
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+    Ops[i] = GetScalarizedVector(N->getOperand(i));
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  return GetScalarizedVector(N->getOperand(0));
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>.  Just store the element.
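+/// For example (illustrative): a store of <1 x i32> %v to pointer %p becomes
+/// a plain i32 store of GetScalarizedVector(%v) to %p.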
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of one-element vector?");
+  assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+  DebugLoc dl = N->getDebugLoc();
+
+  if (N->isTruncatingStore())
+    return DAG.getTruncStore(N->getChain(), dl,
+                             GetScalarizedVector(N->getOperand(1)),
+                             N->getBasePtr(),
+                             N->getSrcValue(), N->getSrcValueOffset(),
+                             N->getMemoryVT().getVectorElementType(),
+                             N->isVolatile(), N->getAlignment());
+
+  return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+                      N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+                      N->isVolatile(), N->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting.  At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization, we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n");
+  SDValue Lo, Hi;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "SplitVectorResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to split the result of this operator!");
+    abort();
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BIT_CONVERT:       SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+  case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+  case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
+  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+  case ISD::LOAD:
+    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+  case ISD::VECTOR_SHUFFLE:
+    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break;
+  case ISD::VSETCC:            SplitVecRes_VSETCC(N, Lo, Hi); break;
+
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FTRUNC:
+  case ISD::FFLOOR:
+  case ISD::FCEIL:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break;
+
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + case SoftenFloat: + case ScalarizeVector: + break; + case ExpandInteger: + case ExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + break; + case SplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
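+  // (Illustrative: a v4i32 result whose input was expanded as an i128 gets
+  // LoIntVT = HiIntVT = i64; each i64 half is then bitconverted to v2i32.)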
+  MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits());
+  MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits());
+  if (TLI.isBigEndian())
+    std::swap(LoIntVT, HiIntVT);
+
+  SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+  Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+  Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  MVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  unsigned LoNumElts = LoVT.getVectorNumElements();
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+  Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+  Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumSubvectors = N->getNumOperands() / 2;
+  if (NumSubvectors == 1) {
+    Lo = N->getOperand(0);
+    Hi = N->getOperand(1);
+    return;
+  }
+
+  MVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+  Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+  Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  MVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SDValue DTyOpLo = DAG.getValueType(LoVT);
+  SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  // Split the input.
+  SDValue VLo, VHi;
+  MVT InVT = N->getOperand(0).getValueType();
+  switch (getTypeAction(InVT)) {
+  default: assert(0 && "Unexpected type action!");
+  case Legal: {
+    assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+    MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(0));
+    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  case SplitVector:
+    GetSplitVector(N->getOperand(0), VLo, VHi);
+    break;
+  case WidenVector: {
+    // If the result needs to be split and the input needs to be widened,
+    // the two types must have different lengths.  Use the widened result
+    // and extract from it to do the split.
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); + SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); + + Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT IdxVT = Idx.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + // The indices are not guaranteed to be a multiple of the new vector + // size unless the original vector type was split in two. + assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, + DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + MVT VecVT = Vec.getValueType(); + MVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. 
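+  // The Hi half sits IncrementSize bytes into the slot, so only
+  // MinAlign(Alignment, IncrementSize) can be promised for it; e.g. a
+  // 16-byte aligned slot read at offset 8 only guarantees 8-byte alignment.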
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, + MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + MVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + MVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + SV, SVOffset, LoMemVT, isVolatile, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + Alignment = MinAlign(Alignment, IncrementSize); + Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + SV, SVOffset, HiMemVT, isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), Lo, Hi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + MVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + assert(NewVT == Inputs[1].getValueType() && + "Non power-of-two vectors not supported!"); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + MVT EltVT = NewVT.getVectorElementType(); + SmallVector SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. 
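+        // Undef mask entries take this path too: getMaskElt returns -1,
+        // which the unsigned division above turns into an Input index far
+        // beyond the four split inputs.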
+ SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + +void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue LL, LH, RL, RH; + GetSplitVector(N->getOperand(0), LL, LH); + GetSplitVector(N->getOperand(1), RL, RH); + + Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = SplitVecOp_STORE(cast(N), + OpNo); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. 
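+  // Example (hypothetical target): an FP_TO_SINT of v8f32 that was split as
+  // two v4f32 halves becomes two v4i32 FP_TO_SINTs that are glued back
+  // together with CONCAT_VECTORS, assuming v8i32 itself is legal.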
+ MVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Returns legal non-power-of-two vector type?"); + MVT InVT = Lo.getValueType(); + + MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { + // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. For now, assume the index + // is a constant. + MVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT VecVT = Vec.getValueType(); + + if (isa(Idx)) { + uint64_t IdxVal = cast(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); + else + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); + } + + // Store the vector to the stack. + MVT EltVT = VecVT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + int SPFI = cast(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + + // Load back the required element. 
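+  // GetVectorElementPointer scales the index by the element size, so e.g.
+  // lane 5 of a spilled v8i32 is read back from byte offset 20 of the slot.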
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + DebugLoc dl = N->getDebugLoc(); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + MVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getAlignment(); + bool isVol = N->isVolatile(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + LoMemVT, isVol, Alignment); + else + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getSrcValue(), SVOffset+IncrementSize, + HiMemVT, + isVol, MinAlign(Alignment, IncrementSize)); + else + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, + isVol, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; + case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FPOWI: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: Res = WidenVecRes_Binary(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: Res = WidenVecRes_Shift(N); break; + + case ISD::ANY_EXTEND: + case 
ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::ZERO_EXTEND: + case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FSIN: + case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + unsigned Opcode = N->getOpcode(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(N->getOperand(0)); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getNode(Opcode, dl, WidenVT, InOp); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, + &Ops[0], NumConcat)); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, + InOp, DAG.getIntPtrConstant(0))); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
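+  // Sketch (hypothetical types): converting v3i8 to a v3f32 that widens to
+  // v4f32 emits three scalar converts on extracted elements and one undef
+  // lane, reassembled by the BUILD_VECTOR at the end.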
+ SmallVector Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + MVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == WidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + case SoftenFloat: + case ExpandInteger: + case ExpandFloat: + case ScalarizeVector: + case SplitVector: + break; + case WidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + if (WidenSize % InSize == 0) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + MVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + MVT InEltVT = InVT.getVectorElementType(); + NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = MVT::getVectorVT(InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. 
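+      // Sketch (hypothetical types): an i64 -> v2f32 bit-convert whose
+      // result widens to v4f32 builds a v2i64 from the i64 plus one undef
+      // element and bit-converts that, instead of widening the scalar input.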
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+    }
+  }
+
+  // This should occur rarely. Lower the bit-convert to a store/load
+  // from the stack. Create the stack frame object. Make sure it is aligned
+  // for both the source and destination types.
+  SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // Build a vector with undefined for the new nodes.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  MVT InVT = N->getOperand(0).getValueType();
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumOperands = N->getNumOperands();
+
+  bool InputWidened = false; // Indicates we need to widen the input.
+  if (getTypeAction(InVT) != WidenVector) {
+    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+      // Add undef vectors to widen to correct length.
+      unsigned NumConcat = WidenVT.getVectorNumElements() /
+                           InVT.getVectorNumElements();
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      for (unsigned i = 0; i < NumOperands; ++i)
+        Ops[i] = N->getOperand(i);
+      for (unsigned i = NumOperands; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+    }
+  } else {
+    InputWidened = true;
+    if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+      // The inputs and the result are widened to the same type.
+      unsigned i;
+      for (i = 1; i < NumOperands; ++i)
+        if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+          break;
+
+      if (i == NumOperands)
+        // Everything but the first operand is an UNDEF so just return the
+        // widened first operand.
+        return GetWidenedVector(N->getOperand(0));
+
+      if (NumOperands == 2) {
+        // Replace concat of two operands with a shuffle.
+        SmallVector<int, 16> MaskOps(WidenNumElts);
+        for (unsigned i = 0; i < WidenNumElts/2; ++i) {
+          MaskOps[i] = i;
+          MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+        }
+        return DAG.getVectorShuffle(WidenVT, dl,
+                                    GetWidenedVector(N->getOperand(0)),
+                                    GetWidenedVector(N->getOperand(1)),
+                                    &MaskOps[0]);
+      }
+    }
+  }
+
+  // Fall back to use extracts and build vector.
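+  // E.g. concatenating two v3i32 values into a v6i32 that widens to v8i32:
+  // all six source elements are extracted individually, two undef lanes are
+  // appended, and one BUILD_VECTOR forms the widened result.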
+ MVT EltVT = WidenVT.getVectorElementType(); + unsigned NumInElts = InVT.getVectorNumElements(); + SmallVector Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) { + Ops[i] = UndefVal; + } + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getIntPtrConstant(0)); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
+ SmallVector Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + + MVT InVT = InOp.getValueType(); + + ConstantSDNode *CIdx = dyn_cast(Idx); + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + // Check if we can just return the input vector after widening. + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + } + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector Ops(WidenNumElts); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Idx.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal+i, IdxVT)); + } else { + Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); + for (i=1; i < NumElts; ++i) { + SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(i, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); + } + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast(N); + MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + MVT LdVT = LD->getMemoryVT(); + DebugLoc dl = N->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + const Value *SV = LD->getSrcValue(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) { + // For extension loads, we can not play the tricks of chopping legal + // vector types 
and bit cast it to the right type. Instead, we unroll + // the load and build a vector. + MVT EltVT = WidenVT.getVectorElementType(); + MVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + SVOffset + Offset, LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + } else { + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + unsigned int LdWidth = LdVT.getSizeInBits(); + Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, + Align, isVolatile, LdWidth, WidenVT, dl); + } + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], + LdChain.size()); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
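+  // ReplaceValueWith redirects every user of the load's old chain result
+  // (value #1) to the token factor, so operations ordered after the original
+  // load stay ordered after both partial loads.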
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+                     WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue Cond1 = N->getOperand(0);
+  MVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    MVT CondEltVT = CondVT.getVectorElementType();
+    MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+      Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Adjust mask based on new input vector length.
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "cannot widen non-vector type");
+  MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+  // we will have to unroll it at some point.
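+  // E.g. a v2i32 VSETCC whose operands widen to v4i32 simply becomes a
+  // v4i32 VSETCC; the two padding lanes compute meaningless results that no
+  // original use ever observes.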
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { + MVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + if (InWidenSize % Size == 0 && !VT.isVector()) { + unsigned NewNumElts = InWidenSize / Size; + MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getIntPtrConstant(0)); + } + } + + // Lower the bit-convert to a store/load from the stack. Create the stack + // frame object. Make sure it is aligned for both the source and destination + // types. 
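+  // CreateStackTemporary(InWidenVT, VT) sizes the slot for the larger type
+  // and aligns it for both, so the widened vector can be stored in full and
+  // only VT's bytes read back from the start of the slot.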
+  SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size. Replace the concatenate vector with a
+  // nasty build vector.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+
+  MVT InVT = N->getOperand(0).getValueType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
+  unsigned Idx = 0;
+  unsigned NumOperands = N->getNumOperands();
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (getTypeAction(InOp.getValueType()) == WidenVector)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j = 0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  MVT EltVT = InOp.getValueType().getVectorElementType();
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     EltVT, InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we want only to store the original
+  // vector type.
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  const Value *SV = ST->getSrcValue();
+  int SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT StVT = ST->getMemoryVT();
+  MVT ValVT = ValOp.getValueType();
+  // The widened vector type must be bigger than the vector type we actually
+  // need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  SmallVector<SDValue, 16> StChain;
+  if (ST->isTruncatingStore()) {
+    // For truncating stores, we cannot play the trick of chopping the value
+    // into legal vector types and bit-casting to the right type. Instead,
+    // we unroll the store.
+    MVT StEltVT = StVT.getVectorElementType();
+    MVT ValEltVT = ValVT.getVectorElementType();
+    unsigned Increment = ValEltVT.getSizeInBits() / 8;
+    unsigned NumElts = StVT.getVectorNumElements();
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                        SVOffset, StEltVT,
+                                        isVolatile, Align));
+    unsigned Offset = Increment;
+    for (unsigned i = 1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                          SVOffset + Offset, StEltVT,
+                                          isVolatile, MinAlign(Align, Offset)));
+    }
+  } else {
+    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+    // Store the value.
+    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+  }
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a vector type and its associated element type
+// from a preferred width; the resulting vector type must have the same size
+// as VecVT.
+// TLI: Target lowering used to determine legal types.
+// Width: Preferred width to load or store.
+// VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+                                  MVT VecVT,
+                                  MVT& NewEltVT, MVT& NewVecVT) {
+  unsigned EltWidth = Width + 1;
+  if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, rounded down to a power of 2, and
+    // look for a legal vector type of that width. If that type is not legal,
+    // we reduce the width by another factor of 2 and try again. Since the
+    // incoming type is legal, this process must terminate: a vector of the
+    // smallest loadable type should always be legal.
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewVecVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit converts (which would turn into stack
+    // converts if they were allowed to exist).
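+    // For example (hypothetical): with a 96-bit VecVT such as v3i32 and a
+    // preferred Width of 48, the first iteration tries EltWidth 32 (the
+    // largest power of 2 not exceeding 48); v3i32 covers all 96 bits and
+    // i32 is legal, so the loop stops there.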
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewEltVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              SDValue Chain,
+                                              SDValue BasePtr,
+                                              const Value *SV,
+                                              int SVOffset,
+                                              unsigned Alignment,
+                                              bool isVolatile,
+                                              unsigned LdWidth,
+                                              MVT ResType,
+                                              DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the value into loads of the largest power-of-two width
+  // that fits, inserts each piece into a legal vector, and then bit-casts
+  // the result into the vector type we want. This avoids unnecessary stack
+  // converts.
+
+  // TODO: If LdWidth is legal, the alignment matches LdWidth, and the load
+  // is non-volatile, we can use a single wider load for the value.
+
+  // Find the vector type that we can load into.
+  MVT NewEltVT, NewVecVT;
+  unsigned NewEltVTWidth;
+  FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+  NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction.
+  if (LdWidth == NewEltVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  }
+
+  unsigned Idx = 1;
+  LdWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewEltVTWidth) {
+      // The current load type is too large; switch to a smaller power of 2.
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the insert position within the vector for the new load type.
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    }
+
+    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+                               SVOffset+Offset, isVolatile,
+                               MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+
+    LdWidth -= NewEltVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            SDValue Chain,
+                                            SDValue BasePtr,
+                                            const Value *SV,
+                                            int SVOffset,
+                                            unsigned Alignment,
+                                            bool isVolatile,
+                                            SDValue ValOp,
+                                            unsigned StWidth,
+                                            DebugLoc dl) {
+  // Break the store into a series of power-of-two-width stores. At each
+  // width, we bit-convert the vector to a vector whose element size is the
+  // width we want to store. This avoids requiring a stack convert.
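+  // For example (hypothetical legal types): storing the low 96 bits of a
+  // value widened to v4i32 emits an i64 store of lanes {0,1} (viewed as
+  // v2i64) followed by an i32 store of lane 2 at offset 8, instead of
+  // spilling the whole vector and copying 12 bytes.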
+ + // Find a width of the element type we can store with + MVT WidenVT = ValOp.getValueType(); + MVT NewEltVT, NewVecVT; + + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); + + SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(0)); + SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, + isVolatile, Alignment); + StChain.push_back(StOp); + + // Check if we are done + if (StWidth == NewEltVTWidth) { + return; + } + + unsigned Idx = 1; + StWidth -= NewEltVTWidth; + unsigned Offset = 0; + + while (StWidth > 0) { + unsigned Increment = NewEltVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + + if (StWidth < NewEltVTWidth) { + // Our current type we are using is too large, use a smaller size by + // using a smaller power of 2 + unsigned oNewEltVTWidth = NewEltVTWidth; + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + NewEltVTWidth = NewEltVT.getSizeInBits(); + // Readjust position and vector position based on new load type + Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + } + + EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(Idx++)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + MinAlign(Alignment, Offset))); + StWidth -= NewEltVTWidth; + } +} + +/// Modifies a vector input (widen or narrows) to a vector of NVT. The +/// input vector must have the same element type as NVT. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { + // Note that InOp might have been widened so it might already have + // the right width or it might need be narrowed. + MVT InVT = InOp.getValueType(); + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + DebugLoc dl = InOp.getDebugLoc(); + + // Check if InOp already has the right width. + if (InVT == NVT) + return InOp; + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { + unsigned NumConcat = WidenNumElts / InNumElts; + SmallVector Ops(NumConcat); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getIntPtrConstant(0)); + + // Fall back to extract and build. 
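+  // The fallback handles both directions: the first MinNumElts lanes are
+  // copied out one at a time, and any remaining lanes of a longer NVT are
+  // filled with undef before the final BUILD_VECTOR.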
+ SmallVector Ops(WidenNumElts); + MVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(Idx)); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); +} diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile new file mode 100644 index 000000000000..185222ade98e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -0,0 +1,15 @@ +##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSelectionDAG +PARALLEL_DIRS = +BUILD_ARCHIVE = 1 +DONT_BUILD_RELINKED = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 000000000000..af73b28fae93 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,635 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct VISIBILITY_HIDDEN FastPriorityQueue { + SmallVector Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". 
These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector LiveRegDefs; + std::vector LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. 
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = NewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
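// Aside: the DelDeps block below follows a record-then-remove pattern -- the
// edges to already-scheduled successors are first copied onto the new node
// and remembered, and only removed from the old node afterward, so the
// successor list is never mutated while it is being walked. A minimal sketch
// of the same idea with a hypothetical Node type (not SUnit/SDep):
#if 0
#include <vector>

struct Node {
  std::vector<Node*> Succs;
  bool Scheduled;
};

// Move every edge Old->S with S already scheduled over to New->S.
void moveScheduledSuccs(Node *Old, Node *New) {
  std::vector<Node*> Moved;                        // record first...
  for (unsigned i = 0, e = Old->Succs.size(); i != e; ++i)
    if (Old->Succs[i]->Scheduled)
      Moved.push_back(Old->Succs[i]);
  for (unsigned i = 0, e = Moved.size(); i != e; ++i) {
    New->Succs.push_back(Moved[i]);                // ...then rewire,
    for (unsigned j = 0, ee = Old->Succs.size(); j != ee; ++j)
      if (Old->Succs[j] == Moved[i]) {             // ...and erase from Old.
        Old->Succs.erase(Old->Succs.begin() + j);
        break;
      }
  }
}
#endif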
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(CopyToSU);
+      AddPred(SuccSU, D);
+      DelDeps.push_back(std::make_pair(SuccSU, *I));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+  }
+
+  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+  Copies.push_back(CopyFromSU);
+  Copies.push_back(CopyToSU);
+
+  ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+                                 const TargetInstrInfo *TII) {
+  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = TID.getNumDefs();
+  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+    if (Reg == *ImpDef)
+      break;
+    ++NumRes;
+  }
+  return N->getValueType(NumRes);
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register
+/// dependencies. If the specified node is the last one that's available to
+/// schedule, do whatever is necessary (i.e. backtracking or cloning) to
+/// make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+                                               SmallVector<unsigned, 4> &LRegs){
+  if (NumLiveRegs == 0)
+    return false;
+
+  SmallSet<unsigned, 4> RegAdded;
+  // If this node would clobber any "live" register, then it's not ready.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      unsigned Reg = I->getReg();
+      if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
+        if (RegAdded.insert(Reg))
+          LRegs.push_back(Reg);
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(Reg);
+           *Alias; ++Alias)
+        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
+          if (RegAdded.insert(*Alias))
+            LRegs.push_back(*Alias);
+        }
+    }
+  }
+
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+    if (!Node->isMachineOpcode())
+      continue;
+    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+      if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
+        if (RegAdded.insert(*Reg))
+          LRegs.push_back(*Reg);
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(*Reg);
+           *Alias; ++Alias)
+        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+          if (RegAdded.insert(*Alias))
+            LRegs.push_back(*Alias);
+        }
+    }
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue.push(RootSU);
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  SmallVector<SUnit *, 4> NotReady;
+  DenseMap<SUnit *, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue.pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue.pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try code duplication or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      // Try duplicating the nodes that produce these
+      // "expensive to copy" values to break the dependency. In case even
+      // that doesn't work, insert cross class copies.
+      SUnit *TrySU = NotReady[0];
+      SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+      assert(LRegs.size() == 1 && "Can't handle this yet!");
+      unsigned Reg = LRegs[0];
+      SUnit *LRDef = LiveRegDefs[Reg];
+      MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, VT);
+      const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+      // If the cross copy register class is null, then it must be possible
+      // to copy the value directly. Do not try to duplicate the def.
+      SUnit *NewDef = 0;
+      if (DestRC)
+        NewDef = CopyAndMoveSuccessors(LRDef);
+      else
+        DestRC = RC;
+      if (!NewDef) {
+        // Issue copies; these can be expensive cross register class copies.
+        SmallVector<SUnit *, 2> Copies;
+        InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+        DOUT << "Adding an edge from SU # " << TrySU->NodeNum
+             << " to SU #" << Copies.front()->NodeNum << "\n";
+        AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                            /*Reg=*/0, /*isNormalMemory=*/false,
+                            /*isMustAlias=*/false, /*isArtificial=*/true));
+        NewDef = Copies.back();
+      }
+
+      DOUT << "Adding an edge from SU # " << NewDef->NodeNum
+           << " to SU #" << TrySU->NodeNum << "\n";
+      LiveRegDefs[Reg] = NewDef;
+      AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                           /*Reg=*/0, /*isNormalMemory=*/false,
+                           /*isMustAlias=*/false, /*isArtificial=*/true));
+      TrySU->isAvailable = false;
+      CurSU = NewDef;
+
+      if (!CurSU) {
+        assert(false && "Unable to resolve live physical register dependencies!");
+        abort();
+      }
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue.push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order, since we built the schedule bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  unsigned DeadNodes = 0;
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (!SUnits[i].isScheduled) {
+      if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+        ++DeadNodes;
+        continue;
+      }
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+    if (SUnits[i].NumSuccsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed!
***\n"; + SUnits[i].dump(this); + cerr << "has successors left!\n"; + AnyNotSched = true; + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 000000000000..c4325349990d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,268 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/Statistic.h" +#include +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector PendingQueue; + + /// HazardRec - The hazard recognizer to use. 
+ ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue, + ScheduleHazardRecognizer *HR) + : ScheduleDAGSDNodes(mf), + AvailableQueue(availqueue), HazardRec(HR) { + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build the scheduling graph. + BuildSchedGraph(); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. 
If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DOUT << "*** Advancing cycle, no work to do\n"; + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DOUT << "*** Emitting noop\n"; + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. +ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 000000000000..c97e2a8c86bf --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,1533 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. 
The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +using namespace llvm; + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + tdrListrDAGScheduler("list-tdrr", + "Top-down register reduction list scheduling", + createTDRRListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: + /// isBottomUp - This is true if the scheduling problem is bottom-up, false if + /// it is top-down. + bool isBottomUp; + + /// AvailableQueue - The priority queue to use for the available SUnits. + SchedulingPriorityQueue *AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector LiveRegDefs; + std::vector LiveRegCycles; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. + ScheduleDAGTopologicalSort Topo; + +public: + ScheduleDAGRRList(MachineFunction &mf, + bool isbottomup, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + AvailableQueue(availqueue), Topo(SUnits) { + } + + ~ScheduleDAGRRList() { + delete AvailableQueue; + } + + void Schedule(); + + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will + /// create a cycle. + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + /// Updates the topological ordering if required. + void AddPred(SUnit *SU, const SDep &D) { + Topo.AddPred(SU, D.getSUnit()); + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + /// Updates the topological ordering if required. 
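// Aside: why a maintained topological ordering makes WillCreateCycle cheap --
// with every edge going from lower to higher ordinal, a new edge U->V can
// only create a cycle when ord[V] < ord[U], and the search from V can be
// pruned to that ordinal window. A sketch over a hypothetical adjacency
// list, not the ScheduleDAGTopologicalSort interface:
#if 0
#include <vector>

bool willCreateCycle(const std::vector<std::vector<int> > &Succ,
                     const std::vector<int> &Ord, int U, int V) {
  if (Ord[V] > Ord[U]) return false;        // forward edge: always safe
  std::vector<int> Stack(1, V);
  std::vector<bool> Seen(Succ.size(), false);
  while (!Stack.empty()) {
    int N = Stack.back(); Stack.pop_back();
    if (N == U) return true;                // V reaches U: adding U->V cycles
    if (Seen[N]) continue;
    Seen[N] = true;
    for (unsigned i = 0, e = Succ[N].size(); i != e; ++i)
      if (Ord[Succ[N][i]] <= Ord[U])        // prune nodes past the window
        Stack.push_back(Succ[N][i]);
  }
  return false;
}
#endif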
+ void RemovePred(SUnit *SU, const SDep &D) { + Topo.RemovePred(SU, D.getSUnit()); + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, const SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void CapturePred(SDep *PredEdge); + void ScheduleNodeBottomUp(SUnit*, unsigned); + void ScheduleNodeTopDown(SUnit*, unsigned); + void UnscheduleNodeBottomUp(SUnit*); + void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void ListScheduleTopDown(); + void ListScheduleBottomUp(); + + + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. + /// Updates the topological ordering if required. + SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = NewSUnit(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + /// Updates the topological ordering if required. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. + if (isBottomUp) + ListScheduleBottomUp(); + else + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. 
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue->push(PredSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it
+/// to the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
+/// its state and that of its predecessors to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(SU->dump(this)); + + AvailableQueue->UnscheduledNode(SU); + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + CapturePred(&*I); + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) { + LiveRegDefs[I->getReg()] = SU; + ++NumLiveRegs; + } + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + } + } + + SU->setHeightDirty(); + SU->isScheduled = false; + SU->isAvailable = true; + AvailableQueue->push(SU); +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, + unsigned &CurCycle) { + SUnit *OldSU = NULL; + while (CurCycle > BtCycle) { + OldSU = Sequence.back(); + Sequence.pop_back(); + if (SU->isSucc(OldSU)) + // Don't try to remove SU from AvailableQueue. + SU->isAvailable = false; + UnscheduleNodeBottomUp(OldSU); + --CurCycle; + } + + assert(!SU->isSucc(OldSU) && "Something is wrong!"); + + ++NumBacktracks; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
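// Aside: what unfolding buys the scheduler, roughly, for an x86-style
// load-folded add (illustrative opcodes; the exact replacement pair comes
// from TII->unfoldMemoryOperand):
//
//   before:  t = ADD32rm a, [mem]        -- one SUnit, load folded into add
//   after:   l = MOV32rm [mem]           -- LoadSU, may be placed earlier
//            t = ADD32rr a, l            -- NewSU, data-dependent on LoadSU
//
// The loops that follow repartition the old SUnit's edges accordingly:
// chain predecessors and the load's operand edges go to LoadSU, everything
// else to NewSU.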
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector ChainPreds; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector &LiveRegDefs, + SmallSet &RegAdded, + SmallVector &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. 
backtracking or cloning) to make it possible. +bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = (Flags & 0xffff) >> 3; + + ++i; // Skip the ID value. + if ((Flags & 7) == 2 || (Flags & 7) == 6) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector NotReady; + DenseMap > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + SUnit *TrySU = NotReady[i]; + SmallVector &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
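// Aside: a standalone sketch of the LiveCycle computation just below -- the
// scheduler must unwind to the earliest cycle at which any interfering
// register became live, since only from there can TrySU issue without a
// clobber. Hypothetical free function, same arithmetic:
#if 0
#include <algorithm>
#include <vector>

unsigned earliestLiveCycle(const std::vector<unsigned> &LiveRegCycles,
                           const std::vector<unsigned> &Interfering,
                           unsigned CurCycle) {
  unsigned LiveCycle = CurCycle;
  for (unsigned j = 0, e = Interfering.size(); j != e; ++j)
    LiveCycle = std::min(LiveCycle, LiveRegCycles[Interfering[j]]);
  return LiveCycle;
}
#endif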
+        unsigned LiveCycle = CurCycle;
+        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+          unsigned Reg = LRegs[j];
+          unsigned LCycle = LiveRegCycles[Reg];
+          LiveCycle = std::min(LiveCycle, LCycle);
+        }
+        SUnit *OldSU = Sequence[LiveCycle];
+        if (!WillCreateCycle(TrySU, OldSU)) {
+          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+          // Force the current node to be scheduled before the node that
+          // requires the physical reg dep.
+          if (OldSU->isAvailable) {
+            OldSU->isAvailable = false;
+            AvailableQueue->remove(OldSU);
+          }
+          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          // If one or more successors has been unscheduled, then the current
+          // node is no longer available. Schedule a successor that's now
+          // available instead.
+          if (!TrySU->isAvailable)
+            CurSU = AvailableQueue->pop();
+          else {
+            CurSU = TrySU;
+            TrySU->isPending = false;
+            NotReady.erase(NotReady.begin()+i);
+          }
+          break;
+        }
+      }
+
+      if (!CurSU) {
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicating the nodes that produce these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies; these can be expensive cross register class copies.
+          SmallVector<SUnit *, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DOUT << "Adding an edge from SU #" << TrySU->NodeNum
+               << " to SU #" << Copies.front()->NodeNum << "\n";
+          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false,
+                              /*isArtificial=*/true));
+          NewDef = Copies.back();
+        }
+
+        DOUT << "Adding an edge from SU #" << NewDef->NodeNum
+             << " to SU #" << TrySU->NodeNum << "\n";
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                             /*Reg=*/0, /*isNormalMemory=*/false,
+                             /*isMustAlias=*/false,
+                             /*isArtificial=*/true));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      assert(CurSU && "Unable to resolve live physical register dependencies!");
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order, since we built the schedule bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+  VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+  --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft < 0) {
+    cerr << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+    SuccSU->isAvailable = true;
+    AvailableQueue->push(SuccSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+  // Top down: release successors
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    assert(!I->isAssignedRegDep() &&
+           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+    ReleaseSucc(SU, &*I);
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue->empty()) {
+    SUnit *CurSU = AvailableQueue->pop();
+
+    if (CurSU)
+      ScheduleNodeTopDown(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+#ifndef NDEBUG
+  VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
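// Aside: for intuition, the textbook Sethi-Ullman number of a binary
// expression tree -- a leaf needs one register; an interior node needs the
// max of its children's demands, plus one when they tie. The hypothetical
// Expr recursion below is the classic form that CalcNodeSethiUllmanNumber
// ahead generalizes to DAG nodes with arbitrary fan-in:
#if 0
struct Expr { Expr *LHS, *RHS; };  // both null => leaf

unsigned sethiUllman(const Expr *E) {
  if (!E->LHS && !E->RHS) return 1;            // leaf: one register
  unsigned L = sethiUllman(E->LHS);
  unsigned R = sethiUllman(E->RHS);
  return L == R ? L + 1 : (L > R ? L : R);     // max, +1 on a tie
}

// (a + b) * (c + d) needs max(2, 2) + 1 = 3 registers, while the skewed
// ((a + b) + c) + d evaluates left-to-right in only 2.
#endif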
+ struct bu_ls_rr_sort : public std::binary_function { + RegReductionPriorityQueue *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + struct td_ls_rr_sort : public std::binary_function { + RegReductionPriorityQueue *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +namespace { + template + class VISIBILITY_HIDDEN RegReductionPriorityQueue + : public SchedulingPriorityQueue { + PriorityQueue, SF> Queue; + unsigned currentQueueId; + + protected: + // SUnits - The SUnits for the current graph. + std::vector *SUnits; + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector SethiUllmanNumbers; + + public: + RegReductionPriorityQueue(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) : + Queue(SF(this)), currentQueueId(0), + TII(tii), TRI(tri), scheduleDAG(NULL) {} + + void initNodes(std::vector &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + } + + void addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + unsigned getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return 0; + if (Opc == TargetInstrInfo::EXTRACT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG || + Opc == TargetInstrInfo::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. 
+namespace {
+  template<class SF>
+  class VISIBILITY_HIDDEN RegReductionPriorityQueue
+    : public SchedulingPriorityQueue {
+    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
+    unsigned currentQueueId;
+
+  protected:
+    // SUnits - The SUnits for the current graph.
+    std::vector<SUnit> *SUnits;
+
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    ScheduleDAGRRList *scheduleDAG;
+
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+  public:
+    RegReductionPriorityQueue(const TargetInstrInfo *tii,
+                              const TargetRegisterInfo *tri) :
+      Queue(SF(this)), currentQueueId(0),
+      TII(tii), TRI(tri), scheduleDAG(NULL) {}
+
+    void initNodes(std::vector<SUnit> &sunits) {
+      SUnits = &sunits;
+      // Add pseudo dependency edges for two-address nodes.
+      AddPseudoTwoAddrDeps();
+      // Reroute edges to nodes with multiple uses.
+      PrescheduleNodesWithMultipleUses();
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void addNode(const SUnit *SU) {
+      unsigned SUSize = SethiUllmanNumbers.size();
+      if (SUnits->size() > SUSize)
+        SethiUllmanNumbers.resize(SUSize*2, 0);
+      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+    }
+
+    void updateNode(const SUnit *SU) {
+      SethiUllmanNumbers[SU->NodeNum] = 0;
+      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+        // CopyToReg should be close to its uses to facilitate coalescing and
+        // avoid spilling.
+        return 0;
+      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+          Opc == TargetInstrInfo::SUBREG_TO_REG ||
+          Opc == TargetInstrInfo::INSERT_SUBREG)
+        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+        // close to their uses to facilitate coalescing.
+        return 0;
+      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. store), then it terminates a chain of
+        // computation. Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors, so that it doesn't
+        // lengthen their live ranges.
+        return 0xffff;
+      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+        // If SU does not have a register def, schedule it close to its uses
+        // because it does not lengthen any live ranges.
+        return 0;
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    unsigned size() const { return Queue.size(); }
+
+    bool empty() const { return Queue.empty(); }
+
+    void push(SUnit *U) {
+      assert(!U->NodeQueueId && "Node in the queue already");
+      U->NodeQueueId = ++currentQueueId;
+      Queue.push(U);
+    }
+
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        push(Nodes[i]);
+    }
+
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      V->NodeQueueId = 0;
+      return V;
+    }
+
+    void remove(SUnit *SU) {
+      assert(!Queue.empty() && "Queue is empty!");
+      assert(SU->NodeQueueId != 0 && "Not in queue!");
+      Queue.erase_one(SU);
+      SU->NodeQueueId = 0;
+    }
+
+    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+      scheduleDAG = scheduleDag;
+    }
+
+  protected:
+    bool canClobber(const SUnit *SU, const SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void PrescheduleNodesWithMultipleUses();
+    void CalculateSethiUllmanNumbers();
+  };
+
+  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+    BURegReductionPriorityQueue;
+
+  typedef RegReductionPriorityQueue<td_ls_rr_sort>
+    TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxHeight = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain succs
+    unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be
+    // considered to be at the same position.
+    if (I->getSUnit()->getNode() &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority != RPriority)
+    return LPriority > RPriority;
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+  // t2 = op c3
+  // t4 = op c4
+  //
+  // Then schedule t2 = op first.
+  // i.e.
+  // t4 = op c4
+  // t2 = op c3
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // This creates more short live intervals.
+  unsigned LDist = closestSucc(left);
+  unsigned RDist = closestSucc(right);
+  if (LDist != RDist)
+    return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+  unsigned LScratch = calcMaxScratches(left);
+  unsigned RScratch = calcMaxScratches(right);
+  if (LScratch != RScratch)
+    return LScratch > RScratch;
+
+  if (left->getHeight() != right->getHeight())
+    return left->getHeight() > right->getHeight();
+
+  if (left->getDepth() != right->getDepth())
+    return left->getDepth() < right->getDepth();
+
+  assert(left->NodeQueueId && right->NodeQueueId &&
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->getNode()->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->getNode()->getOperand(i).getNode();
+        if (DU->getNodeId() != -1 &&
+            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+      return true;
+  }
+  return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+                                  const TargetInstrInfo *TII,
+                                  const TargetRegisterInfo *TRI) {
+  SDNode *N = SuccSU->getNode();
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+  assert(ImpDefs && "Caller should check hasPhysRegDefs");
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getFlaggedNode()) {
+    if (!SUNode->isMachineOpcode())
+      continue;
+    const unsigned *SUImpDefs =
+      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+    if (!SUImpDefs)
+      return false;
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      MVT VT = N->getValueType(i);
+      if (VT == MVT::Flag || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned Reg = ImpDefs[i - NumDefs];
+      for (;*SUImpDefs; ++SUImpDefs) {
+        unsigned SUReg = *SUImpDefs;
+        if (TRI->regsOverlap(Reg, SUReg))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///       N
+///     /  |
+///    /   |
+///   U   store
+///   |
+///  ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///       N
+///       ||
+///       ||
+///      store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU->NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU->NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Locate the single data predecessor.
+    SUnit *PredSU = 0;
+    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+         EE = SU->Preds.end(); II != EE; ++II)
+      if (!II->isCtrl()) {
+        PredSU = II->getSUnit();
+        break;
+      }
+    assert(PredSU);
+
+    // Don't rewrite edges that carry physregs, because that requires
+    // additional support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling to copies from virtual registers, which don't
+    // behave like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+         EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DOUT << "Prescheduling SU # " << SU->NodeNum
+         << " next to PredSU # " << PredSU->NodeNum
+         << " to guide scheduling in the presence of multiple uses\n";
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);
+        --i;
+      }
+    }
+  outer_loop_continue:;
+  }
+}
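The routine below adds artificial ordering edges for two-address nodes. As a rough, self-contained illustration of why a single extra edge is enough (toy types, far simpler than SUnit/SDep; the emit loop is a bare-bones version of the release mechanism the list schedulers above use): if t2 = add t1, 1 must reuse t1's register, any other reader of t1, say t3 = mul t1, t1, has to be emitted first, which is exactly what making the reader a predecessor of the add enforces.

  #include <cstdio>
  #include <vector>

  struct ToyNode {
    const char *Name;
    std::vector<int> Preds;   // indices of nodes that must be emitted first
  };

  // Emit the nodes in some order consistent with the edges (Kahn's
  // algorithm). Assumes the graph is acyclic.
  static void TopoEmit(std::vector<ToyNode> &G) {
    std::vector<int> PredsLeft(G.size());
    for (unsigned i = 0; i != G.size(); ++i)
      PredsLeft[i] = (int)G[i].Preds.size();
    for (unsigned Emitted = 0; Emitted != G.size(); ) {
      for (unsigned i = 0; i != G.size(); ++i) {
        if (PredsLeft[i] != 0) continue;   // not ready, or already emitted
        std::printf("%s\n", G[i].Name);
        PredsLeft[i] = -1;                 // mark emitted
        ++Emitted;
        // Release successors, much like ReleaseSucc above.
        for (unsigned j = 0; j != G.size(); ++j)
          for (unsigned k = 0; k != G[j].Preds.size(); ++k)
            if (G[j].Preds[k] == (int)i)
              --PredsLeft[j];
      }
    }
  }

  int main() {
    // t1 = load;  t2 = add t1, 1 (two-address, clobbers t1);  t3 = mul t1, t1
    ToyNode Load = { "load", std::vector<int>() };
    ToyNode Add  = { "add",  std::vector<int>() };
    ToyNode Mul  = { "mul",  std::vector<int>() };
    std::vector<ToyNode> G;
    G.push_back(Load); G.push_back(Add); G.push_back(Mul);
    G[1].Preds.push_back(0);   // add reads t1
    G[2].Preds.push_back(0);   // mul reads t1
    G[1].Preds.push_back(2);   // the artificial edge: emit mul before add
    TopoEmit(G);               // prints: load, mul, add
    return 0;
  }
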
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->getNode();
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU->getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU) continue;
+      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+           E = DUSU->Succs.end(); I != E; ++I) {
+        if (I->isCtrl()) continue;
+        SUnit *SuccSU = I->getSUnit();
+        if (SuccSU == SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetInstrInfo::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
+        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+          continue;
+        // Don't constrain nodes with physical register defs if the
+        // predecessor can clobber them.
+        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+            continue;
+        }
+        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+        // these may be coalesced away. We want them close to their uses.
+        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
+            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
+            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
+          continue;
+        if ((!canClobber(SuccSU, DUSU) ||
+             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+             (!SU->isCommutable && SuccSU->isCommutable)) &&
+            !scheduleDAG->IsReachable(SuccSU, SU)) {
+          DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
+               << " to SU #" << SuccSU->NodeNum << "\n";
+          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+                                        /*Reg=*/0, /*isNormalMemory=*/false,
+                                        /*isMustAlias=*/false,
+                                        /*isArtificial=*/true));
+        }
+      }
+    }
+  }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 000000000000..7aa15bcc6862 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,294 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf) { +} + +/// Run - perform scheduling. +/// +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + DAG = dag; + ScheduleDAG::Run(bb, insertPos); +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = NewSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } + } +} + +void ScheduleDAGSDNodes::BuildSchedUnits() { + // During scheduling, the NodeId field of SDNode is used to map SDNodes + // to their associated SUnits by holding SUnits table indices. A value + // of -1 means the SDNode does not yet have an associated SUnit. + unsigned NumNodes = 0; + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + NI->setNodeId(-1); + ++NumNodes; + } + + // Reserve entries in the vector for each of the SUnits we are creating. This + // ensure that reallocation of the vector won't happen, so SUnit*'s won't get + // invalidated. + // FIXME: Multiply by 2 because we may clone nodes during scheduling. + // This is a temporary workaround. + SUnits.reserve(NumNodes * 2); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. + continue; + + // If this node has already been processed, stop now. + if (NI->getNodeId() != -1) continue; + + SUnit *NodeSUnit = NewSUnit(NI); + + // See if anything is flagged to this node, if so, add them to flagged + // nodes. 
Nodes can have at most one flag input and one flag output. Flags + // are required to be the last operand and result of a node. + + // Scan up to find flagged preds. + SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N = N->getOperand(N->getNumOperands()-1).getNode(); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + } + + // Scan down to find any flagged succs. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { + SDValue FlagVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Flag result. + bool HasFlagUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (FlagVal.isOperandOf(*UI)) { + HasFlagUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + break; + } + if (!HasFlagUse) break; + } + + // If there are flag operands involved, N is now the bottom-most node + // of the sequence of nodes that are flagged together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Assign the Latency field of NodeSUnit using target-provided information. + if (UnitLatencies) + NodeSUnit->Latency = 1; + else + ComputeLatency(NodeSUnit); + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + MVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. 
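+      // In other words: CheckForPhysRegDependency returned the register
+      // class's copy cost, and only a negative cost (a copy the target
+      // considers prohibitively expensive, such as a cross-class copy)
+      // keeps PhysReg set; everything else is treated as an ordinary
+      // data dependence.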
+ if (Cost >= 0) + PhysReg = 0; + SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg)); + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// flagged together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph() { + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. + AddSchedEdges(); +} + +void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = 0; + bool SawMachineOpcode = false; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + if (N->isMachineOpcode()) { + SawMachineOpcode = true; + SU->Latency += + InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + } +} + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by special operands that describe memory references, then an +/// optional chain operand, then an optional flag operand. Compute the number +/// of actual operands that will go into the resulting MachineInstr. +unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { + unsigned N = ComputeMemOperandsEnd(Node); + while (N && isa(Node->getOperand(N - 1).getNode())) + --N; // Ignore MEMOPERAND nodes + return N; +} + +/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode +/// operand +unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + cerr << "\n"; + SmallVector FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + cerr << " "; + FlaggedNodes.back()->dump(DAG); + cerr << "\n"; + FlaggedNodes.pop_back(); + } +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 000000000000..2a278b749a8c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,179 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+  /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+  ///
+  /// Edges between SUnits are initially based on edges in the SelectionDAG,
+  /// and additional edges can be added by the schedulers as heuristics.
+  /// SDNodes such as Constants, Registers, and a few others that are not
+  /// interesting to schedulers are not allocated SUnits.
+  ///
+  /// SDNodes with MVT::Flag operands are grouped along with the flagged
+  /// nodes into a single SUnit so that they are scheduled together.
+  ///
+  /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+  /// edges. Physical register dependence information is not carried in
+  /// the DAG and must be handled explicitly by schedulers.
+  ///
+  class ScheduleDAGSDNodes : public ScheduleDAG {
+  public:
+    SelectionDAG *DAG;                    // DAG of the current basic block
+
+    explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+    virtual ~ScheduleDAGSDNodes() {}
+
+    /// Run - perform scheduling.
+    ///
+    void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+             MachineBasicBlock::iterator insertPos);
+
+    /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+    ///
+    static bool isPassiveNode(SDNode *Node) {
+      if (isa<ConstantSDNode>(Node))       return true;
+      if (isa<ConstantFPSDNode>(Node))     return true;
+      if (isa<RegisterSDNode>(Node))       return true;
+      if (isa<GlobalAddressSDNode>(Node))  return true;
+      if (isa<BasicBlockSDNode>(Node))     return true;
+      if (isa<FrameIndexSDNode>(Node))     return true;
+      if (isa<ConstantPoolSDNode>(Node))   return true;
+      if (isa<JumpTableSDNode>(Node))      return true;
+      if (isa<ExternalSymbolSDNode>(Node)) return true;
+      if (isa<MemOperandSDNode>(Node))     return true;
+      if (Node->getOpcode() == ISD::EntryToken) return true;
+      return false;
+    }
+
+    /// NewSUnit - Creates a new SUnit and return a ptr to it.
+    ///
+    SUnit *NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+      const SUnit *Addr = 0;
+      if (!SUnits.empty())
+        Addr = &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessors / successors info nor the temporary scheduling states.
+    ///
+    SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+    /// are input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// flagged together nodes with a single SUnit.
+    virtual void BuildSchedGraph();
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    /// CountResults - The results of target nodes have register or immediate
+    /// operands first, then an optional chain, and optional flag operands
+    /// (which do not go into the machine instrs.)
+    static unsigned CountResults(SDNode *Node);
+
+    /// CountOperands - The inputs to target nodes have any actual inputs
+    /// first, followed by special operands that describe memory references,
+    /// then an optional chain operand, then flag operands.
Compute the number of + /// actual operands that will go into the resulting MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// ComputeMemOperandsEnd - Find the index one past the last + /// MemOperandSDNode operand + static unsigned ComputeMemOperandsEnd(SDNode *Node); + + /// EmitNode - Generate machine code for an node and needed dependencies. + /// VRBaseMap contains, for each already emitted node, the first virtual + /// register number for the results of the node. + /// + void EmitNode(SDNode *Node, bool IsClone, bool HasClone, + DenseMap &VRBaseMap); + + virtual MachineBasicBlock *EmitSchedule(); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual void getCustomGraphFeatures(GraphWriter &GW) const; + + private: + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, + DenseMap &VRBaseMap); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS + /// nodes. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, DenseMap &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; + + void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, const TargetInstrDesc *II, + DenseMap &VRBaseMap); + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + bool IsCloned, unsigned SrcReg, + DenseMap &VRBaseMap); + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, bool IsClone, + bool IsCloned, + DenseMap &VRBaseMap); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. + void BuildSchedUnits(); + void AddSchedEdges(); + }; +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp new file mode 100644 index 000000000000..fb5e207e81bb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -0,0 +1,668 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = + getInstrOperandRegClass(TRI, II, i+II.getNumDefs()); + if (!UseRC) + UseRC = RC; + else if (RC) { + if (UseRC->hasSuperClass(RC)) + UseRC = RC; + else + assert((UseRC == RC || RC->hasSuperClass(UseRC)) && + "Multiple uses expecting different register classes!"); + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + MVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI.getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. 
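+    // A fresh virtual register in DstRC receives the value; copyRegToReg
+    // returns false when it cannot issue a copy between the two register
+    // classes, which the assert below treats as a backend bug.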
+ VRBase = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. +unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i); + + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI.createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned ScheduleDAGSDNodes::getVR(SDValue Op, + DenseMap &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
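+    // With no single CopyToReg user to borrow a destination register from,
+    // fall back to the default register class for the value type and create
+    // a fresh virtual register for the IMPLICIT_DEF result.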
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI.createVirtualRegister(RC); + } + BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg); + return VReg; + } + + DenseMap::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = + MRI.getRegClass(VReg); + const TargetRegisterClass *DstRC = + getInstrOperandRegClass(TRI, *II, IIOpNum); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. +void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getZExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset())); + } else if (BasicBlockSDNode *BBNode = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex())); + } else if (ConstantPoolSDNode *CP = dyn_cast(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. 
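+    // An alignment of zero on the SDNode means "unspecified": substitute the
+    // target's preferred alignment for the type, falling back to the type's
+    // allocation size for vector types where no preference is recorded (see
+    // the FIXME below).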
+ if (Align == 0) { + Align = TM.getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM.getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + if (CP->isMachineConstantPoolEntry()) + Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset)); + } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. +static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, MVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, + DenseMap &VRBaseMap) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. 
+ if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI.getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else + assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg); + + unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + + // Create the new VReg in the destination class and emit a copy. + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && + "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); + (void) Emitted; + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitNode - Generate machine code for an node and needed dependencies. 
+/// +void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + // If machine instruction + if (Node->isMachineOpcode()) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetInstrInfo::EXTRACT_SUBREG || + Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + if (Opc == TargetInstrInfo::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const TargetInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NodeOperands = CountOperands(Node); + unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node); + bool HasPhysRegOuts = (NumResults > II.getNumDefs()) && + II.getImplicitDefs() != 0; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + assert((II.getNumOperands() == NumMIOperands || + HasPhysRegOuts || II.isVariadic()) && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + for (unsigned i = 0; i != NodeOperands; ++i) + AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap); + + // Emit all of the memory operands of this instruction + for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i) + AddMemOperand(MI, cast(Node->getOperand(i))->MO); + + if (II.usesCustomDAGSchedInsertionHook()) { + // Insert this instruction into the basic block using a target + // specific inserter which may returns a new basic block. + BB = TLI->EmitInstrWithCustomInserter(MI, BB); + InsertPos = BB->end(); + } else { + BB->insert(InsertPos, MI); + } + + // Additional results must be an physical register def. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(DAG); +#endif + assert(0 && "This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + assert(0 && "EntryToken should have been excluded from the schedule!"); + break; + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. 
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI.getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI.getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: assert(0 && "Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, 0, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + BB->insert(InsertPos, MI); + break; + } + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { + DenseMap VRBaseMap; + DenseMap CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. 
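+      // The only SUnits with no SDNode at this point are copy units a
+      // scheduler introduced for physical-register interference (dumpNode
+      // prints them as "PHYS REG COPY"); a separate CopyVRBaseMap keeps
+      // their virtual registers from colliding with entries for real nodes.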
+      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      continue;
+    }
+
+    SmallVector<SDNode *, 4> FlaggedNodes;
+    for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+         N = N->getFlaggedNode())
+      FlaggedNodes.push_back(N);
+    while (!FlaggedNodes.empty()) {
+      EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+      FlaggedNodes.pop_back();
+    }
+    EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+  }
+
+  return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..195896ee89dc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,5743 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) {
+  SDVTList Res = {VTs, NumVTs};
+  return Res;
+}
+
+static const fltSemantics *MVTToAPFloatSemantics(MVT VT) {
+  switch (VT.getSimpleVT()) {
+  default: assert(0 && "Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+//                              ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
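+/// As an illustration: APFloat(0.0) == APFloat(-0.0) holds numerically, but
+/// the two values differ in their sign bit, so bitwiseIsEqual treats them as
+/// distinct; likewise two NaNs match here only when their bit patterns
+/// (payload included) are identical.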
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+  return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(MVT VT,
+                                           const APFloat& Val) {
+  assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+  // PPC long double cannot be converted to any other type.
+  if (VT == MVT::ppcf128 ||
+      &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+    return false;
+
+  // convert modifies in place, so make a copy.
+  APFloat Val2 = APFloat(Val);
+  bool losesInfo;
+  (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+                      &losesInfo);
+  return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+//                              ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).getNode();
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+  unsigned i = 0, e = N->getNumOperands();
+
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have non-~0
+  // elements.
+  SDValue NotZero = N->getOperand(i);
+  if (isa<ConstantSDNode>(NotZero)) {
+    if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(NotZero)) {
+    if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+        bitcastToAPInt().isAllOnesValue())
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one ~0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != NotZero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+  // Look through a bit convert.
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).getNode();
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+  unsigned i = 0, e = N->getNumOperands();
+
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have non-0
+  // elements.
+  SDValue Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one 0 value, check to see if the rest match or are
+  // undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
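+/// For instance, (build_vector %x, undef, undef, undef) satisfies this
+/// predicate, while (build_vector undef, %x, undef, undef) does not:
+/// every element other than element 0 must be undef.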
+bool ISD::isScalarToVector(const SDNode *N) {
+  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return true;
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+    return false;
+  unsigned NumElems = N->getNumOperands();
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDValue V = N->getOperand(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      return false;
+  }
+  return true;
+}
+
+
+/// isDebugLabel - Return true if the specified node represents a debug
+/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
+bool ISD::isDebugLabel(const SDNode *N) {
+  if (N->getOpcode() == ISD::DBG_LABEL)
+    return true;
+  if (N->isMachineOpcode() &&
+      N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
+    return true;
+  return false;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));       // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;  // Don't let N and U bits get set.
+
+  return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation, 2 if it is an unsigned comparison, and 0 if the result
+/// does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the U bit if the N bit is set.
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger) {
+    switch (Result) {
+    default: break;
+    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
+    case ISD::SETOEQ:                                 // SETEQ  & SETU[LG]E
+    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
+    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
+    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
+    }
+  }
+
+  return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+  return MF->getTarget();
+}
+
+//===----------------------------------------------------------------------===//
+//                           SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+  ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+  ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDValue *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->getNode());
+    ID.AddInteger(Ops->getResNo());
+  }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDUse *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->getNode());
+    ID.AddInteger(Ops->getResNo());
+  }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+                          unsigned short OpC, SDVTList VTList,
+                          const SDValue *OpList, unsigned N) {
+  AddNodeIDOpcode(ID, OpC);
+  AddNodeIDValueTypes(ID, VTList);
+  AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+  switch (N->getOpcode()) {
+  default: break;  // Normal nodes don't need extra info.
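+  // Each case below folds the payload that distinguishes one node of a given
+  // opcode from another (constant value, global + offset, frame index, memory
+  // VT and subclass data, shuffle mask, ...) into the ID. A node kind whose
+  // payload were omitted here would be CSE'd with unrelated nodes of the same
+  // opcode and operand list.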
+  case ISD::ARG_FLAGS:
+    ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
+    break;
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP: {
+    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+    break;
+  }
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+  case ISD::DBG_STOPPOINT: {
+    const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N);
+    ID.AddInteger(DSP->getLine());
+    ID.AddInteger(DSP->getColumn());
+    ID.AddPointer(DSP->getCompileUnit());
+    break;
+  }
+  case ISD::SRCVALUE:
+    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+    break;
+  case ISD::MEMOPERAND: {
+    const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO;
+    MO.Profile(ID);
+    break;
+  }
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    break;
+  }
+  case ISD::CALL: {
+    const CallSDNode *Call = cast<CallSDNode>(N);
+    ID.AddInteger(Call->getCallingConv());
+    ID.AddInteger(Call->isVarArg());
+    break;
+  }
+  case ISD::LOAD: {
+    const LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getMemoryVT().getRawBits());
+    ID.AddInteger(LD->getRawSubclassData());
+    break;
+  }
+  case ISD::STORE: {
+    const StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getMemoryVT().getRawBits());
+    ID.AddInteger(ST->getRawSubclassData());
+    break;
+  }
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: {
+    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+    ID.AddInteger(AT->getMemoryVT().getRawBits());
+    ID.AddInteger(AT->getRawSubclassData());
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+         i != e; ++i)
+      ID.AddInteger(SVN->getMaskElt(i));
+    break;
+  }
+  } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries alignment, volatility, indexing mode, and
+/// extension/truncation information.
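+/// The packed layout is: bits 1:0 hold ConvType, bits 4:2 hold AM, bit 5 is
+/// isVolatile, and bits 6 and up hold Log2(Alignment)+1 (so a zero field
+/// means no alignment was recorded). For example, a volatile, unindexed,
+/// non-extending access with 4-byte alignment encodes as
+/// (1 << 5) | ((2 + 1) << 6).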
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM,
+                     bool isVolatile, unsigned Alignment) {
+  assert((ConvType & 3) == ConvType &&
+         "ConvType may not require more than 2 bits!");
+  assert((AM & 7) == AM &&
+         "AM may not require more than 3 bits!");
+  return ConvType |
+         (AM << 2) |
+         (isVolatile << 5) |
+         ((Log2_32(Alignment) + 1) << 6);
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+  if (N->getValueType(0) == MVT::Flag)
+    return true; // Never CSE anything that produces a flag.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::HANDLENODE:
+  case ISD::DBG_LABEL:
+  case ISD::DBG_STOPPOINT:
+  case ISD::EH_LABEL:
+  case ISD::DECLARE:
+    return true;   // Never CSE these nodes.
+  }
+
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return true; // Never CSE anything that produces a flag.
+
+  return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+                                   DAGUpdateListener *UpdateListener) {
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.pop_back_val();
+
+    if (UpdateListener)
+      UpdateListener->NodeDeleted(N, 0);
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+      SDUse &Use = *I++;
+      SDNode *Operand = Use.getNode();
+      Use.set(SDValue());
+
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+
+    DeallocateNode(N);
+  }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+  SmallVector<SDNode*, 16> DeadNodes(1, N);
+  RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+  assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // Drop all of the operands and decrement used node's use counts.
+  N->DropOperands();
+
+  DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+
+  // Set the opcode to DELETED_NODE to help catch bugs when node
+  // memory is reallocated.
+  N->NodeType = ISD::DELETED_NODE;
+
+  NodeAllocator.Deallocate(AllNodes.remove(N));
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE maps that
+/// correspond to it.  This is useful when we're about to delete or repurpose
+/// the node.  We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::EntryToken:
+    assert(0 && "EntryToken should not be in CSEMaps!");
+    return false;
+  case ISD::HANDLENODE: return false;  // noop.
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol:
+    Erased =
+      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::VALUETYPE: {
+    MVT VT = cast<VTSDNode>(N)->getVT();
+    if (VT.isExtended()) {
+      Erased = ExtendedValueTypeNodes.erase(VT);
+    } else {
+      Erased = ValueTypeNodes[VT.getSimpleVT()] != 0;
+      ValueTypeNodes[VT.getSimpleVT()] = 0;
+    }
+    break;
+  }
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a
+  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // not subject to CSE.
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+      !N->isMachineOpcode() && !doNotCSE(N)) {
+    N->dump(this);
+    cerr << "\n";
+    assert(0 && "Node is not in map!");
+  }
+#endif
+  return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place.  Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node.  This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+                                       DAGUpdateListener *UpdateListener) {
+  // For node types that aren't CSE'd, just act as if no identical node
+  // already exists.
+  if (!doNotCSE(N)) {
+    SDNode *Existing = CSEMap.GetOrInsertNode(N);
+    if (Existing != N) {
+      // If there was already an existing matching node, use ReplaceAllUsesWith
+      // to replace the dead one with the existing one.  This can cause
+      // recursive merging of other unrelated nodes down the line.
+      ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+      // N is now dead.  Inform the listener if it exists and delete it.
+      if (UpdateListener)
+        UpdateListener->NodeDeleted(N, Existing);
+      DeleteNodeNotInCSEMaps(N);
+      return;
+    }
+  }
+
+  // If the node doesn't already exist, we updated it.  Inform a listener if
+  // it exists.
+ if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// VerifyNode - Sanity check the given node. Aborts if it is invalid. +void SelectionDAG::VerifyNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + MVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + MVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// getMVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getMVTAlignment(MVT VT) const { + const Type *Ty = VT == MVT::iPTR ? 
+                  PointerType::get(Type::Int8Ty, 0) :
+                  VT.getTypeForMVT();
+
+  return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
+  : TLI(tli), FLI(fli), DW(0),
+    EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
+              getVTList(MVT::Other)), Root(getEntryNode()) {
+  AllNodes.push_back(&EntryNode);
+}
+
+void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
+                        DwarfWriter *dw) {
+  MF = &mf;
+  MMI = mmi;
+  DW = dw;
+}
+
+SelectionDAG::~SelectionDAG() {
+  allnodes_clear();
+}
+
+void SelectionDAG::allnodes_clear() {
+  assert(&*AllNodes.begin() == &EntryNode);
+  AllNodes.remove(AllNodes.begin());
+  while (!AllNodes.empty())
+    DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+  allnodes_clear();
+  OperandAllocator.Reset();
+  CSEMap.clear();
+
+  ExtendedValueTypeNodes.clear();
+  ExternalSymbols.clear();
+  TargetExternalSymbols.clear();
+  std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+            static_cast<CondCodeSDNode*>(0));
+  std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+            static_cast<SDNode*>(0));
+
+  EntryNode.UseList = 0;
+  AllNodes.push_back(&EntryNode);
+  Root = getEntryNode();
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) {
+  if (Op.getValueType() == VT) return Op;
+  APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
+                                   VT.getSizeInBits());
+  return getNode(ISD::AND, DL, Op.getValueType(), Op,
+                 getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) {
+  MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  SDValue NegOne =
+    getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) {
+  MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  assert((EltVT.getSizeInBits() >= 64 ||
+          (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+         "getConstant with a uint64_t value that doesn't fit in the type!");
+  return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) {
+  return getConstant(*ConstantInt::get(Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) {
+  assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+  MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+  assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+         "APInt size does not match type size!");
+
+  unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddPointer(&Val);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!VT.isVector())
+      return SDValue(N, 0);
+  if (!N) {
+    N = NodeAllocator.Allocate<ConstantSDNode>();
+    new (N) ConstantSDNode(isT, &Val, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDValue Result(N, 0);
+  if (VT.isVector()) {
+    SmallVector<SDValue, 8> Ops;
+    Ops.assign(VT.getVectorNumElements(), Result);
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+                     VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+  return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) {
+  return getConstantFP(*ConstantFP::get(V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){
+  assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+  MVT EltVT =
+    VT.isVector() ? VT.getVectorElementType() : VT;
+
+  // Do the map lookup using the actual bit pattern for the floating point
+  // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+  // we don't have issues with SNANs.
+  unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  ID.AddPointer(&V);
+  void *IP = 0;
+  SDNode *N = NULL;
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+    if (!VT.isVector())
+      return SDValue(N, 0);
+  if (!N) {
+    N = NodeAllocator.Allocate<ConstantFPSDNode>();
+    new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+    CSEMap.InsertNode(N, IP);
+    AllNodes.push_back(N);
+  }
+
+  SDValue Result(N, 0);
+  if (VT.isVector()) {
+    SmallVector<SDValue, 8> Ops;
+    Ops.assign(VT.getVectorNumElements(), Result);
+    // FIXME DebugLoc info might be appropriate here
+    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+                     VT, &Ops[0], Ops.size());
+  }
+  return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
+  MVT EltVT =
+    VT.isVector() ? VT.getVectorElementType() : VT;
+  if (EltVT == MVT::f32)
+    return getConstantFP(APFloat((float)Val), VT, isTarget);
+  else
+    return getConstantFP(APFloat(Val), VT, isTarget);
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+                                       MVT VT, int64_t Offset,
+                                       bool isTargetGA) {
+  unsigned Opc;
+
+  // Truncate (with sign-extension) the offset value to the pointer size.
+  unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+  if (BitWidth < 64)
+    Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  if (!GVar) {
+    // If GV is an alias then use the aliasee for determining thread-localness.
+    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+  }
+
+  if (GVar && GVar->isThreadLocal())
+    Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+  else
+    Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddPointer(GV);
+  ID.AddInteger(Offset);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
+  new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
+  unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(FI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
+  new (N) FrameIndexSDNode(FI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
+  unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(JTI);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
+  new (N) JumpTableSDNode(JTI, VT, isTarget);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
+                                      unsigned Alignment, int Offset,
+                                      bool isTarget) {
+  if (Alignment == 0)
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  ID.AddPointer(C);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
+                                      unsigned Alignment, int Offset,
+                                      bool isTarget) {
+  if (Alignment == 0)
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  C->AddSelectionDAGCSEId(ID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MBB);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+  new (N) BasicBlockSDNode(MBB);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0);
+  ID.AddInteger(Flags.getRawBits());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>();
+  new (N) ARG_FLAGSSDNode(Flags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(MVT VT) {
+  if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size())
+    ValueTypeNodes.resize(VT.getSimpleVT()+1);
+
+  SDNode *&N = VT.isExtended() ?
+    ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()];
+
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<VTSDNode>();
+  new (N) VTSDNode(VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(false, Sym, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
+  SDNode *&N = TargetExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(true, Sym, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+    new (N) CondCodeSDNode(Cond);
+    CondCodeNodes[Cond] = N;
+    AllNodes.push_back(N);
+  }
+  return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
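+// For example, with 4-element vectors, the mask <0,5,2,7> applied to (N1,N2)
+// becomes <4,1,6,3> applied to (N2,N1); negative (undef) mask entries are
+// left unchanged.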
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+  std::swap(N1, N2);
+  int NElts = M.size();
+  for (int i = 0; i != NElts; ++i) {
+    if (M[i] >= NElts)
+      M[i] -= NElts;
+    else if (M[i] >= 0)
+      M[i] += NElts;
+  }
+}
+
+SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
+                                       SDValue N2, const int *Mask) {
+  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+         && "Vector Shuffle VTs must have same element type");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  // Validate that all indices in Mask are within the range of the elements
+  // input to the shuffle.
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i != NElts; ++i) {
+    assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+    MaskVec.push_back(Mask[i]);
+  }
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N1 == N2) {
+    N2 = getUNDEF(VT);
+    for (unsigned i = 0; i != NElts; ++i)
+      if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N1.getOpcode() == ISD::UNDEF)
+    commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize all indices into lhs -> shuffle lhs, undef
+  // Canonicalize all indices into rhs -> shuffle rhs, undef
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= (int)NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+
+  // If this is an identity shuffle, return N1; if every element shuffles to
+  // undef, return an UNDEF node.
+  bool AllUndef = true;
+  bool Identity = true;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+    if (MaskVec[i] >= 0) AllUndef = false;
+  }
+  if (Identity)
+    return N1;
+  if (AllUndef)
+    return getUNDEF(VT);
+
+  FoldingSetNodeID ID;
+  SDValue Ops[2] = { N1, N2 };
+  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+  for (unsigned i = 0; i != NElts; ++i)
+    ID.AddInteger(MaskVec[i]);
+
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  // Allocate the mask array for the node out of the BumpPtrAllocator, since
+  // SDNode doesn't have access to it.  This memory will be "leaked" when
+  // the node is deallocated, but recovered when the NodeAllocator is released.
+  int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+  memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
+                                       SDValue Val, SDValue DTy,
+                                       SDValue STy, SDValue Rnd, SDValue Sat,
+                                       ISD::CvtCode Code) {
+  // If the src and dest types are the same and the conversion is between
+  // integer types of the same sign or two floats, no conversion is necessary.
+  if (DTy == STy &&
+      (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+    return Val;
+
+  SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  ID.AddInteger(RegNo);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+  new (N) RegisterSDNode(RegNo, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
+                                      unsigned Line, unsigned Col,
+                                      Value *CU) {
+  SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
+  new (N) DbgStopPointSDNode(Root, Line, Col, CU);
+  N->setDebugLoc(DL);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+                               SDValue Root,
+                               unsigned LabelID) {
+  FoldingSetNodeID ID;
+  SDValue Ops[] = { Root };
+  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+  ID.AddInteger(LabelID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+  new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+  assert((!V || isa<PointerType>(V->getType())) &&
+         "SrcValue is not a pointer?");
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(V);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+  new (N) SrcValueSDNode(V);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) {
+#ifndef NDEBUG
+  const Value *v = MO.getValue();
+  assert((!v || isa<PointerType>(v->getType())) &&
+         "SrcValue is not a pointer?");
+#endif
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0);
+  MO.Profile(ID);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>();
+  new (N) MemOperandSDNode(MO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value casted to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+  MVT OpTy = Op.getValueType();
+  MVT ShTy = TLI.getShiftAmountTy();
+  if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+  ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ?  ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
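+/// For example, a v4i32 value gets a 16-byte slot whose alignment is the
+/// larger of the type's preferred alignment and minAlign.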
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) {
+  MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+  unsigned ByteSize = VT.getStoreSizeInBits()/8;
+  const Type *Ty = VT.getTypeForMVT();
+  unsigned StackAlign =
+  std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+  return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) {
+  unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+                            VT2.getStoreSizeInBits())/8;
+  const Type *Ty1 = VT1.getTypeForMVT();
+  const Type *Ty2 = VT2.getTypeForMVT();
+  const TargetData *TD = TLI.getTargetData();
+  unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+                            TD->getPrefTypeAlignment(Ty2));
+
+  MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+  return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
+                                SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return getConstant(1, VT);
+
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+  case ISD::SETO:
+  case ISD::SETUO:
+  case ISD::SETUEQ:
+  case ISD::SETUNE:
+    assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+    break;
+  }
+
+  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+    const APInt &C2 = N2C->getAPIntValue();
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+      const APInt &C1 = N1C->getAPIntValue();
+
+      switch (Cond) {
+      default: assert(0 && "Unknown integer setcc!");
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+      case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+      case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+      case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+      case ISD::SETLT:  return getConstant(C1.slt(C2), VT);
+      case ISD::SETGT:  return getConstant(C1.sgt(C2), VT);
+      case ISD::SETLE:  return getConstant(C1.sle(C2), VT);
+      case ISD::SETGE:  return getConstant(C1.sge(C2), VT);
+      }
+    }
+  }
+  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+      // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128) + return SDValue(); + + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + unsigned BitWidth = Op.getValueSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) const { + unsigned BitWidth = Mask.getBitWidth(); + assert(BitWidth == Op.getValueType().getSizeInBits() && + "Mask size mismatches value type size!"); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. 
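+  // KnownZero and KnownOne form a three-valued lattice per bit: a bit set in
+  // KnownZero is provably 0 in Op, a bit set in KnownOne is provably 1, and a
+  // bit set in neither is unknown. For example, for (or X, 1) the low bit ends
+  // up in KnownOne, and for (and X, 0xFF) all bits above bit 7 end up in
+  // KnownZero. The two sets must stay disjoint, as the asserts below check.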
+
+  APInt KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 bits are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clear();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                               KnownZero2.countLeadingOnes(),
+                               BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
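+    // For example, if the numerator has N leading zero bits and the
+    // denominator's highest known-one bit is bit K (so it is at least 2^K),
+    // the quotient has at least N+K leading zero bits, just as (x >> K)
+    // would.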
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+    KnownOne2.clear();
+    KnownZero2.clear();
+    ComputeMaskedBits(Op.getOperand(1),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    if (RHSUnknownLeadingOnes != BitWidth)
+      LeadZ = std::min(BitWidth,
+                       LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+    KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      return;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero <<= ShAmt;
+      KnownOne  <<= ShAmt;
+      // low bits known zero.
+      KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+    }
+    return;
+  case ISD::SRL:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      APInt InDemandedMask = (Mask << ShAmt);
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
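+      // For example, for (sra X, 24) on an i32 value, result bits 31..8 are
+      // all copies of bit 31 of X, so demanding any of them demands X's
+      // sign bit.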
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      if (HighBits.getBoolValue())
+        InDemandedMask |= APInt::getSignBit(BitWidth);
+
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      // Handle the sign bits.
+      APInt SignBit = APInt::getSignBit(BitWidth);
+      SignBit = SignBit.lshr(ShAmt);  // Adjust to where it is now in the mask.
+
+      if (KnownZero.intersects(SignBit)) {
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne.intersects(SignBit)) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    unsigned EBits = EVT.getSizeInBits();
+
+    // Sign extension.  Compute the demanded bits in the result that are not
+    // present in the input.
+    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+    APInt InSignBit = APInt::getSignBit(EBits);
+    APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InSignBit.zext(BitWidth);
+    if (NewBits.getBoolValue())
+      InputDemandedBits |= InSignBit;
+
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero.intersects(InSignBit)) {        // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne.intersects(InSignBit)) {  // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                                      // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    unsigned LowBits = Log2_32(BitWidth)+1;
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+    KnownOne.clear();
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.getNode())) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT VT = LD->getMemoryVT();
+      unsigned MemBits = VT.getSizeInBits();
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    KnownZero |= NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InSignBit = APInt::getSignBit(InBits);
+    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded. Temporarily set this bit in the mask for our callee.
+    if (NewBits.getBoolValue())
+      InMask |= InSignBit;
+
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+    // Note if the sign bit is known to be zero or one.
+    bool SignBitKnownZero = KnownZero.isNegative();
+    bool SignBitKnownOne  = KnownOne.isNegative();
+    assert(!(SignBitKnownZero && SignBitKnownOne) &&
+           "Sign bit can't be known to be both zero and one!");
+
+    // If the sign bit wasn't actually demanded by our caller, we don't
+    // want it set in the KnownZero and KnownOne result values. Reset the
+    // mask and reapply it to the result values.
+    InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero &= InMask;
+    KnownOne  &= InMask;
+
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+
+    // If the sign bit is known zero or one, the top bits match.
+    if (SignBitKnownZero)
+      KnownZero |= NewBits;
+    else if (SignBitKnownOne)
+      KnownOne  |= NewBits;
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InMask = Mask;
+    InMask.zext(InBits);
+    KnownZero.zext(InBits);
+    KnownOne.zext(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero.trunc(BitWidth);
+    KnownOne.trunc(BitWidth);
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::FGETSIGN:
+    // All bits are zero except the low bit.
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+
+  case ISD::SUB: {
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains fewer bits
+      // than C (i.e. no wrap-around can happen).  For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+      if (CLHS->getAPIntValue().isNonNegative()) {
+        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+  // fall through
+  case ISD::ADD: {
+    // Output known-0 bits are known if clear or set in both the low clear bits
+    // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
+    // low 3 bits clear.
+    APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    KnownZeroOut = std::min(KnownZeroOut,
+                            KnownZero2.countTrailingOnes());
+
+    KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+    return;
+  }
+  case ISD::SREM:
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue();
+      if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+        APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // If the sign bit of the first operand is zero, the sign bit of
+        // the result is zero. If the first operand has no one bits below
+        // the second operand's single 1 bit, its sign will be zero.
+        if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+          KnownZero2 |= ~LowBits;
+
+        KnownZero |= KnownZero2 & Mask;
+
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      }
+    }
+    return;
+  case ISD::UREM: {
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = (RA - 1);
+        APInt Mask2 = LowBits & Mask;
+        KnownZero |= ~LowBits & Mask;
+        ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,
+                          Depth+1);
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+        break;
+      }
+    }
+
+    // Since the result is less than or equal to either operand, any leading
+    // zero bits in either operand must also exist in the result.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+                      Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+                      Depth+1);
+
+    uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+                                KnownZero2.countLeadingOnes());
+    KnownOne.clear();
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+    return;
+  }
+  default:
+    // Allow the target to implement this method for its nodes.
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+    }
+    return;
+  }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+  MVT VT = Op.getValueType();
+  assert(VT.isInteger() && "Invalid VT!");
+  unsigned VTBits = VT.getSizeInBits();
+  unsigned Tmp, Tmp2;
+  unsigned FirstAnswer = 1;
+
+  if (Depth == 6)
+    return 1;  // Limit search depth.
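+  // Concrete illustration (example values only): for an i8 holding
+  // 0b11100101 the top three bits all repeat the sign, so this returns 3;
+  // for 0b00000101 it returns 5.  Everything beyond the first of those
+  // bits is a redundant copy of the sign bit.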
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp;
+
+  case ISD::Constant: {
+    const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+    // If negative, return # leading ones.
+    if (Val.isNegative())
+      return Val.countLeadingOnes();
+
+    // Return # leading zeros.
+    return Val.countLeadingZeros();
+  }
+
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits();
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    Tmp = VTBits-Tmp+1;
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getZExtValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getZExtValue() >= VTBits ||      // Bad shift.
+          C->getZExtValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getZExtValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits at the worst.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp != 1) {
+      Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+      FirstAnswer = std::min(Tmp, Tmp2);
+      // We computed what we know about the sign bits as our first
+      // answer. Now proceed to the generic code that uses
+      // ComputeMaskedBits, and pick whichever answer is better.
+    }
+    break;
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      break;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getBooleanContents() ==
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+      // Handle rotate right by N like a rotate left by 32-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
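+    // Illustrative example: two i8 values that each have at least 3 sign
+    // bits lie in [-32, 31], so their sum lies in [-64, 62] and still has
+    // at least min(3, 3) - 1 == 2 sign bits.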
+
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if (CRHS->isAllOnesValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero.isNegative())
+          return Tmp;
+      }
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+    break;
+
+  case ISD::SUB:
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+
+    // Handle NEG.
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CLHS->isNullValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero.isNegative())
+          return Tmp2;
+
+        // Otherwise, we treat this like a SUB.
+      }
+
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+    break;
+  case ISD::TRUNCATE:
+    // FIXME: it's tricky to do anything useful for this, but it is an
+    // important case for targets like X86.
+    break;
+  }
+
+  // Handle LOADX separately here. EXTLOAD case will fallthrough.
+  if (Op.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    unsigned ExtType = LD->getExtensionType();
+    switch (ExtType) {
+    default: break;
+    case ISD::SEXTLOAD:    // '17' bits known
+      Tmp = LD->getMemoryVT().getSizeInBits();
+      return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // '16' bits known
+      Tmp = LD->getMemoryVT().getSizeInBits();
+      return VTBits-Tmp;
+    }
+  }
+
+  // Allow the target to implement this method for its nodes.
+  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+    unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+    if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+  }
+
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  APInt KnownZero, KnownOne;
+  APInt Mask = APInt::getAllOnesValue(VTBits);
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+  if (KnownZero.isNegative()) {        // sign bit is 0
+    Mask = KnownZero;
+  } else if (KnownOne.isNegative()) {  // sign bit is 1;
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return FirstAnswer;
+  }
+
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask = ~Mask;
+  Mask <<= Mask.getBitWidth()-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Val was zero before
+  // shifting.  We don't want to return '64' as for an i32 "0".
+  return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+  if (!GA) return false;
+  if (GA->getOffset() != 0) return false;
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  if (!GV) return false;
+  MachineModuleInfo *MMI = getMachineModuleInfo();
+  return MMI && MMI->hasDebugInfo();
+}
+
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
+                                          unsigned i) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getMaskElt(i) < 0)
+    return getUNDEF(VT.getVectorElementType());
+  unsigned Index = N->getMaskElt(i);
+  unsigned NumElems = VT.getVectorNumElements();
+  SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+  Index %= NumElems;
+
+  if (V.getOpcode() == ISD::BIT_CONVERT) {
+    V = V.getOperand(0);
+    MVT VVT = V.getValueType();
+    if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
+      return SDValue();
+  }
+  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return (Index == 0) ? V.getOperand(0)
+                        : getUNDEF(VT.getVectorElementType());
+  if (V.getOpcode() == ISD::BUILD_VECTOR)
+    return V.getOperand(Index);
+  if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
+    return getShuffleScalarElt(SVN, Index);
+  return SDValue();
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<SDNode>();
+  new (N) SDNode(Opcode, DL, getVTList(VT));
+  CSEMap.InsertNode(N, IP);
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              MVT VT, SDValue Operand) {
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+    const APInt &Val = C->getAPIntValue();
+    unsigned BitWidth = VT.getSizeInBits();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND:
+      return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::TRUNCATE:
+      return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+    case ISD::UINT_TO_FP:
+    case ISD::SINT_TO_FP: {
+      const uint64_t zero[] = {0, 0};
+      // No compile time operations on this type.
+      if (VT==MVT::ppcf128)
+        break;
+      APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+      (void)apf.convertFromAPInt(Val,
+                                 Opcode==ISD::SINT_TO_FP,
+                                 APFloat::rmNearestTiesToEven);
+      return getConstantFP(apf, VT);
+    }
+    case ISD::BIT_CONVERT:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(Val.bitsToFloat(), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(Val.bitsToDouble(), VT);
+      break;
+    case ISD::BSWAP:
+      return getConstant(Val.byteSwap(), VT);
+    case ISD::CTPOP:
+      return getConstant(Val.countPopulation(), VT);
+    case ISD::CTLZ:
+      return getConstant(Val.countLeadingZeros(), VT);
+    case ISD::CTTZ:
+      return getConstant(Val.countTrailingZeros(), VT);
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+    APFloat V = C->getValueAPF();    // make copy
+    if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+      switch (Opcode) {
+      case ISD::FNEG:
+        V.changeSign();
+        return getConstantFP(V, VT);
+      case ISD::FABS:
+        V.clearSign();
+        return getConstantFP(V, VT);
+      case ISD::FP_ROUND:
+      case ISD::FP_EXTEND: {
+        bool ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(*MVTToAPFloatSemantics(VT),
+                        APFloat::rmNearestTiesToEven, &ignored);
+        return getConstantFP(V, VT);
+      }
+      case ISD::FP_TO_SINT:
+      case ISD::FP_TO_UINT: {
+        integerPart x[2];
+        bool ignored;
+        assert(integerPartWidth >= 64);
+        // FIXME need to be more flexible about rounding mode.
+        APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+                                                 Opcode==ISD::FP_TO_SINT,
+                                                 APFloat::rmTowardZero,
+                                                 &ignored);
+        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
+          break;
+        APInt api(VT.getSizeInBits(), 2, x);
+        return getConstant(api, VT);
+      }
+      case ISD::BIT_CONVERT:
+        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+          return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+        break;
+      }
+    }
+  }
+
+  unsigned OpOpcode = Operand.getNode()->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+  case ISD::MERGE_VALUES:
+  case ISD::CONCAT_VECTORS:
+    return Operand;         // Factor, merge or concat of one node?  No need.
+  case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node");
+  case ISD::FP_EXTEND:
+    assert(VT.isFloatingPoint() &&
+           Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+    if (Operand.getValueType() == VT) return Operand;  // noop conversion.
+    if (Operand.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().bitsLT(VT)
+           && "Invalid sext node, dst < src!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().bitsLT(VT)
+           && "Invalid zext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, DL, VT,
+                     Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().bitsLT(VT)
+           && "Invalid anyext node, dst < src!");
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::TRUNCATE:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType().bitsGT(VT)
+           && "Invalid truncate node, src < dst!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT))
+        return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+        return getNode(ISD::TRUNCATE, DL, VT,
+                       Operand.getNode()->getOperand(0));
+      else
+        return Operand.getNode()->getOperand(0);
+    }
+    break;
+  case ISD::BIT_CONVERT:
+    // Basic sanity checking.
+    assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+           && "Cannot BIT_CONVERT between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(VT.isVector() && !Operand.getValueType().isVector() &&
+           (VT.getVectorElementType() == Operand.getValueType() ||
+            (VT.getVectorElementType().isInteger() &&
+             Operand.getValueType().isInteger() &&
+             VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+    if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+        isa<ConstantSDNode>(Operand.getOperand(1)) &&
+        Operand.getConstantOperandVal(1) == 0 &&
+        Operand.getOperand(0).getValueType() == VT)
+      return Operand.getOperand(0);
+    break;
+  case ISD::FNEG:
+    // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+    if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+      return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+                     Operand.getNode()->getOperand(0));
+    if (OpOpcode == ISD::FNEG)  // --X -> X
+      return Operand.getNode()->getOperand(0);
+    break;
+  case ISD::FABS:
+    if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
+      return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  }
+
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+    FoldingSetNodeID ID;
+    SDValue Ops[1] = { Operand };
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+    N = NodeAllocator.Allocate<UnarySDNode>();
+    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<UnarySDNode>();
+    new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+                                             MVT VT,
+                                             ConstantSDNode *Cst1,
+                                             ConstantSDNode *Cst2) {
+  const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+  switch (Opcode) {
+  case ISD::ADD:  return getConstant(C1 + C2, VT);
+  case ISD::SUB:  return getConstant(C1 - C2, VT);
+  case ISD::MUL:  return getConstant(C1 * C2, VT);
+  case ISD::UDIV:
+    if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+    break;
+  case ISD::UREM:
+    if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+    break;
+  case ISD::SDIV:
+    if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+    break;
+  case ISD::SREM:
+    if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+    break;
+  case ISD::AND:  return getConstant(C1 & C2, VT);
+  case ISD::OR:   return getConstant(C1 | C2, VT);
+  case ISD::XOR:  return getConstant(C1 ^ C2, VT);
+  case ISD::SHL:  return getConstant(C1 << C2, VT);
+  case ISD::SRL:  return getConstant(C1.lshr(C2), VT);
+  case ISD::SRA:  return getConstant(C1.ashr(C2), VT);
+  case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+  case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+  default: break;
+  }
+
+  return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2) {
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  switch (Opcode) {
+  default: break;
+  case ISD::TokenFactor:
+    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+           N2.getValueType() == MVT::Other && "Invalid token factor!");
+    // Fold trivial token factors.
+    if (N1.getOpcode() == ISD::EntryToken) return N2;
+    if (N2.getOpcode() == ISD::EntryToken) return N1;
+    if (N1 == N2) return N1;
+    break;
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
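+    // For example (illustrative):
+    //   (concat_vectors (build_vector a, b), (build_vector c, d))
+    // becomes (build_vector a, b, c, d).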
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::AND:
+    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N2;
+    if (N2C && N2C->isAllOnesValue())  // X & -1 -> X
+      return N1;
+    break;
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
+    // it's worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    // fall through
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    if (UnsafeFPMath) {
+      if (Opcode == ISD::FADD) {
+        // 0+x --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+          if (CFP->getValueAPF().isZero())
+            return N2;
+        // x+0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      } else if (Opcode == ISD::FSUB) {
+        // x-0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      }
+    }
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           N1.getValueType().isFloatingPoint() &&
+           N2.getValueType().isFloatingPoint() &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators return type must be the same as their first arg");
+    assert(VT.isInteger() && N2.getValueType().isInteger() &&
+           "Shifts only work on integers");
+
+    // Always fold shifts of i1 values so the code generator doesn't need to
+    // handle them.  Since we know the size of the shift has to be less than
+    // the size of the value, the shift/rotate count is guaranteed to be zero.
+    if (VT == MVT::i1)
+      return N1;
+    break;
+  case ISD::FP_ROUND_INREG: {
+    MVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg round!");
+    assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+           "Cannot FP_ROUND_INREG integer types");
+    assert(EVT.bitsLE(VT) && "Not rounding down!");
+    if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
+    break;
+  }
+  case ISD::FP_ROUND:
+    assert(VT.isFloatingPoint() &&
+           N1.getValueType().isFloatingPoint() &&
+           VT.bitsLE(N1.getValueType()) &&
+           isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+    if (N1.getValueType() == VT) return N1;  // noop conversion.
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext: {
+    MVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && EVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(EVT.bitsLE(VT) && "Not extending!");
+    if (VT == EVT) return N1; // noop assertion.
+    break;
+  }
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && EVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(EVT.bitsLE(VT) && "Not extending!");
+    if (EVT == VT) return N1;  // Not actually extending
+
+    if (N1C) {
+      APInt Val = N1C->getAPIntValue();
+      unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits();
+      Val <<= Val.getBitWidth()-FromBits;
+      Val = Val.ashr(Val.getBitWidth()-FromBits);
+      return getConstant(Val, VT);
+    }
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+    if (N1.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+
+    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+    // expanding copies of large vectors from registers.
+    if (N2C &&
+        N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N1.getNumOperands() > 0) {
+      unsigned Factor =
+        N1.getOperand(0).getValueType().getVectorNumElements();
+      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                     N1.getOperand(N2C->getZExtValue() / Factor),
+                     getConstant(N2C->getZExtValue() % Factor,
+                                 N2.getValueType()));
+    }
+
+    // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+    // expanding large vector constants.
+    if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Elt = N1.getOperand(N2C->getZExtValue());
+      if (Elt.getValueType() != VT) {
+        // If the vector element type is not legal, the BUILD_VECTOR operands
+        // are promoted and implicitly truncated.  Make that explicit here.
+        assert(VT.isInteger() && Elt.getValueType().isInteger() &&
+               VT.bitsLE(Elt.getValueType()) &&
+               "Bad type for BUILD_VECTOR operand");
+        Elt = getNode(ISD::TRUNCATE, DL, VT, Elt);
+      }
+      return Elt;
+    }
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element.
+      if (N1.getOperand(2) == N2)
+        return N1.getOperand(1);
+      // If the indices are known different, extract the element from
+      // the original vector.
+      else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+               isa<ConstantSDNode>(N2))
+        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+    }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    assert(!N1.getValueType().isVector() && !VT.isVector() &&
+           (N1.getValueType().isInteger() == VT.isInteger()) &&
+           "Wrong types for EXTRACT_ELEMENT!");
+
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is
+    // expanding 64-bit integers into 32-bit parts.  Instead of building the
+    // extract of the BUILD_PAIR, only to have legalize rip it apart, just do
+    // it now.
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getZExtValue());
+
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    if (N1.getValueType() == VT) // Trivial extraction.
+      return N1;
+    break;
+  }
+
+  if (N1C) {
+    if (N2C) {
+      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+      if (SV.getNode()) return SV;
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  // Constant fold FP operations.
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP) {
+    if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+      std::swap(N1CFP, N2CFP);
+      std::swap(N1, N2);
+    } else if (N2CFP && VT != MVT::ppcf128) {
+      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+      APFloat::opStatus s;
+      switch (Opcode) {
+      case ISD::FADD:
+        s = V1.add(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM :
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FCOPYSIGN:
+        V1.copySign(V2);
+        return getConstantFP(V1, VT);
+      default: break;
+      }
+    }
+  }
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!VT.isVector())
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the LHS.
+        return N2;
+      }
+    }
+  }
+
+  // Fold a bunch of operators when the RHS is undef.
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::XOR:
+      if (N1.getOpcode() == ISD::UNDEF)
+        // Handle undef ^ undef -> 0 special case. This is a common
+        // idiom (misuse).
+        return getConstant(0, VT);
+      // fallthrough
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!VT.isVector())
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+      return N1;
+    case ISD::OR:
+      if (!VT.isVector())
+        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+      // For vectors, we can't easily build an all one vector, just return
+      // the LHS.
+      return N1;
+    case ISD::SRA:
+      return N1;
+    }
+  }
+
+  // Memoize this node if possible.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDValue Ops[] = { N1, N2 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+    N = NodeAllocator.Allocate<BinarySDNode>();
+    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<BinarySDNode>();
+    new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2, SDValue N3) {
+  // Perform various simplifications.
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  switch (Opcode) {
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR &&
+        N3.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+      Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::SETCC: {
+    // Use FoldSetCC to simplify SETCC's.
+    SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+    if (Simp.getNode()) return Simp;
+    break;
+  }
+  case ISD::SELECT:
+    if (N1C) {
+      if (N1C->getZExtValue())
+        return N2;             // select true, X, Y -> X
+      else
+        return N3;             // select false, X, Y -> Y
+    }
+
+    if (N2 == N3) return N2;   // select C, X, X -> X
+    break;
+  case ISD::BRCOND:
+    if (N2C) {
+      if (N2C->getZExtValue()) // Unconditional branch
+        return getNode(ISD::BR, DL, MVT::Other, N1, N3);
+      else
+        return N1;         // Never-taken branch
+    }
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    assert(0 && "should use getVectorShuffle constructor!");
+    break;
+  case ISD::BIT_CONVERT:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
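+  // For example (illustrative): two independent calls to
+  //   getNode(ISD::ADD, dl, MVT::i32, X, Y)
+  // hash to the same FoldingSetNodeID below and hand back the same SDNode,
+  // which is what makes the graph a DAG rather than an expression tree.
+  // MVT::Flag results are deliberately kept distinct and never CSE'd.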
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDValue Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+  }
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4) {
+  SDValue Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4, SDValue N5) {
+  SDValue Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
+                              DebugLoc dl) {
+  unsigned NumBits = VT.isVector() ?
+    VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+    unsigned Shift = 8;
+    for (unsigned i = NumBits; i > 8; i >>= 1) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+    }
+    if (VT.isInteger())
+      return DAG.getConstant(Val, VT);
+    return DAG.getConstantFP(APFloat(Val), VT);
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Value = DAG.getNode(ISD::OR, dl, VT,
+                        DAG.getNode(ISD::SHL, dl, VT, Value,
+                                    DAG.getConstant(Shift,
+                                                    TLI.getShiftAmountTy())),
+                        Value);
+    Shift <<= 1;
+  }
+
+  return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
+/// used when a memcpy is turned into a memset when the source is a constant
+/// string ptr.
+static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  std::string &Str, unsigned Offset) {
+  // Handle vector with all elements zero.
+  if (Str.empty()) {
+    if (VT.isInteger())
+      return DAG.getConstant(0, VT);
+    unsigned NumElts = VT.getVectorNumElements();
+    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts)));
+  }
+
+  assert(!VT.isVector() && "Can't handle vector type here!");
+  unsigned NumBits = VT.getSizeInBits();
+  unsigned MSB = NumBits / 8;
+  uint64_t Val = 0;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns base and offset node for the given base
+/// pointer and byte offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+                                    SelectionDAG &DAG) {
+  MVT VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+                     VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+  unsigned SrcDelta = 0;
+  GlobalAddressSDNode *G = NULL;
+  if (Src.getOpcode() == ISD::GlobalAddress)
+    G = cast<GlobalAddressSDNode>(Src);
+  else if (Src.getOpcode() == ISD::ADD &&
+           Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+           Src.getOperand(1).getOpcode() == ISD::Constant) {
+    G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+    SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+  }
+  if (!G)
+    return false;
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+    return true;
+
+  return false;
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
+                              SDValue Dst, SDValue Src,
+                              unsigned Limit, uint64_t Size, unsigned &Align,
+                              std::string &Str, bool &isSrcStr,
+                              SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  isSrcStr = isMemSrcFromString(Src, Str);
+  bool isSrcConst = isa<ConstantSDNode>(Src);
+  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
+  MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+  if (VT != MVT::iAny) {
+    unsigned NewAlign = (unsigned)
+      TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
+    // If source is a string constant, this will require an unaligned load.
+    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+      if (Dst.getOpcode() != ISD::FrameIndex) {
+        // Can't change destination alignment. It requires an unaligned store.
+        if (AllowUnalign)
+          VT = MVT::iAny;
+      } else {
+        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+        if (MFI->isFixedObjectIndex(FI)) {
+          // Can't change destination alignment. It requires an unaligned
+          // store.
+          if (AllowUnalign)
+            VT = MVT::iAny;
+        } else {
+          // Give the stack frame object a larger alignment if needed.
+          if (MFI->getObjectAlignment(FI) < NewAlign)
+            MFI->setObjectAlignment(FI, NewAlign);
+          Align = NewAlign;
+        }
+      }
+    }
+  }
+
+  if (VT == MVT::iAny) {
+    if (AllowUnalign) {
+      VT = MVT::i64;
+    } else {
+      switch (Align & 7) {
+      case 0:  VT = MVT::i64; break;
+      case 4:  VT = MVT::i32; break;
+      case 2:  VT = MVT::i16; break;
+      default: VT = MVT::i8;  break;
+      }
+    }
+
+    MVT LVT = MVT::i64;
+    while (!TLI.isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1);
+    assert(LVT.isInteger());
+
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector load / store's for the left-over pieces.
+      if (VT.isVector()) {
+        VT = MVT::i64;
+        while (!TLI.isTypeLegal(VT))
+          VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+        VTSize = VT.getSizeInBits() / 8;
+      } else {
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+        VTSize >>= 1;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                       SDValue Chain, SDValue Dst,
+                                       SDValue Src, uint64_t Size,
+                                       unsigned Align, bool AlwaysInline,
+                                       const Value *DstSV, uint64_t DstSVOff,
+                                       const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  std::vector<MVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemcpy();
+  unsigned DstAlign = Align;  // Destination alignment can change.
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+                                Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+
+  bool isZeroStr = CopyFromStr && Str.empty();
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  uint64_t SrcOff = 0, DstOff = 0;
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    MVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+      // It's unlikely a store of a vector immediate can be done in a single
+      // instruction. It would require a load from a constantpool first.
+      // We also handle store a vector with all zero's.
+      // FIXME: Handle other cases where store of vector immediate is done in
+      // a single instruction.
+      Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+      Store = DAG.getStore(Chain, dl, Value,
+                           getMemBasePlusOffset(Dst, DstOff, DAG),
+                           DstSV, DstSVOff + DstOff, false, DstAlign);
+    } else {
+      Value = DAG.getLoad(VT, dl, Chain,
+                          getMemBasePlusOffset(Src, SrcOff, DAG),
+                          SrcSV, SrcSVOff + SrcOff, false, Align);
+      Store = DAG.getStore(Chain, dl, Value,
+                           getMemBasePlusOffset(Dst, DstOff, DAG),
+                           DstSV, DstSVOff + DstOff, false, DstAlign);
+    }
+    OutChains.push_back(Store);
+    SrcOff += VTSize;
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                        SDValue Chain, SDValue Dst,
+                                        SDValue Src, uint64_t Size,
+                                        unsigned Align, bool AlwaysInline,
+                                        const Value *DstSV, uint64_t DstSVOff,
+                                        const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memmove to a series of load and store ops if the size operand
+  // falls below a certain threshold.
+  std::vector<MVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemmove();
+  unsigned DstAlign = Align;  // Destination alignment can change.
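+  // Unlike the memcpy expansion above, the loops below issue every load
+  // before any store.  That is what makes this expansion safe for
+  // overlapping buffers: e.g. (illustrative) an 8-byte memmove with
+  // Dst == Src + 4 would read already-clobbered bytes if loads and stores
+  // were interleaved, but is correct when all loads complete first.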
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+                                Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  uint64_t SrcOff = 0, DstOff = 0;
+
+  SmallVector<SDValue, 8> LoadValues;
+  SmallVector<SDValue, 8> LoadChains;
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    MVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    Value = DAG.getLoad(VT, dl, Chain,
+                        getMemBasePlusOffset(Src, SrcOff, DAG),
+                        SrcSV, SrcSVOff + SrcOff, false, Align);
+    LoadValues.push_back(Value);
+    LoadChains.push_back(Value.getValue(1));
+    SrcOff += VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                      &LoadChains[0], LoadChains.size());
+  OutChains.clear();
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    MVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    Store = DAG.getStore(Chain, dl, LoadValues[i],
+                         getMemBasePlusOffset(Dst, DstOff, DAG),
+                         DstSV, DstSVOff + DstOff, false, DstAlign);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align,
+                               const Value *DstSV, uint64_t DstSVOff) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memset to a series of load/store ops if the size operand
+  // falls below a certain threshold.
+  std::vector<MVT> MemOps;
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+                                Size, Align, Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    MVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+    SDValue Store = DAG.getStore(Chain, dl, Value,
+                                 getMemBasePlusOffset(Dst, DstOff, DAG),
+                                 DstSV, DstSVOff + DstOff);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                SDValue Src, SDValue Size,
+                                unsigned Align, bool AlwaysInline,
+                                const Value *DstSV, uint64_t DstSVOff,
+                                const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memcpy to loads and stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memcpy with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                              ConstantSize->getZExtValue(),
+                              Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memcpy with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+                                AlwaysInline,
+                                DstSV, DstSVOff, SrcSV, SrcSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // If we really need inline code and the target declined to provide it,
+  // use a (potentially long) sequence of loads and stores.
+  if (AlwaysInline) {
+    assert(ConstantSize && "AlwaysInline requires a constant size!");
+    return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                   ConstantSize->getZExtValue(), Align, true,
+                                   DstSV, DstSVOff, SrcSV, SrcSVOff);
+  }
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Entry.Node = Dst; Args.push_back(Entry);
+  Entry.Node = Src; Args.push_back(Entry);
+  Entry.Node = Size; Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::VoidTy,
+                    false, false, false, false, CallingConv::C, false,
+                    getExternalSymbol("memcpy", TLI.getPointerTy()),
+                    Args, *this, dl);
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                 SDValue Src, SDValue Size,
+                                 unsigned Align,
+                                 const Value *DstSV, uint64_t DstSVOff,
+                                 const Value *SrcSV, uint64_t SrcSVOff) {
+
+  // Check to see if we should lower the memmove to loads and stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memmove with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+                               ConstantSize->getZExtValue(),
+                               Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memmove with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Entry.Node = Dst; Args.push_back(Entry);
+  Entry.Node = Src; Args.push_back(Entry);
+  Entry.Node = Size; Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::VoidTy,
+                    false, false, false, false, CallingConv::C, false,
+                    getExternalSymbol("memmove", TLI.getPointerTy()),
+                    Args, *this, dl);
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+                                SDValue Src, SDValue Size,
+                                unsigned Align,
+                                const Value *DstSV, uint64_t DstSVOff) {
+
+  // Check to see if we should lower the memset to stores first.
+  // For cases within the target-specified limits, this is the best choice.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (ConstantSize) {
+    // Memset with size zero? Just return the original chain.
+    if (ConstantSize->isNullValue())
+      return Chain;
+
+    SDValue Result =
+      getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+                      Align, DstSV, DstSVOff);
+    if (Result.getNode())
+      return Result;
+  }
+
+  // Then check to see if we should lower the memset with target-specific
+  // code. If the target chooses to do this, this is the next best.
+  SDValue Result =
+    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+                                DstSV, DstSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // Emit a library call.
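+  // The call built below targets the standard C routine
+  //   void *memset(void *dst, int value, size_t n);
+  // the returned pointer is ignored (the call is given a void result type),
+  // and the value operand is widened or truncated to i32 to match the
+  // int parameter.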
+  const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Dst; Entry.Ty = IntPtrTy;
+  Args.push_back(Entry);
+  // Extend or truncate the argument to be an i32 value for the call.
+  if (Src.getValueType().bitsGT(MVT::i32))
+    Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+  else
+    Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+  Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+  Args.push_back(Entry);
+  Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+  Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::VoidTy,
+                    false, false, false, false, CallingConv::C, false,
+                    getExternalSymbol("memset", TLI.getPointerTy()),
+                    Args, *this, dl);
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Cmp,
+                                SDValue Swp, const Value* PtrVal,
+                                unsigned Alignment) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+  assert(Cmp.getValueType() == Swp.getValueType() &&
+         "Invalid Atomic Op Types");
+
+  MVT VT = Cmp.getValueType();
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(MemVT);
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+                       Chain, Ptr, Cmp, Swp, PtrVal, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Val,
+                                const Value* PtrVal,
+                                unsigned Alignment) {
+  assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+          Opcode == ISD::ATOMIC_LOAD_SUB ||
+          Opcode == ISD::ATOMIC_LOAD_AND ||
+          Opcode == ISD::ATOMIC_LOAD_OR ||
+          Opcode == ISD::ATOMIC_LOAD_XOR ||
+          Opcode == ISD::ATOMIC_LOAD_NAND ||
+          Opcode == ISD::ATOMIC_LOAD_MIN ||
+          Opcode == ISD::ATOMIC_LOAD_MAX ||
+          Opcode == ISD::ATOMIC_LOAD_UMIN ||
+          Opcode == ISD::ATOMIC_LOAD_UMAX ||
+          Opcode == ISD::ATOMIC_SWAP) &&
+         "Invalid Atomic Op");
+
+  MVT VT = Val.getValueType();
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(MemVT);
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Val};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+                       Chain, Ptr, Val, PtrVal, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// A single operand is returned as-is rather than being wrapped in a node.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, srcValue, SVOff, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, + bool IsTailCall, bool IsInreg, SDVTList VTs, + const SDValue *Operands, unsigned NumOperands) { + // Do not include isTailCall in the folding set profile. + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); + ID.AddInteger(CallingConv); + ID.AddInteger(IsVarArgs); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + // Instead of including isTailCall in the folding set, we just + // set the flag of the existing node. + if (!IsTailCall) + cast(E)->setNotTailCall(); + return SDValue(E, 0); + } + SDNode *N = NodeAllocator.Allocate(); + new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, + VTs, Operands, NumOperands); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, + const Value *SV, int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + if (VT == EVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == EVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
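+    //
+    // Illustrative note (not in the original source): an extending load
+    // reads the narrower memory type EVT and widens it to VT in a register.
+    // Loading an i8 into an i32, for example, may be SEXTLOAD (sign-extend
+    // the upper 24 bits), ZEXTLOAD (zero-extend them), or EXTLOAD (leave
+    // them unspecified), e.g. via
+    //   getExtLoad(ISD::SEXTLOAD, dl, MVT::i32, Chain, Ptr, SV, 0, MVT::i8).
+    // The asserts that follow check that VT and EVT form a legal pairing
+    // for the requested extension kind.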
+ if (VT.isVector()) + assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + "Invalid vector extload!"); + else + assert(EVT.bitsLT(VT) && + "Should only be an extending load, not truncating!"); + assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && + "Cannot sign/zero extend a FP/Vector load!"); + assert(VT.isInteger() == EVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && + "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? + getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); + ID.AddInteger(EVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate(); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, + Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + SV, SVOffset, VT, isVolatile, Alignment); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, + SDValue Chain, SDValue Ptr, + const Value *SV, + int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + SV, SVOffset, EVT, isVolatile, Alignment); +} + +SDValue +SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + LoadSDNode *LD = cast(OrigLoad); + assert(LD->getOffset().getOpcode() == ISD::UNDEF && + "Load is already a indexed load!"); + return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + LD->getChain(), Base, Offset, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, + VT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, + int SVOffset, MVT SVT, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (VT == SVT) + return getStore(Chain, dl, Val, 
Ptr, SV, SVOffset, isVolatile, Alignment); + + assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(VT.isInteger() == SVT.isInteger() && + "Can't do FP-INT conversion!"); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, + SVT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + StoreSDNode *ST = cast(OrigStore); + assert(ST->getOffset().getOpcode() == ISD::UNDEF && + "Store is already a indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate(); + new (N) StoreSDNode(Ops, dl, VTs, AM, + ST->isTruncatingStore(), ST->getMemoryVT(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getAlignment(), ST->isVolatile()); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + SDValue SV) { + SDValue Ops[] = { Chain, Ptr, SV }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDUse *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. 
+ SmallVector NewOps(Ops, Ops + NumOps); + return getNode(Opcode, DL, VT, &NewOps[0], NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDValue *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. + SDNode *N; + SDVTList VTs = getVTList(VT); + + if (VT != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); + void *IP = 0; + + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const std::vector &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), + Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps) { + if (NumVTs == 1) + return getNode(Opcode, DL, VTs[0], Ops, NumOps); + return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.NumVTs == 1) + return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + + switch (Opcode) { + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } +#if 0 + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + case ISD::SHL_PARTS: + if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(N3.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + else if (N3.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast(N3.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = VT.getSizeInBits()*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + } + break; +#endif + } + + // Memoize the node unless it returns a flag. 
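+  //
+  // Illustrative note (not in the original source): nodes whose last result
+  // is MVT::Flag are deliberately kept out of the CSE map.  Flag results
+  // model physical-register glue (for instance a condition-code output), so
+  // two structurally identical flag producers are not interchangeable and
+  // must not be merged.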
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + if (NumOps == 1) { + N = NodeAllocator.Allocate(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = NodeAllocator.Allocate(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(MVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + MVT *Array = Allocator.Allocate(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + MVT *Array = Allocator.Allocate(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 4 && I->VTs[0] == VT1 
&& I->VTs[1] == VT2 &&
+        I->VTs[2] == VT3 && I->VTs[3] == VT4)
+      return *I;
+
+  MVT *Array = Allocator.Allocate<MVT>(4);
+  Array[0] = VT1;
+  Array[1] = VT2;
+  Array[2] = VT3;
+  Array[3] = VT4;
+  SDVTList Result = makeVTList(Array, 4);
+  VTList.push_back(Result);
+  return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+  switch (NumVTs) {
+    case 0: assert(0 && "Cannot have nodes without results!");
+    case 1: return getVTList(VTs[0]);
+    case 2: return getVTList(VTs[0], VTs[1]);
+    case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+    default: break;
+  }
+
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I) {
+    if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+      continue;
+
+    bool NoMatch = false;
+    for (unsigned i = 2; i != NumVTs; ++i)
+      if (VTs[i] != I->VTs[i]) {
+        NoMatch = true;
+        break;
+      }
+    if (!NoMatch)
+      return *I;
+  }
+
+  MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+  std::copy(VTs, VTs+NumVTs, Array);
+  SDVTList Result = makeVTList(Array, NumVTs);
+  VTList.push_back(Result);
+  return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands.  If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists.  If the resultant node does not exist in the DAG, the
+/// input node is returned.  As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op == N->getOperand(0)) return InN;
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  N->OperandList[0].set(Op);
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+    return InN;   // No operands changed, just return the input node.
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  if (N->OperandList[0] != Op1)
+    N->OperandList[0].set(Op1);
+  if (N->OperandList[1] != Op2)
+    N->OperandList[1].set(Op2);
+
+  // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { + SDNode *N = InN.getNode(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return InN; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return SDValue(Existing, InN.getResNo()); + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
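+/// (Illustrative note, not part of the original comment: the wrappers pass
+/// ~MachineOpc to MorphNodeTo because, in this representation, selected
+/// target instructions are stored with their opcode bitwise-complemented to
+/// distinguish them from ISD opcodes; isMachineOpcode() and
+/// getMachineOpcode() decode the same convention, as do the getTargetNode()
+/// helpers further below.)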
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, 0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode 
*SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +/// MorphNodeTo - These *mutate* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. Note that the DebugLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + void *IP = 0; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = 0; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. 
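+  // Illustrative note (not in the original source): the operand list only
+  // ever grows here; when the new operand count is smaller, the existing
+  // array is reused and NumOperands is simply lowered below.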
+ if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + if (N->isMachineOpcode()) { + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->OperandList = OperandAllocator.Allocate(NumOps); + N->OperandsNeedDelete = false; + } else { + N->OperandList = new SDUse[NumOps]; + N->OperandsNeedDelete = true; + } + } + + // Assign the new operands. + N->NumOperands = NumOps; + for (unsigned i = 0, e = NumOps; i != e; ++i) { + N->OperandList[i].setUser(N); + N->OperandList[i].setInitial(Ops[i]); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + SmallVector DeadNodes; + for (SmallPtrSet::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { + return getNode(~Opcode, dl, VT).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1) { + return getNode(~Opcode, dl, VT, Op1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2) { + return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3) { + return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Op; + return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return 
getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + const std::vector &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From has a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, + DAGUpdateListener *UpdateListener) { + SDNode *From = FromN.getNode(); + assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && + "Cannot replace with this method!"); + assert(From != To.getNode() && "Cannot replace uses of with self"); + + // Iterate over all the existing uses of From. New uses will be added + // to the beginning of the use list, which we avoid visiting. + // This specifically avoids visiting uses of From that arise while the + // replacement is happening, because any such uses would be the result + // of CSE: If an existing node looks like From after one of its operands + // is replaced by To, we don't want to replace of all its users with To + // too. See PR3018 for more info. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes that for each value of From, there is a +/// corresponding value in To in the same position with the same type. 
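+/// (Illustrative note, not part of the original comment: when a combine
+/// replaces one multi-result node with another, e.g. a {value, chain} LOAD
+/// with an equivalent one, this rewires the users of every result in a
+/// single pass, matching results by number.)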
+/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, + DAGUpdateListener *UpdateListener) { +#ifndef NDEBUG + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + assert((!From->hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && + "Cannot use this version of ReplaceAllUsesWith!"); +#endif + + // Handle the trivial case. + if (From == To) + return; + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDValue *To, + DAGUpdateListener *UpdateListener) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, + DAGUpdateListener *UpdateListener){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To, UpdateListener); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. 
+ SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. + if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num, + DAGUpdateListener *UpdateListener){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. + SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. 
+    do {
+      unsigned i = Uses[UseIndex].Index;
+      SDUse &Use = *Uses[UseIndex].Use;
+      ++UseIndex;
+
+      Use.set(To[i]);
+    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order.  It returns the number of nodes assigned
+/// and leaves the AllNodes list itself sorted into topological order.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+  unsigned DAGSize = 0;
+
+  // SortedPos tracks the progress of the algorithm. Nodes before it are
+  // sorted, nodes after it are unsorted. When the algorithm completes
+  // it is at the end of the list.
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+    SDNode *N = I++;
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands, add it to the result array immediately.
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q = N;
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
+        P->setNodeId(Degree);
+      }
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+  DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+                                         MVT VT, int64_t o)
+  : SDNode(isa<GlobalVariable>(GA) &&
+           cast<GlobalVariable>(GA)->isThreadLocal() ?
+           // Thread Local
+           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+           // Non Thread Local
+           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+           DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+  TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt,
+                     const Value *srcValue, int SVO,
+                     unsigned alignment, bool vol)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+  assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+  assert(getAlignment() == alignment && "Alignment representation error!");
+  assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+                     const SDValue *Ops,
+                     unsigned NumOps, MVT memvt, const Value *srcValue,
+                     int SVO, unsigned alignment, bool vol)
+   : SDNode(Opc, dl, VTs, Ops, NumOps),
+     MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+  assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+  assert(getAlignment() == alignment && "Alignment representation error!");
+  assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+/// getMemOperand - Return a MachineMemOperand object describing the memory
+/// reference performed by this memory operation.
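+/// (Illustrative note, not part of the original comment: spill and reload
+/// code often builds loads and stores directly from a FrameIndexSDNode with
+/// no IR-level Value attached; the fixed-stack special case below
+/// substitutes a PseudoSourceValue for that slot instead of returning a
+/// memory operand with no source information.)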
+MachineMemOperand MemSDNode::getMemOperand() const {
+  int Flags = 0;
+  if (isa<LoadSDNode>(this))
+    Flags = MachineMemOperand::MOLoad;
+  else if (isa<StoreSDNode>(this))
+    Flags = MachineMemOperand::MOStore;
+  else if (isa<AtomicSDNode>(this)) {
+    Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+  }
+  else {
+    const MemIntrinsicSDNode* MemIntrinNode =
+      dyn_cast<MemIntrinsicSDNode>(this);
+    assert(MemIntrinNode && "Unknown MemSDNode opcode!");
+    if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad;
+    if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore;
+  }
+
+  int Size = (getMemoryVT().getSizeInBits() + 7) >> 3;
+  if (isVolatile()) Flags |= MachineMemOperand::MOVolatile;
+
+  // Check if the memory reference references a frame index
+  const FrameIndexSDNode *FI =
+    dyn_cast<FrameIndexSDNode>(getBasePtr().getNode());
+  if (!getSrcValue() && FI)
+    return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()),
+                             Flags, 0, Size, getAlignment());
+  else
+    return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(),
+                             Size, getAlignment());
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+  AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const MVT *SDNode::getValueTypeList(MVT VT) {
+  if (VT.isExtended()) {
+    static std::set<MVT, MVT::compareRawBits> EVTs;
+    return &(*EVTs.insert(VT).first);
+  } else {
+    static MVT VTs[MVT::LAST_VALUETYPE];
+    VTs[VT.getSimpleVT()] = VT;
+    return &VTs[VT.getSimpleVT()];
+  }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // TODO: Only iterate over uses of a given value of the node
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+    if (UI.getUse().getResNo() == Value) {
+      if (NUses == 0)
+        return false;
+      --NUses;
+    }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there is any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+    if (UI.getUse().getResNo() == Value)
+      return true;
+
+  return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperandOf - Return true if this node is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].getNode())
+      return true;
+  return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions.  In practice, this looks through token
+/// factors and non-volatile loads.
+/// In order to remain efficient, this only looks a couple of nodes in; it
+/// does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+                                             unsigned Depth) const {
+  if (*this == Dest) return true;
+
+  // Don't search too deeply, we just want to be able to see through
+  // TokenFactor's etc.
+  if (Depth == 0) return false;
+
+  // If this is a token factor, all inputs to the TF happen in parallel.  If
+  // any of the operands of the TF reach dest, then we can do the xform.
+  if (getOpcode() == ISD::TokenFactor) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+        return true;
+    return false;
+  }
+
+  // Loads don't have side effects, look through them.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+    if (!Ld->isVolatile())
+      return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+  }
+  return false;
+}
+
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+                            SmallPtrSet<SDNode *, 32> &Visited) {
+  if (found || !Visited.insert(N))
+    return;
+
+  for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+    SDNode *Op = N->getOperand(i).getNode();
+    if (Op == P) {
+      found = true;
+      return;
+    }
+    findPredecessor(Op, P, found, Visited);
+  }
+}
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessorOf(SDNode *N) const {
+  SmallPtrSet<SDNode *, 32> Visited;
+  bool found = false;
+  findPredecessor(N, this, found, Visited);
+  return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+  assert(Num < NumOperands && "Invalid child # of SDNode!");
+  return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    if (isMachineOpcode()) {
+      if (G)
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getMachineOpcode() < TII->getNumOpcodes())
+            return TII->get(getMachineOpcode()).getName();
+      return "<<Unknown Machine Node>>";
+    }
+    if (G) {
+      const TargetLowering &TLI = G->getTargetLoweringInfo();
+      const char *Name = TLI.getTargetNodeName(getOpcode());
+      if (Name) return Name;
+      return "<<Unknown Target Node>>";
+    }
+    return "<<Unknown Node>>";
+
+#ifndef NDEBUG
+  case ISD::DELETED_NODE:
+    return "<<Deleted Node!>>";
+#endif
+  case ISD::PREFETCH:      return "Prefetch";
+  case ISD::MEMBARRIER:    return "MemBarrier";
+  case ISD::ATOMIC_CMP_SWAP:    return "AtomicCmpSwap";
+  case ISD::ATOMIC_SWAP:        return "AtomicSwap";
+  case ISD::ATOMIC_LOAD_ADD:    return "AtomicLoadAdd";
+  case ISD::ATOMIC_LOAD_SUB:    return "AtomicLoadSub";
+  case ISD::ATOMIC_LOAD_AND:    return "AtomicLoadAnd";
+  case ISD::ATOMIC_LOAD_OR:     return "AtomicLoadOr";
+  case ISD::ATOMIC_LOAD_XOR:    return "AtomicLoadXor";
+  case ISD::ATOMIC_LOAD_NAND:   return "AtomicLoadNand";
+  case ISD::ATOMIC_LOAD_MIN:    return "AtomicLoadMin";
+  case ISD::ATOMIC_LOAD_MAX:    return "AtomicLoadMax";
+  case ISD::ATOMIC_LOAD_UMIN:   return "AtomicLoadUMin";
+  case ISD::ATOMIC_LOAD_UMAX:   return "AtomicLoadUMax";
+  case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+  case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::MEMOPERAND:    return "MemOperand";
+  case ISD::EntryToken:    return "EntryToken";
+  case ISD::TokenFactor:   return "TokenFactor";
+  case ISD::AssertSext:    return "AssertSext";
+  case
ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::ARG_FLAGS: return "ArgFlags"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast(getOperand(0))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast(getOperand(1))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::DBG_LABEL: return "dbg_label"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::DECLARE: return "declare"; + case ISD::HANDLENODE: return "handlenode"; + case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; + case ISD::CALL: return "call"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FPOWI: return "fpowi"; + case ISD::FPOW: return "fpow"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + 
case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + + case ISD::CONVERT_RNDSAT: { + switch (cast(this)->getCvtCode()) { + default: assert(0 && "Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::RET: return "ret"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Debug info + case ISD::DBG_STOPPOINT: return "dbg_stoppoint"; + case ISD::DEBUG_LOC: return "debug_loc"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast(this)->get()) { + 
default: assert(0 && "Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return ""; + case ISD::PRE_DEC: + return ""; + case ISD::POST_INC: + return ""; + case ISD::POST_DEC: + return ""; + } +} + +std::string ISD::ArgFlagsTy::getArgFlagsString() { + std::string S = "< "; + + if (isZExt()) + S += "zext "; + if (isSExt()) + S += "sext "; + if (isInReg()) + S += "inreg "; + if (isSRet()) + S += "sret "; + if (isByVal()) + S += "byval "; + if (isNest()) + S += "nest "; + if (getByValAlign()) + S += "byval-align:" + utostr(getByValAlign()) + " "; + if (getOrigAlign()) + S += "orig-align:" + utostr(getOrigAlign()) + " "; + if (getByValSize()) + S += "byval-size:" + utostr(getByValSize()) + " "; + return S + ">"; +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(errs(), G); +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getMVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { + const ShuffleVectorSDNode *SVN = cast(this); + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } + + if (const ConstantSDNode *CSDN = dyn_cast(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const FrameIndexSDNode *FIDN = dyn_cast(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast(this)) { + OS << "<" << JTDN->getIndex() << ">"; + } else if (const ConstantPoolSDNode *CP = dyn_cast(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << 
*CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast(this)) { + if (G && R->getReg() && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + OS << " #" << R->getReg(); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast(this)) { + OS << "'" << ES->getSymbol() << "'"; + } else if (const SrcValueSDNode *M = dyn_cast(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << ""; + } else if (const MemOperandSDNode *M = dyn_cast(this)) { + if (M->MO.getValue()) + OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; + else + OS << "MO.getOffset() << ">"; + } else if (const ARG_FLAGSSDNode *N = dyn_cast(this)) { + OS << N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast(this)) { + OS << ":" << N->getVT().getMVTString(); + } + else if (const LoadSDNode *LD = dyn_cast(this)) { + const Value *SrcValue = LD->getSrcValue(); + int SrcOffset = LD->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << " getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (LD->isVolatile()) + OS << " "; + OS << " alignment=" << LD->getAlignment(); + } else if (const StoreSDNode *ST = dyn_cast(this)) { + const Value *SrcValue = ST->getSrcValue(); + int SrcOffset = ST->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + if (ST->isTruncatingStore()) + OS << " getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (ST->isVolatile()) + OS << " "; + OS << " alignment=" << ST->getAlignment(); + } else if (const AtomicSDNode* AT = dyn_cast(this)) { + const Value *SrcValue = AT->getSrcValue(); + int SrcOffset = AT->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + if (AT->isVolatile()) + OS << " "; + OS << " alignment=" << AT->getAlignment(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + OS << " "; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + cerr << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": "; + + + cerr << "\n" << std::string(indent, ' '); + N->dump(G); +} + +void SelectionDAG::dump() const { + cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = 
allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + cerr << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + // Dump the current SDNode, but don't end the line yet. + OS << std::string(indent, ' '); + N->printr(OS, G); + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + if (i) OS << ","; + OS << " "; + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + OS << "\n"; + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, 0, once); +} + + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits) { + MVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < nOps; ++i) { + SDValue OpVal = getOperand(i); + unsigned BitPos = i * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + else if (ConstantSDNode *CN = dyn_cast(OpVal)) + SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). 
+ zextOrTrunc(sz) << BitPos); + else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) < 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); + APInt LowValue = APInt(SplatValue).trunc(HalfSize); + APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. + if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp new file mode 100644 index 000000000000..889d7f5dd934 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -0,0 +1,6052 @@ +//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "SelectionDAGBuild.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). +static unsigned LimitFloatPrecision; + +static cl::opt +LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), + cl::init(0)); + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex = 0) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// MVTs that represent all the individual underlying +/// non-aggregate types that comprise it. 
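// Editor's illustration (not part of the imported sources): ComputeLinearIndex
// above numbers the scalar leaves of an aggregate depth-first, so for a type
// like { i32, [2 x i32], i32 } the index path {1, 1} (second element of the
// inner array) maps to linear index 2. A minimal standalone sketch of the same
// numbering over a toy type tree; ToyType/countLeaves/toyLinearIndex are
// hypothetical names invented for this example:

#include <cstddef>
#include <vector>

struct ToyType {
  std::vector<ToyType> Elems;   // empty => scalar leaf
};

static unsigned countLeaves(const ToyType &Ty) {
  if (Ty.Elems.empty()) return 1;
  unsigned N = 0;
  for (size_t i = 0; i != Ty.Elems.size(); ++i)
    N += countLeaves(Ty.Elems[i]);
  return N;
}

// Linear index of the member named by Path: skip the leaves of every sibling
// that precedes the chosen element at each nesting level.
static unsigned toyLinearIndex(const ToyType &Ty,
                               const std::vector<unsigned> &Path,
                               size_t Depth = 0) {
  if (Depth == Path.size()) return 0;
  unsigned Idx = 0;
  for (unsigned i = 0; i != Path[Depth]; ++i)
    Idx += countLeaves(Ty.Elems[i]);
  return Idx + toyLinearIndex(Ty.Elems[Path[Depth]], Path, Depth + 1);
}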
+/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets = 0, + uint64_t StartingOffset = 0) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty == Type::VoidTy) + return; + // Base case: we can get an MVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +namespace llvm { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information about + /// the value. The most common situation is to represent one value at a time, + /// but struct or array values are handled element-wise as multiple values. + /// The splitting of aggregates is performed recursively, so that we never + /// have aggregate-typed registers. The values at this point do not necessarily + /// have legal types, so each value may require one or more registers of some + /// legal type. + /// + struct VISIBILITY_HIDDEN RegsForValue { + /// TLI - The TargetLowering object. + /// + const TargetLowering *TLI; + + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. 
+ /// + SmallVector Regs; + + RegsForValue() : TLI(0) {} + + RegsForValue(const TargetLowering &tli, + const SmallVector ®s, + MVT regvt, MVT valuevt) + : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + const SmallVector ®s, + const SmallVector ®vts, + const SmallVector &valuevts) + : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + unsigned Reg, const Type *Ty) : TLI(&tli) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = TLI->getRegisterType(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + TLI = RHS.TLI; + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Code, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, std::vector &Ops) const; + }; +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast(*UI)->getParent() != BB || isa(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + // Don't force virtual registers for byval arguments though, because + // fast-isel can't handle those in all cases. 
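// Editor's illustration (not part of the imported sources): the rule below
// means an argument can live purely in the entry block only if every use is
// in the entry block and no use is a switch, and that under fast-isel only
// dead, non-byval arguments avoid a virtual register. A toy mirror of that
// decision, with ToyUse as a hypothetical stand-in for an LLVM use:

#include <vector>

struct ToyUse { bool InEntryBlock; bool IsSwitch; };

static bool onlyUsedInEntryBlock(const std::vector<ToyUse> &Uses,
                                 bool FastISel, bool IsByVal) {
  if (FastISel && !IsByVal)
    return Uses.empty();                 // force vregs for live arguments
  for (size_t i = 0; i != Uses.size(); ++i)
    if (!Uses[i].InEntryBlock || Uses[i].IsSwitch)
      return false;
  return true;
}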
+ if (EnableFastISel && !A->hasByValAttr()) + return A->use_empty(); + + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast(*UI)->getParent() != Entry || isa(*UI)) + return false; // Use not in entry block. + return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, + SelectionDAG &DAG, + bool EnableFastISel) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) + if (ConstantInt *CUI = dyn_cast(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa(I) || + !StaticAllocaMap.count(cast(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. 
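// Editor's illustration (not part of the imported sources): the frame-object
// sizing in FunctionLoweringInfo::set above multiplies the allocated type's
// size by the constant array count, clamps zero-sized objects to one byte so
// each alloca keeps a distinct address, and takes the larger of the preferred
// type alignment and any explicit alignment. E.g. 'alloca [4 x i32], i32 3'
// yields 16 * 3 = 48 bytes:

#include <cstdint>

static uint64_t allocaObjectBytes(uint64_t TypeBytes, uint64_t ArrayCount) {
  uint64_t Size = TypeBytes * ArrayCount;  // total allocated size
  return Size ? Size : 1;                  // don't create zero-sized objects
}

static unsigned allocaObjectAlign(unsigned PrefTypeAlign,
                                  unsigned ExplicitAlign) {
  return PrefTypeAlign > ExplicitAlign ? PrefTypeAlign : ExplicitAlign;
}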
+ PHINode *PN; + DebugLoc DL; + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast(I)) { + if (Function *F = CI->getCalledFunction()) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast(I); + + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), + CodeGenOpt::Default)) { + DICompileUnit CU(cast(SPI->getContext())); + unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(), + SPI->getLine(), + SPI->getColumn()); + DL = DebugLoc::get(idx); + } + + break; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast(I); + Value *SP = FSI->getSubprogram(); + + if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) { + DISubprogram Subprogram(cast(SP)); + DICompileUnit CU(Subprogram.getCompileUnit()); + unsigned Line = Subprogram.getLineNumber(); + DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(), + Line, 0)); + } + + break; + } + } + } + } + + PN = dyn_cast(I); + if (!PN || PN->use_empty()) continue; + + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } +} + +unsigned FunctionLoweringInfo::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, V->getType(), ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI.getRegisterType(ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = MakeReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, MVT ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (!ValueVT.isVector() && ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. 
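// Editor's illustration (not part of the imported sources): the "power of 2
// part" assembled below is the largest power-of-2 prefix of the part list;
// with NumParts = 3, parts 0-1 are combined first and the odd third part is
// then OR'd in above them. An equivalent way to compute RoundParts:

#include <cassert>

static unsigned roundPartsOf(unsigned NumParts) {
  assert(NumParts > 0 && "No parts to assemble!");
  unsigned P = 1;
  while (P * 2 <= NumParts)
    P *= 2;   // largest power of 2 <= NumParts
  return P;   // matches NumParts & (NumParts-1) ? 1 << Log2_32(NumParts)
              //                                 : NumParts
}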
+ unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + MVT RoundVT = RoundBits == ValueBits ? + ValueVT : MVT::getIntegerVT(RoundBits); + SDValue Lo, Hi; + + MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); + Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2, + PartVT, HalfVT); + } else { + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + } + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + Hi = getCopyFromParts(DAG, dl, + Parts+RoundParts, OddParts, PartVT, OddVT); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); + } + } else if (ValueVT.isVector()) { + // Handle a multi-element vector. + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate operands + // from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate + // operands. + Val = DAG.getNode(IntermediateVT.isVector() ? 
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, + ValueVT, &Ops[0], NumIntermediates); + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + assert(ValueVT.isVector() && "Unknown vector conversion!"); + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + } + + if (ValueVT.isVector()) { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); + } + + if (PartVT.isInteger() && + ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. + if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, dl, PartVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + } + } + + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (ValueVT.bitsLT(Val.getValueType())) + // FP_ROUND's are always exact here. + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + } + + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + + assert(0 && "Unknown mismatch!"); + return SDValue(); +} + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(); + MVT ValueVT = Val.getValueType(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (!NumParts) + return; + + if (!ValueVT.isVector()) { + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
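// Editor's illustration (not part of the imported sources): when the parts
// cover more bits than the value (e.g. an i20 copied into a single i32 part),
// the value is first widened with the requested ExtendKind. The bit-level
// effect for the integer case, assuming two's-complement storage (ANY_EXTEND
// would leave the high bits unspecified):

#include <cstdint>

static uint32_t widen20To32(uint32_t V20, bool SignExtend) {
  uint32_t V = V20 & 0xFFFFFu;        // the 20 significant bits
  if (SignExtend && (V & 0x80000u))   // sign bit of the i20 value
    V |= 0xFFF00000u;                 // ISD::SIGN_EXTEND
  return V;                           // otherwise ISD::ZERO_EXTEND
}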
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); + } else if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, + DAG.getConstant(RoundBits, + TLI.getPointerTy())); + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + NumParts = RoundParts; + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getIntegerVT(ValueVT.getSizeInBits()), + Val); + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + MVT ThisVT = MVT::getIntegerVT (ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(1, PtrVT)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(0, PtrVT)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); + + return; + } + + // Vector ValueVT. + if (NumParts == 1) { + if (PartVT != ValueVT) { + if (PartVT.isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + PartVT, Val, + DAG.getConstant(0, PtrVT)); + } + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. 
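// Editor's illustration (not part of the imported sources): the StepSize loop
// above halves the value repeatedly with EXTRACT_ELEMENT, so an i64 split
// into four i16 parts is produced as two i32 halves and then four i16
// quarters, low part first (big-endian targets reverse the array afterwards).
// Plain-integer model of the same bisection:

#include <cstdint>

static void bisectI64ToI16(uint64_t V, uint16_t Parts[4]) {
  uint32_t Half[2] = { (uint32_t)V, (uint32_t)(V >> 32) };  // first bisection
  for (int i = 0; i != 2; ++i) {                            // second bisection
    Parts[2 * i]     = (uint16_t)Half[i];
    Parts[2 * i + 1] = (uint16_t)(Half[i] >> 16);
  }
}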
+ MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI + .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), + PtrVT)); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + IntermediateVT, Val, + DAG.getConstant(i, PtrVT)); + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT); + } +} + + +void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} + +/// clear - Clear out the curret SelectionDAG and the associated +/// state and prepare this SelectionDAGLowering object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGLowering::clear() { + NodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DAG.clear(); + CurDebugLoc = DebugLoc::getUnknownLoc(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGLowering::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGLowering::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. 
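// Editor's illustration (not part of the imported sources): getRoot() above
// collapses every pending load into a single TokenFactor so that a later
// store is ordered after all of them. Minimal model of the flush, where
// Chain and makeTokenFactor are hypothetical stand-ins for SDValue chains
// and DAG.getNode(ISD::TokenFactor, ...):

#include <vector>

typedef unsigned Chain;

static Chain makeTokenFactor(const std::vector<Chain> &Ops) {
  // Stand-in: a real TokenFactor is a new node depending on every op in Ops.
  return Ops.back();
}

static Chain flushPending(std::vector<Chain> &Pending, Chain Root) {
  if (Pending.empty())
    return Root;                  // nothing to order; keep the current root
  Chain NewRoot = Pending.size() == 1 ? Pending[0] : makeTokenFactor(Pending);
  Pending.clear();
  return NewRoot;
}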
+  if (Root.getOpcode() != ISD::EntryToken) {
+    unsigned i = 0, e = PendingExports.size();
+    for (; i != e; ++i) {
+      assert(PendingExports[i].getNode()->getNumOperands() > 1);
+      if (PendingExports[i].getNode()->getOperand(0) == Root)
+        break;  // Don't add the root if we already indirectly depend on it.
+    }
+
+    if (i == e)
+      PendingExports.push_back(Root);
+  }
+
+  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                     &PendingExports[0],
+                     PendingExports.size());
+  PendingExports.clear();
+  DAG.setRoot(Root);
+  return Root;
+}
+
+void SelectionDAGLowering::visit(Instruction &I) {
+  visit(I.getOpcode(), I);
+}
+
+void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
+  // Note: this doesn't use InstVisitor, because it has to work with
+  // ConstantExpr's in addition to instructions.
+  switch (Opcode) {
+  default: assert(0 && "Unknown instruction type encountered!");
+           abort();
+    // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+  case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+  }
+}
+
+void SelectionDAGLowering::visitAdd(User &I) {
+  if (I.getType()->isFPOrFPVector())
+    visitBinary(I, ISD::FADD);
+  else
+    visitBinary(I, ISD::ADD);
+}
+
+void SelectionDAGLowering::visitMul(User &I) {
+  if (I.getType()->isFPOrFPVector())
+    visitBinary(I, ISD::FMUL);
+  else
+    visitBinary(I, ISD::MUL);
+}
+
+SDValue SelectionDAGLowering::getValue(const Value *V) {
+  SDValue &N = NodeMap[V];
+  if (N.getNode()) return N;
+
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    MVT VT = TLI.getValueType(V->getType(), true);
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+      return N = DAG.getConstant(*CI, VT);
+
+    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+      return N = DAG.getGlobalAddress(GV, VT);
+
+    if (isa<ConstantPointerNull>(C))
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+      return N = DAG.getConstantFP(*CFP, VT);
+
+    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+      return N = DAG.getUNDEF(VT);
+
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);
+      SDValue N1 = NodeMap[V];
+      assert(N1.getNode() && "visit didn't populate the ValueMap!");
+      return N1;
+    }
+
+    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+      SmallVector<SDValue, 4> Constants;
+      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+           OI != OE; ++OI) {
+        SDNode *Val = getValue(*OI).getNode();
+        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+          Constants.push_back(SDValue(Val, i));
+      }
+      return DAG.getMergeValues(&Constants[0], Constants.size(),
+                                getCurDebugLoc());
+    }
+
+    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+             "Unknown struct or array constant!");
+
+      SmallVector<MVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, C->getType(), ValueVTs);
+      unsigned NumElts = ValueVTs.size();
+      if (NumElts == 0)
+        return SDValue(); // empty struct
+      SmallVector<SDValue, 4> Constants(NumElts);
+      for (unsigned i = 0; i != NumElts; ++i) {
+        MVT EltVT = ValueVTs[i];
+        if (isa<UndefValue>(C))
+          Constants[i] = DAG.getUNDEF(EltVT);
+        else if (EltVT.isFloatingPoint())
+          Constants[i] = DAG.getConstantFP(0, EltVT);
+        else
+          Constants[i] = DAG.getConstant(0, EltVT);
+      }
+      return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
+    }
+
+    const VectorType *VecTy = cast<VectorType>(V->getType());
+    unsigned NumElements = VecTy->getNumElements();
+
+    // Now that we know the number and type of the elements, get that number of
+    // elements into the Ops array based on what kind of constant it is.
+    SmallVector<SDValue, 16> Ops;
+    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+      for (unsigned i = 0; i != NumElements; ++i)
+        Ops.push_back(getValue(CP->getOperand(i)));
+    } else {
+      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+      MVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+      SDValue Op;
+      if (EltVT.isFloatingPoint())
+        Op = DAG.getConstantFP(0, EltVT);
+      else
+        Op = DAG.getConstant(0, EltVT);
+      Ops.assign(NumElements, Op);
+    }
+
+    // Create a BUILD_VECTOR node.
+    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                                    VT, &Ops[0], Ops.size());
+  }
+
+  // If this is a static alloca, generate it as the frameindex instead of
+  // computation.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end())
+      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+  }
+
+  unsigned InReg = FuncInfo.ValueMap[V];
+  assert(InReg && "Value not in map!");
+
+  RegsForValue RFV(TLI, InReg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+  if (I.getNumOperands() == 0) {
+    DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(),
+                            MVT::Other, getControlRoot()));
+    return;
+  }
+
+  SmallVector<SDValue, 8> NewValues;
+  NewValues.push_back(getControlRoot());
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    if (NumValues == 0) continue;
+
+    SDValue RetOp = getValue(I.getOperand(i));
+    for (unsigned j = 0, f = NumValues; j != f; ++j) {
+      MVT VT = ValueVTs[j];
+
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+      const Function *F = I.getParent()->getParent();
+      if (F->paramHasAttr(0, Attribute::SExt))
+        ExtendKind = ISD::SIGN_EXTEND;
+      else if (F->paramHasAttr(0, Attribute::ZExt))
+        ExtendKind = ISD::ZERO_EXTEND;
+
+      // FIXME: C calling convention requires the return type to be promoted to
+      // at least 32-bit. But this is not necessary for non-C calling
+      // conventions. The frontend should mark functions whose return values
+      // require promoting with signext or zeroext attributes.
+      if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+        MVT MinVT = TLI.getRegisterType(MVT::i32);
+        if (VT.bitsLT(MinVT))
+          VT = MinVT;
+      }
+
+      unsigned NumParts = TLI.getNumRegisters(VT);
+      MVT PartVT = TLI.getRegisterType(VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      getCopyToParts(DAG, getCurDebugLoc(),
+                     SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+                     &Parts[0], NumParts, PartVT, ExtendKind);
+
+      // 'inreg' on function refers to return value
+      ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+      if (F->paramHasAttr(0, Attribute::InReg))
+        Flags.setInReg();
+      for (unsigned i = 0; i < NumParts; ++i) {
+        NewValues.push_back(Parts[i]);
+        NewValues.push_back(DAG.getArgFlags(Flags));
+      }
+    }
+  }
+  DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other,
+                          &NewValues[0], NewValues.size()));
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
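// Editor's illustration (not part of the imported sources): per the FIXME in
// visitRet above, a small integer return value marked signext/zeroext is
// widened to the 32-bit register type before the RET node is built. The
// value-level effect for an i8 return:

#include <cstdint>

static int32_t promoteI8Return(int8_t V, bool HasSExtAttr) {
  return HasSExtAttr ? (int32_t)V              // ISD::SIGN_EXTEND
                     : (int32_t)(uint8_t)V;    // ISD::ZERO_EXTEND
}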
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { + if (!V->use_empty()) { + DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { + // No need to export constants. + if (!isa(V) && !isa(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (Instruction *VI = dyn_cast(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast(V)) + return I->getParent() == BB; + return true; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + assert(0 && "Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
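// Editor's illustration (not part of the imported sources): the FOC/FPC pair
// in getFCmpCondCode above encodes the ordered/unordered split. An ordered
// compare is false when either operand is NaN; an unordered compare is true.
// With FiniteOnlyFPMath() the distinction collapses to the plain
// integer-style code:

static bool isNaN(double X) { return X != X; }

static bool fcmpOLT(double A, double B) {          // ISD::SETOLT
  return !isNaN(A) && !isNaN(B) && A < B;
}

static bool fcmpULT(double A, double B) {          // ISD::SETULT
  return isNaN(A) || isNaN(B) || A < B;
}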
+///
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+  switch (Pred) {
+  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
+  case ICmpInst::ICMP_NE:  return ISD::SETNE;
+  case ICmpInst::ICMP_SLE: return ISD::SETLE;
+  case ICmpInst::ICMP_ULE: return ISD::SETULE;
+  case ICmpInst::ICMP_SGE: return ISD::SETGE;
+  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+  case ICmpInst::ICMP_SLT: return ISD::SETLT;
+  case ICmpInst::ICMP_ULT: return ISD::SETULT;
+  case ICmpInst::ICMP_SGT: return ISD::SETGT;
+  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+  default:
+    assert(0 && "Invalid ICmp predicate opcode!");
+    return ISD::SETNE;
+  }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
+                                                   MachineBasicBlock *TBB,
+                                                   MachineBasicBlock *FBB,
+                                                   MachineBasicBlock *CurBB) {
+  const BasicBlock *BB = CurBB->getBasicBlock();
+
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    // The operands of the cmp have to be in this block.  We don't know
+    // how to export them from some other block.  If this is the first block
+    // of the sequence, no exporting is needed.
+    if (CurBB == CurMBB ||
+        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+      ISD::CondCode Condition;
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        Condition = getICmpCondCode(IC->getPredicate());
+      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+        Condition = getFCmpCondCode(FC->getPredicate());
+      } else {
+        Condition = ISD::SETEQ; // silence warning.
+        assert(0 && "Unknown compare instruction");
+      }
+
+      CaseBlock CB(Condition, BOp->getOperand(0),
+                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+      SwitchCases.push_back(CB);
+      return;
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+               NULL, TBB, FBB, CurBB);
+  SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+                                                MachineBasicBlock *TBB,
+                                                MachineBasicBlock *FBB,
+                                                MachineBasicBlock *CurBB,
+                                                unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
+  MachineFunction::iterator BBI = CurBB;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    //  This requires creation of TmpBB after CurBB.
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  }
+}
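(Aside, not part of the imported source: the block structure FindMergedConditions builds for "br (X | Y), TBB, FBB" can be read as ordinary control flow. A minimal C++ sketch, with gotos standing in for the emitted jumps; all names are illustrative, none of this is the SelectionDAG API:)

  #include <cstdio>

  // Illustration only: each leaf condition of the or-tree gets its own
  // conditional branch, so no materialized boolean "or" is needed.
  static void lowered_or(bool X, bool Y) {
    if (X) goto TBB;            // jmp_if_X TBB
    // TmpBB:
    if (Y) goto TBB;            // jmp_if_Y TBB
    goto FBB;                   // jmp FBB
  TBB:
    std::printf("true edge\n");
    return;
  FBB:
    std::printf("false edge\n");
  }

The And case is the same shape with the roles of the fall-through and taken edges swapped at the first test.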
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+  if (Cases.size() != 2) return true;
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+
+  return true;
+}
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+  // Update machine-CFG edges.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // Update machine-CFG edges.
+    CurMBB->addSuccessor(Succ0MBB);
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Succ0MBB != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Succ0MBB)));
+    return;
+  }
+
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  Value *CondVal = I.getCondition();
+  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  //
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    if (BOp->hasOneUse() &&
+        (BOp->getOpcode() == Instruction::And ||
+         BOp->getOpcode() == Instruction::Or)) {
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      // If the compares in later blocks need to use values not currently
+      // exported from this block, export them now.  This block should always
+      // be the first entry.
+      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+      // Allow some cases to be rejected.
+      if (ShouldEmitAsBranches(SwitchCases)) {
+        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+        }
+
+        // Emit the branch for this block.
+        visitSwitchCase(SwitchCases[0]);
+        SwitchCases.erase(SwitchCases.begin());
+        return;
+      }
+
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        CurMBB->getParent()->erase(SwitchCases[i].ThisBB);
+
+      SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+               NULL, Succ0MBB, Succ1MBB, CurMBB);
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
+  SDValue Cond;
+  SDValue CondLHS = getValue(CB.CmpLHS);
+  DebugLoc dl = getCurDebugLoc();
+
+  // Build the setcc now.
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    SDValue CmpOp = getValue(CB.CmpMHS);
+    MVT VT = CmpOp.getValueType();
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+                          ISD::SETLE);
+    } else {
+      SDValue SUB = DAG.getNode(ISD::SUB, dl,
+                                VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+  }
+
+  // Update successor info
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDValue True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+  }
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                               MVT::Other, getControlRoot(), Cond,
+                               DAG.getBasicBlock(CB.TrueBB));
+
+  // If the branch was constant folded, fix up the CFG.
+  if (BrCond.getOpcode() == ISD::BR) {
+    CurMBB->removeSuccessor(CB.FalseBB);
+    DAG.setRoot(BrCond);
+  } else {
+    // Otherwise, go ahead and insert the false branch.
+    if (BrCond == getControlRoot())
+      CurMBB->removeSuccessor(CB.TrueBB);
+
+    if (CB.FalseBB == NextBlock)
+      DAG.setRoot(BrCond);
+    else
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                              DAG.getBasicBlock(CB.FalseBB)));
+  }
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MVT PTy = TLI.getPointerTy();
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                     JT.Reg, PTy);
+  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+  DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+                          MVT::Other, Index.getValue(1),
+                          Table, Index));
+}
+
+/// visitJumpTableHeader - This function emits necessary code to produce index
+/// in the JumpTable from switch case.
+void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
+                                                JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on and
+  // conditional branch to default mbb if the result is greater than the
+  // difference between smallest and largest cases.
+  SDValue SwitchOp = getValue(JTH.SValue);
+  MVT VT = SwitchOp.getValueType();
+  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // may therefore require extension or truncation.
+  if (VT.bitsGT(TLI.getPointerTy()))
+    SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                           TLI.getPointerTy(), SUB);
+  else
+    SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                           TLI.getPointerTy(), SUB);
+
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the
+  // largest case in the switch.
+  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+                             TLI.getSetCCResultType(SUB.getValueType()), SUB,
+                             DAG.getConstant(JTH.Last-JTH.First,VT),
+                             ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                               MVT::Other, CopyTo, CMP,
+                               DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+                            DAG.getBasicBlock(JT.MBB)));
+}
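(Aside, not part of the imported source: the header block above reduces to simple scalar arithmetic. A sketch assuming the switched value fits in a uint64_t, with illustrative names; note how the single unsigned compare after the subtraction catches values below First as well as above Last:)

  #include <cstdint>

  // Sketch of the dispatch visitJumpTableHeader sets up.
  static int jumpTableDispatch(uint64_t V, uint64_t First, uint64_t Last,
                               const int *Table, int DefaultDest) {
    uint64_t Index = V - First;     // subtract the smallest case value
    if (Index > Last - First)       // unsigned SETUGT: V < First wraps high
      return DefaultDest;           // branch to the default block
    return Table[Index];            // the ISD::BR_JT through the table
  }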
+/// visitBitTestHeader - This function emits necessary code to produce a value
+/// suitable for "bit tests"
+void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
+  // Subtract the minimum value
+  SDValue SwitchOp = getValue(B.SValue);
+  MVT VT = SwitchOp.getValueType();
+  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(B.First, VT));
+
+  // Check range
+  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+                                  TLI.getSetCCResultType(SUB.getValueType()),
+                                  SUB, DAG.getConstant(B.Range, VT),
+                                  ISD::SETUGT);
+
+  SDValue ShiftOp;
+  if (VT.bitsGT(TLI.getPointerTy()))
+    ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                          TLI.getPointerTy(), SUB);
+  else
+    ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                          TLI.getPointerTy(), SUB);
+
+  B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    B.Reg, ShiftOp);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                                MVT::Other, CopyTo, RangeCmp,
+                                DAG.getBasicBlock(B.Default));
+
+  if (MBB == NextBlock)
+    DAG.setRoot(BrRange);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+                            DAG.getBasicBlock(MBB)));
+}
+
+/// visitBitTestCase - this function produces one "bit test"
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                            unsigned Reg,
+                                            BitTestCase &B) {
+  // Make desired shift
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+                                       TLI.getPointerTy());
+  SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                                  TLI.getPointerTy(),
+                                  DAG.getConstant(1, TLI.getPointerTy()),
+                                  ShiftOp);
+
+  // Emit bit tests and jumps
+  SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                              TLI.getPointerTy(), SwitchVal,
+                              DAG.getConstant(B.Mask, TLI.getPointerTy()));
+  SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+                                TLI.getSetCCResultType(AndOp.getValueType()),
+                                AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                                ISD::SETNE);
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (NextMBB == NextBlock)
+    DAG.setRoot(BrAnd);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+                            DAG.getBasicBlock(NextMBB)));
+}
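(Aside, not part of the imported source: the header and the per-case block above jointly evaluate a set-membership test. A scalar sketch with uint64_t standing in for the pointer-width value; names are illustrative:)

  #include <cstdint>

  // Sketch: the header range-checks V and computes Shift = V - LowBound once;
  // each bit-test block then evaluates ((1 << Shift) & Mask) != 0 and branches.
  static bool bitTestHits(uint64_t V, uint64_t LowBound, uint64_t Mask) {
    uint64_t Shift = V - LowBound;        // guaranteed < word size by the header
    return ((1ULL << Shift) & Mask) != 0;
  }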
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  const Value *Callee(I.getCalledValue());
+  if (isa<InlineAsm>(Callee))
+    visitInlineAsm(&I);
+  else
+    LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  CopyToExportRegsIfNeeded(&I);
+
+  // Update successor info
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order.  See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->insert(BBI, FallThrough);
+
+      // Put SV in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(SV);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly 1 case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block.  Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return !DisableJumpTables &&
+         (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+          TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  APInt LastExt(Last), FirstExt(First);
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit jumptable for current switch case range
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+                                              CaseRecVector& WorkList,
+                                              Value* SV,
+                                              MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& Last  = cast<ConstantInt>(BackCase.High)->getValue();
+
+  size_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize <= 3)
+    return false;
+
+  APInt Range = ComputeRange(First, Last);
+  double Density = (double)TSize / Range.roundToDouble();
+  if (Density < 0.4)
+    return false;
+
+  DEBUG(errs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range << ". Size: " << TSize
+               << ". Density: " << Density << "\n\n");
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it.  Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+  CurMF->insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table.  If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
+  std::vector<MachineBasicBlock*> DestBBs;
+  APInt TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& High = cast<ConstantInt>(I->High)->getValue();
+
+    if (Low.sle(TEI) && TEI.sle(High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+
+  // Update successor info.  Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock
+  JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+
+  JTCases.push_back(JumpTableBlock(JTH, JT));
+
+  return true;
+}
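(Aside, not part of the imported source: the TSize/Range test above is the entire jump-table profitability check. The same heuristic with plain doubles in place of the APInt arithmetic, thresholds copied from the code:)

  #include <cstdint>

  // Sketch of handleJTSwitchCase's admission test: more than three case
  // values, and the table at least 40% populated over [First, Last].
  static bool worthJumpTable(uint64_t TSize, uint64_t First, uint64_t Last) {
    uint64_t Range = Last - First + 1;
    double Density = (double)TSize / (double)Range;
    return TSize > 3 && Density >= 0.4;
  }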
+/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
+/// 2 subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& Last  = cast<ConstantInt>(BackCase.High)->getValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select optimal pivot, maximizing sum density of LHS and RHS.  This will
+  // (heuristically) allow us to emit JumpTable's later.
+  size_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  size_t LSize = FrontCase.size();
+  size_t RSize = TSize-LSize;
+  DEBUG(errs() << "Selecting best pivot: \n"
+               << "First: " << First << ", Last: " << Last <<'\n'
+               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
+    const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+    APInt Range = ComputeRange(LEnd, RBegin);
+    assert((Range - 2ULL).isNonNegative() &&
+           "Invalid case distance");
+    double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
+    double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+    double Metric = Range.logBase2()*(LDensity+RDensity);
+    // Should always split in some non-trivial place
+    DEBUG(errs() <<"=>Step\n"
+                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+                 << "LDensity: " << LDensity
+                 << ", RDensity: " << RDensity << '\n'
+                 << "Metric: " << Metric << '\n');
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getValue() ==
+      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to RHS.
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
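(Aside, not part of the imported source: the pivot search above maximizes log2(gap) times the sum of the two sub-range densities. The same metric with plain doubles, APInt and iterator bookkeeping stripped out; assumes both spans are nonzero:)

  #include <cmath>

  // Sketch of the split metric: Range.logBase2() * (LDensity + RDensity).
  static double pivotMetric(double Gap, double LSize, double LSpan,
                            double RSize, double RSpan) {
    return std::log2(Gap) * (LSize / LSpan + RSize / RSpan);
  }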
+/// handleBitTestsSwitchCase - if the current case range has few destinations
+/// and the range spans less than the machine word bitwidth, encode the case
+/// range into a series of masks and emit bit tests with these masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+                                                    CaseRecVector& WorkList,
+                                                    Value* SV,
+                                                    MachineBasicBlock* Default){
+  unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // If target does not have legal shift left, do not emit bit tests at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // Single case counts one, case range - two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother the code below, if there are too many unique destinations
+      return false;
+  }
+  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
+               << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(errs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a
+  // word without having to subtract minValue.  In this case,
+  // we can optimize away the subtraction.
+  if (minValue.isNonNegative() &&
+      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(errs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, (CR.CaseBB == CurMBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
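(Aside, not part of the imported source: the inner loop above reduces to setting one bit per case value, rebased to lowBound. A uint64_t sketch of one destination's mask; the real code accumulates this in the per-destination CaseBits entries:)

  #include <cstdint>

  // Sketch: every value in [Lo, Hi] that branches to one destination sets
  // bit (value - LowBound) in that destination's mask.
  static uint64_t buildCaseMask(uint64_t Lo, uint64_t Hi, uint64_t LowBound) {
    uint64_t Mask = 0;
    for (uint64_t V = Lo; V <= Hi; ++V)
      Mask |= 1ULL << (V - LowBound);
    return Mask;
  }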
+/// Clusterify - Transform simple list of Cases into list of CaseRange's
+size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
+                                        const SwitchInst& SI) {
+  size_t numCmps = 0;
+
+  // Start with "simple" cases
+  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge case into clusters
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block.  Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    CurMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+  numCmps = 0;
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has few cases (three or fewer) emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+
+    // If the switch has more than 5 blocks, and at least 40% dense, and the
+    // target supports indirect branches, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // Emit binary tree.  We need to pick a pivot, and push left and right
+    // ranges onto the worklist.  Leaves are handled via handleSmallSwitchRange.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
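(Aside, not part of the imported source: Clusterify's merge step is plain sorted-run coalescing. A self-contained sketch over (value, destination) pairs, with ints standing in for the ConstantInt values and MachineBasicBlock pointers:)

  #include <algorithm>
  #include <cstdint>
  #include <utility>
  #include <vector>

  struct Cluster { uint64_t Low, High; int Dest; };

  // Sketch of Clusterify: sort the cases, then fold a case into the previous
  // cluster when its value is adjacent and the destination is the same.
  static std::vector<Cluster>
  clusterify(std::vector<std::pair<uint64_t, int> > Cases) {
    std::sort(Cases.begin(), Cases.end());
    std::vector<Cluster> Out;
    for (size_t i = 0, e = Cases.size(); i != e; ++i) {
      if (!Out.empty() && Out.back().Dest == Cases[i].second &&
          Out.back().High + 1 == Cases[i].first)
        Out.back().High = Cases[i].first;   // extend the existing cluster
      else {
        Cluster C = { Cases[i].first, Cases[i].first, Cases[i].second };
        Out.push_back(C);
      }
    }
    return Out;
  }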
+void SelectionDAGLowering::visitSub(User &I) {
+  // -0.0 - X --> fneg
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      if (ElTy->isFloatingPoint()) {
+        unsigned VL = DestTy->getNumElements();
+        std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+        Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+        if (CV == CNZ) {
+          SDValue Op2 = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                                   Op2.getValueType(), Op2));
+          return;
+        }
+      }
+    }
+  }
+  if (Ty->isFloatingPoint()) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+      if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+        SDValue Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                                 Op2.getValueType(), Op2));
+        return;
+      }
+  }
+
+  visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  if (!isa<VectorType>(I.getType()) &&
+      Op2.getValueType() != TLI.getShiftAmountTy()) {
+    // If the operand is smaller than the shift count type, promote it.
+    if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // If the operand is larger than the shift count type but the shift
+    // count type has enough bits to represent any shift value, truncate
+    // it now.  This is a common case and it exposes the truncate to
+    // optimization early.
+    else if (TLI.getShiftAmountTy().getSizeInBits() >=
+             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // Otherwise we'll need to temporarily settle for some other
+    // convenient type; type legalization will make adjustments as
+    // needed.
+    else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+    else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+  }
+
+  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode = getICmpCondCode(predicate);
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitVICmp(User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (VICmpInst *IC = dyn_cast<VICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode = getICmpCondCode(predicate);
+  setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(),
+                             Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitVFCmp(User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  MVT DestVT = TLI.getValueType(I.getType());
+
+  setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+  SmallVector<MVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues != 0) {
+    SmallVector<SDValue, 4> Values(NumValues);
+    SDValue Cond     = getValue(I.getOperand(0));
+    SDValue TrueVal  = getValue(I.getOperand(1));
+    SDValue FalseVal = getValue(I.getOperand(2));
+
+    for (unsigned i = 0; i != NumValues; ++i)
+      Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+                              TrueVal.getValueType(), Cond,
+                              SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
+                              SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
+
+    setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                             DAG.getVTList(&ValueVTs[0], NumValues),
+                             &Values[0], NumValues));
+  }
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for same reason. So, nothing much to do
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+                           DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){
+  // FPExt is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+  // FPToUI is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+  // UIToFP is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){
+  // SIToFP is never a no-op cast, no need to check
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  MVT SrcVT = N.getValueType();
+  MVT DestVT = TLI.getValueType(I.getType());
+  SDValue Result;
+  if (DestVT.bitsLT(SrcVT))
+    Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
+  else
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
+  setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  MVT SrcVT = N.getValueType();
+  MVT DestVT = TLI.getValueType(I.getType());
+  if (DestVT.bitsLT(SrcVT))
+    setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+  else
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+    setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                             DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that source and destination are the same size so this
+  // is either a BIT_CONVERT or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                             DestVT, N)); // convert types
+  else
+    setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InVal = getValue(I.getOperand(1));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(2)));
+
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+
+// Utility for visitShuffleVector - Returns true if the mask is a sequential
+// mask starting from SIndx and increasing to the element length (undefs are
+// allowed).
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  unsigned MaskNumElts = Mask.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+      return false;
+  return true;
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+  SmallVector<int, 8> Mask;
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 8> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+
+  MVT VT = TLI.getValueType(I.getType());
+  MVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // Mask is longer than the source vectors and is a multiple of the source
+    // vectors.  We can use concatenate vector to make the mask and vectors
+    // lengths match.
+    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors together.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                               VT, Src1, Src2));
+      return;
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust mask for new input vector length.
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle.  The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check if the access is smaller than the vector size, and whether we can
+    // find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Cannot extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts < SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
+      return;
+    }
+    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvector and generate a vector shuffle
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue& Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0) {
+          Src = DAG.getUNDEF(VT);
+        } else {
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+        }
+      }
+      // Calculate new mask.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors so fall back to
+  // replacing the shuffle with extract and build vector.
+  MVT EltVT = VT.getVectorElementType();
+  MVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
+      Ops.push_back(DAG.getUNDEF(EltVT));
+    } else {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src2,
+                                  DAG.getConstant(Idx - SrcNumElts, PtrVT)));
+    }
+  }
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Value *Op1 = I.getOperand(1);
+  const Type *AggTy = I.getType();
+  const Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<MVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<MVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+
+  SDValue Agg = getValue(Op0);
+  SDValue Val = getValue(Op1);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  for (; i != LinearIndex + NumValValues; ++i)
+    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Type *AggTy = Op0->getType();
+  const Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<MVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s).
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+        SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValValueVTs[0], NumValValues),
+                           &Values[0], NumValValues));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+
+  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        DAG.getIntPtrConstant(Offset));
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs =
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        SDValue OffsVal;
+        unsigned PtrBits = TLI.getPointerTy().getSizeInBits();
+        if (PtrBits < 64) {
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                                TLI.getPointerTy(),
+                                DAG.getConstant(Offs, MVT::i64));
+        } else
+          OffsVal = DAG.getIntPtrConstant(Offs);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        OffsVal);
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD->getTypeAllocSize(Ty);
+      SDValue IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      if (IdxN.getValueType().bitsLT(N.getValueType()))
+        IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(),
+                           N.getValueType(), IdxN);
+      else if (IdxN.getValueType().bitsGT(N.getValueType()))
+        IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                           N.getValueType(), IdxN);
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (ElementSize != 1) {
+        if (isPowerOf2_64(ElementSize)) {
+          unsigned Amt = Log2_64(ElementSize);
+          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, TLI.getPointerTy()));
+        } else {
+          SDValue Scale = DAG.getIntPtrConstant(ElementSize);
+          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                      N.getValueType(), N, IdxN);
+    }
+  }
+  setValue(&I, N);
+}
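(Aside, not part of the imported source: the address arithmetic in visitGetElementPtr is base-plus-scaled-index math, with the power-of-two case strength-reduced to a shift. A scalar sketch with uint64_t for the pointer; illustrative only:)

  #include <cstdint>

  // Sketch of one array-index GEP step: N = N + Idx * ElementSize, where a
  // power-of-two ElementSize becomes a shift by log2(ElementSize).
  static uint64_t gepArrayStep(uint64_t N, uint64_t Idx, uint64_t ElementSize) {
    if (ElementSize == 1)
      return N + Idx;
    if ((ElementSize & (ElementSize - 1)) == 0) {   // power of two
      unsigned Amt = 0;
      while ((1ULL << Amt) != ElementSize) ++Amt;   // i.e. Log2_64(ElementSize)
      return N + (Idx << Amt);
    }
    return N + Idx * ElementSize;
  }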
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+                          AllocSize,
+                          DAG.getConstant(TySize, AllocSize.getValueType()));
+
+  MVT IntPtr = TLI.getPointerTy();
+  if (IntPtr.bitsLT(AllocSize.getValueType()))
+    AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                            IntPtr, AllocSize);
+  else if (IntPtr.bitsGT(AllocSize.getValueType()))
+    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                            IntPtr, AllocSize);
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If the size is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
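(Aside, not part of the imported source: the ADD/AND pair above is the standard round-up-to-alignment idiom. In scalar form, assuming StackAlign is a power of two as the frame layout guarantees:)

  #include <cstdint>

  // Sketch of the DYNAMIC_STACKALLOC size computation: round the byte size
  // up to the stack alignment with (Size + Align-1) & ~(Align-1).
  static uint64_t roundUpToStackAlign(uint64_t Size, uint64_t StackAlign) {
    return (Size + StackAlign - 1) & ~(StackAlign - 1);
  }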
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+                          AllocSize,
+                          DAG.getConstant(TySize, AllocSize.getValueType()));
+
+  MVT IntPtr = TLI.getPointerTy();
+  if (IntPtr.bitsLT(AllocSize.getValueType()))
+    AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                            IntPtr, AllocSize);
+  else if (IntPtr.bitsGT(AllocSize.getValueType()))
+    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                            IntPtr, AllocSize);
+
+  // Handle alignment. If the requested alignment is less than or equal to
+  // the stack alignment, ignore it. If the size is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+  const Value *SV = I.getOperand(0);
+  SDValue Ptr = getValue(SV);
+
+  const Type *Ty = I.getType();
+  bool isVolatile = I.isVolatile();
+  unsigned Alignment = I.getAlignment();
+
+  SmallVector<MVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  SDValue Root;
+  bool ConstantMemory = false;
+  if (I.isVolatile())
+    // Serialize volatile loads with other side effects.
+    Root = getRoot();
+  else if (AA->pointsToConstantMemory(SV)) {
+    // Do not serialize (non-volatile) loads of constant memory with anything.
+    Root = DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot(); + } + + SmallVector Values(NumValues); + SmallVector Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], + isVolatile, Alignment); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, + &Chains[0], NumValues); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + + +void SelectionDAGLowering::visitStore(StoreInst &I) { + Value *SrcV = I.getOperand(0); + Value *PtrV = I.getOperand(1); + + SmallVector ValueVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + PtrV, Offsets[i], + isVolatile, Alignment); + + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic) + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. 
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDValue Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + std::vector VTArray; + if (I.getType() != Type::VoidTy) { + MVT VT = TLI.getValueType(I.getType()); + if (VT.isVector()) { + const VectorType *DestTy = cast(I.getType()); + MVT EltVT = TLI.getValueType(DestTy->getElementType()); + + VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); + assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); + } + + assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); + VTArray.push_back(VT); + } + if (HasChain) + VTArray.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + VTs, &Ops[0], Ops.size(), + Info.memVT, Info.ptrVal, Info.offset, + Info.align, Info.vol, + Info.readMem, Info.writeMem); + } + else if (!HasChain) + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else if (I.getType() != Type::VoidTy) + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + if (I.getType() != Type::VoidTy) { + if (const VectorType *PTy = dyn_cast(I.getType())) { + MVT VT = TLI.getValueType(PTy); + Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + } + setValue(&I, Result); + } +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +static GlobalVariable *ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast(V); + assert ((GV || isa(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +namespace llvm { + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + ConstantExpr *CE = cast(I.getOperand(2)); + assert(CE->getOpcode() == Instruction::BitCast && + isa(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. + std::vector TyInfo; + unsigned N = I.getNumOperands(); + + for (unsigned i = N - 1; i > 2; --i) { + if (ConstantInt *CI = dyn_cast(I.getOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert (FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. 
+        TyInfo.reserve(FilterLength - 1);
+        for (unsigned j = i + 1; j < FirstCatch; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addFilterTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is a call instruction,
+///                     Op is the associated NodeType for I.
+const char *
+SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  SDValue L =
+    DAG.getAtomic(Op, getCurDebugLoc(),
+                  getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+                  Root,
+                  getValue(I.getOperand(1)),
+                  getValue(I.getOperand(2)),
+                  I.getOperand(1));
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+  SDValue Op1 = getValue(I.getOperand(1));
+  SDValue Op2 = getValue(I.getOperand(2));
+
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
+
+  setValue(&I, Result);
+  return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void +SelectionDAGLowering::visitExp(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + + // Add the exponent into the result in integer domain. + SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. 
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. 
+ SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. 
+void +SelectionDAGLowering::visitLog2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog10(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192)); 
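An aside before the remaining terms of this polynomial: all three log expansions (visitLog, visitLog2, visitLog10) share one scalar recipe: split x into exponent and significand with the bit operations from GetExponent and GetSignificand, scale the exponent by log_b(2), and approximate log_b(m), m in [1,2), with a minimax polynomial. Written directly in C++, with the 6-bit-precision log10 constants from above and a hypothetical function name (a sketch, assuming x is a positive, normal float):

    #include <cstdint>
    #include <cstring>

    float log10Approx(float x) {
      uint32_t Bits;
      std::memcpy(&Bits, &x, sizeof(Bits));    // BIT_CONVERT f32 -> i32
      // GetExponent: unbiased exponent as a float.
      float Exp = (float)((int)((Bits >> 23) & 0xff) - 127);
      // GetSignificand: significand with the exponent forced to 1.
      uint32_t MBits = (Bits & 0x007fffffu) | 0x3f800000u;
      float m;
      std::memcpy(&m, &MBits, sizeof(m));
      // log10(x) = e*log10(2) + log10(m), with a minimax polynomial for
      // log10(m) (the LimitFloatPrecision <= 6 constants).
      float LogOfExponent = Exp * 0.30102999f;
      float Log10ofMantissa =
          (0.60948995f - 0.10380950f * m) * m - 0.50419619f;
      return LogOfExponent + Log10ofMantissa;
    }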
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG10, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitExp2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is 
better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. +void +SelectionDAGLowering::visitPow(CallInst &I) { + SDValue result; + Value *Val = I.getOperand(1); + DebugLoc dl = getCurDebugLoc(); + bool IsExp10 = false; + + if (getValue(Val).getValueType() == MVT::f32 && + getValue(I.getOperand(2)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (Constant *C = const_cast(dyn_cast(Val))) { + if (ConstantFP *CFP = dyn_cast(C)) { + APFloat Ten(10.0f); + IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); + } + } + } + + if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(2)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // IntegerPartOfX = (int32_t)(x * LOG2OF10); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x40549a78)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // twoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = 
DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FPOW, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2))); + } + + setValue(&I, result); +} + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + break; + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + break; + case Intrinsic::memcpy: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::memset: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); + return 0; + } + case Intrinsic::memmove: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast(Op3)) + Size = C->getZExtValue(); + if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst &SPI = cast(I); + if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) { + MachineFunction &MF = DAG.getMachineFunction(); + DICompileUnit CU(cast(SPI.getContext())); + DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(), + SPI.getLine(), SPI.getColumn())); + setCurDebugLoc(Loc); + + if (OptLevel == CodeGenOpt::None) + DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), + SPI.getLine(), + SPI.getColumn(), + SPI.getContext())); + } + return 0; + } + case Intrinsic::dbg_region_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionStartInst &RSI = cast(I); + + if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned LabelID = + DW->RecordRegionStart(cast(RSI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + + return 0; + } + case Intrinsic::dbg_region_end: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionEndInst &REI = cast(I); + + if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + MachineFunction &MF = DAG.getMachineFunction(); + DISubprogram Subprogram(cast(REI.getContext())); + + if (Subprogram.isNull() || Subprogram.describes(MF.getFunction())) { + unsigned LabelID = + DW->RecordRegionEnd(cast(REI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), 
LabelID)); + } else { + // This is end of inlined function. Debugging information for inlined + // function is not handled yet (only supported by FastISel). + if (OptLevel == CodeGenOpt::None) { + unsigned ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID != 0) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics or + // "beginning of inlined scope" is not recoginized due to missing + // location info. In such cases, do ignore this region.end. + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), ID)); + } + } + } + + return 0; + } + case Intrinsic::dbg_func_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgFuncStartInst &FSI = cast(I); + Value *SP = FSI.getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, OptLevel)) + return 0; + + MachineFunction &MF = DAG.getMachineFunction(); + if (OptLevel == CodeGenOpt::None) { + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = CurDebugLoc; + DISubprogram Subprogram(cast(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return 0; + + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + } else { + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast(FSI.getSubprogram())); + } + } + } else { + DISubprogram Subprogram(cast(SP)); + + std::string SPName; + Subprogram.getLinkageName(SPName); + if (!SPName.empty() + && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) { + // This is beginning of inlined function. Debugging information for + // inlined function is not handled yet (only supported by FastISel). + return 0; + } + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is + // what (most?) gdb expects. + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + // Record the source line but does not create a label for the normal + // function start. It will be emitted at asm emission time. However, + // create a label if this is a beginning of inlined function. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + // FIXME - Start new region because llvm.dbg.func_start also defines + // beginning of function scope. 
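Stepping back to the top of the intrinsic switch: the `return "_setjmp"+!TLI.usesUnderscoreSetJmp();` cases rely on pointer arithmetic over a string literal, not string concatenation; adding 1 to the literal's address skips the leading underscore. A self-contained illustration (the helper name is invented for the example):

    #include <cassert>
    #include <cstring>

    const char *setjmpName(bool UsesUnderscore) {
      // "_setjmp" + 0 is "_setjmp"; "_setjmp" + 1 points at the 's',
      // i.e. the tail string "setjmp".
      return "_setjmp" + !UsesUnderscore;
    }

    int main() {
      assert(std::strcmp(setjmpName(true), "_setjmp") == 0);
      assert(std::strcmp(setjmpName(false), "setjmp") == 0);
      return 0;
    }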
+ } + + return 0; + } + case Intrinsic::dbg_declare: { + if (OptLevel == CodeGenOpt::None) { + DbgDeclareInst &DI = cast(I); + Value *Variable = DI.getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, OptLevel)) + DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), + getValue(DI.getAddress()), getValue(Variable))); + } else { + // FIXME: Do something sensible here when we support debug declare. + } + return 0; + } + case Intrinsic::eh_exception: { + // Insert the EXCEPTIONADDR instruction. + assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[1]; + Ops[0] = DAG.getRoot(); + SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + return 0; + } + + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(&I); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, VT)); + } + + return 0; + } + + case Intrinsic::eh_typeid_for_i32: + case Intrinsic::eh_typeid_for_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + + unsigned TypeID = MMI->getTypeIDFor(GV); + setValue(&I, DAG.getConstant(TypeID, VT)); + } else { + // Return something different to eh_selector. 
+ setValue(&I, DAG.getConstant(1, VT)); + } + + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + case Intrinsic::eh_unwind_init: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsUnwindInit(true); + } + + return 0; + + case Intrinsic::eh_dwarf_cfa: { + MVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg; + if (VT.bitsGT(TLI.getPointerTy())) + CfaArg = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + else + CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + setValue(&I, DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, + TLI.getPointerTy())), + Offset)); + return 0; + } + + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + MVT DestVT = TLI.getValueType(I.getType()); + Value* Op1 = I.getOperand(1); + setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + Code)); + return 0; + } + + case Intrinsic::sqrt: + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::powi: + setValue(&I, DAG.getNode(ISD::FPOWI, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + return 0; + case Intrinsic::sin: + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cos: + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::log: + visitLog(I); + return 0; + case Intrinsic::log2: + visitLog2(I); + return 0; + case Intrinsic::log10: + visitLog10(I); + return 0; + case Intrinsic::exp: + visitExp(I); + return 0; + case Intrinsic::exp2: + visitExp2(I); + return 0; + case Intrinsic::pow: + visitPow(I); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case 
Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::part_select: { + // Currently not implemented: just abort + assert(0 && "part_select intrinsic not implemented"); + abort(); + } + case Intrinsic::part_set: { + // Currently not implemented: just abort + assert(0 && "part_set intrinsic not implemented"); + abort(); + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getOperand(1)); // The guard's value. + AllocaInst *Slot = cast(I.getOperand(2)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. 
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + PseudoSourceValue::getFixedStack(FI), + 0, true); + setValue(&I, Result); + DAG.setRoot(Result); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast(I.getOperand(2)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[5] = DAG.getSrcValue(F); + + SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), + Ops, 6); + + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + + case Intrinsic::gcroot: + if (GFI) { + Value *Alloca = I.getOperand(1); + Constant *TypeMap = cast(I.getOperand(2)); + + FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + + case Intrinsic::gcread: + case Intrinsic::gcwrite: + assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + return 0; + + case Intrinsic::flt_rounds: { + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + } + + case Intrinsic::trap: { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + + case Intrinsic::uadd_with_overflow: + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); + + case Intrinsic::prefetch: { + SDValue Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + return 0; + } + + case Intrinsic::memory_barrier: { + SDValue Ops[6]; + Ops[0] = getRoot(); + for (int x = 1; x < 6; ++x) + Ops[x] = getValue(I.getOperand(x)); + + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); + return 0; + } + case Intrinsic::atomic_cmp_swap: { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_load_add: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); + case Intrinsic::atomic_load_sub: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_nand: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); + case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case Intrinsic::atomic_load_umin: + return 
implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + } +} + + +void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, + bool IsTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast(PT->getElementType()); + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + unsigned BeginLabel = 0, EndLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad && MMI) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getControlRoot(), BeginLabel)); + } + + std::pair Result = + TLI.LowerCallTo(getRoot(), CS.getType(), + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), + CS.getCallingConv(), + IsTailCall && PerformTailCallOpt, + Callee, Args, DAG, getCurDebugLoc()); + if (CS.getType() != Type::VoidTy) + setValue(CS.getInstruction(), Result.first); + DAG.setRoot(Result.second); + + if (LandingPad && MMI) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + EndLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + + +void SelectionDAGLowering::visitCall(CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + if (II) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. + unsigned NameLen = F->getNameLen(); + if (!F->hasLocalLinkage() && NameLen) { + const char *NameStr = F->getNameStart(); + if (NameStr[0] == 'c' && + ((NameLen == 8 && !strcmp(NameStr, "copysign")) || + (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (I.getNumOperands() == 3 && // Basic sanity checks. 
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.getType() == I.getOperand(2)->getType()) {
+          SDValue LHS = getValue(I.getOperand(1));
+          SDValue RHS = getValue(I.getOperand(2));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+      } else if (NameStr[0] == 'f' &&
+                 ((NameLen == 4 && !strcmp(NameStr, "fabs")) ||
+                  (NameLen == 5 && !strcmp(NameStr, "fabsf")) ||
+                  (NameLen == 5 && !strcmp(NameStr, "fabsl")))) {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (NameStr[0] == 's' &&
+                 ((NameLen == 3 && !strcmp(NameStr, "sin")) ||
+                  (NameLen == 4 && !strcmp(NameStr, "sinf")) ||
+                  (NameLen == 4 && !strcmp(NameStr, "sinl")))) {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (NameStr[0] == 'c' &&
+                 ((NameLen == 3 && !strcmp(NameStr, "cos")) ||
+                  (NameLen == 4 && !strcmp(NameStr, "cosf")) ||
+                  (NameLen == 4 && !strcmp(NameStr, "cosl")))) {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      }
+    }
+  } else if (isa<InlineAsm>(I.getOperand(0))) {
+    visitInlineAsm(&I);
+    return;
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getOperand(0));
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from this
+/// value and return the result as a ValueVT value.  This uses Chain/Flag as
+/// the input and updates them for the output Chain/Flag.  If the Flag
+/// pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+                                      SDValue &Chain,
+                                      SDValue *Flag) const {
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
+    MVT ValueVT = ValueVTs[Value];
+    unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+    MVT RegisterVT = RegVTs[Value];
+
+    Parts.resize(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      SDValue P;
+      if (Flag == 0)
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+      else {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+        *Flag = P.getValue(2);
+      }
+      Chain = P.getValue(1);
+
+      // If the source register was virtual and if we know something about it,
+      // add an assert node.
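+      // (For instance, a vreg holding an i32 known to have at least 25 sign
+      // bits falls into the "ASSERT SEXT 8" bucket below: tagging it
+      // AssertSext from i8 lets later combines drop redundant extensions.)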
+      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+          RegisterVT.isInteger() && !RegisterVT.isVector()) {
+        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+        FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+        if (FLI.LiveOutRegInfo.size() > SlotNo) {
+          FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+          unsigned RegSize = RegisterVT.getSizeInBits();
+          unsigned NumSignBits = LOI.NumSignBits;
+          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+          // FIXME: We capture more information than the dag can represent.  For
+          // now, just use the tightest assertzext/assertsext possible.
+          bool isSExt = true;
+          MVT FromVT(MVT::Other);
+          if (NumSignBits == RegSize)
+            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
+          else if (NumZeroBits >= RegSize-1)
+            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
+          else if (NumSignBits > RegSize-8)
+            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
+          else if (NumZeroBits >= RegSize-8)
+            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
+          else if (NumSignBits > RegSize-16)
+            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
+          else if (NumZeroBits >= RegSize-16)
+            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+          else if (NumSignBits > RegSize-32)
+            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
+          else if (NumZeroBits >= RegSize-32)
+            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+          if (FromVT != MVT::Other) {
+            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                            RegisterVT, P, DAG.getValueType(FromVT));
+          }
+        }
+      }
+
+      Parts[i] = P;
+    }
+
+    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+                                     NumRegs, RegisterVT, ValueVT);
+    Part += NumRegs;
+    Parts.clear();
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl,
+                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                     &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+/// value into the registers specified by this object.  This uses Chain/Flag
+/// as the input and updates them for the output Chain/Flag.  If the Flag
+/// pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue &Chain, SDValue *Flag) const {
+  // Get the list of the value's legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    MVT ValueVT = ValueVTs[Value];
+    unsigned NumParts = TLI->getNumRegisters(ValueVT);
+    MVT RegisterVT = RegVTs[Value];
+
+    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+                   &Parts[Part], NumParts, RegisterVT);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (Flag == 0)
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it.  That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit.  If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    //   c1, f1 = CopyToReg
+    //   c2, f2 = CopyToReg
+    //   c3     = TokenFactor c1, c2
+    //   ...
+    //          = op c3, ..., f2
+    Chain = Chains[NumRegs-1];
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
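+// A note on the operand flag word built by AddInlineAsmOperands below; the
+// layout is inferred from the shifts and masks used here and in
+// visitInlineAsm, not from a separate spec:
+//   bits [2:0]   operand kind (1 = REGUSE, 2 = REGDEF, 3 = IMM, 4 = MEM,
+//                6 = EARLYCLOBBER REGDEF)
+//   bits [15:3]  number of registers in the operand
+//   bit  31      set when the operand is tied to a matching operand, in
+//   bits [30:16] which case these hold the matching operand's index.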
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+                                        bool HasMatching, unsigned MatchingIdx,
+                                        SelectionDAG &DAG,
+                                        std::vector<SDValue> &Ops) const {
+  MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+  unsigned Flag = Code | (Regs.size() << 3);
+  if (HasMatching)
+    Flag |= 0x80000000 | (MatchingIdx << 16);
+  Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]);
+    MVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI,
+                      const TargetRegisterInfo *TRI) {
+  MVT FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+       E = TRI->regclass_end(); RCI != E; ++RCI) {
+    MVT ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified.  For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (due to them not being taken
+    // out of allocation, because a variable sized allocation hasn't been seen
+    // yet).  This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs,
+                         const TargetRegisterInfo &TRI) const {
+    if (isOutReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+    }
+    if (isInReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+    }
+  }
+
+  /// getCallOperandValMVT - Return the MVT of the Value* that this operand
+  /// corresponds to.  If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  MVT getCallOperandValMVT(const TargetLowering &TLI,
+                           const TargetData *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    const llvm::Type *OpTy = CallOperandVal->getType();
+
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect)
+      OpTy = cast<PointerType>(OpTy)->getElementType();
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
+    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      switch (BitSize) {
+      default: break;
+      case 1:
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+        OpTy = IntegerType::get(BitSize);
+        break;
+      }
+    }
+
+    return TLI.getValueType(OpTy, true);
+  }
+
+private:
+  /// MarkRegAndAliases - Mark the specified register and all aliases in the
+  /// specified set.
+  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+                                const TargetRegisterInfo &TRI) {
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+    Regs.insert(Reg);
+    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+      for (; *Aliases; ++Aliases)
+        Regs.insert(*Aliases);
+  }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand.  We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process.  However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation.  This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand.
+///   InputRegs and OutputRegs are the sets of already allocated physical
+///     registers.
+///
+void SelectionDAGLowering::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is a FP input in an integer register (or vice versa) insert a bit
+    // cast of the input value.  More generally, handle any case where the input
+    // value disagrees with the register class we plan to stick this in.
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first MVT that the reg class contains.  If the
+      // types are identical size, use a bitcast to convert (e.g. two differing
+      // vector types).
+      MVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is a FP value and we want it in FP registers, do a
+        // bitcast to the corresponding integer type.  This turns an f64 value
+        // into i64, which can be passed with two i32 values on a 32-bit
+        // machine.
+        RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+  }
+
+  MVT RegVT;
+  MVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    return;
+  }
+
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class.  Allocate NumRegs consecutive, available,
+  // registers from the class.
+  std::vector<unsigned> RegClassRegs
+    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                            OpInfo.ConstraintVT);
+
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+    if (!RC) {        // Couldn't allocate this register.
+      // Reset NumAllocated to make sure we return consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+
+      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+      return;
+    }
+  }
+
+  // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+                          const TargetLowering &TLI) {
+  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+    InlineAsm::ConstraintInfo &CI = CInfos[i];
+    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+      if (CType == TargetLowering::C_Memory)
+        return true;
+    }
+
+    // Indirect operand accesses access memory.
+    if (CI.isIndirect)
+      return true;
+  }
+
+  return false;
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
+  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+
+  SDValue Chain, Flag;
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
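+  // A note on the overall shape of what follows: the first loop computes a
+  // value type for every constraint operand; a second loop picks the
+  // constraint code to use and pins down operands that name a specific
+  // physical register; a third loop assigns registers to register-class
+  // operands; and a final loop builds the ISD::INLINEASM operand list itself.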
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    MVT OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+        break;
+      }
+
+      // The return value of the call is this value.  As such, there is no
+      // corresponding argument.
+      assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpVT = TLI.getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpVT = TLI.getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asm's.
+    if (OpInfo.CallOperandVal) {
+      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpVT = OpInfo.getCallOperandValMVT(TLI, TD);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+  }
+
+  // Second pass over the constraints: compute which constraint option to use
+  // and assign registers to constraints that want a specific physreg.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input.  If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          cerr << "llvm: error: Unsupported asm: input constraint with a "
+               << "matching output constraint of incompatible type!\n";
+          exit(1);
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
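+        // (Illustrative case: an IR-level direct memory input such as
+        //    call void asm sideeffect "...", "m"(i32 %x)
+        // carries a plain value rather than a pointer, so the value is
+        // stored to a fresh stack slot and the slot address is used instead.)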
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+  ConstraintInfos.clear();
+
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "llvm: error: Couldn't allocate output reg for constraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      // If this is an indirect operand, store through the pointer after the
+      // asm.
+      if (OpInfo.isIndirect) {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      } else {
+        // This is the result value of the call.
+        assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+        // Concatenate this output onto the outputs list.
+        RetValRegs.append(OpInfo.AssignedRegs);
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+                                               6 /* EARLYCLOBBER REGDEF */ :
+                                               2 /* REGDEF */ ,
+                                               false,
+                                               0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
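+    // A matching ("tied") constraint names an earlier output operand, e.g.
+    //   asm("foo $0" : "=r"(x) : "0"(y));
+    // where input "0" must be placed in the same register as output 0.  The
+    // isInput case below checks for this before the general paths.  (The
+    // example is illustrative C syntax, not taken from this file.)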
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+                  (OpFlag & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+        }
+
+        unsigned OpFlag =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if ((OpFlag & 7) == 2 /*REGDEF*/
+            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          assert(!OpInfo.isIndirect &&
+                 "Don't know how to handle tied indirect register inputs yet!");
+          RegsForValue MatchedRegs;
+          MatchedRegs.TLI = &TLI;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.
+              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the operand value into
+          // the registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+                 "Unexpected number of operands");
+          // Add information to the INLINEASM node to know about this input.
+          // See InlineAsm.h isUseOperandTiedToDef.
+          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                          TLI.getPointerTy()));
+          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+          break;
+        }
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect &&
+               "Don't know how to handle indirect other inputs yet!");
+
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         hasMemory, Ops, DAG);
+        if (Ops.empty()) {
+          cerr << "llvm: error: Invalid operand for inline asm constraint '"
+               << OpInfo.ConstraintCode << "'!\n";
+          exit(1);
+        }
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "llvm: error: Couldn't allocate input reg for constraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                        Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+                                                 false, 0, DAG, AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             Chain, &Flag);
+
+    // FIXME: Why don't we do this for inline asms with MRVs?
+    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+      MVT ResultType = TLI.getValueType(CS.getType());
+
+      // If any of the results of the inline asm is a vector, it may have the
+      // wrong width/num elts.  This can happen for register classes that can
+      // contain multiple different value types.  The preg or vreg allocated may
+      // not have the same VT as was expected.  Convert it to the right type
+      // with bit_convert.
+      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                          ResultType, Val);
+
+      } else if (ResultType != Val.getValueType() &&
+                 ResultType.isInteger() && Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result may
+        // have a wider width than the expected result.  Extract the relevant
+        // portion.
+        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+      }
+
+      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+    }
+
+    setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
+    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+      return;
+  }
+
+  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+  // Process indirect outputs, first output all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    Value *Ptr = IndirectStoresToEmit[i].second;
+    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+  }
+
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDValue, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+    OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
+                                     StoresToEmit[i].first,
+                                     getValue(StoresToEmit[i].second),
+                                     StoresToEmit[i].second, 0));
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                        &OutChains[0], OutChains.size());
+  DAG.setRoot(Chain);
+}
+
+
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+  SDValue Src = getValue(I.getOperand(0));
+
+  // Scale up by the type size in the original i32 type width.  Various
+  // mid-level optimizers may make assumptions about demanded bits etc from the
+  // i32-ness of the size operand: we do not want to promote to i64 and then
+  // multiply on 64-bit targets.
+  // FIXME: Malloc inst should go away: PR715.
+  uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType());
+  if (ElementSize != 1)
+    Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
+                      Src, DAG.getConstant(ElementSize, Src.getValueType()));
+
+  MVT IntPtr = TLI.getPointerTy();
+
+  if (IntPtr.bitsLT(Src.getValueType()))
+    Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src);
+  else if (IntPtr.bitsGT(Src.getValueType()))
+    Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Src;
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+
+  std::pair<SDValue,SDValue> Result =
+    TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false,
+                    CallingConv::C, PerformTailCallOpt,
+                    DAG.getExternalSymbol("malloc", IntPtr),
+                    Args, DAG, getCurDebugLoc());
+  setValue(&I, Result.first);  // Pointers always fit in registers
+  DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = getValue(I.getOperand(0));
+  Entry.Ty = TLI.getTargetData()->getIntPtrType();
+  Args.push_back(Entry);
+  MVT IntPtr = TLI.getPointerTy();
+  std::pair<SDValue,SDValue> Result =
+    TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false,
+                    CallingConv::C, PerformTailCallOpt,
+                    DAG.getExternalSymbol("free", IntPtr), Args, DAG,
+                    getCurDebugLoc());
+  DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+                           getRoot(), getValue(I.getOperand(0)),
+                           DAG.getSrcValue(I.getOperand(0)));
+  setValue(&I, V);
+  DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getOperand(1)),
+                          getValue(I.getOperand(2)),
+                          DAG.getSrcValue(I.getOperand(1)),
+                          DAG.getSrcValue(I.getOperand(2))));
+}
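+// Note on the va_arg lowering above: ISD::VAARG both produces the fetched
+// value and advances the va_list, so visitVAArg threads the node's chain
+// result (V.getValue(1)) back into the root to keep that side effect ordered.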
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node.  FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be
+/// integrated into SDISel.
+void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &ArgValues,
+                                    DebugLoc dl) {
+  // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+  SmallVector<SDValue, 3+16> Ops;
+  Ops.push_back(DAG.getRoot());
+  Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+  Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+  // Add one result value for each formal argument.
+  SmallVector<MVT, 16> RetVals;
+  unsigned j = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++j) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(*this, I->getType(), ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      MVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForMVT();
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        getTargetData()->getABITypeAlignment(ArgTy);
+
+      if (F.paramHasAttr(j, Attribute::ZExt))
+        Flags.setZExt();
+      if (F.paramHasAttr(j, Attribute::SExt))
+        Flags.setSExt();
+      if (F.paramHasAttr(j, Attribute::InReg))
+        Flags.setInReg();
+      if (F.paramHasAttr(j, Attribute::StructRet))
+        Flags.setSRet();
+      if (F.paramHasAttr(j, Attribute::ByVal)) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(I->getType());
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should be passed from FE.  BE will guess if
+        // this info is not there but there are cases it cannot get right.
+        if (F.getParamAlignment(j))
+          FrameAlign = F.getParamAlignment(j);
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (F.paramHasAttr(j, Attribute::Nest))
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      MVT RegisterVT = getRegisterType(VT);
+      unsigned NumRegs = getNumRegisters(VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        RetVals.push_back(RegisterVT);
+        ISD::ArgFlagsTy MyFlags = Flags;
+        if (NumRegs > 1 && i == 0)
+          MyFlags.setSplit();
+        // if it isn't first piece, alignment must be 1
+        else if (i > 0)
+          MyFlags.setOrigAlign(1);
+        Ops.push_back(DAG.getArgFlags(MyFlags));
+      }
+    }
+  }
+
+  RetVals.push_back(MVT::Other);
+
+  // Create the node.
+  SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl,
+                               DAG.getVTList(&RetVals[0], RetVals.size()),
+                               &Ops[0], Ops.size()).getNode();
+
+  // Prelower FORMAL_ARGUMENTS.  This isn't required for functionality, but
+  // allows exposing the loads that may be part of the argument access to the
+  // first DAGCombiner pass.
+  SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG);
+
+  // The number of results should match up, except that the lowered one may have
+  // an extra flag result.
+  assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() ||
+          (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() &&
+           TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag))
+         && "Lowering produced unexpected number of results!");
+
+  // The FORMAL_ARGUMENTS node itself is likely no longer needed.
+  if (Result != TmpRes.getNode() && Result->use_empty()) {
+    HandleSDNode Dummy(DAG.getRoot());
+    DAG.RemoveDeadNode(Result);
+  }
+
+  Result = TmpRes.getNode();
+
+  unsigned NumArgRegs = Result->getNumValues() - 1;
+  DAG.setRoot(SDValue(Result, NumArgRegs));
+
+  // Set up the return result vector.
+  unsigned i = 0;
+  unsigned Idx = 1;
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+       ++I, ++Idx) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(*this, I->getType(), ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      MVT VT = ValueVTs[Value];
+      MVT PartVT = getRegisterType(VT);
+
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      for (unsigned j = 0; j != NumParts; ++j)
+        Parts[j] = SDValue(Result, i++);
+
+      ISD::NodeType AssertOp = ISD::DELETED_NODE;
+      if (F.paramHasAttr(Idx, Attribute::SExt))
+        AssertOp = ISD::AssertSext;
+      else if (F.paramHasAttr(Idx, Attribute::ZExt))
+        AssertOp = ISD::AssertZext;
+
+      ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts,
+                                           PartVT, VT, AssertOp));
+    }
+  }
+  assert(i == NumArgRegs && "Argument register count mismatch!");
+}
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete.  FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+                            bool RetSExt, bool RetZExt, bool isVarArg,
+                            bool isInreg,
+                            unsigned CallingConv, bool isTailCall,
+                            SDValue Callee,
+                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+  assert((!isTailCall || PerformTailCallOpt) &&
+         "isTailCall set when tail-call optimizations are disabled!");
+
+  SmallVector<SDValue, 32> Ops;
+  Ops.push_back(Chain);   // Op#0 - Chain
+  Ops.push_back(Callee);
+
+  // Handle all of the outgoing arguments.
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      MVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForMVT();
+      SDValue Op = SDValue(Args[i].Node.getNode(),
+                           Args[i].Node.getResNo() + Value);
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        getTargetData()->getABITypeAlignment(ArgTy);
+
+      if (Args[i].isZExt)
+        Flags.setZExt();
+      if (Args[i].isSExt)
+        Flags.setSExt();
+      if (Args[i].isInReg)
+        Flags.setInReg();
+      if (Args[i].isSRet)
+        Flags.setSRet();
+      if (Args[i].isByVal) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should come from FE.  BE will guess if this
+        // info is not there but there are cases it cannot get right.
+        if (Args[i].Alignment)
+          FrameAlign = Args[i].Alignment;
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (Args[i].isNest)
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      MVT PartVT = getRegisterType(VT);
+      unsigned NumParts = getNumRegisters(VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+      if (Args[i].isSExt)
+        ExtendKind = ISD::SIGN_EXTEND;
+      else if (Args[i].isZExt)
+        ExtendKind = ISD::ZERO_EXTEND;
+
+      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
+
+      for (unsigned i = 0; i != NumParts; ++i) {
+        // if it isn't first piece, alignment must be 1
+        ISD::ArgFlagsTy MyFlags = Flags;
+        if (NumParts > 1 && i == 0)
+          MyFlags.setSplit();
+        else if (i != 0)
+          MyFlags.setOrigAlign(1);
+
+        Ops.push_back(Parts[i]);
+        Ops.push_back(DAG.getArgFlags(MyFlags));
+      }
+    }
+  }
+
+  // Figure out the result value types.  We start by making a list of
+  // the potentially illegal return value types.
+  SmallVector<MVT, 4> LoweredRetTys;
+  SmallVector<MVT, 4> RetTys;
+  ComputeValueVTs(*this, RetTy, RetTys);
+
+  // Then we translate that to a list of legal types.
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    MVT VT = RetTys[I];
+    MVT RegisterVT = getRegisterType(VT);
+    unsigned NumRegs = getNumRegisters(VT);
+    for (unsigned i = 0; i != NumRegs; ++i)
+      LoweredRetTys.push_back(RegisterVT);
+  }
+
+  LoweredRetTys.push_back(MVT::Other);  // Always has a chain.
+
+  // Create the CALL node.
+  SDValue Res = DAG.getCall(CallingConv, dl,
+                            isVarArg, isTailCall, isInreg,
+                            DAG.getVTList(&LoweredRetTys[0],
+                                          LoweredRetTys.size()),
+                            &Ops[0], Ops.size()
+                            );
+  Chain = Res.getValue(LoweredRetTys.size() - 1);
+
+  // Gather up the call result into a single value.
+  if (RetTy != Type::VoidTy && !RetTys.empty()) {
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+
+    if (RetSExt)
+      AssertOp = ISD::AssertSext;
+    else if (RetZExt)
+      AssertOp = ISD::AssertZext;
+
+    SmallVector<SDValue, 4> ReturnValues;
+    unsigned RegNo = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      MVT VT = RetTys[I];
+      MVT RegisterVT = getRegisterType(VT);
+      unsigned NumRegs = getNumRegisters(VT);
+      unsigned RegNoEnd = NumRegs + RegNo;
+      SmallVector<SDValue, 4> Results;
+      for (; RegNo != RegNoEnd; ++RegNo)
+        Results.push_back(Res.getValue(RegNo));
+      SDValue ReturnValue =
+        getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT,
+                         AssertOp);
+      ReturnValues.push_back(ReturnValue);
+    }
+    Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                      DAG.getVTList(&RetTys[0], RetTys.size()),
+                      &ReturnValues[0], ReturnValues.size());
+  }
+
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  assert(0 && "LowerOperation not implemented for this target!");
+  abort();
+  return SDValue();
+}
+
+
+void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+  SDValue Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+  RegsForValue RFV(TLI, Reg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  PendingExports.push_back(Chain);
+}
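+// CopyValueToVirtualRegister deliberately chains its copy to the entry node
+// and queues it on PendingExports rather than rooting it immediately: the
+// copies have no ordering constraints among themselves, and they are folded
+// into a single TokenFactor just before the block's terminator is emitted.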
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  SDValue OldRoot = SDL->DAG.getRoot();
+  SmallVector<SDValue, 16> Args;
+  TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc());
+
+  unsigned a = 0;
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, AI->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    if (!AI->use_empty()) {
+      SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues,
+                                                SDL->getCurDebugLoc()));
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BB's will reference it as.
+      SDL->CopyToExportRegsIfNeeded(AI);
+    }
+    a += NumValues;
+  }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+/// ensure constants are generated when needed.  Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input.  We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB.  As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = SDL->ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo->CreateRegForValue(C);
+          SDL->CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo->ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo->CreateRegForValue(PHIOp);
+          SDL->CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
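+      // (A PHI whose type needs several registers, e.g. i64 on a 32-bit
+      // target, occupies consecutive vregs; the loop below records one
+      // PHINodesToUpdate entry per register.)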
+      SmallVector<MVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        MVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  SDL->ConstantsOut.clear();
+}
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks.  It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+                                                      FastISel *F) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types.  Two interesting things to note here.  First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves.  Second, this check is necessary because FastISel doesn't
+      // use CreateRegForValue to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(VT);
+        else {
+          SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      unsigned Reg = F->getRegForValue(PHIOp);
+      if (Reg == 0) {
+        SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+    }
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
new file mode 100644
index 000000000000..578aa591ce67
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -0,0 +1,558 @@
+//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILD_H
+#define SELECTIONDAGBUILD_H
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class FreeInst;
+class Function;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class MallocInst;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VICmpInst;
+class VFCmpInst;
+class VAArgInst;
+class ZExtInst;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+  TargetLowering &TLI;
+  Function *Fn;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+
+  explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+  /// set - Initialize this FunctionLoweringInfo with the given Function
+  /// and its associated MachineFunction.
+  ///
+  void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
+           bool EnableFastISel);
+
+  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+  /// ValueMap - Since we emit code for the function a basic block at a time,
+  /// we must remember which virtual registers hold the values for
+  /// cross-basic-block values.
+  DenseMap<const Value*, unsigned> ValueMap;
+
+  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+  /// the entry block.  This allows the allocas to be efficiently referenced
+  /// anywhere in the function.
+  DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+  SmallSet<Instruction*, 8> CatchInfoLost;
+  SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+  unsigned MakeReg(MVT VT);
+
+  /// isExportedInst - Return true if the specified value is an instruction
+  /// exported from its block.
+  bool isExportedInst(const Value *V) {
+    return ValueMap.count(V);
+  }
+
+  unsigned CreateRegForValue(const Value *V);
+
+  unsigned InitializeRegForValue(const Value *V) {
+    unsigned &R = ValueMap[V];
+    assert(R == 0 && "Already initialized this value register!");
+    return R = CreateRegForValue(V);
+  }
+
+  struct LiveOutInfo {
+    unsigned NumSignBits;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+  };
+
+  /// LiveOutRegInfo - Information about live out vregs, indexed by their
+  /// register number offset by 'FirstVirtualRegister'.
+ std::vector LiveOutRegInfo; + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear() { + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); + } +}; + +//===----------------------------------------------------------------------===// +/// SelectionDAGLowering - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// Also, targets can overload any lowering method. +/// +class SelectionDAGLowering { + MachineBasicBlock *CurMBB; + + /// CurDebugLoc - current file + line number. Changes as we build the DAG. + DebugLoc CurDebugLoc; + + DenseMap NodeMap; + + /// PendingLoads - Loads are not emitted to the program immediately. We bunch + /// them up and then emit token factor nodes when possible. This allows us to + /// get simple disambiguation between loads without worrying about alias + /// analysis. + SmallVector PendingLoads; + + /// PendingExports - CopyToReg nodes that copy values to virtual registers + /// for export to other blocks need to be emitted before any terminator + /// instruction, but they have no other ordering requirements. We bunch them + /// up and the emit a single tokenfactor for them just before terminator + /// instructions. + SmallVector PendingExports; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. + struct Case { + Constant* Low; + Constant* High; + MachineBasicBlock* BB; + + Case() : Low(0), High(0), BB(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb) : + Low(low), High(high), BB(bb) { } + uint64_t size() const { + uint64_t rHigh = cast(High)->getSExtValue(); + uint64_t rLow = cast(Low)->getSExtValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): + Mask(mask), BB(bb), Bits(bits) { } + }; + + typedef std::vector CaseVector; + typedef std::vector CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + Constant *LT; + Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector CaseRecVector; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! 
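Case models an inclusive value range [Low, High] with a single target block, so size() is High - Low + 1, and the WARNING above matters because overlapping ranges would make the sort ambiguous. A rough standalone analogue of sorting cases and merging contiguous same-destination values into ranges, in the spirit of the Clusterify routine declared just below (SimpleCase and clusterCases are invented names; the real Clusterify's exact policy is not shown here):

#include <algorithm>
#include <cstdint>
#include <vector>

// A stripped-down analogue of the Case struct above: an inclusive value
// range [Low, High] that branches to destination Dest.
struct SimpleCase {
  int64_t Low, High;
  int Dest;                                  // stand-in for MachineBasicBlock*
  uint64_t size() const { return uint64_t(High) - uint64_t(Low) + 1; }
};

// Sort by Low (cf. CaseCmp below), then merge adjacent cases that are
// contiguous and share a destination. Assumes the input ranges are disjoint,
// per the WARNING above.
static void clusterCases(std::vector<SimpleCase> &Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const SimpleCase &A, const SimpleCase &B) {
              return A.Low < B.Low;
            });
  std::vector<SimpleCase> Out;
  for (const SimpleCase &C : Cases) {
    if (!Out.empty() && Out.back().Dest == C.Dest &&
        Out.back().High + 1 == C.Low)
      Out.back().High = C.High;              // extend the previous range
    else
      Out.push_back(C);
  }
  Cases.swap(Out);
}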
+ struct CaseCmp { + bool operator () (const Case& C1, const Case& C2) { + assert(isa(C1.Low) && isa(C2.High)); + const ConstantInt* CI1 = cast(C1.Low); + const ConstantInt* CI2 = cast(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + struct CaseBitsCmp { + bool operator () (const CaseBits& C1, const CaseBits& C2) { + return C1.Bits > C2.Bits; + } + }; + + size_t Clusterify(CaseVector& Cases, const SwitchInst &SI); + + /// CaseBlock - This structure is used to communicate between SDLowering and + /// SDISel for the code generation of additional basic blocks needed by multi- + /// case switch statements. + struct CaseBlock { + CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + MachineBasicBlock *truebb, MachineBasicBlock *falsebb, + MachineBasicBlock *me) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {} + // CC - the condition code to use for the case block's setcc node + ISD::CondCode CC; + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. + // Emit by default LHS op RHS. MHS is used for range comparisons: + // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). + Value *CmpLHS, *CmpMHS, *CmpRHS; + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. + MachineBasicBlock *TrueBB, *FalseBB; + // ThisBB - the block into which to emit the code for the setcc and branches + MachineBasicBlock *ThisBB; + }; + struct JumpTable { + JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, + MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} + + /// Reg - the virtual register containing the index of the jump table entry + //. to jump to. + unsigned Reg; + /// JTI - the JumpTableIndex for this jump table in the function. + unsigned JTI; + /// MBB - the MBB into which to emit the code for the indirect jump. + MachineBasicBlock *MBB; + /// Default - the MBB of the default bb, which is a successor of the range + /// check MBB. This is when updating PHI nodes in successors. + MachineBasicBlock *Default; + }; + struct JumpTableHeader { + JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H, + bool E = false): + First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} + APInt First; + APInt Last; + Value *SValue; + MachineBasicBlock *HeaderBB; + bool Emitted; + }; + typedef std::pair JumpTableBlock; + + struct BitTestCase { + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): + Mask(M), ThisBB(T), TargetBB(Tr) { } + uint64_t Mask; + MachineBasicBlock* ThisBB; + MachineBasicBlock* TargetBB; + }; + + typedef SmallVector BitTestInfo; + + struct BitTestBlock { + BitTestBlock(APInt F, APInt R, Value* SV, + unsigned Rg, bool E, + MachineBasicBlock* P, MachineBasicBlock* D, + const BitTestInfo& C): + First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + Parent(P), Default(D), Cases(C) { } + APInt First; + APInt Range; + Value *SValue; + unsigned Reg; + bool Emitted; + MachineBasicBlock *Parent; + MachineBasicBlock *Default; + BitTestInfo Cases; + }; + +public: + // TLI - This is information that describes the available target features we + // need for lowering. This indicates when operations are unavailable, + // implemented with a libcall, etc. + TargetLowering &TLI; + SelectionDAG &DAG; + const TargetData *TD; + AliasAnalysis *AA; + + /// SwitchCases - Vector of CaseBlock structures used to communicate + /// SwitchInst code generation information. 
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<const Constant *, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  ///
+  CodeGenOpt::Level OptLevel;
+
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+                       FunctionLoweringInfo &funcinfo,
+                       CodeGenOpt::Level ol)
+    : CurDebugLoc(DebugLoc::getUnknownLoc()),
+      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGLowering object to be used
+  /// for a new block. This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items. This must be done before emitting
+  /// a store or any other node that may need to be ordered after any
+  /// prior load instructions.
+  ///
+  SDValue getRoot();
+
+  /// getControlRoot - Similar to getRoot, but instead of flushing all the
+  /// PendingLoad items, flush all the PendingExports items. It is necessary
+  /// to do this before emitting a terminator instruction.
+  ///
+  SDValue getControlRoot();
+
+  DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+  void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+  void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  void visit(Instruction &I);
+
+  void visit(unsigned Opcode, User &I);
+
+  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+  SDValue getValue(const Value *V);
+
+  void setValue(const Value *V, SDValue NewN) {
+    SDValue &N = NodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+
+  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                            std::set<unsigned> &OutputRegs,
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            unsigned Opc);
+  void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB);
+  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+  void CopyToExportRegsIfNeeded(Value *V);
+  void ExportFromCurrentBlock(Value *V);
+  void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+                   MachineBasicBlock *LandingPad = NULL);
+
+private:
+  // Terminator instructions.
+ void visitRet(ReturnInst &I); + void visitBr(BranchInst &I); + void visitSwitch(SwitchInst &I); + void visitUnreachable(UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); +public: + void visitSwitchCase(CaseBlock &CB); + void visitBitTestHeader(BitTestBlock &B); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + +private: + // These all get lowered before this pass. + void visitInvoke(InvokeInst &I); + void visitUnwind(UnwindInst &I); + + void visitBinary(User &I, unsigned OpCode); + void visitShift(User &I, unsigned Opcode); + void visitAdd(User &I); + void visitSub(User &I); + void visitMul(User &I); + void visitURem(User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (User &I) { visitBinary(I, ISD::AND); } + void visitOr (User &I) { visitBinary(I, ISD::OR); } + void visitXor (User &I) { visitBinary(I, ISD::XOR); } + void visitShl (User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); + void visitVICmp(User &I); + void visitVFCmp(User &I); + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); + + void visitExtractElement(User &I); + void visitInsertElement(User &I); + void visitShuffleVector(User &I); + + void visitExtractValue(ExtractValueInst &I); + void visitInsertValue(InsertValueInst &I); + + void visitGetElementPtr(User &I); + void visitSelect(User &I); + + void visitMalloc(MallocInst &I); + void visitFree(FreeInst &I); + void visitAlloca(AllocaInst &I); + void visitLoad(LoadInst &I); + void visitStore(StoreInst &I); + void visitPHI(PHINode &I) { } // PHI nodes are handled specially. 
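The handleSmallSwitchRange / handleJTSwitchCase / handleBitTestsSwitchCase / handleBTSplitSwitchCase helpers above embody an escalation strategy: linear compares for small case sets, a jump table when the cases are dense, bit tests when the span fits in a machine word, and a binary split otherwise. The sketch below only illustrates that kind of decision; the thresholds are assumptions made for the example, not the values those handlers actually use:

#include <cstdint>

enum class SwitchStrategy { CompareChain, JumpTable, BitTests, SplitAndRecurse };

// Illustrative only: pick a lowering strategy from the number of cases and
// the span of the case values. The thresholds (4 cases, 40% density) are
// assumptions for this sketch.
SwitchStrategy pickStrategy(uint64_t NumCases, uint64_t Range,
                            unsigned BitWidth) {
  if (NumCases <= 4)
    return SwitchStrategy::CompareChain;     // small: linear compares win
  if (Range != 0 && NumCases * 100 >= Range * 40)
    return SwitchStrategy::JumpTable;        // dense enough for a table
  if (Range <= BitWidth)
    return SwitchStrategy::BitTests;         // range fits in one word mask
  return SwitchStrategy::SplitAndRecurse;    // fall back to binary split
}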
+ void visitCall(CallInst &I); + void visitInlineAsm(CallSite CS); + const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); + + void visitPow(CallInst &I); + void visitExp2(CallInst &I); + void visitExp(CallInst &I); + void visitLog(CallInst &I); + void visitLog2(CallInst &I); + void visitLog10(CallInst &I); + + void visitVAStart(CallInst &I); + void visitVAArg(VAArgInst &I); + void visitVAEnd(CallInst &I); + void visitVACopy(CallInst &I); + + void visitUserOp1(Instruction &I) { + assert(0 && "UserOp1 should not exist at instruction selection time!"); + abort(); + } + void visitUserOp2(Instruction &I) { + assert(0 && "UserOp2 should not exist at instruction selection time!"); + abort(); + } + + const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB); + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 000000000000..9d72a128d18b --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,1347 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuild.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include +using namespace llvm; + +static cl::opt +DisableLegalizeTypes("disable-legalize-types", cl::Hidden); +static cl::opt +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt +SchedLiveInCopies("schedule-livein-copies", + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +#ifndef NDEBUG +static cl::opt +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + 
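The View* and EnableFastISel* flags above follow a common LLVM pattern: a cl::opt<bool> marked cl::Hidden (so it stays out of -help) in debug builds, with a const false standing in under NDEBUG so release builds fold the checks away entirely. A minimal sketch of that pattern (the flag name here is hypothetical):

#include "llvm/Support/CommandLine.h"

#ifndef NDEBUG
// Hidden so it does not show up in -help; only for developer debugging.
static llvm::cl::opt<bool>
ViewMyDAGs("view-my-dags", llvm::cl::Hidden,
           llvm::cl::desc("Pop up a window to show my dags"));
#else
// In release builds the "flag" is a compile-time constant, so every
// "if (ViewMyDAGs)" disappears.
static const bool ViewMyDAGs = false;
#endif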
+//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + + if (OptLevel == CodeGenOpt::None) + return createFastDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) + return createTDListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == + TargetLowering::SchedulingForRegPressure && "Unknown sched type!"); + return createBURRListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and the scheduler passes ownership of it to this method. +MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + cerr << "If a target marks an instruction with " + << "'usesCustomDAGSchedInserter', it must implement " + << "TargetLowering::EmitInstrWithCustomInserter!\n"; + abort(); + return 0; +} + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. + bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. 
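The loop that follows walks backwards from the insertion point: it steps over earlier live-in copies until it finds one whose destination cannot clobber PhysReg, so each copy stays adjacent to its own source and remains coalescable. A reduced standalone version of that scan (register overlap is simplified to equality here; real targets must consult TRI.regsOverlap for sub- and super-register aliasing):

#include <vector>

// Minimal stand-in: the block is a sequence of instructions, some of which
// are live-in copies we already emitted, keyed by their destination register
// (0 = not a live-in copy). Mirrors the CopyRegMap lookup above.
struct Instr { unsigned LiveInCopyDst; };

// Scan backwards from InsertPos, walking over earlier live-in copies until
// one is found whose destination does not clash with PhysReg; that is a
// safe place to insert the new copy.
size_t findCopyInsertPoint(const std::vector<Instr> &MBB, size_t InsertPos,
                           unsigned PhysReg) {
  size_t Pos = InsertPos;
  while (Pos != 0) {
    unsigned CopyDst = MBB[Pos - 1].LiveInCopyDst;
    if (CopyDst != 0 && CopyDst != PhysReg)
      break;                                // found a good location
    --Pos;
  }
  return Pos;
}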
+ MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - If this is the first basic block in the function, +/// and if it has live ins that need to be copied into vregs, emit the +/// copies into the block. +static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, MRI, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + } + } +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(TLI, *FuncInfo)), + SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) +{} + +SelectionDAGISel::~SelectionDAGISel() { + delete SDL; + delete CurDAG; + delete FuncInfo; +} + +unsigned SelectionDAGISel::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +bool SelectionDAGISel::runOnFunction(Function &Fn) { + // Do some sanity-checking on the command-line options. + assert((!EnableFastISelVerbose || EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || EnableFastISel) && + "-fast-isel-abort requires -fast-isel"); + + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (Fn.hasAvailableExternallyLinkage()) + return false; + + + // Get alias analysis for load/store combining. 
+ AA = &getAnalysis(); + + TargetMachine &TM = TLI.getTargetMachine(); + MF = &MachineFunction::construct(&Fn, TM); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + if (MF->getFunction()->hasGC()) + GFI = &getAnalysis().getFunctionInfo(*MF->getFunction()); + else + GFI = 0; + RegInfo = &MF->getRegInfo(); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + + MachineModuleInfo *MMI = getAnalysisIfAvailable(); + DwarfWriter *DW = getAnalysisIfAvailable(); + CurDAG->init(*MF, MMI, DW); + FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel); + SDL->init(GFI, *AA); + + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (InvokeInst *Invoke = dyn_cast(I->getTerminator())) + // Mark landing pad. + FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), + E = RegInfo->livein_end(); I != E; ++I) + MF->begin()->addLiveIn(I->first); + +#ifndef NDEBUG + assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); +#endif + + FuncInfo->clear(); + + return true; +} + +static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) + if (EHSelectorInst *EHSel = dyn_cast(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} + +/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and +/// whether object offset >= 0. +static bool +IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { + if (!isa(Op)) return false; + + FrameIndexSDNode * FrameIdxNode = dyn_cast(Op); + int FrameIdx = FrameIdxNode->getIndex(); + return MFI->isFixedObjectIndex(FrameIdx) && + MFI->getObjectOffset(FrameIdx) >= 0; +} + +/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could +/// possibly be overwritten when lowering the outgoing arguments in a tail +/// call. Currently the implementation of this call is very conservative and +/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with +/// virtual registers would be overwritten by direct lowering. +static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, + MachineFrameInfo *MFI) { + RegisterSDNode * OpReg = NULL; + if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || + (Op.getOpcode()== ISD::CopyFromReg && + (OpReg = dyn_cast(Op.getOperand(1))) && + (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || + (Op.getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || + (Op.getOpcode() == ISD::MERGE_VALUES && + Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). + getOperand(1)))) + return true; + return false; +} + +/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the +/// DAG and fixes their tailcall attribute operand. 
+static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, + const TargetLowering& TLI) { + SDNode * Ret = NULL; + SDValue Terminator = DAG.getRoot(); + + // Find RET node. + if (Terminator.getOpcode() == ISD::RET) { + Ret = Terminator.getNode(); + } + + // Fix tail call attribute of CALL nodes. + for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), + BI = DAG.allnodes_end(); BI != BE; ) { + --BI; + if (CallSDNode *TheCall = dyn_cast(BI)) { + SDValue OpRet(Ret, 0); + SDValue OpCall(BI, 0); + bool isMarkedTailCall = TheCall->isTailCall(); + // If CALL node has tail call attribute set to true and the call is not + // eligible (no RET or the target rejects) the attribute is fixed to + // false. The TargetLowering::IsEligibleForTailCallOptimization function + // must correctly identify tail call optimizable calls. + if (!isMarkedTailCall) continue; + if (Ret==NULL || + !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { + // Not eligible. Mark CALL node as non tail call. Note that we + // can modify the call node in place since calls are not CSE'd. + TheCall->setNotTailCall(); + } else { + // Look for tail call clobbered arguments. Emit a series of + // copyto/copyfrom virtual register nodes to protect them. + SmallVector Ops; + SDValue Chain = TheCall->getChain(), InFlag; + Ops.push_back(Chain); + Ops.push_back(TheCall->getCallee()); + for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { + SDValue Arg = TheCall->getArg(i); + bool isByVal = TheCall->getArgFlags(i).isByVal(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!isByVal && + IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { + MVT VT = Arg.getValueType(); + unsigned VReg = MF.getRegInfo(). + createVirtualRegister(TLI.getRegClassFor(VT)); + Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), + VReg, Arg, InFlag); + InFlag = Chain.getValue(1); + Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), + VReg, VT, InFlag); + Chain = Arg.getValue(1); + InFlag = Arg.getValue(2); + } + Ops.push_back(Arg); + Ops.push_back(TheCall->getArgFlagsVal(i)); + } + // Link in chain of CopyTo/CopyFromReg. + Ops[0] = Chain; + DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); + } + } + } +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, + BasicBlock::iterator Begin, + BasicBlock::iterator End) { + SDL->setCurrentBasicBlock(BB); + + // Lower all of the non-terminator instructions. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa(I)) + SDL->visit(*I); + + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa(I) && !isa(I)) + SDL->CopyToExportRegsIfNeeded(I); + + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); + + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDL->getControlRoot()); + + // Check whether calls in this block are real tail calls. Fix up CALL nodes + // with correct tailcall attribute so that the target can rely on the tailcall + // attribute indicating whether the call is really eligible for tail call + // optimization. + if (PerformTailCallOpt) + CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); + + // Final step, emit the lowered DAG as machine code. 
+ CodeGenAndEmitDAG(); + SDL->clear(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet VisitedNodes; + SmallVector Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + MVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); + CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + + // Only install this information if it tells us something. + if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { + DestReg -= TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); + if (DestReg >= FLI.LiveOutRegInfo.size()) + FLI.LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + } +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) + BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + + BB->getBasicBlock()->getName(); + + DOUT << "Initial selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 1", GroupName); + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } else { + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } + + DOUT << "Optimized lowered selection DAG:\n"; + DEBUG(CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (!DisableLegalizeTypes) { + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DOUT << "Type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. 
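Every phase in CodeGenAndEmitDAG repeats the if (TimePassesIsEnabled) / else duplication because NamedRegionTimer must be scoped around exactly the timed call. One way to express that once is an RAII timer that can be disabled at runtime; a sketch of the idea (this is not LLVM's Timer API, only an illustration of the pattern):

#include <chrono>
#include <cstdio>

// RAII region timer that can be disabled at runtime, avoiding the
// "if (Enabled) { Timer T; Work(); } else { Work(); }" duplication.
class ScopedRegionTimer {
  const char *Name;
  bool Enabled;
  std::chrono::steady_clock::time_point Start;

public:
  ScopedRegionTimer(const char *Name, bool Enabled)
      : Name(Name), Enabled(Enabled) {
    if (Enabled) Start = std::chrono::steady_clock::now();
  }
  ~ScopedRegionTimer() {
    if (!Enabled) return;
    auto End = std::chrono::steady_clock::now();
    std::printf("%s: %lld us\n", Name,
        (long long)std::chrono::duration_cast<std::chrono::microseconds>(
            End - Start).count());
  }
};

// Usage: timing costs one branch when disabled.
//   { ScopedRegionTimer T("DAG Combining 2", TimePassesIsEnabled);
//     /* run the combine */ }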
+ if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } + + DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Legalization", GroupName); + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } else { + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } + + DOUT << "Legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 2", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Selection", GroupName); + InstructionSelect(); + } else { + InstructionSelect(); + } + + DOUT << "Selected selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. + ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling", GroupName); + Scheduler->Run(CurDAG, BB, BB->end()); + } else { + Scheduler->Run(CurDAG, BB, BB->end()); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Creation", GroupName); + BB = Scheduler->EmitSchedule(); + } else { + BB = Scheduler->EmitSchedule(); + } + + // Free the scheduler state. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); + delete Scheduler; + } else { + delete Scheduler; + } + + DOUT << "Selected machine code:\n"; + DEBUG(BB->dump()); +} + +void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, + MachineFunction &MF, + MachineModuleInfo *MMI, + DwarfWriter *DW, + const TargetInstrInfo &TII) { + // Initialize the Fast-ISel state, if needed. 
+ FastISel *FastIS = 0; + if (EnableFastISel) + FastIS = TLI.createFastISel(MF, MMI, DW, + FuncInfo->ValueMap, + FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap +#ifndef NDEBUG + , FuncInfo->CatchInfoLost +#endif + ); + + // Iterate over all basic blocks in the function. + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + BasicBlock *LLVMBB = &*I; + BB = FuncInfo->MBBMap[LLVMBB]; + + BasicBlock::iterator const Begin = LLVMBB->begin(); + BasicBlock::iterator const End = LLVMBB->end(); + BasicBlock::iterator BI = Begin; + + // Lower any arguments needed in this block if this is the entry block. + bool SuppressFastISel = false; + if (LLVMBB == &Fn.getEntryBlock()) { + LowerArguments(LLVMBB); + + // If any of the arguments has the byval attribute, forgo + // fast-isel in the entry block. + if (FastIS) { + unsigned j = 1; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I, ++j) + if (Fn.paramHasAttr(j, Attribute::ByVal)) { + if (EnableFastISelVerbose || EnableFastISelAbort) + cerr << "FastISel skips entry block due to byval argument\n"; + SuppressFastISel = true; + break; + } + } + } + + if (MMI && BB->isLandingPad()) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + unsigned LabelID = MMI->addLandingPad(BB); + + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results in exceptions + // not being caught because no typeids are associated with the invoke. + // This may not be the only way things can go wrong, but it is the only way + // we try to work around for the moment. + BranchInst *Br = dyn_cast(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa(I)) + break; + + if (I == E) + // No catch info found - try to extract some from the successor. + copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + } + } + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS && !SuppressFastISel) { + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + CurDAG->setRoot(SDL->getControlRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. + for (; BI != End; ++BI) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. 
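The per-instruction loop below tries progressively more expensive selectors: the tablegen-generated fast path, then the target's fast-isel hook, then falling back to full SelectionDAG selection for whatever remains. A distilled version of that control flow (Instr and both hooks are stand-ins, not LLVM interfaces):

#include <functional>
#include <vector>

// Try cheap selectors per instruction and report where fast selection gave
// up, so the caller can hand the remainder of the block to the DAG path.
template <typename Instr>
size_t runFastISel(const std::vector<Instr> &Block,
                   std::function<bool(const Instr &)> FastSelect,
                   std::function<bool(const Instr &)> TargetSelect) {
  for (size_t i = 0; i != Block.size(); ++i) {
    if (FastSelect(Block[i])) continue;    // tablegen'd fast path first
    if (TargetSelect(Block[i])) continue;  // then the target hook
    return i;                              // bail: DAG ISel resumes here
  }
  return Block.size();                     // whole block handled
}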
+ if (isa(BI)) + if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + assert(0 && "FastISel didn't handle a PHI in a successor"); + break; + } + + // First try normal tablegen-generated "fast" selection. + if (FastIS->SelectInstruction(BI)) + continue; + + // Next, try calling the target to attempt to handle the instruction. + if (FastIS->TargetSelectInstruction(BI)) + continue; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel missed call: "; + BI->dump(); + } + + if (BI->getType() != Type::VoidTy) { + unsigned &R = FuncInfo->ValueMap[BI]; + if (!R) + R = FuncInfo->CreateRegForValue(BI); + } + + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, next(BI)); + // If the instruction was codegen'd with multiple blocks, + // inform the FastISel object where to resume inserting. + FastIS->setCurrentBlock(BB); + continue; + } + + // Otherwise, give up on FastISel for the rest of the block. + // For now, be a little lenient about non-branch terminators. + if (!isa(BI) || isa(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + assert(0 && "FastISel didn't select the entire block"); + } + break; + } + } + + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + if (BI != End) { + // If FastISel is run and it has known DebugLoc then use it. + if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, End); + } + + FinishBasicBlock(); + } + + delete FastIS; +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DOUT << "Target-post-processed machine code:\n"; + DEBUG(BB->dump()); + + DOUT << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) + DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. 
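FinishBasicBlock's simple case, just below, appends one (virtual register, predecessor block) operand pair to each recorded machine PHI. A minimal model of that bookkeeping (MachinePHI and Block are stand-in types):

#include <utility>
#include <vector>

// A machine PHI holds (vreg, pred-block) operand pairs, and the update list
// records which vreg each PHI must receive from the block just emitted.
struct Block;
struct MachinePHI {
  std::vector<std::pair<unsigned, Block *>> Incoming;
};

void updatePHIs(std::vector<std::pair<MachinePHI *, unsigned>> &ToUpdate,
                Block *LastEmittedBB) {
  for (auto &Entry : ToUpdate)
    // Append the (register, predecessor) pair, as addOperand does with
    // CreateReg/CreateMBB in the no-switch-expansion case below.
    Entry.first->Incoming.push_back({Entry.second, LastEmittedBB});
  ToUpdate.clear();
}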
+ if (SDL->SwitchCases.empty() && + SDL->JTCases.empty() && + SDL->BitTestCases.empty()) { + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + SDL->PHINodesToUpdate.clear(); + return; + } + + for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Parent; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitBitTestHeader(SDL->BitTestCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Cases[j].ThisBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + if (j+1 != ej) + SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + else + SDL->visitBitTestCase(SDL->BitTestCases[i].Default, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + + + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDL->BitTestCases[i].Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent)); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases. + back().ThisBB)); + } + // One of "cases" BB. + for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB; + if (cBB->succ_end() != + std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(cBB)); + } + } + } + } + SDL->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. 
+ // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].first.HeaderBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].second.MBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTable(SDL->JTCases[i].second); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDL->JTCases[i].second.Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + } + // JT BB. Just iterate over successors here + if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + } + SDL->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->SwitchCases[i].ThisBB; + SDL->setCurrentBasicBlock(BB); + + // Emit the code + SDL->visitSwitchCase(SDL->SwitchCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. We have to be very careful to + // handle them the right number of times. + while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ + // This value for this PHI node is recorded in PHINodesToUpdate, get it. 
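The jump-table emission above splits the work across two blocks: the header biases the switch value by First and performs a single unsigned range check (which also catches values below First), and the table block branches through the selected entry. The same shape in plain C++ (handlers modeled as function pointers; this is only the control-flow skeleton, not the emitted machine code):

#include <cstdint>

typedef void (*CaseHandler)();

// Range = Last - First, and Table has Range + 1 entries.
void dispatch(int64_t X, int64_t First, uint64_t Range,
              const CaseHandler *Table, CaseHandler Default) {
  uint64_t Idx = (uint64_t)(X - First);  // bias so the table starts at 0
  if (Idx > Range)                       // one unsigned compare replaces two
    Default();
  else
    Table[Idx]();
}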
+ for (unsigned pn = 0; ; ++pn) { + assert(pn != SDL->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDL->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. + second, false)); + Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + break; + } + } + } + + // Don't process RHS if same block as LHS. + if (BB == SDL->SwitchCases[i].FalseBB) + SDL->SwitchCases[i].FalseBB = 0; + + // If we haven't handled the RHS, do so now. Otherwise, we're done. + SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; + SDL->SwitchCases[i].FalseBB = 0; + } + assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + } + SDL->SwitchCases.clear(); + + SDL->PHINodesToUpdate.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new ScheduleHazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. 
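CheckAndMask above reduces to three steps over masks: accept an exact match, reject an AND mask that admits disallowed bits, and otherwise require every dropped bit to be provably zero (the MaskedValueIsZero query). The same logic over plain 64-bit masks, with a KnownZero parameter standing in for the DAG's known-bits analysis:

#include <cstdint>

// The pattern wanted (and X, Desired) but the combiner shrank the constant
// to Actual. The match still holds if every bit the combiner dropped is
// already known zero in X.
bool checkAndMask(uint64_t Actual, uint64_t Desired, uint64_t KnownZero) {
  if (Actual == Desired)
    return true;                          // exact match
  if (Actual & ~Desired)
    return false;                         // AND lets through disallowed bits
  uint64_t Needed = Desired & ~Actual;
  return (Needed & KnownZero) == Needed;  // dropped bits provably zero
}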
+ if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + + // If all the missing bits in the or are already known to be set, match! + if ((NeededMask & KnownOne) == NeededMask) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + + +/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated +/// by tblgen. Others should not call it. +void SelectionDAGISel:: +SelectInlineAsmMemoryOperands(std::vector &Ops) { + std::vector InOps; + std::swap(InOps, Ops); + + Ops.push_back(InOps[0]); // input chain. + Ops.push_back(InOps[1]); // input asm string. + + unsigned i = 2, e = InOps.size(); + if (InOps[e-1].getValueType() == MVT::Flag) + --e; // Don't process a flag operand if it is here. + + while (i != e) { + unsigned Flags = cast(InOps[i])->getZExtValue(); + if ((Flags & 7) != 4 /*MEM*/) { + // Just skip over this operand, copying the operands verbatim. + Ops.insert(Ops.end(), InOps.begin()+i, + InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); + i += InlineAsm::getNumOperandRegisters(Flags) + 1; + } else { + assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + "Memory operand with multiple values?"); + // Otherwise, this is a memory operand. Ask the target to select it. + std::vector SelOps; + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { + cerr << "Could not match memory address. Inline asm failure!\n"; + exit(1); + } + + // Add this to the output node. + MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), + IntPtrTy)); + Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + i += 2; + } + } + + // Add the flag input back if present. + if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// SDNode. +/// +static SDNode *findFlagUse(SDNode *N) { + unsigned FlagResNo = N->getNumValues()-1; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDUse &Use = I.getUse(); + if (Use.getResNo() == FlagResNo) + return Use.getUser(); + } + return NULL; +} + +/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". +/// This function recursively traverses up the operand chain, ignoring +/// certain nodes. +static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, + SDNode *Root, + SmallPtrSet &Visited) { + if (Use->getNodeId() < Def->getNodeId() || + !Visited.insert(Use)) + return false; + + for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + SDNode *N = Use->getOperand(i).getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } + + // Traverse up the operand chain. + if (findNonImmUse(N, Def, ImmedUse, Root, Visited)) + return true; + } + return false; +} + +/// isNonImmUse - Start searching from Root up the DAG to check is Def can +/// be reached. Return true if that's the case. 
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached.  Return true if that's the case.  However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded.  But realistically since we only fold loads which
+/// have one non-chain use, we only need to watch out for load/op/store
+/// and load/op/cmp case where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+  SmallPtrSet<SDNode*, 16> Visited;
+  return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root and
+/// folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+                                                  SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If Root use can somehow reach N through a path that doesn't contain
+  // U then folding N would create a cycle. e.g. In the following
+  // diagram, Root can reach N through X. If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces a flag, then it gets (even more) interesting. Since it
+  // will be "glued" together with its flag use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //           ^   /         //
+  //           f  /          //
+  //           | /           //
+  //          [FU]           //
+  //
+  // If FU (flag use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of FU and a successor of
+  // Fold. But since Fold and FU are flagged together, this will create
+  // a cycle in the scheduling graph.
+
+  MVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Flag) {
+    SDNode *FU = findFlagUse(Root);
+    if (FU == NULL)
+      break;
+    Root = FU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+  }
+
+  return !isNonImmUse(Root, N, U);
+}
+
+
+char SelectionDAGISel::ID = 0;
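+
+// A small worked example of the CheckAndMask logic defined earlier in this
+// file (illustrative only; the helper name "MasksStillMatch" is not part of
+// LLVM).  Suppose a .td pattern asks for (and X, 255) but the DAG combiner
+// shrank the mask to 15 because it proved bits 4-7 of X are already zero:
+// the needed bits (255 & ~15 == 240) are all known zero, so the pattern
+// still matches.
+static bool MasksStillMatch(const APInt &ActualMask, const APInt &DesiredMask,
+                            const APInt &KnownZeroBits) {
+  if (ActualMask == DesiredMask)
+    return true;                          // Exact match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;                         // Actual mask allows extra bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+  return (NeededMask & KnownZeroBits) == NeededMask;
+}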
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..3eec684c6f8c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,416 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+    static bool hasEdgeDestLabels() {
+      return true;
+    }
+
+    static unsigned numEdgeDestLabels(const void *Node) {
+      return ((const SDNode *) Node)->getNumValues();
+    }
+
+    static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+      return ((const SDNode *) Node)->getValueType(i).getMVTString();
+    }
+
+    /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+    /// should actually target another edge source, not a node.  If this
+    /// method is implemented, getEdgeTarget should be implemented.
+    template<typename EdgeIter>
+    static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+      return true;
+    }
+
+    /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+    /// called to determine which outgoing edge of Node is the target of this
+    /// edge.
+    template<typename EdgeIter>
+    static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+      SDNode *TargetNode = *I;
+      SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+      std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+      return NI;
+    }
+
+    static std::string getGraphName(const SelectionDAG *G) {
+      return G->getMachineFunction().getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+
+    static bool hasNodeAddressLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph) {
+      return true;
+    }
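+
+    // For example (illustrative): with these traits, an SDNode producing an
+    // i32 result plus a chain gets one outgoing-edge label per result in the
+    // dot file -- the strings getMVTString returns for those value types,
+    // such as "i32" and "ch" -- so each edge can be wired to the specific
+    // result it consumes.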
+
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+      MVT VT = Op.getValueType();
+      if (VT == MVT::Flag)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+
+
+    static std::string getNodeLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().getNode())
+        GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+                    "color=blue,style=dashed");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  std::string Op = Node->getOperationName(G);
+
+  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+    Op += ": " + utostr(CSDN->getZExtValue());
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+    Op += ": " + ftostr(CSDN->getValueAPF());
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(Node)) {
+    Op += ": " + GADN->getGlobal()->getName();
+    if (int64_t Offset = GADN->getOffset()) {
+      if (Offset > 0)
+        Op += "+" + itostr(Offset);
+      else
+        Op += itostr(Offset);
+    }
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+    Op += " " + itostr(FIDN->getIndex());
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+    Op += " " + itostr(JTDN->getIndex());
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+    if (CP->isMachineConstantPoolEntry()) {
+      Op += '<';
+      {
+        raw_string_ostream OSS(Op);
+        OSS << *CP->getMachineCPVal();
+      }
+      Op += '>';
+    } else {
+      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+        Op += "<" + ftostr(CFP->getValueAPF()) + ">";
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+        Op += "<" + utostr(CI->getZExtValue()) + ">";
+      else {
+        Op += '<';
+        {
+          raw_string_ostream OSS(Op);
+          WriteAsOperand(OSS, CP->getConstVal(), false);
+        }
+        Op += '>';
+      }
+    }
+    Op += " A=" + itostr(CP->getAlignment());
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+    Op = "BB: ";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      Op += LBB->getName();
+    //Op += " " + (const void*)BBDN->getBasicBlock();
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+    if (G && R->getReg() != 0 &&
+        TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+      Op = Op + " " +
+        G->getTarget().getRegisterInfo()->getName(R->getReg());
+    } else {
+      Op += " #" + utostr(R->getReg());
+    }
+  } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) {
+    DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit()));
+    std::string FN;
+    Op += ": " + CU.getFilename(FN);
+    Op += ":" + utostr(D->getLine());
+    if (D->getColumn() != 0)
+      Op += ":" + utostr(D->getColumn());
+  } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) {
+    Op += ": LabelID=" + utostr(L->getLabelID());
+  } else if (const CallSDNode *C = dyn_cast<CallSDNode>(Node)) {
+    Op += ": CallingConv=" + utostr(C->getCallingConv());
+    if (C->isVarArg())
+      Op += ", isVarArg";
+    if (C->isTailCall())
+      Op += ", isTailCall";
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(Node)) {
+    Op += "'" + std::string(ES->getSymbol()) + "'";
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+    if (M->getValue())
+      Op += "<" + M->getValue()->getName() + ">";
+    else
+      Op += "<null>";
+  } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) {
+    const Value *V = M->MO.getValue();
+    Op += '<';
+    if (!V) {
+      Op += "(unknown)";
+    } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+      // PseudoSourceValues don't have names, so use their print method.
+      raw_string_ostream OSS(Op);
+      PSV->print(OSS);
+    } else {
+      Op += V->getName();
+    }
+    Op += '+' + itostr(M->MO.getOffset()) + '>';
+  } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) {
+    Op = Op + " AF=" + N->getArgFlags().getArgFlagsString();
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+    Op = Op + " VT=" + N->getVT().getMVTString();
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD: Op = Op + "<anyext "; break;
+    case ISD::SEXTLOAD: Op = Op + "<sext "; break;
+    case ISD::ZEXTLOAD: Op = Op + "<zext "; break;
+    }
+    if (doExt)
+      Op += LD->getMemoryVT().getMVTString() + ">";
+    if (LD->isVolatile())
+      Op += "<V>";
+    Op += LD->getIndexedModeName(LD->getAddressingMode());
+    if (LD->getAlignment() > 1)
+      Op += " A=" + utostr(LD->getAlignment());
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+    if (ST->isTruncatingStore())
+      Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">";
+    if (ST->isVolatile())
+      Op += "<V>";
+    Op += ST->getIndexedModeName(ST->getAddressingMode());
+    if (ST->getAlignment() > 1)
+      Op += " A=" + utostr(ST->getAlignment());
+  }
+
+#if 0
+  Op += " Id=" + itostr(Node->getNodeId());
+#endif
+
+  return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+            Title);
+#else
+  cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+  viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+  return std::string("");
+#endif
+}
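+
+// Typical use of these hooks from a debugger, per the comments above (an
+// illustrative gdb session; it assumes a live SelectionDAG *CurDAG and an
+// SDNode *N are in scope, which is not always the case):
+//
+//   (gdb) call CurDAG->setGraphAttrs(N, "color=red")
+//   (gdb) call CurDAG->viewGraph()
+//   (gdb) call CurDAG->clearGraphAttrs()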
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+  cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor.  Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+  bool hit_limit = false;
+
+#ifndef NDEBUG
+  if (level >= 20) {
+    if (!printed) {
+      printed = true;
+      DOUT << "setSubgraphColor hit max level\n";
+    }
+    return true;
+  }
+
+  unsigned oldSize = visited.size();
+  visited.insert(N);
+  if (visited.size() != oldSize) {
+    setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+         iend = SDNodeIterator::end(N);
+         i != iend;
+         ++i) {
+      hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1,
+                                         printed) || hit_limit;
+    }
+  }
+#else
+  cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+  return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  DenseSet<SDNode *> visited;
+  bool printed = false;
+  if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+    // Visually mark that we hit the limit
+    if (strcmp(Color, "red") == 0) {
+      setSubgraphColorHelper(N, "blue", visited, 0, printed);
+    }
+    else if (strcmp(Color, "yellow") == 0) {
+      setSubgraphColorHelper(N, "green", visited, 0, printed);
+    }
+  }
+
+#else
+  cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+       << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+  std::string s;
+  raw_string_ostream O(s);
+  O << "SU(" << SU->NodeNum << "): ";
+  if (SU->getNode()) {
+    SmallVector<SDNode *, 4> FlaggedNodes;
+    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+      FlaggedNodes.push_back(N);
+    while (!FlaggedNodes.empty()) {
+      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(),
+                                                       DAG);
+      FlaggedNodes.pop_back();
+      if (!FlaggedNodes.empty())
+        O << "\n    ";
+    }
+  } else {
+    O << "CROSS RC COPY";
+  }
+  return O.str();
+}
+
+void ScheduleDAGSDNodes::
+getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+  if (DAG) {
+    // Draw a special "GraphRoot" node to indicate the root of the graph.
+    GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+    const SDNode *N = DAG->getRoot().getNode();
+    if (N && N->getNodeId() != -1)
+      GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+                  "color=blue,style=dashed");
+  }
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..3334e53f0fbc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2592 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace llvm { +TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (reloc == Reloc::PIC_) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} +} + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = 
"__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; 
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +TargetLowering::TargetLowering(TargetMachine &tm) + : TM(tm), TD(TM.getTargetData()) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(ConvertActions, 0, sizeof(ConvertActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use addLegalFPImmediate + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. 
+ setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + UsesGlobalOffsetTable = false; + ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); + ShiftAmtHandling = Undefined; + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + allowUnalignedMemoryAccesses = false; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = SchedulingForLatency; + JumpBufSize = 0; + JumpBufAlignment = 0; + IfCvtBlockSizeLimit = 2; + IfCvtDupBlockSizeLimit = 0; + PrefLoopAlignment = 0; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + + // Tell Legalize whether the assembler supports DEBUG_LOC. + const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + if (!TASM || !TASM->hasDotLocAndDotFile()) + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); +} + +TargetLowering::~TargetLowering() {} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + MVT EVT = (MVT::SimpleValueType)ExpandedReg; + if (!EVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(EVT, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. 
+ unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (!isTypeLegal(VT)) { + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdown(VT, + IntermediateVT, NumIntermediates, + RegisterVT); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (!IsLegalWiderType) { + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); + } + } + } + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT TargetLowering::getSetCCResultType(MVT VT) const { + return getValueType(TD->getIntPtrType()); +} + + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. 
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register.  It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
+                                                MVT &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                                MVT &RegisterVT) const {
+  // Figure out the right, legal destination reg to copy into.
+  unsigned NumElts = VT.getVectorNumElements();
+  MVT EltTy = VT.getVectorElementType();
+
+  unsigned NumVectorRegs = 1;
+
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
+  // could break down into LHS/RHS like LegalizeDAG does.
+  if (!isPowerOf2_32(NumElts)) {
+    NumVectorRegs = NumElts;
+    NumElts = 1;
+  }
+
+  // Divide the input until we get to a supported size.  This will always
+  // end with a scalar if the target doesn't support vectors.
+  while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
+
+  NumIntermediates = NumVectorRegs;
+
+  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+  if (!isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  MVT DestVT = getRegisterType(NewVT);
+  RegisterVT = DestVT;
+  if (DestVT.bitsLT(NewVT)) {
+    // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+  } else {
+    // Otherwise, promotion or legal types use the same number of registers as
+    // the vector decimated to the appropriate level.
+    return NumVectorRegs;
+  }
+}
+
+/// getWidenVectorType: given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+MVT TargetLowering::getWidenVectorType(MVT VT) const {
+  assert(VT.isVector());
+  if (isTypeLegal(VT))
+    return VT;
+
+  // Default is not to widen until moved to LegalizeTypes
+  return MVT::Other;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.  This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+  return TD->getCallFrameTypeAlignment(Ty);
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                 SelectionDAG &DAG) const {
+  if (usesGlobalOffsetTable())
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+  return Table;
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // Assume that everything is safe in static mode.
+  if (getTargetMachine().getRelocationModel() == Reloc::Static)
+    return true;
+
+  // In dynamic-no-pic mode, assume that known defined values are safe.
+  if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+      GA &&
+      !GA->getGlobal()->isDeclaration() &&
+      !GA->getGlobal()->isWeakForLinker())
+    return true;
+
+  // Otherwise assume nothing is safe.
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
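+
+// A compact worked example of the constant-shrinking idea implemented below
+// (a self-contained sketch; "ShrinkExample" is not an LLVM API).  If only
+// the low four bits of (x & 0x00FF) are demanded, the 0x00FF constant has
+// set bits that are never used, so it can be replaced by 0x000F:
+static APInt ShrinkExample() {
+  APInt C(32, 0x00FF);        // mask written in the IR
+  APInt Demanded(32, 0x000F); // bits the users of the 'and' actually read
+  if (C.intersects(~Demanded))
+    C &= Demanded;            // shrink: 0x00FF -> 0x000F
+  return C;
+}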
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer.  If so, check to see if there
+/// are any bits set in the constant that are not demanded.  If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+                                                        const APInt &Demanded) {
+  DebugLoc dl = Op.getDebugLoc();
+
+  // FIXME: ISD::SELECT, ISD::SELECT_CC
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::XOR:
+  case ISD::AND:
+  case ISD::OR: {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    if (!C) return false;
+
+    if (Op.getOpcode() == ISD::XOR &&
+        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+      return false;
+
+    // if we can expand it to have all bits set, do it
+    if (C->getAPIntValue().intersects(~Demanded)) {
+      MVT VT = Op.getValueType();
+      SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+                                DAG.getConstant(Demanded &
+                                                C->getAPIntValue(),
+                                                VT));
+      return CombineTo(Op, New);
+    }
+
+    break;
+  }
+  }
+
+  return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+                                                    unsigned BitWidth,
+                                                    const APInt &Demanded,
+                                                    DebugLoc dl) {
+  assert(Op.getNumOperands() == 2 &&
+         "ShrinkDemandedOp only supports binary operators!");
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "ShrinkDemandedOp only supports nodes with one result!");
+
+  // Don't do this if the node has another user, which may require the
+  // full value.
+  if (!Op.getNode()->hasOneUse())
+    return false;
+
+  // Search for the smallest integer type with free casts to and from
+  // Op's type.  For expedience, just check power-of-2 integer types.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+  if (!isPowerOf2_32(SmallVTBits))
+    SmallVTBits = NextPowerOf2(SmallVTBits);
+  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+    MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, Op.getValueType())) {
+      // We found a type with free casts.
+      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(0)),
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(1)));
+      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+      return CombineTo(Op, Z);
+    }
+  }
+  return false;
+}
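+
+// For instance (illustrative): if only the low 8 bits of a 32-bit ADD are
+// demanded and some narrower type, say i16, has free zext/trunc casts to and
+// from i32 on the target, ShrinkDemandedOp rewrites
+//   (i32 add x, y)   -->   (zext (i16 add (trunc x), (trunc y)))
+// which preserves every demanded bit, because addition never carries
+// information from high bits into low bits.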
+
+/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream.  If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New.
+/// Otherwise, analyze the expression and return a mask of KnownOne and
+/// KnownZero bits for the expression (used to simplify the caller).  The
+/// KnownZero/One bits may only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+                                          const APInt &DemandedMask,
+                                          APInt &KnownZero,
+                                          APInt &KnownOne,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth) const {
+  unsigned BitWidth = DemandedMask.getBitWidth();
+  assert(Op.getValueSizeInBits() == BitWidth &&
+         "Mask size mismatches value type size!");
+  APInt NewMask = DemandedMask;
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Don't know anything.
+  KnownZero = KnownOne = APInt(BitWidth, 0);
+
+  // Other users may use these bits.
+  if (!Op.getNode()->hasOneUse()) {
+    if (Depth != 0) {
+      // If not at the root, Just compute the KnownZero/KnownOne bits to
+      // simplify things downstream.
+      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the NewMask to all bits.
+    NewMask = APInt::getAllOnesValue(BitWidth);
+  } else if (DemandedMask == 0) {
+    // Not demanding any bits from Op.
+    if (Op.getOpcode() != ISD::UNDEF)
+      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+    return false;
+  } else if (Depth == 6) {        // Limit search depth.
+    return false;
+  }
+
+  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+    KnownZero = ~KnownOne & NewMask;
+    return false;   // Don't fall through, will infinitely loop.
+  case ISD::AND:
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS.  Below, we use knowledge about the RHS to
+    // simplify the LHS, here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      APInt LHSZero, LHSOne;
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+                                LHSZero, LHSOne, Depth+1);
+      // If the LHS already has zeros where RHSC does, this and is dead.
+      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+        return true;
+    }
+
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+      return true;
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+      if ((KnownOne & KnownOne2) == KnownOne) {
+        MVT VT = Op.getValueType();
+        SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+                                                 Op.getOperand(0), ANDC));
+      }
+    }
+
+    // If the RHS is a constant, see if we can simplify it.
+    // for XOR, we prefer to force bits to 1 if they will make a -1.
+    // if we can't force bits, try to shrink constant
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      APInt Expanded = C->getAPIntValue() | (~NewMask);
+      // if we can expand it to have all bits set, do it
+      if (Expanded.isAllOnesValue()) {
+        if (Expanded != C->getAPIntValue()) {
+          MVT VT = Op.getValueType();
+          SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+                                        TLO.DAG.getConstant(Expanded, VT));
+          return TLO.CombineTo(Op, New);
+        }
+        // if it already has all the bits set, nothing to change
+        // but don't shrink either!
+      } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+        return true;
+      }
+    }
+
+    KnownZero = KnownZeroOut;
+    KnownOne  = KnownOneOut;
+    break;
+  case ISD::SELECT:
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, NewMask))
+      return true;
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  case ISD::SELECT_CC:
+    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If the operands are constants, see if we can simplify them.
+    if (TLO.ShrinkDemandedConstant(Op, NewMask))
+      return true;
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
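+
+  // Worked example for the ISD::SHL case below (illustrative): for
+  // ((X >>u 4) << 6) where the low 6 bits of the result are not demanded,
+  // the bits cleared by the inner SRL never matter, so the pair collapses
+  // to a single (X << 2); had the inner shift been larger, e.g.
+  // ((X >>u 8) << 6), it would collapse to (X >>u 2) instead.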
+  case ISD::SHL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+      SDValue InOp = Op.getOperand(0);
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        break;
+
+      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the bottom bits (which are shifted
+      // out) are never demanded.
+      if (InOp.getOpcode() == ISD::SRL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+          unsigned Opc = ISD::SHL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SRL;
+          }
+
+          SDValue NewSA =
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          MVT VT = Op.getValueType();
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }
+
+      if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      KnownZero <<= SA->getZExtValue();
+      KnownOne  <<= SA->getZExtValue();
+      // low bits known zero.
+      KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+    }
+    break;
+  case ISD::SRL:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT VT = Op.getValueType();
+      unsigned ShAmt = SA->getZExtValue();
+      unsigned VTSize = VT.getSizeInBits();
+      SDValue InOp = Op.getOperand(0);
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        break;
+
+      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+      // single shift.  We can do this if the top bits (which are shifted out)
+      // are never demanded.
+      if (InOp.getOpcode() == ISD::SHL &&
+          isa<ConstantSDNode>(InOp.getOperand(1))) {
+        if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+          unsigned Opc = ISD::SRL;
+          int Diff = ShAmt-C1;
+          if (Diff < 0) {
+            Diff = -Diff;
+            Opc = ISD::SHL;
+          }
+
+          SDValue NewSA =
+            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                   InOp.getOperand(0), NewSA));
+        }
+      }
+
+      // Compute the new bits that are at the top now.
+      if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+                               KnownZero, KnownOne, TLO, Depth+1))
+        return true;
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    break;
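+
+  // Worked example for the ISD::SRA case below (illustrative): if only bit 0
+  // of an i32 (X >>s n) is demanded, that bit never comes from sign
+  // extension for any defined shift amount, so the arithmetic shift can be
+  // replaced by the cheaper logical (X >>u n) even when n is not a constant.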
+      APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
+      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+                                                 Op.getOperand(0),
+                                                 Op.getOperand(1)));
+      } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+        KnownOne |= HighBits;
+      }
+    }
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+    // Sign extension.  Compute the demanded bits in the result that are not
+    // present in the input.
+    APInt NewBits = APInt::getHighBitsSet(BitWidth,
+                                          BitWidth - EVT.getSizeInBits()) &
+                    NewMask;
+
+    // If none of the extended bits are demanded, eliminate the sextinreg.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op, Op.getOperand(0));
+
+    APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
+    InSignBit.zext(BitWidth);
+    APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
+                                                   EVT.getSizeInBits()) &
+                              NewMask;
+
+    // Since the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InputDemandedBits |= InSignBit;
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+
+    // If the input sign bit is known zero, convert this into a zero extension.
+    if (KnownZero.intersects(InSignBit))
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+
+    if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
+      KnownOne |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                       // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ZERO_EXTEND: {
+    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+    APInt InMask = NewMask;
+    InMask.trunc(OperandBitWidth);
+
+    // If none of the top bits are demanded, convert this into an any_extend.
+    APInt NewBits =
+      APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+    if (!NewBits.intersects(NewMask))
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+                                               Op.getValueType(),
+                                               Op.getOperand(0)));
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    KnownZero |= NewBits;
+    break;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
+    APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+    APInt NewBits   = ~InMask & NewMask;
+
+    // If none of the top bits are demanded, convert this into an any_extend.
+    if (NewBits == 0)
+      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+                                              Op.getValueType(),
+                                              Op.getOperand(0)));
+
+    // Since some of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
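+    // (Illustrative: for sext i8 -> i32 with bit 20 demanded, bit 20 is a
+    // copy of input bit 7, so input bit 7 must be demanded as well.)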
+    APInt InDemandedBits = InMask & NewMask;
+    InDemandedBits |= InSignBit;
+    InDemandedBits.trunc(InBits);
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+
+    // If the sign bit is known zero, convert this to a zero extend.
+    if (KnownZero.intersects(InSignBit))
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                               Op.getValueType(),
+                                               Op.getOperand(0)));
+
+    // If the sign bit is known one, the top bits match.
+    if (KnownOne.intersects(InSignBit)) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    break;
+  }
+  case ISD::ANY_EXTEND: {
+    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+    APInt InMask = NewMask;
+    InMask.trunc(OperandBitWidth);
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    break;
+  }
+  case ISD::TRUNCATE: {
+    // Simplify the input, using demanded bit information, and compute the known
+    // zero/one bits live out.
+    APInt TruncMask = NewMask;
+    TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    KnownZero.trunc(BitWidth);
+    KnownOne.trunc(BitWidth);
+
+    // If the input is only used by this truncate, see if we can shrink it based
+    // on the known demanded bits.
+    if (Op.getOperand(0).getNode()->hasOneUse()) {
+      SDValue In = Op.getOperand(0);
+      unsigned InBitWidth = In.getValueSizeInBits();
+      switch (In.getOpcode()) {
+      default: break;
+      case ISD::SRL:
+        // Shrink SRL by a constant if none of the high bits shifted in are
+        // demanded.
+        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+          APInt HighBits = APInt::getHighBitsSet(InBitWidth,
+                                                 InBitWidth - BitWidth);
+          HighBits = HighBits.lshr(ShAmt->getZExtValue());
+          HighBits.trunc(BitWidth);
+
+          if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+            // None of the shifted in bits are needed.  Add a truncate of the
+            // shift input, then shift it.
+            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+                                               Op.getValueType(),
+                                               In.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+                                                     Op.getValueType(),
+                                                     NewTrunc,
+                                                     In.getOperand(1)));
+          }
+        }
+        break;
+      }
+    }
+
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth,
+                                        VT.getSizeInBits());
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero |= ~InMask & NewMask;
+    break;
+  }
+  case ISD::BIT_CONVERT:
+#if 0
+    // If this is an FP->Int bitcast and if the sign bit is the only thing that
+    // is demanded, turn this into a FGETSIGN.
+    if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) &&
+        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+        !MVT::isVector(Op.getOperand(0).getValueType())) {
+      // Only do this xform if FGETSIGN is valid or if before legalize.
+      if (!TLO.AfterLegalize ||
+          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+        // place.  We expect the SHL to be eliminated by other optimizations.
+        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+                                       Op.getOperand(0));
+        unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+        SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+                                                 Sign, ShAmt));
+      }
+    }
+#endif
+    break;
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::SUB: {
+    // Add, Sub, and Mul don't demand any bits in positions beyond that
+    // of the highest bit demanded of them.
+    APInt LoMask = APInt::getLowBitsSet(BitWidth,
+                                        BitWidth - NewMask.countLeadingZeros());
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    // See if the operation should be performed at a smaller bit width.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+  }
+  // FALL THROUGH
+  default:
+    // Just use ComputeMaskedBits to compute output bits.
+    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+    break;
+  }
+
+  // If we know the value of all of the demanded bits, return this as a
+  // constant.
+  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+  return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+                                                    const APInt &Mask,
+                                                    APInt &KnownZero,
+                                                    APInt &KnownOne,
+                                                    const SelectionDAG &DAG,
+                                                    unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use ComputeNumSignBits if you don't know whether Op"
+         " is a target node!");
+  return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+  // A left-shift of a constant one will have exactly one bit set, because
+  // shifting the bit off the end is undefined.
+  if (Val.getOpcode() == ISD::SHL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue() == 1)
+        return true;
+
+  // Similarly, a right-shift of a constant sign-bit will have exactly
+  // one bit set.
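+  // (Illustrative: (1 << n) and (0x80000000 >>u n) each have a single set
+  // bit for any in-range n on i32.)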
+  if (Val.getOpcode() == ISD::SRL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue().isSignBit())
+        return true;
+
+  // More could be done here, though the above checks are enough
+  // to handle some common cases.
+
+  // Fall back to ComputeMaskedBits to catch other known cases.
+  MVT OpVT = Val.getValueType();
+  unsigned BitWidth = OpVT.getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+  return (KnownZero.countPopulation() == BitWidth - 1) &&
+         (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
+                              ISD::CondCode Cond, bool foldBooleans,
+                              DAGCombinerInfo &DCI, DebugLoc dl) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  }
+
+  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+    const APInt &C1 = N1C->getAPIntValue();
+    if (isa<ConstantSDNode>(N0.getNode())) {
+      return DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+    } else {
+      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+      // equality comparison, then we're just comparing whether X itself is
+      // zero.
+      if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+          N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+          N0.getOperand(1).getOpcode() == ISD::Constant) {
+        unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+            ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+            // (srl (ctlz x), 5) == 0  -> X != 0
+            // (srl (ctlz x), 5) != 1  -> X != 0
+            Cond = ISD::SETNE;
+          } else {
+            // (srl (ctlz x), 5) != 0  -> X == 0
+            // (srl (ctlz x), 5) == 1  -> X == 0
+            Cond = ISD::SETEQ;
+          }
+          SDValue Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+                              Zero, Cond);
+        }
+      }
+
+      // If the LHS is '(and load, const)', the RHS is 0,
+      // the test is for equality or unsigned, and all 1 bits of the const are
+      // in the same partial word, see if we can shorten the load.
+      if (DCI.isBeforeLegalize() &&
+          N0.getOpcode() == ISD::AND && C1 == 0 &&
+          N0.getNode()->hasOneUse() &&
+          isa<LoadSDNode>(N0.getOperand(0)) &&
+          N0.getOperand(0).getNode()->hasOneUse() &&
+          isa<ConstantSDNode>(N0.getOperand(1))) {
+        LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+        uint64_t bestMask = 0;
+        unsigned bestWidth = 0, bestOffset = 0;
+        if (!Lod->isVolatile() && Lod->isUnindexed() &&
+            // FIXME: This uses getZExtValue() below so it only works on i64 and
+            // below.
+            N0.getValueType().getSizeInBits() <= 64) {
+          unsigned origWidth = N0.getValueType().getSizeInBits();
+          // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+          // 8 bits, but have to be careful...
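+          // (Illustrative: (and (load i32 p), 0xff00) == 0 can be rewritten
+          // to test a single byte loaded from p+1 on a little-endian target.)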
+          if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+            origWidth = Lod->getMemoryVT().getSizeInBits();
+          uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+          for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+            uint64_t newMask = (1ULL << width) - 1;
+            for (unsigned offset=0; offset<origWidth/width; offset++) {
+              if ((newMask & Mask) == Mask) {
+                if (!TD->isLittleEndian())
+                  bestOffset = (origWidth/width - offset - 1) * (width/8);
+                else
+                  bestOffset = (uint64_t)offset * (width/8);
+                bestMask = Mask >> (offset * (width/8) * 8);
+                bestWidth = width;
+                break;
+              }
+              newMask = newMask << width;
+            }
+          }
+        }
+        if (bestWidth) {
+          MVT newVT = MVT::getIntegerVT(bestWidth);
+          if (newVT.isRound()) {
+            MVT PtrType = Lod->getOperand(1).getValueType();
+            SDValue Ptr = Lod->getBasePtr();
+            if (bestOffset != 0)
+              Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+                                DAG.getConstant(bestOffset, PtrType));
+            unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+            SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+                                          Lod->getSrcValue(),
+                                          Lod->getSrcValueOffset() + bestOffset,
+                                          false, NewAlign);
+            return DAG.getSetCC(dl, VT,
+                                DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+                                            DAG.getConstant(bestMask, newVT)),
+                                DAG.getConstant(0LL, newVT), Cond);
+          }
+        }
+      }
+
+      // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+      if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+        unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+        // If the comparison constant has bits in the upper part, the
+        // zero-extended value could never match.
+        if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+                                                C1.getBitWidth() - InSize))) {
+          switch (Cond) {
+          case ISD::SETUGT:
+          case ISD::SETUGE:
+          case ISD::SETEQ: return DAG.getConstant(0, VT);
+          case ISD::SETULT:
+          case ISD::SETULE:
+          case ISD::SETNE: return DAG.getConstant(1, VT);
+          case ISD::SETGT:
+          case ISD::SETGE:
+            // True if the sign bit of C1 is set.
+            return DAG.getConstant(C1.isNegative(), VT);
+          case ISD::SETLT:
+          case ISD::SETLE:
+            // True if the sign bit of C1 isn't set.
+            return DAG.getConstant(C1.isNonNegative(), VT);
+          default:
+            break;
+          }
+        }
+
+        // Otherwise, we can perform the comparison with the low bits.
+        switch (Cond) {
+        case ISD::SETEQ:
+        case ISD::SETNE:
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+        case ISD::SETULT:
+        case ISD::SETULE:
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(APInt(C1).trunc(InSize),
+                                              N0.getOperand(0).getValueType()),
+                              Cond);
+        default:
+          break;   // todo, be more careful with signed comparisons
+        }
+      } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+        unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+        MVT ExtDstTy = N0.getValueType();
+        unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+        // If the extended part has any inconsistent bits, it cannot ever
+        // compare equal.  In other words, they have to be all ones or all
+        // zeros.
+        APInt ExtBits =
+          APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+        if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+          return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+        SDValue ZextOp;
+        MVT Op0Ty = N0.getOperand(0).getValueType();
+        if (Op0Ty == ExtSrcTy) {
+          ZextOp = N0.getOperand(0);
+        } else {
+          APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+          ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+                               DAG.getConstant(Imm, Op0Ty));
+        }
+        if (!DCI.isCalledByLegalizer())
+          DCI.AddToWorklist(ZextOp.getNode());
+        // Otherwise, make this a use of a zext.
+        return DAG.getSetCC(dl, VT, ZextOp,
+                            DAG.getConstant(C1 & APInt::getLowBitsSet(
+                                                               ExtDstTyBits,
+                                                               ExtSrcTyBits),
+                                            ExtDstTy),
+                            Cond);
+      } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+        // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
+        if (N0.getOpcode() == ISD::SETCC) {
+          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+          if (TrueWhenTrue)
+            return N0;
+
+          // Invert the condition.
+          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+          CC = ISD::getSetCCInverse(CC,
+                                   N0.getOperand(0).getValueType().isInteger());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        }
+
+        if ((N0.getOpcode() == ISD::XOR ||
+             (N0.getOpcode() == ISD::AND &&
+              N0.getOperand(0).getOpcode() == ISD::XOR &&
+              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+            isa<ConstantSDNode>(N0.getOperand(1)) &&
+            cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+          // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
+          // can only do this if the top bits are known zero.
+          unsigned BitWidth = N0.getValueSizeInBits();
+          if (DAG.MaskedValueIsZero(N0,
+                                    APInt::getHighBitsSet(BitWidth,
+                                                          BitWidth-1))) {
+            // Okay, get the un-inverted input value.
+            SDValue Val;
+            if (N0.getOpcode() == ISD::XOR)
+              Val = N0.getOperand(0);
+            else {
+              assert(N0.getOpcode() == ISD::AND &&
+                     N0.getOperand(0).getOpcode() == ISD::XOR);
+              // ((X^1)&1)^1  -> X & 1
+              Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+                                N0.getOperand(0).getOperand(0),
+                                N0.getOperand(1));
+            }
+            return DAG.getSetCC(dl, VT, Val, N1,
+                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+          }
+        }
+      }
+
+      APInt MinVal, MaxVal;
+      unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+      if (ISD::isSignedIntSetCC(Cond)) {
+        MinVal = APInt::getSignedMinValue(OperandBitSize);
+        MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+      } else {
+        MinVal = APInt::getMinValue(OperandBitSize);
+        MaxVal = APInt::getMaxValue(OperandBitSize);
+      }
+
+      // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+        if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
+        // X >= C0 --> X > (C0-1)
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(C1-1, N1.getValueType()),
+                            (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+      }
+
+      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+        if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
+        // X <= C0 --> X < (C0+1)
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(C1+1, N1.getValueType()),
+                            (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+      }
+
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+        return DAG.getConstant(0, VT);      // X < MIN --> false
+      if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+        return DAG.getConstant(1, VT);      // X >= MIN --> true
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+        return DAG.getConstant(0, VT);      // X > MAX --> false
+      if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+        return DAG.getConstant(1, VT);      // X <= MAX --> true
+
+      // Canonicalize setgt X, Min --> setne X, Min
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+        return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+      // Canonicalize setlt X, Max --> setne X, Max
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+        return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+      // If we have setult X, 1, turn it into seteq X, 0
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(MinVal, N0.getValueType()),
+                            ISD::SETEQ);
+      // If we have setugt X, Max-1, turn it into seteq X, Max
+      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(MaxVal, N0.getValueType()),
+                            ISD::SETEQ);
+
+      // If we have "setcc X, C0", check to see if we can shrink the immediate
+      // by changing cc.
+
+      // SETUGT X, SINTMAX  -> SETLT X, 0
+      if (Cond == ISD::SETUGT &&
+          C1 == APInt::getSignedMaxValue(OperandBitSize))
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(0, N1.getValueType()),
+                            ISD::SETLT);
+
+      // SETULT X, SINTMIN  -> SETGT X, -1
+      if (Cond == ISD::SETULT &&
+          C1 == APInt::getSignedMinValue(OperandBitSize)) {
+        SDValue ConstMinusOne =
+            DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+                            N1.getValueType());
+        return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+      }
+
+      // Fold bit comparisons when we can.
+      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+          VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        if (ConstantSDNode *AndRHS =
+                    dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          MVT ShiftTy = DCI.isBeforeLegalize() ?
+            getPointerTy() : getShiftAmountTy();
+          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
+            // Perform the xform if the AND RHS is a single bit.
+            if (isPowerOf2_64(AndRHS->getZExtValue())) {
+              return DAG.getNode(ISD::SRL, dl, VT, N0,
+                                 DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
+                                                 ShiftTy));
+            }
+          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+            // (X & 8) == 8  -->  (X & 8) >> 3
+            // Perform the xform if C1 is a single bit.
+            if (C1.isPowerOf2()) {
+              return DAG.getNode(ISD::SRL, dl, VT, N0,
+                                 DAG.getConstant(C1.logBase2(), ShiftTy));
+            }
+          }
+        }
+    }
+  } else if (isa<ConstantSDNode>(N0.getNode())) {
+      // Ensure that the constant occurs on the RHS.
+    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  }
+
+  if (isa<ConstantFPSDNode>(N0.getNode())) {
+    // Constant fold or commute setcc.
+    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+    if (O.getNode()) return O;
+  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    // If the RHS of an FP comparison is a constant, simplify it away in
+    // some cases.
+    if (CFP->getValueAPF().isNaN()) {
+      // If an operand is known to be a nan, we can fold it.
+      switch (ISD::getUnorderedFlavor(Cond)) {
+      default: assert(0 && "Unknown flavor!");
+      case 0:  // Known false.
+        return DAG.getConstant(0, VT);
+      case 1:  // Known true.
+        return DAG.getConstant(1, VT);
+      case 2:  // Undefined.
+        return DAG.getUNDEF(VT);
+      }
+    }
+
+    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
+    // constant if knowing that the operand is non-nan is enough.  We prefer to
+    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+    // materialize 0.0.
+    if (Cond == ISD::SETO || Cond == ISD::SETUO)
+      return DAG.getSetCC(dl, VT, N0, N0, Cond);
+  }
+
+  if (N0 == N1) {
+    // We can always fold X == X for integer setcc's.
+    if (N0.getValueType().isInteger())
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2)   // FP operators that are undefined on NaNs.
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+      return DAG.getConstant(UOF, VT);
+    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond)
+      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      N0.getValueType().isInteger()) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) -->  Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+                                Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+                                Cond);
+        }
+      }
+
+      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+            return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                                DAG.getConstant(RHSC->getAPIntValue()-
+                                                LHSR->getAPIntValue(),
+                                N0.getValueType()), Cond);
+          }
+
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+          if (N0.getOpcode() == ISD::XOR)
+            // If we know that all of the inverted bits are zero, don't bother
+            // performing the inversion.
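+            // (Illustrative: if X is known to fit in 8 bits, (X ^ 0xff) == 0xf0
+            // becomes X == 0x0f.)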
+            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+              return
+                DAG.getSetCC(dl, VT, N0.getOperand(0),
+                             DAG.getConstant(LHSR->getAPIntValue() ^
+                                               RHSC->getAPIntValue(),
+                                             N0.getValueType()),
+                             Cond);
+        }
+
+        // Turn (C1-X) == C2 --> X == C1-C2
+        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+            return
+              DAG.getSetCC(dl, VT, N0.getOperand(1),
+                           DAG.getConstant(SUBC->getAPIntValue() -
+                                             RHSC->getAPIntValue(),
+                                           N0.getValueType()),
+                           Cond);
+          }
+        }
+      }
+
+      // Simplify (X+Z) == X -->  Z == 0
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(dl, VT, N0.getOperand(1),
+                            DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.getNode()->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X  --> Z == X<<1
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+                                   N1,
+                                   DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+        }
+      }
+    }
+
+    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+        N1.getOpcode() == ISD::XOR) {
+      // Simplify  X == (X+Z) -->  Z == 0
+      if (N1.getOperand(0) == N0) {
+        return DAG.getSetCC(dl, VT, N1.getOperand(1),
+                            DAG.getConstant(0, N1.getValueType()), Cond);
+      } else if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+          return DAG.getSetCC(dl, VT, N1.getOperand(0),
+                              DAG.getConstant(0, N1.getValueType()), Cond);
+        } else if (N1.getNode()->hasOneUse()) {
+          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // X == (Z-X)  --> X<<1 == Z
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+                                   DAG.getConstant(1, getShiftAmountTy()));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+        }
+      }
+    }
+
+    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+    // Note that where y is variable and is known to have at most
+    // one bit set (for example, if it is z&1) we cannot do this;
+    // the expressions are not equivalent when y==0.
+    if (N0.getOpcode() == ISD::AND)
+      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+        if (ValueHasExactlyOneBitSet(N1, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N1.getValueType());
+          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+        }
+      }
+    if (N1.getOpcode() == ISD::AND)
+      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+        if (ValueHasExactlyOneBitSet(N0, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+        }
+      }
+  }
+
+  // Fold away ALL boolean setcc's.
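+  // (On i1 operands every comparison reduces to and/or/xor/not; the cases
+  // below enumerate the reductions.)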
+  SDValue Temp;
+  if (N0.getValueType() == MVT::i1 && foldBooleans) {
+    switch (Cond) {
+    default: assert(0 && "Unknown integer setcc!");
+    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
+      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      N0 = DAG.getNOT(dl, Temp, MVT::i1);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETNE:  // X != Y   -->  (X^Y)
+      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      break;
+    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+      break;
+    }
+    if (VT != MVT::i1) {
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(N0.getNode());
+      // FIXME: If running after legalize, we probably can't do this.
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+    }
+    return N0;
+  }
+
+  // Could not fold it.
+  return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+                                    int64_t &Offset) const {
+  if (isa<GlobalAddressSDNode>(N)) {
+    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+    GA = GASD->getGlobal();
+    Offset += GASD->getOffset();
+    return true;
+  }
+
+  if (N->getOpcode() == ISD::ADD) {
+    SDValue N1 = N->getOperand(0);
+    SDValue N2 = N->getOperand(1);
+    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
+/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
+/// location that the 'Base' load is loading from.
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
+                                       unsigned Bytes, int Dist,
+                                       const MachineFrameInfo *MFI) const {
+  if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+    return false;
+  MVT VT = LD->getValueType(0);
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LD->getOperand(1);
+  SDValue BaseLoc = Base->getOperand(1);
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  GlobalValue *GV1 = NULL;
+  GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  // Default implementation: no optimization.
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+  // FIXME: lots more standard ones to handle.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'r': return C_RegisterClass;
+    case 'm':    // memory
+    case 'o':    // offsetable
+    case 'V':    // not offsetable
+      return C_Memory;
+    case 'i':    // Simple Integer or Relocatable Constant
+    case 'n':    // Simple Integer
+    case 's':    // Relocatable Constant
+    case 'X':    // Allow ANY value.
+    case 'I':    // Target registers.
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+      return C_Other;
+    }
+  }
+
+  if (Constraint.size() > 1 && Constraint[0] == '{' &&
+      Constraint[Constraint.size()-1] == '}')
+    return C_Register;
+  return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const {
+  if (ConstraintVT.isInteger())
+    return "r";
+  if (ConstraintVT.isFloatingPoint())
+    return "f";      // works for many targets
+  return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                  char ConstraintLetter,
+                                                  bool hasMemory,
+                                                  std::vector<SDValue> &Ops,
+                                                  SelectionDAG &DAG) const {
+  switch (ConstraintLetter) {
+  default: break;
+  case 'X':     // Allows any operand; labels (basic block) use this.
+    if (Op.getOpcode() == ISD::BasicBlock) {
+      Ops.push_back(Op);
+      return;
+    }
+    // fall through
+  case 'i':    // Simple Integer or Relocatable Constant
+  case 'n':    // Simple Integer
+  case 's': {  // Relocatable Constant
+    // These operands are interested in values of the form (GV+C), where C may
+    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
+    // is possible and fine if either GV or C are missing.
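+    // (Illustrative: @gv, @gv+8, and a bare 42 are all of this form; the
+    // checks below reject the symbolic cases for 'n' and the bare-constant
+    // case for 's'.)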
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+    // If we have "(add GV, C)", pull out GV/C
+    if (Op.getOpcode() == ISD::ADD) {
+      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C == 0 || GA == 0) {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+      }
+      if (C == 0 || GA == 0)
+        C = 0, GA = 0;
+    }
+
+    // If we find a valid operand, map to the TargetXXX version so that the
+    // value itself doesn't get selected.
+    if (GA) {   // Either &GV or &GV+C
+      if (ConstraintLetter != 'n') {
+        int64_t Offs = GA->getOffset();
+        if (C) Offs += C->getZExtValue();
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                                 Op.getValueType(), Offs));
+        return;
+      }
+    }
+    if (C) {   // just C, no GV.
+      // Simple constants are not allowed for 's'.
+      if (ConstraintLetter != 's') {
+        // gcc prints these as sign extended.  Sign extend value to 64 bits
+        // now; without this it would get ZExt'd later in
+        // ScheduleDAGSDNodes::EmitNode, which is very generic.
+        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+                                            MVT::i64));
+        return;
+      }
+    }
+    break;
+  }
+  }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT VT) const {
+  return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             MVT VT) const {
+  if (Constraint[0] != '{')
+    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+  // Figure out which register class contains this reg.
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+       E = RI->regclass_end(); RCI != E; ++RCI) {
+    const TargetRegisterClass *RC = *RCI;
+
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    bool isLegal = false;
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (isTypeLegal(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+
+    if (!isLegal) continue;
+
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+         I != E; ++I) {
+      if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
+        return std::make_pair(*I, RC);
+    }
+  }
+
+  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+  default: assert(0 && "Unknown constraint type!");
+  case TargetLowering::C_Other:
+  case TargetLowering::C_Unknown:
+    return 0;
+  case TargetLowering::C_Register:
+    return 1;
+  case TargetLowering::C_RegisterClass:
+    return 2;
+  case TargetLowering::C_Memory:
+    return 3;
+  }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+///    Other         -> immediates and magic values
+///    Register      -> one specific register
+///    RegisterClass -> a group of regs
+///    Memory        -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it.  The problem here
+/// is that if we have something that could either be in a register or in
+/// memory that use of the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.  Because of
+/// this the heuristic we use is:
+///
+///  1) If there is an 'other' constraint, and if the operand is valid for
+///     that constraint, use it.  This makes us take advantage of 'i'
+///     constraints when available.
+///  2) Otherwise, pick the most general constraint present.  This prefers
+///     'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+                             bool hasMemory, const TargetLowering &TLI,
+                             SDValue Op, SelectionDAG *DAG) {
+  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+  unsigned BestIdx = 0;
+  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+  int BestGenerality = -1;
+
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+    TargetLowering::ConstraintType CType =
+      TLI.getConstraintType(OpInfo.Codes[i]);
+
+    // If this is an 'other' constraint, see if the operand is valid for it.
+    // For example, on X86 we might have an 'rI' constraint.  If the operand
+    // is an integer in the range [0..31] we want to use I (saving a load
+    // of a register), otherwise we must use 'r'.
+    if (CType == TargetLowering::C_Other && Op.getNode()) {
+      assert(OpInfo.Codes[i].size() == 1 &&
+             "Unhandled multi-letter 'other' constraint");
+      std::vector<SDValue> ResultOps;
+      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+                                       ResultOps, *DAG);
+      if (!ResultOps.empty()) {
+        BestType = CType;
+        BestIdx = i;
+        break;
+      }
+    }
+
+    // This constraint letter is more general than the previous one, use it.
+    int Generality = getConstraintGenerality(CType);
+    if (Generality > BestGenerality) {
+      BestType = CType;
+      BestIdx = i;
+      BestGenerality = Generality;
+    }
+  }
+
+  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+  OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+                                            SDValue Op,
+                                            bool hasMemory,
+                                            SelectionDAG *DAG) const {
+  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+  // Single-letter constraints ('r') are very common.
+  if (OpInfo.Codes.size() == 1) {
+    OpInfo.ConstraintCode = OpInfo.Codes[0];
+    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+  } else {
+    ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+  }
+
+  // 'X' matches anything.
+  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+    // Labels and constants are handled elsewhere ('X' is the only thing
+    // that matches labels).
+    if (isa<BasicBlock>(OpInfo.CallOperandVal) ||
+        isa<ConstantInt>(OpInfo.CallOperandVal))
+      return;
+
+    // Otherwise, try to resolve it to something we know about by looking at
+    // the actual operand type.
+    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+      OpInfo.ConstraintCode = Repl;
+      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                           const Type *Ty) const {
+  // The default implementation of this implements a conservative RISCy, r+r and
+  // r+i addr mode.
+
+  // Allows a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // Only support r+r,
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  }
+
+  return true;
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+  APInt::ms magics = d.magic();
+
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHS, VT))
+    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhs or equivalent
+  // If d > 0 and m < 0, add the numerator
+  if (d.isStrictlyPositive() && magics.m.isNegative()) {
+    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // If d < 0 and m > 0, subtract the numerator.
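+  // (Worked example, illustrative: for i32 sdiv by 7, magics.m is 0x92492493,
+  // which is negative, so the add above fires and this subtract is skipped;
+  // magics.s is 2, giving the arithmetic shift below, and the final step
+  // adds back the extracted sign bit.)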
+  if (d.isNegative() && magics.m.isStrictlyPositive()) {
+    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Extract the sign bit and add it to the quotient
+  SDValue T =
+    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+                                                     getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.getNode());
+  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  APInt::mu magics = N1C->getAPIntValue().magicu();
+
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhu or equivalent
+  if (Created)
+    Created->push_back(Q.getNode());
+
+  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+  // Found call return.
+  if (node.getOpcode() == ISD::CALL) return node;
+  // Ignore MERGE_VALUES. Will have at least one operand.
+  if (node.getOpcode() == ISD::MERGE_VALUES)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Ignore ANY_EXTEND node.
+  if (node.getOpcode() == ISD::ANY_EXTEND)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  if (node.getOpcode() == ISD::TRUNCATE)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Any other node type.
+  return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+                                                    SDValue Ret) {
+  unsigned NumOps = Ret.getNumOperands();
+  // ISD::CALL results:(value0, ..., valuen, chain)
+  // ISD::RET  operands:(chain, value0, flag0, ..., valuen, flagn)
+  // Value return:
+  // Check that operand of the RET node sources from the CALL node. The RET node
+  // has at least two operands. Operand 0 holds the chain. Operand 1 holds the
+  // value.
+  if (NumOps > 1 &&
+      IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+    return true;
+  // void return: The RET node has the chain result value of the CALL node as
+  // input.
+  if (NumOps == 1 &&
+      Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+    return true;
+
+  return false;
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 000000000000..2402f81bb04f
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,439 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+//   "Accurate Garbage Collection in an Uncooperative Environment"
+//   Fergus Henderson, ISMM, 2002
+//
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// coallocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+  class VISIBILITY_HIDDEN ShadowStackGC : public GCStrategy {
+    /// RootChain - This is the global linked-list that contains the chain of GC
+    /// roots.
+    GlobalVariable *Head;
+
+    /// StackEntryTy - Abstract type of a link in the shadow stack.
+    ///
+    const StructType *StackEntryTy;
+
+    /// Roots - GC roots in the current function. Each is a pair of the
+    /// intrinsic call and its corresponding alloca.
+    std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+  public:
+    ShadowStackGC();
+
+    bool initializeCustomLowering(Module &M);
+    bool performCustomLowering(Function &F);
+
+  private:
+    bool IsNullValue(Value *V);
+    Constant *GetFrameMap(Function &F);
+    const Type* GetConcreteStackEntryType(Function &F);
+    void CollectRoots(Function &F);
+    static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+                                        int Idx1, const char *Name);
+    static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+                                        int Idx1, int Idx2, const char *Name);
+  };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+  /// EscapeEnumerator - This is a little algorithm to find all escape points
+  /// from a function so that "finally"-style code can be inserted. In addition
+  /// to finding the existing return and unwind instructions, it also (if
+  /// necessary) transforms any call instructions into invokes and sends them to
+  /// a landing pad.
+  ///
+  /// It's wrapped up in a state machine using the same transform C# uses for
+  /// 'yield return' enumerators. This transform allows it to be non-allocating.
+  class VISIBILITY_HIDDEN EscapeEnumerator {
+    Function &F;
+    const char *CleanupBBName;
+
+    // State.
+    int State;
+    Function::iterator StateBB, StateE;
+    IRBuilder<> Builder;
+
+  public:
+    EscapeEnumerator(Function &F, const char *N = "cleanup")
+      : F(F), CleanupBBName(N), State(0) {}
+
+    IRBuilder<> *Next() {
+      switch (State) {
+      default:
+        return 0;
+
+      case 0:
+        StateBB = F.begin();
+        StateE = F.end();
+        State = 1;
+
+      case 1:
+        // Find all 'return' and 'unwind' instructions.
+        while (StateBB != StateE) {
+          BasicBlock *CurBB = StateBB++;
+
+          // Branches and invokes do not escape, only unwind and return do.
+          TerminatorInst *TI = CurBB->getTerminator();
+          if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+            continue;
+
+          Builder.SetInsertPoint(TI->getParent(), TI);
+          return &Builder;
+        }
+
+        State = 2;
+
+        // Find all 'call' instructions.
+        SmallVector<Instruction*,16> Calls;
+        for (Function::iterator BB = F.begin(),
+                                E = F.end(); BB != E; ++BB)
+          for (BasicBlock::iterator II = BB->begin(),
+                                    EE = BB->end(); II != EE; ++II)
+            if (CallInst *CI = dyn_cast<CallInst>(II))
+              if (!CI->getCalledFunction() ||
+                  !CI->getCalledFunction()->getIntrinsicID())
+                Calls.push_back(CI);
+
+        if (Calls.empty())
+          return 0;
+
+        // Create a cleanup block.
+        BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F);
+        UnwindInst *UI = new UnwindInst(CleanupBB);
+
+        // Transform the 'call' instructions into 'invoke's branching to the
+        // cleanup block. Go in reverse order to make prettier BB names.
+        SmallVector<Value*,16> Args;
+        for (unsigned I = Calls.size(); I != 0; ) {
+          CallInst *CI = cast<CallInst>(Calls[--I]);
+
+          // Split the basic block containing the function call.
+          BasicBlock *CallBB = CI->getParent();
+          BasicBlock *NewBB =
+            CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+          // Remove the unconditional branch inserted at the end of CallBB.
+          CallBB->getInstList().pop_back();
+          NewBB->getInstList().remove(CI);
+
+          // Create a new invoke instruction.
+          Args.clear();
+          Args.append(CI->op_begin() + 1, CI->op_end());
+
+          InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+                                              NewBB, CleanupBB,
+                                              Args.begin(), Args.end(),
+                                              CI->getName(), CallBB);
+          II->setCallingConv(CI->getCallingConv());
+          II->setAttributes(CI->getAttributes());
+          CI->replaceAllUsesWith(II);
+          delete CI;
+        }
+
+        Builder.SetInsertPoint(UI->getParent(), UI);
+        return &Builder;
+      }
+    }
+  };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+  InitRoots = true;
+  CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+  // doInitialization creates the abstract type of this value.
+
+  Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+
+  // Truncate the ShadowStackDescriptor if some metadata is null.
+  unsigned NumMeta = 0;
+  SmallVector<Constant*,16> Metadata;
+  for (unsigned I = 0; I != Roots.size(); ++I) {
+    Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+    if (!C->isNullValue())
+      NumMeta = I + 1;
+    Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+  }
+
+  Constant *BaseElts[] = {
+    ConstantInt::get(Type::Int32Ty, Roots.size(), false),
+    ConstantInt::get(Type::Int32Ty, NumMeta, false),
+  };
+
+  Constant *DescriptorElts[] = {
+    ConstantStruct::get(BaseElts, 2),
+    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+                       Metadata.begin(), NumMeta)
+  };
+
+  Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+
+  std::string TypeName("gc_map.");
+  TypeName += utostr(NumMeta);
+  F.getParent()->addTypeName(TypeName, FrameMap->getType());
+
+  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+  //        that, short of multithreaded LLVM, it should be safe; all that is
+  //        necessary is that a simple Module::iterator loop not be invalidated.
+  //        Appending to the GlobalVariable list is safe in that sense.
+  //
+  //        All of the output passes emit globals last. The ExecutionEngine
+  //        explicitly supports adding globals to the module after
+  //        initialization.
+  //
+  //        Still, if it isn't deemed acceptable, then this transformation needs
+  //        to be a ModulePass (which means it cannot be in the 'llc' pipeline
+  //        (which uses a FunctionPassManager (which segfaults (not asserts) if
+  //        provided a ModulePass))).
+  Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+                                    GlobalVariable::InternalLinkage,
+                                    FrameMap, "__gc_" + F.getName(),
+                                    F.getParent());
+
+  Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
+                              ConstantInt::get(Type::Int32Ty, 0) };
+  return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+  // doInitialization creates the generic version of this type.
+  std::vector<const Type*> EltTys;
+  EltTys.push_back(StackEntryTy);
+  for (size_t I = 0; I != Roots.size(); I++)
+    EltTys.push_back(Roots[I].second->getAllocatedType());
+  Type *Ty = StructType::get(EltTys);
+
+  std::string TypeName("gc_stackentry.");
+  TypeName += F.getName();
+  F.getParent()->addTypeName(TypeName, Ty);
+
+  return Ty;
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+  // struct FrameMap {
+  //   int32_t NumRoots; // Number of roots in stack frame.
+  //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
+  //   void *Meta[];     // May be absent for roots without metadata.
+ // }; + std::vector EltTys; + EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. + StructType *FrameMapTy = StructType::get(EltTys); + M.addTypeName("gc_map", FrameMapTy); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + OpaqueType *RecursiveTy = OpaqueType::get(); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(FrameMapPtrTy); + PATypeHolder LinkTyH = StructType::get(EltTys); + + RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); + StackEntryTy = cast(LinkTyH.get()); + const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from + // a FunctionPass? + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(StackEntryPtrTy, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain", &M); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGC::IsNullValue(Value *V) { + if (Constant *C = dyn_cast(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGC::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. + + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector,16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair Pair = std::make_pair( + CI, cast(CI->getOperand(1)->stripPointerCasts())); + if (IsNullValue(CI->getOperand(2))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. 
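+  // For example (an illustration): with roots R0(meta), R1(null), R2(null),
+  // the order below is [R0, R1, R2], so the truncation logic in GetFrameMap
+  // above yields NumMeta = 1 and the FrameMap's Meta array carries a single
+  // entry rather than one per root.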
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx), + ConstantInt::get(Type::Int32Ty, Idx2) }; + Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); + + assert(isa(Val) && "Unexpected folded constant"); + + return dyn_cast(Val); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx) }; + Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); + + assert(isa(Val) && "Unexpected folded constant"); + + return dyn_cast(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGC::performCustomLowering(Function &F) { + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. 
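+    // Roughly, in IR terms (an illustrative sketch; value names invented,
+    // 2.x typed-pointer syntax):
+    //   %gc_savedhead = load %gc_stackentry** %gc_frame.next
+    //   store %gc_stackentry* %gc_savedhead, %gc_stackentry** @llvm_gc_root_chain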
+    Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+                                           "gc_frame.next");
+    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+    AtExit->CreateStore(SavedHead, Head);
+  }
+
+  // Delete the original allocas (which are no longer used) and the intrinsic
+  // calls (which are no longer valid). Doing this last avoids invalidating
+  // iterators.
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+    Roots[I].first->eraseFromParent();
+    Roots[I].second->eraseFromParent();
+  }
+
+  Roots.clear();
+  return true;
+}
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 000000000000..e44a138cf925
--- /dev/null
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1141 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+//   machine CFG to tightly surround their uses so that execution paths that
+//   do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+//   loop the spills are placed in the loop preheader, and restores are
+//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+//   the use info for the CSRs inside the region is propagated outward in the
+//   CFG to ensure validity of the spill/restore placements. This decreases
+//   the effectiveness of shrink wrapping but does not require edge splitting
+//   in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+               cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
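+// For example (hypothetical invocation; the module name is made up):
+//   llc -shrink-wrap -shrink-wrap-func=foo foo.bc
+// shrink wraps only the function 'foo' (effective in debug builds only; see
+// the NDEBUG guard in initShrinkWrappingInfo below).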
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+               cl::desc("Shrink wrap the specified function"),
+               cl::value_desc("funcname"),
+               cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+  None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+  cl::desc("Print shrink wrapping debugging information"),
+  cl::values(
+    clEnumVal(None      , "disable debug output"),
+    clEnumVal(BasicInfo , "print basic DF sets"),
+    clEnumVal(Iterations, "print SR sets for each iteration"),
+    clEnumVal(Details   , "print all DF sets"),
+    clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  if (ShrinkWrapping || ShrinkWrapFunc != "") {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<MachineDominatorTree>();
+  }
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+//  ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+  assert(LP && "Machine loop is NULL.");
+  MachineBasicBlock* PHDR = LP->getLoopPreheader();
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    PHDR = PLP->getLoopPreheader();
+    PLP = PLP->getParentLoop();
+  }
+  return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+  if (LP == 0)
+    return 0;
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    LP = PLP;
+    PLP = PLP->getParentLoop();
+  }
+  return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+  return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+  AnticIn.clear();
+  AnticOut.clear();
+  AvailIn.clear();
+  AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+  ReturnBlocks.clear();
+  clearAnticAvailSets();
+  UsedCSRegs.clear();
+  CSRUsed.clear();
+  TLLoops.clear();
+  CSRSave.clear();
+  CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+  clearAllSets();
+  EntryBlock = 0;
+#ifndef NDEBUG
+  HasFastExitPath = false;
+#endif
+  ShrinkWrapThisFunction = ShrinkWrapping;
+  // DEBUG: enable or disable shrink wrapping for the current function
+  // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+  if (ShrinkWrapFunc != "") {
+    std::string MFName = MF->getFunction()->getName();
+    ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+  }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+/// +void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { + + DEBUG(MF = &Fn); + + initShrinkWrappingInfo(); + + DEBUG(if (ShrinkWrapThisFunction) { + DOUT << "Place CSR spills/restores for " + << MF->getFunction()->getName() << "\n"; + }); + + if (calculateSets(Fn)) + placeSpillsAndRestores(Fn); +} + +/// calcAnticInOut - calculate the anticipated in/out reg sets +/// for the given MBB by looking forward in the MCFG at MBB's +/// successors. +/// +bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { + bool changed = false; + + // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) + SmallVector successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + + unsigned i = 0, e = successors.size(); + if (i != e) { + CSRegSet prevAnticOut = AnticOut[MBB]; + MachineBasicBlock* SUCC = successors[i]; + + AnticOut[MBB] = AnticIn[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + AnticOut[MBB] &= AnticIn[SUCC]; + } + if (prevAnticOut != AnticOut[MBB]) + changed = true; + } + + // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); + CSRegSet prevAnticIn = AnticIn[MBB]; + AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; + if (prevAnticIn |= AnticIn[MBB]) + changed = true; + return changed; +} + +/// calcAvailInOut - calculate the available in/out reg sets +/// for the given MBB by looking backward in the MCFG at MBB's +/// predecessors. +/// +bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { + bool changed = false; + + // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) + SmallVector predecessors; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED != MBB) + predecessors.push_back(PRED); + } + + unsigned i = 0, e = predecessors.size(); + if (i != e) { + CSRegSet prevAvailIn = AvailIn[MBB]; + MachineBasicBlock* PRED = predecessors[i]; + + AvailIn[MBB] = AvailOut[PRED]; + for (++i; i != e; ++i) { + PRED = predecessors[i]; + AvailIn[MBB] &= AvailOut[PRED]; + } + if (prevAvailIn != AvailIn[MBB]) + changed = true; + } + + // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); + CSRegSet prevAvailOut = AvailOut[MBB]; + AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; + if (prevAvailOut |= AvailOut[MBB]) + changed = true; + return changed; +} + +/// calculateAnticAvail - build the sets anticipated and available +/// registers in the MCFG of the current function iteratively, +/// doing a combined forward and backward analysis. +/// +void PEI::calculateAnticAvail(MachineFunction &Fn) { + // Initialize data flow sets. + clearAnticAvailSets(); + + // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + bool changed = true; + unsigned iterations = 0; + while (changed) { + changed = false; + ++iterations; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // Calculate anticipated in, out regs at MBB from + // anticipated at successors of MBB. + changed |= calcAnticInOut(MBB); + + // Calculate available in, out regs at MBB from + // available at predecessors of MBB. 
+ changed |= calcAvailInOut(MBB); + } + } + + DEBUG(if (ShrinkWrapDebugging >= Details) { + DOUT << "-----------------------------------------------------------\n"; + DOUT << " Antic/Avail Sets:\n"; + DOUT << "-----------------------------------------------------------\n"; + DOUT << "iterations = " << iterations << "\n"; + DOUT << "-----------------------------------------------------------\n"; + DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; + DOUT << "-----------------------------------------------------------\n"; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + DOUT << "-----------------------------------------------------------\n"; + }); +} + +/// propagateUsesAroundLoop - copy used register info from MBB to all blocks +/// of the loop given by LP and its parent loops. This prevents spills/restores +/// from being placed in the bodies of loops. +/// +void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { + if (! MBB || !LP) + return; + + std::vector loopBlocks = LP->getBlocks(); + for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { + MachineBasicBlock* LBB = loopBlocks[i]; + if (LBB == MBB) + continue; + if (CSRUsed[LBB].contains(CSRUsed[MBB])) + continue; + CSRUsed[LBB] |= CSRUsed[MBB]; + } +} + +/// calculateSets - collect the CSRs used in this function, compute +/// the DF sets that describe the initial minimal regions in the +/// Machine CFG around which CSR spills and restores must be placed. +/// +/// Additionally, this function decides if shrink wrapping should +/// be disabled for the current function, checking the following: +/// 1. the current function has more than 500 MBBs: heuristic limit +/// on function size to reduce compile time impact of the current +/// iterative algorithm. +/// 2. all CSRs are used in the entry block. +/// 3. all CSRs are used in all immediate successors of the entry block. +/// 4. all CSRs are used in a subset of blocks, each of which dominates +/// all return blocks. These blocks, taken as a subgraph of the MCFG, +/// are equivalent to the entry block since all execution paths pass +/// through them. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { + DEBUG(if (ShrinkWrapThisFunction) + DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); + return false; + } + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + // Determine if this function has fast exit paths. + DEBUG(if (ShrinkWrapThisFunction) + findFastExitPath()); + + // Limit shrink wrapping via the current iterative bit vector + // implementation to functions with <= 500 MBBs. + if (Fn.size() > 500) { + DEBUG(if (ShrinkWrapThisFunction) + DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); + ShrinkWrapThisFunction = false; + } + + // Return now if not shrink wrapping. + if (! ShrinkWrapThisFunction) + return false; + + // Collect set of used CSRs. 
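+  // Note that CSRegSet elements are indices into CSI, not register numbers.
+  // As an illustration (register names invented and target-specific): with
+  // CSI = [EBX, ESI, EDI], the set {0, 2} denotes {EBX, EDI}.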
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + } + + // Walk instructions in all MBBs, create CSRUsed[] sets, choose + // whether or not to shrink wrap this function. + MachineLoopInfo &LI = getAnalysis(); + MachineDominatorTree &DT = getAnalysis(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + bool allCSRUsesInEntryBlock = true; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(Reg, MOReg))) { + // CSR Reg is defined/used in block MBB. + CSRUsed[MBB].set(inx); + // Check for uses in EntryBlock. + if (MBB != EntryBlock) + allCSRUsesInEntryBlock = false; + } + } + } + } + + if (CSRUsed[MBB].empty()) + continue; + + // Propagate CSRUsed[MBB] in loops + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + // Add top level loop to work list. + MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); + MachineLoop* PLP = getTopLevelLoopParent(LP); + + if (! HDR) { + HDR = PLP->getHeader(); + assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); + MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); + HDR = *PI; + } + TLLoops[HDR] = PLP; + + // Push uses from inside loop to its parent loops, + // or to all other MBBs in its loop. + if (LP->getLoopDepth() > 1) { + for (MachineLoop* PLP = LP->getParentLoop(); PLP; + PLP = PLP->getParentLoop()) { + propagateUsesAroundLoop(MBB, PLP); + } + } else { + propagateUsesAroundLoop(MBB, LP); + } + } + } + + if (allCSRUsesInEntryBlock) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); + ShrinkWrapThisFunction = false; + } else { + bool allCSRsUsedInEntryFanout = true; + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (CSRUsed[SUCC] != UsedCSRegs) + allCSRsUsedInEntryFanout = false; + } + if (allCSRsUsedInEntryFanout) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); + ShrinkWrapThisFunction = false; + } + } + + if (ShrinkWrapThisFunction) { + // Check if MBB uses CSRs and dominates all exit nodes. + // Such nodes are equiv. to the entry node w.r.t. + // CSR uses: every path through the function must + // pass through this node. If each CSR is used at least + // once by these nodes, shrink wrapping is disabled. + CSRegSet CSRUsedInChokePoints; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) + continue; + bool dominatesExitNodes = true; + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { + dominatesExitNodes = false; + break; + } + if (dominatesExitNodes) { + CSRUsedInChokePoints |= CSRUsed[MBB]; + if (CSRUsedInChokePoints == UsedCSRegs) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); + ShrinkWrapThisFunction = false; + break; + } + } + } + } + + // Return now if we have decided not to apply shrink wrapping + // to the current function. + if (! ShrinkWrapThisFunction) + return false; + + DEBUG({ + DOUT << "ENABLED: " << Fn.getFunction()->getName(); + if (HasFastExitPath) + DOUT << " (fast exit path)"; + DOUT << "\n"; + if (ShrinkWrapDebugging >= BasicInfo) { + DOUT << "------------------------------" + << "-----------------------------\n"; + DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + if (ShrinkWrapDebugging >= Details) { + DOUT << "------------------------------" + << "-----------------------------\n"; + dumpAllUsed(); + } + } + }); + + // Build initial DF sets to determine minimal regions in the + // Machine CFG around which CSRs must be spilled and restored. + calculateAnticAvail(Fn); + + return true; +} + +/// addUsesForMEMERegion - add uses of CSRs spilled or restored in +/// multi-entry, multi-exit (MEME) regions so spill and restore +/// placement will not break code that enters or leaves a +/// shrink-wrapped region by inducing spills with no matching +/// restores or restores with no matching spills. A MEME region +/// is a subgraph of the MCFG with multiple entry edges, multiple +/// exit edges, or both. This code propagates use information +/// through the MCFG until all paths requiring spills and restores +/// _outside_ the computed minimal placement regions have been covered. +/// +bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, + SmallVector& blks) { + if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { + bool processThisBlock = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC->pred_size() > 1) { + processThisBlock = true; + break; + } + } + if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED->succ_size() > 1) { + processThisBlock = true; + break; + } + } + } + if (! processThisBlock) + return false; + } + + CSRegSet prop; + if (!CSRSave[MBB].empty()) + prop = CSRSave[MBB]; + else if (!CSRRestore[MBB].empty()) + prop = CSRRestore[MBB]; + else + prop = CSRUsed[MBB]; + if (prop.empty()) + return false; + + // Propagate selected bits to successors, predecessors of MBB. + bool addedUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + // Self-loop + if (SUCC == MBB) + continue; + if (! CSRUsed[SUCC].contains(prop)) { + CSRUsed[SUCC] |= prop; + addedUses = true; + blks.push_back(SUCC); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "successor " << getBasicBlockName(SUCC) << "\n"); + } + } + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + // Self-loop + if (PRED == MBB) + continue; + if (! 
CSRUsed[PRED].contains(prop)) { + CSRUsed[PRED] |= prop; + addedUses = true; + blks.push_back(PRED); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "predecessor " << getBasicBlockName(PRED) << "\n"); + } + } + return addedUses; +} + +/// addUsesForTopLevelLoops - add uses for CSRs used inside top +/// level loops to the exit blocks of those loops. +/// +bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { + bool addedUses = false; + + // Place restores for top level loops where needed. + for (DenseMap::iterator + I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { + MachineBasicBlock* MBB = I->first; + MachineLoop* LP = I->second; + MachineBasicBlock* HDR = LP->getHeader(); + SmallVector exitBlocks; + CSRegSet loopSpills; + + loopSpills = CSRSave[MBB]; + if (CSRSave[MBB].empty()) { + loopSpills = CSRUsed[HDR]; + assert(!loopSpills.empty() && "No CSRs used in loop?"); + } else if (CSRRestore[MBB].contains(CSRSave[MBB])) + continue; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + if (! CSRUsed[EXB].contains(loopSpills)) { + CSRUsed[EXB] |= loopSpills; + addedUses = true; + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << "LOOP " << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(loopSpills) << ")->" + << getBasicBlockName(EXB) << "\n"); + if (EXB->succ_size() > 1 || EXB->pred_size() > 1) + blks.push_back(EXB); + } + } + } + return addedUses; +} + +/// calcSpillPlacements - determine which CSRs should be spilled +/// in MBB using AnticIn sets of MBB's predecessors, keeping track +/// of changes to spilled reg sets. Add MBB to the set of blocks +/// that need to be processed for propagating use info to cover +/// multi-entry/exit regions. +/// +bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, + SmallVector &blks, + CSRegBlockMap &prevSpills) { + bool placedSpills = false; + // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) + CSRegSet anticInPreds; + SmallVector predecessors; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED != MBB) + predecessors.push_back(PRED); + } + unsigned i = 0, e = predecessors.size(); + if (i != e) { + MachineBasicBlock* PRED = predecessors[i]; + anticInPreds = UsedCSRegs - AnticIn[PRED]; + for (++i; i != e; ++i) { + PRED = predecessors[i]; + anticInPreds &= (UsedCSRegs - AnticIn[PRED]); + } + } else { + // Handle uses in entry blocks (which have no predecessors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + anticInPreds = UsedCSRegs; + } + // Compute spills required at MBB: + CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; + + if (! CSRSave[MBB].empty()) { + if (MBB == EntryBlock) { + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; + } else { + // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { + CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; + } + } + } + placedSpills = (CSRSave[MBB] != prevSpills[MBB]); + prevSpills[MBB] = CSRSave[MBB]; + // Remember this block for adding restores to successor + // blocks for multi-entry region. + if (placedSpills) + blks.push_back(MBB); + + DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) << "\n"); + + return placedSpills; +} + +/// calcRestorePlacements - determine which CSRs should be restored +/// in MBB using AvailOut sets of MBB's succcessors, keeping track +/// of changes to restored reg sets. Add MBB to the set of blocks +/// that need to be processed for propagating use info to cover +/// multi-entry/exit regions. +/// +bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, + SmallVector &blks, + CSRegBlockMap &prevRestores) { + bool placedRestores = false; + // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) + CSRegSet availOutSucc; + SmallVector successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + unsigned i = 0, e = successors.size(); + if (i != e) { + MachineBasicBlock* SUCC = successors[i]; + availOutSucc = UsedCSRegs - AvailOut[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); + } + } else { + if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { + // Handle uses in return blocks (which have no successors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + availOutSucc = UsedCSRegs; + } + } + // Compute restores required at MBB: + CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; + + // Postprocess restore placements at MBB. + // Remove the CSRs that are restored in the return blocks. + // Lest this be confusing, note that: + // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. + if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { + if (! CSRSave[EntryBlock].empty()) + CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; + } + placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); + prevRestores[MBB] = CSRRestore[MBB]; + // Remember this block for adding saves to predecessor + // blocks for multi-entry region. + if (placedRestores) + blks.push_back(MBB); + + DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) + DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); + + return placedRestores; +} + +/// placeSpillsAndRestores - place spills and restores of CSRs +/// used in MBBs in minimal regions that contain the uses. +/// +void PEI::placeSpillsAndRestores(MachineFunction &Fn) { + CSRegBlockMap prevCSRSave; + CSRegBlockMap prevCSRRestore; + SmallVector cvBlocks, ncvBlocks; + bool changed = true; + unsigned iterations = 0; + + // Iterate computation of spill and restore placements in the MCFG until: + // 1. CSR use info has been fully propagated around the MCFG, and + // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 
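+  //
+  // Summarizing the placement equations iterated below (restating
+  // calcSpillPlacements/calcRestorePlacements above):
+  //   CSRSave[B]    |= (AnticIn[B] - AvailIn[B])
+  //                    & INTERSECT(UsedCSRegs - AnticIn[P]), P in preds(B)
+  //   CSRRestore[B] |= (AvailOut[B] - AnticOut[B])
+  //                    & INTERSECT(UsedCSRegs - AvailOut[S]), S in succs(B)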
+ while (changed) { + changed = false; + ++iterations; + + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << "iter " << iterations + << " --------------------------------------------------\n"); + + // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, + // which determines the placements of spills and restores. + // Keep track of changes to spills, restores in each iteration to + // minimize the total iterations. + bool SRChanged = false; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // Place spills for CSRs in MBB. + SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); + + // Place restores for CSRs in MBB. + SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); + } + + // Add uses of CSRs used inside loops where needed. + changed |= addUsesForTopLevelLoops(cvBlocks); + + // Add uses for CSRs spilled or restored at branch, join points. + if (changed || SRChanged) { + while (! cvBlocks.empty()) { + MachineBasicBlock* MBB = cvBlocks.pop_back_val(); + changed |= addUsesForMEMERegion(MBB, ncvBlocks); + } + if (! ncvBlocks.empty()) { + cvBlocks = ncvBlocks; + ncvBlocks.clear(); + } + } + + if (changed) { + calculateAnticAvail(Fn); + CSRSave.clear(); + CSRRestore.clear(); + } + } + + // Check for effectiveness: + // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} + // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] + // Gives a measure of how many CSR spills have been moved from EntryBlock + // to minimal regions enclosing their uses. + CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); + unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); + numSRReduced += numSRReducedThisFunc; + DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { + DOUT << "-----------------------------------------------------------\n"; + DOUT << "total iterations = " << iterations << " ( " + << Fn.getFunction()->getName() + << " " << numSRReducedThisFunc + << " " << Fn.size() + << " )\n"; + DOUT << "-----------------------------------------------------------\n"; + dumpSRSets(); + DOUT << "-----------------------------------------------------------\n"; + if (numSRReducedThisFunc) + verifySpillRestorePlacement(); + }); +} + +// Debugging methods. +#ifndef NDEBUG +/// findFastExitPath - debugging method used to detect functions +/// with at least one path from the entry block to a return block +/// directly or which has a very small number of edges. +/// +void PEI::findFastExitPath() { + if (! EntryBlock) + return; + // Fina a path from EntryBlock to any return block that does not branch: + // Entry + // | ... + // v | + // B1<-----+ + // | + // v + // Return + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + + // Assume positive, disprove existence of fast path. + HasFastExitPath = true; + + // Check the immediate successors. + if (isReturnBlock(SUCC)) { + if (ShrinkWrapDebugging >= BasicInfo) + DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + << "->" << getBasicBlockName(SUCC) << "\n"; + break; + } + // Traverse df from SUCC, look for a branch block. + std::string exitPath = getBasicBlockName(SUCC); + for (df_iterator BI = df_begin(SUCC), + BE = df_end(SUCC); BI != BE; ++BI) { + MachineBasicBlock* SBB = *BI; + // Reject paths with branch nodes. 
+      if (SBB->succ_size() > 1) {
+        HasFastExitPath = false;
+        break;
+      }
+      exitPath += "->" + getBasicBlockName(SBB);
+    }
+    if (HasFastExitPath) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+             << "->" << exitPath << "\n";
+      break;
+    }
+  }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+///         all CSRs spilled at MBB are restored on all paths
+///         from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+///           all CSRs restored at MBB are spilled on all paths
+///           reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+  unsigned numReturnBlocks = 0;
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+      ++numReturnBlocks;
+  }
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet spilled = BI->second;
+    CSRegSet restored;
+
+    if (spilled.empty())
+      continue;
+
+    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+         << stringifyCSRegSet(spilled)
+         << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
+         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+
+    if (CSRRestore[MBB].intersects(spilled)) {
+      restored |= (CSRRestore[MBB] & spilled);
+    }
+
+    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+    // we must find restores for all spills w/no intervening spills on all
+    // paths from MBB to all return blocks.
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+           BE = df_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (SBB == MBB)
+        continue;
+      // Stop when we encounter spills of any CSRs spilled at MBB that
+      // have not yet been seen to be restored.
+      if (CSRSave[SBB].intersects(spilled) &&
+          !restored.contains(CSRSave[SBB] & spilled))
+        break;
+      // Collect the CSRs spilled at MBB that are restored
+      // at this DF successor of MBB.
+      if (CSRRestore[SBB].intersects(spilled))
+        restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+      // we have seen so far exhaust the spills at MBB, then
+      // reset the restores.
+      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+        if (restored != spilled) {
+          CSRegSet notRestored = (spilled - restored);
+          DOUT << MF->getFunction()->getName() << ": "
+               << stringifyCSRegSet(notRestored)
+               << " spilled at " << getBasicBlockName(MBB)
+               << " are never restored on path to return "
+               << getBasicBlockName(SBB) << "\n";
+        }
+        restored.clear();
+      }
+    }
+  }
+
+  // Check restore placements.
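+  // This is the mirror image of the spill check above: walk the inverse
+  // DFS (idf_iterator) from each block with restores to confirm that every
+  // CSR restored there is spilled on all paths reaching the block.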
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restored = BI->second; + CSRegSet spilled; + + if (restored.empty()) + continue; + + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"; + + if (CSRSave[MBB].intersects(restored)) { + spilled |= (CSRSave[MBB] & restored); + } + // Walk inverse depth first from MBB to find spills of all + // CSRs restored at MBB: + for (idf_iterator BI = idf_begin(MBB), + BE = idf_end(MBB); BI != BE; ++BI) { + MachineBasicBlock* PBB = *BI; + if (PBB == MBB) + continue; + // Stop when we encounter restores of any CSRs restored at MBB that + // have not yet been seen to be spilled. + if (CSRRestore[PBB].intersects(restored) && + !spilled.contains(CSRRestore[PBB] & restored)) + break; + // Collect the CSRs restored at MBB that are spilled + // at this DF predecessor of MBB. + if (CSRSave[PBB].intersects(restored)) + spilled |= (CSRSave[PBB] & restored); + } + if (spilled != restored) { + CSRegSet notSpilled = (restored - spilled); + DOUT << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"; + } + } +} + +// Debugging print methods. +std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + std::ostringstream name; + if (MBB) { + if (MBB->getBasicBlock()) + name << MBB->getBasicBlock()->getName(); + else + name << "_MBB_" << MBB->getNumber(); + } + return name.str(); +} + +std::string PEI::stringifyCSRegSet(const CSRegSet& s) { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const std::vector CSI = + MF->getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); +} + +void PEI::dumpSet(const CSRegSet& s) { + DOUT << stringifyCSRegSet(s) << "\n"; +} + +void PEI::dumpUsed(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + } +} + +void PEI::dumpAllUsed() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpUsed(MBB); + } +} + +void PEI::dumpSets(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + } +} + +void PEI::dumpSets1(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } +} + +void PEI::dumpAllSets() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = 
MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets1(MBB); + } +} + +void PEI::dumpSRSets() { + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (! CSRSave[MBB].empty()) { + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + DOUT << "\n"; + } + if (! CSRRestore[MBB].empty()) { + if (! CSRSave[MBB].empty()) + DOUT << " "; + DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + } +} +#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp new file mode 100644 index 000000000000..2bc234f7d09e --- /dev/null +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -0,0 +1,2827 @@ +//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register coalescing pass that attempts to +// aggressively coalesce every register copy that it can. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regcoalescing" +#include "SimpleRegisterCoalescing.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Value.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +#include +using namespace llvm; + +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(numDeadValNo, "Number of valno def marked dead"); + +char SimpleRegisterCoalescing::ID = 0; +static cl::opt +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt +NewHeuristic("new-coalescer-heuristic", + cl::desc("Use new coalescer heuristic"), + cl::init(false), cl::Hidden); + +static cl::opt +CrossClassJoin("join-cross-class-copies", + cl::desc("Coalesce cross register class copies"), + cl::init(false), cl::Hidden); + +static cl::opt +PhysJoinTweak("tweak-phys-join-heuristics", + cl::desc("Tweak heuristics for joining phys reg with vr"), + cl::init(false), cl::Hidden); + +static RegisterPass +X("simple-register-coalescing", "Simple Register Coalescing"); + +// Declare that we implement the RegisterCoalescer interface +static RegisterAnalysisGroup V(X); + +const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; + +void 
SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(MachineDominatorsID); + if (StrongPHIElim) + AU.addPreservedID(StrongPHIEliminationID); + else + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B liveinterval). +/// +/// This returns true if an interval was modified. +/// +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, + LiveInterval &IntB, + MachineInstr *CopyMI) { + unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + + // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + assert(BLR != IntB.end() && "Live range not found!"); + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->copy) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + assert(ALR != IntA.end() && "Live range not found!"); + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. + // See PR3149: + // 172 %ECX = MOV32rr %reg1039 + // 180 INLINEASM , 10, %EAX, 14, %ECX, 9, %EAX, + // 36, , 1, %reg0, 0, 9, %ECX, 36, , 1, %reg0, 0 + // 188 %EAX = MOV32rr %EAX + // 196 %ECX = MOV32rr %ECX + // 204 %ECX = MOV32rr %ECX + // 212 %EAX = MOV32rr %EAX + // 220 %EAX = MOV32rr %EAX + // 228 %reg1039 = MOV32rr %ECX + // The early clobber operand ties ECX input to the ECX def. + // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->redefByEC) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); + if (!SrcReg) return false; // Not defined by a copy. + + // If the value number is not defined by a copy instruction, ignore it. + + // If the source register comes from an interval other than IntB, we can't + // handle this. + if (SrcReg != IntB.reg) return false; + + // Get the LiveRange in IntB that this value number starts with. 
+  LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1);
+  assert(ValLR != IntB.end() && "Live range not found!");
+
+  // Make sure that the end of the live range is inside the same block as
+  // CopyMI.
+  MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+  if (!ValLREndInst ||
+      ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+  // Okay, we now know that ValLR ends in the same block that the CopyMI
+  // live-range starts. If there are no intervening live ranges between them in
+  // IntB, we can merge them.
+  if (ValLR+1 != BLR) return false;
+
+  // If a live interval is a physical register, conservatively check if any
+  // of its sub-registers is overlapping the live interval of the virtual
+  // register. If so, do not coalesce.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
+      *tri_->getSubRegisters(IntB.reg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+      if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+        DOUT << "Interfere with sub-register ";
+        DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+        return false;
+      }
+  }
+
+  DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+  unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+  // We are about to delete CopyMI, so need to remove it as the 'instruction
+  // that defines this value #'. Update the valnum with the new defining
+  // instruction #.
+  BValNo->def  = FillerStart;
+  BValNo->copy = NULL;
+
+  // Okay, we can merge them. We need to insert a new liverange:
+  // [ValLR.end, BLR.begin) of either value number, then we merge the
+  // two value numbers.
+  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+  // If the IntB live range is assigned to a physical register, and if that
+  // physreg has sub-registers, update their live intervals as well.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+      LiveInterval &SRLI = li_->getInterval(*SR);
+      SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+                              SRLI.getNextValue(FillerStart, 0,
+                                                li_->getVNInfoAllocator())));
+    }
+  }
+
+  // Okay, merge "B1" into the same value number as "B0".
+  if (BValNo != ValLR->valno) {
+    IntB.addKills(ValLR->valno, BValNo->kills);
+    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+  }
+  DOUT << "   result = "; IntB.print(DOUT, tri_);
+  DOUT << "\n";
+
+  // If the source instruction was killing the source register before the
+  // merge, unset the isKill marker given the live range has been extended.
+  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+  if (UIdx != -1) {
+    ValLREndInst->getOperand(UIdx).setIsKill(false);
+    IntB.removeKill(ValLR->valno, FillerStart);
+  }
+
+  ++numExtends;
+  return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    VNInfo *AValNo,
+                                                    VNInfo *BValNo) {
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
+                                                        LiveInterval &IntB,
+                                                        MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+  // FIXME: For now, only eliminate the copy by commuting its def when the
+  // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+  // live interval of the source register to the entire loop.
+  if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+    return false;
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  assert(BLR != IntB.end() && "Live range not found!");
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at. Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx. If unknown, we
+  // can't process it.
+  if (!BValNo->copy) return false;
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+  assert(ALR != IntA.end() && "Live range not found!");
+  VNInfo *AValNo = ALR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (!TID.isCommutable() ||
+      !tii_->CommuteChangesDestination(DefMI, NewDstIdx))
+    return false;
+
+  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+  unsigned NewReg = NewDstMO.getReg();
+  if (NewReg != IntB.reg || !NewDstMO.isKill())
+    return false;
+
+  // Make sure there are no other definitions of IntB that would reach the
+  // uses which the new definition can reach.
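+  // Illustrative scenario: if IntB had another def D whose live range
+  // overlaps one of AValNo's ranges in IntA, rewriting AValNo's uses to
+  // IntB.reg would let D reach those uses; the check below rejects exactly
+  // that overlap.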
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // If some of the uses of IntA.reg is already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end()) + continue; + if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + return false; + } + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + if (!NewMI) + return false; + if (NewMI != DefMI) { + li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + MBB->insert(DefMI, NewMI); + MBB->erase(DefMI); + } + unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); + NewMI->getOperand(OpIdx).setIsKill(); + + bool BHasPHIKill = BValNo->hasPHIKill; + SmallVector BDeadValNos; + SmallVector BKills; + std::map BExtend; + + // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A + // ... + // = B + // + // then do not add kills of A to the newly created B interval. + bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; + if (Extended) + BExtend[ALR->end] = BLR->end; + + // Update uses of IntA of the specific Val# with IntB. + bool BHasSubRegs = false; + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + BHasSubRegs = *tri_->getSubRegisters(IntB.reg); + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end() || ULR->valno != AValNo) + continue; + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (UseMO.isKill()) { + if (Extended) + UseMO.setIsKill(false); + else + BKills.push_back(li_->getUseIndex(UseIdx)+1); + } + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + continue; + if (DstReg == IntB.reg) { + // This copy will become a noop. If it's defining a new val#, + // remove that val# as well. However this live range is being + // extended to the end of the existing live range defined by the copy. + unsigned DefIdx = li_->getDefIndex(UseIdx); + const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); + BHasPHIKill |= DLR->valno->hasPHIKill; + assert(DLR->valno->def == DefIdx); + BDeadValNos.push_back(DLR->valno); + BExtend[DLR->start] = DLR->end; + JoinedCopies.insert(UseMI); + // If this is a kill but it's going to be removed, the last use + // of the same val# is the new kill. + if (UseMO.isKill()) + BKills.pop_back(); + } + } + + // We need to insert a new liverange: [ALR.start, LastUse). It may be we can + // simply extend BLR if CopyMI doesn't end the range. + DOUT << "\nExtending: "; IntB.print(DOUT, tri_); + + // Remove val#'s defined by copies that will be coalesced away. 
+  for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+    VNInfo *DeadVNI = BDeadValNos[i];
+    if (BHasSubRegs) {
+      for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+        LiveInterval &SRLI = li_->getInterval(*SR);
+        const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
+        SRLI.removeValNo(SRLR->valno);
+      }
+    }
+    IntB.removeValNo(BDeadValNos[i]);
+  }
+
+  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // is updated. Kills are also updated.
+  VNInfo *ValNo = BValNo;
+  ValNo->def = AValNo->def;
+  ValNo->copy = NULL;
+  for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
+    unsigned Kill = ValNo->kills[j];
+    if (Kill != BLR->end)
+      BKills.push_back(Kill);
+  }
+  ValNo->kills.clear();
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    unsigned End = AI->end;
+    std::map<unsigned, unsigned>::iterator EI = BExtend.find(End);
+    if (EI != BExtend.end())
+      End = EI->second;
+    IntB.addRange(LiveRange(AI->start, End, ValNo));
+
+    // If the IntB live range is assigned to a physical register, and if that
+    // physreg has sub-registers, update their live intervals as well.
+    if (BHasSubRegs) {
+      for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+        LiveInterval &SRLI = li_->getInterval(*SR);
+        SRLI.MergeInClobberRange(AI->start, End, li_->getVNInfoAllocator());
+      }
+    }
+  }
+  IntB.addKills(ValNo, BKills);
+  ValNo->hasPHIKill = BHasPHIKill;
+
+  DOUT << " result = "; IntB.print(DOUT, tri_);
+  DOUT << "\n";
+
+  DOUT << "\nShortening: "; IntA.print(DOUT, tri_);
+  IntA.removeValNo(AValNo);
+  DOUT << " result = "; IntA.print(DOUT, tri_);
+  DOUT << "\n";
+
+  ++numCommutes;
+  return true;
+}
+
+/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+/// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+                                  MachineBasicBlock *SuccMBB,
+                                  const TargetInstrInfo *tii_) {
+  if (MBB == SuccMBB)
+    return true;
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+         MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
+static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
+                        LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+  li.removeRange(Start, End, true);
+  if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      LiveInterval &sli = li_->getInterval(*SR);
+      unsigned RemoveEnd = Start;
+      while (RemoveEnd != End) {
+        LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start);
+        if (LR == sli.end())
+          break;
+        RemoveEnd = (LR->end < End) ? LR->end : End;
+        sli.removeRange(Start, RemoveEnd, true);
+        Start = RemoveEnd;
+      }
+    }
+  }
+}
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
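+/// (Illustrative: for a copy at index 40 whose source's last real use is at
+/// index 24 in the same block, the source range is shortened to end just
+/// past index 24 and that use receives the kill marker.)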
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
+                                                    MachineBasicBlock *CopyMBB,
+                                                    LiveInterval &li,
+                                                    const LiveRange *LR) {
+  unsigned MBBStart = li_->getMBBStartIdx(CopyMBB);
+  unsigned LastUseIdx;
+  MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg,
+                                            LastUseIdx);
+  if (LastUse) {
+    MachineInstr *LastUseMI = LastUse->getParent();
+    if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+      // r1024 = op
+      // ...
+      // BB1:
+      //       = r1024
+      //
+      // BB2:
+      // r1025 = r1024
+      if (MBBStart < LR->end)
+        removeRange(li, MBBStart, LR->end, li_, tri_);
+      return true;
+    }
+
+    // There are uses before the copy, just shorten the live range to the end
+    // of last use.
+    LastUse->setIsKill();
+    removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
+    li.addKill(LR->valno, LastUseIdx+1);
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        DstReg == li.reg) {
+      // Last use is itself an identity copy.
+      int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+      LastUseMI->getOperand(DeadIdx).setIsDead();
+    }
+    return true;
+  }
+
+  // Is it livein?
+  if (LR->start <= MBBStart && LR->end > MBBStart) {
+    if (LR->start == 0) {
+      assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+      // Live-in to the function but dead. Remove it from entry live-in set.
+      mf_->begin()->removeLiveIn(li.reg);
+    }
+    // FIXME: Shorten intervals in BBs that reach this BB.
+  }
+
+  return false;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+                                                       unsigned DstReg,
+                                                       MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+  assert(SrcLR != SrcInt.end() && "Live range not found!");
+  VNInfo *ValNo = SrcLR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  if (!TID.isAsCheapAsAMove())
+    return false;
+  if (!DefMI->getDesc().isRematerializable() ||
+      !tii_->isTriviallyReMaterializable(DefMI))
+    return false;
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(tii_, SawStore))
+    return false;
+
+  unsigned DefIdx = li_->getDefIndex(CopyIdx);
+  const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
+  DLR->valno->copy = NULL;
+  // Don't forget to update sub-register intervals.
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+      if (DLR && DLR->valno->copy == CopyMI)
+        DLR->valno->copy = NULL;
+    }
+  }
+
+  // If copy kills the source register, find the last use and propagate
+  // kill.
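+  // Hypothetical walk-through: for "r1025 = r1024<kill>", r1024's interval
+  // normally ends at this copy. If an earlier use of r1024 exists in the same
+  // block, TrimLiveIntervalToLastUse cuts the interval back to that use and
+  // marks it as the new kill; if there is no such use, checkForDeadDef is set
+  // so the now-unneeded def can be marked dead below.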
+  bool checkForDeadDef = false;
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  if (CopyMI->killsRegister(SrcInt.reg))
+    if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
+      checkForDeadDef = true;
+    }
+
+  MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
+  CopyMI->removeFromParent();
+  tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
+  MachineInstr *NewMI = prior(MII);
+
+  if (checkForDeadDef) {
+    // PR4090 fix: Trim interval failed because there was no use of the
+    // source interval in this MBB. If the def is in this MBB too then we
+    // should mark it dead:
+    if (DefMI->getParent() == MBB) {
+      DefMI->addRegisterDead(SrcInt.reg, tri_);
+      SrcLR->end = SrcLR->start + 1;
+    }
+  }
+
+  // CopyMI may have implicit operands, transfer them over to the newly
+  // rematerialized instruction, and update implicit def interval valnos.
+  for (unsigned i = CopyMI->getDesc().getNumOperands(),
+         e = CopyMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = CopyMI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      NewMI->addOperand(MO);
+    if (MO.isDef() && li_->hasInterval(MO.getReg())) {
+      unsigned Reg = MO.getReg();
+      DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+      if (DLR && DLR->valno->copy == CopyMI)
+        DLR->valno->copy = NULL;
+    }
+  }
+
+  li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+  MBB->getParent()->DeleteMachineInstr(CopyMI);
+  ReMatCopies.insert(CopyMI);
+  ReMatDefs.insert(DefMI);
+  ++NumReMats;
+  return true;
+}
+
+/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy.
+///
+bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
+                                              unsigned DstReg) const {
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  const MachineLoop *L = loopInfo->getLoopFor(MBB);
+  if (!L)
+    return false;
+  if (MBB != L->getLoopLatch())
+    return false;
+
+  LiveInterval &LI = li_->getInterval(DstReg);
+  unsigned DefIdx = li_->getInstructionIndex(CopyMI);
+  LiveInterval::const_iterator DstLR =
+    LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx));
+  if (DstLR == LI.end())
+    return false;
+  unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
+  if (DstLR->valno->kills.size() == 1 &&
+      DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill)
+    return true;
+  return false;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+                                            unsigned SubIdx) {
+  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+  if (DstIsPhys && SubIdx) {
+    // Figure out the real physical register we are updating with.
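+    // e.g. (names illustrative) updating with DstReg == RAX and SubIdx == 2
+    // really means updating with AX, per the sub-register index table in the
+    // comment further down, so DstReg is narrowed here and SubIdx is cleared.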
+    DstReg = tri_->getSubReg(DstReg, SubIdx);
+    SubIdx = 0;
+  }
+
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+         E = mri_->reg_end(); I != E; ) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *UseMI = &*I;
+    ++I;
+    unsigned OldSubIdx = O.getSubReg();
+    if (DstIsPhys) {
+      unsigned UseDstReg = DstReg;
+      if (OldSubIdx)
+        UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
+
+      unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+      if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+                            CopySrcSubIdx, CopyDstSubIdx) &&
+          CopySrcReg != CopyDstReg &&
+          CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
+        // If the use is a copy and it won't be coalesced away, and its source
+        // is defined by a trivial computation, try to rematerialize it instead.
+        if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI))
+          continue;
+      }
+
+      O.setReg(UseDstReg);
+      O.setSubReg(0);
+      continue;
+    }
+
+    // Sub-register indices go from small to large. e.g.
+    // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+    // EAX: 1 -> AL, 2 -> AX
+    // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
+    // sub-register 2 is also AX.
+    if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+      assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+    else if (SubIdx)
+      O.setSubReg(SubIdx);
+    // Remove would-be duplicated kill marker.
+    if (O.isKill() && UseMI->killsRegister(DstReg))
+      O.setIsKill(false);
+    O.setReg(DstReg);
+
+    // After updating the operand, check if the machine instruction has
+    // become a copy. If so, update its val# information.
+    const TargetInstrDesc &TID = UseMI->getDesc();
+    unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+    if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+        tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+                          CopySrcSubIdx, CopyDstSubIdx) &&
+        CopySrcReg != CopyDstReg &&
+        (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
+         allocatableRegs_[CopyDstReg])) {
+      LiveInterval &LI = li_->getInterval(CopyDstReg);
+      unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+      const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx);
+      if (DLR->valno->def == DefIdx)
+        DLR->valno->copy = UseMI;
+    }
+  }
+}
+
+/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining"
+/// registers due to insert_subreg coalescing. e.g.
+/// r1024 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1024
+///       = op r1025
+/// =>
+/// r1025 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1025
+///       = op r1025
+void
+SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) {
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+         E = mri_->reg_end(); I != E; ) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *DefMI = &*I;
+    ++I;
+    if (!O.isDef())
+      continue;
+    if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+      continue;
+    if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI)))
+      continue;
+    li_->RemoveMachineInstrFromMaps(DefMI);
+    DefMI->eraseFromParent();
+  }
+}
+
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
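+/// For instance (illustrative vregs): after coalescing r1025 into r1024, a
+/// use of r1024 that used to terminate r1025's old range may now sit in the
+/// middle of the merged range, so its kill marker must be cleared.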
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+                                                      LiveInterval &LI) {
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+         UE = mri_->use_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    if (UseMO.isKill()) {
+      MachineInstr *UseMI = UseMO.getParent();
+      unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+      const LiveRange *UI = LI.getLiveRangeContaining(UseIdx);
+      if (!UI || !LI.isKill(UI->valno, UseIdx+1))
+        UseMO.setIsKill(false);
+    }
+  }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it, along with the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+                                  const TargetRegisterInfo *tri_) {
+  if (li.empty()) {
+    if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+      for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+        if (!li_->hasInterval(*SR))
+          continue;
+        LiveInterval &sli = li_->getInterval(*SR);
+        if (sli.empty())
+          li_->removeInterval(*SR);
+      }
+    li_->removeInterval(li.reg);
+    return true;
+  }
+  return false;
+}
+
+/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+/// Return true if the live interval is removed.
+bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
+                                                        MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+  LiveInterval::iterator MLR =
+    li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
+  if (MLR == li.end())
+    return false;  // Already removed by ShortenDeadCopySrcLiveRange.
+  unsigned RemoveStart = MLR->start;
+  unsigned RemoveEnd = MLR->end;
+  // Remove the liverange that's defined by this.
+  if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) {
+    removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+    return removeIntervalIfEmpty(li, li_, tri_);
+  }
+  return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+                                             MachineInstr *DefMI) {
+  unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+  LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+  if (DefIdx != MLR->valno->def)
+    return false;
+  li.removeValNo(MLR->valno);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+                              unsigned &LRStart, LiveIntervals *li_,
+                              const TargetRegisterInfo* tri_) {
+  MachineInstr *DefMI =
+    li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
+  if (DefMI && DefMI != CopyMI) {
+    int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+    if (DeadIdx != -1) {
+      DefMI->getOperand(DeadIdx).setIsDead();
+      // A dead def should have a single cycle interval.
+      ++LRStart;
+    }
+  }
+}
+
+/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+/// extended by a dead copy. Mark the last use (if any) of the val# as a kill,
+/// since it ends the live range there. If there isn't another use, then this
+/// live range is dead. Return true if the live interval is removed.
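+/// Hypothetical example: if the dead copy sits at the end of a range [4,26)
+/// and the last real use is at index 17, the range is cut back to end just
+/// past that use and the use becomes the kill; with no remaining use the
+/// val# (and possibly the whole interval) is removed instead.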
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+                                                      MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+  if (CopyIdx == 0) {
+    // FIXME: special case: function live in. It can be a general case if the
+    // first instruction index starts at > 0 value.
+    assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+    // Live-in to the function but dead. Remove it from entry live-in set.
+    if (mf_->begin()->isLiveIn(li.reg))
+      mf_->begin()->removeLiveIn(li.reg);
+    const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
+    removeRange(li, LR->start, LR->end, li_, tri_);
+    return removeIntervalIfEmpty(li, li_, tri_);
+  }
+
+  LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1);
+  if (LR == li.end())
+    // Livein but defined by a phi.
+    return false;
+
+  unsigned RemoveStart = LR->start;
+  unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1;
+  if (LR->end > RemoveEnd)
+    // More uses past this copy? Nothing to do.
+    return false;
+
+  // If there is a last use in the same bb, we can't remove the live range.
+  // Shorten the live interval and return.
+  MachineBasicBlock *CopyMBB = CopyMI->getParent();
+  if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+    return false;
+
+  MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+  if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+    // If the live range starts in another mbb and the copy mbb is not a fall
+    // through mbb, then we can only cut the range from the beginning of the
+    // copy mbb.
+    RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1;
+
+  if (LR->valno->def == RemoveStart) {
+    // If the def MI defines the val# and this copy is the only kill of the
+    // val#, then propagate the dead marker.
+    if (li.isOnlyLROfValNo(LR)) {
+      PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+      ++numDeadValNo;
+    }
+    if (li.isKill(LR->valno, RemoveEnd))
+      li.removeKill(LR->valno, RemoveEnd);
+  }
+
+  removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+/// from an implicit def to another register can be coalesced away.
+bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
+                                                     LiveInterval &li,
+                                                     LiveInterval &ImpLi) const {
+  if (!CopyMI->killsRegister(ImpLi.reg))
+    return false;
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
+  if (LR == li.end())
+    return false;
+  if (LR->valno->hasPHIKill)
+    return false;
+  if (LR->valno->def != CopyIdx)
+    return false;
+  // Make sure all of val# uses are copies.
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg),
+         UE = mri_->use_end(); UI != UE;) {
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (JoinedCopies.count(UseMI))
+      continue;
+    unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+    LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+    if (ULR == li.end() || ULR->valno != LR->valno)
+      continue;
+    // If the use is not a copy, then it's not safe to coalesce the move.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+      if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG &&
+          UseMI->getOperand(1).getReg() == li.reg)
+        continue;
+      return false;
+    }
+  }
+  return true;
+}
+
+
+/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+/// def and it is being removed.
Turn all copies from this value# into
+/// identity copies so they will be removed.
+void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
+                                                     VNInfo *VNI) {
+  SmallVector<MachineInstr*, 4> ImpDefs;
+  MachineOperand *LastUse = NULL;
+  unsigned LastUseIdx = li_->getUseIndex(VNI->def);
+  for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg),
+         RE = mri_->reg_end(); RI != RE;) {
+    MachineOperand *MO = &RI.getOperand();
+    MachineInstr *MI = &*RI;
+    ++RI;
+    if (MO->isDef()) {
+      if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+        ImpDefs.push_back(MI);
+      }
+      continue;
+    }
+    if (JoinedCopies.count(MI))
+      continue;
+    unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI));
+    LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+    if (ULR == li.end() || ULR->valno != VNI)
+      continue;
+    // If the use is a copy, turn it into an identity copy.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        SrcReg == li.reg) {
+      // Each use MI may have multiple uses of this register. Change them all.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg)
+          MO.setReg(DstReg);
+      }
+      JoinedCopies.insert(MI);
+    } else if (UseIdx > LastUseIdx) {
+      LastUseIdx = UseIdx;
+      LastUse = MO;
+    }
+  }
+  if (LastUse) {
+    LastUse->setIsKill();
+    li.addKill(VNI, LastUseIdx+1);
+  } else {
+    // Remove dead implicit_def's.
+    while (!ImpDefs.empty()) {
+      MachineInstr *ImpDef = ImpDefs.back();
+      ImpDefs.pop_back();
+      li_->RemoveMachineInstrFromMaps(ImpDef);
+      ImpDef->eraseFromParent();
+    }
+  }
+}
+
+/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+/// virtual destination register with a physical source register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+                                                 MachineBasicBlock *CopyMBB,
+                                                 LiveInterval &DstInt,
+                                                 LiveInterval &SrcInt) {
+  // If the virtual register live interval is long but it has low use density,
+  // do not join them, instead mark the physical register as its allocation
+  // preference.
+  const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
+  unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+  unsigned Length = li_->getApproximateInstructionCount(DstInt);
+  if (Length > Threshold &&
+      (((float)std::distance(mri_->use_begin(DstInt.reg),
+                             mri_->use_end()) / Length) < (1.0 / Threshold)))
+    return false;
+
+  // If the virtual register live interval extends into a loop, turn down
+  // aggressiveness.
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
+  if (!L) {
+    // Let's see if the virtual register live interval extends into the loop.
+    LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
+    assert(DLR != DstInt.end() && "Live range not found!");
+    DLR = DstInt.FindLiveRangeContaining(DLR->end+1);
+    if (DLR != DstInt.end()) {
+      CopyMBB = li_->getMBBFromIndex(DLR->start);
+      L = loopInfo->getLoopFor(CopyMBB);
+    }
+  }
+
+  if (!L || Length <= Threshold)
+    return true;
+
+  unsigned UseIdx = li_->getUseIndex(CopyIdx);
+  LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+  MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+  if (loopInfo->getLoopFor(SMBB) != L) {
+    if (!loopInfo->isLoopHeader(CopyMBB))
+      return false;
+    // If vr's live interval extends past the loop header, do not join.
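+    // i.e. if DstInt is still live into any successor of the loop header
+    // other than the header itself, joining would tie the physical register
+    // up across the whole loop body, so the check below rejects the join.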
+    for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
+           SE = CopyMBB->succ_end(); SI != SE; ++SI) {
+      MachineBasicBlock *SuccMBB = *SI;
+      if (SuccMBB == CopyMBB)
+        continue;
+      if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
+                          li_->getMBBEndIdx(SuccMBB)+1))
+        return false;
+    }
+  }
+  return true;
+}
+
+/// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+/// copy from a virtual source register to a physical destination register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+                                                 MachineBasicBlock *CopyMBB,
+                                                 LiveInterval &DstInt,
+                                                 LiveInterval &SrcInt) {
+  // If the virtual register live interval is long but it has low use density,
+  // do not join them, instead mark the physical register as its allocation
+  // preference.
+  const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
+  unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+  unsigned Length = li_->getApproximateInstructionCount(SrcInt);
+  if (Length > Threshold &&
+      (((float)std::distance(mri_->use_begin(SrcInt.reg),
+                             mri_->use_end()) / Length) < (1.0 / Threshold)))
+    return false;
+
+  if (SrcInt.empty())
+    // Must be implicit_def.
+    return false;
+
+  // If the virtual register live interval is defined in or crosses a loop,
+  // turn down aggressiveness.
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  unsigned UseIdx = li_->getUseIndex(CopyIdx);
+  LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+  assert(SLR != SrcInt.end() && "Live range not found!");
+  SLR = SrcInt.FindLiveRangeContaining(SLR->start-1);
+  if (SLR == SrcInt.end())
+    return true;
+  MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+  const MachineLoop *L = loopInfo->getLoopFor(SMBB);
+
+  if (!L || Length <= Threshold)
+    return true;
+
+  if (loopInfo->getLoopFor(CopyMBB) != L) {
+    if (SMBB != L->getLoopLatch())
+      return false;
+    // If vr's live interval is extended from before the loop latch, do not
+    // join.
+    for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
+           PE = SMBB->pred_end(); PI != PE; ++PI) {
+      MachineBasicBlock *PredMBB = *PI;
+      if (PredMBB == SMBB)
+        continue;
+      if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
+                          li_->getMBBEndIdx(PredMBB)+1))
+        return false;
+    }
+  }
+  return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
+                                                unsigned SmallReg,
+                                                unsigned Threshold) {
+  // Then make sure the intervals are *short*.
+  LiveInterval &LargeInt = li_->getInterval(LargeReg);
+  LiveInterval &SmallInt = li_->getInterval(SmallReg);
+  unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
+  unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
+  if (SmallSize > Threshold || LargeSize > Threshold)
+    if ((float)std::distance(mri_->use_begin(SmallReg),
+                             mri_->use_end()) / SmallSize <
+        (float)std::distance(mri_->use_begin(LargeReg),
+                             mri_->use_end()) / LargeSize)
+      return false;
+  return true;
+}
+
+/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+/// register with a physical register, check if any of the virtual register's
+/// operands is a sub-register use or def. If so, make sure it won't result
+/// in an illegal extract_subreg or insert_subreg instruction. e.g.
+/// vr1024 = extract_subreg vr1025, 1
+/// ...
+/// vr1024 = mov8rr AH
+/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
+/// AH does not have a super-reg whose sub-register 1 is AH.
+bool
+SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+                                                      unsigned VirtReg,
+                                                      unsigned PhysReg) {
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = &*I;
+    if (MI == CopyMI || JoinedCopies.count(MI))
+      continue;
+    unsigned SubIdx = O.getSubReg();
+    if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
+      return true;
+    if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      SubIdx = MI->getOperand(2).getImm();
+      if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
+        return true;
+      if (O.isDef()) {
+        unsigned SrcReg = MI->getOperand(1).getReg();
+        const TargetRegisterClass *RC =
+          TargetRegisterInfo::isPhysicalRegister(SrcReg)
+          ? tri_->getPhysicalRegisterRegClass(SrcReg)
+          : mri_->getRegClass(SrcReg);
+        if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+          return true;
+      }
+    }
+    if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+      SubIdx = MI->getOperand(3).getImm();
+      if (VirtReg == MI->getOperand(0).getReg()) {
+        if (!tri_->getSubReg(PhysReg, SubIdx))
+          return true;
+      } else {
+        unsigned DstReg = MI->getOperand(0).getReg();
+        const TargetRegisterClass *RC =
+          TargetRegisterInfo::isPhysicalRegister(DstReg)
+          ? tri_->getPhysicalRegisterRegClass(DstReg)
+          : mri_->getRegClass(DstReg);
+        if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+/// an extract_subreg where dst is a physical register, e.g.
+/// cl = EXTRACT_SUBREG reg1024, 1
+bool
+SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
+                                              unsigned SrcReg, unsigned SubIdx,
+                                              unsigned &RealDstReg) {
+  const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
+  RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
+  assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+  // For this type of EXTRACT_SUBREG, conservatively
+  // check if the live interval of the source register interferes with the
+  // actual super physical register we are trying to coalesce with.
+  LiveInterval &RHS = li_->getInterval(SrcReg);
+  if (li_->hasInterval(RealDstReg) &&
+      RHS.overlaps(li_->getInterval(RealDstReg))) {
+    DOUT << "Interfere with register ";
+    DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_));
+    return false; // Not coalescable
+  }
+  for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
+    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+      DOUT << "Interfere with sub-register ";
+      DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+      return false; // Not coalescable
+    }
+  return true;
+}
+
+/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+/// an insert_subreg where src is a physical register, e.g.
+/// reg1024 = INSERT_SUBREG reg1024, c1, 0
+bool
+SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
+                                              unsigned SrcReg, unsigned SubIdx,
+                                              unsigned &RealSrcReg) {
+  const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
+  RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
+  assert(RealSrcReg && "Invalid insert_subreg instruction!");
+
+  LiveInterval &RHS = li_->getInterval(DstReg);
+  if (li_->hasInterval(RealSrcReg) &&
+      RHS.overlaps(li_->getInterval(RealSrcReg))) {
+    DOUT << "Interfere with register ";
+    DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_));
+    return false; // Not coalescable
+  }
+  for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
+    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+      DOUT << "Interfere with sub-register ";
+      DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+      return false; // Not coalescable
+    }
+  return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+  MachineInstr *CopyMI = TheCopy.MI;
+
+  Again = false;
+  if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+    return false; // Already done.
+
+  DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+  unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
+  bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+  bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG;
+  bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG;
+  unsigned SubIdx = 0;
+  if (isExtSubReg) {
+    DstReg    = CopyMI->getOperand(0).getReg();
+    DstSubIdx = CopyMI->getOperand(0).getSubReg();
+    SrcReg    = CopyMI->getOperand(1).getReg();
+    SrcSubIdx = CopyMI->getOperand(2).getImm();
+  } else if (isInsSubReg || isSubRegToReg) {
+    if (CopyMI->getOperand(2).getSubReg()) {
+      DOUT << "\tSource of insert_subreg is already coalesced "
+           << "to another register.\n";
+      return false; // Not coalescable.
+    }
+    DstReg    = CopyMI->getOperand(0).getReg();
+    DstSubIdx = CopyMI->getOperand(3).getImm();
+    SrcReg    = CopyMI->getOperand(2).getReg();
+  } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+    assert(0 && "Unrecognized copy instruction!");
+    return false;
+  }
+
+  // If they are already joined we continue.
+  if (SrcReg == DstReg) {
+    DOUT << "\tCopy already coalesced.\n";
+    return false; // Not coalescable.
+  }
+
+  bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+
+  // If they are both physical registers, we cannot join them.
+  if (SrcIsPhys && DstIsPhys) {
+    DOUT << "\tCan not coalesce physregs.\n";
+    return false; // Not coalescable.
+  }
+
+  // We only join virtual registers with allocatable physical registers.
+  if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
+    DOUT << "\tSrc reg is unallocatable physreg.\n";
+    return false; // Not coalescable.
+  }
+  if (DstIsPhys && !allocatableRegs_[DstReg]) {
+    DOUT << "\tDst reg is unallocatable physreg.\n";
+    return false; // Not coalescable.
+  }
+
+  // Check that a physical source register is compatible with dst regclass
+  if (SrcIsPhys) {
+    unsigned SrcSubReg = SrcSubIdx ?
+      tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
+    const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+    const TargetRegisterClass *DstSubRC = DstRC;
+    if (DstSubIdx)
+      DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
+    assert(DstSubRC && "Illegal subregister index");
+    if (!DstSubRC->contains(SrcSubReg)) {
+      DOUT << "\tIncompatible destination regclass: "
+           << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName()
+           << ".\n";
+      return false;             // Not coalescable.
+    }
+  }
+
+  // Check that a physical dst register is compatible with source regclass
+  if (DstIsPhys) {
+    unsigned DstSubReg = DstSubIdx ?
+      tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
+    const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
+    const TargetRegisterClass *SrcSubRC = SrcRC;
+    if (SrcSubIdx)
+      SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
+    assert(SrcSubRC && "Illegal subregister index");
+    if (!SrcSubRC->contains(DstSubReg)) {
+      DOUT << "\tIncompatible source regclass: "
+           << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName()
+           << ".\n";
+      return false;             // Not coalescable.
+    }
+  }
+
+  // Should be non-null only when coalescing to a sub-register class.
+  bool CrossRC = false;
+  const TargetRegisterClass *NewRC = NULL;
+  MachineBasicBlock *CopyMBB = CopyMI->getParent();
+  unsigned RealDstReg = 0;
+  unsigned RealSrcReg = 0;
+  if (isExtSubReg || isInsSubReg || isSubRegToReg) {
+    SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
+    if (SrcIsPhys && isExtSubReg) {
+      // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+      // coalesced with AX.
+      unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
+      if (DstSubIdx) {
+        // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
+        // coalesced to a larger register so the subreg indices cancel out.
+        if (DstSubIdx != SubIdx) {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      } else
+        SrcReg = tri_->getSubReg(SrcReg, SubIdx);
+      SubIdx = 0;
+    } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
+      // EAX = INSERT_SUBREG EAX, r1024, 0
+      unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+      if (SrcSubIdx) {
+        // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
+        // coalesced to a larger register so the subreg indices cancel out.
+        if (SrcSubIdx != SubIdx) {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      } else
+        DstReg = tri_->getSubReg(DstReg, SubIdx);
+      SubIdx = 0;
+    } else if ((DstIsPhys && isExtSubReg) ||
+               (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+      if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
+        DOUT << "\tSrc of extract_subreg already coalesced with reg"
+             << " of a super-class.\n";
+        return false; // Not coalescable.
+      }
+
+      if (isExtSubReg) {
+        if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
+          return false; // Not coalescable
+      } else {
+        if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+          return false; // Not coalescable
+      }
+      SubIdx = 0;
+    } else {
+      unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
+                                       : CopyMI->getOperand(2).getSubReg();
+      if (OldSubIdx) {
+        if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
+          // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
+          // coalesced to a larger register so the subreg indices cancel out.
+          // Also check if the other larger register is of the same register
+          // class as the would-be resulting register.
+          SubIdx = 0;
+        else {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      }
+      if (SubIdx) {
+        unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
+        unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
+        unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
+        if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+          Again = true;  // May be possible to coalesce later.
+          return false;
+        }
+      }
+    }
+  } else if (differingRegisterClasses(SrcReg, DstReg)) {
+    if (!CrossClassJoin)
+      return false;
+    CrossRC = true;
+
+    // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced
+    // with another? If it's the resulting destination register, then
+    // the subidx must be propagated to uses (but only those defined
+    // by the EXTRACT_SUBREG). If it's being coalesced into another
+    // register, it should be safe because register is assumed to have
+    // the register class of the super-register.
+
+    // Process moves where one of the registers have a sub-register index.
+    MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
+    MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
+    SubIdx = DstMO->getSubReg();
+    if (SubIdx) {
+      if (SrcMO->getSubReg())
+        // FIXME: can we handle this?
+        return false;
+      // This is not an insert_subreg but it looks like one.
+      // e.g. %reg1024:4 = MOV32rr %EAX
+      isInsSubReg = true;
+      if (SrcIsPhys) {
+        if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+          return false; // Not coalescable
+        SubIdx = 0;
+      }
+    } else {
+      SubIdx = SrcMO->getSubReg();
+      if (SubIdx) {
+        // This is not an extract_subreg but it looks like one.
+        // e.g. %cl = MOV16rr %reg1024:1
+        isExtSubReg = true;
+        if (DstIsPhys) {
+          if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+            return false; // Not coalescable
+          SubIdx = 0;
+        }
+      }
+    }
+
+    const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+    const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
+    unsigned LargeReg = SrcReg;
+    unsigned SmallReg = DstReg;
+    unsigned Limit = 0;
+
+    // Now determine the register class of the joined register.
+    if (isExtSubReg) {
+      if (SubIdx && DstRC && DstRC->isASubClass()) {
+        // This is a move to a sub-register class. However, the source is a
+        // sub-register of a larger register class. We don't know what the
+        // register class should be. FIXME.
+        Again = true;
+        return false;
+      }
+      Limit = allocatableRCRegs_[DstRC].count();
+    } else if (!SrcIsPhys && !DstIsPhys) {
+      NewRC = getCommonSubClass(SrcRC, DstRC);
+      if (!NewRC) {
+        DOUT << "\tDisjoint regclasses: "
+             << SrcRC->getName() << ", "
+             << DstRC->getName() << ".\n";
+        return false;           // Not coalescable.
+      }
+      if (DstRC->getSize() > SrcRC->getSize())
+        std::swap(LargeReg, SmallReg);
+    }
+
+    // If we are joining two virtual registers and the resulting register
+    // class is more restrictive (fewer registers, smaller size), check if it's
+    // worth doing the merge.
+    if (!SrcIsPhys && !DstIsPhys &&
+        (isExtSubReg || DstRC->isASubClass()) &&
+        !isWinToJoinCrossClass(LargeReg, SmallReg,
+                               allocatableRCRegs_[NewRC].count())) {
+      DOUT << "\tSrc/Dest are different register classes.\n";
+      // Allow the coalescer to try again in case either side gets coalesced to
+      // a physical register that's compatible with the other side. e.g.
+      // r1024 = MOV32to32_ r1025
+      // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+      Again = true;  // May be possible to coalesce later.
+      return false;
+    }
+  }
+
+  // Will it create illegal extract_subreg / insert_subreg?
+  if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
+    return false;
+  if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
+    return false;
+
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
+         "Register mapping is horribly broken!");
+
+  DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_);
+  DOUT << " and "; DstInt.print(DOUT, tri_);
+  DOUT << ": ";
+
+  // Save a copy of the virtual register live interval. We'll manually
+  // merge this into the "real" physical register live interval this is
+  // coalesced with.
+  LiveInterval *SavedLI = 0;
+  if (RealDstReg)
+    SavedLI = li_->dupInterval(&SrcInt);
+  else if (RealSrcReg)
+    SavedLI = li_->dupInterval(&DstInt);
+
+  // Check if it is necessary to propagate "isDead" property.
+  if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+    MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
+    bool isDead = mopd->isDead();
+
+    // We need to be careful about coalescing a source physical register with a
+    // virtual register. Once the coalescing is done, it cannot be broken and
+    // these are not spillable! If the destination interval uses are far away,
+    // think twice about coalescing them!
+    if (!isDead && (SrcIsPhys || DstIsPhys)) {
+      // If the copy is in a loop, take care not to coalesce aggressively if the
+      // src is coming in from outside the loop (or the dst is out of the loop).
+      // If it's not in a loop, then determine whether to join them based purely
+      // on the length of the interval.
+      if (PhysJoinTweak) {
+        if (SrcIsPhys) {
+          if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+            DstInt.preference = SrcReg;
+            ++numAborts;
+            DOUT << "\tMay tie down a physical register, abort!\n";
+            Again = true;  // May be possible to coalesce later.
+            return false;
+          }
+        } else {
+          if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+            SrcInt.preference = DstReg;
+            ++numAborts;
+            DOUT << "\tMay tie down a physical register, abort!\n";
+            Again = true;  // May be possible to coalesce later.
+            return false;
+          }
+        }
+      } else {
+        // If the virtual register live interval is long but it has low use
+        // density, do not join them, instead mark the physical register as its
+        // allocation preference.
+        LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+        unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+        unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+        const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+        unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+        if (TheCopy.isBackEdge)
+          Threshold *= 2; // Favors back edge copies.
+
+        unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+        float Ratio = 1.0 / Threshold;
+        if (Length > Threshold &&
+            (((float)std::distance(mri_->use_begin(JoinVReg),
+                                   mri_->use_end()) / Length) < Ratio)) {
+          JoinVInt.preference = JoinPReg;
+          ++numAborts;
+          DOUT << "\tMay tie down a physical register, abort!\n";
+          Again = true;  // May be possible to coalesce later.
+          return false;
+        }
+      }
+    }
+  }
+
+  // Okay, attempt to join these two intervals. On failure, this returns false.
+  // Otherwise, if one of the intervals being joined is a physreg, this method
+  // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+  // been modified, so we can use this information below to update aliases.
+  bool Swapped = false;
+  // If SrcInt is implicitly defined, it's safe to coalesce.
+  bool isEmpty = SrcInt.empty();
+  if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+    // Only coalesce an empty interval (defined by implicit_def) with
+    // another interval which has a valno defined by the CopyMI and the CopyMI
+    // is a kill of the implicit def.
+    DOUT << "Not profitable!\n";
+    return false;
+  }
+
+  if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+    // Coalescing failed.
+
+    // If definition of source is defined by trivial computation, try
+    // rematerializing it.
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+        ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
+      return true;
+
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+        (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
+         RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
+      JoinedCopies.insert(CopyMI);
+      return true;
+    }
+
+    // Otherwise, we are unable to join the intervals.
+    DOUT << "Interference!\n";
+    Again = true;  // May be possible to coalesce later.
+    return false;
+  }
+
+  LiveInterval *ResSrcInt = &SrcInt;
+  LiveInterval *ResDstInt = &DstInt;
+  if (Swapped) {
+    std::swap(SrcReg, DstReg);
+    std::swap(ResSrcInt, ResDstInt);
+  }
+  assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+         "LiveInterval::join didn't work right!");
+
+  // If we're about to merge live ranges into a physical register live interval,
+  // we have to update any aliased register's live ranges to indicate that they
+  // have clobbered values for this range.
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    // If this is an extract_subreg where dst is a physical register, e.g.
+    // cl = EXTRACT_SUBREG reg1024, 1
+    // then create and update the actual physical register allocated to RHS.
+    if (RealDstReg || RealSrcReg) {
+      LiveInterval &RealInt =
+        li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
+      for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
+             E = SavedLI->vni_end(); I != E; ++I) {
+        const VNInfo *ValNo = *I;
+        VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+                                                li_->getVNInfoAllocator());
+        NewValNo->hasPHIKill = ValNo->hasPHIKill;
+        NewValNo->redefByEC = ValNo->redefByEC;
+        RealInt.addKills(NewValNo, ValNo->kills);
+        RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
+      }
+      RealInt.weight += SavedLI->weight;
+      DstReg = RealDstReg ? RealDstReg : RealSrcReg;
+    }
+
+    // Update the liveintervals of sub-registers.
+    for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
+      li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+                                                    li_->getVNInfoAllocator());
+  }
+
+  // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the
+  // larger super-register.
+  if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+      !SrcIsPhys && !DstIsPhys) {
+    if ((isExtSubReg && !Swapped) ||
+        ((isInsSubReg || isSubRegToReg) && Swapped)) {
+      ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+      std::swap(SrcReg, DstReg);
+      std::swap(ResSrcInt, ResDstInt);
+    }
+  }
+
+  // Coalescing to a virtual register that is of a sub-register class of the
+  // other. Make sure the resulting register is set to the right register class.
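+  // e.g. (class names illustrative) joining a plain 32-bit GPR vreg into a
+  // vreg constrained to a more restrictive sub-class: the merged register
+  // must take the common sub-class, which setRegClass below enforces.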
+  if (CrossRC) {
+    ++numCrossRCs;
+    if (NewRC)
+      mri_->setRegClass(DstReg, NewRC);
+  }
+
+  if (NewHeuristic) {
+    // Add all copies that define val# in the source interval into the queue.
+    for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
+           e = ResSrcInt->vni_end(); i != e; ++i) {
+      const VNInfo *vni = *i;
+      if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+        continue;
+      MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+      unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
+      if (CopyMI &&
+          JoinedCopies.count(CopyMI) == 0 &&
+          tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg,
+                            NewSrcSubIdx, NewDstSubIdx)) {
+        unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB);
+        JoinQueue->push(CopyRec(CopyMI, LoopDepth,
+                                isBackEdgeCopy(CopyMI, DstReg)));
+      }
+    }
+  }
+
+  // Remember to delete the copy instruction.
+  JoinedCopies.insert(CopyMI);
+
+  // Some live range has been lengthened due to coalescing, eliminate the
+  // unnecessary kills.
+  RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+  if (TargetRegisterInfo::isVirtualRegister(DstReg))
+    RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
+  if (isInsSubReg)
+    // Avoid:
+    // r1024 = op
+    // r1024 = implicit_def
+    // ...
+    //       = r1024
+    RemoveDeadImpDef(DstReg, *ResDstInt);
+  UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+
+  // SrcReg is guaranteed to be the register whose live interval that is
+  // being merged.
+  li_->removeInterval(SrcReg);
+
+  // Manually delete the live interval copy.
+  if (SavedLI) {
+    SavedLI->clear();
+    delete SavedLI;
+  }
+
+  if (isEmpty) {
+    // Now the copy is being coalesced away, the val# previously defined
+    // by the copy is being defined by an IMPLICIT_DEF which defines a zero
+    // length interval. Remove the val#.
+    unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+    const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx);
+    VNInfo *ImpVal = LR->valno;
+    assert(ImpVal->def == CopyIdx);
+    unsigned NextDef = LR->end;
+    RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+    ResDstInt->removeValNo(ImpVal);
+    LR = ResDstInt->FindLiveRangeContaining(NextDef);
+    if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
+      // Special case: vr1024 = implicit_def
+      //               vr1024 = insert_subreg vr1024, vr1025, c
+      // The insert_subreg becomes a "copy" that defines a val# which can itself
+      // be coalesced away.
+      MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef);
+      if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+        LR->valno->copy = DefMI;
+    }
+  }
+
+  // If the resulting interval has a preference that no longer fits because of
+  // subreg coalescing, just clear the preference.
+  if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+      TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
+    const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
+    if (!RC->contains(ResDstInt->preference))
+      ResDstInt->preference = 0;
+  }
+
+  DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
+  DOUT << "\n";
+
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval.
ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+                                  SmallVector<VNInfo*, 16> &NewVNInfo,
+                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments) {
+  unsigned VN = VNI->id;
+
+  // If the VN has already been computed, just return it.
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+  if (I == ThisFromOther.end()) {
+    NewVNInfo.push_back(VNI);
+    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+  }
+  VNInfo *OtherValNo = I->second;
+
+  // Otherwise, this *is* a copy from the RHS. If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo->id] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is.
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
+  return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+/// the specified live interval is defined by a copy from the specified
+/// register.
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
+                                                           LiveRange *LR,
+                                                           unsigned Reg) {
+  unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
+  if (SrcReg == Reg)
+    return true;
+  if (LR->valno->def == ~0U &&
+      TargetRegisterInfo::isPhysicalRegister(li.reg) &&
+      *tri_->getSuperRegisters(li.reg)) {
+    // It's a sub-register live interval, we may not have precise information.
+    // Re-compute it.
+    MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (DefMI &&
+        tii_->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        DstReg == li.reg && SrcReg == Reg) {
+      // Cache computed info.
+      LR->valno->def = LR->start;
+      LR->valno->copy = DefMI;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
+  assert(RHS.containsOneValue());
+
+  // Some number (potentially more than one) value numbers in the current
+  // interval may be defined as copies from the RHS. Scan the overlapping
+  // portions of the LHS and RHS, keeping track of this and looking for
+  // overlapping live ranges that are NOT defined as copies. If these exist, we
+  // cannot coalesce.
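+  // Hypothetical example: RHS (vr1025) is a single range [4,12); LHS (vr1024)
+  // has a range [8,20) whose value is defined at 8 by "vr1024 = vr1025". The
+  // overlap over [8,12) is fine because that LHS value is a copy from the
+  // RHS, so the two value numbers can be merged; any overlap with an LHS
+  // value that is not such a copy forces a bail-out.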
+
+  LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+  LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
+  if (LHSIt->start < RHSIt->start) {
+    LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+    if (LHSIt != LHS.begin()) --LHSIt;
+  } else if (RHSIt->start < LHSIt->start) {
+    RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+    if (RHSIt != RHS.begin()) --RHSIt;
+  }
+
+  SmallVector<VNInfo*, 8> EliminatedLHSVals;
+
+  while (1) {
+    // Determine if these live intervals overlap.
+    bool Overlaps = false;
+    if (LHSIt->start <= RHSIt->start)
+      Overlaps = LHSIt->end > RHSIt->start;
+    else
+      Overlaps = RHSIt->end > LHSIt->start;
+
+    // If the live intervals overlap, there are two interesting cases: if the
+    // LHS interval is defined by a copy from the RHS, it's ok and we record
+    // that the LHS value # is the same as the RHS. If it's not, then we cannot
+    // coalesce these live ranges and we bail out.
+    if (Overlaps) {
+      // If we haven't already recorded that this value # is safe, check it.
+      if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+        // Copy from the RHS?
+        if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
+          return false;    // Nope, bail out.
+
+        if (LHSIt->contains(RHSIt->valno->def))
+          // Here is an interesting situation:
+          // BB1:
+          //   vr1025 = copy vr1024
+          //   ..
+          // BB2:
+          //   vr1024 = op
+          //          = vr1025
+          // Even though vr1025 is copied from vr1024, it's not safe to
+          // coalesce them since the live range of vr1025 intersects the
+          // def of vr1024. This happens because vr1025 is assigned the
+          // value of the previous iteration of vr1024.
+          return false;
+        EliminatedLHSVals.push_back(LHSIt->valno);
+      }
+
+      // We know this entire LHS live range is okay, so skip it now.
+      if (++LHSIt == LHSEnd) break;
+      continue;
+    }
+
+    if (LHSIt->end < RHSIt->end) {
+      if (++LHSIt == LHSEnd) break;
+    } else {
+      // One interesting case to check here. It's possible that we have
+      // something like "X3 = Y" which defines a new value number in the LHS,
+      // and is the last use of this liverange of the RHS. In this case, we
+      // want to notice this copy (so that it gets coalesced away) even though
+      // the live ranges don't actually overlap.
+      if (LHSIt->start == RHSIt->end) {
+        if (InVector(LHSIt->valno, EliminatedLHSVals)) {
+          // We already know that this value number is going to be merged in
+          // if coalescing succeeds. Just skip the liverange.
+          if (++LHSIt == LHSEnd) break;
+        } else {
+          // Otherwise, if this is a copy from the RHS, mark it as being merged
+          // in.
+          if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
+            if (LHSIt->contains(RHSIt->valno->def))
+              // Here is an interesting situation:
+              // BB1:
+              //   vr1025 = copy vr1024
+              //   ..
+              // BB2:
+              //   vr1024 = op
+              //          = vr1025
+              // Even though vr1025 is copied from vr1024, it's not safe to
+              // coalesce them since the live range of vr1025 intersects the
+              // def of vr1024. This happens because vr1025 is assigned the
+              // value of the previous iteration of vr1024.
+              return false;
+            EliminatedLHSVals.push_back(LHSIt->valno);
+
+            // We know this entire LHS live range is okay, so skip it now.
+            if (++LHSIt == LHSEnd) break;
+          }
+        }
+      }
+
+      if (++RHSIt == RHSEnd) break;
+    }
+  }
+
+  // If we got here, we know that the coalescing will be successful and that
+  // the value numbers in EliminatedLHSVals will all be merged together.
Since + // the most common case is that EliminatedLHSVals has a single number, we + // optimize for it: if there is more than one value, we merge them all into + // the lowest numbered one, then handle the interval as if we were merging + // with one value number. + VNInfo *LHSValNo = NULL; + if (EliminatedLHSVals.size() > 1) { + // Loop through all the equal value numbers merging them into the smallest + // one. + VNInfo *Smallest = EliminatedLHSVals[0]; + for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { + if (EliminatedLHSVals[i]->id < Smallest->id) { + // Merge the current notion of the smallest into the smaller one. + LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); + Smallest = EliminatedLHSVals[i]; + } else { + // Merge into the smallest. + LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); + } + } + LHSValNo = Smallest; + } else if (EliminatedLHSVals.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && + *tri_->getSuperRegisters(LHS.reg)) + // Imprecise sub-register information. Can't handle it. + return false; + assert(0 && "No copies from the RHS?"); + } else { + LHSValNo = EliminatedLHSVals[0]; + } + + // Okay, now that there is a single LHS value number that we're merging the + // RHS into, update the value number info for the LHS to indicate that the + // value number is defined where the RHS value number was. + const VNInfo *VNI = RHS.getValNumInfo(0); + LHSValNo->def = VNI->def; + LHSValNo->copy = VNI->copy; + + // Okay, the final step is to loop over the RHS live intervals, adding them to + // the LHS. + LHSValNo->hasPHIKill |= VNI->hasPHIKill; + LHS.addKills(LHSValNo, VNI->kills); + LHS.MergeRangesInAsValue(RHS, LHSValNo); + LHS.weight += RHS.weight; + if (RHS.preference && !LHS.preference) + LHS.preference = RHS.preference; + + // Update the liveintervals of sub-registers. + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) + for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) + li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS, + li_->getVNInfoAllocator()); + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. Otherwise, if one of the intervals being joined is a +/// physreg, this method always canonicalizes LHS to be it. The output +/// "RHS" will not have been modified, so we can use this information +/// below to update aliases. +bool +SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, + bool &Swapped) { + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector LHSValNoAssignments; + SmallVector RHSValNoAssignments; + DenseMap LHSValsDefinedFromRHS; + DenseMap RHSValsDefinedFromLHS; + SmallVector NewVNInfo; + + // If a live interval is a physical register, conservatively check if any + // of its sub-registers is overlapping the live interval of the virtual + // register. If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && + *tri_->getSubRegisters(LHS.reg)) { + // If it's coalescing a virtual register to a physical register, estimate + // its live interval length. This is the *cost* of scanning an entire live + // interval. If the cost is low, we'll do an exhaustive check instead. + + // If this is something like this: + // BB1: + // v1024 = op + // ... + // BB2: + // ... + // RAX = v1024 + // + // That is, the live interval of v1024 crosses a bb. Then we can't rely on + // less conservative check. 
It's possible a sub-register is defined before + // v1024 (or live in) and live out of BB1. + if (RHS.containsOneValue() && + li_->intervalIsInOneMBB(RHS) && + li_->getApproximateInstructionCount(RHS) <= 10) { + // Perform a more exhaustive check for some common cases. + if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) + return false; + } else { + for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { + DOUT << "Interfere with sub-register "; + DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + return false; + } + } + } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) && + *tri_->getSubRegisters(RHS.reg)) { + if (LHS.containsOneValue() && + li_->getApproximateInstructionCount(LHS) <= 10) { + // Perform a more exhaustive check for some common cases. + if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) + return false; + } else { + for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { + DOUT << "Interfere with sub-register "; + DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + return false; + } + } + } + + // Compute ultimate value numbers for the LHS and RHS values. + if (RHS.containsOneValue()) { + // Copies from a liveinterval with a single value are simple to handle and + // very common, handle the special case here. This is important, because + // often RHS is small and LHS is large (e.g. a physreg). + + // Find out if the RHS is defined as a copy from some value in the LHS. + int RHSVal0DefinedFromLHS = -1; + int RHSValID = -1; + VNInfo *RHSValNoInfo = NULL; + VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0); + unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0); + if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) { + // If RHS is not defined as a copy from the LHS, we can use simpler and + // faster checks to see if the live ranges are coalescable. This joiner + // can't swap the LHS/RHS intervals though. + if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { + return SimpleJoin(LHS, RHS); + } else { + RHSValNoInfo = RHSValNoInfo0; + } + } else { + // It was defined as a copy from the LHS, find out what value # it is. + RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno; + RHSValID = RHSValNoInfo->id; + RHSVal0DefinedFromLHS = RHSValID; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.resize(LHS.getNumValNums(), NULL); + + // Okay, *all* of the values in LHS that are defined as a copy from RHS + // should now get updated. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) { + if (LHSSrcReg != RHS.reg) { + // If this is not a copy from the RHS, its value number will be + // unmodified by the coalescing. + NewVNInfo[VN] = VNI; + LHSValNoAssignments[VN] = VN; + } else if (RHSValID == -1) { + // Otherwise, it is a copy from the RHS, and we don't already have a + // value# for it. Keep the current value number, but remember it. + LHSValNoAssignments[VN] = RHSValID = VN; + NewVNInfo[VN] = RHSValNoInfo; + LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; + } else { + // Otherwise, use the specified value #. + LHSValNoAssignments[VN] = RHSValID; + if (VN == (unsigned)RHSValID) { // Else this val# is dead. 
+ NewVNInfo[VN] = RHSValNoInfo; + LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; + } + } + } else { + NewVNInfo[VN] = VNI; + LHSValNoAssignments[VN] = VN; + } + } + + assert(RHSValID != -1 && "Didn't find value #?"); + RHSValNoAssignments[0] = RHSValID; + if (RHSVal0DefinedFromLHS != -1) { + // This path doesn't go through ComputeUltimateVN so just set + // it to anything. + RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1; + } + } else { + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (li_->getVNInfoSourceReg(VNI) != RHS.reg) + continue; + + // Figure out the value # from the RHS. + LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (li_->getVNInfoSourceReg(VNI) != LHS.reg) + continue; + + // Figure out the value # from the LHS. + RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + + while (1) { + // Determine if these two live ranges overlap. 
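+    // (Live ranges are half-open [start, end) slot ranges: whichever range
+    //  starts first overlaps the other exactly when its end index exceeds the
+    //  other's start index.  For example, [4,12) and [8,20) overlap, while
+    //  [4,8) and [8,20) merely abut -- adjacency alone is not an overlap.
+    //  The indices here are invented for illustration.)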
+    bool Overlaps;
+    if (I->start < J->start) {
+      Overlaps = I->end > J->start;
+    } else {
+      Overlaps = J->end > I->start;
+    }
+
+    // If so, check value # info to determine if they are really different.
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them.  If not, we can't.
+      if (LHSValNoAssignments[I->valno->id] !=
+          RHSValNoAssignments[J->valno->id])
+        return false;
+    }
+
+    if (I->end < J->end) {
+      ++I;
+      if (I == IE) break;
+    } else {
+      ++J;
+      if (J == JE) break;
+    }
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+    NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+    NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+  }
+
+  // If we get here, we know that we can coalesce the live ranges.  Ask the
+  // intervals to coalesce themselves now.
+  if ((RHS.ranges.size() > LHS.ranges.size() &&
+       TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
+      TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+    RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+    Swapped = true;
+  } else {
+    LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+    Swapped = false;
+  }
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      if (LHS.first > RHS.first) return true;   // Deeper loops first
+      return LHS.first == RHS.first &&
+        LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+/// getRepIntervalSize - Returns the size of the interval that represents the
+/// specified register.
+template <class SF>
+unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
+  return Rc->getRepIntervalSize(Reg);
+}
+
+/// CopyRecSort::operator() - Join priority queue sorting function.
+///
+bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
+  // Inner loops first.
+  if (left.LoopDepth > right.LoopDepth)
+    return false;
+  else if (left.LoopDepth == right.LoopDepth)
+    if (left.isBackEdge && !right.isBackEdge)
+      return false;
+  return true;
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                             std::vector<CopyRec> &TryAgain) {
+  DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+  std::vector<CopyRec> VirtCopies;
+  std::vector<CopyRec> PhysCopies;
+  std::vector<CopyRec> ImpDefCopies;
+  unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+
+    // If this isn't a copy nor an extract_subreg, we can't join intervals.
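+    // (Three copy-like forms are recognized below: plain register moves via
+    //  isMoveInstr, EXTRACT_SUBREG with its source register in operand 1, and
+    //  INSERT_SUBREG / SUBREG_TO_REG with the inserted source in operand 2.)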
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); + } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + continue; + + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + if (NewHeuristic) { + JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg))); + } else { + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(CopyRec(Inst, 0, false)); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(CopyRec(Inst, 0, false)); + else + VirtCopies.push_back(CopyRec(Inst, 0, false)); + } + } + + if (NewHeuristic) + return; + + // Try coalescing implicit copies first, followed by copies to / from + // physical registers, then finally copies from virtual registers to + // virtual registers. + for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { + CopyRec &TheCopy = ImpDefCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { + CopyRec &TheCopy = PhysCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { + CopyRec &TheCopy = VirtCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } +} + +void SimpleRegisterCoalescing::joinIntervals() { + DOUT << "********** JOINING INTERVALS ***********\n"; + + if (NewHeuristic) + JoinQueue = new JoinPriorityQueue(this); + + std::vector TryAgainList; + if (loopInfo->empty()) { + // If there are no loops in the function, join intervals in function order. + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + I != E; ++I) + CopyCoalesceInMBB(I, TryAgainList); + } else { + // Otherwise, join intervals in inner loops before other intervals. + // Unfortunately we can't just iterate over loop hierarchy here because + // there may be more MBB's than BB's. Collect MBB's for sorting. + + // Join intervals in the function prolog first. We want to join physical + // registers with virtual registers before the intervals got too long. + std::vector > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + } + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. 
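+  // The retry logic below is a worklist-to-fixpoint pattern; a minimal
+  // standalone sketch of the same idea (illustration only -- `Task` and
+  // `tryJoin` are invented stand-ins, not part of the imported source):
+  //
+  //   struct Task { bool Done; /* the copy to attempt */ };
+  //   static void joinToFixpoint(std::vector<Task> &Work,
+  //                              bool (*tryJoin)(Task&)) {
+  //     bool Progress = true;
+  //     while (Progress) {            // every pass that coalesces something
+  //       Progress = false;           // may unblock a previously failed copy
+  //       for (unsigned i = 0, e = Work.size(); i != e; ++i)
+  //         if (!Work[i].Done && tryJoin(Work[i])) {
+  //           Work[i].Done = true;
+  //           Progress = true;
+  //         }
+  //     }
+  //   }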
+ if (NewHeuristic) { + SmallVector TryAgain; + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + while (!JoinQueue->empty()) { + CopyRec R = JoinQueue->pop(); + bool Again = false; + bool Success = JoinCopy(R, Again); + if (Success) + ProgressMade = true; + else if (Again) + TryAgain.push_back(R); + } + + if (ProgressMade) { + while (!TryAgain.empty()) { + JoinQueue->push(TryAgain.back()); + TryAgain.pop_back(); + } + } + } + } else { + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (TheCopy.MI) { + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy.MI = 0; // Mark this one as done. + ProgressMade = true; + } + } + } + } + } + + if (NewHeuristic) + delete JoinQueue; +} + +/// Return true if the two specified registers belong to different register +/// classes. The registers may be either phys or virt regs. +bool +SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, + unsigned RegB) const { + // Get the register classes for the first reg. + if (TargetRegisterInfo::isPhysicalRegister(RegA)) { + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "Shouldn't consider two physregs!"); + return !mri_->getRegClass(RegB)->contains(RegA); + } + + // Compare against the regclass for the second reg. + const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA); + if (TargetRegisterInfo::isVirtualRegister(RegB)) { + const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB); + return RegClassA != RegClassB; + } + return !RegClassA->contains(RegB); +} + +/// lastRegisterUse - Returns the last use of the specific register between +/// cycles Start and End or NULL if there are no uses. +MachineOperand * +SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, + unsigned Reg, unsigned &UseIdx) const{ + UseIdx = 0; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineOperand *LastUse = NULL; + for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), + E = mri_->use_end(); I != E; ++I) { + MachineOperand &Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + SrcReg == DstReg) + // Ignore identity copies. + continue; + unsigned Idx = li_->getInstructionIndex(UseMI); + if (Idx >= Start && Idx < End && Idx >= UseIdx) { + LastUse = &Use; + UseIdx = li_->getUseIndex(Idx); + } + } + return LastUse; + } + + int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; + int s = Start; + while (e >= s) { + // Skip deleted instructions + MachineInstr *MI = li_->getInstructionFromIndex(e); + while ((e - InstrSlots::NUM) >= s && !MI) { + e -= InstrSlots::NUM; + MI = li_->getInstructionFromIndex(e); + } + if (e < s || MI == NULL) + return NULL; + + // Ignore identity copies. 
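+    // (An identity copy is a move whose source and destination are the same
+    //  register -- typically left behind by earlier coalescing.  It defines
+    //  no new value, so it is skipped rather than reported as a use.)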
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + SrcReg == DstReg)) + for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { + MachineOperand &Use = MI->getOperand(i); + if (Use.isReg() && Use.isUse() && Use.getReg() && + tri_->regsOverlap(Use.getReg(), Reg)) { + UseIdx = li_->getUseIndex(e); + return &Use; + } + } + + e -= InstrSlots::NUM; + } + + return NULL; +} + + +void SimpleRegisterCoalescing::printRegName(unsigned reg) const { + if (TargetRegisterInfo::isPhysicalRegister(reg)) + cerr << tri_->getName(reg); + else + cerr << "%reg" << reg; +} + +void SimpleRegisterCoalescing::releaseMemory() { + JoinedCopies.clear(); + ReMatCopies.clear(); + ReMatDefs.clear(); +} + +static bool isZeroLengthInterval(LiveInterval *li) { + for (LiveInterval::Ranges::const_iterator + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) + if (i->end - i->start > LiveInterval::InstrSlots::NUM) + return false; + return true; +} + +/// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, +/// turn the copy into an implicit def. +bool +SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, + MachineBasicBlock *MBB, + unsigned DstReg, unsigned SrcReg) { + MachineInstr *CopyMI = &*I; + unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + if (!li_->hasInterval(SrcReg)) + return false; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + if (!SrcInt.empty()) + return false; + if (!li_->hasInterval(DstReg)) + return false; + LiveInterval &DstInt = li_->getInterval(DstReg); + const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); + DstInt.removeValNo(DstLR->valno); + CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) + CopyMI->RemoveOperand(i); + bool NoUse = mri_->use_empty(SrcReg); + if (NoUse) { + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), + E = mri_->reg_end(); I != E; ) { + assert(I.getOperand().isDef()); + MachineInstr *DefMI = &*I; + ++I; + // The implicit_def source has no other uses, delete it. + assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); + li_->RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + } + } + ++I; + return true; +} + + +bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + li_ = &getAnalysis(); + loopInfo = &getAnalysis(); + + DOUT << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'; + + allocatableRegs_ = tri_->getAllocatableSet(fn); + for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), + E = tri_->regclass_end(); I != E; ++I) + allocatableRCRegs_.insert(std::make_pair(*I, + tri_->getAllocatableSet(fn, *I))); + + // Join (coalesce) intervals if requested. + if (EnableJoining) { + joinIntervals(); + DEBUG({ + DOUT << "********** INTERVALS POST JOINING **********\n"; + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ + I->second->print(DOUT, tri_); + DOUT << "\n"; + } + }); + } + + // Perform a final pass over the instructions and compute spill weights + // and remove identity moves. 
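+  // The spill weights accumulated below are loop-depth scaled.  One plausible
+  // mental model (illustration only; the precise scaling is whatever
+  // LiveIntervals::getSpillWeight implements) is a 10x multiplier per level
+  // of loop nesting, so an access at depth 2 counts 100x a flat one:
+  //
+  //   static float accessWeight(bool IsDef, bool IsUse, unsigned LoopDepth) {
+  //     return (float)(IsDef + IsUse)            // assumes <cmath>
+  //            * std::pow(10.0f, (float)LoopDepth);
+  //   }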
+ SmallVector DeadDefs; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + unsigned loopDepth = loopInfo->getLoopDepth(mbb); + + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + MachineInstr *MI = mii; + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (JoinedCopies.count(MI)) { + // Delete all coalesced copies. + if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { + assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && + "Unrecognized copy instruction"); + DstReg = MI->getOperand(0).getReg(); + } + if (MI->registerDefIsDead(DstReg)) { + LiveInterval &li = li_->getInterval(DstReg); + if (!ShortenDeadCopySrcLiveRange(li, MI)) + ShortenDeadCopyLiveRange(li, MI); + } + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + continue; + } + + // Now check if this is a remat'ed def instruction which is now dead. + if (ReMatDefs.count(MI)) { + bool isDead = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DeadDefs.push_back(Reg); + if (MO.isDead()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !mri_->use_empty(Reg)) { + isDead = false; + break; + } + } + if (isDead) { + while (!DeadDefs.empty()) { + unsigned DeadDef = DeadDefs.back(); + DeadDefs.pop_back(); + RemoveDeadDef(li_->getInterval(DeadDef), MI); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + continue; + } else + DeadDefs.clear(); + } + + // If the move will be an identity move delete it + bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + if (isMove && SrcReg == DstReg) { + if (li_->hasInterval(SrcReg)) { + LiveInterval &RegInt = li_->getInterval(SrcReg); + // If def of this move instruction is dead, remove its live range + // from the dstination register's live interval. + if (MI->registerDefIsDead(DstReg)) { + if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) + ShortenDeadCopyLiveRange(RegInt, MI); + } + } + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) { + SmallSet UniqueUses; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &mop = MI->getOperand(i); + if (mop.isReg() && mop.getReg() && + TargetRegisterInfo::isVirtualRegister(mop.getReg())) { + unsigned reg = mop.getReg(); + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. + if (UniqueUses.count(reg) != 0) + continue; + LiveInterval &RegInt = li_->getInterval(reg); + RegInt.weight += + li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); + UniqueUses.insert(reg); + } + } + ++mii; + } + } + } + + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + LiveInterval &LI = *I->second; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. 
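+      // ("Essentially zero" is made precise by isZeroLengthInterval above:
+      //  every range in the interval spans at most one instruction's worth of
+      //  index slots, InstrSlots::NUM.  Spilling such an interval could not
+      //  shorten it, hence the HUGE_VALF weight that keeps it off the spill
+      //  candidate list.)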
+ if (isZeroLengthInterval(&LI)) + LI.weight = HUGE_VALF; + else { + bool isLoad = false; + SmallVector SpillIs; + if (li_->isReMaterializable(LI, SpillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + if (isLoad) + LI.weight *= 0.9F; + else + LI.weight *= 0.5F; + } + } + + // Slightly prefer live interval that has been assigned a preferred reg. + if (LI.preference) + LI.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; + } + } + + DEBUG(dump()); + return true; +} + +/// print - Implement the dump method. +void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const { + li_->print(O, m); +} + +RegisterCoalescer* llvm::createSimpleRegisterCoalescer() { + return new SimpleRegisterCoalescing(); +} + +// Make sure that anything that uses RegisterCoalescer pulls in this file... +DEFINING_FILE_FOR(SimpleRegisterCoalescing) diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h new file mode 100644 index 000000000000..a495bfd644a5 --- /dev/null +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -0,0 +1,313 @@ +//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register copy coalescing phase. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H +#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/ADT/BitVector.h" +#include + +namespace llvm { + class SimpleRegisterCoalescing; + class LiveVariables; + class TargetRegisterInfo; + class TargetInstrInfo; + class VirtRegMap; + class MachineLoopInfo; + + /// CopyRec - Representation for copy instructions in coalescer queue. + /// + struct CopyRec { + MachineInstr *MI; + unsigned LoopDepth; + bool isBackEdge; + CopyRec(MachineInstr *mi, unsigned depth, bool be) + : MI(mi), LoopDepth(depth), isBackEdge(be) {}; + }; + + template class JoinPriorityQueue; + + /// CopyRecSort - Sorting function for coalescer queue. + /// + struct CopyRecSort : public std::binary_function { + JoinPriorityQueue *JPQ; + explicit CopyRecSort(JoinPriorityQueue *jpq) : JPQ(jpq) {} + CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {} + bool operator()(CopyRec left, CopyRec right) const; + }; + + /// JoinQueue - A priority queue of copy instructions the coalescer is + /// going to process. 
+  template <class SF>
+  class JoinPriorityQueue {
+    SimpleRegisterCoalescing *Rc;
+    std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue;
+
+  public:
+    explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc)
+      : Rc(rc), Queue(SF(this)) {}
+
+    bool empty() const { return Queue.empty(); }
+    void push(CopyRec R) { Queue.push(R); }
+    CopyRec pop() {
+      if (empty()) return CopyRec(0, 0, false);
+      CopyRec R = Queue.top();
+      Queue.pop();
+      return R;
+    }
+
+    // Callbacks to SimpleRegisterCoalescing.
+    unsigned getRepIntervalSize(unsigned Reg);
+  };
+
+  class SimpleRegisterCoalescing : public MachineFunctionPass,
+                                   public RegisterCoalescer {
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    LiveIntervals *li_;
+    const MachineLoopInfo* loopInfo;
+
+    BitVector allocatableRegs_;
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+    /// JoinQueue - A priority queue of copy instructions the coalescer is
+    /// going to process.
+    JoinPriorityQueue<CopyRecSort> *JoinQueue;
+
+    /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+    ///
+    SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+    /// ReMatCopies - Keep track of copies eliminated due to remat.
+    ///
+    SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+    /// ReMatDefs - Keep track of definition instructions which have
+    /// been remat'ed.
+    SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+
+    struct InstrSlots {
+      enum {
+        LOAD  = 0,
+        USE   = 1,
+        DEF   = 2,
+        STORE = 3,
+        NUM   = 4
+      };
+    };
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+      // This runs as an independent pass, so don't do anything.
+      return false;
+    };
+
+    /// getRepIntervalSize - Called from the join priority queue sorting
+    /// function.  It returns the size of the interval that represents the
+    /// given register.
+    unsigned getRepIntervalSize(unsigned Reg) {
+      if (!li_->hasInterval(Reg))
+        return 0;
+      return li_->getApproximateInstructionCount(li_->getInterval(Reg)) *
+             LiveInterval::InstrSlots::NUM;
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(std::ostream &O, const Module* = 0) const;
+    void print(std::ostream *O, const Module* M = 0) const {
+      if (O) print(*O, M);
+    }
+
+  private:
+    /// joinIntervals - join compatible live intervals
+    void joinIntervals();
+
+    /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+    /// copies that cannot yet be coalesced into the "TryAgain" list.
+    void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                           std::vector<CopyRec> &TryAgain);
+
+    /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+    /// which are the src/dst of the copy instruction CopyMI.  This returns
+    /// true if the copy was successfully coalesced away. If it is not
+    /// currently possible to coalesce this interval, but it may be possible
+    /// if other things get coalesced, then it returns true by reference in
+    /// 'Again'.
+    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+    /// JoinIntervals - Attempt to join these two intervals.  On failure, this
+    /// returns false.  Otherwise, if one of the intervals being joined is a
+    /// physreg, this method always canonicalizes LHS to be it.  The output
+    /// "RHS" will not have been modified, so we can use this information
+    /// below to update aliases.
+    bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
+
+    /// SimpleJoin - Attempt to join the specified interval into this one. The
+    /// caller of this method must guarantee that the RHS only contains a
+    /// single value number and that the RHS is not defined by a copy from
+    /// this interval.  This returns false if the intervals are not joinable,
+    /// or it joins them and returns true.
+    bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
+
+    /// Return true if the two specified registers belong to different
+    /// register classes.  The registers may be either phys or virt regs.
+    bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy.  If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge these two destination reg valno#s into a single
+    /// value number, eliminating a copy.
+    bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+                              MachineInstr *CopyMI);
+
+    /// HasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than the BValNo val# that can reach uses of the AValNo val# of
+    /// IntA.
+    bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+                              VNInfo *AValNo, VNInfo *BValNo);
+
+    /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// If the source value number is defined by a commutable instruction and
+    /// its other operand is coalesced to the copy dest register, see if we
+    /// can transform the copy into a noop by commuting the definition.
+    bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
+                                  MachineInstr *CopyMI);
+
+    /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+    /// and return true.
+    bool TrimLiveIntervalToLastUse(unsigned CopyIdx,
+                                   MachineBasicBlock *CopyMBB,
+                                   LiveInterval &li, const LiveRange *LR);
+
+    /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the
+    /// definition.
+    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+                                 MachineInstr *CopyMI);
+
+    /// TurnCopyIntoImpDef - If the source of the specified copy is an
+    /// implicit def, turn the copy into an implicit def.
+    bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+                            MachineBasicBlock *MBB,
+                            unsigned DstReg, unsigned SrcReg);
+
+    /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+    /// from an implicit def to another register can be coalesced away.
+    bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
+                               LiveInterval &li, LiveInterval &ImpLi) const;
+
+    /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+    /// def and it is being removed. Turn all copies from this value# into
+    /// identity copies so they will be removed.
+    void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+
+    /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+    /// virtual destination register with a physical source register.
+    bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+                                     MachineBasicBlock *CopyMBB,
+                                     LiveInterval &DstInt, LiveInterval &SrcInt);
+
+    /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+    /// copy from a virtual source register to a physical destination
+    /// register.
+ bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, + MachineBasicBlock *CopyMBB, + LiveInterval &DstInt, LiveInterval &SrcInt); + + /// isWinToJoinCrossClass - Return true if it's profitable to coalesce + /// two virtual registers from different register classes. + bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg, + unsigned Threshold); + + /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual + /// register with a physical register, check if any of the virtual register + /// operand is a sub-register use or def. If so, make sure it won't result + /// in an illegal extract_subreg or insert_subreg instruction. + bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, + unsigned VirtReg, unsigned PhysReg); + + /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce + /// an extract_subreg where dst is a physical register, e.g. + /// cl = EXTRACT_SUBREG reg1024, 1 + bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, + unsigned SubIdx, unsigned &RealDstReg); + + /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce + /// an insert_subreg where src is a physical register, e.g. + /// reg1024 = INSERT_SUBREG reg1024, c1, 0 + bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, + unsigned SubIdx, unsigned &RealDstReg); + + /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of + /// the specified live interval is defined by a copy from the specified + /// register. + bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, + unsigned Reg); + + /// isBackEdgeCopy - Return true if CopyMI is a back edge copy. + /// + bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const; + + /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and + /// update the subregister number if it is not zero. If DstReg is a + /// physical register and the existing subregister number of the def / use + /// being updated is not zero, make sure to set it to the correct physical + /// subregister. + void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + + /// RemoveDeadImpDef - Remove implicit_def instructions which are + /// "re-defining" registers due to insert_subreg coalescing. e.g. + void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI); + + /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate + /// due to live range lengthening as the result of coalescing. + void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); + + /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. + /// Return true if live interval is removed. + bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); + + /// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially + /// extended by a dead copy. Mark the last use (if any) of the val# as kill + /// as ends the live range there. If there isn't another use, then this + /// live range is dead. Return true if live interval is removed. + bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI); + + /// RemoveDeadDef - If a def of a live interval is now determined dead, + /// remove the val# it defines. If the live interval becomes empty, remove + /// it as well. + bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); + + /// lastRegisterUse - Returns the last use of the specific register between + /// cycles Start and End or NULL if there are no uses. 
+ MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg, + unsigned &LastUseIdx) const; + + void printRegName(unsigned reg) const; + }; + +} // End llvm namespace + +#endif diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp new file mode 100644 index 000000000000..ce63121251e3 --- /dev/null +++ b/lib/CodeGen/Spiller.cpp @@ -0,0 +1,229 @@ +//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" + +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +Spiller::~Spiller() {} + +namespace { + +/// Utility class for spillers. +class SpillerBase : public Spiller { +protected: + + MachineFunction *mf; + LiveIntervals *lis; + LiveStacks *ls; + MachineFrameInfo *mfi; + MachineRegisterInfo *mri; + const TargetInstrInfo *tii; + VirtRegMap *vrm; + + /// Construct a spiller base. + SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + mf(mf), lis(lis), ls(ls), vrm(vrm) + { + mfi = mf->getFrameInfo(); + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + } + + /// Insert a store of the given vreg to the given stack slot immediately + /// after the given instruction. Returns the base index of the inserted + /// instruction. The caller is responsible for adding an appropriate + /// LiveInterval to the LiveIntervals analysis. + unsigned insertStoreFor(MachineInstr *mi, unsigned ss, + unsigned newVReg, + const TargetRegisterClass *trc) { + MachineBasicBlock::iterator nextInstItr(mi); + ++nextInstItr; + + if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { + lis->scaleNumbering(2); + ls->scaleNumbering(2); + } + + unsigned miIdx = lis->getInstructionIndex(mi); + + assert(lis->hasGapAfterInstr(miIdx)); + + tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg, + true, ss, trc); + MachineBasicBlock::iterator storeInstItr(mi); + ++storeInstItr; + MachineInstr *storeInst = &*storeInstItr; + unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + + assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && + "Store inst index already in use."); + + lis->InsertMachineInstrInMaps(storeInst, storeInstIdx); + + return storeInstIdx; + } + + /// Insert a load of the given veg from the given stack slot immediately + /// before the given instruction. Returns the base index of the inserted + /// instruction. The caller is responsible for adding an appropriate + /// LiveInterval to the LiveIntervals analysis. 
+ unsigned insertLoadFor(MachineInstr *mi, unsigned ss, + unsigned newVReg, + const TargetRegisterClass *trc) { + MachineBasicBlock::iterator useInstItr(mi); + + if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { + lis->scaleNumbering(2); + ls->scaleNumbering(2); + } + + unsigned miIdx = lis->getInstructionIndex(mi); + + assert(lis->hasGapBeforeInstr(miIdx)); + + tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc); + MachineBasicBlock::iterator loadInstItr(mi); + --loadInstItr; + MachineInstr *loadInst = &*loadInstItr; + unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + + assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && + "Load inst index already in use."); + + lis->InsertMachineInstrInMaps(loadInst, loadInstIdx); + + return loadInstIdx; + } + + + /// Add spill ranges for every use/def of the live interval, inserting loads + /// immediately before each use, and stores after each def. No folding is + /// attempted. + std::vector trivialSpillEverywhere(LiveInterval *li) { + DOUT << "Spilling everywhere " << *li << "\n"; + + assert(li->weight != HUGE_VALF && + "Attempting to spill already spilled value."); + + assert(!li->isStackSlot() && + "Trying to spill a stack slot."); + + std::vector added; + + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + unsigned ss = vrm->assignVirt2StackSlot(li->reg); + + for (MachineRegisterInfo::reg_iterator + regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + + MachineInstr *mi = &*regItr; + do { + ++regItr; + } while (regItr != mri->reg_end() && (&*regItr == mi)); + + SmallVector indices; + bool hasUse = false; + bool hasDef = false; + + for (unsigned i = 0; i != mi->getNumOperands(); ++i) { + MachineOperand &op = mi->getOperand(i); + + if (!op.isReg() || op.getReg() != li->reg) + continue; + + hasUse |= mi->getOperand(i).isUse(); + hasDef |= mi->getOperand(i).isDef(); + + indices.push_back(i); + } + + unsigned newVReg = mri->createVirtualRegister(trc); + vrm->grow(); + vrm->assignVirt2StackSlot(newVReg, ss); + + LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + newLI->weight = HUGE_VALF; + + for (unsigned i = 0; i < indices.size(); ++i) { + mi->getOperand(indices[i]).setReg(newVReg); + + if (mi->getOperand(indices[i]).isUse()) { + mi->getOperand(indices[i]).setIsKill(true); + } + } + + assert(hasUse || hasDef); + + if (hasUse) { + unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc); + unsigned start = lis->getDefIndex(loadInstIdx), + end = lis->getUseIndex(lis->getInstructionIndex(mi)); + + VNInfo *vni = + newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator()); + vni->kills.push_back(lis->getInstructionIndex(mi)); + LiveRange lr(start, end, vni); + + newLI->addRange(lr); + } + + if (hasDef) { + unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc); + unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); + + VNInfo *vni = + newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator()); + vni->kills.push_back(storeInstIdx); + LiveRange lr(start, end, vni); + + newLI->addRange(lr); + } + + added.push_back(newLI); + } + + + return added; + } + +}; + + +/// Spills any live range using the spill-everywhere method with no attempt at +/// folding. 
+class TrivialSpiller : public SpillerBase { +public: + TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + SpillerBase(mf, lis, ls, vrm) {} + + std::vector spill(LiveInterval *li) { + return trivialSpillEverywhere(li); + } + +}; + +} + +llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, + LiveStacks *ls, VirtRegMap *vrm) { + return new TrivialSpiller(mf, lis, ls, vrm); +} diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h new file mode 100644 index 000000000000..cad054d744c4 --- /dev/null +++ b/lib/CodeGen/Spiller.h @@ -0,0 +1,37 @@ +//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLER_H +#define LLVM_CODEGEN_SPILLER_H + +#include + +namespace llvm { + class LiveInterval; + class LiveIntervals; + class LiveStacks; + class MachineFunction; + class VirtRegMap; + + /// Spiller interface. + /// + /// Implementations are utility classes which insert spill or remat code on + /// demand. + class Spiller { + public: + virtual ~Spiller() = 0; + virtual std::vector spill(LiveInterval *li) = 0; + }; + + /// Create and return a spiller object, as specified on the command line. + Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, + LiveStacks *ls, VirtRegMap *vrm); +} + +#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp new file mode 100644 index 000000000000..c179f1e3df97 --- /dev/null +++ b/lib/CodeGen/StackProtector.cpp @@ -0,0 +1,224 @@ +//===-- StackProtector.cpp - Stack Protector Insertion --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass inserts stack protectors into functions which need them. A variable +// with a random value in it is stored onto the stack before the local variables +// are allocated. Upon exiting the block, the stored value is checked. If it's +// changed, then there was some sort of violation and the program aborts. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +// SSPBufferSize - The lower bound for a buffer to be considered for stack +// smashing protection. +static cl::opt +SSPBufferSize("stack-protector-buffer-size", cl::init(8), + cl::desc("Lower bound for a buffer to be considered for " + "stack protection")); + +namespace { + class VISIBILITY_HIDDEN StackProtector : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. 
+ const TargetLowering *TLI; + + Function *F; + Module *M; + + /// InsertStackProtectors - Insert code into the prologue and epilogue of + /// the function. + /// + /// - The prologue code loads and stores the stack guard onto the stack. + /// - The epilogue checks the value stored in the prologue against the + /// original value. It calls __stack_chk_fail if they differ. + bool InsertStackProtectors(); + + /// CreateFailBB - Create a basic block to jump to when the stack protector + /// check fails. + BasicBlock *CreateFailBB(); + + /// RequiresStackProtector - Check whether or not this function needs a + /// stack protector based upon the stack protector level. + bool RequiresStackProtector() const; + public: + static char ID; // Pass identification, replacement for typeid. + StackProtector() : FunctionPass(&ID), TLI(0) {} + StackProtector(const TargetLowering *tli) + : FunctionPass(&ID), TLI(tli) {} + + virtual bool runOnFunction(Function &Fn); + }; +} // end anonymous namespace + +char StackProtector::ID = 0; +static RegisterPass +X("stack-protector", "Insert stack protectors"); + +FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { + return new StackProtector(tli); +} + +bool StackProtector::runOnFunction(Function &Fn) { + F = &Fn; + M = F->getParent(); + + if (!RequiresStackProtector()) return false; + + return InsertStackProtectors(); +} + +/// RequiresStackProtector - Check whether or not this function needs a stack +/// protector based upon the stack protector level. The heuristic we use is to +/// add a guard variable to functions that call alloca, and functions with +/// buffers larger than SSPBufferSize bytes. +bool StackProtector::RequiresStackProtector() const { + if (F->hasFnAttr(Attribute::StackProtectReq)) + return true; + + if (!F->hasFnAttr(Attribute::StackProtect)) + return false; + + const TargetData *TD = TLI->getTargetData(); + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + BasicBlock *BB = I; + + for (BasicBlock::iterator + II = BB->begin(), IE = BB->end(); II != IE; ++II) + if (AllocaInst *AI = dyn_cast(II)) { + if (AI->isArrayAllocation()) + // This is a call to alloca with a variable size. Emit stack + // protectors. + return true; + + if (const ArrayType *AT = dyn_cast(AI->getAllocatedType())) + // If an array has more than SSPBufferSize bytes of allocated space, + // then we emit stack protectors. + if (SSPBufferSize <= TD->getTypeAllocSize(AT)) + return true; + } + } + + return false; +} + +/// InsertStackProtectors - Insert code into the prologue and epilogue of the +/// function. +/// +/// - The prologue code loads and stores the stack guard onto the stack. +/// - The epilogue checks the value stored in the prologue against the original +/// value. It calls __stack_chk_fail if they differ. +bool StackProtector::InsertStackProtectors() { + BasicBlock *FailBB = 0; // The basic block to jump to if check fails. + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Constant *StackGuardVar = 0; // The stack guard variable. 
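+  // (All three are created lazily: the guard slot, the entry-block guard
+  //  code and the failure block are only emitted once the first
+  //  return-terminated block is found, so nothing is inserted into functions
+  //  whose loop below never sees a ReturnInst.)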
+ + for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + BasicBlock *BB = I++; + + ReturnInst *RI = dyn_cast(BB->getTerminator()); + if (!RI) continue; + + if (!FailBB) { + // Insert code into the entry block that stores the __stack_chk_guard + // variable onto the stack: + // + // entry: + // StackGuardSlot = alloca i8* + // StackGuard = load __stack_chk_guard + // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) + // + PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + &Args[0], array_endof(Args), "", InsPt); + + // Create the basic block to jump to when the guard check fails. + FailBB = CreateFailBB(); + } + + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); + + // Move the newly created basic block to the point right after the old basic + // block so that it's in the "fall through" position. + NewBB->moveAfter(BB); + + // Generate the stack protector instructions in the old basic block. + LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); + LoadInst *LI2 = new LoadInst(AI, "", true, BB); + ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB); + BranchInst::Create(NewBB, FailBB, Cmp, BB); + } + + // Return if we didn't modify any basic blocks. I.e., there are no return + // statements in the function. + if (!FailBB) return false; + + return true; +} + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. +BasicBlock *StackProtector::CreateFailBB() { + BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F); + Constant *StackChkFail = + M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL); + CallInst::Create(StackChkFail, "", FailBB); + new UnreachableInst(FailBB); + return FailBB; +} diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp new file mode 100644 index 000000000000..582464478cfc --- /dev/null +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -0,0 +1,733 @@ +//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the stack slot coloring pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stackcoloring" +#include "VirtRegMap.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include +using namespace llvm; + +static cl::opt +DisableSharing("no-stack-slot-sharing", + cl::init(false), cl::Hidden, + cl::desc("Suppress slot sharing during stack coloring")); + +static cl::opt +ColorWithRegsOpt("color-ss-with-regs", + cl::init(false), cl::Hidden, + cl::desc("Color stack slots with free registers")); + + +static cl::opt DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); + +STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); +STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); +STATISTIC(NumLoadElim, "Number of loads eliminated"); +STATISTIC(NumStoreElim, "Number of stores eliminated"); +STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); + +namespace { + class VISIBILITY_HIDDEN StackSlotColoring : public MachineFunctionPass { + bool ColorWithRegs; + LiveStacks* LS; + VirtRegMap* VRM; + MachineFrameInfo *MFI; + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineLoopInfo *loopInfo; + + // SSIntervals - Spill slot intervals. + std::vector SSIntervals; + + // SSRefs - Keep a list of frame index references for each spill slot. + SmallVector, 16> SSRefs; + + // OrigAlignments - Alignments of stack objects before coloring. + SmallVector OrigAlignments; + + // OrigSizes - Sizess of stack objects before coloring. + SmallVector OrigSizes; + + // AllColors - If index is set, it's a spill slot, i.e. color. + // FIXME: This assumes PEI locate spill slot with smaller indices + // closest to stack pointer / frame pointer. Therefore, smaller + // index == better color. + BitVector AllColors; + + // NextColor - Next "color" that's not yet used. + int NextColor; + + // UsedColors - "Colors" that have been assigned. + BitVector UsedColors; + + // Assignments - Color to intervals mapping. 
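+    // (Consulted by OverlapWithAssignments below: a spill slot interval may
+    //  adopt a color only if it overlaps none of the intervals already
+    //  assigned to that color.)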
+    SmallVector<SmallVector<LiveInterval*, 4>, 16> Assignments;
+
+  public:
+    static char ID; // Pass identification
+    StackSlotColoring() :
+      MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+    StackSlotColoring(bool RegColor) :
+      MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveStacks>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<VirtRegMap>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char* getPassName() const {
+      return "Stack Slot Coloring";
+    }
+
+  private:
+    void InitializeSlots();
+    void ScanForSpillSlotRefs(MachineFunction &MF);
+    bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+    int ColorSlot(LiveInterval *li);
+    bool ColorSlots(MachineFunction &MF);
+    bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+                                SmallVector<SmallVector<int, 4>, 16> &RevMap,
+                                BitVector &SlotIsReg);
+    void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+                            MachineFunction &MF);
+    bool PropagateBackward(MachineBasicBlock::iterator MII,
+                           MachineBasicBlock *MBB,
+                           unsigned OldReg, unsigned NewReg);
+    bool PropagateForward(MachineBasicBlock::iterator MII,
+                          MachineBasicBlock *MBB,
+                          unsigned OldReg, unsigned NewReg);
+    void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+                                     unsigned Reg,
+                                     const TargetRegisterClass *RC,
+                                     SmallSet<unsigned, 4> &Defs,
+                                     MachineFunction &MF);
+    bool AllMemRefsCanBeUnfolded(int SS);
+    bool RemoveDeadStores(MachineBasicBlock* MBB);
+  };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+static RegisterPass<StackSlotColoring>
+X("stack-slot-coloring", "Stack Slot Coloring");
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+  return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+  struct IntervalSorter {
+    bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+      return LHS->weight > RHS->weight;
+    }
+  };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+  SSRefs.resize(MFI->getObjectIndexEnd());
+
+  // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = &*MBBI;
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+         MII != EE; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isFI())
+          continue;
+        int FI = MO.getIndex();
+        if (FI < 0)
+          continue;
+        if (!LS->hasInterval(FI))
+          continue;
+        LiveInterval &li = LS->getInterval(FI);
+        li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+        SSRefs[FI].push_back(MI);
+      }
+    }
+  }
+}
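ScanForSpillSlotRefs weights each slot by how hot its references are. The exact formula inside LiveIntervals::getSpillWeight is not shown in this hunk; the sketch below assumes the usual exponential-in-loop-depth shape, so the constant 10 is an assumption:

#include <cmath>

// Assumed shape of the spill-weight heuristic: each reference counts
// 10x more per level of loop nesting, so a use at loop depth 2 adds 100
// to the slot's interval weight, pushing hot slots to the front of the
// stable_sort performed in InitializeSlots.
static float spillWeight(bool isDef, bool isUse, unsigned loopDepth) {
  return (isDef + isUse) * std::pow(10.0f, static_cast<float>(loopDepth));
}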
+ DOUT << "Spill slot intervals:\n"; + for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { + LiveInterval &li = i->second; + DEBUG(li.dump()); + int FI = li.getStackSlotIndex(); + if (MFI->isDeadObjectIndex(FI)) + continue; + SSIntervals.push_back(&li); + OrigAlignments[FI] = MFI->getObjectAlignment(FI); + OrigSizes[FI] = MFI->getObjectSize(FI); + AllColors.set(FI); + } + DOUT << '\n'; + + // Sort them by weight. + std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + + // Get first "color". + NextColor = AllColors.find_first(); +} + +/// OverlapWithAssignments - Return true if LiveInterval overlaps with any +/// LiveIntervals that have already been assigned to the specified color. +bool +StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { + const SmallVector &OtherLIs = Assignments[Color]; + for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { + LiveInterval *OtherLI = OtherLIs[i]; + if (OtherLI->overlaps(*li)) + return true; + } + return false; +} + +/// ColorSlotsWithFreeRegs - If there are any free registers available, try +/// replacing spill slots references with registers instead. +bool +StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, + SmallVector, 16> &RevMap, + BitVector &SlotIsReg) { + if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) + return false; + + bool Changed = false; + DOUT << "Assigning unused registers to spill slots:\n"; + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = li->getStackSlotIndex(); + if (!UsedColors[SS] || li->weight < 20) + // If the weight is < 20, i.e. two references in a loop with depth 1, + // don't bother with it. + continue; + + // These slots allow to share the same registers. + bool AllColored = true; + SmallVector ColoredRegs; + for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { + int RSS = RevMap[SS][j]; + const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); + // If it's not colored to another stack slot, try coloring it + // to a "free" register. + if (!RC) { + AllColored = false; + continue; + } + unsigned Reg = VRM->getFirstUnusedRegister(RC); + if (!Reg) { + AllColored = false; + continue; + } + if (!AllMemRefsCanBeUnfolded(RSS)) { + AllColored = false; + continue; + } else { + DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'; + ColoredRegs.push_back(Reg); + SlotMapping[RSS] = Reg; + SlotIsReg.set(RSS); + Changed = true; + } + } + + // Register and its sub-registers are no longer free. + while (!ColoredRegs.empty()) { + unsigned Reg = ColoredRegs.back(); + ColoredRegs.pop_back(); + VRM->setRegisterUsed(Reg); + // If reg is a callee-saved register, it will have to be spilled in + // the prologue. + MRI->setPhysRegUsed(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + VRM->setRegisterUsed(*AS); + MRI->setPhysRegUsed(*AS); + } + } + // This spill slot is dead after the rewrites + if (AllColored) { + MFI->RemoveStackObject(SS); + ++NumEliminated; + } + } + DOUT << '\n'; + + return Changed; +} + +/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. +/// +int StackSlotColoring::ColorSlot(LiveInterval *li) { + int Color = -1; + bool Share = false; + if (!DisableSharing) { + // Check if it's possible to reuse any of the used colors. 
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+  int Color = -1;
+  bool Share = false;
+  if (!DisableSharing) {
+    // Check if it's possible to reuse any of the used colors.
+    Color = UsedColors.find_first();
+    while (Color != -1) {
+      if (!OverlapWithAssignments(li, Color)) {
+        Share = true;
+        ++NumEliminated;
+        break;
+      }
+      Color = UsedColors.find_next(Color);
+    }
+  }
+
+  // Assign it to the first available color (assumed to be the best) if it's
+  // not possible to share a used color with other objects.
+  if (!Share) {
+    assert(NextColor != -1 && "No more spill slots?");
+    Color = NextColor;
+    UsedColors.set(Color);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  // Record the assignment.
+  Assignments[Color].push_back(li);
+  int FI = li->getStackSlotIndex();
+  DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n";
+
+  // Change size and alignment of the allocated slot. If there are multiple
+  // objects sharing the same slot, then make sure the size and alignment
+  // are large enough for all.
+  unsigned Align = OrigAlignments[FI];
+  if (!Share || Align > MFI->getObjectAlignment(Color))
+    MFI->setObjectAlignment(Color, Align);
+  int64_t Size = OrigSizes[FI];
+  if (!Share || Size > MFI->getObjectSize(Color))
+    MFI->setObjectSize(Color, Size);
+  return Color;
+}
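The sharing test in ColorSlot reduces to live-interval overlap. A self-contained sketch of that rule on plain half-open ranges (the Range type and the example values are illustrative):

#include <utility>

typedef std::pair<unsigned, unsigned> Range;  // half-open [start, end)

static bool overlaps(Range a, Range b) {
  return a.first < b.second && b.first < a.second;
}

// Example: a slot live over [0, 10) and one live over [12, 20) do not
// overlap, so the second can reuse the first's color; the shared slot
// then takes the larger of the two sizes and alignments, as ColorSlot does.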
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+  unsigned NumObjs = MFI->getObjectIndexEnd();
+  SmallVector<int, 16> SlotMapping(NumObjs, -1);
+  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+  BitVector SlotIsReg(NumObjs);
+  BitVector UsedColors(NumObjs);
+
+  DOUT << "Color spill slot intervals:\n";
+  bool Changed = false;
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    int NewSS = ColorSlot(li);
+    assert(NewSS >= 0 && "Stack coloring failed?");
+    SlotMapping[SS] = NewSS;
+    RevMap[NewSS].push_back(SS);
+    SlotWeights[NewSS] += li->weight;
+    UsedColors.set(NewSS);
+    Changed |= (SS != NewSS);
+  }
+
+  DOUT << "\nSpill slots after coloring:\n";
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    li->weight = SlotWeights[SS];
+  }
+  // Sort them by new weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+    DEBUG(SSIntervals[i]->dump());
+  DOUT << '\n';
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+  Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+  if (!Changed)
+    return false;
+
+  // Rewrite all MO_FrameIndex operands.
+  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+    bool isReg = SlotIsReg[SS];
+    int NewFI = SlotMapping[SS];
+    if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+      continue;
+
+    const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+      if (!isReg)
+        RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+      else {
+        // Rewrite to use a register instead.
+        unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+        SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+        UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+      }
+  }
+
+  // Delete unused stack slots.
+  while (NextColor != -1) {
+    DOUT << "Removing unused stack object fi#" << NextColor << "\n";
+    MFI->RemoveStackObject(NextColor);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+  SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+  for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+    MachineInstr *MI = RefMIs[i];
+    if (TII->isLoadFromStackSlot(MI, SS) ||
+        TII->isStoreToStackSlot(MI, SS))
+      // Restore and spill will become copies.
+      return true;
+    if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+      return false;
+    for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+      MachineOperand &MO = MI->getOperand(j);
+      if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frameindex, we can't currently unfold it.
+        return false;
+    }
+  }
+  return true;
+}
+
+/// RewriteInstruction - Rewrite the specified instruction by replacing
+/// references to the old frame index with the new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+                                           int NewFI, MachineFunction &MF) {
+  for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isFI())
+      continue;
+    int FI = MO.getIndex();
+    if (FI != OldFI)
+      continue;
+    MO.setIndex(NewFI);
+  }
+
+  // Update the MachineMemOperand for the new memory location.
+  // FIXME: We need a better method of managing these too.
+  SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(),
+                                         MI->memoperands_end());
+  MI->clearMemOperands(MF);
+  const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+  for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) {
+    if (MMOs[i].getValue() != OldSV)
+      MI->addMemOperand(MF, MMOs[i]);
+    else {
+      MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI),
+                            MMOs[i].getFlags(), MMOs[i].getOffset(),
+                            MMOs[i].getSize(), MMOs[i].getAlignment());
+      MI->addMemOperand(MF, MMO);
+    }
+  }
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+                                          MachineBasicBlock *MBB,
+                                          unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->begin())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  SmallVector<MachineOperand*, 4> Refs;
+  while (--MII != MBB->begin()) {
+    bool FoundDef = false;  // Not counting two-address def.
+
+    Uses.clear();
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isImplicit())
+          return false;
+        const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+        if (RC && !RC->contains(NewReg))
+          return false;
+
+        if (MO.isUse()) {
+          Uses.push_back(&MO);
+        } else {
+          Refs.push_back(&MO);
+          if (!MII->isRegTiedToUseOperand(i))
+            FoundDef = true;
+        }
+      } else if (TRI->regsOverlap(Reg, NewReg)) {
+        return false;
+      } else if (TRI->regsOverlap(Reg, OldReg)) {
+        if (!MO.isUse() || !MO.isKill())
+          return false;
+      }
+    }
+
+    if (FoundDef) {
+      // Found a non-two-address def. Stop here.
+      for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+        Refs[i]->setReg(NewReg);
+      return true;
+    }
+
+    // Two-address uses must be updated as well.
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+      Refs.push_back(Uses[i]);
+  }
+  return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+                                         MachineBasicBlock *MBB,
+                                         unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->end())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  while (++MII != MBB->end()) {
+    bool FoundUse = false;
+    bool FoundKill = false;
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isDef() || MO.isImplicit())
+          return false;
+
+        const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+        if (RC && !RC->contains(NewReg))
+          return false;
+        FoundUse = true;
+        if (MO.isKill())
+          FoundKill = true;
+        Uses.push_back(&MO);
+      } else if (TRI->regsOverlap(Reg, NewReg) ||
+                 TRI->regsOverlap(Reg, OldReg))
+        return false;
+    }
+    if (FoundKill) {
+      for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+        Uses[i]->setReg(NewReg);
+      return true;
+    }
+  }
+  return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite the specified instruction by
+/// unfolding its folded memory references and replacing those references
+/// with register references instead.
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+                                               unsigned Reg,
+                                               const TargetRegisterClass *RC,
+                                               SmallSet<unsigned, 4> &Defs,
+                                               MachineFunction &MF) {
+  MachineBasicBlock *MBB = MI->getParent();
+  if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+    if (PropagateForward(MI, MBB, DstReg, Reg)) {
+      DOUT << "Eliminated load: ";
+      DEBUG(MI->dump());
+      ++NumLoadElim;
+    } else {
+      TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+      ++NumRegRepl;
+    }
+
+    if (!Defs.count(Reg)) {
+      // If this is the first use of Reg in this MBB and it wasn't previously
+      // defined in MBB, add it to livein.
+      MBB->addLiveIn(Reg);
+      Defs.insert(Reg);
+    }
+  } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+    if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+      DOUT << "Eliminated store: ";
+      DEBUG(MI->dump());
+      ++NumStoreElim;
+    } else {
+      TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+      ++NumRegRepl;
+    }
+
+    // Remember that Reg has been defined in MBB.
+    Defs.insert(Reg);
+  } else {
+    SmallVector<MachineInstr*, 4> NewMIs;
+    bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+    Success = Success; // Silence compiler warning.
+    assert(Success && "Failed to unfold!");
+    MachineInstr *NewMI = NewMIs[0];
+    MBB->insert(MI, NewMI);
+    ++NumRegRepl;
+
+    if (NewMI->readsRegister(Reg)) {
+      if (!Defs.count(Reg))
+        // If this is the first use of Reg in this MBB and it wasn't previously
+        // defined in MBB, add it to livein.
+        MBB->addLiveIn(Reg);
+      Defs.insert(Reg);
+    }
+  }
+  MBB->erase(MI);
+}
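RemoveDeadStores, defined next, looks for a reload immediately followed by a spill of the same register to the same slot; the store is then dead. The same check over a toy instruction list (the Inst struct and the function are invented for illustration):

#include <vector>

struct Inst { bool isLoad; unsigned reg; int slot; };

static unsigned countDeadStores(const std::vector<Inst> &block) {
  unsigned dead = 0;
  for (unsigned i = 0; i + 1 < block.size(); ++i)
    if (block[i].isLoad && !block[i + 1].isLoad &&  // load then store
        block[i].reg == block[i + 1].reg &&         // of the same register
        block[i].slot == block[i + 1].slot)         // to the same stack slot
      ++dead;                                       // the store is dead
  return dead;
}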
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+  // FIXME: This could be much more aggressive, but we need to investigate
+  // the compile time impact of doing so.
+  bool changed = false;
+
+  SmallVector<MachineInstr*, 4> toErase;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    if (DCELimit != -1 && (int)NumDead >= DCELimit)
+      break;
+
+    MachineBasicBlock::iterator NextMI = next(I);
+    if (NextMI == MBB->end()) continue;
+
+    int FirstSS, SecondSS;
+    unsigned LoadReg = 0;
+    unsigned StoreReg = 0;
+    if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+    if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+    ++NumDead;
+    changed = true;
+
+    if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+      ++NumDead;
+      toErase.push_back(I);
+    }
+
+    toErase.push_back(NextMI);
+    ++I;
+  }
+
+  for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+       E = toErase.end(); I != E; ++I)
+    (*I)->eraseFromParent();
+
+  return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+  DOUT << "********** Stack Slot Coloring **********\n";
+
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  LS = &getAnalysis<LiveStacks>();
+  VRM = &getAnalysis<VirtRegMap>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  bool Changed = false;
+
+  unsigned NumSlots = LS->getNumIntervals();
+  if (NumSlots < 2) {
+    if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+      // Nothing to do!
+      return false;
+  }
+
+  // Gather spill slot references.
+  ScanForSpillSlotRefs(MF);
+  InitializeSlots();
+  Changed = ColorSlots(MF);
+
+  NextColor = -1;
+  SSIntervals.clear();
+  for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+    SSRefs[i].clear();
+  SSRefs.clear();
+  OrigAlignments.clear();
+  OrigSizes.clear();
+  AllColors.clear();
+  UsedColors.clear();
+  for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+    Assignments[i].clear();
+  Assignments.clear();
+
+  if (Changed) {
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      Changed |= RemoveDeadStores(I);
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 000000000000..a2c12554f377
--- /dev/null
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1053 @@
+//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+//   Budimlic, et al. Fast copy coalescing and live-range identification.
+//   In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+//   Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+//   PLDI '02. ACM, New York, NY, 25-32.
+//   DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    StrongPHIElimination() : MachineFunctionPass(&ID) {}
+
+    // Waiting stores, for each MBB, the set of copies that need to
+    // be inserted into that MBB
+    DenseMap<MachineBasicBlock*,
+             std::multimap<unsigned, unsigned> > Waiting;
+
+    // Stacks holds the renaming stack for each register
+    std::map<unsigned, std::vector<unsigned> > Stacks;
+
+    // Registers in UsedByAnother are PHI nodes that are themselves
+    // used as operands to another PHI node
+    std::set<unsigned> UsedByAnother;
+
+    // RenameSets is a map from a PHI-defined register to the input
+    // registers to be coalesced, along with the predecessor block for
+    // each input register.
+    std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+    // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+    // eliminating, indexed by the register defined by that phi.
+    std::map<unsigned, unsigned> PhiValueNumber;
+
+    // Store the DFS-in number of each block
+    DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+    // Store the DFS-out number of each block
+    DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<LiveIntervals>();
+
+      // TODO: Actually make this true.
+      AU.addPreserved<LiveIntervals>();
+      AU.addPreserved<RegisterCoalescer>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      preorder.clear();
+      maxpreorder.clear();
+
+      Waiting.clear();
+      Stacks.clear();
+      UsedByAnother.clear();
+      RenameSets.clear();
+    }
+
+  private:
+
+    /// DomForestNode - Represents a node in the "dominator forest". This is
+    /// a forest in which the nodes represent registers and the edges
+    /// represent a dominance relation in the block defining those registers.
+    struct DomForestNode {
+    private:
+      // Store references to our children
+      std::vector<DomForestNode*> children;
+      // The register we represent
+      unsigned reg;
+
+      // Add another node as our child
+      void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+    public:
+      typedef std::vector<DomForestNode*>::iterator iterator;
+
+      // Create a DomForestNode by providing the register it represents, and
+      // the node to be its parent. The virtual root node has register 0
+      // and a null parent.
+      DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+        if (parent)
+          parent->addChild(this);
+      }
+
+      ~DomForestNode() {
+        for (iterator I = begin(), E = end(); I != E; ++I)
+          delete *I;
+      }
+
+      /// getReg - Return the register that this node represents
+      inline unsigned getReg() { return reg; }
+
+      // Provide iterator access to our children
+      inline DomForestNode::iterator begin() { return children.begin(); }
+      inline DomForestNode::iterator end() { return children.end(); }
+    };
+
+    void computeDFS(MachineFunction& MF);
+    void processBlock(MachineBasicBlock* MBB);
+
+    std::vector<DomForestNode*> computeDomForest(
+                                std::map<unsigned, MachineBasicBlock*>& instrs,
+                                MachineRegisterInfo& MRI);
+    void processPHIUnion(MachineInstr* Inst,
+                         std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+                         std::vector<StrongPHIElimination::DomForestNode*>& DF,
+                         std::vector<std::pair<unsigned, unsigned> >& locals);
+    void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+    void InsertCopies(MachineDomTreeNode* MBB,
+                      SmallPtrSet<MachineBasicBlock*, 16>& v);
+    bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+  };
+}
+
+char StrongPHIElimination::ID = 0;
+static RegisterPass<StrongPHIElimination>
+X("strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently");
+
+const PassInfo *const llvm::StrongPHIEliminationID = &X;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
+void StrongPHIElimination::computeDFS(MachineFunction& MF) {
+  SmallPtrSet<MachineDomTreeNode*, 8> frontier;
+  SmallPtrSet<MachineDomTreeNode*, 8> visited;
+
+  unsigned time = 0;
+
+  MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
+
+  MachineDomTreeNode* node = DT.getRootNode();
+
+  std::vector<MachineDomTreeNode*> worklist;
+  worklist.push_back(node);
+
+  while (!worklist.empty()) {
+    MachineDomTreeNode* currNode = worklist.back();
+
+    if (!frontier.count(currNode)) {
+      frontier.insert(currNode);
+      ++time;
+      preorder.insert(std::make_pair(currNode->getBlock(), time));
+    }
+
+    bool inserted = false;
+    for (MachineDomTreeNode::iterator I = currNode->begin(),
+         E = currNode->end(); I != E; ++I)
+      if (!frontier.count(*I) && !visited.count(*I)) {
+        worklist.push_back(*I);
+        inserted = true;
+        break;
+      }
+
+    if (!inserted) {
+      frontier.erase(currNode);
+      visited.insert(currNode);
+      maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
+
+      worklist.pop_back();
+    }
+  }
+}
+
+namespace {
+
+/// PreorderSorter - a helper class that is used to sort registers
+/// according to the preorder number of their defining blocks
+class PreorderSorter {
+private:
+  DenseMap<MachineBasicBlock*, unsigned>& preorder;
+  MachineRegisterInfo& MRI;
+
+public:
+  PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
+                 MachineRegisterInfo& M) : preorder(p), MRI(M) { }
+
+  bool operator()(unsigned A, unsigned B) {
+    if (A == B)
+      return false;
+
+    MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
+    MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
+
+    if (preorder[ABlock] < preorder[BBlock])
+      return true;
+    else if (preorder[ABlock] > preorder[BBlock])
+      return false;
+
+    return false;
+  }
+};
+
+}
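computeDFS assigns every dominator-tree node a preorder ("DFS-in") and a maxpreorder ("DFS-out") number; a node's subtree is exactly the set of nodes whose preorder falls within [preorder, maxpreorder], which is what the dominator-forest construction below relies on. A recursive toy version over a plain adjacency list computes the same numbering as the iterative worklist above (all names here are illustrative):

#include <vector>

static void dfsNumber(unsigned node,
                      const std::vector<std::vector<unsigned> > &children,
                      std::vector<unsigned> &pre,
                      std::vector<unsigned> &maxpre,
                      unsigned &time) {
  pre[node] = ++time;                      // DFS-in: first visit
  for (unsigned i = 0; i < children[node].size(); ++i)
    dfsNumber(children[node][i], children, pre, maxpre, time);
  maxpre[node] = time;                     // DFS-out: last time in subtree
}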
+
+/// computeDomForest - compute the subforest of the DomTree corresponding
+/// to the defining blocks of the registers in question
+std::vector<StrongPHIElimination::DomForestNode*>
+StrongPHIElimination::computeDomForest(
+                                  std::map<unsigned, MachineBasicBlock*>& regs,
+                                  MachineRegisterInfo& MRI) {
+  // Begin by creating a virtual root node, since the actual results
+  // may well be a forest. Assume this node has maximum DFS-out number.
+  DomForestNode* VirtualRoot = new DomForestNode(0, 0);
+  maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
+
+  // Populate a worklist with the registers
+  std::vector<unsigned> worklist;
+  worklist.reserve(regs.size());
+  for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
+       E = regs.end(); I != E; ++I)
+    worklist.push_back(I->first);
+
+  // Sort the registers by the DFS-in number of their defining block
+  PreorderSorter PS(preorder, MRI);
+  std::sort(worklist.begin(), worklist.end(), PS);
+
+  // Create a "current parent" stack, and put the virtual root on top of it
+  DomForestNode* CurrentParent = VirtualRoot;
+  std::vector<DomForestNode*> stack;
+  stack.push_back(VirtualRoot);
+
+  // Iterate over all the registers in the previously computed order
+  for (std::vector<unsigned>::iterator I = worklist.begin(),
+       E = worklist.end(); I != E; ++I) {
+    unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
+    MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
+                 MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+                 0;
+
+    // If the DFS-in number of the register is greater than the DFS-out number
+    // of the current parent, repeatedly pop the parent stack until it isn't.
+    while (pre > maxpreorder[parentBlock]) {
+      stack.pop_back();
+      CurrentParent = stack.back();
+
+      parentBlock = CurrentParent->getReg() ?
+                 MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+                 0;
+    }
+
+    // Now that we've found the appropriate parent, create a DomForestNode for
+    // this register and attach it to the forest
+    DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+    // Push this new node on the "current parent" stack
+    stack.push_back(child);
+    CurrentParent = child;
+  }
+
+  // Return a vector containing the children of the virtual root node
+  std::vector<DomForestNode*> ret;
+  ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+  return ret;
+}
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+                     LiveIntervals& LI) {
+  LiveInterval& I = LI.getOrCreateInterval(r);
+  unsigned idx = LI.getMBBStartIdx(MBB);
+  return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+                      LiveIntervals& LI) {
+  for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+       E = MBB->succ_end(); PI != E; ++PI)
+    if (isLiveIn(r, *PI, LI))
+      return true;
+
+  return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode', which tells it the relationship of the two
+/// registers: 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+                       LiveIntervals& LV, unsigned mode) {
+  MachineInstr* def = 0;
+  MachineInstr* kill = 0;
+
+  // The code is still in SSA form at this point, so there is only one
+  // definition per VReg. Thus we can safely use MRI->getVRegDef().
+  const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+  bool interference = false;
+
+  // Walk the block, checking for interferences
+  for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+       MBI != MBE; ++MBI) {
+    MachineInstr* curr = MBI;
+
+    // Same defining block...
+    if (mode == 0) {
+      if (curr == MRI->getVRegDef(a)) {
+        // If we find our first definition, save it
+        if (!def) {
+          def = curr;
+        // If there's already an unkilled definition, then
+        // this is an interference
+        } else if (!kill) {
+          interference = true;
+          break;
+        // If there's a definition followed by a KillInst, then
+        // they can't interfere
+        } else {
+          interference = false;
+          break;
+        }
+      // Symmetric with the above
+      } else if (curr == MRI->getVRegDef(b)) {
+        if (!def) {
+          def = curr;
+        } else if (!kill) {
+          interference = true;
+          break;
+        } else {
+          interference = false;
+          break;
+        }
+      // Store KillInsts if they match up with the definition
+      } else if (curr->killsRegister(a)) {
+        if (def == MRI->getVRegDef(a))
+          kill = curr;
+      } else if (curr->killsRegister(b)) {
+        if (def == MRI->getVRegDef(b))
+          kill = curr;
+      }
+    // First properly dominates second...
+    } else if (mode == 1) {
+      if (curr == MRI->getVRegDef(b)) {
+        // Definition of second without a kill of first is an interference
+        if (!kill) {
+          interference = true;
+          break;
+        // Definition after a kill is a non-interference
+        } else {
+          interference = false;
+          break;
+        }
+      // Save KillInsts of First
+      } else if (curr->killsRegister(a)) {
+        kill = curr;
+      }
+    // Symmetric with the above
+    } else if (mode == 2) {
+      if (curr == MRI->getVRegDef(a)) {
+        if (!kill) {
+          interference = true;
+          break;
+        } else {
+          interference = false;
+          break;
+        }
+      } else if (curr->killsRegister(b)) {
+        kill = curr;
+      }
+    }
+  }
+
+  return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+  // Holds names that have been added to a set in any PHI within this block
+  // before the current one.
+  std::set<unsigned> ProcessedNames;
+
+  // Iterate over all the PHI nodes in this block
+  MachineBasicBlock::iterator P = MBB->begin();
+  while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) {
+    unsigned DestReg = P->getOperand(0).getReg();
+
+    // Don't bother doing PHI elimination for dead PHIs.
+    if (P->registerDefIsDead(DestReg)) {
+      ++P;
+      continue;
+    }
+
+    LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+    unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+    VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+    PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+    // PHIUnion is the set of incoming registers to the PHI node that
+    // are going to be renamed rather than having copies inserted. This set
+    // is refined over the course of this function. UnionedBlocks is the set
+    // of corresponding MBBs.
+    std::map<unsigned, MachineBasicBlock*> PHIUnion;
+    SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+    // Iterate over the operands of the PHI node
+    for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+      unsigned SrcReg = P->getOperand(i-1).getReg();
+
+      // Don't need to try to coalesce a register with itself.
+      if (SrcReg == DestReg) {
+        ProcessedNames.insert(SrcReg);
+        continue;
+      }
+
+      // We don't need to insert copies for implicit_defs.
+      MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
+      if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+        ProcessedNames.insert(SrcReg);
+
+      // Check for trivial interferences via liveness information, allowing us
+      // to avoid extra work later. Any registers that interfere cannot both
+      // be in the renaming set, so choose one and add copies for it instead.
+      // The conditions are:
+      //   1) if the operand is live into the PHI node's block OR
+      //   2) if the PHI node is live out of the operand's defining block OR
+      //   3) if the operand is itself a PHI node and the original PHI is
+      //      live into the operand's defining block OR
+      //   4) if the operand is already being renamed for another PHI node
+      //      in this block OR
+      //   5) if any two operands are defined in the same block, insert copies
+      //      for one of them
+      if (isLiveIn(SrcReg, P->getParent(), LI) ||
+          isLiveOut(P->getOperand(0).getReg(),
+                    MRI.getVRegDef(SrcReg)->getParent(), LI) ||
+          ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI &&
+            isLiveIn(P->getOperand(0).getReg(),
+                     MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
+          ProcessedNames.count(SrcReg) ||
+          UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
+
+        // Add a copy for the selected register
+        MachineBasicBlock* From = P->getOperand(i).getMBB();
+        Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+        UsedByAnother.insert(SrcReg);
+      } else {
+        // Otherwise, add it to the renaming set
+        PHIUnion.insert(std::make_pair(SrcReg, P->getOperand(i).getMBB()));
+        UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
+      }
+    }
+
+    // Compute the dominator forest for the renaming set. This is a forest
+    // where the nodes are the registers and the edges represent dominance
+    // relations between the defining blocks of the registers
+    std::vector<StrongPHIElimination::DomForestNode*> DF =
+                                               computeDomForest(PHIUnion, MRI);
+
+    // Walk DomForest to resolve interferences at an inter-block level. This
+    // will remove registers from the renaming set (and insert copies for them)
+    // if interferences are found.
+    std::vector<std::pair<unsigned, unsigned> > localInterferences;
+    processPHIUnion(P, PHIUnion, DF, localInterferences);
+
+    // If one of the inputs is defined in the same block as the current PHI
+    // then we need to check for a local interference between that input and
+    // the PHI.
+    for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+         E = PHIUnion.end(); I != E; ++I)
+      if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
+        localInterferences.push_back(std::make_pair(I->first,
+                                                   P->getOperand(0).getReg()));
+
+    // The dominator forest walk may have returned some register pairs whose
+    // interference cannot be determined from dominator analysis. We now
+    // examine these pairs for local interferences.
+    for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
+         localInterferences.begin(), E = localInterferences.end();
+         I != E; ++I) {
+      std::pair<unsigned, unsigned> p = *I;
+
+      MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+      // Determine the block we need to scan and the relationship between
+      // the two registers
+      MachineBasicBlock* scan = 0;
+      unsigned mode = 0;
+      if (MRI.getVRegDef(p.first)->getParent() ==
+          MRI.getVRegDef(p.second)->getParent()) {
+        scan = MRI.getVRegDef(p.first)->getParent();
+        mode = 0; // Same block
+      } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
+                               MRI.getVRegDef(p.second)->getParent())) {
+        scan = MRI.getVRegDef(p.second)->getParent();
+        mode = 1; // First dominates second
+      } else {
+        scan = MRI.getVRegDef(p.first)->getParent();
+        mode = 2; // Second dominates first
+      }
+
+      // If there's an interference, we need to insert copies
+      if (interferes(p.first, p.second, scan, LI, mode)) {
+        // Insert copies for First
+        for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+          if (P->getOperand(i-1).getReg() == p.first) {
+            unsigned SrcReg = p.first;
+            MachineBasicBlock* From = P->getOperand(i).getMBB();
+
+            Waiting[From].insert(std::make_pair(SrcReg,
+                                                P->getOperand(0).getReg()));
+            UsedByAnother.insert(SrcReg);
+
+            PHIUnion.erase(SrcReg);
+          }
+        }
+      }
+    }
+
+    // Add the renaming set for this PHI node to our overall renaming
+    // information.
+    for (std::map<unsigned, MachineBasicBlock*>::iterator
+         QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) {
+      DOUT << "Adding Renaming: " << QI->first << " -> "
+           << P->getOperand(0).getReg() << "\n";
+    }
+
+    RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
+
+    // Remember which registers are already renamed, so that we don't try to
+    // rename them for another PHI node in this block
+    for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+         E = PHIUnion.end(); I != E; ++I)
+      ProcessedNames.insert(I->first);
+
+    ++P;
+  }
+}
+
+/// processPHIUnion - Take a set of candidate registers to be coalesced when
+/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
+/// that are known to interfere, and flag others that need to be checked for
+/// local interferences.
+void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
+                        std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+                        std::vector<StrongPHIElimination::DomForestNode*>& DF,
+                        std::vector<std::pair<unsigned, unsigned> >& locals) {
+
+  std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
+  SmallPtrSet<DomForestNode*, 4> visited;
+
+  // Code is still in SSA form, so we can use MRI::getVRegDef()
+  MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
+
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  unsigned DestReg = Inst->getOperand(0).getReg();
+
+  // DF walk on the DomForest
+  while (!worklist.empty()) {
+    DomForestNode* DFNode = worklist.back();
+
+    visited.insert(DFNode);
+
+    bool inserted = false;
+    for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
+         CI != CE; ++CI) {
+      DomForestNode* child = *CI;
+
+      // If the current node is live-out of the defining block of one of its
+      // children, insert a copy for it. NOTE: The paper actually calls for
+      // a more elaborate heuristic for determining whether to insert copies
+      // for the child or the parent. In the interest of simplicity, we're
+      // just always choosing the parent.
+      if (isLiveOut(DFNode->getReg(),
+          MRI.getVRegDef(child->getReg())->getParent(), LI)) {
+        // Insert copies for parent
+        for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
+          if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
+            unsigned SrcReg = DFNode->getReg();
+            MachineBasicBlock* From = Inst->getOperand(i).getMBB();
+
+            Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+            UsedByAnother.insert(SrcReg);
+
+            PHIUnion.erase(SrcReg);
+          }
+        }
+
+      // If a node is live-in to the defining block of one of its children, but
+      // not live-out, then we need to scan that block for local interferences.
+      } else if (isLiveIn(DFNode->getReg(),
+                          MRI.getVRegDef(child->getReg())->getParent(), LI) ||
+                 MRI.getVRegDef(DFNode->getReg())->getParent() ==
+                                MRI.getVRegDef(child->getReg())->getParent()) {
+        // Add (p, c) to possible local interferences
+        locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+      }
+
+      if (!visited.count(child)) {
+        worklist.push_back(child);
+        inserted = true;
+      }
+    }
+
+    if (!inserted) worklist.pop_back();
+  }
+}
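ScheduleCopies, below, exists because the copies implied by a PHI are semantically parallel: a cycle such as (a, b) := (b, a) is miscompiled if lowered as two sequential copies. The classic fix, and the reason the code pushes temporaries onto Stacks, is to break the cycle with a temporary; the "lost copy" and "virtual swap" problems from Briggs et al. are both instances of this. In miniature:

// Sequentializing the parallel copy (a, b) := (b, a) naively would
// execute "a = b; b = a;" and lose a's original value. A temporary
// breaks the cycle.
static void lowerSwap(unsigned &a, unsigned &b) {
  unsigned t = a;  // the inserted temporary
  a = b;
  b = t;
}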
+
+/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
+/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
+/// problems.
+///
+/// Based on "Practical Improvements to the Construction and Destruction
+/// of Static Single Assignment Form" by Briggs, et al.
+void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
+                                          std::set<unsigned>& pushed) {
+  // FIXME: This function needs to update LiveIntervals
+  std::multimap<unsigned, unsigned>& copy_set = Waiting[MBB];
+
+  std::multimap<unsigned, unsigned> worklist;
+  std::map<unsigned, unsigned> map;
+
+  // Setup worklist of initial copies
+  for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+       E = copy_set.end(); I != E; ) {
+    map.insert(std::make_pair(I->first, I->first));
+    map.insert(std::make_pair(I->second, I->second));
+
+    if (!UsedByAnother.count(I->second)) {
+      worklist.insert(*I);
+
+      // Avoid iterator invalidation
+      std::multimap<unsigned, unsigned>::iterator OI = I;
+      ++I;
+      copy_set.erase(OI);
+    } else {
+      ++I;
+    }
+  }
+
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  MachineFunction* MF = MBB->getParent();
+  MachineRegisterInfo& MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+  SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
+
+  // Iterate over the worklist, inserting copies
+  while (!worklist.empty() || !copy_set.empty()) {
+    while (!worklist.empty()) {
+      std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
+      std::pair<unsigned, unsigned> curr = *WI;
+      worklist.erase(WI);
+
+      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
+
+      if (isLiveOut(curr.second, MBB, LI)) {
+        // Create a temporary
+        unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+
+        // Insert copy from curr.second to a temporary at
+        // the Phi defining curr.second
+        MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
+        TII->copyRegToReg(*PI->getParent(), PI, t,
+                          curr.second, RC, RC);
+
+        DOUT << "Inserted copy from " << curr.second << " to " << t << "\n";
+
+        // Push temporary on Stacks
+        Stacks[curr.second].push_back(t);
+
+        // Insert curr.second in pushed
+        pushed.insert(curr.second);
+
+        // Create a live interval for this temporary
+        InsertedPHIDests.push_back(std::make_pair(t, --PI));
+      }
+
+      // Insert copy from map[curr.first] to curr.second
+      TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
+                        map[curr.first], RC, RC);
+      map[curr.first] = curr.second;
+      DOUT << "Inserted copy from " << curr.first << " to "
+           << curr.second << "\n";
+
+      // Push this copy onto InsertedPHIDests so we can update
+      // LiveIntervals with it.
+      MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+      InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+      // If curr.first is a destination in copy_set...
+      for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+           E = copy_set.end(); I != E; )
+        if (curr.first == I->second) {
+          std::pair<unsigned, unsigned> temp = *I;
+          worklist.insert(temp);
+
+          // Avoid iterator invalidation
+          std::multimap<unsigned, unsigned>::iterator OI = I;
+          ++I;
+          copy_set.erase(OI);
+
+          break;
+        } else {
+          ++I;
+        }
+    }
+
+    if (!copy_set.empty()) {
+      std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+      std::pair<unsigned, unsigned> curr = *CI;
+      worklist.insert(curr);
+      copy_set.erase(CI);
+
+      LiveInterval& I = LI.getInterval(curr.second);
+      MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+      unsigned endIdx = 0;
+      if (term != MBB->end())
+        endIdx = LI.getInstructionIndex(term);
+      else
+        endIdx = LI.getMBBEndIdx(MBB);
+
+      if (I.liveAt(endIdx)) {
+        const TargetRegisterClass *RC =
+                                      MF->getRegInfo().getRegClass(curr.first);
+
+        // Insert a copy from dest to a new temporary t at the end of b
+        unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+        TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
+                          curr.second, RC, RC);
+        map[curr.second] = t;
+
+        MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+        InsertedPHIDests.push_back(std::make_pair(t, --TI));
+      }
+    }
+  }
+
+  // Renumber the instructions so that we can perform the index computations
+  // needed to create new live intervals.
+  LI.computeNumbering();
+
+  // For copies that we inserted at the ends of predecessors, we construct
+  // live intervals. This is pretty easy, since we know that the destination
+  // register cannot previously have been live at that point. We just have
+  // to make sure that, for registers that serve as inputs to more than one
+  // PHI, we don't create multiple overlapping live intervals.
+  std::set<unsigned> RegHandled;
+  for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
+       InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
+    if (RegHandled.insert(I->first).second) {
+      LiveInterval& Int = LI.getOrCreateInterval(I->first);
+      unsigned instrIdx = LI.getInstructionIndex(I->second);
+      if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+        Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+                        LI.getMBBEndIdx(I->second->getParent())+1,
+                        true);
+
+      LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
+      R.valno->copy = I->second;
+      R.valno->def =
+                 LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second));
+    }
+  }
+}
+
+/// InsertCopies - insert copies into MBB and all of its successors
+void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
+                                SmallPtrSet<MachineBasicBlock*, 16>& visited) {
+  MachineBasicBlock* MBB = MDTN->getBlock();
+  visited.insert(MBB);
+
+  std::set<unsigned> pushed;
+
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  // Rewrite register uses from Stacks
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    if (I->getOpcode() == TargetInstrInfo::PHI)
+      continue;
+
+    for (unsigned i = 0; i < I->getNumOperands(); ++i)
+      if (I->getOperand(i).isReg() &&
+          Stacks[I->getOperand(i).getReg()].size()) {
+        // Remove the live range for the old vreg.
+        LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
+        LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining(
+                  LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+        if (OldLR != OldInt.end())
+          OldInt.removeRange(*OldLR, true);
+
+        // Change the register
+        I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
+
+        // Add a live range for the new vreg
+        LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
+        VNInfo* FirstVN = *Int.vni_begin();
+        FirstVN->hasPHIKill = false;
+        if (I->getOperand(i).isKill())
+          FirstVN->kills.push_back(
+                        LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+
+        LiveRange LR (LI.getMBBStartIdx(I->getParent()),
+                      LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1,
+                      FirstVN);
+
+        Int.addRange(LR);
+      }
+  }
+
+  // Schedule the copies for this block
+  ScheduleCopies(MBB, pushed);
+
+  // Recur down the dominator tree.
+  for (MachineDomTreeNode::iterator I = MDTN->begin(),
+       E = MDTN->end(); I != E; ++I)
+    if (!visited.count((*I)->getBlock()))
+      InsertCopies(*I, visited);
+
+  // As we exit this block, pop the names we pushed while processing it
+  for (std::set<unsigned>::iterator I = pushed.begin(),
+       E = pushed.end(); I != E; ++I)
+    Stacks[*I].pop_back();
+}
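mergeLiveIntervals, defined next, only merges when no range of the secondary interval overlaps the primary. The same precondition over sorted half-open ranges (the Seg type and the canMerge function are illustrative names, not part of this patch):

#include <algorithm>
#include <utility>
#include <vector>

typedef std::pair<unsigned, unsigned> Seg;  // half-open [start, end)

static bool canMerge(const std::vector<Seg> &primary, Seg r) {
  // primary is sorted and disjoint; find the first range not before r
  // and check the two neighbours that could possibly overlap it.
  std::vector<Seg>::const_iterator it =
      std::lower_bound(primary.begin(), primary.end(), r);
  if (it != primary.end() && it->first < r.second)
    return false;               // next range starts inside r
  if (it != primary.begin() && (it - 1)->second > r.first)
    return false;               // previous range ends inside r
  return true;
}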
+
+bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
+                                              unsigned secondary) {
+
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+  LiveInterval& LHS = LI.getOrCreateInterval(primary);
+  LiveInterval& RHS = LI.getOrCreateInterval(secondary);
+
+  LI.computeNumbering();
+
+  DenseMap<VNInfo*, VNInfo*> VNMap;
+  for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    LiveRange R = *I;
+
+    unsigned Start = R.start;
+    unsigned End = R.end;
+    if (LHS.getLiveRangeContaining(Start))
+      return false;
+
+    if (LHS.getLiveRangeContaining(End))
+      return false;
+
+    LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
+    if (RI != LHS.end() && RI->start < End)
+      return false;
+  }
+
+  for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    LiveRange R = *I;
+    VNInfo* OldVN = R.valno;
+    VNInfo*& NewVN = VNMap[OldVN];
+    if (!NewVN) {
+      NewVN = LHS.getNextValue(OldVN->def,
+                               OldVN->copy,
+                               LI.getVNInfoAllocator());
+      NewVN->kills = OldVN->kills;
+    }
+
+    LiveRange LR (R.start, R.end, NewVN);
+    LHS.addRange(LR);
+  }
+
+  LI.removeInterval(RHS.reg);
+
+  return true;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
+  LiveIntervals& LI = getAnalysis<LiveIntervals>();
+
+  // Compute DFS numbers of each block
+  computeDFS(Fn);
+
+  // Determine which phi node operands need copies
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    if (!I->empty() &&
+        I->begin()->getOpcode() == TargetInstrInfo::PHI)
+      processBlock(I);
+
+  // Break interferences where two different phis want to coalesce
+  // in the same register.
+  std::set<unsigned> seen;
+  typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
+          RenameSetType;
+  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+       I != E; ++I) {
+    for (std::map<unsigned, MachineBasicBlock*>::iterator
+         OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
+      if (!seen.count(OI->first)) {
+        seen.insert(OI->first);
+        ++OI;
+      } else {
+        Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
+        unsigned reg = OI->first;
+        ++OI;
+        I->second.erase(reg);
+        DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n";
+      }
+    }
+  }
+
+  // Insert copies
+  // FIXME: This process should probably preserve LiveIntervals
+  SmallPtrSet<MachineBasicBlock*, 16> visited;
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+  InsertCopies(MDT.getRootNode(), visited);
+
+  // Perform renaming
+  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+       I != E; ++I)
+    while (I->second.size()) {
+      std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
+
+      DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n";
+
+      if (SI->first != I->first) {
+        if (mergeLiveIntervals(I->first, SI->first)) {
+          Fn.getRegInfo().replaceRegWith(SI->first, I->first);
+
+          if (RenameSets.count(SI->first)) {
+            I->second.insert(RenameSets[SI->first].begin(),
+                             RenameSets[SI->first].end());
+            RenameSets.erase(SI->first);
+          }
+        } else {
+          // Insert a last-minute copy if a conflict was detected.
+          const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
+          TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
+                            I->first, SI->first, RC, RC);
+
+          LI.computeNumbering();
+
+          LiveInterval& Int = LI.getOrCreateInterval(I->first);
+          unsigned instrIdx =
+                    LI.getInstructionIndex(--SI->second->getFirstTerminator());
+          if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+            Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+                            LI.getMBBEndIdx(SI->second)+1, true);
+
+          LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
+                                          --SI->second->getFirstTerminator());
+          R.valno->copy = --SI->second->getFirstTerminator();
+          R.valno->def = LiveIntervals::getDefIndex(instrIdx);
+
+          DOUT << "Renaming failed: " << SI->first << " -> "
+               << I->first << "\n";
+        }
+      }
+
+      LiveInterval& Int = LI.getOrCreateInterval(I->first);
+      const LiveRange* LR =
+                        Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
+      LR->valno->hasPHIKill = true;
+
+      I->second.erase(SI->first);
+    }
+
+  // Remove PHIs
+  std::vector<MachineInstr*> phis;
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
+         BI != BE; ++BI)
+      if (BI->getOpcode() == TargetInstrInfo::PHI)
+        phis.push_back(BI);
+  }
+
+  for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
+       I != E; ) {
+    MachineInstr* PInstr = *(I++);
+
+    // If this is a dead PHI node, then remove it from LiveIntervals.
+    unsigned DestReg = PInstr->getOperand(0).getReg();
+    LiveInterval& PI = LI.getInterval(DestReg);
+    if (PInstr->registerDefIsDead(DestReg)) {
+      if (PI.containsOneValue()) {
+        LI.removeInterval(DestReg);
+      } else {
+        unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+        PI.removeRange(*PI.getLiveRangeContaining(idx), true);
+      }
+    } else {
+      // Trim live intervals of input registers. They are no longer live into
+      // this block if they died after the PHI. If they lived after it, don't
+      // trim them because they might have other legitimate uses.
+      for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
+        unsigned reg = PInstr->getOperand(i).getReg();
+
+        MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
+        LiveInterval& InputI = LI.getInterval(reg);
+        if (MBB != PInstr->getParent() &&
+            InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
+            InputI.expiredAt(LI.getInstructionIndex(PInstr) +
+                             LiveInterval::InstrSlots::NUM))
+          InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
+                             LI.getInstructionIndex(PInstr),
+                             true);
+      }
+
+      // If the PHI is not dead, then the valno defined by the PHI
+      // now has an unknown def.
+      unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+      const LiveRange* PLR = PI.getLiveRangeContaining(idx);
+      PLR->valno->def = ~0U;
+      LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
+                   PLR->start, PLR->valno);
+      PI.addRange(R);
+    }
+
+    LI.RemoveMachineInstrFromMaps(PInstr);
+    PInstr->eraseFromParent();
+  }
+
+  LI.computeNumbering();
+
+  return true;
+}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 000000000000..a5e1ee435529
--- /dev/null
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,194 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+// commuteInstruction - The default implementation of this method just
+// exchanges operands 1 and 2.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+                                                      bool NewMI) const {
+  assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+         "This only knows how to commute register operands so far");
+  unsigned Reg1 = MI->getOperand(1).getReg();
+  unsigned Reg2 = MI->getOperand(2).getReg();
+  bool Reg1IsKill = MI->getOperand(1).isKill();
+  bool Reg2IsKill = MI->getOperand(2).isKill();
+  bool ChangeReg0 = false;
+  if (MI->getOperand(0).getReg() == Reg1) {
+    // Must be two address instruction!
+    assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+           "Expecting a two-address instruction!");
+    Reg2IsKill = false;
+    ChangeReg0 = true;
+  }
+
+  if (NewMI) {
+    // Create a new instruction.
+    unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+    bool Reg0IsDead = MI->getOperand(0).isDead();
+    MachineFunction &MF = *MI->getParent()->getParent();
+    return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+      .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+      .addReg(Reg2, getKillRegState(Reg2IsKill))
+      .addReg(Reg1, getKillRegState(Reg1IsKill));
+  }
+
+  if (ChangeReg0)
+    MI->getOperand(0).setReg(Reg2);
+  MI->getOperand(2).setReg(Reg1);
+  MI->getOperand(1).setReg(Reg2);
+  MI->getOperand(2).setIsKill(Reg1IsKill);
+  MI->getOperand(1).setIsKill(Reg2IsKill);
+  return MI;
+}
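What the default implementation does, reduced to plain data: swap operands 1 and 2 together with their kill flags, and, for a two-address instruction (operand 0 tied to operand 1), retarget the destination as well. ToyInstr and commute are invented stand-ins for illustration only, and the kill-flag clearing the real code performs in the tied case is omitted:

struct ToyInstr {
  unsigned op0, op1, op2;  // register numbers of the three operands
  bool kill1, kill2;       // kill flags on operands 1 and 2
};

static void commute(ToyInstr &mi) {
  bool twoAddress = (mi.op0 == mi.op1);   // op0 tied to op1
  if (twoAddress)
    mi.op0 = mi.op2;      // the destination follows the commuted operand
  unsigned r = mi.op1;  mi.op1 = mi.op2;  mi.op2 = r;
  bool k = mi.kill1;    mi.kill1 = mi.kill2;  mi.kill2 = k;
}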
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); + if (!NewMI) return 0; + + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FrameIndex) != -1); + MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex), + Flags, + MFI.getObjectOffset(FrameIndex), + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + NewMI->addMemOperand(MF, MMO); + + return NewMI; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + if (!NewMI) return 0; + + // Copy the memoperands from the load to the folded instruction. + for (std::list::iterator I = LoadMI->memoperands_begin(), + E = LoadMI->memoperands_end(); I != E; ++I) + NewMI->addMemOperand(MF, *I); + + return NewMI; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 000000000000..3c404046f15e --- /dev/null +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,997 @@ +//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TwoAddress instruction pass which is used +// by most register allocators. Two-Address instructions are rewritten +// from: +// +// A = B op C +// +// to: +// +// A = B +// A op= C +// +// Note that if a register allocator chooses to use this pass, that it +// has to be capable of handling the non-SSA nature of these rewritten +// virtual registers. +// +// It is also worth noting that the duplicate operand of the two +// address instruction is removed. 
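+//
+// As an illustrative (hypothetical) x86-flavored example, the add
+//
+//     %reg1026 = ADD32rr %reg1024, %reg1025
+//
+// becomes, after this pass:
+//
+//     %reg1026 = MOV32rr %reg1024
+//     %reg1026 = ADD32rr %reg1026, %reg1025
+//
+// leaving it to coalescing / allocation to put %reg1024 and %reg1026 in the
+// same physical register where possible. (The opcode names and register
+// numbers above are illustration only.)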
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted        , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted    , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk,        "Number of 3-address instructions sunk");
+STATISTIC(NumReMats,           "Number of instructions re-materialized");
+STATISTIC(NumDeletes,          "Number of dead instructions deleted");
+
+namespace {
+  class VISIBILITY_HIDDEN TwoAddressInstructionPass
+    : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+    LiveVariables *LV;
+
+    // DistanceMap - Keep track of the distance of a MI from the start of the
+    // current basic block.
+    DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+    // SrcRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies from physical
+    // registers to virtual registers. e.g. v1024 = move r0.
+    DenseMap<unsigned, unsigned> SrcRegMap;
+
+    // DstRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies to physical
+    // registers from virtual registers. e.g. r1 = move v1024.
+    DenseMap<unsigned, unsigned> DstRegMap;
+
+    bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+                              unsigned Reg,
+                              MachineBasicBlock::iterator OldPos);
+
+    bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+                             MachineInstr *MI, MachineInstr *DefMI,
+                             MachineBasicBlock *MBB, unsigned Loc);
+
+    bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+                           unsigned &LastDef);
+
+    MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+                                   unsigned Dist);
+
+    bool isProfitableToCommute(unsigned regB, unsigned regC,
+                               MachineInstr *MI, MachineBasicBlock *MBB,
+                               unsigned Dist);
+
+    bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+                            MachineFunction::iterator &mbbi,
+                            unsigned RegB, unsigned RegC, unsigned Dist);
+
+    bool isProfitableToConv3Addr(unsigned RegA);
+
+    bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                            MachineBasicBlock::iterator &nmi,
+                            MachineFunction::iterator &mbbi,
+                            unsigned RegB, unsigned Dist);
+
+    void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+                     SmallPtrSet<MachineInstr*, 8> &Processed);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<LiveVariables>();
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      if (StrongPHIElim)
+        AU.addPreservedID(StrongPHIEliminationID);
+      else
+        AU.addPreservedID(PHIEliminationID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    /// runOnMachineFunction - Pass entry point.
+    bool runOnMachineFunction(MachineFunction&);
+  };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+static RegisterPass<TwoAddressInstructionPass>
+X("twoaddressinstruction", "Two-Address instruction pass");
+
+const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+                                           MachineInstr *MI, unsigned SavedReg,
+                                           MachineBasicBlock::iterator OldPos) {
+  // Check if it's safe to move this instruction.
+  bool SeenStore = true; // Be conservative.
+  if (!MI->isSafeToMove(TII, SeenStore))
+    return false;
+
+  unsigned DefReg = 0;
+  SmallSet<unsigned, 4> UseRegs;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MO.isUse() && MOReg != SavedReg)
+      UseRegs.insert(MO.getReg());
+    if (!MO.isDef())
+      continue;
+    if (MO.isImplicit())
+      // Don't try to move it if it implicitly defines a register.
+      return false;
+    if (DefReg)
+      // For now, don't move any instructions that define multiple registers.
+      return false;
+    DefReg = MO.getReg();
+  }
+
+  // Find the instruction that kills SavedReg.
+  MachineInstr *KillMI = NULL;
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    if (!UseMO.isKill())
+      continue;
+    KillMI = UseMO.getParent();
+    break;
+  }
+
+  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+    return false;
+
+  // If any of the definitions are used by another instruction between the
+  // position and the kill use, then it's not safe to sink it.
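+  //
+  // (An illustrative case: if MI defines %reg1030 and some instruction
+  // between OldPos and KillMI reads %reg1030, sinking MI past that reader
+  // would change the value it sees, so the sink must be abandoned. Register
+  // numbers here are hypothetical.)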
+  //
+  // FIXME: This can be sped up if there is an easy way to query whether an
+  // instruction is before or after another instruction. Then we can use
+  // MachineRegisterInfo def / use instead.
+  MachineOperand *KillMO = NULL;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  ++KillPos;
+
+  unsigned NumVisited = 0;
+  for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (DefReg == MOReg)
+        return false;
+
+      if (MO.isKill()) {
+        if (OtherMI == KillMI && MOReg == SavedReg)
+          // Save the operand that kills the register. We want to unset the
+          // kill marker if we can sink MI past it.
+          KillMO = &MO;
+        else if (UseRegs.count(MOReg))
+          // One of the uses is killed before the destination.
+          return false;
+      }
+    }
+  }
+
+  // Update kill and LV information.
+  KillMO->setIsKill(false);
+  KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+  KillMO->setIsKill(true);
+
+  if (LV)
+    LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+  // Move instruction to its destination.
+  MBB->remove(MI);
+  MBB->insert(KillPos, MI);
+
+  ++Num3AddrSunk;
+  return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+  const TargetInstrDesc &TID = UseMI->getDesc();
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = UseMI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg &&
+        (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+      // Earlier use is a two-address one.
+      return true;
+  }
+  return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+                                         const TargetRegisterClass *RC,
+                                         MachineInstr *MI, MachineInstr *DefMI,
+                                         MachineBasicBlock *MBB, unsigned Loc) {
+  bool OtherUse = false;
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = UseMO.getParent();
+    MachineBasicBlock *UseMBB = UseMI->getParent();
+    if (UseMBB == MBB) {
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end() && DI->second == Loc)
+        continue;  // Current use.
+      OtherUse = true;
+      // There is at least one other use in the MBB that will clobber the
+      // register.
+      if (isTwoAddrUse(UseMI, Reg))
+        return true;
+    }
+  }
+
+  // If other uses in MBB are not two-address uses, then don't remat.
+  if (OtherUse)
+    return false;
+
+  // No other uses in the same block, remat if it's defined in the same
+  // block so it does not unnecessarily extend the live range.
+  return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between
+/// the last instruction in the MBB that defines the specified register and
+/// the two-address instruction which is being processed. It also returns the
+/// last def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+                                          MachineBasicBlock *MBB, unsigned Dist,
+                                          unsigned &LastDef) {
+  LastDef = 0;
+  unsigned LastUse = Dist;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (MO.isUse() && DI->second < LastUse)
+      LastUse = DI->second;
+    if (MO.isDef() && DI->second > LastDef)
+      LastDef = DI->second;
+  }
+
+  return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+                                                     MachineBasicBlock *MBB,
+                                                     unsigned Dist) {
+  unsigned LastUseDist = 0;
+  MachineInstr *LastUse = 0;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (DI->second >= Dist)
+      continue;
+
+    if (MO.isUse() && DI->second > LastUseDist) {
+      LastUse = DI->first;
+      LastUseDist = DI->second;
+    }
+  }
+  return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an extract_subreg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+                        unsigned &SrcReg, unsigned &DstReg,
+                        bool &IsSrcPhys, bool &IsDstPhys) {
+  SrcReg = 0;
+  DstReg = 0;
+  unsigned SrcSubIdx, DstSubIdx;
+  if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+    if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(1).getReg();
+    } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(2).getReg();
+    } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(2).getReg();
+    }
+  }
+
+  if (DstReg) {
+    IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+    IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    return true;
+  }
+  return false;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+///   %reg1034 = copy %reg1024
+///   %reg1035 = copy %reg1025
+///   %reg1036 = add %reg1034, %reg1035
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+                     const MachineRegisterInfo *MRI,
+                     const TargetInstrInfo *TII) {
+  MachineInstr *DefMI = &MI;
+  for (;;) {
+    if (!DefMI->killsRegister(Reg))
+      return false;
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return true;
+    MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+    // If there are multiple defs, we can't do a simple analysis, so just
+    // go with what the kill flag says.
+    if (next(Begin) != MRI->def_end())
+      return true;
+    DefMI = &*Begin;
+    bool IsSrcPhys, IsDstPhys;
+    unsigned SrcReg, DstReg;
+    // If the def is something other than a copy, then it isn't going to
+    // be coalesced, so follow the kill flag.
+    if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+      return true;
+    Reg = SrcReg;
+  }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+    ? MI.getNumOperands() : TID.getNumOperands();
+  for (unsigned i = 0; i != NumOps; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+      continue;
+    unsigned ti;
+    if (MI.isRegTiedToDefOperand(i, &ti)) {
+      DstReg = MI.getOperand(ti).getReg();
+      return true;
+    }
+  }
+  return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single
+/// in-basic-block use, return the use instruction if it's a copy or a
+/// two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+                                     MachineRegisterInfo *MRI,
+                                     const TargetInstrInfo *TII,
+                                     bool &IsCopy,
+                                     unsigned &DstReg, bool &IsDstPhys) {
+  MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
+  if (UI == MRI->use_end())
+    return 0;
+  MachineInstr &UseMI = *UI;
+  if (++UI != MRI->use_end())
+    // More than one use.
+    return 0;
+  if (UseMI.getParent() != MBB)
+    return 0;
+  unsigned SrcReg;
+  bool IsSrcPhys;
+  if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+    IsCopy = true;
+    return &UseMI;
+  }
+  IsDstPhys = false;
+  if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+    IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    return &UseMI;
+  }
+  return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+  while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+    if (SI == RegMap.end())
+      return 0;
+    Reg = SI->second;
+  }
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg;
+  return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+  if (RegA == RegB)
+    return true;
+  if (!RegA || !RegB)
+    return false;
+  return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+                                       MachineInstr *MI, MachineBasicBlock *MBB,
+                                       unsigned Dist) {
+  // Determine if it's profitable to commute this two address instruction.
In + // general, we want no uses between this instruction and the definition of + // the two-address register. + // e.g. + // %reg1028 = EXTRACT_SUBREG %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS + // insert => %reg1030 = MOV8rr %reg1028 + // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // In this case, it might not be possible to coalesce the second MOV8rr + // instruction if the first one is coalesced. So it would be profitable to + // commute it: + // %reg1028 = EXTRACT_SUBREG %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS + // insert => %reg1030 = MOV8rr %reg1029 + // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS + + if (!MI->killsRegister(regC)) + return false; + + // Ok, we have something like: + // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // let's see if it's worth commuting it. + + // Look for situations like this: + // %reg1024 = MOV r1 + // %reg1025 = MOV r0 + // %reg1026 = ADD %reg1024, %reg1025 + // r0 = MOV %reg1026 + // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + unsigned ToRegB = getMappedReg(regB, DstRegMap); + unsigned ToRegC = getMappedReg(regC, DstRegMap); + if (!regsAreCompatible(FromRegB, ToRegB, TRI) && + (regsAreCompatible(FromRegB, ToRegC, TRI) || + regsAreCompatible(FromRegC, ToRegB, TRI))) + return true; + + // If there is a use of regC between its last def (could be livein) and this + // instruction, then bail. + unsigned LastDefC = 0; + if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC)) + return false; + + // If there is a use of regB between its last def (could be livein) and this + // instruction, then go ahead and make this transformation. + unsigned LastDefB = 0; + if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB)) + return true; + + // Since there are no intervening uses for both registers, then commute + // if the def of regC is closer. Its live interval is shorter. + return LastDefB && LastDefC && LastDefC > LastDefB; +} + +/// CommuteInstruction - Commute a two-address instruction and update the basic +/// block, distance map, and live variables if needed. Return true if it is +/// successful. +bool +TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, + MachineFunction::iterator &mbbi, + unsigned RegB, unsigned RegC, unsigned Dist) { + MachineInstr *MI = mi; + DOUT << "2addr: COMMUTING : " << *MI; + MachineInstr *NewMI = TII->commuteInstruction(MI); + + if (NewMI == 0) { + DOUT << "2addr: COMMUTING FAILED!\n"; + return false; + } + + DOUT << "2addr: COMMUTED TO: " << *NewMI; + // If the instruction changed to commute it, update livevar. + if (NewMI != MI) { + if (LV) + // Update live variables + LV->replaceKillInstruction(RegC, MI, NewMI); + + mbbi->insert(mi, NewMI); // Insert the new inst + mbbi->erase(mi); // Nuke the old inst. + mi = NewMI; + DistanceMap.insert(std::make_pair(NewMI, Dist)); + } + + // Update source register map. + unsigned FromRegC = getMappedReg(RegC, SrcRegMap); + if (FromRegC) { + unsigned RegA = MI->getOperand(0).getReg(); + SrcRegMap[RegA] = FromRegC; + } + + return true; +} + +/// isProfitableToConv3Addr - Return true if it is profitable to convert the +/// given 2-address instruction to a 3-address one. 
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+  // Look for situations like this:
+  // %reg1024 = MOV r1
+  // %reg1025 = MOV r0
+  // %reg1026 = ADD %reg1024, %reg1025
+  // r2       = MOV %reg1026
+  // Turn ADD into a 3-address instruction to avoid a copy.
+  unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+  unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+  return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                                              MachineBasicBlock::iterator &nmi,
+                                              MachineFunction::iterator &mbbi,
+                                              unsigned RegB, unsigned Dist) {
+  MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+  if (NewMI) {
+    DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+    DOUT << "2addr:         TO 3-ADDR: " << *NewMI;
+    bool Sunk = false;
+
+    if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+      Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+    mbbi->erase(mi); // Nuke the old inst.
+
+    if (!Sunk) {
+      DistanceMap.insert(std::make_pair(NewMI, Dist));
+      mi = NewMI;
+      nmi = next(mi);
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1    = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+                                            MachineBasicBlock *MBB,
+                                     SmallPtrSet<MachineInstr*, 8> &Processed) {
+  if (Processed.count(MI))
+    return;
+
+  bool IsSrcPhys, IsDstPhys;
+  unsigned SrcReg, DstReg;
+  if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+    return;
+
+  if (IsDstPhys && !IsSrcPhys)
+    DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+  else if (!IsDstPhys && IsSrcPhys) {
+    bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+    if (!isNew)
+      assert(SrcRegMap[DstReg] == SrcReg &&
+             "Can't map to two src physical registers!");
+
+    SmallVector<unsigned, 4> VirtRegPairs;
+    bool IsCopy = false;
+    unsigned NewReg = 0;
+    while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI, TII,
+                                                   IsCopy, NewReg, IsDstPhys)) {
+      if (IsCopy) {
+        if (!Processed.insert(UseMI))
+          break;
+      }
+
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end())
+        // Earlier in the same MBB. Reached via a back edge.
+        break;
+
+      if (IsDstPhys) {
+        VirtRegPairs.push_back(NewReg);
+        break;
+      }
+      bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+      if (!isNew)
+        assert(SrcRegMap[NewReg] == DstReg &&
+               "Can't map to two src physical registers!");
+      VirtRegPairs.push_back(NewReg);
+      DstReg = NewReg;
+    }
+
+    if (!VirtRegPairs.empty()) {
+      unsigned ToReg = VirtRegPairs.back();
+      VirtRegPairs.pop_back();
+      while (!VirtRegPairs.empty()) {
+        unsigned FromReg = VirtRegPairs.back();
+        VirtRegPairs.pop_back();
+        bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+        if (!isNew)
+          assert(DstRegMap[FromReg] == ToReg &&
+                 "Can't map to two dst physical registers!");
+        ToReg = FromReg;
+      }
+    }
+  }
+
+  Processed.insert(MI);
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
+                           const TargetInstrInfo *TII,
+                           SmallVector<unsigned, 4> &Kills) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (TID.mayStore() || TID.isCall())
+    return false;
+  if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+    return false;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.getReg() != Reg && MO.isKill())
+      Kills.push_back(MO.getReg());
+  }
+
+  return true;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+  DOUT << "Machine Function\n";
+  const TargetMachine &TM = MF.getTarget();
+  MRI = &MF.getRegInfo();
+  TII = TM.getInstrInfo();
+  TRI = TM.getRegisterInfo();
+  LV = getAnalysisIfAvailable<LiveVariables>();
+
+  bool MadeChange = false;
+
+  DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+  DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+  // ReMatRegs - Keep track of the registers whose def's are remat'ed.
+  BitVector ReMatRegs;
+  ReMatRegs.resize(MRI->getLastVirtReg()+1);
+
+  SmallPtrSet<MachineInstr*, 8> Processed;
+  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+       mbbi != mbbe; ++mbbi) {
+    unsigned Dist = 0;
+    DistanceMap.clear();
+    SrcRegMap.clear();
+    DstRegMap.clear();
+    Processed.clear();
+    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+         mi != me; ) {
+      MachineBasicBlock::iterator nmi = next(mi);
+      const TargetInstrDesc &TID = mi->getDesc();
+      bool FirstTied = true;
+
+      DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+      ProcessCopy(&*mi, &*mbbi, Processed);
+
+      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
+        ? mi->getNumOperands() : TID.getNumOperands();
+      for (unsigned si = 0; si < NumOps; ++si) {
+        unsigned ti = 0;
+        if (!mi->isRegTiedToDefOperand(si, &ti))
+          continue;
+
+        if (FirstTied) {
+          ++NumTwoAddressInstrs;
+          DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+        }
+
+        FirstTied = false;
+
+        assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() &&
+               mi->getOperand(si).isUse() && "two address instruction invalid");
+
+        // If the two operands are the same we just remove the use
+        // and mark the def as def&use, otherwise we have to insert a copy.
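+        // (Illustrative: for %reg1026 = ADD %reg1024, %reg1025 with the def
+        // tied to the first source, the pass emits %reg1026 = MOV %reg1024
+        // and rewrites the ADD to %reg1026 = ADD %reg1026, %reg1025. The
+        // register numbers and opcodes are hypothetical.)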
+        if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+          // Rewrite:
+          //     a = b op c
+          // to:
+          //     a = b
+          //     a = a op c
+          unsigned regA = mi->getOperand(ti).getReg();
+          unsigned regB = mi->getOperand(si).getReg();
+
+          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+                 "cannot update physical register live information");
+
+#ifndef NDEBUG
+          // First, verify that we don't have a use of a in the instruction
+          // (a = b + a for example) because our transformation will not work.
+          // This should never occur because we are in SSA form.
+          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+            assert(i == ti ||
+                   !mi->getOperand(i).isReg() ||
+                   mi->getOperand(i).getReg() != regA);
+#endif
+
+          // If this instruction is not the killing user of B, see if we can
+          // rearrange the code to make it so. Making it the killing user will
+          // allow us to coalesce A and B together, eliminating the copy we are
+          // about to insert.
+          if (!isKilled(*mi, regB, MRI, TII)) {
+            // If regA is dead and the instruction can be deleted, just delete
+            // it so it doesn't clobber regB.
+            SmallVector<unsigned, 4> Kills;
+            if (mi->getOperand(ti).isDead() &&
+                isSafeToDelete(mi, regB, TII, Kills)) {
+              SmallVector<std::pair<std::pair<unsigned, bool>,
+                                    MachineInstr*>, 4> NewKills;
+              bool ReallySafe = true;
+              // If this instruction kills some virtual registers, we need
+              // to update the kill information. If it's not possible to do so,
+              // then bail out.
+              while (!Kills.empty()) {
+                unsigned Kill = Kills.back();
+                Kills.pop_back();
+                if (TargetRegisterInfo::isPhysicalRegister(Kill)) {
+                  ReallySafe = false;
+                  break;
+                }
+                MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist);
+                if (LastKill) {
+                  bool isModRef = LastKill->modifiesRegister(Kill);
+                  NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+                                                    LastKill));
+                } else {
+                  ReallySafe = false;
+                  break;
+                }
+              }
+
+              if (ReallySafe) {
+                if (LV) {
+                  while (!NewKills.empty()) {
+                    MachineInstr *NewKill = NewKills.back().second;
+                    unsigned Kill = NewKills.back().first.first;
+                    bool isDead = NewKills.back().first.second;
+                    NewKills.pop_back();
+                    if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+                      if (isDead)
+                        LV->addVirtualRegisterDead(Kill, NewKill);
+                      else
+                        LV->addVirtualRegisterKilled(Kill, NewKill);
+                    }
+                  }
+                }
+
+                // We're really going to nuke the old inst. If regB was marked
+                // as a kill we need to update its Kills list.
+                if (LV && mi->getOperand(si).isKill())
+                  LV->removeVirtualRegisterKilled(regB, mi);
+
+                mbbi->erase(mi); // Nuke the old inst.
+                mi = nmi;
+                ++NumDeletes;
+                break; // Done with this instruction.
+              }
+            }
+
+            // If this instruction is commutative, check to see if C dies. If
+            // so, swap the B and C operands. This makes the live ranges of A
+            // and C joinable.
+            // FIXME: This code also works for A := B op C instructions.
+            if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+              assert(mi->getOperand(3-si).isReg() &&
+                     "Not a proper commutative instruction!");
+              unsigned regC = mi->getOperand(3-si).getReg();
+              if (isKilled(*mi, regC, MRI, TII)) {
+                if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+                  ++NumCommuted;
+                  regB = regC;
+                  goto InstructionRearranged;
+                }
+              }
+            }
+
+            // If this instruction is potentially convertible to a true
+            // three-address instruction,
+            if (TID.isConvertibleTo3Addr()) {
+              // FIXME: This assumes there are no more operands which are tied
+              // to another register.
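+              // (Illustrative: the assertion below only checks operands past
+              // si; an instruction with a second tied operand pair would
+              // defeat this conversion and would need each pair handled
+              // separately.)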
+#ifndef NDEBUG
+              for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i)
+                assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+              if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+                ++NumConvertedTo3Addr;
+                break; // Done with this instruction.
+              }
+            }
+          }
+
+          // If it's profitable to commute the instruction, do so.
+          if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+            unsigned regC = mi->getOperand(3-si).getReg();
+            if (isProfitableToCommute(regB, regC, mi, mbbi, Dist))
+              if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+                ++NumAggrCommuted;
+                ++NumCommuted;
+                regB = regC;
+                goto InstructionRearranged;
+              }
+          }
+
+          // If it's profitable to convert the 2-address instruction to a
+          // 3-address one, do so.
+          if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) {
+            if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+              ++NumConvertedTo3Addr;
+              break; // Done with this instruction.
+            }
+          }
+
+        InstructionRearranged:
+          const TargetRegisterClass* rc = MRI->getRegClass(regB);
+          MachineInstr *DefMI = MRI->getVRegDef(regB);
+          // If it's safe and profitable, remat the definition instead of
+          // copying it.
+          if (DefMI &&
+              DefMI->getDesc().isAsCheapAsAMove() &&
+              DefMI->isSafeToReMat(TII, regB) &&
+              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)) {
+            DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n");
+            TII->reMaterialize(*mbbi, mi, regA, DefMI);
+            ReMatRegs.set(regB);
+            ++NumReMats;
+          } else {
+            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+            (void)Emitted;
+            assert(Emitted && "Unable to issue a copy instruction!\n");
+          }
+
+          MachineBasicBlock::iterator prevMI = prior(mi);
+          // Update DistanceMap.
+          DistanceMap.insert(std::make_pair(prevMI, Dist));
+          DistanceMap[mi] = ++Dist;
+
+          // Update live variables for regB.
+          if (LV) {
+            if (LV->removeVirtualRegisterKilled(regB, mi))
+              LV->addVirtualRegisterKilled(regB, prevMI);
+
+            if (LV->removeVirtualRegisterDead(regB, mi))
+              LV->addVirtualRegisterDead(regB, prevMI);
+          }
+
+          DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
+
+          // Replace all occurrences of regB with regA.
+          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+            if (mi->getOperand(i).isReg() &&
+                mi->getOperand(i).getReg() == regB)
+              mi->getOperand(i).setReg(regA);
+          }
+        }
+
+        assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+        mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+        MadeChange = true;
+
+        DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+      }
+
+      mi = nmi;
+    }
+  }
+
+  // Some remat'ed instructions are dead.
+  int VReg = ReMatRegs.find_first();
+  while (VReg != -1) {
+    if (MRI->use_empty(VReg)) {
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      DefMI->eraseFromParent();
+    }
+    VReg = ReMatRegs.find_next(VReg);
+  }
+
+  return MadeChange;
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..c3b213cebe95
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,199 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+  class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+    virtual bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    UnreachableBlockElim() : FunctionPass(&ID) {}
+  };
+}
+char UnreachableBlockElim::ID = 0;
+static RegisterPass<UnreachableBlockElim>
+X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+  return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+  SmallPtrSet<BasicBlock*, 8> Reachable;
+
+  // Mark all reachable blocks.
+  for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+       df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+    /* Mark all reachable blocks */;
+
+  // Loop over all dead blocks, remembering them and deleting all instructions
+  // in them.
+  std::vector<BasicBlock*> DeadBlocks;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (!Reachable.count(I)) {
+      BasicBlock *BB = I;
+      DeadBlocks.push_back(BB);
+      while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+        PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+        BB->getInstList().pop_front();
+      }
+      for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+        (*SI)->removePredecessor(BB);
+      BB->dropAllReferences();
+    }
+
+  // Actually remove the blocks now.
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+    DeadBlocks[i]->eraseFromParent();
+
+  return DeadBlocks.size();
+}
+
+
+namespace {
+  class VISIBILITY_HIDDEN UnreachableMachineBlockElim :
+        public MachineFunctionPass {
+    virtual bool runOnMachineFunction(MachineFunction &F);
+    MachineModuleInfo *MMI;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+  };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+static RegisterPass<UnreachableMachineBlockElim>
+Y("unreachable-mbb-elimination",
+  "Remove unreachable machine basic blocks");
+
+const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+  SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+  // Mark all reachable blocks.
+  for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+       I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+       I != E; ++I)
+    /* Mark all reachable blocks */;
+
+  // Loop over all dead blocks, remembering them and deleting all instructions
+  // in them.
+  std::vector<MachineBasicBlock*> DeadBlocks;
+  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+
+    // Test for deadness.
+    if (!Reachable.count(BB)) {
+      DeadBlocks.push_back(BB);
+
+      while (BB->succ_begin() != BB->succ_end()) {
+        MachineBasicBlock* succ = *BB->succ_begin();
+
+        MachineBasicBlock::iterator start = succ->begin();
+        while (start != succ->end() &&
+               start->getOpcode() == TargetInstrInfo::PHI) {
+          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+            if (start->getOperand(i).isMBB() &&
+                start->getOperand(i).getMBB() == BB) {
+              start->RemoveOperand(i);
+              start->RemoveOperand(i-1);
+            }
+
+          start++;
+        }
+
+        BB->removeSuccessor(BB->succ_begin());
+      }
+    }
+  }
+
+  // Actually remove the blocks now.
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = DeadBlocks[i];
+    // If there are any labels in the basic block, unregister them from
+    // MachineModuleInfo.
+    if (MMI && !MBB->empty()) {
+      for (MachineBasicBlock::iterator I = MBB->begin(),
+             E = MBB->end(); I != E; ++I) {
+        if (I->isLabel())
+          // The label ID # is always operand #0, an immediate.
+          MMI->InvalidateLabel(I->getOperand(0).getImm());
+      }
+    }
+    MBB->eraseFromParent();
+  }
+
+  // Cleanup PHI nodes.
+  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+    // Prune unneeded PHI entries.
+    SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+                                             BB->pred_end());
+    MachineBasicBlock::iterator phi = BB->begin();
+    while (phi != BB->end() &&
+           phi->getOpcode() == TargetInstrInfo::PHI) {
+      for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+        if (!preds.count(phi->getOperand(i).getMBB())) {
+          phi->RemoveOperand(i);
+          phi->RemoveOperand(i-1);
+        }
+
+      if (phi->getNumOperands() == 3) {
+        unsigned Input = phi->getOperand(1).getReg();
+        unsigned Output = phi->getOperand(0).getReg();
+
+        MachineInstr* temp = phi;
+        ++phi;
+        temp->eraseFromParent();
+
+        if (Input != Output)
+          F.getRegInfo().replaceRegWith(Output, Input);
+
+        continue;
+      }
+
+      ++phi;
+    }
+  }
+
+  F.RenumberBlocks();
+
+  return DeadBlocks.size();
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..29637b954f0b
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,269 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding
+// spill code as necessary.
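+//
+// For example (an illustrative sketch): if the map assigns %reg1024 -> EAX
+// and spills %reg1025 to stack slot fi#0, each use of %reg1025 is rewritten
+// into a reload from fi#0 into some physical register, and each def into a
+// store back to fi#0.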
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills, "Number of register spills");
+
+//===----------------------------------------------------------------------===//
+//  VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+static RegisterPass<VirtRegMap>
+X("virtregmap", "Virtual Register Map");
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+  TII = mf.getTarget().getInstrInfo();
+  TRI = mf.getTarget().getRegisterInfo();
+  MF = &mf;
+
+  ReMatId = MAX_STACK_SLOT+1;
+  LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+  Virt2PhysMap.clear();
+  Virt2StackSlotMap.clear();
+  Virt2ReMatIdMap.clear();
+  Virt2SplitMap.clear();
+  Virt2SplitKillMap.clear();
+  ReMatMap.clear();
+  ImplicitDefed.clear();
+  SpillSlotToUsesMap.clear();
+  MI2VirtMap.clear();
+  SpillPt2VirtMap.clear();
+  RestorePt2VirtMap.clear();
+  EmergencySpillMap.clear();
+  EmergencySpillSlots.clear();
+
+  SpillSlotToUsesMap.resize(8);
+  ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
+                       TargetRegisterInfo::FirstVirtualRegister);
+
+  allocatableRCRegs.clear();
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I)
+    allocatableRCRegs.insert(std::make_pair(*I,
+                                            TRI->getAllocatableSet(mf, *I)));
+
+  grow();
+
+  return false;
+}
+
+void VirtRegMap::grow() {
+  unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+  Virt2PhysMap.grow(LastVirtReg);
+  Virt2StackSlotMap.grow(LastVirtReg);
+  Virt2ReMatIdMap.grow(LastVirtReg);
+  Virt2SplitMap.grow(LastVirtReg);
+  Virt2SplitKillMap.grow(LastVirtReg);
+  ReMatMap.grow(LastVirtReg);
+  ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+  int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                 RC->getAlignment());
+  if (LowSpillSlot == NO_STACK_SLOT)
+    LowSpillSlot = SS;
+  if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+    HighSpillSlot = SS;
+  unsigned Idx = SS-LowSpillSlot;
+  while (Idx >= SpillSlotToUsesMap.size())
+    SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+  Virt2StackSlotMap[virtReg] = SS;
+  ++NumSpills;
+  return SS;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  assert((SS >= 0 ||
+          (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+         "illegal fixed frame index");
+  Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign re-mat id to already spilled register");
+  Virt2ReMatIdMap[virtReg] = ReMatId;
+  return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign re-mat id to already spilled register");
+  Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+  std::map<const TargetRegisterClass*, int>::iterator I =
+    EmergencySpillSlots.find(RC);
+  if (I != EmergencySpillSlots.end())
+    return I->second;
+  int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                 RC->getAlignment());
+  if (LowSpillSlot == NO_STACK_SLOT)
+    LowSpillSlot = SS;
+  if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+    HighSpillSlot = SS;
+  EmergencySpillSlots[RC] = SS;
+  return SS;
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+  if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+    // If FI < LowSpillSlot, this stack reference was produced by
+    // instruction selection and is not a spill.
+    if (FI >= LowSpillSlot) {
+      assert(FI >= 0 && "Spill slot index should not be negative!");
+      assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+             && "Invalid spill slot");
+      SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+    }
+  }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+                            MachineInstr *NewMI, ModRef MRInfo) {
+  // Move previous memory references folded to new instruction.
+  MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+  for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+         E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+    MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+    MI2VirtMap.erase(I++);
+  }
+
+  // Add new memory reference.
+  MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+  MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+  MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isFI())
+      continue;
+    int FI = MO.getIndex();
+    if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+      continue;
+    // This stack reference was produced by instruction selection and
+    // is not a spill.
+    if (FI < LowSpillSlot)
+      continue;
+    assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+           && "Invalid spill slot");
+    SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+  }
+  MI2VirtMap.erase(MI);
+  SpillPt2VirtMap.erase(MI);
+  RestorePt2VirtMap.erase(MI);
+  EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI, + LiveIntervals* LIs) { + unsigned NumRegs = TRI->getNumRegs(); + UnusedRegs.reset(); + UnusedRegs.resize(NumRegs); + + BitVector Used(NumRegs); + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[i]); + + BitVector Allocatable = TRI->getAllocatableSet(*MF); + bool AnyUnused = false; + for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { + if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { + bool ReallyUnused = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + if (Used[*AS] || LIs->hasInterval(*AS)) { + ReallyUnused = false; + break; + } + } + if (ReallyUnused) { + AnyUnused = true; + UnusedRegs.set(Reg); + } + } + } + + return AnyUnused; +} + +void VirtRegMap::print(std::ostream &OS, const Module* M) const { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) + << "]\n"; + } + + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n"; + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(cerr); +} diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h new file mode 100644 index 000000000000..507557d24c08 --- /dev/null +++ b/lib/CodeGen/VirtRegMap.h @@ -0,0 +1,495 @@ +//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a virtual register map. This maps virtual registers to +// physical registers and virtual registers to stack slots. It is created and +// updated by a register allocator and then used by a machine code rewriter that +// adds spill code and rewrites virtual into physical register references. 
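+//
+// For example (illustrative), after allocation the map may hold entries like
+// [reg1024 -> EAX] and [reg1025 -> fi#1], which is the format emitted by
+// VirtRegMap::print() below.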
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_VIRTREGMAP_H +#define LLVM_CODEGEN_VIRTREGMAP_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Streams.h" +#include + +namespace llvm { + class LiveIntervals; + class MachineInstr; + class MachineFunction; + class TargetInstrInfo; + class TargetRegisterInfo; + + class VirtRegMap : public MachineFunctionPass { + public: + enum { + NO_PHYS_REG = 0, + NO_STACK_SLOT = (1L << 30)-1, + MAX_STACK_SLOT = (1L << 18)-1 + }; + + enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; + typedef std::multimap > MI2VirtMapTy; + + private: + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineFunction *MF; + + DenseMap allocatableRCRegs; + + /// Virt2PhysMap - This is a virtual to physical register + /// mapping. Each virtual register is required to have an entry in + /// it; even spilled virtual registers (the register mapped to a + /// spilled register is the temporary used to load it from the + /// stack). + IndexedMap Virt2PhysMap; + + /// Virt2StackSlotMap - This is virtual register to stack slot + /// mapping. Each spilled virtual register has an entry in it + /// which corresponds to the stack slot this register is spilled + /// at. + IndexedMap Virt2StackSlotMap; + + /// Virt2ReMatIdMap - This is virtual register to rematerialization id + /// mapping. Each spilled virtual register that should be remat'd has an + /// entry in it which corresponds to the remat id. + IndexedMap Virt2ReMatIdMap; + + /// Virt2SplitMap - This is virtual register to splitted virtual register + /// mapping. + IndexedMap Virt2SplitMap; + + /// Virt2SplitKillMap - This is splitted virtual register to its last use + /// (kill) index mapping. + IndexedMap Virt2SplitKillMap; + + /// ReMatMap - This is virtual register to re-materialized instruction + /// mapping. Each virtual register whose definition is going to be + /// re-materialized has an entry in it. + IndexedMap ReMatMap; + + /// MI2VirtMap - This is MachineInstr to virtual register + /// mapping. In the case of memory spill code being folded into + /// instructions, we need to know which virtual register was + /// read/written by this instruction. + MI2VirtMapTy MI2VirtMap; + + /// SpillPt2VirtMap - This records the virtual registers which should + /// be spilled right after the MachineInstr due to live interval + /// splitting. + std::map > > + SpillPt2VirtMap; + + /// RestorePt2VirtMap - This records the virtual registers which should + /// be restored right before the MachineInstr due to live interval + /// splitting. + std::map > RestorePt2VirtMap; + + /// EmergencySpillMap - This records the physical registers that should + /// be spilled / restored around the MachineInstr since the register + /// allocator has run out of registers. + std::map > EmergencySpillMap; + + /// EmergencySpillSlots - This records emergency spill slots used to + /// spill physical registers when the register allocator runs out of + /// registers. Ideally only one stack slot is used per function per + /// register class. + std::map EmergencySpillSlots; + + /// ReMatId - Instead of assigning a stack slot to a to be rematerialized + /// virtual register, an unique id is being assigned. 
This keeps track of + /// the highest id used so far. Note, this starts at (1<<18) to avoid + /// conflicts with stack slot numbers. + int ReMatId; + + /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. + int LowSpillSlot, HighSpillSlot; + + /// SpillSlotToUsesMap - Records uses for each register spill slot. + SmallVector, 8> SpillSlotToUsesMap; + + /// ImplicitDefed - One bit for each virtual register. If set it indicates + /// the register is implicitly defined. + BitVector ImplicitDefed; + + /// UnusedRegs - A list of physical registers that have not been used. + BitVector UnusedRegs; + + VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT + void operator=(const VirtRegMap&); // DO NOT IMPLEMENT + + public: + static char ID; + VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG), + Virt2StackSlotMap(NO_STACK_SLOT), + Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), + Virt2SplitKillMap(0), ReMatMap(NULL), + ReMatId(MAX_STACK_SLOT+1), + LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + void grow(); + + /// @brief returns true if the specified virtual register is + /// mapped to a physical register + bool hasPhys(unsigned virtReg) const { + return getPhys(virtReg) != NO_PHYS_REG; + } + + /// @brief returns the physical register mapped to the specified + /// virtual register + unsigned getPhys(unsigned virtReg) const { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + return Virt2PhysMap[virtReg]; + } + + /// @brief creates a mapping for the specified virtual register to + /// the specified physical register + void assignVirt2Phys(unsigned virtReg, unsigned physReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg) && + TargetRegisterInfo::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && + "attempt to assign physical register to already mapped " + "virtual register"); + Virt2PhysMap[virtReg] = physReg; + } + + /// @brief clears the specified virtual register's, physical + /// register mapping + void clearVirt(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && + "attempt to clear a not assigned virtual register"); + Virt2PhysMap[virtReg] = NO_PHYS_REG; + } + + /// @brief clears all virtual to physical register mappings + void clearAllVirt() { + Virt2PhysMap.clear(); + grow(); + } + + /// @brief records virtReg is a split live interval from SReg. + void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { + Virt2SplitMap[virtReg] = SReg; + } + + /// @brief returns the live interval virtReg is split from. + unsigned getPreSplitReg(unsigned virtReg) { + return Virt2SplitMap[virtReg]; + } + + /// @brief returns true if the specified virtual register is not + /// mapped to a stack slot or rematerialized. + bool isAssignedReg(unsigned virtReg) const { + if (getStackSlot(virtReg) == NO_STACK_SLOT && + getReMatId(virtReg) == NO_STACK_SLOT) + return true; + // Split register can be assigned a physical register as well as a + // stack slot or remat id. 
+      return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+    }
+
+    /// @brief returns the stack slot mapped to the specified virtual
+    /// register
+    int getStackSlot(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2StackSlotMap[virtReg];
+    }
+
+    /// @brief returns the rematerialization id mapped to the specified virtual
+    /// register
+    int getReMatId(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2ReMatIdMap[virtReg];
+    }
+
+    /// @brief create a mapping for the specified virtual register to
+    /// the next available stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+    /// @brief create a mapping for the specified virtual register to
+    /// the specified stack slot
+    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    int assignVirtReMatId(unsigned virtReg);
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    void assignVirtReMatId(unsigned virtReg, int id);
+
+    /// @brief returns true if the specified virtual register is being
+    /// re-materialized.
+    bool isReMaterialized(unsigned virtReg) const {
+      return ReMatMap[virtReg] != NULL;
+    }
+
+    /// @brief returns the original machine instruction being re-issued
+    /// to re-materialize the specified virtual register.
+    MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+      return ReMatMap[virtReg];
+    }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose. If all uses of the register are rematerialized,
+    /// it is safe to delete the definition.
+    void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+      ReMatMap[virtReg] = def;
+    }
+
+    /// @brief record the last use (kill) of a split virtual register.
+    void addKillPoint(unsigned virtReg, unsigned index) {
+      Virt2SplitKillMap[virtReg] = index;
+    }
+
+    unsigned getKillPoint(unsigned virtReg) const {
+      return Virt2SplitKillMap[virtReg];
+    }
+
+    /// @brief remove the last use (kill) of a split virtual register.
+    void removeKillPoint(unsigned virtReg) {
+      Virt2SplitKillMap[virtReg] = 0;
+    }
+
+    /// @brief returns true if the specified MachineInstr is a spill point.
+    bool isSpillPt(MachineInstr *Pt) const {
+      return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be spilled due to
+    /// splitting right after the specified MachineInstr.
+    std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+      return SpillPt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a spill point for virtReg.
+    void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Pt);
+      if (I != SpillPt2VirtMap.end())
+        I->second.push_back(std::make_pair(virtReg, isKill));
+      else {
+        std::vector<std::pair<unsigned,bool> > Virts;
+        Virts.push_back(std::make_pair(virtReg, isKill));
+        SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer spill point information from one instruction to
+    /// another.
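+    /// This is used when an instruction that carries spill-point
+    /// information is rewritten, so that pending spills are re-attached
+    /// to the replacement instruction.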
+    void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Old);
+      if (I == SpillPt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back().first;
+        bool isKill = I->second.back().second;
+        I->second.pop_back();
+        addSpillPoint(virtReg, isKill, New);
+      }
+      SpillPt2VirtMap.erase(I);
+    }
+
+    /// @brief returns true if the specified MachineInstr is a restore point.
+    bool isRestorePt(MachineInstr *Pt) const {
+      return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be restored due to
+    /// splitting right before the specified MachineInstr.
+    std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+      return RestorePt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a restore point for virtReg.
+    void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Pt);
+      if (I != RestorePt2VirtMap.end())
+        I->second.push_back(virtReg);
+      else {
+        std::vector<unsigned> Virts;
+        Virts.push_back(virtReg);
+        RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer restore point information from one instruction to
+    /// another.
+    void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Old);
+      if (I == RestorePt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addRestorePoint(virtReg, New);
+      }
+      RestorePt2VirtMap.erase(I);
+    }
+
+    /// @brief records that the specified physical register must be spilled
+    /// around the specified machine instr.
+    void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+      if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+        EmergencySpillMap[MI].push_back(PhysReg);
+      else {
+        std::vector<unsigned> PhysRegs;
+        PhysRegs.push_back(PhysReg);
+        EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+      }
+    }
+
+    /// @brief returns true if one or more physical registers must be spilled
+    /// around the specified instruction.
+    bool hasEmergencySpills(MachineInstr *MI) const {
+      return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+    }
+
+    /// @brief returns the physical registers to be spilled and restored around
+    /// the instruction.
+    std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+      return EmergencySpillMap[MI];
+    }
+
+    /// @brief - transfer emergency spill information from one instruction to
+    /// another.
+    void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        EmergencySpillMap.find(Old);
+      if (I == EmergencySpillMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addEmergencySpill(virtReg, New);
+      }
+      EmergencySpillMap.erase(I);
+    }
+
+    /// @brief return an existing, or create a new, emergency spill slot for
+    /// the register class.
+    int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+    /// @brief Return lowest spill slot index.
+    int getLowSpillSlot() const {
+      return LowSpillSlot;
+    }
+
+    /// @brief Return highest spill slot index.
+    int getHighSpillSlot() const {
+      return HighSpillSlot;
+    }
+
+    /// @brief Records a spill slot use.
+    void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+    /// @brief Returns true if spill slot has been used.
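+    /// Note that SpillSlotToUsesMap is indexed by FrameIndex-LowSpillSlot,
+    /// so it only covers slots in the [LowSpillSlot, HighSpillSlot] range.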
+    bool isSpillSlotUsed(int FrameIndex) const {
+      assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+      return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+    }
+
+    /// @brief Mark the specified register as being implicitly defined.
+    void setIsImplicitlyDefined(unsigned VirtReg) {
+      ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+    }
+
+    /// @brief Returns true if the virtual register is implicitly defined.
+    bool isImplicitlyDefined(unsigned VirtReg) const {
+      return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into newMI machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+                    ModRef MRInfo);
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into the specified machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+    /// @brief returns the virtual registers' values folded in memory
+    /// operands of this instruction
+    std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+    getFoldedVirts(MachineInstr* MI) const {
+      return MI2VirtMap.equal_range(MI);
+    }
+
+    /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+    /// folded instruction map and spill point map.
+    void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+    /// FindUnusedRegisters - Gather a list of allocatable registers that
+    /// have not been allocated to any virtual register.
+    bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
+                             LiveIntervals* LIs);
+
+    /// HasUnusedRegisters - Return true if there are any allocatable registers
+    /// that have not been allocated to any virtual register.
+    bool HasUnusedRegisters() const {
+      return !UnusedRegs.none();
+    }
+
+    /// setRegisterUsed - Remember the physical register is now used.
+    void setRegisterUsed(unsigned Reg) {
+      UnusedRegs.reset(Reg);
+    }
+
+    /// isRegisterUnused - Return true if the physical register has not been
+    /// used.
+    bool isRegisterUnused(unsigned Reg) const {
+      return UnusedRegs[Reg];
+    }
+
+    /// getFirstUnusedRegister - Return the first physical register that has
+    /// not been used.
+    unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+      int Reg = UnusedRegs.find_first();
+      while (Reg != -1) {
+        if (allocatableRCRegs[RC][Reg])
+          return (unsigned)Reg;
+        Reg = UnusedRegs.find_next(Reg);
+      }
+      return 0;
+    }
+
+    void print(std::ostream &OS, const Module* M = 0) const;
+    void print(std::ostream *OS) const { if (OS) print(*OS); }
+    void dump() const;
+  };
+
+  inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+  inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 000000000000..b4c8bc12979a
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2225 @@
+//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+  enum RewriterName { simple, local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+            cl::desc("Rewriter to use: (default: local)"),
+            cl::Prefix,
+            cl::values(clEnumVal(simple,  "simple rewriter"),
+                       clEnumVal(local,   "local rewriter"),
+                       clEnumVal(trivial, "trivial rewriter"),
+                       clEnumValEnd),
+            cl::init(local));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+
+// ****************************** //
+// Simple Spiller Implementation  //
+// ****************************** //
+
+struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DOUT << "********** REWRITE MACHINE CODE **********\n";
+    DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+    const TargetMachine &TM = MF.getTarget();
+    const TargetInstrInfo &TII = *TM.getInstrInfo();
+    const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+    // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+    // each vreg once (in the case where a spilled vreg is used by multiple
+    // operands). This is always smaller than the number of operands to the
+    // current machine instr, so it should be small.
+    std::vector<unsigned> LoadedRegs;
+
+    for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+         MBBI != E; ++MBBI) {
+      DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+      MachineBasicBlock &MBB = *MBBI;
+      for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+           MII != E; ++MII) {
+        MachineInstr &MI = *MII;
+        for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+          MachineOperand &MO = MI.getOperand(i);
+          if (MO.isReg() && MO.getReg()) {
+            if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+              unsigned VirtReg = MO.getReg();
+              unsigned SubIdx = MO.getSubReg();
+              unsigned PhysReg = VRM.getPhys(VirtReg);
+              unsigned RReg = SubIdx ?
+                TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
+              if (!VRM.isAssignedReg(VirtReg)) {
+                int StackSlot = VRM.getStackSlot(VirtReg);
+                const TargetRegisterClass* RC =
+                  MF.getRegInfo().getRegClass(VirtReg);
+
+                if (MO.isUse() &&
+                    std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+                    == LoadedRegs.end()) {
+                  TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+                  MachineInstr *LoadMI = prior(MII);
+                  VRM.addSpillSlotUse(StackSlot, LoadMI);
+                  LoadedRegs.push_back(VirtReg);
+                  ++NumLoads;
+                  DOUT << '\t' << *LoadMI;
+                }
+
+                if (MO.isDef()) {
+                  TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,
+                                          StackSlot, RC);
+                  MachineInstr *StoreMI = next(MII);
+                  VRM.addSpillSlotUse(StackSlot, StoreMI);
+                  ++NumStores;
+                }
+              }
+              MF.getRegInfo().setPhysRegUsed(RReg);
+              MI.getOperand(i).setReg(RReg);
+              MI.getOperand(i).setSubReg(0);
+            } else {
+              MF.getRegInfo().setPhysRegUsed(MO.getReg());
+            }
+          }
+        }
+
+        DOUT << '\t' << MI;
+        LoadedRegs.clear();
+      }
+    }
+    return true;
+  }
+
+};
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DOUT << "********** REWRITE MACHINE CODE **********\n";
+    DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+    MachineRegisterInfo *mri = &MF.getRegInfo();
+
+    bool changed = false;
+
+    for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+         liItr != liEnd; ++liItr) {
+
+      if (TargetRegisterInfo::isVirtualRegister(liItr->first)) {
+        if (VRM.hasPhys(liItr->first)) {
+          unsigned preg = VRM.getPhys(liItr->first);
+          mri->replaceRegWith(liItr->first, preg);
+          mri->setPhysRegUsed(preg);
+          changed = true;
+        }
+      }
+      else {
+        if (!liItr->second->empty()) {
+          mri->setPhysRegUsed(liItr->first);
+        }
+      }
+    }
+
+    return changed;
+  }
+
+};
+
+// ************************************************************************ //
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat ids are available
+/// in each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register-allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+/// this bit and addAvailable sets it if the value can be clobbered.
+class VISIBILITY_HIDDEN AvailableSpills {
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+
+  // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+  // or remat'ed virtual register values that are still available, due to
+  // being loaded or stored to, but not invalidated yet.
+  std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+  // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+  // indicating which stack slot values are currently held by a physreg. This
+  // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+  // physreg is modified.
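+  // For example, if EAX holds the value of SS#4 and may be clobbered, then
+  // SpillSlotsOrReMatsAvailable[4] == (EAX << 1) | 1 and PhysRegsAvailable
+  // contains the pair (EAX, 4).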
+  std::multimap<unsigned, int> PhysRegsAvailable;
+
+  void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+  void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+  AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+    : TRI(tri), TII(tii) {
+  }
+
+  /// clear - Reset the state.
+  void clear() {
+    SpillSlotsOrReMatsAvailable.clear();
+    PhysRegsAvailable.clear();
+  }
+
+  const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+  /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+  /// available in a physical register, return that PhysReg, otherwise
+  /// return 0.
+  unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+    std::map<int, unsigned>::const_iterator I =
+      SpillSlotsOrReMatsAvailable.find(Slot);
+    if (I != SpillSlotsOrReMatsAvailable.end()) {
+      return I->second >> 1;  // Remove the CanClobber bit.
+    }
+    return 0;
+  }
+
+  /// addAvailable - Mark that the specified stack slot / remat is available
+  /// in the specified physreg. If CanClobber is true, the physreg can be
+  /// modified at any time without changing the semantics of the program.
+  void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+    // If this stack slot is thought to be available in some other physreg,
+    // remove its record.
+    ModifyStackSlotOrReMat(SlotOrReMat);
+
+    PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] = (Reg << 1) |
+                                               (unsigned)CanClobber;
+
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1;
+    else
+      DOUT << "Remembering SS#" << SlotOrReMat;
+    DOUT << " in physreg " << TRI->getName(Reg) << "\n";
+  }
+
+  /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+  /// the value of the specified stackslot register if it desires. The
+  /// specified stack slot must be available in a physreg for this query to
+  /// make sense.
+  bool canClobberPhysRegForSS(int SlotOrReMat) const {
+    assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+           "Value not available!");
+    return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+  }
+
+  /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+  /// physical register where values for some stack slot(s) might be
+  /// available.
+  bool canClobberPhysReg(unsigned PhysReg) const {
+    std::multimap<unsigned, int>::const_iterator I =
+      PhysRegsAvailable.lower_bound(PhysReg);
+    while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+      int SlotOrReMat = I->second;
+      I++;
+      if (!canClobberPhysRegForSS(SlotOrReMat))
+        return false;
+    }
+    return true;
+  }
+
+  /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+  /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+  void disallowClobberPhysReg(unsigned PhysReg);
+
+  /// ClobberPhysReg - This is called when the specified physreg changes
+  /// value. We use this to invalidate any info about stuff that lives in
+  /// it and any of its aliases.
+  void ClobberPhysReg(unsigned PhysReg);
+
+  /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+  /// slot changes. This removes information about which register the
+  /// previous value for this slot lives in (as the previous value is dead
+  /// now).
+  void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+  /// AddAvailableRegsToLiveIn - Availability information is carried over
+  /// into the specified MBB. Add available physical registers as potential
+  /// live-in's.
+  /// If they are reused in the MBB, they will be added to the live-in set,
+  /// which keeps the register scavenger and the post-allocation scheduler
+  /// consistent.
+  void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+                                std::vector<MachineOperand*> &KillOps);
+};
+
+// ************************************************************************ //
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+  // The MachineInstr operand that reused an available value.
+  unsigned Operand;
+
+  // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+  unsigned StackSlotOrReMat;
+
+  // PhysRegReused - The physical register the value was available in.
+  unsigned PhysRegReused;
+
+  // AssignedPhysReg - The physreg that was assigned for use by the reload.
+  unsigned AssignedPhysReg;
+
+  // VirtReg - The virtual register itself.
+  unsigned VirtReg;
+
+  ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+           unsigned vreg)
+    : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+      AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+/// is reused instead of reloaded.
+class VISIBILITY_HIDDEN ReuseInfo {
+  MachineInstr &MI;
+  std::vector<ReusedOp> Reuses;
+  BitVector PhysRegsClobbered;
+public:
+  ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+    PhysRegsClobbered.resize(tri->getNumRegs());
+  }
+
+  bool hasReuses() const {
+    return !Reuses.empty();
+  }
+
+  /// addReuse - If we choose to reuse a virtual register that is already
+  /// available instead of reloading it, remember that we did so.
+  void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+                unsigned PhysRegReused, unsigned AssignedPhysReg,
+                unsigned VirtReg) {
+    // If the reload is to the assigned register anyway, no undo will be
+    // required.
+    if (PhysRegReused == AssignedPhysReg) return;
+
+    // Otherwise, remember this.
+    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+                              AssignedPhysReg, VirtReg));
+  }
+
+  void markClobbered(unsigned PhysReg) {
+    PhysRegsClobbered.set(PhysReg);
+  }
+
+  bool isClobbered(unsigned PhysReg) const {
+    return PhysRegsClobbered.test(PhysReg);
+  }
+
+  /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+  /// is some other operand that is using the specified register, either pick
+  /// a new register to use, or evict the previous reload and use this reg.
+  unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+                           AvailableSpills &Spills,
+                           std::vector<MachineInstr*> &MaybeDeadStores,
+                           SmallSet<unsigned, 8> &Rejected,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           VirtRegMap &VRM);
+
+  /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+  /// 'Rejected' set to remember which registers have been considered and
+  /// rejected for the reload. This avoids infinite looping in cases like
+  /// this:
+  /// t1 := op t2, t3
+  /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+  /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+  /// t1 <- desires r1
+  ///       sees r1 is taken by t2, tries t2's reload register r0
+  ///       sees r0 is taken by t3, tries t3's reload register r1
+  ///       sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + AvailableSpills &Spills, + std::vector &MaybeDeadStores, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + SmallSet Rejected; + return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected, + RegKills, KillOps, VRM); + } +}; + + +// ****************** // +// Utility Functions // +// ****************** // + +/// findSinglePredSuccessor - Return via reference a vector of machine basic +/// blocks each of which is a successor of the specified BB and has no other +/// predecessor. +static void findSinglePredSuccessor(MachineBasicBlock *MBB, + SmallVectorImpl &Succs) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->pred_size() == 1) + Succs.push_back(SuccMBB); + } +} + +/// InvalidateKill - Invalidate register kill information for a specific +/// register. This also unsets the kills marker on the last kill operand. +static void InvalidateKill(unsigned Reg, + const TargetRegisterInfo* TRI, + BitVector &RegKills, + std::vector &KillOps) { + if (RegKills[Reg]) { + KillOps[Reg]->setIsKill(false); + KillOps[Reg] = NULL; + RegKills.reset(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + if (RegKills[*SR]) { + KillOps[*SR]->setIsKill(false); + KillOps[*SR] = NULL; + RegKills.reset(*SR); + } + } + } +} + +/// InvalidateKills - MI is going to be deleted. If any of its operands are +/// marked kill, then invalidate the information. +static void InvalidateKills(MachineInstr &MI, + const TargetRegisterInfo* TRI, + BitVector &RegKills, + std::vector &KillOps, + SmallVector *KillRegs = NULL) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (KillRegs) + KillRegs->push_back(Reg); + assert(Reg < KillOps.size()); + if (KillOps[Reg] == &MO) { + KillOps[Reg] = NULL; + RegKills.reset(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + if (RegKills[*SR]) { + KillOps[*SR] = NULL; + RegKills.reset(*SR); + } + } + } + } +} + +/// InvalidateRegDef - If the def operand of the specified def MI is now dead +/// (since it's spill instruction is removed), mark it isDead. Also checks if +/// the def MI has other definition operands that are not dead. Returns it by +/// reference. +static bool InvalidateRegDef(MachineBasicBlock::iterator I, + MachineInstr &NewDef, unsigned Reg, + bool &HasLiveDef) { + // Due to remat, it's possible this reg isn't being reused. That is, + // the def of this reg (by prev MI) is now dead. + MachineInstr *DefMI = I; + MachineOperand *DefOp = NULL; + for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = DefMI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + if (MO.getReg() == Reg) + DefOp = &MO; + else if (!MO.isDead()) + HasLiveDef = true; + } + } + if (!DefOp) + return false; + + bool FoundUse = false, Done = false; + MachineBasicBlock::iterator E = &NewDef; + ++I; ++E; + for (; !Done && I != E; ++I) { + MachineInstr *NMI = I; + for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = NMI->getOperand(j); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + if (MO.isUse()) + FoundUse = true; + Done = true; // Stop after scanning all the operands of this MI. 
+ } + } + if (!FoundUse) { + // Def is dead! + DefOp->setIsDead(); + return true; + } + return false; +} + +/// UpdateKills - Track and update kill info. If a MI reads a register that is +/// marked kill, then it must be due to register reuse. Transfer the kill info +/// over. +static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, + BitVector &RegKills, + std::vector &KillOps) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { + // That can't be right. Register is killed but not re-defined and it's + // being reused. Let's fix that. + KillOps[Reg]->setIsKill(false); + KillOps[Reg] = NULL; + RegKills.reset(Reg); + if (!MI.isRegTiedToDefOperand(i)) + // Unless it's a two-address operand, this is the new kill. + MO.setIsKill(); + } + if (MO.isKill()) { + RegKills.set(Reg); + KillOps[Reg] = &MO; + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + RegKills.set(*SR); + KillOps[*SR] = &MO; + } + } + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + RegKills.reset(Reg); + KillOps[Reg] = NULL; + // It also defines (or partially define) aliases. + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + RegKills.reset(*SR); + KillOps[*SR] = NULL; + } + } +} + +/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// +static void ReMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + unsigned DestReg, unsigned Reg, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + VirtRegMap &VRM) { + TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg)); + MachineInstr *NewMI = prior(MII); + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned VirtReg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) + continue; + assert(MO.isUse()); + unsigned SubIdx = MO.getSubReg(); + unsigned Phys = VRM.getPhys(VirtReg); + assert(Phys); + unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys; + MO.setReg(RReg); + MO.setSubReg(0); + } + ++NumReMats; +} + +/// findSuperReg - Find the SubReg's super-register of given register class +/// where its SubIdx sub-register is SubReg. +static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, + unsigned SubIdx, const TargetRegisterInfo *TRI) { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) { + unsigned Reg = *I; + if (TRI->getSubReg(Reg, SubIdx) == SubReg) + return Reg; + } + return 0; +} + +// ******************************** // +// Available Spills Implementation // +// ******************************** // + +/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified +/// stackslot register. The register is still available but is no longer +/// allowed to be modifed. 
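+/// (This typically happens after the value has been copied out of the
+/// register: the copy may still be read, so the source must be preserved.)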
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    I++;
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+    DOUT << "PhysReg " << TRI->getName(PhysReg)
+         << " copied, it is available for use but can no longer be modified\n";
+  }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    disallowClobberPhysRegOnly(*AS);
+  disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    PhysRegsAvailable.erase(I++);
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+    DOUT << "PhysReg " << TRI->getName(PhysReg)
+         << " clobbered, invalidating ";
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+    else
+      DOUT << "SS#" << SlotOrReMat << "\n";
+  }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    ClobberPhysRegOnly(*AS);
+  ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is carried over into
+/// the specified MBB. Add available physical registers as potential
+/// live-in's. If they are reused in the MBB, they will be added to the
+/// live-in set, which keeps the register scavenger and the post-allocation
+/// scheduler consistent.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+                                               BitVector &RegKills,
+                                               std::vector<MachineOperand*> &KillOps) {
+  std::set<unsigned> NotAvailable;
+  for (std::multimap<unsigned, int>::iterator
+         I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+       I != E; ++I) {
+    unsigned Reg = I->first;
+    const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+    // FIXME: A temporary workaround. We can't reuse available value if it's
+    // not safe to move the def of the virtual register's class. e.g.
+    // X86::RFP* register classes. Do not add it as a live-in.
+    if (!TII->isSafeToMoveRegClassDefs(RC))
+      // This is no longer available.
+      NotAvailable.insert(Reg);
+    else {
+      MBB.addLiveIn(Reg);
+      InvalidateKill(Reg, TRI, RegKills, KillOps);
+    }
+
+    // Skip over the same register.
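+    // (PhysRegsAvailable is a multimap, so one physreg may be recorded for
+    // several slots or remat ids; advance past all entries for this Reg.)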
+ std::multimap::iterator NI = next(I); + while (NI != E && NI->first == Reg) { + ++I; + ++NI; + } + } + + for (std::set::iterator I = NotAvailable.begin(), + E = NotAvailable.end(); I != E; ++I) { + ClobberPhysReg(*I); + for (const unsigned *SubRegs = TRI->getSubRegisters(*I); + *SubRegs; ++SubRegs) + ClobberPhysReg(*SubRegs); + } +} + +/// ModifyStackSlotOrReMat - This method is called when the value in a stack +/// slot changes. This removes information about which register the previous +/// value for this slot lives in (as the previous value is dead now). +void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { + std::map::iterator It = + SpillSlotsOrReMatsAvailable.find(SlotOrReMat); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + SpillSlotsOrReMatsAvailable.erase(It); + + // This register may hold the value of multiple stack slots, only remove this + // stack slot from the set of values the register contains. + std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); + for (; ; ++I) { + assert(I != PhysRegsAvailable.end() && I->first == Reg && + "Map inverse broken!"); + if (I->second == SlotOrReMat) break; + } + PhysRegsAvailable.erase(I); +} + +// ************************** // +// Reuse Info Implementation // +// ************************** // + +/// GetRegForReload - We are about to emit a reload into PhysReg. If there +/// is some other operand that is using the specified register, either pick +/// a new register to use, or evict the previous reload and use this reg. +unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, + AvailableSpills &Spills, + std::vector &MaybeDeadStores, + SmallSet &Rejected, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget() + .getInstrInfo(); + + if (Reuses.empty()) return PhysReg; // This is most often empty. + + for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { + ReusedOp &Op = Reuses[ro]; + // If we find some other reuse that was supposed to use this register + // exactly for its reload, we can change this reload to use ITS reload + // register. That is, unless its reload register has already been + // considered and subsequently rejected because it has also been reused + // by another operand. + if (Op.PhysRegReused == PhysReg && + Rejected.count(Op.AssignedPhysReg) == 0) { + // Yup, use the reload register that we didn't use before. + unsigned NewReg = Op.AssignedPhysReg; + Rejected.insert(PhysReg); + return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected, + RegKills, KillOps, VRM); + } else { + // Otherwise, we might also have a problem if a previously reused + // value aliases the new register. If so, codegen the previous reload + // and use this one. + unsigned PRRU = Op.PhysRegReused; + const TargetRegisterInfo *TRI = Spills.getRegInfo(); + if (TRI->areAliases(PRRU, PhysReg)) { + // Okay, we found out that an alias of a reused register + // was used. This isn't good because it means we have + // to undo a previous reuse. + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterClass *AliasRC = + MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); + + // Copy Op out of the vector and remove it, we're going to insert an + // explicit load for it. + ReusedOp NewOp = Op; + Reuses.erase(Reuses.begin()+ro); + + // Ok, we're going to try to reload the assigned physreg into the + // slot that we were supposed to in the first place. 
However, that + // register could hold a reuse. Check to see if it conflicts or + // would prefer us to use a different register. + unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg, + MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + + MachineBasicBlock::iterator MII = MI; + if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) { + ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM); + } else { + TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg, + NewOp.StackSlotOrReMat, AliasRC); + MachineInstr *LoadMI = prior(MII); + VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); + // Any stores to this stack slot are not dead anymore. + MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; + ++NumLoads; + } + Spills.ClobberPhysReg(NewPhysReg); + Spills.ClobberPhysReg(NewOp.PhysRegReused); + + unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg(); + unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg; + MI->getOperand(NewOp.Operand).setReg(RReg); + MI->getOperand(NewOp.Operand).setSubReg(0); + + Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); + --MII; + UpdateKills(*MII, TRI, RegKills, KillOps); + DOUT << '\t' << *MII; + + DOUT << "Reuse undone!\n"; + --NumReused; + + // Finally, PhysReg is now available, go ahead and use it. + return PhysReg; + } + } + } + return PhysReg; +} + +// ************************************************************************ // + +/// FoldsStackSlotModRef - Return true if the specified MI folds the specified +/// stack slot mod/ref. It also checks if it's possible to unfold the +/// instruction by having it define a specified physical register instead. +static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + VirtRegMap &VRM) { + if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) + return false; + + bool Found = false; + VirtRegMap::MI2VirtMapTy::const_iterator I, End; + for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { + unsigned VirtReg = I->second.first; + VirtRegMap::ModRef MR = I->second.second; + if (MR & VirtRegMap::isModRef) + if (VRM.getStackSlot(VirtReg) == SS) { + Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; + break; + } + } + if (!Found) + return false; + + // Does the instruction uses a register that overlaps the scratch register? + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!VRM.hasPhys(Reg)) + continue; + Reg = VRM.getPhys(Reg); + } + if (TRI->regsOverlap(PhysReg, Reg)) + return false; + } + return true; +} + +/// FindFreeRegister - Find a free register of a given register class by looking +/// at (at most) the last two machine instructions. +static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, + MachineBasicBlock &MBB, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + BitVector &AllocatableRegs) { + BitVector Defs(TRI->getNumRegs()); + BitVector Uses(TRI->getNumRegs()); + SmallVector LocalUses; + SmallVector Kills; + + // Take a look at 2 instructions at most. 
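+  // A register killed by a preceding instruction, and neither redefined nor
+  // used since, is free to serve as the scratch register.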
+  for (unsigned Count = 0; Count < 2; ++Count) {
+    if (MII == MBB.begin())
+      break;
+    MachineInstr *PrevMI = prior(MII);
+    for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = PrevMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue;
+      unsigned Reg = MO.getReg();
+      if (MO.isDef()) {
+        Defs.set(Reg);
+        for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+          Defs.set(*AS);
+      } else {
+        LocalUses.push_back(Reg);
+        if (MO.isKill() && AllocatableRegs[Reg])
+          Kills.push_back(Reg);
+      }
+    }
+
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned Kill = Kills[i];
+      if (!Defs[Kill] && !Uses[Kill] &&
+          TRI->getPhysicalRegisterRegClass(Kill) == RC)
+        return Kill;
+    }
+    for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+      unsigned Reg = LocalUses[i];
+      Uses.set(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.set(*AS);
+    }
+
+    MII = PrevMI;
+  }
+
+  return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == VirtReg)
+      MO.setReg(PhysReg);
+  }
+}
+
+namespace {
+  struct RefSorter {
+    bool operator()(const std::pair<MachineInstr*, int> &A,
+                    const std::pair<MachineInstr*, int> &B) {
+      return A.second < B.second;
+    }
+  };
+}
+
+// ***************************** //
+// Local Spiller Implementation  //
+// ***************************** //
+
+class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter {
+  MachineRegisterInfo *RegInfo;
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+  BitVector AllocatableRegs;
+  DenseMap<MachineInstr*, unsigned> DistanceMap;
+public:
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    RegInfo = &MF.getRegInfo();
+    TRI = MF.getTarget().getRegisterInfo();
+    TII = MF.getTarget().getInstrInfo();
+    AllocatableRegs = TRI->getAllocatableSet(MF);
+    DOUT << "\n**** Local spiller rewriting function '"
+         << MF.getFunction()->getName() << "':\n";
+    DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
+            " ****\n";
+    DEBUG(MF.dump());
+
+    // Spills - Keep track of which spilled values are available in physregs
+    // so that we can choose to reuse the physregs instead of emitting
+    // reloads. This is usually refreshed per basic block.
+    AvailableSpills Spills(TRI, TII);
+
+    // Keep track of kill information.
+    BitVector RegKills(TRI->getNumRegs());
+    std::vector<MachineOperand*> KillOps;
+    KillOps.resize(TRI->getNumRegs(), NULL);
+
+    // SingleEntrySuccs - Successor blocks which have a single predecessor.
+    SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+    SmallPtrSet<MachineBasicBlock*, 16> EarlyVisited;
+
+    // Traverse the basic blocks depth first.
+    MachineBasicBlock *Entry = MF.begin();
+    SmallPtrSet<MachineBasicBlock*, 16> Visited;
+    for (df_ext_iterator<MachineBasicBlock*,
+                         SmallPtrSet<MachineBasicBlock*, 16> >
+           DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+         DFI != E; ++DFI) {
+      MachineBasicBlock *MBB = *DFI;
+      if (!EarlyVisited.count(MBB))
+        RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+
+      // If this MBB is the only predecessor of a successor, keep the
+      // availability information and visit it next.
+      do {
+        // Keep visiting single predecessor successors as long as possible.
+        SinglePredSuccs.clear();
+        findSinglePredSuccessor(MBB, SinglePredSuccs);
+        if (SinglePredSuccs.empty())
+          MBB = 0;
+        else {
+          // FIXME: There may be more than one successor that has MBB as its
+          // only predecessor; we only follow the first one.
+ MBB = SinglePredSuccs[0]; + if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { + Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); + RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps); + } + } + } while (MBB); + + // Clear the availability info. + Spills.clear(); + } + + DOUT << "**** Post Machine Instrs ****\n"; + DEBUG(MF.dump()); + + // Mark unused spill slots. + MachineFrameInfo *MFI = MF.getFrameInfo(); + int SS = VRM.getLowSpillSlot(); + if (SS != VirtRegMap::NO_STACK_SLOT) + for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS) + if (!VRM.isSpillSlotUsed(SS)) { + MFI->RemoveStackObject(SS); + ++NumDSS; + } + + return true; + } + +private: + + /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if + /// a scratch register is available. + /// xorq %r12, %r13 + /// addq %rax, -184(%rbp) + /// addq %r13, -184(%rbp) + /// ==> + /// xorq %r12, %r13 + /// movq -184(%rbp), %r12 + /// addq %rax, %r12 + /// addq %r13, %r12 + /// movq %r12, -184(%rbp) + bool OptimizeByUnfold2(unsigned VirtReg, int SS, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + std::vector &MaybeDeadStores, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + + MachineBasicBlock::iterator NextMII = next(MII); + if (NextMII == MBB.end()) + return false; + + if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) + return false; + + // Now let's see if the last couple of instructions happens to have freed up + // a register. + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs); + if (!PhysReg) + return false; + + MachineFunction &MF = *MBB.getParent(); + TRI = MF.getTarget().getRegisterInfo(); + MachineInstr &MI = *MII; + if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM)) + return false; + + // If the next instruction also folds the same SS modref and can be unfoled, + // then it's worthwhile to issue a load from SS into the free register and + // then unfold these instructions. + if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM)) + return false; + + // Load from SS to the spare physical register. + TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC); + // This invalidates Phys. + Spills.ClobberPhysReg(PhysReg); + // Remember it's available. + Spills.addAvailable(SS, PhysReg); + MaybeDeadStores[SS] = NULL; + + // Unfold current MI. + SmallVector NewMIs; + if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) + assert(0 && "Unable unfold the load / store folding instruction!"); + assert(NewMIs.size() == 1); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + VRM.transferRestorePts(&MI, NewMIs[0]); + MII = MBB.insert(MII, NewMIs[0]); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + ++NumModRefUnfold; + + // Unfold next instructions that fold the same SS. 
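+    // Each iteration unfolds one more instruction that folds the same stack
+    // slot, rewriting it to operate on the scratch register instead.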
+ do { + MachineInstr &NextMI = *NextMII; + NextMII = next(NextMII); + NewMIs.clear(); + if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) + assert(0 && "Unable unfold the load / store folding instruction!"); + assert(NewMIs.size() == 1); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + VRM.transferRestorePts(&NextMI, NewMIs[0]); + MBB.insert(NextMII, NewMIs[0]); + InvalidateKills(NextMI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&NextMI); + MBB.erase(&NextMI); + ++NumModRefUnfold; + } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM)); + + // Store the value back into SS. + TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC); + MachineInstr *StoreMI = prior(NextMII); + VRM.addSpillSlotUse(SS, StoreMI); + VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); + + return true; + } + + /// OptimizeByUnfold - Turn a store folding instruction into a load folding + /// instruction. e.g. + /// xorl %edi, %eax + /// movl %eax, -32(%ebp) + /// movl -36(%ebp), %eax + /// orl %eax, -32(%ebp) + /// ==> + /// xorl %edi, %eax + /// orl -36(%ebp), %eax + /// mov %eax, -32(%ebp) + /// This enables unfolding optimization for a subsequent instruction which will + /// also eliminate the newly introduced store instruction. + bool OptimizeByUnfold(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + std::vector &MaybeDeadStores, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + MachineFunction &MF = *MBB.getParent(); + MachineInstr &MI = *MII; + unsigned UnfoldedOpc = 0; + unsigned UnfoldPR = 0; + unsigned UnfoldVR = 0; + int FoldedSS = VirtRegMap::NO_STACK_SLOT; + VirtRegMap::MI2VirtMapTy::const_iterator I, End; + for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { + // Only transform a MI that folds a single register. + if (UnfoldedOpc) + return false; + UnfoldVR = I->second.first; + VirtRegMap::ModRef MR = I->second.second; + // MI2VirtMap be can updated which invalidate the iterator. + // Increment the iterator first. + ++I; + if (VRM.isAssignedReg(UnfoldVR)) + continue; + // If this reference is not a use, any previous store is now dead. + // Otherwise, the store to this stack slot is not dead anymore. + FoldedSS = VRM.getStackSlot(UnfoldVR); + MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; + if (DeadStore && (MR & VirtRegMap::isModRef)) { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); + if (!PhysReg || !DeadStore->readsRegister(PhysReg)) + continue; + UnfoldPR = PhysReg; + UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), + false, true); + } + } + + if (!UnfoldedOpc) { + if (!UnfoldVR) + return false; + + // Look for other unfolding opportunities. 
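+      // (OptimizeByUnfold2 handles the case where a run of instructions
+      // folds the same stack slot and a scratch register is free.)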
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII, + MaybeDeadStores, Spills, RegKills, KillOps, VRM); + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) + continue; + unsigned VirtReg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) + continue; + if (VRM.isAssignedReg(VirtReg)) { + unsigned PhysReg = VRM.getPhys(VirtReg); + if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + } else if (VRM.isReMaterialized(VirtReg)) + continue; + int SS = VRM.getStackSlot(VirtReg); + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + if (PhysReg) { + if (TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + continue; + } + if (VRM.hasPhys(VirtReg)) { + PhysReg = VRM.getPhys(VirtReg); + if (!TRI->regsOverlap(PhysReg, UnfoldPR)) + continue; + } + + // Ok, we'll need to reload the value into a register which makes + // it impossible to perform the store unfolding optimization later. + // Let's see if it is possible to fold the load if the store is + // unfolded. This allows us to perform the store unfolding + // optimization. + SmallVector NewMIs; + if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { + assert(NewMIs.size() == 1); + MachineInstr *NewMI = NewMIs.back(); + NewMIs.clear(); + int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); + assert(Idx != -1); + SmallVector Ops; + Ops.push_back(Idx); + MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS); + if (FoldedMI) { + VRM.addSpillSlotUse(SS, FoldedMI); + if (!VRM.hasPhys(UnfoldVR)) + VRM.assignVirt2Phys(UnfoldVR, UnfoldPR); + VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + MII = MBB.insert(MII, FoldedMI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + MF.DeleteMachineInstr(NewMI); + return true; + } + MF.DeleteMachineInstr(NewMI); + } + } + + return false; + } + + /// CommuteToFoldReload - + /// Look for + /// r1 = load fi#1 + /// r1 = op r1, r2 + /// store r1, fi#1 + /// + /// If op is commutable and r2 is killed, then we can xform these to + /// r2 = op r2, fi#1 + /// store r2, fi#1 + bool CommuteToFoldReload(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + unsigned VirtReg, unsigned SrcReg, int SS, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector &KillOps, + const TargetRegisterInfo *TRI, + VirtRegMap &VRM) { + if (MII == MBB.begin() || !MII->killsRegister(SrcReg)) + return false; + + MachineFunction &MF = *MBB.getParent(); + MachineInstr &MI = *MII; + MachineBasicBlock::iterator DefMII = prior(MII); + MachineInstr *DefMI = DefMII; + const TargetInstrDesc &TID = DefMI->getDesc(); + unsigned NewDstIdx; + if (DefMII != MBB.begin() && + TID.isCommutable() && + TII->CommuteChangesDestination(DefMI, NewDstIdx)) { + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) + return false; + MachineInstr *ReloadMI = prior(DefMII); + int FrameIdx; + unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); + if (DestReg != SrcReg || FrameIdx != SS) + return false; + int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); + if (UseIdx == -1) + return false; + unsigned DefIdx; + if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) + return false; + assert(DefMI->getOperand(DefIdx).isReg() && + DefMI->getOperand(DefIdx).getReg() == 
SrcReg); + + // Now commute def instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); + if (!CommutedMI) + return false; + SmallVector Ops; + Ops.push_back(NewDstIdx); + MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS); + // Not needed since foldMemoryOperand returns new MI. + MF.DeleteMachineInstr(CommutedMI); + if (!FoldedMI) + return false; + + VRM.addSpillSlotUse(SS, FoldedMI); + VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + // Insert new def MI and spill MI. + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC); + MII = prior(MII); + MachineInstr *StoreMI = MII; + VRM.addSpillSlotUse(SS, StoreMI); + VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); + MII = MBB.insert(MII, FoldedMI); // Update MII to backtrack. + + // Delete all 3 old instructions. + InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(ReloadMI); + MBB.erase(ReloadMI); + InvalidateKills(*DefMI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(DefMI); + MBB.erase(DefMI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + + // If NewReg was previously holding value of some SS, it's now clobbered. + // This has to be done now because it's a physical register. When this + // instruction is re-visited, it's ignored. + Spills.ClobberPhysReg(NewReg); + + ++NumCommutes; + return true; + } + + return false; + } + + /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if + /// the last store to the same slot is now dead. If so, remove the last store. + void SpillRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + int Idx, unsigned PhysReg, int StackSlot, + const TargetRegisterClass *RC, + bool isAvailable, MachineInstr *&LastStore, + AvailableSpills &Spills, + SmallSet &ReMatDefs, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + + TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); + MachineInstr *StoreMI = next(MII); + VRM.addSpillSlotUse(StackSlot, StoreMI); + DOUT << "Store:\t" << *StoreMI; + + // If there is a dead store to this stack slot, nuke it now. + if (LastStore) { + DOUT << "Removed dead store:\t" << *LastStore; + ++NumDSE; + SmallVector KillRegs; + InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); + MachineBasicBlock::iterator PrevMII = LastStore; + bool CheckDef = PrevMII != MBB.begin(); + if (CheckDef) + --PrevMII; + VRM.RemoveMachineInstrFromMaps(LastStore); + MBB.erase(LastStore); + if (CheckDef) { + // Look at defs of killed registers on the store. Mark the defs + // as dead since the store has been deleted and they aren't + // being reused. + for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { + bool HasOtherDef = false; + if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) { + MachineInstr *DeadDef = PrevMII; + if (ReMatDefs.count(DeadDef) && !HasOtherDef) { + // FIXME: This assumes a remat def does not have side + // effects. + VRM.RemoveMachineInstrFromMaps(DeadDef); + MBB.erase(DeadDef); + ++NumDRM; + } + } + } + } + } + + LastStore = next(MII); + + // If the stack slot value was previously available in some other + // register, change it now. Otherwise, make the register available, + // in PhysReg. 
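+    // Invalidate any stale availability info for this slot and register,
+    // then record that PhysReg now holds the slot's value.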
+    Spills.ModifyStackSlotOrReMat(StackSlot);
+    Spills.ClobberPhysReg(PhysReg);
+    Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+    ++NumStores;
+  }
+
+  /// TransferDeadness - An identity copy definition is dead and it's being
+  /// removed. Find the last def or use and mark it as dead / kill.
+  void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+                        unsigned Reg, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps,
+                        VirtRegMap &VRM) {
+    SmallPtrSet<MachineInstr*, 4> Seens;
+    SmallVector<std::pair<MachineInstr*, int>, 8> Refs;
+    for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
+           RE = RegInfo->reg_end(); RI != RE; ++RI) {
+      MachineInstr *UDMI = &*RI;
+      if (UDMI->getParent() != MBB)
+        continue;
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+      if (DI == DistanceMap.end() || DI->second > CurDist)
+        continue;
+      if (Seens.insert(UDMI))
+        Refs.push_back(std::make_pair(UDMI, DI->second));
+    }
+
+    if (Refs.empty())
+      return;
+    std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+    while (!Refs.empty()) {
+      MachineInstr *LastUDMI = Refs.back().first;
+      Refs.pop_back();
+
+      MachineOperand *LastUD = NULL;
+      for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = LastUDMI->getOperand(i);
+        if (!MO.isReg() || MO.getReg() != Reg)
+          continue;
+        if (!LastUD || (LastUD->isUse() && MO.isDef()))
+          LastUD = &MO;
+        if (LastUDMI->isRegTiedToDefOperand(i))
+          break;
+      }
+      if (LastUD->isDef()) {
+        // If the instruction has no side effect, delete it and propagate
+        // backward further. Otherwise, mark it dead and we are done.
+        const TargetInstrDesc &TID = LastUDMI->getDesc();
+        if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+            TID.hasUnmodeledSideEffects()) {
+          LastUD->setIsDead();
+          break;
+        }
+        VRM.RemoveMachineInstrFromMaps(LastUDMI);
+        MBB->erase(LastUDMI);
+      } else {
+        LastUD->setIsKill();
+        RegKills.set(Reg);
+        KillOps[Reg] = LastUD;
+        break;
+      }
+    }
+  }
+
+  /// RewriteMBB - Keep track of which spills are available even after the
+  /// register allocator is done with them. If possible, avoid reloading vregs.
+  void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+                  LiveIntervals *LIs,
+                  AvailableSpills &Spills, BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps) {
+
+    DOUT << "\n**** Local spiller rewriting MBB '"
+         << MBB.getBasicBlock()->getName() << "':\n";
+
+    MachineFunction &MF = *MBB.getParent();
+
+    // MaybeDeadStores - When we need to write a value back into a stack slot,
+    // keep track of the inserted store. If the stack slot value is never read
+    // (because the value was used from some available register, for example),
+    // and subsequently stored to, the original store is dead. This map keeps
+    // track of inserted stores that are not used. If we see a subsequent
+    // store to the same stack slot, the original store is deleted.
+    std::vector<MachineInstr*> MaybeDeadStores;
+    MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+    // ReMatDefs - These are rematerializable def MIs which are not deleted.
+    SmallSet<MachineInstr*, 4> ReMatDefs;
+
+    // Clear kill info.
+ + // Clear kill info. + SmallSet<unsigned, 4> KilledMIRegs; + RegKills.reset(); + KillOps.clear(); + KillOps.resize(TRI->getNumRegs(), NULL); + + unsigned Dist = 0; + DistanceMap.clear(); + for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); + MII != E; ) { + MachineBasicBlock::iterator NextMII = next(MII); + + VirtRegMap::MI2VirtMapTy::const_iterator I, End; + bool Erased = false; + bool BackTracked = false; + if (OptimizeByUnfold(MBB, MII, + MaybeDeadStores, Spills, RegKills, KillOps, VRM)) + NextMII = next(MII); + + MachineInstr &MI = *MII; + + if (VRM.hasEmergencySpills(&MI)) { + // Spill physical register(s) in the rare case the allocator has run out + // of registers to allocate. + SmallSet<int, 4> UsedSS; + std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI); + for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { + unsigned PhysReg = EmSpills[i]; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(PhysReg); + assert(RC && "Unable to determine register class!"); + int SS = VRM.getEmergencySpillSlot(RC); + if (UsedSS.count(SS)) + assert(0 && "Need to spill more than one physical register!"); + UsedSS.insert(SS); + TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC); + MachineInstr *StoreMI = prior(MII); + VRM.addSpillSlotUse(SS, StoreMI); + TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC); + MachineInstr *LoadMI = next(MII); + VRM.addSpillSlotUse(SS, LoadMI); + ++NumPSpills; + } + NextMII = next(MII); + } + + // Insert restores here if asked to. + if (VRM.isRestorePt(&MI)) { + std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI); + for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { + unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. + if (!VRM.getPreSplitReg(VirtReg)) + continue; // Split interval spilled again. + unsigned Phys = VRM.getPhys(VirtReg); + RegInfo->setPhysRegUsed(Phys); + + // Check if the value being restored is available. If so, it must be + // from a predecessor BB that falls through into this BB. We do not + // expect: + // BB1: + // r1 = load fi#1 + // ... + // = r1 + // ... # r1 not clobbered + // ... + // = load fi#1 + bool DoReMat = VRM.isReMaterialized(VirtReg); + int SSorRMId = DoReMat + ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg); + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); + if (InReg == Phys) { + // If the value is already available in the expected register, save + // a reload / remat. + if (SSorRMId) + DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << SSorRMId; + DOUT << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(Phys) << "\n"; + ++NumOmitted; + continue; + } else if (InReg && InReg != Phys) { + if (SSorRMId) + DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << SSorRMId; + DOUT << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" by copying it into physreg " + << TRI->getName(Phys) << "\n"; + + // If the reloaded / remat value is available in another register, + // copy it to the desired register. + TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC); + + // This invalidates Phys. + Spills.ClobberPhysReg(Phys); + // Remember it's available. + Spills.addAvailable(SSorRMId, Phys);
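The three restore cases above (reuse in place, copy from another register, or reload) reduce to one lookup in a slot-to-register availability map. A rough standalone model, with made-up slot and register numbers and printf standing in for the emitted instructions:

    #include <cstdio>
    #include <map>

    // Toy AvailableSpills: which register, if any, holds each slot's value.
    static std::map<int, unsigned> Available;

    // Restore 'Slot' into 'AssignedReg', reusing or copying an already
    // available value instead of reloading from memory.
    static void restore(int Slot, unsigned AssignedReg) {
      std::map<int, unsigned>::iterator I = Available.find(Slot);
      if (I != Available.end() && I->second == AssignedReg)
        std::printf("slot %d: reuse r%u, no reload\n", Slot, AssignedReg);
      else if (I != Available.end())
        std::printf("slot %d: copy r%u -> r%u\n", Slot, I->second, AssignedReg);
      else
        std::printf("slot %d: reload into r%u\n", Slot, AssignedReg);
      Available[Slot] = AssignedReg; // the value now lives in AssignedReg
    }

    int main() {
      Available[7] = 3;
      restore(7, 3); // already in the right register
      restore(7, 5); // available elsewhere: copy
      restore(9, 2); // not available: reload
      return 0;
    }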
+ // Mark it killed. + MachineInstr *CopyMI = prior(MII); + MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); + KillOpnd->setIsKill(); + UpdateKills(*CopyMI, TRI, RegKills, KillOps); + + DOUT << '\t' << *CopyMI; + ++NumCopified; + continue; + } + + if (VRM.isReMaterialized(VirtReg)) { + ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM); + } else { + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC); + MachineInstr *LoadMI = prior(MII); + VRM.addSpillSlotUse(SSorRMId, LoadMI); + ++NumLoads; + } + + // This invalidates Phys. + Spills.ClobberPhysReg(Phys); + // Remember it's available. + Spills.addAvailable(SSorRMId, Phys); + + UpdateKills(*prior(MII), TRI, RegKills, KillOps); + DOUT << '\t' << *prior(MII); + } + } + + // Insert spills here if asked to. + if (VRM.isSpillPt(&MI)) { + std::vector<std::pair<unsigned,bool> > &SpillRegs = + VRM.getSpillPtSpills(&MI); + for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { + unsigned VirtReg = SpillRegs[i].first; + bool isKill = SpillRegs[i].second; + if (!VRM.getPreSplitReg(VirtReg)) + continue; // Split interval spilled again. + const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); + unsigned Phys = VRM.getPhys(VirtReg); + int StackSlot = VRM.getStackSlot(VirtReg); + TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); + MachineInstr *StoreMI = next(MII); + VRM.addSpillSlotUse(StackSlot, StoreMI); + DOUT << "Store:\t" << *StoreMI; + VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); + } + NextMII = next(MII); + } + + /// ReusedOperands - Keep track of operand reuse in case we need to undo + /// reuse. + ReuseInfo ReusedOperands(MI, TRI); + SmallVector<unsigned, 4> VirtUseOps; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; // Ignore non-register operands. + + unsigned VirtReg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { + // Ignore physregs for spilling, but remember that they are used by this + // function. + RegInfo->setPhysRegUsed(VirtReg); + continue; + } + + // We want to process implicit virtual register uses first. + if (MO.isImplicit()) + // If the virtual register is implicitly defined, emit an implicit_def + // before so the scavenger knows it's "defined". + VirtUseOps.insert(VirtUseOps.begin(), i); + else + VirtUseOps.push_back(i); + } + + // Process all of the spilled uses and all non-spilled reg references. + SmallVector<int, 4> PotentialDeadStoreSlots; + KilledMIRegs.clear(); + for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { + unsigned i = VirtUseOps[j]; + MachineOperand &MO = MI.getOperand(i); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register?"); + + unsigned SubIdx = MO.getSubReg(); + if (VRM.isAssignedReg(VirtReg)) { + // This virtual register was assigned a physreg! + unsigned Phys = VRM.getPhys(VirtReg); + RegInfo->setPhysRegUsed(Phys); + if (MO.isDef()) + ReusedOperands.markClobbered(Phys); + unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + if (VRM.isImplicitlyDefined(VirtReg)) + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg); + continue; + } + + // This virtual register is now known to be a spilled value.
+ if (!MO.isUse()) + continue; // Handle defs in the loop below (handle use&def here though) + + bool AvoidReload = false; + if (LIs->hasInterval(VirtReg)) { + LiveInterval &LI = LIs->getInterval(VirtReg); + if (!LI.liveAt(LIs->getUseIndex(LI.beginNumber()))) + // Must be defined by an implicit def. It should not be spilled. Note, + // this is for correctness reasons, e.g. + // 8 %reg1024 = IMPLICIT_DEF + // 12 %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2 + // The live range [12, 14) is not part of the r1024 live interval since + // it's defined by an implicit def. It will not conflict with the live + // interval of r1025. Now suppose both registers are spilled; you can + // easily see a situation where both registers are reloaded before + // the INSERT_SUBREG, into target registers that would overlap. + AvoidReload = true; + } + + bool DoReMat = VRM.isReMaterialized(VirtReg); + int SSorRMId = DoReMat + ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg); + int ReuseSlot = SSorRMId; + + // Check to see if this stack slot is available. + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); + + // If this is a sub-register use, make sure the reuse register is in the + // right register class. For example, for x86 not all of the 32-bit + // registers have accessible sub-registers. + // Similarly so for EXTRACT_SUBREG. Consider this: + // EDI = op + // MOV32_mr fi#1, EDI + // ... + // = EXTRACT_SUBREG fi#1 + // fi#1 is available in EDI, but it cannot be reused because it's not in + // the right register file. + if (PhysReg && !AvoidReload && + (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) { + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + if (!RC->contains(PhysReg)) + PhysReg = 0; + } + + if (PhysReg && !AvoidReload) { + // This spilled operand might be part of a two-address operand. If this + // is the case, then changing it will necessarily require changing the + // def part of the instruction as well. However, in some cases, we + // aren't allowed to modify the reused register. If none of these cases + // apply, reuse it. + bool CanReuse = true; + bool isTied = MI.isRegTiedToDefOperand(i); + if (isTied) { + // Okay, we have a two address operand. We can reuse this physreg as + // long as we are allowed to clobber the value and there isn't an + // earlier def that has already clobbered the physreg. + CanReuse = !ReusedOperands.isClobbered(PhysReg) && + Spills.canClobberPhysReg(PhysReg); + } + + if (CanReuse) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << ReuseSlot; + DOUT << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM.getPhys(VirtReg)) << "\n"; + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + + // The only technical detail we have is that we don't know that + // PhysReg won't be clobbered by a reloaded stack slot that occurs + // later in the instruction. In particular, consider 'op V1, V2'. + // If V1 is available in physreg R0, we would choose to reuse it + // here, instead of reloading it into the register the allocator + // indicated (say R1). However, V2 might have to be reloaded + // later, and it might indicate that it needs to live in R0.
When + // this occurs, we need to have information available that + // indicates it is safe to use R1 for the reload instead of R0. + // + // To further complicate matters, we might conflict with an alias, + // or R0 and R1 might not be compatible with each other. In this + // case, we actually insert a reload for V1 in R1, ensuring that + // we can get at R0 or its alias. + ReusedOperands.addReuse(i, ReuseSlot, PhysReg, + VRM.getPhys(VirtReg), VirtReg); + if (isTied) + // Only mark it clobbered if this is a use&def operand. + ReusedOperands.markClobbered(PhysReg); + ++NumReused; + + if (MI.getOperand(i).isKill() && + ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { + + // The store of this spilled value is potentially dead, but we + // won't know for certain until we've confirmed that the re-use + // above is valid, which means waiting until the other operands + // are processed. For now we just track the spill slot, we'll + // remove it after the other operands are processed if valid. + + PotentialDeadStoreSlots.push_back(ReuseSlot); + } + + // Mark it isKill if there are no other uses of the same virtual + // register and it's not a two-address operand. IsKill will be + // unset if reg is reused. + if (!isTied && KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + + continue; + } // CanReuse + + // Otherwise we have a situation where we have a two-address instruction + // whose mod/ref operand needs to be reloaded. This reload is already + // available in some register "PhysReg", but if we used PhysReg as the + // operand to our 2-addr instruction, the instruction would modify + // PhysReg. This isn't cool if something later uses PhysReg and expects + // to get its initial value. + // + // To avoid this problem, and to avoid doing a load right after a store, + // we emit a copy from PhysReg into the designated register for this + // operand. + unsigned DesignatedReg = VRM.getPhys(VirtReg); + assert(DesignatedReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); + + // If the mapped designated register is actually the physreg we have + // incoming, we don't need to insert a dead copy. + if (DesignatedReg == PhysReg) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << ReuseSlot; + DOUT << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"; + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + ReusedOperands.markClobbered(RReg); + ++NumReused; + continue; + } + + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + RegInfo->setPhysRegUsed(DesignatedReg); + ReusedOperands.markClobbered(DesignatedReg); + TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC); + + MachineInstr *CopyMI = prior(MII); + UpdateKills(*CopyMI, TRI, RegKills, KillOps); + + // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg); + + Spills.addAvailable(ReuseSlot, DesignatedReg); + unsigned RReg = + SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + DOUT << '\t' << *prior(MII); + ++NumReused; + continue; + } // if (PhysReg) + + // Otherwise, reload it and remember that we have it. + PhysReg = VRM.getPhys(VirtReg); + assert(PhysReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); + + RegInfo->setPhysRegUsed(PhysReg); + ReusedOperands.markClobbered(PhysReg); + if (AvoidReload) + ++NumAvoided; + else { + if (DoReMat) { + ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM); + } else { + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC); + MachineInstr *LoadMI = prior(MII); + VRM.addSpillSlotUse(SSorRMId, LoadMI); + ++NumLoads; + } + // This invalidates PhysReg. + Spills.ClobberPhysReg(PhysReg); + + // Any stores to this stack slot are not dead anymore. + if (!DoReMat) + MaybeDeadStores[SSorRMId] = NULL; + Spills.addAvailable(SSorRMId, PhysReg); + // Assumes this is the last use. IsKill will be unset if reg is reused + // unless it's a two-address operand. + if (!MI.isRegTiedToDefOperand(i) && + KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + + UpdateKills(*prior(MII), TRI, RegKills, KillOps); + DOUT << '\t' << *prior(MII); + } + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + } + + // Ok - now we can remove stores that have been confirmed dead. + for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { + // This was the last use and the spilled value is still available + // for reuse. That means the spill was unnecessary! + int PDSSlot = PotentialDeadStoreSlots[j]; + MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; + if (DeadStore) { + DOUT << "Removed dead store:\t" << *DeadStore; + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(DeadStore); + MBB.erase(DeadStore); + MaybeDeadStores[PDSSlot] = NULL; + ++NumDSE; + } + } + + + DOUT << '\t' << MI; + + + // If we have folded references to memory operands, make sure we clear all + // physical registers that may contain the value of the spilled virtual + // register. + SmallSet<int, 2> FoldedSS; + for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { + unsigned VirtReg = I->second.first; + VirtRegMap::ModRef MR = I->second.second; + DOUT << "Folded vreg: " << VirtReg << " MR: " << MR; + + // MI2VirtMap can be updated, which invalidates the iterator. + // Increment the iterator first. + ++I; + int SS = VRM.getStackSlot(VirtReg); + if (SS == VirtRegMap::NO_STACK_SLOT) + continue; + FoldedSS.insert(SS); + DOUT << " - StackSlot: " << SS << "\n";
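The hunk that follows turns a folded load into a copy when the slot's value is already live in a register, or deletes it outright when source and destination coincide. The case analysis in isolation (a sketch; the function and the "0 means not available" encoding are invented for illustration):

    #include <cstdio>

    // What should a folded load from a spill slot become, given the
    // register (if any) that already holds the slot's value?
    static const char *promoteLoad(unsigned DestReg, unsigned AvailReg) {
      if (AvailReg == 0) return "keep the load";
      if (AvailReg == DestReg) return "delete it (now-noop copy)";
      return "replace the load with a copy";
    }

    int main() {
      std::printf("%s\n", promoteLoad(1, 0)); // value not available
      std::printf("%s\n", promoteLoad(1, 1)); // already in DestReg
      std::printf("%s\n", promoteLoad(1, 2)); // available elsewhere
      return 0;
    }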
+ + // If this folded instruction is just a use, check to see if it's a + // straight load from the virt reg slot. + if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { + int FrameIdx; + unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); + if (DestReg && FrameIdx == SS) { + // If this spill slot is available, turn it into a copy (or nothing) + // instead of leaving it as a load! + if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { + DOUT << "Promoted Load To Copy: " << MI; + if (DestReg != InReg) { + const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); + TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); + MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); + unsigned SubIdx = DefMO->getSubReg(); + // Revisit the copy so we make sure to notice the effects of the + // operation on the destreg (either needing to RA it if it's + // virtual or needing to clobber any values if it's physical). + NextMII = &MI; + --NextMII; // backtrack to the copy. + // Propagate the sub-register index over. + if (SubIdx) { + DefMO = NextMII->findRegisterDefOperand(DestReg); + DefMO->setSubReg(SubIdx); + } + + // Mark it killed. + MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg); + KillOpnd->setIsKill(); + + BackTracked = true; + } else { + DOUT << "Removing now-noop copy: " << MI; + // Unset last kill since it's being reused. + InvalidateKill(InReg, TRI, RegKills, KillOps); + Spills.disallowClobberPhysReg(InReg); + } + + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + goto ProcessNextInst; + } + } else { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + SmallVector<MachineInstr*, 4> NewMIs; + if (PhysReg && + TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) { + MBB.insert(MII, NewMIs[0]); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + --NextMII; // backtrack to the unfolded instruction. + BackTracked = true; + goto ProcessNextInst; + } + } + } + + // If this reference is not a use, any previous store is now dead. + // Otherwise, the store to this stack slot is not dead anymore. + MachineInstr* DeadStore = MaybeDeadStores[SS]; + if (DeadStore) { + bool isDead = !(MR & VirtRegMap::isRef); + MachineInstr *NewStore = NULL; + if (MR & VirtRegMap::isModRef) { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + SmallVector<MachineInstr*, 4> NewMIs; + // We can reuse this physreg as long as we are allowed to clobber + // the value and there isn't an earlier def that has already clobbered + // the physreg. + if (PhysReg && + !ReusedOperands.isClobbered(PhysReg) && + Spills.canClobberPhysReg(PhysReg) && + !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! + MachineOperand *KillOpnd = + DeadStore->findRegisterUseOperand(PhysReg, true); + // Note, if the store is storing a sub-register, it's possible the + // super-register is needed below. + if (KillOpnd && !KillOpnd->getSubReg() && + TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ + MBB.insert(MII, NewMIs[0]); + NewStore = NewMIs[1]; + MBB.insert(MII, NewStore); + VRM.addSpillSlotUse(SS, NewStore); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + --NextMII; + --NextMII; // backtrack to the unfolded instruction. + BackTracked = true; + isDead = true; + ++NumSUnfold; + } + } + } + + if (isDead) { // Previous store is dead. + // If we get here, the store is dead, nuke it now.
+ DOUT << "Removed dead store:\t" << *DeadStore; + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(DeadStore); + MBB.erase(DeadStore); + if (!NewStore) + ++NumDSE; + } + + MaybeDeadStores[SS] = NULL; + if (NewStore) { + // Treat this store as a spill merged into a copy. That makes the + // stack slot value available. + VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod); + goto ProcessNextInst; + } + } + + // If the spill slot value is available, and this is a new definition of + // the value, the value is not available anymore. + if (MR & VirtRegMap::isMod) { + // Notice that the value in this stack slot has been modified. + Spills.ModifyStackSlotOrReMat(SS); + + // If this is *just* a mod of the value, check to see if this is just a + // store to the spill slot (i.e. the spill got merged into the copy). If + // so, realize that the vreg is available now, and add the store to the + // MaybeDeadStore info. + int StackSlot; + if (!(MR & VirtRegMap::isRef)) { + if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { + assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && + "Src hasn't been allocated yet?"); + + if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot, + Spills, RegKills, KillOps, TRI, VRM)) { + NextMII = next(MII); + BackTracked = true; + goto ProcessNextInst; + } + + // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark + // this as a potentially dead store in case there is a subsequent + // store into the stack slot without a read from it. + MaybeDeadStores[StackSlot] = &MI; + + // If the stack slot value was previously available in some other + // register, change it now. Otherwise, make the register + // available in PhysReg. + Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); + } + } + } + } + + // Process all of the spilled defs. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!(MO.isReg() && MO.getReg() && MO.isDef())) + continue; + + unsigned VirtReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + ++NumDCE; + DOUT << "Removing now-noop copy: " << MI; + SmallVector KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(KillRegs[0] == Dst || + TRI->isSubRegister(KillRegs[0], Dst) || + TRI->isSuperRegister(KillRegs[0], Dst)); + // Last def is now dead. + TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM); + } + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } + + // If it's not a no-op copy, it clobbers the value in the destreg. + Spills.ClobberPhysReg(VirtReg); + ReusedOperands.markClobbered(VirtReg); + + // Check to see if this instruction is a load from a stack slot into + // a register. If so, this provides the stack slot value in the reg. + int FrameIdx; + if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { + assert(DestReg == VirtReg && "Unknown load situation!"); + + // If it is a folded reference, then it's not safe to clobber. 
+ bool Folded = FoldedSS.count(FrameIdx); + // Otherwise, if it wasn't available, remember that it is now! + Spills.addAvailable(FrameIdx, DestReg, !Folded); + goto ProcessNextInst; + } + + continue; + } + + unsigned SubIdx = MO.getSubReg(); + bool DoReMat = VRM.isReMaterialized(VirtReg); + if (DoReMat) + ReMatDefs.insert(&MI); + + // The only vregs left are stack slot definitions. + int StackSlot = VRM.getStackSlot(VirtReg); + const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); + + // If this def is part of a two-address operand, make sure to execute + // the store from the correct physical register. + unsigned PhysReg; + unsigned TiedOp; + if (MI.isRegTiedToUseOperand(i, &TiedOp)) { + PhysReg = MI.getOperand(TiedOp).getReg(); + if (SubIdx) { + unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); + assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && + "Can't find corresponding super-register!"); + PhysReg = SuperReg; + } + } else { + PhysReg = VRM.getPhys(VirtReg); + if (ReusedOperands.isClobbered(PhysReg)) { + // Another def has taken the assigned physreg. It must have been a + // use&def which got it due to reuse. Undo the reuse! + PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); + } + } + + assert(PhysReg && "VR not assigned a physical register?"); + RegInfo->setPhysRegUsed(PhysReg); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + ReusedOperands.markClobbered(RReg); + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + + if (!MO.isDead()) { + MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; + SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true, + LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM); + NextMII = next(MII); + + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + { + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + ++NumDCE; + DOUT << "Removing now-noop copy: " << MI; + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + UpdateKills(*LastStore, TRI, RegKills, KillOps); + goto ProcessNextInst; + } + } + } + } + ProcessNextInst: + DistanceMap.insert(std::make_pair(&MI, Dist++)); + if (!Erased && !BackTracked) { + for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) + UpdateKills(*II, TRI, RegKills, KillOps); + } + MII = NextMII; + } + + } + +}; + +llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { + switch (RewriterOpt) { + default: assert(0 && "Unreachable!"); + case local: + return new LocalRewriter(); + case simple: + return new SimpleRewriter(); + case trivial: + return new TrivialRewriter(); + } +} diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h new file mode 100644 index 000000000000..bc830f72b0c6 --- /dev/null +++ b/lib/CodeGen/VirtRegRewriter.h @@ -0,0 +1,56 @@ +//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H +#define LLVM_CODEGEN_VIRTREGREWRITER_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Streams.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "VirtRegMap.h" +#include <map> + +// TODO: +// - Finish renaming Spiller -> Rewriter +// - SimpleSpiller +// - LocalSpiller + +namespace llvm { + + /// VirtRegRewriter interface: Implementations of this interface assign + /// spilled virtual registers to stack slots, rewriting the code. + struct VirtRegRewriter { + virtual ~VirtRegRewriter(); + virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, + LiveIntervals* LIs) = 0; + }; + + /// createVirtRegRewriter - Create and return a rewriter object, as specified + /// on the command line. + VirtRegRewriter* createVirtRegRewriter(); + +} + +#endif diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp new file mode 100644 index 000000000000..c0a1b849bcdf --- /dev/null +++ b/lib/CompilerDriver/Action.cpp @@ -0,0 +1,78 @@ +//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Action class - implementation and auxiliary functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/Action.h" + +#include "llvm/Support/CommandLine.h" +#include "llvm/System/Program.h" + +#include <iostream> +#include <stdexcept> + +using namespace llvm; +using namespace llvmc; + +extern cl::opt<bool> DryRun; +extern cl::opt<bool> VerboseMode; + +namespace { + int ExecuteProgram(const std::string& name, + const StrVector& args) { + sys::Path prog = sys::Program::FindProgramByName(name); + + if (prog.isEmpty()) + throw std::runtime_error("Can't find program '" + name + "'"); + if (!prog.canExecute()) + throw std::runtime_error("Program '" + name + "' is not executable."); + + // Build the command line vector and the redirects array. + const sys::Path* redirects[3] = {0,0,0}; + sys::Path stdout_redirect; + + std::vector<const char*> argv; + argv.reserve((args.size()+2)); + argv.push_back(name.c_str()); + + for (StrVector::const_iterator B = args.begin(), E = args.end(); + B!=E; ++B) { + if (*B == ">") { + ++B; + stdout_redirect.set(*B); + redirects[1] = &stdout_redirect; + } + else { + argv.push_back((*B).c_str()); + } + } + argv.push_back(0); // null terminate list.
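ExecuteProgram above treats a bare ">" token as a stdout-redirection marker and keeps everything else as a real argument. The same tokenizing convention, stripped of the llvm::sys plumbing (the argument values here are made up):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      const char *Raw[] = { "-o", "out.o", ">", "log.txt" };
      std::vector<std::string> Args(Raw, Raw + 4);
      std::vector<std::string> Argv;
      std::string StdoutRedirect;
      for (size_t i = 0; i < Args.size(); ++i) {
        if (Args[i] == ">" && i + 1 < Args.size())
          StdoutRedirect = Args[++i]; // consume the file name after '>'
        else
          Argv.push_back(Args[i]);    // ordinary argument
      }
      std::printf("redirect stdout to '%s', %u real args\n",
                  StdoutRedirect.c_str(), unsigned(Argv.size()));
      return 0;
    }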
+ // Invoke the program. + return sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]); + } + + void print_string (const std::string& str) { + std::cerr << str << ' '; + } +} + +int llvmc::Action::Execute() const { + if (DryRun || VerboseMode) { + std::cerr << Command_ << " "; + std::for_each(Args_.begin(), Args_.end(), print_string); + std::cerr << '\n'; + } + if (DryRun) + return 0; + else + return ExecuteProgram(Command_, Args_); +} diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt new file mode 100644 index 000000000000..153dd443cbf2 --- /dev/null +++ b/lib/CompilerDriver/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS support system) +set(LLVM_REQUIRES_EH 1) + +add_llvm_tool(llvmc + Action.cpp + CompilationGraph.cpp + llvmc.cpp + Plugin.cpp + Tool.cpp + ) diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp new file mode 100644 index 000000000000..dece4e8e0ae3 --- /dev/null +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -0,0 +1,536 @@ +//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Compilation graph - implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/CompilationGraph.h" +#include "llvm/CompilerDriver/Error.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/GraphWriter.h" + +#include <algorithm> +#include <cstring> +#include <iostream> +#include <iterator> +#include <limits> +#include <queue> +#include <stdexcept> + +using namespace llvm; +using namespace llvmc; + +extern cl::list<std::string> InputFilenames; +extern cl::list<std::string> Languages; + +namespace llvmc { + + const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { + LanguageMap::const_iterator Lang = this->find(File.getSuffix()); + if (Lang == this->end()) + throw std::runtime_error("Unknown suffix: " + File.getSuffix()); + return Lang->second; + } +} + +namespace { + + /// ChooseEdge - Return the edge with the maximum weight. + template <class C> + const Edge* ChooseEdge(const C& EdgesContainer, + const InputLanguagesSet& InLangs, + const std::string& NodeName = "root") { + const Edge* MaxEdge = 0; + unsigned MaxWeight = 0; + bool SingleMax = true; + + for (typename C::const_iterator B = EdgesContainer.begin(), + E = EdgesContainer.end(); B != E; ++B) { + const Edge* e = B->getPtr(); + unsigned EW = e->Weight(InLangs); + if (EW > MaxWeight) { + MaxEdge = e; + MaxWeight = EW; + SingleMax = true; + } else if (EW == MaxWeight) { + SingleMax = false; + } + } + + if (!SingleMax) + throw std::runtime_error("Node " + NodeName + + ": multiple maximal outward edges found!" + " Most probably a specification error."); + if (!MaxEdge) + throw std::runtime_error("Node " + NodeName + + ": no maximal outward edge found!" + " Most probably a specification error."); + return MaxEdge; + } + +}
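ChooseEdge above is "pick the maximum, but refuse ties": a strictly largest weight wins, and a shared maximum is a specification error. The selection rule on plain integers (a sketch; assumes a non-empty array):

    #include <cstdio>
    #include <stdexcept>

    // Index of the strictly largest weight; throws on a shared maximum,
    // just as ChooseEdge rejects ambiguous default edges.
    static unsigned uniqueMaxIndex(const unsigned *W, unsigned N) {
      unsigned Best = 0;
      bool Single = true;
      for (unsigned i = 1; i < N; ++i) {
        if (W[i] > W[Best]) { Best = i; Single = true; }
        else if (W[i] == W[Best]) Single = false;
      }
      if (!Single)
        throw std::runtime_error("multiple maximal edges");
      return Best;
    }

    int main() {
      unsigned Weights[] = { 1, 4, 2 };
      std::printf("edge %u wins\n", uniqueMaxIndex(Weights, 3)); // edge 1
      return 0;
    }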
+ const std::string& ToolName = Edg->ToolName(); + for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end(); + B != E; ++B) { + if ((*B)->ToolName() == ToolName) { + llvm::IntrusiveRefCntPtr(Edg).swap(*B); + return; + } + } + OutEdges.push_back(llvm::IntrusiveRefCntPtr(Edg)); +} + +CompilationGraph::CompilationGraph() { + NodesMap["root"] = Node(this); +} + +Node& CompilationGraph::getNode(const std::string& ToolName) { + nodes_map_type::iterator I = NodesMap.find(ToolName); + if (I == NodesMap.end()) + throw std::runtime_error("Node " + ToolName + " is not in the graph"); + return I->second; +} + +const Node& CompilationGraph::getNode(const std::string& ToolName) const { + nodes_map_type::const_iterator I = NodesMap.find(ToolName); + if (I == NodesMap.end()) + throw std::runtime_error("Node " + ToolName + " is not in the graph!"); + return I->second; +} + +// Find the tools list corresponding to the given language name. +const CompilationGraph::tools_vector_type& +CompilationGraph::getToolsVector(const std::string& LangName) const +{ + tools_map_type::const_iterator I = ToolsMap.find(LangName); + if (I == ToolsMap.end()) + throw std::runtime_error("No tool corresponding to the language " + + LangName + " found"); + return I->second; +} + +void CompilationGraph::insertNode(Tool* V) { + if (NodesMap.count(V->Name()) == 0) + NodesMap[V->Name()] = Node(this, V); +} + +void CompilationGraph::insertEdge(const std::string& A, Edge* Edg) { + Node& B = getNode(Edg->ToolName()); + if (A == "root") { + const char** InLangs = B.ToolPtr->InputLanguages(); + for (;*InLangs; ++InLangs) + ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr(Edg)); + NodesMap["root"].AddEdge(Edg); + } + else { + Node& N = getNode(A); + N.AddEdge(Edg); + } + // Increase the inward edge counter. + B.IncrInEdges(); +} + +// Pass input file through the chain until we bump into a Join node or +// a node that says that it is the last. +void CompilationGraph::PassThroughGraph (const sys::Path& InFile, + const Node* StartNode, + const InputLanguagesSet& InLangs, + const sys::Path& TempDir, + const LanguageMap& LangMap) const { + sys::Path In = InFile; + const Node* CurNode = StartNode; + + while(true) { + Tool* CurTool = CurNode->ToolPtr.getPtr(); + + if (CurTool->IsJoin()) { + JoinTool& JT = dynamic_cast(*CurTool); + JT.AddToJoinList(In); + break; + } + + Action CurAction = CurTool->GenerateAction(In, CurNode->HasChildren(), + TempDir, InLangs, LangMap); + + if (int ret = CurAction.Execute()) + throw error_code(ret); + + if (CurAction.StopCompilation()) + return; + + CurNode = &getNode(ChooseEdge(CurNode->OutEdges, + InLangs, + CurNode->Name())->ToolName()); + In = CurAction.OutFile(); + } +} + +// Find the head of the toolchain corresponding to the given file. +// Also, insert an input language into InLangs. +const Node* CompilationGraph:: +FindToolChain(const sys::Path& In, const std::string* ForceLanguage, + InputLanguagesSet& InLangs, const LanguageMap& LangMap) const { + + // Determine the input language. + const std::string& InLanguage = + ForceLanguage ? *ForceLanguage : LangMap.GetLanguage(In); + + // Add the current input language to the input language set. + InLangs.insert(InLanguage); + + // Find the toolchain for the input language. + const tools_vector_type& TV = getToolsVector(InLanguage); + if (TV.empty()) + throw std::runtime_error("No toolchain corresponding to language " + + InLanguage + " found"); + return &getNode(ChooseEdge(TV, InLangs)->ToolName()); +} + +// Helper function used by Build(). 
+// Traverses initial portions of the toolchains (up to the first Join node). +// This function is also responsible for handling the -x option. +void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs, + const sys::Path& TempDir, + const LanguageMap& LangMap) { + // This is related to -x option handling. + cl::list<std::string>::const_iterator xIter = Languages.begin(), + xBegin = xIter, xEnd = Languages.end(); + bool xEmpty = true; + const std::string* xLanguage = 0; + unsigned xPos = 0, xPosNext = 0, filePos = 0; + + if (xIter != xEnd) { + xEmpty = false; + xPos = Languages.getPosition(xIter - xBegin); + cl::list<std::string>::const_iterator xNext = llvm::next(xIter); + xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max() + : Languages.getPosition(xNext - xBegin); + xLanguage = (*xIter == "none") ? 0 : &(*xIter); + } + + // For each input file: + for (cl::list<std::string>::const_iterator B = InputFilenames.begin(), + CB = B, E = InputFilenames.end(); B != E; ++B) { + sys::Path In = sys::Path(*B); + + // Code for handling the -x option. + // Output: std::string* xLanguage (can be NULL). + if (!xEmpty) { + filePos = InputFilenames.getPosition(B - CB); + + if (xPos < filePos) { + if (filePos < xPosNext) { + xLanguage = (*xIter == "none") ? 0 : &(*xIter); + } + else { // filePos >= xPosNext + // Skip xIters while filePos > xPosNext + while (filePos > xPosNext) { + ++xIter; + xPos = xPosNext; + + cl::list<std::string>::const_iterator xNext = llvm::next(xIter); + if (xNext == xEnd) + xPosNext = std::numeric_limits<unsigned>::max(); + else + xPosNext = Languages.getPosition(xNext - xBegin); + xLanguage = (*xIter == "none") ? 0 : &(*xIter); + } + } + } + } + + // Find the toolchain corresponding to this file. + const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap); + // Pass file through the chain starting at head. + PassThroughGraph(In, N, InLangs, TempDir, LangMap); + } +} + +// Sort the nodes in topological order. +void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) { + std::queue<const Node*> Q; + Q.push(&getNode("root")); + + while (!Q.empty()) { + const Node* A = Q.front(); + Q.pop(); + Out.push_back(A); + for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); + EB != EE; ++EB) { + Node* B = &getNode((*EB)->ToolName()); + B->DecrInEdges(); + if (B->HasNoInEdges()) + Q.push(B); + } + } +} + +namespace { + bool NotJoinNode(const Node* N) { + return N->ToolPtr ? !N->ToolPtr->IsJoin() : true; + } +} + +// Call TopologicalSort and filter the resulting list to include +// only Join nodes. +void CompilationGraph:: +TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) { + std::vector<const Node*> TopSorted; + TopologicalSort(TopSorted); + std::remove_copy_if(TopSorted.begin(), TopSorted.end(), + std::back_inserter(Out), NotJoinNode); +}
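TopologicalSort above is Kahn's algorithm: pop a node, emit it, and enqueue any successor whose in-edge counter drops to zero (CheckCycles further down reuses the same counters, where leftover nodes mean a cycle). The loop on a plain adjacency list:

    #include <cstdio>
    #include <queue>
    #include <vector>

    int main() {
      // Tiny DAG: 0 -> 1, 0 -> 2, 1 -> 2.
      std::vector<std::vector<int> > Adj(3);
      Adj[0].push_back(1); Adj[0].push_back(2); Adj[1].push_back(2);

      // Count in-edges, then repeatedly emit nodes whose counter hits zero.
      std::vector<int> InDeg(3, 0);
      for (unsigned u = 0; u < Adj.size(); ++u)
        for (unsigned j = 0; j < Adj[u].size(); ++j)
          ++InDeg[Adj[u][j]];

      std::queue<int> Q;
      for (unsigned u = 0; u < Adj.size(); ++u)
        if (InDeg[u] == 0)
          Q.push(int(u)); // roots enter first
      while (!Q.empty()) {
        int u = Q.front(); Q.pop();
        std::printf("%d ", u); // emitted in topological order
        for (unsigned j = 0; j < Adj[u].size(); ++j)
          if (--InDeg[Adj[u][j]] == 0)
            Q.push(Adj[u][j]);
      }
      std::printf("\n"); // prints: 0 1 2
      return 0;
    }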
+ +int CompilationGraph::Build (const sys::Path& TempDir, + const LanguageMap& LangMap) { + + InputLanguagesSet InLangs; + + // Traverse initial parts of the toolchains and fill in InLangs. + BuildInitial(InLangs, TempDir, LangMap); + + std::vector<const Node*> JTV; + TopologicalSortFilterJoinNodes(JTV); + + // For all join nodes in topological order: + for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end(); + B != E; ++B) { + + const Node* CurNode = *B; + JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); + + // Are there any files in the join list? + if (JT->JoinListEmpty()) + continue; + + Action CurAction = JT->GenerateAction(CurNode->HasChildren(), + TempDir, InLangs, LangMap); + + if (int ret = CurAction.Execute()) + throw error_code(ret); + + if (CurAction.StopCompilation()) + return 0; + + const Node* NextNode = &getNode(ChooseEdge(CurNode->OutEdges, InLangs, + CurNode->Name())->ToolName()); + PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode, + InLangs, TempDir, LangMap); + } + + return 0; +} + +int CompilationGraph::CheckLanguageNames() const { + int ret = 0; + // Check that names for output and input languages on all edges do match. + for (const_nodes_iterator B = this->NodesMap.begin(), + E = this->NodesMap.end(); B != E; ++B) { + + const Node & N1 = B->second; + if (N1.ToolPtr) { + for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd(); + EB != EE; ++EB) { + const Node& N2 = this->getNode((*EB)->ToolName()); + + if (!N2.ToolPtr) { + ++ret; + std::cerr << "Error: there is an edge from '" << N1.ToolPtr->Name() + << "' back to the root!\n\n"; + continue; + } + + const char* OutLang = N1.ToolPtr->OutputLanguage(); + const char** InLangs = N2.ToolPtr->InputLanguages(); + bool eq = false; + for (;*InLangs; ++InLangs) { + if (std::strcmp(OutLang, *InLangs) == 0) { + eq = true; + break; + } + } + + if (!eq) { + ++ret; + std::cerr << "Error: Output->input language mismatch in the edge '" << + N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name() << "'!\n"; + + std::cerr << "Expected one of { "; + + InLangs = N2.ToolPtr->InputLanguages(); + for (;*InLangs; ++InLangs) { + std::cerr << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'"); + } + + std::cerr << " }, but got '" << OutLang << "'!\n\n"; + } + + } + } + } + + return ret; +} + +int CompilationGraph::CheckMultipleDefaultEdges() const { + int ret = 0; + InputLanguagesSet Dummy; + + // For all nodes, just iterate over the outgoing edges and check if there is + // more than one edge with maximum weight. + for (const_nodes_iterator B = this->NodesMap.begin(), + E = this->NodesMap.end(); B != E; ++B) { + const Node& N = B->second; + unsigned MaxWeight = 0; + + // Ignore the root node. + if (!N.ToolPtr) + continue; + + for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd(); + EB != EE; ++EB) { + unsigned EdgeWeight = (*EB)->Weight(Dummy); + if (EdgeWeight > MaxWeight) { + MaxWeight = EdgeWeight; + } + else if (EdgeWeight == MaxWeight) { + ++ret; + std::cerr + << "Error: there are multiple maximal edges stemming from the '" + << N.ToolPtr->Name() << "' node!\n\n"; + break; + } + } + } + + return ret; +} + +int CompilationGraph::CheckCycles() { + unsigned deleted = 0; + std::queue<Node*> Q; + Q.push(&getNode("root")); + + // Try to delete all nodes that have no incoming edges, starting from the + // root. If there are any nodes left after this operation, then we have a + // cycle. This relies on '--check-graph' not performing the topological sort. + while (!Q.empty()) { + Node* A = Q.front(); + Q.pop(); + ++deleted; + + for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); + EB != EE; ++EB) { + Node* B = &getNode((*EB)->ToolName()); + B->DecrInEdges(); + if (B->HasNoInEdges()) + Q.push(B); + } + } + + if (deleted != NodesMap.size()) { + std::cerr << "Error: there are cycles in the compilation graph!\n" + << "Try inspecting the diagram produced by " + "'llvmc --view-graph'.\n\n"; + return 1; + } + + return 0; +} + +int CompilationGraph::Check () { + // We try to catch as many errors as we can in one go.
+ int ret = 0; + + // Check that output/input language names match. + ret += this->CheckLanguageNames(); + + // Check for multiple default edges. + ret += this->CheckMultipleDefaultEdges(); + + // Check for cycles. + ret += this->CheckCycles(); + + return ret; +} + +// Code related to graph visualization. + +namespace llvm { + template <> + struct DOTGraphTraits<llvmc::CompilationGraph*> + : public DefaultDOTGraphTraits + { + + template <typename GraphType> + static std::string getNodeLabel(const Node* N, const GraphType&) + { + if (N->ToolPtr) + if (N->ToolPtr->IsJoin()) + return N->Name() + "\n (join" + + (N->HasChildren() ? ")" + : std::string(": ") + N->ToolPtr->OutputLanguage() + ')'); + else + return N->Name(); + else + return "root"; + } + + template <typename EdgeIter> + static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) { + if (N->ToolPtr) { + return N->ToolPtr->OutputLanguage(); + } + else { + const char** InLangs = I->ToolPtr->InputLanguages(); + std::string ret; + + for (; *InLangs; ++InLangs) { + if (*(InLangs + 1)) { + ret += *InLangs; + ret += ", "; + } + else { + ret += *InLangs; + } + } + + return ret; + } + } + }; + +} + +void CompilationGraph::writeGraph(const std::string& OutputFilename) { + std::ofstream O(OutputFilename.c_str()); + + if (O.good()) { + std::cerr << "Writing '"<< OutputFilename << "' file..."; + llvm::WriteGraph(O, this); + std::cerr << "done.\n"; + O.close(); + } + else { + throw std::runtime_error("Error opening file '" + OutputFilename + + "' for writing!"); + } +} + +void CompilationGraph::viewGraph() { + llvm::ViewGraph(this, "compilation-graph"); +} diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile new file mode 100644 index 000000000000..e5bf3e10a79e --- /dev/null +++ b/lib/CompilerDriver/Makefile @@ -0,0 +1,19 @@ +##===- lib/CompilerDriver/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open +# Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. + +# We don't want this library to appear in `llvm-config --libs` output, so its +# name doesn't start with "LLVM". + +LIBRARYNAME = CompilerDriver +LINK_COMPONENTS = support system +REQUIRES_EH := 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp new file mode 100644 index 000000000000..75abbd041d32 --- /dev/null +++ b/lib/CompilerDriver/Plugin.cpp @@ -0,0 +1,73 @@ +//===--- Plugin.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Plugin support. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/Plugin.h" + +#include <algorithm> +#include <vector>
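The plugin list managed below is filled once from the registry and ordered with std::sort and a priority comparator. The idiom in isolation, with a hypothetical Plugin record standing in for BasePlugin:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Plugin { const char *Name; int Priority; };

    struct ByPriority {
      bool operator()(const Plugin &L, const Plugin &R) const {
        return L.Priority < R.Priority; // lower priority value runs first
      }
    };

    int main() {
      std::vector<Plugin> Plugins;
      Plugin Base = { "base", 0 };
      Plugin Override = { "override", 10 };
      Plugins.push_back(Override);
      Plugins.push_back(Base);
      std::sort(Plugins.begin(), Plugins.end(), ByPriority());
      for (unsigned i = 0; i < Plugins.size(); ++i)
        std::printf("%s\n", Plugins[i].Name); // base, then override
      return 0;
    }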
+ +namespace { + + // Registry::Add<> does not do lifetime management (probably issues + // with static constructor/destructor ordering), so we have to + // implement it here. + // + // All this static registration/life-before-main model seems + // unnecessarily convoluted to me. + + static bool pluginListInitialized = false; + typedef std::vector<const llvmc::BasePlugin*> PluginList; + static PluginList Plugins; + + struct ByPriority { + bool operator()(const llvmc::BasePlugin* lhs, + const llvmc::BasePlugin* rhs) { + return lhs->Priority() < rhs->Priority(); + } + }; +} + +namespace llvmc { + + PluginLoader::PluginLoader() { + if (!pluginListInitialized) { + for (PluginRegistry::iterator B = PluginRegistry::begin(), + E = PluginRegistry::end(); B != E; ++B) + Plugins.push_back(B->instantiate()); + std::sort(Plugins.begin(), Plugins.end(), ByPriority()); + } + pluginListInitialized = true; + } + + PluginLoader::~PluginLoader() { + if (pluginListInitialized) { + for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); + B != E; ++B) + delete (*B); + } + pluginListInitialized = false; + } + + void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) { + for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); + B != E; ++B) + (*B)->PopulateLanguageMap(langMap); + } + + void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) { + for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); + B != E; ++B) + (*B)->PopulateCompilationGraph(graph); + } + +} diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp new file mode 100644 index 000000000000..886b26b5d713 --- /dev/null +++ b/lib/CompilerDriver/Tool.cpp @@ -0,0 +1,74 @@ +//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tool base class - implementation details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/Tool.h" + +#include "llvm/System/Path.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvmc; + +extern cl::opt<std::string> OutputFilename; + +namespace { + sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName, + const std::string& Suffix) { + sys::Path Out; + + // Make sure we don't end up with path names like '/file.o' if the + // TempDir is empty. + if (TempDir.empty()) { + Out.set(BaseName); + } + else { + Out = TempDir; + Out.appendComponent(BaseName); + } + Out.appendSuffix(Suffix); + // NOTE: makeUnique always *creates* a unique temporary file, + // which is good, since there will be no races. However, some + // tools do not like it when the output file already exists, so + // they have to be placated with -f or something like that.
+ Out.makeUnique(true, NULL); + return Out; + } +} + +sys::Path Tool::OutFilename(const sys::Path& In, + const sys::Path& TempDir, + bool StopCompilation, + const char* OutputSuffix) const { + sys::Path Out; + + if (StopCompilation) { + if (!OutputFilename.empty()) { + Out.set(OutputFilename); + } + else if (IsJoin()) { + Out.set("a"); + Out.appendSuffix(OutputSuffix); + } + else { + Out.set(In.getBasename()); + Out.appendSuffix(OutputSuffix); + } + } + else { + if (IsJoin()) + Out = MakeTempFile(TempDir, "tmp", OutputSuffix); + else + Out = MakeTempFile(TempDir, In.getBasename(), OutputSuffix); + } + return Out; +} diff --git a/lib/Debugger/CMakeLists.txt b/lib/Debugger/CMakeLists.txt new file mode 100644 index 000000000000..d2508cf4c292 --- /dev/null +++ b/lib/Debugger/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(LLVMDebugger + Debugger.cpp + ProgramInfo.cpp + RuntimeInfo.cpp + SourceFile.cpp + SourceLanguage-CFamily.cpp + SourceLanguage-CPlusPlus.cpp + SourceLanguage-Unknown.cpp + SourceLanguage.cpp + ) diff --git a/lib/Debugger/Debugger.cpp b/lib/Debugger/Debugger.cpp new file mode 100644 index 000000000000..b12d90ac9db0 --- /dev/null +++ b/lib/Debugger/Debugger.cpp @@ -0,0 +1,230 @@ +//===-- Debugger.cpp - LLVM debugger library implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the main implementation of the LLVM debugger library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/Debugger.h" +#include "llvm/Module.h" +#include "llvm/ModuleProvider.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Debugger/InferiorProcess.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/StringExtras.h" +#include <cstdlib> +#include <memory> +using namespace llvm; + +/// Debugger constructor - Initialize the debugger to its initial, empty state. +/// +Debugger::Debugger() : Environment(0), Program(0), Process(0) { +} + +Debugger::~Debugger() { + // Killing the program could throw an exception. We don't want to propagate + // the exception out of our destructor though. + try { + killProgram(); + } catch (const char *) { + } catch (const std::string &) { + } + + unloadProgram(); +} + +/// getProgramPath - Get the path of the currently loaded program, or an +/// empty string if none is loaded. +std::string Debugger::getProgramPath() const { + return Program ? Program->getModuleIdentifier() : ""; +} + +static Module * +getMaterializedModuleProvider(const std::string &Filename) { + std::auto_ptr<MemoryBuffer> Buffer; + Buffer.reset(MemoryBuffer::getFileOrSTDIN(Filename.c_str())); + if (Buffer.get()) + return ParseBitcodeFile(Buffer.get()); + return 0; +} + +/// loadProgram - If a program is currently loaded, unload it. Then search +/// the PATH for the specified program, loading it when found. If the +/// specified program cannot be found, an exception is thrown to indicate the +/// error. +void Debugger::loadProgram(const std::string &Filename) { + if ((Program = getMaterializedModuleProvider(Filename)) || + (Program = getMaterializedModuleProvider(Filename+".bc"))) + return; // Successfully loaded the program.
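The search that follows splits $PATH on ':' and probes each directory for the program, then for the program with a ".bc" suffix. The directory walk alone looks like this (a sketch; "prog.bc" is a placeholder name and the probe is only printed):

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    int main() {
      const char *PathS = std::getenv("PATH");
      if (!PathS)
        return 0;
      std::string Path = PathS;
      std::string File = "prog.bc"; // placeholder program name
      std::string::size_type B = 0;
      while (B <= Path.size()) {
        std::string::size_type E = Path.find(':', B);
        if (E == std::string::npos)
          E = Path.size();
        std::string Dir = Path.substr(B, E - B);
        if (!Dir.empty())
          std::printf("would try %s/%s\n", Dir.c_str(), File.c_str());
        B = E + 1; // skip the ':' separator
      }
      return 0;
    }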
+ + // Search the program path for the file... + if (const char *PathS = getenv("PATH")) { + std::string Path = PathS; + + std::string Directory = getToken(Path, ":"); + while (!Directory.empty()) { + if ((Program = getMaterializedModuleProvider(Directory +"/"+ Filename)) || + (Program = getMaterializedModuleProvider(Directory +"/"+ Filename + + ".bc"))) + return; // Successfully loaded the program. + + Directory = getToken(Path, ":"); + } + } + + throw "Could not find program '" + Filename + "'!"; +} + +/// unloadProgram - If a program is running, kill it, then unload all traces +/// of the current program. If no program is loaded, this method silently +/// succeeds. +void Debugger::unloadProgram() { + if (!isProgramLoaded()) return; + killProgram(); + delete Program; + Program = 0; +} + + +/// createProgram - Create an instance of the currently loaded program, +/// killing off any existing one. This creates the program and stops it at +/// the first possible moment. If there is no program loaded or if there is a +/// problem starting the program, this method throws an exception. +void Debugger::createProgram() { + if (!isProgramLoaded()) + throw "Cannot start program: none is loaded."; + + // Kill any existing program. + killProgram(); + + // Add argv[0] to the arguments vector... + std::vector<std::string> Args(ProgramArguments); + Args.insert(Args.begin(), getProgramPath()); + + // Start the new program... this could throw if the program cannot be started. + Process = InferiorProcess::create(Program, Args, Environment); +} + +InferiorProcess * +InferiorProcess::create(Module *M, const std::vector<std::string> &Arguments, + const char * const *envp) { + throw "No supported binding to inferior processes (debugger not implemented)."; +} + +/// killProgram - If the program is currently executing, kill off the +/// process and free up any state related to the currently running program. If +/// there is no program currently running, this just silently succeeds. +void Debugger::killProgram() { + // The destructor takes care of the dirty work. + try { + delete Process; + } catch (...) { + Process = 0; + throw; + } + Process = 0; +} + +/// stepProgram - Implement the 'step' command, continuing execution until +/// the next possible stop point. +void Debugger::stepProgram() { + assert(isProgramRunning() && "Cannot step if the program isn't running!"); + try { + Process->stepProgram(); + } catch (InferiorProcessDead &IPD) { + killProgram(); + throw NonErrorException("The program stopped with exit code " + + itostr(IPD.getExitCode())); + } catch (...) { + killProgram(); + throw; + } +} + +/// nextProgram - Implement the 'next' command, continuing execution until +/// the next possible stop point that is in the current function. +void Debugger::nextProgram() { + assert(isProgramRunning() && "Cannot next if the program isn't running!"); + try { + // This should step the process. If the process enters a function, then it + // should 'finish' it. However, figuring this out is tricky. In + // particular, the program can do any of: + // 0. Not change current frame. + // 1. Entering or exiting a region within the current function + // (which changes the frame ID, but which we shouldn't 'finish') + // 2. Exiting the current function (which changes the frame ID) + // 3. Entering a function (which should be 'finish'ed) + // For this reason, we have to be very careful about when we decide to do + // the 'finish'. + + // Get the current frame, but don't trust it. It could change...
+ void *CurrentFrame = Process->getPreviousFrame(0); + + // Don't trust the current frame: get the caller frame. + void *ParentFrame = Process->getPreviousFrame(CurrentFrame); + + // Ok, we have some information, run the program one step. + Process->stepProgram(); + + // Where is the new frame? The most common case, by far, is that it has not + // been modified (Case #0), in which case we don't need to do anything more. + void *NewFrame = Process->getPreviousFrame(0); + if (NewFrame != CurrentFrame) { + // Ok, the frame changed. If we are case #1, then the parent frame will + // be identical. + void *NewParentFrame = Process->getPreviousFrame(NewFrame); + if (ParentFrame != NewParentFrame) { + // Ok, now we know we aren't case #0 or #1. Check to see if we entered + // a new function. If so, the parent frame will be "CurrentFrame". + if (CurrentFrame == NewParentFrame) + Process->finishProgram(NewFrame); + } + } + + } catch (InferiorProcessDead &IPD) { + killProgram(); + throw NonErrorException("The program stopped with exit code " + + itostr(IPD.getExitCode())); + } catch (...) { + killProgram(); + throw; + } +} + +/// finishProgram - Implement the 'finish' command, continuing execution +/// until the specified frame ID returns. +void Debugger::finishProgram(void *Frame) { + assert(isProgramRunning() && "Cannot finish if the program isn't running!"); + try { + Process->finishProgram(Frame); + } catch (InferiorProcessDead &IPD) { + killProgram(); + throw NonErrorException("The program stopped with exit code " + + itostr(IPD.getExitCode())); + } catch (...) { + killProgram(); + throw; + } +} + +/// contProgram - Implement the 'cont' command, continuing execution until +/// the next breakpoint is encountered. +void Debugger::contProgram() { + assert(isProgramRunning() && "Cannot cont if the program isn't running!"); + try { + Process->contProgram(); + } catch (InferiorProcessDead &IPD) { + killProgram(); + throw NonErrorException("The program stopped with exit code " + + itostr(IPD.getExitCode())); + } catch (...) { + killProgram(); + throw; + } +} diff --git a/lib/Debugger/Makefile b/lib/Debugger/Makefile new file mode 100644 index 000000000000..8290e3020fef --- /dev/null +++ b/lib/Debugger/Makefile @@ -0,0 +1,16 @@ +##===- lib/Debugger/Makefile -------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMDebugger +EXTRA_DIST = README.txt +REQUIRES_EH := 1 +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Debugger/ProgramInfo.cpp b/lib/Debugger/ProgramInfo.cpp new file mode 100644 index 000000000000..125ff556dd50 --- /dev/null +++ b/lib/Debugger/ProgramInfo.cpp @@ -0,0 +1,377 @@ +//===-- ProgramInfo.cpp - Compute and cache info about a program ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ProgramInfo and related classes by sorting through +// the loaded Module.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/ProgramInfo.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Debugger/SourceFile.h" +#include "llvm/Debugger/SourceLanguage.h" +#include "llvm/Support/SlowOperationInformer.h" +#include "llvm/Support/Streams.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// getGlobalVariablesUsing - Return all of the global variables which have the +/// specified value in their initializer somewhere. +static void getGlobalVariablesUsing(Value *V, + std::vector<GlobalVariable*> &Found) { + for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) + Found.push_back(GV); + else if (Constant *C = dyn_cast<Constant>(*I)) + getGlobalVariablesUsing(C, Found); + } +} + +/// getNextStopPoint - Follow the def-use chains of the specified LLVM value, +/// traversing the use chains until we get to a stoppoint. When we do, return +/// the source location of the stoppoint. If we don't find a stoppoint, return +/// null. +static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo, + unsigned &ColNo) { + // The use-def chains can fork. As such, we pick the lowest numbered one we + // find. + const GlobalVariable *LastDesc = 0; + unsigned LastLineNo = ~0; + unsigned LastColNo = ~0; + + for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + bool ShouldRecurse = true; + if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) { + // Infinite loops == bad, ignore PHI nodes. + ShouldRecurse = false; + } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) { + + // If we found a stop point, check to see if it is earlier than what we + // already have. If so, remember it. + if (CI->getCalledFunction()) + if (const DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(CI)) { + unsigned CurLineNo = SPI->getLine(); + unsigned CurColNo = SPI->getColumn(); + const GlobalVariable *CurDesc = 0; + const Value *Op = SPI->getContext(); + + if ((CurDesc = dyn_cast<GlobalVariable>(Op)) && + (CurLineNo < LastLineNo || + (CurLineNo == LastLineNo && CurColNo < LastColNo))) { + LastDesc = CurDesc; + LastLineNo = CurLineNo; + LastColNo = CurColNo; + } + ShouldRecurse = false; + } + } + + // If this is not a phi node or a stopping point, recursively scan the users + // of this instruction to skip over region.begin's and the like. + if (ShouldRecurse) { + unsigned CurLineNo, CurColNo; + if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){ + if (CurLineNo < LastLineNo || + (CurLineNo == LastLineNo && CurColNo < LastColNo)){ + LastDesc = GV; + LastLineNo = CurLineNo; + LastColNo = CurColNo; + } + } + } + } + + if (LastDesc) { + LineNo = LastLineNo != ~0U ? LastLineNo : 0; + ColNo = LastColNo != ~0U ?
LastColNo : 0; + } + return LastDesc; +} + + +//===----------------------------------------------------------------------===// +// SourceFileInfo implementation +// + +SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc, + const SourceLanguage &Lang) + : Language(&Lang), Descriptor(Desc) { + Version = 0; + SourceText = 0; + + if (Desc && Desc->hasInitializer()) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) + if (CS->getNumOperands() > 4) { + if (ConstantInt *CUI = dyn_cast<ConstantInt>(CS->getOperand(1))) + Version = CUI->getZExtValue(); + + if (!GetConstantStringInfo(CS->getOperand(3), BaseName)) + BaseName = ""; + if (!GetConstantStringInfo(CS->getOperand(4), Directory)) + Directory = ""; + } +} + +SourceFileInfo::~SourceFileInfo() { + delete SourceText; +} + +SourceFile &SourceFileInfo::getSourceText() const { + // FIXME: this should take into account the source search directories! + if (SourceText == 0) { // Read the file in if we haven't already. + sys::Path tmpPath; + if (!Directory.empty()) + tmpPath.set(Directory); + tmpPath.appendComponent(BaseName); + if (tmpPath.canRead()) + SourceText = new SourceFile(tmpPath.toString(), Descriptor); + else + SourceText = new SourceFile(BaseName, Descriptor); + } + return *SourceText; +} + + +//===----------------------------------------------------------------------===// +// SourceFunctionInfo implementation +// +SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI, + const GlobalVariable *Desc) + : Descriptor(Desc) { + LineNo = ColNo = 0; + if (Desc && Desc->hasInitializer()) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) + if (CS->getNumOperands() > 2) { + // Entry #1 is the file descriptor. + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(CS->getOperand(1))) + SourceFile = &PI.getSourceFile(GV); + + // Entry #2 is the function name. + if (!GetConstantStringInfo(CS->getOperand(2), Name)) + Name = ""; + } +} + +/// getSourceLocation - This method returns the location of the first stopping +/// point in the function. +void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo, + unsigned &RetColNo) const { + // If we haven't computed this yet... + if (!LineNo) { + // Look at all of the users of the function descriptor, looking for calls to + // %llvm.dbg.func.start. + for (Value::use_const_iterator UI = Descriptor->use_begin(), + E = Descriptor->use_end(); UI != E; ++UI) + if (const CallInst *CI = dyn_cast<CallInst>(*UI)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::dbg_func_start) { + // We found the start of the function. Check to see if there are + // any stop points on the use-list of the function start. + const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo); + if (SD) { // We found the first stop point! + // This is just a sanity check.
+ if (getSourceFile().getDescriptor() != SD) + cout << "WARNING: first line of function is not in the" + << " file that the function descriptor claims it is in.\n"; + break; + } + } + } + RetLineNo = LineNo; RetColNo = ColNo; +} + +//===----------------------------------------------------------------------===// +// ProgramInfo implementation +// + +ProgramInfo::ProgramInfo(Module *m) : M(m), ProgramTimeStamp(0,0) { + assert(M && "Cannot create program information with a null module!"); + sys::PathWithStatus ModPath(M->getModuleIdentifier()); + const sys::FileStatus *Stat = ModPath.getFileStatus(); + if (Stat) + ProgramTimeStamp = Stat->getTimestamp(); + + SourceFilesIsComplete = false; + SourceFunctionsIsComplete = false; +} + +ProgramInfo::~ProgramInfo() { + // Delete cached information about source program objects... + for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator + I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I) + delete I->second; + for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator + I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I) + delete I->second; + + // Delete the source language caches. + for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i) + delete LanguageCaches[i].second; +} + + +//===----------------------------------------------------------------------===// +// SourceFileInfo tracking... +// + +/// getSourceFile - Return source file information for the specified source file +/// descriptor object, adding it to the collection as needed. This method +/// always succeeds (is unambiguous), and is always efficient. +/// +const SourceFileInfo & +ProgramInfo::getSourceFile(const GlobalVariable *Desc) { + SourceFileInfo *&Result = SourceFiles[Desc]; + if (Result) return *Result; + + // Figure out what language this source file comes from... + unsigned LangID = 0; // Zero is unknown language + if (Desc && Desc->hasInitializer()) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) + if (CS->getNumOperands() > 2) + if (ConstantInt *CUI = dyn_cast<ConstantInt>(CS->getOperand(2))) + LangID = CUI->getZExtValue(); + + const SourceLanguage &Lang = SourceLanguage::get(LangID); + SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this); + + // FIXME: this should check to see if there is already a Filename/WorkingDir + // pair that matches this one. If so, we shouldn't create the duplicate! + // + SourceFileIndex.insert(std::make_pair(New->getBaseName(), New)); + return *(Result = New); +} + + +/// getSourceFiles - Index all of the source files in the program and return +/// the mapping. This information is lazily computed the first time +/// that it is requested. Since this information can take a long time to +/// compute, the user is given a chance to cancel it. If this occurs, an +/// exception is thrown. +const std::map<const GlobalVariable*, SourceFileInfo*> & +ProgramInfo::getSourceFiles(bool RequiresCompleteMap) { + // If we have a fully populated map, or if the client doesn't need one, just + // return what we have. + if (SourceFilesIsComplete || !RequiresCompleteMap) + return SourceFiles; + + // Ok, all of the source file descriptors (compile_unit in dwarf terms) + // should be on the use list of the llvm.dbg.translation_units global.
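+ // Illustrative IR shape (an assumption about llvm-gcc era debug info, not + // verbatim from any particular compiler): + // @llvm.dbg.translation_units = linkonce constant {} zeroinitializer + // Each compile unit descriptor global keeps a value derived from this + // anchor in its initializer, which is why walking the anchor's use list + // below enumerates every compile unit descriptor in the module.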
+ // + GlobalVariable *Units = + M->getGlobalVariable("llvm.dbg.translation_units", + StructType::get(std::vector<const Type*>())); + if (Units == 0) + throw "Program contains no debugging information!"; + + std::vector<GlobalVariable*> TranslationUnits; + getGlobalVariablesUsing(Units, TranslationUnits); + + SlowOperationInformer SOI("building source files index"); + + // Loop over all of the translation units found, building the SourceFiles + // mapping. + for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) { + getSourceFile(TranslationUnits[i]); + if (SOI.progress(i+1, e)) + throw "While building source files index, operation cancelled."; + } + + // Ok, if we got this far, then we indexed the whole program. + SourceFilesIsComplete = true; + return SourceFiles; +} + +/// getSourceFile - Look up the file with the specified name. If there is +/// more than one match for the specified filename, prompt the user to pick +/// one. If there is no source file that matches the specified name, throw +/// an exception indicating that we can't find the file. Otherwise, return +/// the file information for that file. +const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) { + std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End; + getSourceFiles(); + tie(Start, End) = SourceFileIndex.equal_range(Filename); + + if (Start == End) throw "Could not find source file '" + Filename + "'!"; + const SourceFileInfo &SFI = *Start->second; + ++Start; + if (Start == End) return SFI; + + throw "FIXME: Multiple source files with the same name not implemented!"; +} + + +//===----------------------------------------------------------------------===// +// SourceFunctionInfo tracking... +// + + +/// getFunction - Return function information for the specified function +/// descriptor object, adding it to the collection as needed. This method +/// always succeeds (is unambiguous), and is always efficient. +/// +const SourceFunctionInfo & +ProgramInfo::getFunction(const GlobalVariable *Desc) { + SourceFunctionInfo *&Result = SourceFunctions[Desc]; + if (Result) return *Result; + + // Figure out what language this function comes from... + const GlobalVariable *SourceFileDesc = 0; + if (Desc && Desc->hasInitializer()) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) + if (CS->getNumOperands() > 1) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(CS->getOperand(1))) + SourceFileDesc = GV; + + const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage(); + return *(Result = Lang.createSourceFunctionInfo(Desc, *this)); +} + + +// getSourceFunctions - Index all of the functions in the program and return +// them. This information is lazily computed the first time that it is +// requested. Since this information can take a long time to compute, the user +// is given a chance to cancel it. If this occurs, an exception is thrown. +const std::map<const GlobalVariable*, SourceFunctionInfo*> & +ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) { + if (SourceFunctionsIsComplete || !RequiresCompleteMap) + return SourceFunctions; + + // Ok, all of the source function descriptors (subprogram in dwarf terms) + // should be on the use list of the llvm.dbg.globals global.
+ // + GlobalVariable *Units = + M->getGlobalVariable("llvm.dbg.globals", + StructType::get(std::vector<const Type*>())); + if (Units == 0) + throw "Program contains no debugging information!"; + + std::vector<GlobalVariable*> Functions; + getGlobalVariablesUsing(Units, Functions); + + SlowOperationInformer SOI("building functions index"); + + // Loop over all of the functions found, building the SourceFunctions mapping. + for (unsigned i = 0, e = Functions.size(); i != e; ++i) { + getFunction(Functions[i]); + if (SOI.progress(i+1, e)) + throw "While building functions index, operation cancelled."; + } + + // Ok, if we got this far, then we indexed the whole program. + SourceFunctionsIsComplete = true; + return SourceFunctions; +} diff --git a/lib/Debugger/README.txt b/lib/Debugger/README.txt new file mode 100644 index 000000000000..89935c5a631a --- /dev/null +++ b/lib/Debugger/README.txt @@ -0,0 +1,7 @@ +//===-- llvm/lib/Debugger/ - LLVM Debugger interfaces ---------------------===// + +This directory contains the implementation of the LLVM debugger backend. This +directory builds into a library which can be used by various debugger +front-ends to debug LLVM programs. The command line LLVM debugger, llvm-db, +is currently the only client of this library, but others could be built, for +example a GUI front-end. diff --git a/lib/Debugger/RuntimeInfo.cpp b/lib/Debugger/RuntimeInfo.cpp new file mode 100644 index 000000000000..2f0ff72a771e --- /dev/null +++ b/lib/Debugger/RuntimeInfo.cpp @@ -0,0 +1,69 @@ +//===-- RuntimeInfo.cpp - Compute and cache info about running program ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RuntimeInfo and related classes by querying and +// caching information from the running inferior process. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/InferiorProcess.h" +#include "llvm/Debugger/ProgramInfo.h" +#include "llvm/Debugger/RuntimeInfo.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// StackFrame class implementation + +StackFrame::StackFrame(RuntimeInfo &ri, void *ParentFrameID) + : RI(ri), SourceInfo(0) { + FrameID = RI.getInferiorProcess().getPreviousFrame(ParentFrameID); + if (FrameID == 0) throw "Stack frame does not exist!"; + + // Compute lazily as needed. + FunctionDesc = 0; +} + +const GlobalVariable *StackFrame::getFunctionDesc() { + if (FunctionDesc == 0) + FunctionDesc = RI.getInferiorProcess().getSubprogramDesc(FrameID); + return FunctionDesc; +} + +/// getSourceLocation - Return the source location that this stack frame is +/// sitting at. +void StackFrame::getSourceLocation(unsigned &lineNo, unsigned &colNo, + const SourceFileInfo *&sourceInfo) { + if (SourceInfo == 0) { + const GlobalVariable *SourceDesc = 0; + RI.getInferiorProcess().getFrameLocation(FrameID, LineNo,ColNo, SourceDesc); + SourceInfo = &RI.getProgramInfo().getSourceFile(SourceDesc); + } + + lineNo = LineNo; + colNo = ColNo; + sourceInfo = SourceInfo; +} + +//===----------------------------------------------------------------------===// +// RuntimeInfo class implementation + +/// materializeFrame - Create and process all frames up to and including the +/// specified frame number.
This throws an exception if the specified frame +/// ID is nonexistent. +void RuntimeInfo::materializeFrame(unsigned ID) { + assert(ID >= CallStack.size() && "no need to materialize this frame!"); + void *CurFrame = 0; + if (!CallStack.empty()) + CurFrame = CallStack.back().getFrameID(); + + while (CallStack.size() <= ID) { + CallStack.push_back(StackFrame(*this, CurFrame)); + CurFrame = CallStack.back().getFrameID(); + } +} diff --git a/lib/Debugger/SourceFile.cpp b/lib/Debugger/SourceFile.cpp new file mode 100644 index 000000000000..03c60f87f785 --- /dev/null +++ b/lib/Debugger/SourceFile.cpp @@ -0,0 +1,82 @@ +//===-- SourceFile.cpp - SourceFile implementation for the debugger -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceFile class for the LLVM debugger. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/SourceFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cassert> +using namespace llvm; + +static const char EmptyFile = 0; + +SourceFile::SourceFile(const std::string &fn, const GlobalVariable *Desc) + : Filename(fn), Descriptor(Desc) { + File.reset(MemoryBuffer::getFileOrSTDIN(fn)); + + // On error, return an empty buffer. + if (File == 0) + File.reset(MemoryBuffer::getMemBuffer(&EmptyFile, &EmptyFile)); +} + +SourceFile::~SourceFile() { +} + + +/// calculateLineOffsets - Compute the LineOffset vector for the current file. +/// +void SourceFile::calculateLineOffsets() const { + assert(LineOffset.empty() && "Line offsets already computed!"); + const char *BufPtr = File->getBufferStart(); + const char *FileStart = BufPtr; + const char *FileEnd = File->getBufferEnd(); + do { + LineOffset.push_back(BufPtr-FileStart); + + // Scan until we get to a newline. + while (BufPtr != FileEnd && *BufPtr != '\n' && *BufPtr != '\r') + ++BufPtr; + + if (BufPtr != FileEnd) { + ++BufPtr; // Skip over the \n or \r + if (BufPtr[-1] == '\r' && BufPtr != FileEnd && BufPtr[0] == '\n') + ++BufPtr; // Skip over dos/windows style \r\n's + } + } while (BufPtr != FileEnd); +} + + +/// getSourceLine - Given a line number, return the start and end of the line +/// in the file. If the line number is invalid, or if the file could not be +/// loaded, null pointers are returned for the start and end of the line. Note +/// that line numbers start with 0, not 1. +void SourceFile::getSourceLine(unsigned LineNo, const char *&LineStart, + const char *&LineEnd) const { + LineStart = LineEnd = 0; + if (LineOffset.empty()) calculateLineOffsets(); + + // Asking for an out-of-range line number? + if (LineNo >= LineOffset.size()) return; + + // Otherwise, they are asking for a valid line, which we can fulfill. + LineStart = File->getBufferStart()+LineOffset[LineNo]; + + if (LineNo+1 < LineOffset.size()) + LineEnd = File->getBufferStart()+LineOffset[LineNo+1]; + else + LineEnd = File->getBufferEnd(); + + // If the line ended with a newline, strip it off.
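+ // For example, for a line stored as "abc\r\n", this backs LineEnd up over + // both terminator bytes so the returned range covers just "abc".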
+ while (LineEnd != LineStart && (LineEnd[-1] == '\n' || LineEnd[-1] == '\r')) + --LineEnd; + + assert(LineEnd >= LineStart && "We somehow got our pointers swizzled!"); +} diff --git a/lib/Debugger/SourceLanguage-CFamily.cpp b/lib/Debugger/SourceLanguage-CFamily.cpp new file mode 100644 index 000000000000..f329db495f73 --- /dev/null +++ b/lib/Debugger/SourceLanguage-CFamily.cpp @@ -0,0 +1,28 @@ +//===-- SourceLanguage-CFamily.cpp - C family SourceLanguage impl ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceLanguage class for the C family of languages +// (K&R C, C89, C99, etc). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/SourceLanguage.h" +using namespace llvm; + +#if 0 +namespace { + struct CSL : public SourceLanguage { + } TheCSourceLanguageInstance; +} +#endif + +const SourceLanguage &SourceLanguage::getCFamilyInstance() { + return get(0); // We don't have an implementation for C yet; fall back on + // the generic one. +} diff --git a/lib/Debugger/SourceLanguage-CPlusPlus.cpp b/lib/Debugger/SourceLanguage-CPlusPlus.cpp new file mode 100644 index 000000000000..ce94ff4a4cb2 --- /dev/null +++ b/lib/Debugger/SourceLanguage-CPlusPlus.cpp @@ -0,0 +1,27 @@ +//===-- SourceLanguage-CPlusPlus.cpp - C++ SourceLanguage impl ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceLanguage class for the C++ language. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/SourceLanguage.h" +using namespace llvm; + +#if 0 +namespace { + struct CPPSL : public SourceLanguage { + } TheCPlusPlusLanguageInstance; +} +#endif + +const SourceLanguage &SourceLanguage::getCPlusPlusInstance() { + return get(0); // We don't have an implementation for C++ yet; fall back on + // the generic one. +} diff --git a/lib/Debugger/SourceLanguage-Unknown.cpp b/lib/Debugger/SourceLanguage-Unknown.cpp new file mode 100644 index 000000000000..b806fc779ef7 --- /dev/null +++ b/lib/Debugger/SourceLanguage-Unknown.cpp @@ -0,0 +1,138 @@ +//===-- SourceLanguage-Unknown.cpp - Implement interface for unknown langs ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// If the LLVM debugger does not have a module for a particular language, it +// falls back on using this one to perform the source-language interface. This +// interface is not wonderful, but it gets the job done. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/SourceLanguage.h" +#include "llvm/Debugger/ProgramInfo.h" +#include "llvm/Support/Streams.h" +#include <cassert> +#include <map> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implement the SourceLanguage cache for the Unknown language.
+// + +namespace { + /// SLUCache - This cache allows for efficient lookup of source functions by + /// name. + /// + struct SLUCache : public SourceLanguageCache { + ProgramInfo &PI; + std::multimap<std::string, SourceFunctionInfo*> FunctionMap; + public: + SLUCache(ProgramInfo &pi); + + typedef std::multimap<std::string, SourceFunctionInfo*>::const_iterator + fm_iterator; + + std::pair<fm_iterator, fm_iterator> + getFunction(const std::string &Name) const { + return FunctionMap.equal_range(Name); + } + + SourceFunctionInfo *addSourceFunction(SourceFunctionInfo *SF) { + FunctionMap.insert(std::make_pair(SF->getSymbolicName(), SF)); + return SF; + } + }; +} + +SLUCache::SLUCache(ProgramInfo &pi) : PI(pi) { +} + + +//===----------------------------------------------------------------------===// +// Implement SourceLanguageUnknown class, which is used to handle unrecognized +// languages. +// + +namespace { + static struct SLU : public SourceLanguage { + //===------------------------------------------------------------------===// + // Implement the miscellaneous methods... + // + virtual const char *getSourceLanguageName() const { + return "unknown"; + } + + /// lookupFunction - Given a textual function name, return the + /// SourceFunctionInfo descriptor for that function, or null if it cannot be + /// found. If the program is currently running, the RuntimeInfo object + /// provides information about the current evaluation context, otherwise it + /// will be null. + /// + virtual SourceFunctionInfo *lookupFunction(const std::string &FunctionName, + ProgramInfo &PI, + RuntimeInfo *RI = 0) const; + + //===------------------------------------------------------------------===// + // We do use a cache for information... + // + typedef SLUCache CacheType; + SLUCache *createSourceLanguageCache(ProgramInfo &PI) const { + return new SLUCache(PI); + } + + /// createSourceFunctionInfo - Create the new object and inform the cache of + /// the new function. + virtual SourceFunctionInfo * + createSourceFunctionInfo(const GlobalVariable *Desc, ProgramInfo &PI) const; + + } TheUnknownSourceLanguageInstance; +} + +const SourceLanguage &SourceLanguage::getUnknownLanguageInstance() { + return TheUnknownSourceLanguageInstance; +} + + +SourceFunctionInfo * +SLU::createSourceFunctionInfo(const GlobalVariable *Desc, + ProgramInfo &PI) const { + SourceFunctionInfo *Result = new SourceFunctionInfo(PI, Desc); + return PI.getLanguageCache(this).addSourceFunction(Result); +} + + +/// lookupFunction - Given a textual function name, return the +/// SourceFunctionInfo descriptor for that function, or null if it cannot be +/// found. If the program is currently running, the RuntimeInfo object +/// provides information about the current evaluation context, otherwise it will +/// be null. +/// +SourceFunctionInfo *SLU::lookupFunction(const std::string &FunctionName, + ProgramInfo &PI, RuntimeInfo *RI) const{ + SLUCache &Cache = PI.getLanguageCache(this); + std::pair<SLUCache::fm_iterator, SLUCache::fm_iterator> IP + = Cache.getFunction(FunctionName); + + if (IP.first == IP.second) { + if (PI.allSourceFunctionsRead()) + return 0; // Nothing found + + // Otherwise, we might be able to find the function if we read all of them + // in. Do so now. + PI.getSourceFunctions(); + assert(PI.allSourceFunctionsRead() && "Didn't read in all functions?"); + return lookupFunction(FunctionName, PI, RI); + } + + SourceFunctionInfo *Found = IP.first->second; + ++IP.first; + if (IP.first != IP.second) + cout << "Whoa, found multiple functions with the same name.
I should" + << " ask the user which one to use: FIXME!\n"; + return Found; +} diff --git a/lib/Debugger/SourceLanguage.cpp b/lib/Debugger/SourceLanguage.cpp new file mode 100644 index 000000000000..4fcc38bd3344 --- /dev/null +++ b/lib/Debugger/SourceLanguage.cpp @@ -0,0 +1,54 @@ +//===-- SourceLanguage.cpp - Implement the SourceLanguage class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceLanguage class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Debugger/SourceLanguage.h" +#include "llvm/Debugger/ProgramInfo.h" +using namespace llvm; + +const SourceLanguage &SourceLanguage::get(unsigned ID) { + switch (ID) { + case 1: // DW_LANG_C89 + case 2: // DW_LANG_C + case 12: // DW_LANG_C99 + return getCFamilyInstance(); + + case 4: // DW_LANG_C_plus_plus + return getCPlusPlusInstance(); + + case 3: // DW_LANG_Ada83 + case 5: // DW_LANG_Cobol74 + case 6: // DW_LANG_Cobol85 + case 7: // DW_LANG_Fortran77 + case 8: // DW_LANG_Fortran90 + case 9: // DW_LANG_Pascal83 + case 10: // DW_LANG_Modula2 + case 11: // DW_LANG_Java + case 13: // DW_LANG_Ada95 + case 14: // DW_LANG_Fortran95 + default: + return getUnknownLanguageInstance(); + } +} + + +SourceFileInfo * +SourceLanguage::createSourceFileInfo(const GlobalVariable *Desc, + ProgramInfo &PI) const { + return new SourceFileInfo(Desc, *this); +} + +SourceFunctionInfo * +SourceLanguage::createSourceFunctionInfo(const GlobalVariable *Desc, + ProgramInfo &PI) const { + return new SourceFunctionInfo(PI, Desc); +} diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt new file mode 100644 index 000000000000..e26b98fb9d43 --- /dev/null +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -0,0 +1,4 @@ +add_partially_linked_object(LLVMExecutionEngine + ExecutionEngine.cpp + ExecutionEngineBindings.cpp + ) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp new file mode 100644 index 000000000000..29a05bbbdb64 --- /dev/null +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -0,0 +1,1010 @@ +//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common interface used by the various execution engine +// subclasses. 
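+// +// A typical embedding looks roughly like this (an illustrative sketch built +// from the interfaces defined below; error handling is omitted and 'envp' is +// assumed to come from the host environment): +// +// ExecutionEngine *EE = ExecutionEngine::create(new ExistingModuleProvider(M)); +// std::vector<std::string> Args(1, "prog"); +// int rc = EE->runFunctionAsMain(M->getFunction("main"), Args, envp); +//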
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/ModuleProvider.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Config/alloca.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/System/Host.h" +#include "llvm/Target/TargetData.h" +#include <cmath> +#include <cstring> +using namespace llvm; + +STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); +STATISTIC(NumGlobals , "Number of global vars initialized"); + +ExecutionEngine::EECtorFn ExecutionEngine::JITCtor = 0; +ExecutionEngine::EECtorFn ExecutionEngine::InterpCtor = 0; +ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0; + + +ExecutionEngine::ExecutionEngine(ModuleProvider *P) : LazyFunctionCreator(0) { + LazyCompilationDisabled = false; + GVCompilationDisabled = false; + SymbolSearchingDisabled = false; + DlsymStubsEnabled = false; + Modules.push_back(P); + assert(P && "ModuleProvider is null?"); +} + +ExecutionEngine::~ExecutionEngine() { + clearAllGlobalMappings(); + for (unsigned i = 0, e = Modules.size(); i != e; ++i) + delete Modules[i]; +} + +char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) { + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); + return new char[GVSize]; +} + +/// removeModuleProvider - Remove a ModuleProvider from the list of modules. +/// Releases the Module from the ModuleProvider, materializing it in the +/// process, and returns the materialized Module. +Module* ExecutionEngine::removeModuleProvider(ModuleProvider *P, + std::string *ErrInfo) { + for (SmallVector<ModuleProvider*, 1>::iterator I = Modules.begin(), + E = Modules.end(); I != E; ++I) { + ModuleProvider *MP = *I; + if (MP == P) { + Modules.erase(I); + clearGlobalMappingsFromModule(MP->getModule()); + return MP->releaseModule(ErrInfo); + } + } + return NULL; +} + +/// deleteModuleProvider - Remove a ModuleProvider from the list of modules, +/// and delete the ModuleProvider and owned Module. Avoids materializing +/// the underlying module. +void ExecutionEngine::deleteModuleProvider(ModuleProvider *P, + std::string *ErrInfo) { + for (SmallVector<ModuleProvider*, 1>::iterator I = Modules.begin(), + E = Modules.end(); I != E; ++I) { + ModuleProvider *MP = *I; + if (MP == P) { + Modules.erase(I); + clearGlobalMappingsFromModule(MP->getModule()); + delete MP; + return; + } + } +} + +/// FindFunctionNamed - Search all of the active modules to find the one that +/// defines FnName. This is a very slow operation and shouldn't be used for +/// general code. +Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { + for (unsigned i = 0, e = Modules.size(); i != e; ++i) { + if (Function *F = Modules[i]->getModule()->getFunction(FnName)) + return F; + } + return 0; +} + + +/// addGlobalMapping - Tell the execution engine that the specified global is +/// at the specified location. This is used internally as functions are JIT'd +/// and as global variables are laid out in memory. It can and should also be +/// used by clients of the EE that want to have an LLVM global overlay +/// existing data in memory.
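+/// For example (an illustrative sketch; the names are hypothetical): +/// static int HostFlag; // storage owned by the host process +/// EE->addGlobalMapping(FlagGV, &HostFlag); // JIT'd code now uses HostFlag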
+void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { + MutexGuard locked(lock); + + DOUT << "JIT: Map \'" << GV->getNameStart() << "\' to [" << Addr << "]\n"; + void *&CurVal = state.getGlobalAddressMap(locked)[GV]; + assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); + CurVal = Addr; + + // If we are using the reverse mapping, add it too + if (!state.getGlobalAddressReverseMap(locked).empty()) { + const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr]; + assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + V = GV; + } +} + +/// clearAllGlobalMappings - Clear all global mappings and start over again. +/// Use this in dynamic compilation scenarios when you want to move globals. +void ExecutionEngine::clearAllGlobalMappings() { + MutexGuard locked(lock); + + state.getGlobalAddressMap(locked).clear(); + state.getGlobalAddressReverseMap(locked).clear(); +} + +/// clearGlobalMappingsFromModule - Clear all global mappings that came from a +/// particular module, because it has been removed from the JIT. +void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { + MutexGuard locked(lock); + + for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { + state.getGlobalAddressMap(locked).erase(FI); + state.getGlobalAddressReverseMap(locked).erase(FI); + } + for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); + GI != GE; ++GI) { + state.getGlobalAddressMap(locked).erase(GI); + state.getGlobalAddressReverseMap(locked).erase(GI); + } +} + +/// updateGlobalMapping - Replace an existing mapping for GV with a new +/// address. This updates both maps as required. If "Addr" is null, the +/// entry for the global is removed from the mappings. +void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { + MutexGuard locked(lock); + + std::map<const GlobalValue*, void*> &Map = state.getGlobalAddressMap(locked); + + // Deleting from the mapping? + if (Addr == 0) { + std::map<const GlobalValue*, void*>::iterator I = Map.find(GV); + void *OldVal; + if (I == Map.end()) + OldVal = 0; + else { + OldVal = I->second; + Map.erase(I); + } + + if (!state.getGlobalAddressReverseMap(locked).empty()) + state.getGlobalAddressReverseMap(locked).erase(Addr); + return OldVal; + } + + void *&CurVal = Map[GV]; + void *OldVal = CurVal; + + if (CurVal && !state.getGlobalAddressReverseMap(locked).empty()) + state.getGlobalAddressReverseMap(locked).erase(CurVal); + CurVal = Addr; + + // If we are using the reverse mapping, add it too + if (!state.getGlobalAddressReverseMap(locked).empty()) { + const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr]; + assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + V = GV; + } + return OldVal; +} + +/// getPointerToGlobalIfAvailable - This returns the address of the specified +/// global value if it has already been codegen'd, otherwise it returns null. +/// +void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { + MutexGuard locked(lock); + + std::map<const GlobalValue*, void*>::iterator I = + state.getGlobalAddressMap(locked).find(GV); + return I != state.getGlobalAddressMap(locked).end() ? I->second : 0; +} + +/// getGlobalValueAtAddress - Return the LLVM global value object that starts +/// at the specified address. +/// +const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { + MutexGuard locked(lock); + + // If we haven't computed the reverse mapping yet, do so first.
+ if (state.getGlobalAddressReverseMap(locked).empty()) { + for (std::map<const GlobalValue*, void*>::iterator + I = state.getGlobalAddressMap(locked).begin(), + E = state.getGlobalAddressMap(locked).end(); I != E; ++I) + state.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second, + I->first)); + } + + std::map<void*, const GlobalValue*>::iterator I = + state.getGlobalAddressReverseMap(locked).find(Addr); + return I != state.getGlobalAddressReverseMap(locked).end() ? I->second : 0; +} + +// CreateArgv - Turn a vector of strings into a nice argv style array of +// pointers to null terminated strings. +// +static void *CreateArgv(ExecutionEngine *EE, + const std::vector<std::string> &InputArgv) { + unsigned PtrSize = EE->getTargetData()->getPointerSize(); + char *Result = new char[(InputArgv.size()+1)*PtrSize]; + + DOUT << "JIT: ARGV = " << (void*)Result << "\n"; + const Type *SBytePtr = PointerType::getUnqual(Type::Int8Ty); + + for (unsigned i = 0; i != InputArgv.size(); ++i) { + unsigned Size = InputArgv[i].size()+1; + char *Dest = new char[Size]; + DOUT << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"; + + std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); + Dest[Size-1] = 0; + + // Endian safe: Result[i] = (PointerTy)Dest; + EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Result+i*PtrSize), + SBytePtr); + } + + // Null terminate it + EE->StoreValueToMemory(PTOGV(0), + (GenericValue*)(Result+InputArgv.size()*PtrSize), + SBytePtr); + return Result; +} + + +/// runStaticConstructorsDestructors - This method is used to execute all of +/// the static constructors or destructors for a module, depending on the +/// value of isDtors. +void ExecutionEngine::runStaticConstructorsDestructors(Module *module, bool isDtors) { + const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors"; + + // Execute global ctors/dtors for each module in the program. + + GlobalVariable *GV = module->getNamedGlobal(Name); + + // If this global has internal linkage, or if it has a use, then it must be + // an old-style (llvmgcc3) static ctor with __main linked in and in use. If + // this is the case, don't execute any of the global ctors; __main will do + // it. + if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return; + + // Should be an array of '{ int, void ()* }' structs. The first value is + // the init priority, which we ignore. + ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!InitList) return; + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = + dyn_cast<ConstantStruct>(InitList->getOperand(i))) { + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + Constant *FP = CS->getOperand(1); + if (FP->isNullValue()) + break; // Found a null terminator, exit. + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP)) + if (CE->isCast()) + FP = CE->getOperand(0); + if (Function *F = dyn_cast<Function>(FP)) { + // Execute the ctor/dtor function! + runFunction(F, std::vector<GenericValue>()); + } + } +} + +/// runStaticConstructorsDestructors - This method is used to execute all of +/// the static constructors or destructors for a program, depending on the +/// value of isDtors. +void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { + // Execute global ctors/dtors for each module in the program. + for (unsigned m = 0, e = Modules.size(); m != e; ++m) + runStaticConstructorsDestructors(Modules[m]->getModule(), isDtors); +} + +#ifndef NDEBUG +/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
+static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { + unsigned PtrSize = EE->getTargetData()->getPointerSize(); + for (unsigned i = 0; i < PtrSize; ++i) + if (*(i + (uint8_t*)Loc)) + return false; + return true; +} +#endif + +/// runFunctionAsMain - This is a helper function which wraps runFunction to +/// handle the common task of starting up main with the specified argc, argv, +/// and envp parameters. +int ExecutionEngine::runFunctionAsMain(Function *Fn, + const std::vector<std::string> &argv, + const char * const * envp) { + std::vector<GenericValue> GVArgs; + GenericValue GVArgc; + GVArgc.IntVal = APInt(32, argv.size()); + + // Check main() type + unsigned NumArgs = Fn->getFunctionType()->getNumParams(); + const FunctionType *FTy = Fn->getFunctionType(); + const Type* PPInt8Ty = + PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); + switch (NumArgs) { + case 3: + if (FTy->getParamType(2) != PPInt8Ty) { + cerr << "Invalid type for third argument of main() supplied\n"; + abort(); + } + // FALLS THROUGH + case 2: + if (FTy->getParamType(1) != PPInt8Ty) { + cerr << "Invalid type for second argument of main() supplied\n"; + abort(); + } + // FALLS THROUGH + case 1: + if (FTy->getParamType(0) != Type::Int32Ty) { + cerr << "Invalid type for first argument of main() supplied\n"; + abort(); + } + // FALLS THROUGH + case 0: + if (!isa<IntegerType>(FTy->getReturnType()) && + FTy->getReturnType() != Type::VoidTy) { + cerr << "Invalid return type of main() supplied\n"; + abort(); + } + break; + default: + cerr << "Invalid number of arguments of main() supplied\n"; + abort(); + } + + if (NumArgs) { + GVArgs.push_back(GVArgc); // Arg #0 = argc. + if (NumArgs > 1) { + GVArgs.push_back(PTOGV(CreateArgv(this, argv))); // Arg #1 = argv. + assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) && + "argv[0] was null after CreateArgv"); + if (NumArgs > 2) { + std::vector<std::string> EnvVars; + for (unsigned i = 0; envp[i]; ++i) + EnvVars.push_back(envp[i]); + GVArgs.push_back(PTOGV(CreateArgv(this, EnvVars))); // Arg #2 = envp. + } + } + } + return runFunction(Fn, GVArgs).IntVal.getZExtValue(); +} + +/// If possible, create a JIT, unless the caller specifically requests an +/// Interpreter or there's an error. If even an Interpreter cannot be created, +/// NULL is returned. +/// +ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, + bool ForceInterpreter, + std::string *ErrorStr, + CodeGenOpt::Level OptLevel) { + ExecutionEngine *EE = 0; + + // Make sure we can resolve symbols in the program as well. The zero arg + // to the function tells DynamicLibrary to load the program, not a library. + if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) + return 0; + + // Unless the interpreter was explicitly selected, try making a JIT. + if (!ForceInterpreter && JITCtor) + EE = JITCtor(MP, ErrorStr, OptLevel); + + // If we can't make a JIT, make an interpreter instead. + if (EE == 0 && InterpCtor) + EE = InterpCtor(MP, ErrorStr, OptLevel); + + return EE; +} + +ExecutionEngine *ExecutionEngine::create(Module *M) { + return create(new ExistingModuleProvider(M)); +} + +/// getPointerToGlobal - This returns the address of the specified global +/// value. This may involve code generation if it's a function. +/// +void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { + if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV))) + return getPointerToFunction(F); + + MutexGuard locked(lock); + void *p = state.getGlobalAddressMap(locked)[GV]; + if (p) + return p; + + // Global variable might have been added since interpreter started.
+ if (GlobalVariable *GVar = + const_cast<GlobalVariable*>(dyn_cast<GlobalVariable>(GV))) + EmitGlobalVariable(GVar); + else + assert(0 && "Global hasn't had an address allocated yet!"); + return state.getGlobalAddressMap(locked)[GV]; +} + +/// This function converts a Constant* into a GenericValue. The interesting +/// part is if C is a ConstantExpr. +/// @brief Get a GenericValue for a Constant* +GenericValue ExecutionEngine::getConstantValue(const Constant *C) { + // If it's undefined, return garbage. + if (isa<UndefValue>(C)) + return GenericValue(); + + // If the value is a ConstantExpr + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + Constant *Op0 = CE->getOperand(0); + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: { + // Compute the index + GenericValue Result = getConstantValue(Op0); + SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end()); + uint64_t Offset = + TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size()); + + char* tmp = (char*) Result.PointerVal; + Result = PTOGV(tmp + Offset); + return Result; + } + case Instruction::Trunc: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.trunc(BitWidth); + return GV; + } + case Instruction::ZExt: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.zext(BitWidth); + return GV; + } + case Instruction::SExt: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.sext(BitWidth); + return GV; + } + case Instruction::FPTrunc: { + // FIXME long double + GenericValue GV = getConstantValue(Op0); + GV.FloatVal = float(GV.DoubleVal); + return GV; + } + case Instruction::FPExt:{ + // FIXME long double + GenericValue GV = getConstantValue(Op0); + GV.DoubleVal = double(GV.FloatVal); + return GV; + } + case Instruction::UIToFP: { + GenericValue GV = getConstantValue(Op0); + if (CE->getType() == Type::FloatTy) + GV.FloatVal = float(GV.IntVal.roundToDouble()); + else if (CE->getType() == Type::DoubleTy) + GV.DoubleVal = GV.IntVal.roundToDouble(); + else if (CE->getType() == Type::X86_FP80Ty) { + const uint64_t zero[] = {0, 0}; + APFloat apf = APFloat(APInt(80, 2, zero)); + (void)apf.convertFromAPInt(GV.IntVal, + false, + APFloat::rmNearestTiesToEven); + GV.IntVal = apf.bitcastToAPInt(); + } + return GV; + } + case Instruction::SIToFP: { + GenericValue GV = getConstantValue(Op0); + if (CE->getType() == Type::FloatTy) + GV.FloatVal = float(GV.IntVal.signedRoundToDouble()); + else if (CE->getType() == Type::DoubleTy) + GV.DoubleVal = GV.IntVal.signedRoundToDouble(); + else if (CE->getType() == Type::X86_FP80Ty) { + const uint64_t zero[] = { 0, 0}; + APFloat apf = APFloat(APInt(80, 2, zero)); + (void)apf.convertFromAPInt(GV.IntVal, + true, + APFloat::rmNearestTiesToEven); + GV.IntVal = apf.bitcastToAPInt(); + } + return GV; + } + case Instruction::FPToUI: // double->APInt conversion handles sign + case Instruction::FPToSI: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth(); + if (Op0->getType() == Type::FloatTy) + GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth); + else if (Op0->getType() == Type::DoubleTy) + GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); + else if (Op0->getType() == Type::X86_FP80Ty) { + APFloat apf = APFloat(GV.IntVal); + uint64_t v; + bool ignored; + (void)apf.convertToInteger(&v, BitWidth, + CE->getOpcode()==Instruction::FPToSI, +
APFloat::rmTowardZero, &ignored); + GV.IntVal = v; // endian? + } + return GV; + } + case Instruction::PtrToInt: { + GenericValue GV = getConstantValue(Op0); + uint32_t PtrWidth = TD->getPointerSizeInBits(); + GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); + return GV; + } + case Instruction::IntToPtr: { + GenericValue GV = getConstantValue(Op0); + uint32_t PtrWidth = TD->getPointerSizeInBits(); + if (PtrWidth != GV.IntVal.getBitWidth()) + GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); + assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); + GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); + return GV; + } + case Instruction::BitCast: { + GenericValue GV = getConstantValue(Op0); + const Type* DestTy = CE->getType(); + switch (Op0->getType()->getTypeID()) { + default: assert(0 && "Invalid bitcast operand"); + case Type::IntegerTyID: + assert(DestTy->isFloatingPoint() && "invalid bitcast"); + if (DestTy == Type::FloatTy) + GV.FloatVal = GV.IntVal.bitsToFloat(); + else if (DestTy == Type::DoubleTy) + GV.DoubleVal = GV.IntVal.bitsToDouble(); + break; + case Type::FloatTyID: + assert(DestTy == Type::Int32Ty && "Invalid bitcast"); + GV.IntVal.floatToBits(GV.FloatVal); + break; + case Type::DoubleTyID: + assert(DestTy == Type::Int64Ty && "Invalid bitcast"); + GV.IntVal.doubleToBits(GV.DoubleVal); + break; + case Type::PointerTyID: + assert(isa<PointerType>(DestTy) && "Invalid bitcast"); + break; // getConstantValue(Op0) above already converted it + } + return GV; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + GenericValue LHS = getConstantValue(Op0); + GenericValue RHS = getConstantValue(CE->getOperand(1)); + GenericValue GV; + switch (CE->getOperand(0)->getType()->getTypeID()) { + default: assert(0 && "Bad add type!"); abort(); + case Type::IntegerTyID: + switch (CE->getOpcode()) { + default: assert(0 && "Invalid integer opcode"); + case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break; + case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break; + case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break; + case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break; + case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break; + case Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break; + case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break; + case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break; + case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break; + case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break; + } + break; + case Type::FloatTyID: + switch (CE->getOpcode()) { + default: assert(0 && "Invalid float opcode"); abort(); + case Instruction::Add: + GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break; + case Instruction::Sub: + GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break; + case Instruction::Mul: + GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break; + case Instruction::FDiv: + GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break; + case Instruction::FRem: + GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break; + } + break; + case Type::DoubleTyID: + switch (CE->getOpcode()) { + default: assert(0 && "Invalid double opcode"); abort(); + case Instruction::Add: + GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break; + case Instruction::Sub: + GV.DoubleVal
= LHS.DoubleVal - RHS.DoubleVal; break; + case Instruction::Mul: + GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break; + case Instruction::FDiv: + GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break; + case Instruction::FRem: + GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break; + } + break; + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: { + APFloat apfLHS = APFloat(LHS.IntVal); + switch (CE->getOpcode()) { + default: assert(0 && "Invalid long double opcode"); abort(); + case Instruction::Add: + apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::Sub: + apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::Mul: + apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FDiv: + apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FRem: + apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + } + } + break; + } + return GV; + } + default: + break; + } + cerr << "ConstantExpr not handled: " << *CE << "\n"; + abort(); + } + + GenericValue Result; + switch (C->getType()->getTypeID()) { + case Type::FloatTyID: + Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat(); + break; + case Type::DoubleTyID: + Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble(); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + Result.IntVal = cast<ConstantFP>(C)->getValueAPF().bitcastToAPInt(); + break; + case Type::IntegerTyID: + Result.IntVal = cast<ConstantInt>(C)->getValue(); + break; + case Type::PointerTyID: + if (isa<ConstantPointerNull>(C)) + Result.PointerVal = 0; + else if (const Function *F = dyn_cast<Function>(C)) + Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F))); + else if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C)) + Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV))); + else + assert(0 && "Unknown constant pointer type!"); + break; + default: + cerr << "ERROR: Constant unimplemented for type: " << *C->getType() << "\n"; + abort(); + } + return Result; +} + +/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst +/// with the integer held in IntVal. +static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, + unsigned StoreBytes) { + assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); + uint8_t *Src = (uint8_t *)IntVal.getRawData(); + + if (sys::isLittleEndianHost()) + // Little-endian host - the source is ordered from LSB to MSB. Order the + // destination from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, StoreBytes); + else { + // Big-endian host - the source is an array of 64 bit words ordered from + // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination + // from MSB to LSB: Reverse the word order, but not the bytes in a word. + while (StoreBytes > sizeof(uint64_t)) { + StoreBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); + Src += sizeof(uint64_t); + } + + memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); + } +} + +/// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr. Ptr +/// is the address of the memory at which to store Val, cast to GenericValue *.
+/// It is not a pointer to a GenericValue containing the address at which to +/// store Val. +void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, + GenericValue *Ptr, const Type *Ty) { + const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty); + + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes); + break; + case Type::FloatTyID: + *((float*)Ptr) = Val.FloatVal; + break; + case Type::DoubleTyID: + *((double*)Ptr) = Val.DoubleVal; + break; + case Type::X86_FP80TyID: + memcpy(Ptr, Val.IntVal.getRawData(), 10); + break; + case Type::PointerTyID: + // Ensure 64 bit target pointers are fully initialized on 32 bit hosts. + if (StoreBytes != sizeof(PointerTy)) + memset(Ptr, 0, StoreBytes); + + *((PointerTy*)Ptr) = Val.PointerVal; + break; + default: + cerr << "Cannot store value of type " << *Ty << "!\n"; + } + + if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) + // Host and target are different endian - reverse the stored bytes. + std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); +} + +/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting +/// from Src into IntVal, which is assumed to be wide enough and to hold zero. +static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { + assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); + uint8_t *Dst = (uint8_t *)IntVal.getRawData(); + + if (sys::isLittleEndianHost()) + // Little-endian host - the destination must be ordered from LSB to MSB. + // The source is ordered from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, LoadBytes); + else { + // Big-endian - the destination is an array of 64 bit words ordered from + // LSW to MSW. Each word must be ordered from MSB to LSB. The source is + // ordered from MSB to LSB: Reverse the word order, but not the bytes in + // a word. + while (LoadBytes > sizeof(uint64_t)) { + LoadBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); + Dst += sizeof(uint64_t); + } + + memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); + } +} + +/// LoadValueFromMemory - Load a value of type Ty from the memory at Ptr into +/// Result. +void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, + GenericValue *Ptr, + const Type *Ty) { + const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty); + + if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) { + // Host and target are different endian - reverse copy the stored + // bytes into a buffer, and load from that. + uint8_t *Src = (uint8_t*)Ptr; + uint8_t *Buf = (uint8_t*)alloca(LoadBytes); + std::reverse_copy(Src, Src + LoadBytes, Buf); + Ptr = (GenericValue*)Buf; + } + + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + // An APInt with all words initially zero. + Result.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0); + LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes); + break; + case Type::FloatTyID: + Result.FloatVal = *((float*)Ptr); + break; + case Type::DoubleTyID: + Result.DoubleVal = *((double*)Ptr); + break; + case Type::PointerTyID: + Result.PointerVal = *((PointerTy*)Ptr); + break; + case Type::X86_FP80TyID: { + // This is endian dependent, but it will only work on x86 anyway. + // FIXME: Will not trap if loading a signaling NaN.
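+ // The 10 bytes hold the x87 extended-precision layout: an 8-byte + // significand followed by 2 bytes of sign and exponent; they are copied + // into two host words to form APInt's 80-bit image.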
+    uint64_t y[2];
+    memcpy(y, Ptr, 10);
+    Result.IntVal = APInt(80, 2, y);
+    break;
+  }
+  default:
+    cerr << "Cannot load value of type " << *Ty << "!\n";
+    abort();
+  }
+}
+
+// InitializeMemory - Recursive function to apply a Constant value into the
+// specified memory location...
+//
+void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
+  DOUT << "JIT: Initializing " << Addr << " ";
+  DEBUG(Init->dump());
+  if (isa<UndefValue>(Init)) {
+    return;
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+    unsigned ElementSize =
+      getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
+    for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+      InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
+    return;
+  } else if (isa<ConstantAggregateZero>(Init)) {
+    memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
+    return;
+  } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+    unsigned ElementSize =
+      getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
+    for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+      InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
+    return;
+  } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+    const StructLayout *SL =
+      getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
+    for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+      InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
+    return;
+  } else if (Init->getType()->isFirstClassType()) {
+    GenericValue Val = getConstantValue(Init);
+    StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
+    return;
+  }
+
+  cerr << "Bad Type: " << *Init->getType() << "\n";
+  assert(0 && "Unknown constant type to initialize memory with!");
+}
+
+/// EmitGlobals - Emit all of the global variables to memory, storing their
+/// addresses into GlobalAddress.  This must make sure to copy the contents of
+/// their initializers into the memory.
+///
+void ExecutionEngine::emitGlobals() {
+
+  // Loop over all of the global variables in the program, allocating the
+  // memory to hold them.  If there is more than one module, do a prepass over
+  // globals to figure out how the different modules should link together.
+  //
+  std::map<std::pair<std::string, const Type*>,
+           const GlobalValue*> LinkedGlobalsMap;
+
+  if (Modules.size() != 1) {
+    for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+      Module &M = *Modules[m]->getModule();
+      for (Module::const_global_iterator I = M.global_begin(),
+           E = M.global_end(); I != E; ++I) {
+        const GlobalValue *GV = I;
+        if (GV->hasLocalLinkage() || GV->isDeclaration() ||
+            GV->hasAppendingLinkage() || !GV->hasName())
+          continue;// Ignore external globals and globals with internal linkage.
+
+        const GlobalValue *&GVEntry =
+          LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+
+        // If this is the first time we've seen this global, it is the
+        // canonical version.
+        if (!GVEntry) {
+          GVEntry = GV;
+          continue;
+        }
+
+        // If the existing global is strong, never replace it.
+        if (GVEntry->hasExternalLinkage() ||
+            GVEntry->hasDLLImportLinkage() ||
+            GVEntry->hasDLLExportLinkage())
+          continue;
+
+        // Otherwise, we know it's linkonce/weak, replace it if this is a
+        // strong symbol.  FIXME is this right for common?
+ if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage()) + GVEntry = GV; + } + } + } + + std::vector NonCanonicalGlobals; + for (unsigned m = 0, e = Modules.size(); m != e; ++m) { + Module &M = *Modules[m]->getModule(); + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + // In the multi-module case, see what this global maps to. + if (!LinkedGlobalsMap.empty()) { + if (const GlobalValue *GVEntry = + LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) { + // If something else is the canonical global, ignore this one. + if (GVEntry != &*I) { + NonCanonicalGlobals.push_back(I); + continue; + } + } + } + + if (!I->isDeclaration()) { + addGlobalMapping(I, getMemoryForGV(I)); + } else { + // External variable reference. Try to use the dynamic loader to + // get a pointer to it. + if (void *SymAddr = + sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName().c_str())) + addGlobalMapping(I, SymAddr); + else { + cerr << "Could not resolve external global address: " + << I->getName() << "\n"; + abort(); + } + } + } + + // If there are multiple modules, map the non-canonical globals to their + // canonical location. + if (!NonCanonicalGlobals.empty()) { + for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) { + const GlobalValue *GV = NonCanonicalGlobals[i]; + const GlobalValue *CGV = + LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())]; + void *Ptr = getPointerToGlobalIfAvailable(CGV); + assert(Ptr && "Canonical global wasn't codegen'd!"); + addGlobalMapping(GV, Ptr); + } + } + + // Now that all of the globals are set up in memory, loop through them all + // and initialize their contents. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (!I->isDeclaration()) { + if (!LinkedGlobalsMap.empty()) { + if (const GlobalValue *GVEntry = + LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) + if (GVEntry != &*I) // Not the canonical variable. + continue; + } + EmitGlobalVariable(I); + } + } + } +} + +// EmitGlobalVariable - This method emits the specified global variable to the +// address specified in GlobalAddresses, or allocates new memory if it's not +// already in the map. +void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { + void *GA = getPointerToGlobalIfAvailable(GV); + + if (GA == 0) { + // If it's not already specified, allocate memory for the global. + GA = getMemoryForGV(GV); + addGlobalMapping(GV, GA); + } + + // Don't initialize if it's thread local, let the client do it. + if (!GV->isThreadLocal()) + InitializeMemory(GV->getInitializer(), GA); + + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); + NumInitBytes += (unsigned)GVSize; + ++NumGlobals; +} diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp new file mode 100644 index 000000000000..83397a586d53 --- /dev/null +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -0,0 +1,206 @@ +//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings for the ExecutionEngine library. 
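+//
+// A minimal usage sketch (hypothetical caller code; `MP` is assumed to be a
+// valid LLVMModuleProviderRef):
+//
+//   LLVMExecutionEngineRef EE;
+//   char *Error = 0;
+//   if (LLVMCreateExecutionEngine(&EE, MP, &Error)) {
+//     fprintf(stderr, "failed to create EE: %s\n", Error);
+//     free(Error);  // the bindings strdup() their error messages
+//   }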
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "llvm-c/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include + +using namespace llvm; + +/*===-- Operations on generic values --------------------------------------===*/ + +LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, + unsigned long long N, + int IsSigned) { + GenericValue *GenVal = new GenericValue(); + GenVal->IntVal = APInt(unwrap(Ty)->getBitWidth(), N, IsSigned); + return wrap(GenVal); +} + +LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) { + GenericValue *GenVal = new GenericValue(); + GenVal->PointerVal = P; + return wrap(GenVal); +} + +LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) { + GenericValue *GenVal = new GenericValue(); + switch (unwrap(TyRef)->getTypeID()) { + case Type::FloatTyID: + GenVal->FloatVal = N; + break; + case Type::DoubleTyID: + GenVal->DoubleVal = N; + break; + default: + assert(0 && "LLVMGenericValueToFloat supports only float and double."); + break; + } + return wrap(GenVal); +} + +unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) { + return unwrap(GenValRef)->IntVal.getBitWidth(); +} + +unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef, + int IsSigned) { + GenericValue *GenVal = unwrap(GenValRef); + if (IsSigned) + return GenVal->IntVal.getSExtValue(); + else + return GenVal->IntVal.getZExtValue(); +} + +void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) { + return unwrap(GenVal)->PointerVal; +} + +double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) { + switch (unwrap(TyRef)->getTypeID()) { + case Type::FloatTyID: + return unwrap(GenVal)->FloatVal; + case Type::DoubleTyID: + return unwrap(GenVal)->DoubleVal; + default: + assert(0 && "LLVMGenericValueToFloat supports only float and double."); + break; + } + return 0; // Not reached +} + +void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { + delete unwrap(GenVal); +} + +/*===-- Operations on execution engines -----------------------------------===*/ + +int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError) { + std::string Error; + if (ExecutionEngine *EE = ExecutionEngine::create(unwrap(MP), false, &Error)){ + *OutEE = wrap(EE); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { + std::string Error; + if (ExecutionEngine *Interp = + ExecutionEngine::create(unwrap(MP), true, &Error)) { + *OutInterp = wrap(Interp); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { + std::string Error; + if (ExecutionEngine *JIT = + ExecutionEngine::createJIT(unwrap(MP), &Error, 0, + (CodeGenOpt::Level)OptLevel)) { + *OutJIT = wrap(JIT); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { + delete unwrap(EE); +} + +void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) { + unwrap(EE)->runStaticConstructorsDestructors(false); +} + +void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) { + unwrap(EE)->runStaticConstructorsDestructors(true); +} + +int 
LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned ArgC, const char * const *ArgV, + const char * const *EnvP) { + std::vector ArgVec; + for (unsigned I = 0; I != ArgC; ++I) + ArgVec.push_back(ArgV[I]); + + return unwrap(EE)->runFunctionAsMain(unwrap(F), ArgVec, EnvP); +} + +LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned NumArgs, + LLVMGenericValueRef *Args) { + std::vector ArgVec; + ArgVec.reserve(NumArgs); + for (unsigned I = 0; I != NumArgs; ++I) + ArgVec.push_back(*unwrap(Args[I])); + + GenericValue *Result = new GenericValue(); + *Result = unwrap(EE)->runFunction(unwrap(F), ArgVec); + return wrap(Result); +} + +void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { + unwrap(EE)->freeMachineCodeForFunction(unwrap(F)); +} + +void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ + unwrap(EE)->addModuleProvider(unwrap(MP)); +} + +int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, + LLVMModuleProviderRef MP, + LLVMModuleRef *OutMod, char **OutError) { + std::string Error; + if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) { + *OutMod = wrap(Gone); + return 0; + } + if (OutError) + *OutError = strdup(Error.c_str()); + return 1; +} + +int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, + LLVMValueRef *OutFn) { + if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { + *OutFn = wrap(F); + return 0; + } + return 1; +} + +LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { + return wrap(unwrap(EE)->getTargetData()); +} + +void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, + void* Addr) { + unwrap(EE)->addGlobalMapping(unwrap(Global), Addr); +} + +void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { + return unwrap(EE)->getPointerToGlobal(unwrap(Global)); +} diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt new file mode 100644 index 000000000000..626e804e78e6 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -0,0 +1,5 @@ +add_partially_linked_object(LLVMInterpreter + Execution.cpp + ExternalFunctions.cpp + Interpreter.cpp + ) diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp new file mode 100644 index 000000000000..765fed248f98 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -0,0 +1,1382 @@ +//===-- Execution.cpp - Implement code to simulate the program ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the actual instruction interpreter. 
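+// Interpretation is a simple fetch/dispatch loop: run() advances the current
+// ExecutionContext's instruction pointer and routes each Instruction to the
+// matching visit* method below via the InstVisitor machinery.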
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "interpreter" +#include "Interpreter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include +using namespace llvm; + +STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed"); +static Interpreter *TheEE = 0; + +static cl::opt PrintVolatile("interpreter-print-volatile", cl::Hidden, + cl::desc("make the interpreter print every volatile load and store")); + +//===----------------------------------------------------------------------===// +// Various Helper Functions +//===----------------------------------------------------------------------===// + +static inline uint64_t doSignExtension(uint64_t Val, const IntegerType* ITy) { + // Determine if the value is signed or not + bool isSigned = (Val & (1 << (ITy->getBitWidth()-1))) != 0; + // If its signed, extend the sign bits + if (isSigned) + Val |= ~ITy->getBitMask(); + return Val; +} + +static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) { + SF.Values[V] = Val; +} + +void Interpreter::initializeExecutionEngine() { + TheEE = this; +} + +//===----------------------------------------------------------------------===// +// Binary Instruction Implementations +//===----------------------------------------------------------------------===// + +#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \ + case Type::TY##TyID: \ + Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \ + break + +#define IMPLEMENT_INTEGER_BINOP1(OP, TY) \ + case Type::IntegerTyID: { \ + Dest.IntVal = Src1.IntVal OP Src2.IntVal; \ + break; \ + } + + +static void executeAddInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_BINOP1(+, Ty); + IMPLEMENT_BINARY_OPERATOR(+, Float); + IMPLEMENT_BINARY_OPERATOR(+, Double); + default: + cerr << "Unhandled type for Add instruction: " << *Ty << "\n"; + abort(); + } +} + +static void executeSubInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_BINOP1(-, Ty); + IMPLEMENT_BINARY_OPERATOR(-, Float); + IMPLEMENT_BINARY_OPERATOR(-, Double); + default: + cerr << "Unhandled type for Sub instruction: " << *Ty << "\n"; + abort(); + } +} + +static void executeMulInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_BINOP1(*, Ty); + IMPLEMENT_BINARY_OPERATOR(*, Float); + IMPLEMENT_BINARY_OPERATOR(*, Double); + default: + cerr << "Unhandled type for Mul instruction: " << *Ty << "\n"; + abort(); + } +} + +static void executeFDivInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_BINARY_OPERATOR(/, Float); + IMPLEMENT_BINARY_OPERATOR(/, Double); + default: + cerr << "Unhandled type for FDiv instruction: " << *Ty << "\n"; + abort(); + } +} + +static void executeFRemInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal); + break; + case Type::DoubleTyID: + 
Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal); + break; + default: + cerr << "Unhandled type for Rem instruction: " << *Ty << "\n"; + abort(); + } +} + +#define IMPLEMENT_INTEGER_ICMP(OP, TY) \ + case Type::IntegerTyID: \ + Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ + break; + +// Handle pointers specially because they must be compared with only as much +// width as the host has. We _do not_ want to be comparing 64 bit values when +// running on a 32-bit target, otherwise the upper 32 bits might mess up +// comparisons if they contain garbage. +#define IMPLEMENT_POINTER_ICMP(OP) \ + case Type::PointerTyID: \ + Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \ + (void*)(intptr_t)Src2.PointerVal); \ + break; + +static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_POINTER_ICMP(==); + default: + cerr << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_POINTER_ICMP(!=); + default: + cerr << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_POINTER_ICMP(<); + default: + cerr << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_POINTER_ICMP(<); + default: + cerr << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_POINTER_ICMP(>); + default: + cerr << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_POINTER_ICMP(>); + default: + cerr << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_POINTER_ICMP(<=); + default: + cerr << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_POINTER_ICMP(<=); + default: + cerr << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_POINTER_ICMP(>=); + default: + cerr << 
"Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sge,Ty); + IMPLEMENT_POINTER_ICMP(>=); + default: + cerr << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +void Interpreter::visitICmpInst(ICmpInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break; + case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break; + default: + cerr << "Don't know how to handle this ICmp predicate!\n-->" << I; + abort(); + } + + SetValue(&I, R, SF); +} + +#define IMPLEMENT_FCMP(OP, TY) \ + case Type::TY##TyID: \ + Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ + break + +static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(==, Float); + IMPLEMENT_FCMP(==, Double); + default: + cerr << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(!=, Float); + IMPLEMENT_FCMP(!=, Double); + + default: + cerr << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(<=, Float); + IMPLEMENT_FCMP(<=, Double); + default: + cerr << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(>=, Float); + IMPLEMENT_FCMP(>=, Double); + default: + cerr << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(<, Float); + IMPLEMENT_FCMP(<, Double); + default: + cerr << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; + abort(); + } + return Dest; +} + +static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(>, Float); + IMPLEMENT_FCMP(>, Double); + default: + cerr << "Unhandled type for FCmp GT instruction: " << 
*Ty << "\n"; + abort(); + } + return Dest; +} + +#define IMPLEMENT_UNORDERED(TY, X,Y) \ + if (TY == Type::FloatTy) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ + Dest.IntVal = APInt(1,true); \ + return Dest; \ + } \ + } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,true); \ + return Dest; \ + } + + +static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OEQ(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_ONE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OLE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OGE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OLT(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OGT(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + if (Ty == Type::FloatTy) + Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && + Src2.FloatVal == Src2.FloatVal)); + else + Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && + Src2.DoubleVal == Src2.DoubleVal)); + return Dest; +} + +static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + if (Ty == Type::FloatTy) + Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || + Src2.FloatVal != Src2.FloatVal)); + else + Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || + Src2.DoubleVal != Src2.DoubleVal)); + return Dest; +} + +void Interpreter::visitFCmpInst(FCmpInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getPredicate()) { + case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; + case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OLE: 
R = executeFCMP_OLE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; + default: + cerr << "Don't know how to handle this FCmp predicate!\n-->" << I; + abort(); + } + + SetValue(&I, R, SF); +} + +static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + GenericValue Result; + switch (predicate) { + case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty); + case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty); + case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty); + case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty); + case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty); + case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty); + case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty); + case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty); + case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty); + case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty); + case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty); + case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty); + case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty); + case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty); + case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty); + case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty); + case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty); + case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty); + case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty); + case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty); + case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty); + case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); + case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); + case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); + case FCmpInst::FCMP_FALSE: { + GenericValue Result; + Result.IntVal = APInt(1, false); + return Result; + } + case FCmpInst::FCMP_TRUE: { + GenericValue Result; + Result.IntVal = APInt(1, true); + return Result; + } + default: + cerr << "Unhandled Cmp predicate\n"; + abort(); + } +} + +void Interpreter::visitBinaryOperator(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getOpcode()) { + case Instruction::Add: executeAddInst (R, Src1, Src2, Ty); break; + case Instruction::Sub: executeSubInst (R, Src1, Src2, Ty); break; + case Instruction::Mul: executeMulInst (R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst (R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst (R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + default: + 
cerr << "Don't know how to handle this binary operator!\n-->" << I; + abort(); + } + + SetValue(&I, R, SF); +} + +static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, + GenericValue Src3) { + return Src1.IntVal == 0 ? Src3 : Src2; +} + +void Interpreter::visitSelectInst(SelectInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue R = executeSelectInst(Src1, Src2, Src3); + SetValue(&I, R, SF); +} + + +//===----------------------------------------------------------------------===// +// Terminator Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::exitCalled(GenericValue GV) { + // runAtExitHandlers() assumes there are no stack frames, but + // if exit() was called, then it had a stack frame. Blow away + // the stack before interpreting atexit handlers. + ECStack.clear (); + runAtExitHandlers (); + exit (GV.IntVal.zextOrTrunc(32).getZExtValue()); +} + +/// Pop the last stack frame off of ECStack and then copy the result +/// back into the result variable if we are not returning void. The +/// result variable may be the ExitValue, or the Value of the calling +/// CallInst if there was a previous stack frame. This method may +/// invalidate any ECStack iterators you have. This method also takes +/// care of switching to the normal destination BB, if we are returning +/// from an invoke. +/// +void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy, + GenericValue Result) { + // Pop the current stack frame. + ECStack.pop_back(); + + if (ECStack.empty()) { // Finished main. Put result into exit code... + if (RetTy && RetTy->isInteger()) { // Nonvoid return type? + ExitValue = Result; // Capture the exit value of the program + } else { + memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); + } + } else { + // If we have a previous stack frame, and we have a previous call, + // fill in the return value... + ExecutionContext &CallingSF = ECStack.back(); + if (Instruction *I = CallingSF.Caller.getInstruction()) { + if (CallingSF.Caller.getType() != Type::VoidTy) // Save result... + SetValue(I, Result, CallingSF); + if (InvokeInst *II = dyn_cast (I)) + SwitchToNewBasicBlock (II->getNormalDest (), CallingSF); + CallingSF.Caller = CallSite(); // We returned from the call... + } + } +} + +void Interpreter::visitReturnInst(ReturnInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *RetTy = Type::VoidTy; + GenericValue Result; + + // Save away the return value... 
(if we are not 'ret void') + if (I.getNumOperands()) { + RetTy = I.getReturnValue()->getType(); + Result = getOperandValue(I.getReturnValue(), SF); + } + + popStackAndReturnValueToCaller(RetTy, Result); +} + +void Interpreter::visitUnwindInst(UnwindInst &I) { + // Unwind stack + Instruction *Inst; + do { + ECStack.pop_back (); + if (ECStack.empty ()) + abort (); + Inst = ECStack.back ().Caller.getInstruction (); + } while (!(Inst && isa (Inst))); + + // Return from invoke + ExecutionContext &InvokingSF = ECStack.back (); + InvokingSF.Caller = CallSite (); + + // Go to exceptional destination BB of invoke instruction + SwitchToNewBasicBlock(cast(Inst)->getUnwindDest(), InvokingSF); +} + +void Interpreter::visitUnreachableInst(UnreachableInst &I) { + cerr << "ERROR: Program executed an 'unreachable' instruction!\n"; + abort(); +} + +void Interpreter::visitBranchInst(BranchInst &I) { + ExecutionContext &SF = ECStack.back(); + BasicBlock *Dest; + + Dest = I.getSuccessor(0); // Uncond branches have a fixed dest... + if (!I.isUnconditional()) { + Value *Cond = I.getCondition(); + if (getOperandValue(Cond, SF).IntVal == 0) // If false cond... + Dest = I.getSuccessor(1); + } + SwitchToNewBasicBlock(Dest, SF); +} + +void Interpreter::visitSwitchInst(SwitchInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue CondVal = getOperandValue(I.getOperand(0), SF); + const Type *ElTy = I.getOperand(0)->getType(); + + // Check to see if any of the cases match... + BasicBlock *Dest = 0; + for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2) + if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy) + .IntVal != 0) { + Dest = cast(I.getOperand(i+1)); + break; + } + + if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default + SwitchToNewBasicBlock(Dest, SF); +} + +// SwitchToNewBasicBlock - This method is used to jump to a new basic block. +// This function handles the actual updating of block and instruction iterators +// as well as execution of all of the PHI nodes in the destination block. +// +// This method does this because all of the PHI nodes must be executed +// atomically, reading their inputs before any of the results are updated. Not +// doing this can cause problems if the PHI nodes depend on other PHI nodes for +// their inputs. If the input PHI node is updated before it is read, incorrect +// results can happen. Thus we use a two phase approach. +// +void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){ + BasicBlock *PrevBB = SF.CurBB; // Remember where we came from... + SF.CurBB = Dest; // Update CurBB to branch destination + SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr... + + if (!isa(SF.CurInst)) return; // Nothing fancy to do + + // Loop over all of the PHI nodes in the current block, reading their inputs. + std::vector ResultValues; + + for (; PHINode *PN = dyn_cast(SF.CurInst); ++SF.CurInst) { + // Search for the value corresponding to this previous bb... + int i = PN->getBasicBlockIndex(PrevBB); + assert(i != -1 && "PHINode doesn't contain entry for predecessor??"); + Value *IncomingValue = PN->getIncomingValue(i); + + // Save the incoming value for this PHI node... + ResultValues.push_back(getOperandValue(IncomingValue, SF)); + } + + // Now loop over all of the PHI nodes setting their values... 
+ SF.CurInst = SF.CurBB->begin(); + for (unsigned i = 0; isa(SF.CurInst); ++SF.CurInst, ++i) { + PHINode *PN = cast(SF.CurInst); + SetValue(PN, ResultValues[i], SF); + } +} + +//===----------------------------------------------------------------------===// +// Memory Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::visitAllocationInst(AllocationInst &I) { + ExecutionContext &SF = ECStack.back(); + + const Type *Ty = I.getType()->getElementType(); // Type to be allocated + + // Get the number of elements being allocated by the array... + unsigned NumElements = + getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue(); + + unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty); + + // Avoid malloc-ing zero bytes, use max()... + unsigned MemToAlloc = std::max(1U, NumElements * TypeSize); + + // Allocate enough memory to hold the type... + void *Memory = malloc(MemToAlloc); + + DOUT << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " + << NumElements << " (Total: " << MemToAlloc << ") at " + << uintptr_t(Memory) << '\n'; + + GenericValue Result = PTOGV(Memory); + assert(Result.PointerVal != 0 && "Null pointer returned by malloc!"); + SetValue(&I, Result, SF); + + if (I.getOpcode() == Instruction::Alloca) + ECStack.back().Allocas.add(Memory); +} + +void Interpreter::visitFreeInst(FreeInst &I) { + ExecutionContext &SF = ECStack.back(); + assert(isa(I.getOperand(0)->getType()) && "Freeing nonptr?"); + GenericValue Value = getOperandValue(I.getOperand(0), SF); + // TODO: Check to make sure memory is allocated + free(GVTOP(Value)); // Free memory +} + +// getElementOffset - The workhorse for getelementptr. +// +GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, + gep_type_iterator E, + ExecutionContext &SF) { + assert(isa(Ptr->getType()) && + "Cannot getElementOffset of a nonpointer type!"); + + uint64_t Total = 0; + + for (; I != E; ++I) { + if (const StructType *STy = dyn_cast(*I)) { + const StructLayout *SLO = TD.getStructLayout(STy); + + const ConstantInt *CPU = cast(I.getOperand()); + unsigned Index = unsigned(CPU->getZExtValue()); + + Total += SLO->getElementOffset(Index); + } else { + const SequentialType *ST = cast(*I); + // Get the index number for the array... which must be long type... 
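+      // Only i32 and i64 indices are expected here; a 32-bit index is
+      // sign-extended below so that a negative array index produces the
+      // correct negative byte offset.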
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF); + + int64_t Idx; + unsigned BitWidth = + cast(I.getOperand()->getType())->getBitWidth(); + if (BitWidth == 32) + Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue(); + else { + assert(BitWidth == 64 && "Invalid index type for getelementptr"); + Idx = (int64_t)IdxGV.IntVal.getZExtValue(); + } + Total += TD.getTypeAllocSize(ST->getElementType())*Idx; + } + } + + GenericValue Result; + Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total; + DOUT << "GEP Index " << Total << " bytes.\n"; + return Result; +} + +void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, TheEE->executeGEPOperation(I.getPointerOperand(), + gep_type_begin(I), gep_type_end(I), SF), SF); +} + +void Interpreter::visitLoadInst(LoadInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue SRC = getOperandValue(I.getPointerOperand(), SF); + GenericValue *Ptr = (GenericValue*)GVTOP(SRC); + GenericValue Result; + LoadValueFromMemory(Result, Ptr, I.getType()); + SetValue(&I, Result, SF); + if (I.isVolatile() && PrintVolatile) + cerr << "Volatile load " << I; +} + +void Interpreter::visitStoreInst(StoreInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Val = getOperandValue(I.getOperand(0), SF); + GenericValue SRC = getOperandValue(I.getPointerOperand(), SF); + StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC), + I.getOperand(0)->getType()); + if (I.isVolatile() && PrintVolatile) + cerr << "Volatile store: " << I; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::visitCallSite(CallSite CS) { + ExecutionContext &SF = ECStack.back(); + + // Check to see if this is an intrinsic function call... + Function *F = CS.getCalledFunction(); + if (F && F->isDeclaration ()) + switch (F->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + break; + case Intrinsic::vastart: { // va_start + GenericValue ArgIndex; + ArgIndex.UIntPairVal.first = ECStack.size() - 1; + ArgIndex.UIntPairVal.second = 0; + SetValue(CS.getInstruction(), ArgIndex, SF); + return; + } + case Intrinsic::vaend: // va_end is a noop for the interpreter + return; + case Intrinsic::vacopy: // va_copy: dest = src + SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF); + return; + default: + // If it is an unknown intrinsic function, use the intrinsic lowering + // class to transform it into hopefully tasty LLVM code. + // + BasicBlock::iterator me(CS.getInstruction()); + BasicBlock *Parent = CS.getInstruction()->getParent(); + bool atBegin(Parent->begin() == me); + if (!atBegin) + --me; + IL->LowerIntrinsicCall(cast(CS.getInstruction())); + + // Restore the CurInst pointer to the first instruction newly inserted, if + // any. + if (atBegin) { + SF.CurInst = Parent->begin(); + } else { + SF.CurInst = me; + ++SF.CurInst; + } + return; + } + + + SF.Caller = CS; + std::vector ArgVals; + const unsigned NumArgs = SF.Caller.arg_size(); + ArgVals.reserve(NumArgs); + uint16_t pNum = 1; + for (CallSite::arg_iterator i = SF.Caller.arg_begin(), + e = SF.Caller.arg_end(); i != e; ++i, ++pNum) { + Value *V = *i; + ArgVals.push_back(getOperandValue(V, SF)); + // Promote all integral types whose size is < sizeof(i32) into i32. 
+ // We do this by zero or sign extending the value as appropriate + // according to the parameter attributes + const Type *Ty = V->getType(); + if (Ty->isInteger() && (ArgVals.back().IntVal.getBitWidth() < 32)) { + if (CS.paramHasAttr(pNum, Attribute::ZExt)) + ArgVals.back().IntVal = ArgVals.back().IntVal.zext(32); + else if (CS.paramHasAttr(pNum, Attribute::SExt)) + ArgVals.back().IntVal = ArgVals.back().IntVal.sext(32); + } + } + + // To handle indirect calls, we must get the pointer value from the argument + // and treat it as a function pointer. + GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF); + callFunction((Function*)GVTOP(SRC), ArgVals); +} + +void Interpreter::visitShl(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +void Interpreter::visitLShr(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +void Interpreter::visitAShr(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.trunc(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.sext(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.zext(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcVal->getType() == Type::DoubleTy && DstTy == Type::FloatTy && + "Invalid FPTrunc instruction"); + Dest.FloatVal = (float) Src.DoubleVal; + return Dest; +} + +GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcVal->getType() == Type::FloatTy && DstTy == Type::DoubleTy && + "Invalid FPTrunc instruction"); + Dest.DoubleVal = (double) Src.FloatVal; + return Dest; +} + +GenericValue 
Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); + uint32_t DBitWidth = cast(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); + uint32_t DBitWidth = cast(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction"); + + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); + else + Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + return Dest; +} + +GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction"); + + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); + else + Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); + return Dest; + +} + +GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + uint32_t DBitWidth = cast(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(isa(SrcVal->getType()) && "Invalid PtrToInt instruction"); + + Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal); + return Dest; +} + +GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(isa(DstTy) && "Invalid PtrToInt instruction"); + + uint32_t PtrSize = TD.getPointerSizeInBits(); + if (PtrSize != Src.IntVal.getBitWidth()) + Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize); + + Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue())); + return Dest; +} + +GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + + const Type *SrcTy = SrcVal->getType(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + if (isa(DstTy)) { + assert(isa(SrcTy) && "Invalid BitCast"); + Dest.PointerVal = Src.PointerVal; + } else if (DstTy->isInteger()) { + if (SrcTy == Type::FloatTy) { + Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); + Dest.IntVal.floatToBits(Src.FloatVal); + } else if (SrcTy == Type::DoubleTy) { + Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); + Dest.IntVal.doubleToBits(Src.DoubleVal); + } else if (SrcTy->isInteger()) { + Dest.IntVal = Src.IntVal; + } else + assert(0 && "Invalid BitCast"); + } else if (DstTy == Type::FloatTy) { + if (SrcTy->isInteger()) + Dest.FloatVal = Src.IntVal.bitsToFloat(); + 
else + Dest.FloatVal = Src.FloatVal; + } else if (DstTy == Type::DoubleTy) { + if (SrcTy->isInteger()) + Dest.DoubleVal = Src.IntVal.bitsToDouble(); + else + Dest.DoubleVal = Src.DoubleVal; + } else + assert(0 && "Invalid Bitcast"); + + return Dest; +} + +void Interpreter::visitTruncInst(TruncInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitSExtInst(SExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitZExtInst(ZExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPTruncInst(FPTruncInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPExtInst(FPExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitUIToFPInst(UIToFPInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitSIToFPInst(SIToFPInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPToUIInst(FPToUIInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPToSIInst(FPToSIInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitPtrToIntInst(PtrToIntInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitIntToPtrInst(IntToPtrInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitBitCastInst(BitCastInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF); +} + +#define IMPLEMENT_VAARG(TY) \ + case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break + +void Interpreter::visitVAArgInst(VAArgInst &I) { + ExecutionContext &SF = ECStack.back(); + + // Get the incoming valist parameter. LLI treats the valist as a + // (ec-stack-depth var-arg-index) pair. + GenericValue VAList = getOperandValue(I.getOperand(0), SF); + GenericValue Dest; + GenericValue Src = ECStack[VAList.UIntPairVal.first] + .VarArgs[VAList.UIntPairVal.second]; + const Type *Ty = I.getType(); + switch (Ty->getTypeID()) { + case Type::IntegerTyID: Dest.IntVal = Src.IntVal; + IMPLEMENT_VAARG(Pointer); + IMPLEMENT_VAARG(Float); + IMPLEMENT_VAARG(Double); + default: + cerr << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; + abort(); + } + + // Set the Value of this Instruction. + SetValue(&I, Dest, SF); + + // Move the pointer to the next vararg. 
+ ++VAList.UIntPairVal.second; +} + +GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, + ExecutionContext &SF) { + switch (CE->getOpcode()) { + case Instruction::Trunc: + return executeTruncInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::ZExt: + return executeZExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::SExt: + return executeSExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPTrunc: + return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPExt: + return executeFPExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::UIToFP: + return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::SIToFP: + return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPToUI: + return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPToSI: + return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::PtrToInt: + return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::IntToPtr: + return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::BitCast: + return executeBitCastInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::GetElementPtr: + return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE), + gep_type_end(CE), SF); + case Instruction::FCmp: + case Instruction::ICmp: + return executeCmpInst(CE->getPredicate(), + getOperandValue(CE->getOperand(0), SF), + getOperandValue(CE->getOperand(1), SF), + CE->getOperand(0)->getType()); + case Instruction::Select: + return executeSelectInst(getOperandValue(CE->getOperand(0), SF), + getOperandValue(CE->getOperand(1), SF), + getOperandValue(CE->getOperand(2), SF)); + default : + break; + } + + // The cases below here require a GenericValue parameter for the result + // so we initialize one, compute it and then return it. 
+  GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
+  GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
+  GenericValue Dest;
+  const Type * Ty = CE->getOperand(0)->getType();
+  switch (CE->getOpcode()) {
+  case Instruction::Add:  executeAddInst (Dest, Op0, Op1, Ty); break;
+  case Instruction::Sub:  executeSubInst (Dest, Op0, Op1, Ty); break;
+  case Instruction::Mul:  executeMulInst (Dest, Op0, Op1, Ty); break;
+  case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
+  case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
+  case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
+  case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
+  case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
+  case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
+  case Instruction::And:  Dest.IntVal = Op0.IntVal.And(Op1.IntVal); break;
+  case Instruction::Or:   Dest.IntVal = Op0.IntVal.Or(Op1.IntVal); break;
+  case Instruction::Xor:  Dest.IntVal = Op0.IntVal.Xor(Op1.IntVal); break;
+  case Instruction::Shl:
+    Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
+    break;
+  case Instruction::LShr:
+    Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
+    break;
+  case Instruction::AShr:
+    Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
+    break;
+  default:
+    cerr << "Unhandled ConstantExpr: " << *CE << "\n";
+    abort();
+    return GenericValue();
+  }
+  return Dest;
+}
+
+GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    return getConstantExprValue(CE, SF);
+  } else if (Constant *CPV = dyn_cast<Constant>(V)) {
+    return getConstantValue(CPV);
+  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    return PTOGV(getPointerToGlobal(GV));
+  } else {
+    return SF.Values[V];
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Dispatch and Execution Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// callFunction - Execute the specified function...
+//
+void Interpreter::callFunction(Function *F,
+                               const std::vector<GenericValue> &ArgVals) {
+  assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+          ECStack.back().Caller.arg_size() == ArgVals.size()) &&
+         "Incorrect number of arguments passed into function call!");
+  // Make a new stack frame... and fill it in.
+  ECStack.push_back(ExecutionContext());
+  ExecutionContext &StackFrame = ECStack.back();
+  StackFrame.CurFunction = F;
+
+  // Special handling for external functions.
+  if (F->isDeclaration()) {
+    GenericValue Result = callExternalFunction (F, ArgVals);
+    // Simulate a 'ret' instruction of the appropriate type.
+    popStackAndReturnValueToCaller (F->getReturnType (), Result);
+    return;
+  }
+
+  // Get pointers to first LLVM BB & Instruction in function.
+  StackFrame.CurBB   = F->begin();
+  StackFrame.CurInst = StackFrame.CurBB->begin();
+
+  // Run through the function arguments and initialize their values...
+  assert((ArgVals.size() == F->arg_size() ||
+         (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&&
+         "Invalid number of values passed to function invocation!");
+
+  // Handle non-varargs arguments...
+  unsigned i = 0;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI, ++i)
+    SetValue(AI, ArgVals[i], StackFrame);
+
+  // Handle varargs arguments...
+ StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end()); +} + + +void Interpreter::run() { + while (!ECStack.empty()) { + // Interpret a single instruction & increment the "PC". + ExecutionContext &SF = ECStack.back(); // Current stack frame + Instruction &I = *SF.CurInst++; // Increment before execute + + // Track the number of dynamic instructions executed. + ++NumDynamicInsts; + + DOUT << "About to interpret: " << I; + visit(I); // Dispatch to one of the visit* methods... +#if 0 + // This is not safe, as visiting the instruction could lower it and free I. +#ifndef NDEBUG + if (!isa<CallInst>(I) && !isa<InvokeInst>(I) && + I.getType() != Type::VoidTy) { + DOUT << " --> "; + const GenericValue &Val = SF.Values[&I]; + switch (I.getType()->getTypeID()) { + default: assert(0 && "Invalid GenericValue Type"); + case Type::VoidTyID: DOUT << "void"; break; + case Type::FloatTyID: DOUT << "float " << Val.FloatVal; break; + case Type::DoubleTyID: DOUT << "double " << Val.DoubleVal; break; + case Type::PointerTyID: DOUT << "void* " << intptr_t(Val.PointerVal); + break; + case Type::IntegerTyID: + DOUT << "i" << Val.IntVal.getBitWidth() << " " + << Val.IntVal.toStringUnsigned(10) + << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; + break; + } + } +#endif +#endif + } +} diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp new file mode 100644 index 000000000000..160f1ba9f6c5 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -0,0 +1,542 @@ +//===-- ExternalFunctions.cpp - Implement External Functions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains not only code to deal with invoking "external" functions, +// but also code that implements "exported" external functions. +// +// There are currently two mechanisms for handling external functions in the +// Interpreter. The first is to implement lle_* wrapper functions that are +// specific to well-known library functions which manually translate the +// arguments from GenericValues and make the call. If such a wrapper does +// not exist, and libffi is available, then the Interpreter will attempt to +// invoke the function using libffi, after finding its address.
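+// +// For example, a call to a declared-but-not-defined function +// double pow(double, double) +// is resolved by first looking for a wrapper named after the signature +// (lle_DDD_pow), then for a generic lle_X_pow, and only if both lookups +// fail by asking libffi to call the address that dlsym finds for "pow".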
+// +//===----------------------------------------------------------------------===// + +#include "Interpreter.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Config/config.h" // Detect libffi +#include "llvm/Support/Streams.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/ManagedStatic.h" +#include <csignal> +#include <cstdio> +#include <map> +#include <cmath> +#include <cstring> + +#ifdef HAVE_FFI_CALL +#ifdef HAVE_FFI_H +#include <ffi.h> +#define USE_LIBFFI +#elif HAVE_FFI_FFI_H +#include <ffi/ffi.h> +#define USE_LIBFFI +#endif +#endif + +using namespace llvm; + +typedef GenericValue (*ExFunc)(const FunctionType *, + const std::vector<GenericValue> &); +static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions; +static std::map<std::string, ExFunc> FuncNames; + +#ifdef USE_LIBFFI +typedef void (*RawFunc)(void); +static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions; +#endif + +static Interpreter *TheInterpreter; + +static char getTypeID(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return 'V'; + case Type::IntegerTyID: + switch (cast<IntegerType>(Ty)->getBitWidth()) { + case 1: return 'o'; + case 8: return 'B'; + case 16: return 'S'; + case 32: return 'I'; + case 64: return 'L'; + default: return 'N'; + } + case Type::FloatTyID: return 'F'; + case Type::DoubleTyID: return 'D'; + case Type::PointerTyID: return 'P'; + case Type::FunctionTyID:return 'M'; + case Type::StructTyID: return 'T'; + case Type::ArrayTyID: return 'A'; + case Type::OpaqueTyID: return 'O'; + default: return 'U'; + } +} + +// Try to find the address of an external function given a Function object. +// Note that the interpreter does not know how to assemble a real call in the +// general case (that is the JIT's job); it therefore assumes that all +// external functions have the same (and fairly general) signature. +// The typical examples of such functions are the "lle_X_" ones. +static ExFunc lookupFunction(const Function *F) { + // Function not found, look it up... start by figuring out what the + // composite function name should be. + std::string ExtName = "lle_"; + const FunctionType *FT = F->getFunctionType(); + for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) + ExtName += getTypeID(FT->getContainedType(i)); + ExtName += "_" + F->getName(); + + ExFunc FnPtr = FuncNames[ExtName]; + if (FnPtr == 0) + FnPtr = FuncNames["lle_X_"+F->getName()]; + if (FnPtr == 0) // Try calling a generic function... if it exists... + FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( + ("lle_X_"+F->getName()).c_str()); + if (FnPtr != 0) + ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later + return FnPtr; +} + +#ifdef USE_LIBFFI +static ffi_type *ffiTypeFor(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return &ffi_type_void; + case Type::IntegerTyID: + switch (cast<IntegerType>(Ty)->getBitWidth()) { + case 8: return &ffi_type_sint8; + case 16: return &ffi_type_sint16; + case 32: return &ffi_type_sint32; + case 64: return &ffi_type_sint64; + } + case Type::FloatTyID: return &ffi_type_float; + case Type::DoubleTyID: return &ffi_type_double; + case Type::PointerTyID: return &ffi_type_pointer; + default: break; + } + // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ cerr << "Type could not be mapped for use with libffi.\n"; + abort(); + return NULL; +} + +static void *ffiValueFor(const Type *Ty, const GenericValue &AV, + void *ArgDataPtr) { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + switch (cast(Ty)->getBitWidth()) { + case 8: { + int8_t *I8Ptr = (int8_t *) ArgDataPtr; + *I8Ptr = (int8_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 16: { + int16_t *I16Ptr = (int16_t *) ArgDataPtr; + *I16Ptr = (int16_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 32: { + int32_t *I32Ptr = (int32_t *) ArgDataPtr; + *I32Ptr = (int32_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 64: { + int64_t *I64Ptr = (int64_t *) ArgDataPtr; + *I64Ptr = (int64_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + } + case Type::FloatTyID: { + float *FloatPtr = (float *) ArgDataPtr; + *FloatPtr = AV.DoubleVal; + return ArgDataPtr; + } + case Type::DoubleTyID: { + double *DoublePtr = (double *) ArgDataPtr; + *DoublePtr = AV.DoubleVal; + return ArgDataPtr; + } + case Type::PointerTyID: { + void **PtrPtr = (void **) ArgDataPtr; + *PtrPtr = GVTOP(AV); + return ArgDataPtr; + } + default: break; + } + // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. + cerr << "Type value could not be mapped for use with libffi.\n"; + abort(); + return NULL; +} + +static bool ffiInvoke(RawFunc Fn, Function *F, + const std::vector &ArgVals, + const TargetData *TD, GenericValue &Result) { + ffi_cif cif; + const FunctionType *FTy = F->getFunctionType(); + const unsigned NumArgs = F->arg_size(); + + // TODO: We don't have type information about the remaining arguments, because + // this information is never passed into ExecutionEngine::runFunction(). + if (ArgVals.size() > NumArgs && F->isVarArg()) { + cerr << "Calling external var arg function '" << F->getName() + << "' is not supported by the Interpreter.\n"; + abort(); + } + + unsigned ArgBytes = 0; + + std::vector args(NumArgs); + for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); + A != E; ++A) { + const unsigned ArgNo = A->getArgNo(); + const Type *ArgTy = FTy->getParamType(ArgNo); + args[ArgNo] = ffiTypeFor(ArgTy); + ArgBytes += TD->getTypeStoreSize(ArgTy); + } + + uint8_t *ArgData = (uint8_t*) alloca(ArgBytes); + uint8_t *ArgDataPtr = ArgData; + std::vector values(NumArgs); + for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); + A != E; ++A) { + const unsigned ArgNo = A->getArgNo(); + const Type *ArgTy = FTy->getParamType(ArgNo); + values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr); + ArgDataPtr += TD->getTypeStoreSize(ArgTy); + } + + const Type *RetTy = FTy->getReturnType(); + ffi_type *rtype = ffiTypeFor(RetTy); + + if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { + void *ret = NULL; + if (RetTy->getTypeID() != Type::VoidTyID) + ret = alloca(TD->getTypeStoreSize(RetTy)); + ffi_call(&cif, Fn, ret, &values[0]); + switch (RetTy->getTypeID()) { + case Type::IntegerTyID: + switch (cast(RetTy)->getBitWidth()) { + case 8: Result.IntVal = APInt(8 , *(int8_t *) ret); break; + case 16: Result.IntVal = APInt(16, *(int16_t*) ret); break; + case 32: Result.IntVal = APInt(32, *(int32_t*) ret); break; + case 64: Result.IntVal = APInt(64, *(int64_t*) ret); break; + } + break; + case Type::FloatTyID: Result.FloatVal = *(float *) ret; break; + case Type::DoubleTyID: Result.DoubleVal = *(double*) ret; break; + case Type::PointerTyID: Result.PointerVal = *(void **) ret; break; + default: break; + } + 
return true; + } + + return false; +} +#endif // USE_LIBFFI + +GenericValue Interpreter::callExternalFunction(Function *F, + const std::vector<GenericValue> &ArgVals) { + TheInterpreter = this; + + // Do a lookup to see if the function is in our cache... this should just be a + // deferred annotation! + std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F); + if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F) + : FI->second) + return Fn(F->getFunctionType(), ArgVals); + +#ifdef USE_LIBFFI + std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F); + RawFunc RawFn; + if (RF == RawFunctions->end()) { + RawFn = (RawFunc)(intptr_t) + sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName()); + if (RawFn != 0) + RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later + } else { + RawFn = RF->second; + } + + GenericValue Result; + if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result)) + return Result; +#endif // USE_LIBFFI + + cerr << "Tried to execute an unknown external function: " + << F->getType()->getDescription() << " " << F->getName() << "\n"; + if (F->getName() != "__main") + abort(); + return GenericValue(); +} + + +//===----------------------------------------------------------------------===// +// Functions "exported" to the running application... +// +extern "C" { // Don't add C++ manglings to llvm mangling :) + +// void atexit(Function*) +GenericValue lle_X_atexit(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + assert(Args.size() == 1); + TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0])); + GenericValue GV; + GV.IntVal = 0; + return GV; +} + +// void exit(int) +GenericValue lle_X_exit(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + TheInterpreter->exitCalled(Args[0]); + return GenericValue(); +} + +// void abort(void) +GenericValue lle_X_abort(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + raise (SIGABRT); + return GenericValue(); +} + +// int sprintf(char *, const char *, ...) - a very rough implementation to make +// output useful. +GenericValue lle_X_sprintf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + char *OutputBuffer = (char *)GVTOP(Args[0]); + const char *FmtStr = (const char *)GVTOP(Args[1]); + unsigned ArgNo = 2; + + // sprintf should return # chars printed. This is completely incorrect, but + // close enough for now. + GenericValue GV; + GV.IntVal = APInt(32, strlen(FmtStr)); + while (1) { + switch (*FmtStr) { + case 0: return GV; // Null terminator...
+ default: // Normal nonspecial character + sprintf(OutputBuffer++, "%c", *FmtStr++); + break; + case '\\': { // Handle escape codes + sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1)); + FmtStr += 2; OutputBuffer += 2; + break; + } + case '%': { // Handle format specifiers + char FmtBuf[100] = "", Buffer[1000] = ""; + char *FB = FmtBuf; + *FB++ = *FmtStr++; + char Last = *FB++ = *FmtStr++; + unsigned HowLong = 0; + while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' && + Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' && + Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' && + Last != 'p' && Last != 's' && Last != '%') { + if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's + Last = *FB++ = *FmtStr++; + } + *FB = 0; + + switch (Last) { + case '%': + strcpy(Buffer, "%"); break; + case 'c': + sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue())); + break; + case 'd': case 'i': + case 'u': case 'o': + case 'x': case 'X': + if (HowLong >= 1) { + if (HowLong == 1 && + TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 && + sizeof(long) < sizeof(int64_t)) { + // Make sure we use %lld with a 64 bit argument because we might be + // compiling LLI on a 32 bit compiler. + unsigned Size = strlen(FmtBuf); + FmtBuf[Size] = FmtBuf[Size-1]; + FmtBuf[Size+1] = 0; + FmtBuf[Size-1] = 'l'; + } + sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue()); + } else + sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue())); + break; + case 'e': case 'E': case 'g': case 'G': case 'f': + sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break; + case 'p': + sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break; + case 's': + sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break; + default: cerr << "<unknown printf code '" << *FmtStr << "'!>"; + ArgNo++; break; + } + strcpy(OutputBuffer, Buffer); + OutputBuffer += strlen(Buffer); + } + break; + } + } + return GV; +} + +// int printf(const char *, ...) - a very rough implementation to make output +// useful. +GenericValue lle_X_printf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + char Buffer[10000]; + std::vector<GenericValue> NewArgs; + NewArgs.push_back(PTOGV((void*)&Buffer[0])); + NewArgs.insert(NewArgs.end(), Args.begin(), Args.end()); + GenericValue GV = lle_X_sprintf(FT, NewArgs); + cout << Buffer; + return GV; +} + +static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1, + void *Arg2, void *Arg3, void *Arg4, void *Arg5, + void *Arg6, void *Arg7, void *Arg8) { + void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 }; + + // Loop over the format string, munging read values as appropriate (performs + // byteswaps as necessary). + unsigned ArgNo = 0; + while (*Fmt) { + if (*Fmt++ == '%') { + // Read any flag characters that may be present... + bool Suppress = false; + bool Half = false; + bool Long = false; + bool LongLong = false; // long long or long double + + while (1) { + switch (*Fmt++) { + case '*': Suppress = true; break; + case 'a': /*Allocate = true;*/ break; // We don't need to track this + case 'h': Half = true; break; + case 'l': Long = true; break; + case 'q': + case 'L': LongLong = true; break; + default: + if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs + goto Out; + } + } + Out: + + // Read the conversion character + if (!Suppress && Fmt[-1] != '%') { // Nothing to do?
+ unsigned Size = 0; + const Type *Ty = 0; + + switch (Fmt[-1]) { + case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p': + case 'd': + if (Long || LongLong) { + Size = 8; Ty = Type::Int64Ty; + } else if (Half) { + Size = 4; Ty = Type::Int16Ty; + } else { + Size = 4; Ty = Type::Int32Ty; + } + break; + + case 'e': case 'g': case 'E': + case 'f': + if (Long || LongLong) { + Size = 8; Ty = Type::DoubleTy; + } else { + Size = 4; Ty = Type::FloatTy; + } + break; + + case 's': case 'c': case '[': // No byteswap needed + Size = 1; + Ty = Type::Int8Ty; + break; + + default: break; + } + + if (Size) { + GenericValue GV; + void *Arg = Args[ArgNo++]; + memcpy(&GV, Arg, Size); + TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty); + } + } + } +} + +// int sscanf(const char *format, ...); +GenericValue lle_X_sscanf(const FunctionType *FT, + const std::vector<GenericValue> &args) { + assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!"); + + char *Args[10]; + for (unsigned i = 0; i < args.size(); ++i) + Args[i] = (char*)GVTOP(args[i]); + + GenericValue GV; + GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9])); + ByteswapSCANFResults(Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9], 0); + return GV; +} + +// int scanf(const char *format, ...); +GenericValue lle_X_scanf(const FunctionType *FT, + const std::vector<GenericValue> &args) { + assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!"); + + char *Args[10]; + for (unsigned i = 0; i < args.size(); ++i) + Args[i] = (char*)GVTOP(args[i]); + + GenericValue GV; + GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9])); + ByteswapSCANFResults(Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9]); + return GV; +} + +// int fprintf(FILE *, const char *, ...) - a very rough implementation to make +// output useful. +GenericValue lle_X_fprintf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + assert(Args.size() >= 2); + char Buffer[10000]; + std::vector<GenericValue> NewArgs; + NewArgs.push_back(PTOGV(Buffer)); + NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end()); + GenericValue GV = lle_X_sprintf(FT, NewArgs); + + fputs(Buffer, (FILE *) GVTOP(Args[0])); + return GV; +} + +} // End extern "C" + + +void Interpreter::initializeExternalFunctions() { + FuncNames["lle_X_atexit"] = lle_X_atexit; + FuncNames["lle_X_exit"] = lle_X_exit; + FuncNames["lle_X_abort"] = lle_X_abort; + + FuncNames["lle_X_printf"] = lle_X_printf; + FuncNames["lle_X_sprintf"] = lle_X_sprintf; + FuncNames["lle_X_sscanf"] = lle_X_sscanf; + FuncNames["lle_X_scanf"] = lle_X_scanf; + FuncNames["lle_X_fprintf"] = lle_X_fprintf; +} + diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp new file mode 100644 index 000000000000..ded65d546701 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -0,0 +1,104 @@ +//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the top-level functionality for the LLVM interpreter.
+// This interpreter is designed to be a very simple, portable, inefficient +// interpreter. +// +//===----------------------------------------------------------------------===// + +#include "Interpreter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/ModuleProvider.h" +#include <cstring> +using namespace llvm; + +namespace { + +static struct RegisterInterp { + RegisterInterp() { Interpreter::Register(); } +} InterpRegistrator; + +} + +namespace llvm { + void LinkInInterpreter() { + } +} + +/// create - Create a new interpreter object. This can never fail. +/// +ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr, + CodeGenOpt::Level OptLevel /*unused*/) { + // Tell this ModuleProvider to materialize and release the module + if (!MP->materializeModule(ErrStr)) + // We got an error, just return 0 + return 0; + + return new Interpreter(MP); +} + +//===----------------------------------------------------------------------===// +// Interpreter ctor - Initialize stuff +// +Interpreter::Interpreter(ModuleProvider *M) + : ExecutionEngine(M), TD(M->getModule()) { + + memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); + setTargetData(&TD); + // Initialize the "backend" + initializeExecutionEngine(); + initializeExternalFunctions(); + emitGlobals(); + + IL = new IntrinsicLowering(TD); +} + +Interpreter::~Interpreter() { + delete IL; +} + +void Interpreter::runAtExitHandlers () { + while (!AtExitHandlers.empty()) { + callFunction(AtExitHandlers.back(), std::vector<GenericValue>()); + AtExitHandlers.pop_back(); + run(); + } +} + +/// run - Start execution with the specified function and arguments. +/// +GenericValue +Interpreter::runFunction(Function *F, + const std::vector<GenericValue> &ArgValues) { + assert (F && "Function *F was null at entry to run()"); + + // Try extra hard not to pass extra args to a function that isn't + // expecting them. C programmers frequently bend the rules and + // declare main() with fewer parameters than it actually gets + // passed, and the interpreter barfs if you pass a function more + // parameters than it is declared to take. This does not attempt to + // take into account gratuitous differences in declared types, + // though. + std::vector<GenericValue> ActualArgs; + const unsigned ArgCount = F->getFunctionType()->getNumParams(); + for (unsigned i = 0; i < ArgCount; ++i) + ActualArgs.push_back(ArgValues[i]); + + // Set up the function call. + callFunction(F, ActualArgs); + + // Start executing the function. + run(); + + return ExitValue; +} + diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h new file mode 100644 index 000000000000..8a285ecb82c0 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -0,0 +1,241 @@ +//===-- Interpreter.h ------------------------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This header file defines the interpreter structure +// +//===----------------------------------------------------------------------===// + +#ifndef LLI_INTERPRETER_H +#define LLI_INTERPRETER_H + +#include "llvm/Function.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class IntrinsicLowering; +struct FunctionInfo; +template<typename T> class generic_gep_type_iterator; +class ConstantExpr; +typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator; + + +// AllocaHolder - Object to track all of the blocks of memory allocated by +// alloca. When the function returns, this object is popped off the execution +// stack, which causes the dtor to be run, which frees all the alloca'd memory. +// +class AllocaHolder { + friend class AllocaHolderHandle; + std::vector<void*> Allocations; + unsigned RefCnt; +public: + AllocaHolder() : RefCnt(0) {} + void add(void *mem) { Allocations.push_back(mem); } + ~AllocaHolder() { + for (unsigned i = 0; i < Allocations.size(); ++i) + free(Allocations[i]); + } +}; + +// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into +// a vector... +// +class AllocaHolderHandle { + AllocaHolder *H; +public: + AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; } + AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; } + ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; } + + void add(void *mem) { H->add(mem); } +}; + +typedef std::vector<GenericValue> ValuePlaneTy; + +// ExecutionContext struct - This struct represents one stack frame currently +// executing. +// +struct ExecutionContext { + Function *CurFunction;// The currently executing function + BasicBlock *CurBB; // The currently executing BB + BasicBlock::iterator CurInst; // The next instruction to execute + std::map<Value *, GenericValue> Values; // LLVM values used in this invocation + std::vector<GenericValue> VarArgs; // Values passed through an ellipsis + CallSite Caller; // Holds the call that called subframes. + // NULL if main func or debugger invoked fn + AllocaHolderHandle Allocas; // Track memory allocated by alloca +}; + +// Interpreter - This class represents the entirety of the interpreter. +// +class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { + GenericValue ExitValue; // The return value of the called function + TargetData TD; + IntrinsicLowering *IL; + + // The runtime stack of executing code. The top of the stack is the current + // function record. + std::vector<ExecutionContext> ECStack; + + // AtExitHandlers - List of functions to call when the program exits, + // registered with the atexit() library function. + std::vector<Function*> AtExitHandlers; + +public: + explicit Interpreter(ModuleProvider *M); + ~Interpreter(); + + /// runAtExitHandlers - Run any functions registered by the program's calls to + /// atexit(3), which we intercept and store in AtExitHandlers. + /// + void runAtExitHandlers(); + + static void Register() { + InterpCtor = create; + } + + /// create - Create an interpreter ExecutionEngine. This can never fail. + /// + static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0, + CodeGenOpt::Level = CodeGenOpt::Default); + + /// run - Start execution with the specified function and arguments.
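+ /// A typical embedding does something like: + /// ExecutionEngine *EE = Interpreter::create(MP); + /// std::vector<GenericValue> Args; + /// GenericValue Ret = EE->runFunction(F, Args);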
+ /// + virtual GenericValue runFunction(Function *F, + const std::vector<GenericValue> &ArgValues); + + /// recompileAndRelinkFunction - For the interpreter, functions are always + /// up-to-date. + /// + virtual void *recompileAndRelinkFunction(Function *F) { + return getPointerToFunction(F); + } + + /// freeMachineCodeForFunction - The interpreter does not generate any code. + /// + void freeMachineCodeForFunction(Function *F) { } + + // Methods used to execute code: + // Place a call on the stack + void callFunction(Function *F, const std::vector<GenericValue> &ArgVals); + void run(); // Execute instructions until nothing left to do + + // Opcode Implementations + void visitReturnInst(ReturnInst &I); + void visitBranchInst(BranchInst &I); + void visitSwitchInst(SwitchInst &I); + + void visitBinaryOperator(BinaryOperator &I); + void visitICmpInst(ICmpInst &I); + void visitFCmpInst(FCmpInst &I); + void visitAllocationInst(AllocationInst &I); + void visitFreeInst(FreeInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitGetElementPtrInst(GetElementPtrInst &I); + void visitPHINode(PHINode &PN) { assert(0 && "PHI nodes already handled!"); } + void visitTruncInst(TruncInst &I); + void visitZExtInst(ZExtInst &I); + void visitSExtInst(SExtInst &I); + void visitFPTruncInst(FPTruncInst &I); + void visitFPExtInst(FPExtInst &I); + void visitUIToFPInst(UIToFPInst &I); + void visitSIToFPInst(SIToFPInst &I); + void visitFPToUIInst(FPToUIInst &I); + void visitFPToSIInst(FPToSIInst &I); + void visitPtrToIntInst(PtrToIntInst &I); + void visitIntToPtrInst(IntToPtrInst &I); + void visitBitCastInst(BitCastInst &I); + void visitSelectInst(SelectInst &I); + + + void visitCallSite(CallSite CS); + void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); } + void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); } + void visitUnwindInst(UnwindInst &I); + void visitUnreachableInst(UnreachableInst &I); + + void visitShl(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + + void visitVAArgInst(VAArgInst &I); + void visitInstruction(Instruction &I) { + cerr << I; + assert(0 && "Instruction not interpretable yet!"); + } + + GenericValue callExternalFunction(Function *F, + const std::vector<GenericValue> &ArgVals); + void exitCalled(GenericValue GV); + + void addAtExitHandler(Function *F) { + AtExitHandlers.push_back(F); + } + + GenericValue *getFirstVarArg () { + return &(ECStack.back ().VarArgs[0]); + } + + //FIXME: private: +public: + GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I, + gep_type_iterator E, ExecutionContext &SF); + +private: // Helper functions + // SwitchToNewBasicBlock - Start execution in a new basic block and run any + // PHI nodes in the top of the block. This is used for intraprocedural + // control flow.
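+ // Note that PHI nodes may reference each other (e.g. the pair + // %a = phi i32 [ %b, %pred ] and %b = phi i32 [ %a, %pred ] + // expresses a swap), so all incoming values must be read before any + // result register is written.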
+ // + void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF); + + void *getPointerToFunction(Function *F) { return (void*)F; } + + void initializeExecutionEngine(); + void initializeExternalFunctions(); + GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF); + GenericValue getOperandValue(Value *V, ExecutionContext &SF); + GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF); + GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal, + const Type *Ty, ExecutionContext &SF); + void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result); + +}; + +} // End llvm namespace + +#endif diff --git a/lib/ExecutionEngine/Interpreter/Makefile b/lib/ExecutionEngine/Interpreter/Makefile new file mode 100644 index 000000000000..5f937c3ad6f2 --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/Makefile @@ -0,0 +1,12 @@ +##===- lib/ExecutionEngine/Interpreter/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMInterpreter + +include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt new file mode 100644 index 000000000000..d7980d077282 --- /dev/null +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -0,0 +1,11 @@ +# TODO: Support other architectures. See Makefile. +add_definitions(-DENABLE_X86_JIT) + +add_partially_linked_object(LLVMJIT + Intercept.cpp + JIT.cpp + JITDwarfEmitter.cpp + JITEmitter.cpp + JITMemoryManager.cpp + TargetSelect.cpp + ) diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp new file mode 100644 index 000000000000..3dcc4626a1fa --- /dev/null +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -0,0 +1,148 @@ +//===-- Intercept.cpp - System function interception routines -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// If a function call occurs to an external function, the JIT is designed to use +// the dynamic loader interface to find a function to call. 
This is useful for +// calling system calls and library functions that are not available in LLVM. +// Some system calls, however, need to be handled specially. For this reason, +// we intercept some of them here and use our own stubs to handle them. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/Support/Streams.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Config/config.h" +using namespace llvm; + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +//===----------------------------------------------------------------------===// + +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include <sys/stat.h> +#endif +#include <fcntl.h> +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. + */ +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)(void)) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. As such it is only useful +/// for resolving library symbols, not code generated symbols.
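+/// +/// For example, a JITed call to an undefined "sin" is resolved here against +/// the process image via the dynamic loader, while "exit" and "atexit" are +/// redirected to the jit_exit/jit_atexit stubs defined above.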
+/// +void *JIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + if (!isSymbolSearchingDisabled()) { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinel. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // and has an asm specifier, try again without the underscore. + if (Name[0] == 1 && NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. + std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + cerr << "ERROR: Program used external function '" << Name + << "' which could not be resolved!\n"; + abort(); + } + return 0; +} diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp new file mode 100644 index 000000000000..f8ae8844616d --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -0,0 +1,708 @@ +//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool implements a just-in-time compiler for LLVM, allowing direct +// execution of LLVM bitcode in an efficient manner. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/ModuleProvider.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Config/config.h" + +using namespace llvm; + +#ifdef __APPLE__ +// Apple gcc defaults to -fuse-cxa-atexit (i.e.
calls __cxa_atexit instead +// of atexit). It passes the address of linker generated symbol __dso_handle +// to the function. +// This configuration change happened at version 5330. +# include <AvailabilityMacros.h> +# if defined(MAC_OS_X_VERSION_10_4) && \ + ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \ + (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \ + __APPLE_CC__ >= 5330)) +# ifndef HAVE___DSO_HANDLE +# define HAVE___DSO_HANDLE 1 +# endif +# endif +#endif + +#if HAVE___DSO_HANDLE +extern void *__dso_handle __attribute__ ((__visibility__ ("hidden"))); +#endif + +namespace { + +static struct RegisterJIT { + RegisterJIT() { JIT::Register(); } +} JITRegistrator; + +} + +namespace llvm { + void LinkInJIT() { + } +} + + +#if defined(__GNUC__) && !defined(__ARM_EABI__) + +// libgcc defines the __register_frame function to dynamically register new +// dwarf frames for exception handling. This functionality is not portable +// across compilers and is only provided by GCC. We use the __register_frame +// function here so that code generated by the JIT cooperates with the unwinding +// runtime of libgcc. When JITting with exception handling enabled, LLVM +// generates dwarf frames and registers them with libgcc via __register_frame. +// +// The __register_frame function works with Linux. +// +// Unfortunately, this functionality seems to have been added to libgcc after +// the unwinding library of libgcc for darwin was written. The code for darwin +// overwrites the value updated by __register_frame with a value fetched with +// "keymgr". +// "keymgr" is an obsolete functionality, which should be rewritten some day. +// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we +// need a workaround in LLVM which uses the "keymgr" to dynamically modify the +// values of an opaque key, used by libgcc to find dwarf tables. + +extern "C" void __register_frame(void*); + +#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050 +# define USE_KEYMGR 1 +#else +# define USE_KEYMGR 0 +#endif + +#if USE_KEYMGR + +namespace { + +// LibgccObject - This is the structure defined in libgcc. There is no #include +// provided for this structure, so we also define it here. libgcc calls it +// "struct object". The structure is undocumented in libgcc. +struct LibgccObject { + void *unused1; + void *unused2; + void *unused3; + + /// frame - Pointer to the exception table. + void *frame; + + /// encoding - The encoding of the object? + union { + struct { + unsigned long sorted : 1; + unsigned long from_array : 1; + unsigned long mixed_encoding : 1; + unsigned long encoding : 8; + unsigned long count : 21; + } b; + size_t i; + } encoding; + + /// fde_end - libgcc defines this field only if some macro is defined. We + /// include this field even if it may not be there, to make libgcc happy. + char *fde_end; + + /// next - At least we know it's a chained list! + struct LibgccObject *next; +}; + +// "keymgr" stuff. Apparently, all frame tables are stored there. +extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *); +extern "C" void *_keymgr_get_and_lock_processwide_ptr(int); +#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */ + +/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It +/// probably contains all dwarf tables that are loaded. +struct LibgccObjectInfo { + + /// seenObjects - LibgccObjects already parsed by the unwinding runtime. + /// + struct LibgccObject* seenObjects; + + /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+ /// + struct LibgccObject* unseenObjects; + + unsigned unused[2]; +}; + +/// darwin_register_frame - Since __register_frame does not work with darwin's +/// libgcc, we provide our own function, which "tricks" libgcc by modifying the +/// "Dwarf2 object list" key. +void DarwinRegisterFrame(void* FrameBegin) { + // Get the key. + LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) + _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); + assert(LOI && "This should be preallocated by the runtime"); + + // Allocate a new LibgccObject to represent this frame. Deallocation of this + // object may be impossible: since darwin code in libgcc was written after + // the ability to dynamically register frames, things may crash if we + // deallocate it. + struct LibgccObject* ob = (struct LibgccObject*) + malloc(sizeof(struct LibgccObject)); + + // Fill in the fields the same way libgcc does. + ob->unused1 = (void *)-1; + ob->unused2 = 0; + ob->unused3 = 0; + ob->frame = FrameBegin; + ob->encoding.i = 0; + ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; + + // Put the info in both places, as libgcc uses either the first or the + // second field. Note that we rely on having two pointers here. If fde_end + // was a char, things would get complicated. + ob->fde_end = (char*)LOI->unseenObjects; + ob->next = LOI->unseenObjects; + + // Update the key's unseenObjects list. + LOI->unseenObjects = ob; + + // Finally update the "key". Apparently, libgcc requires it. + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, + LOI); + +} + +} +#endif // USE_KEYMGR +#endif // __GNUC__ + +/// createJIT - This is the factory method for creating a JIT for the current +/// machine, it does not fall back to the interpreter. This takes ownership +/// of the module provider. +ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel) { + ExecutionEngine *EE = JIT::createJIT(MP, ErrorStr, JMM, OptLevel); + if (!EE) return 0; + + // Make sure we can resolve symbols in the program as well. The zero arg + // to the function tells DynamicLibrary to load the program, not a library. + sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr); + return EE; +} + +JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, CodeGenOpt::Level OptLevel) + : ExecutionEngine(MP), TM(tm), TJI(tji) { + setTargetData(TM.getTargetData()); + + jitstate = new JITState(MP); + + // Initialize JCE + JCE = createEmitter(*this, JMM); + + // Add target data + MutexGuard locked(lock); + FunctionPassManager &PM = jitstate->getPM(locked); + PM.add(new TargetData(*TM.getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory that + // may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { + cerr << "Target does not support machine code emission!\n"; + abort(); + } + + // Register routine for informing unwinding runtime about new EH frames +#if defined(__GNUC__) && !defined(__ARM_EABI__) +#if USE_KEYMGR + struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) + _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); + + // The key is created on demand, and libgcc creates it the first time an + // exception occurs. Since we need the key to register frames, we create + // it now.
+ if (!LOI) + LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); + InstallExceptionTableRegister(DarwinRegisterFrame); +#else + InstallExceptionTableRegister(__register_frame); +#endif // USE_KEYMGR +#endif // __GNUC__ + + // Initialize passes. + PM.doInitialization(); +} + +JIT::~JIT() { + delete jitstate; + delete JCE; + delete &TM; +} + +/// addModuleProvider - Add a new ModuleProvider to the JIT. If we previously +/// removed the last ModuleProvider, we need to re-initialize jitstate with a +/// valid ModuleProvider. +void JIT::addModuleProvider(ModuleProvider *MP) { + MutexGuard locked(lock); + + if (Modules.empty()) { + assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); + + jitstate = new JITState(MP); + + FunctionPassManager &PM = jitstate->getPM(locked); + PM.add(new TargetData(*TM.getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + cerr << "Target does not support machine code emission!\n"; + abort(); + } + + // Initialize passes. + PM.doInitialization(); + } + + ExecutionEngine::addModuleProvider(MP); +} + +/// removeModuleProvider - If we are removing the last ModuleProvider, +/// invalidate the jitstate since the PassManager it contains references a +/// released ModuleProvider. +Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) { + Module *result = ExecutionEngine::removeModuleProvider(MP, E); + + MutexGuard locked(lock); + + if (jitstate->getMP() == MP) { + delete jitstate; + jitstate = 0; + } + + if (!jitstate && !Modules.empty()) { + jitstate = new JITState(Modules[0]); + + FunctionPassManager &PM = jitstate->getPM(locked); + PM.add(new TargetData(*TM.getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + cerr << "Target does not support machine code emission!\n"; + abort(); + } + + // Initialize passes. + PM.doInitialization(); + } + return result; +} + +/// deleteModuleProvider - Remove a ModuleProvider from the list of modules, +/// and deletes the ModuleProvider and owned Module. Avoids materializing +/// the underlying module. +void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) { + ExecutionEngine::deleteModuleProvider(MP, E); + + MutexGuard locked(lock); + + if (jitstate->getMP() == MP) { + delete jitstate; + jitstate = 0; + } + + if (!jitstate && !Modules.empty()) { + jitstate = new JITState(Modules[0]); + + FunctionPassManager &PM = jitstate->getPM(locked); + PM.add(new TargetData(*TM.getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + cerr << "Target does not support machine code emission!\n"; + abort(); + } + + // Initialize passes. + PM.doInitialization(); + } +} + +/// run - Start execution with the specified function and arguments.
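+/// For common prototypes such as int main(int, char**), the compiled code is +/// invoked directly through a casted function pointer; other signatures fall +/// back to codegenning a nullary stub that bakes the arguments in as +/// constants.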
+/// +GenericValue JIT::runFunction(Function *F, + const std::vector<GenericValue> &ArgValues) { + assert(F && "Function *F was null at entry to run()"); + + void *FPtr = getPointerToFunction(F); + assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); + const FunctionType *FTy = F->getFunctionType(); + const Type *RetTy = FTy->getReturnType(); + + assert((FTy->getNumParams() == ArgValues.size() || + (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && + "Wrong number of arguments passed into function!"); + assert(FTy->getNumParams() == ArgValues.size() && + "This doesn't support passing arguments through varargs (yet)!"); + + // Handle some common cases first. These cases correspond to common `main' + // prototypes. + if (RetTy == Type::Int32Ty || RetTy == Type::VoidTy) { + switch (ArgValues.size()) { + case 3: + if (FTy->getParamType(0) == Type::Int32Ty && + isa<PointerType>(FTy->getParamType(1)) && + isa<PointerType>(FTy->getParamType(2))) { + int (*PF)(int, char **, const char **) = + (int(*)(int, char **, const char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]), + (const char **)GVTOP(ArgValues[2]))); + return rv; + } + break; + case 2: + if (FTy->getParamType(0) == Type::Int32Ty && + isa<PointerType>(FTy->getParamType(1))) { + int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]))); + return rv; + } + break; + case 1: + if (FTy->getNumParams() == 1 && + FTy->getParamType(0) == Type::Int32Ty) { + GenericValue rv; + int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); + return rv; + } + break; + } + } + + // Handle cases where no arguments are passed first. + if (ArgValues.empty()) { + GenericValue rv; + switch (RetTy->getTypeID()) { + default: assert(0 && "Unknown return type for function call!"); + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth(); + if (BitWidth == 1) + rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 8) + rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 16) + rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 32) + rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 64) + rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); + else + assert(0 && "Integer types > 64 bits not supported"); + return rv; + } + case Type::VoidTyID: + rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); + return rv; + case Type::FloatTyID: + rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); + return rv; + case Type::DoubleTyID: + rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); + return rv; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + assert(0 && "long double not supported yet"); + return rv; + case Type::PointerTyID: + return PTOGV(((void*(*)())(intptr_t)FPtr)()); + } + } + + // Okay, this is not one of our quick and easy cases. Because we don't have a + // full FFI, we have to codegen a nullary stub function that just calls the + // function we are interested in, passing in constants for all of the + // arguments. Make this function and return. + + // First, create the function.
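+ // The stub built below is roughly equivalent to the IR (sketched here for + // an i32-returning F taking one i32 argument whose GenericValue holds 42; + // the real stub is unnamed): + // define internal i32 @stub() { + // %r = tail call i32 @F(i32 42) + // ret i32 %r + // }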
+ FunctionType *STy=FunctionType::get(RetTy, std::vector<const Type*>(), false); + Function *Stub = Function::Create(STy, Function::InternalLinkage, "", + F->getParent()); + + // Insert a basic block. + BasicBlock *StubBB = BasicBlock::Create("", Stub); + + // Convert all of the GenericValue arguments over to constants. Note that we + // currently don't support varargs. + SmallVector<Value*, 8> Args; + for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { + Constant *C = 0; + const Type *ArgTy = FTy->getParamType(i); + const GenericValue &AV = ArgValues[i]; + switch (ArgTy->getTypeID()) { + default: assert(0 && "Unknown argument type for function call!"); + case Type::IntegerTyID: + C = ConstantInt::get(AV.IntVal); + break; + case Type::FloatTyID: + C = ConstantFP::get(APFloat(AV.FloatVal)); + break; + case Type::DoubleTyID: + C = ConstantFP::get(APFloat(AV.DoubleVal)); + break; + case Type::PPC_FP128TyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + C = ConstantFP::get(APFloat(AV.IntVal)); + break; + case Type::PointerTyID: + void *ArgPtr = GVTOP(AV); + if (sizeof(void*) == 4) + C = ConstantInt::get(Type::Int32Ty, (int)(intptr_t)ArgPtr); + else + C = ConstantInt::get(Type::Int64Ty, (intptr_t)ArgPtr); + C = ConstantExpr::getIntToPtr(C, ArgTy); // Cast the integer to pointer + break; + } + Args.push_back(C); + } + + CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(), + "", StubBB); + TheCall->setCallingConv(F->getCallingConv()); + TheCall->setTailCall(); + if (TheCall->getType() != Type::VoidTy) + ReturnInst::Create(TheCall, StubBB); // Return result of the call. + else + ReturnInst::Create(StubBB); // Just return void. + + // Finally, return the value returned by our nullary stub function. + return runFunction(Stub, std::vector<GenericValue>()); +} + +/// runJITOnFunction - Run the FunctionPassManager full of +/// just-in-time compilation passes on F, hopefully filling in +/// GlobalAddress[F] with the address of F's machine code. +/// +void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { + MutexGuard locked(lock); + + registerMachineCodeInfo(MCI); + + runJITOnFunctionUnlocked(F, locked); + + registerMachineCodeInfo(0); +} + +void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { + static bool isAlreadyCodeGenerating = false; + assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); + + // JIT the function + isAlreadyCodeGenerating = true; + jitstate->getPM(locked).run(*F); + isAlreadyCodeGenerating = false; + + // If the function referred to another function that had not yet been + // read from bitcode, but we are jitting non-lazily, emit it now. + while (!jitstate->getPendingFunctions(locked).empty()) { + Function *PF = jitstate->getPendingFunctions(locked).back(); + jitstate->getPendingFunctions(locked).pop_back(); + + // JIT the function + isAlreadyCodeGenerating = true; + jitstate->getPM(locked).run(*PF); + isAlreadyCodeGenerating = false; + + // Now that the function has been jitted, ask the JITEmitter to rewrite + // the stub with real address of the function. + updateFunctionStub(PF); + } + + // If the JIT is configured to emit info so that dlsym can be used to + // rewrite stubs to external globals, do so now. + if (areDlsymStubsEnabled() && isLazyCompilationDisabled()) + updateDlsymStubTable(); +} + +/// getPointerToFunction - This method is used to get the address of the +/// specified function, compiling it if necessary.
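+/// For example, EE->getPointerToFunction(M->getFunction("main")) will +/// materialize the function from bitcode if needed, run the codegen passes +/// over it, and return the address recorded in the global address table.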
+/// +void *JIT::getPointerToFunction(Function *F) { + + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; // Check if function already code gen'd + + MutexGuard locked(lock); + + // Make sure we read in the function if it exists in this Module. + if (F->hasNotBeenReadFromBitcode()) { + // Determine the module provider this function is provided by. + Module *M = F->getParent(); + ModuleProvider *MP = 0; + for (unsigned i = 0, e = Modules.size(); i != e; ++i) { + if (Modules[i]->getModule() == M) { + MP = Modules[i]; + break; + } + } + assert(MP && "Function isn't in a module we know about!"); + + std::string ErrorMsg; + if (MP->materializeFunction(F, &ErrorMsg)) { + cerr << "Error reading function '" << F->getName() + << "' from bitcode file: " << ErrorMsg << "\n"; + abort(); + } + + // Now retry to get the address. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + } + + if (F->isDeclaration()) { + bool AbortOnFailure = + !areDlsymStubsEnabled() && !F->hasExternalWeakLinkage(); + void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); + addGlobalMapping(F, Addr); + return Addr; + } + + runJITOnFunctionUnlocked(F, locked); + + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + return Addr; +} + +/// getOrEmitGlobalVariable - Return the address of the specified global +/// variable, possibly emitting it to memory if needed. This is used by the +/// Emitter. +void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { + MutexGuard locked(lock); + + void *Ptr = getPointerToGlobalIfAvailable(GV); + if (Ptr) return Ptr; + + // If the global is external, just remember the address. + if (GV->isDeclaration()) { +#if HAVE___DSO_HANDLE + if (GV->getName() == "__dso_handle") + return (void*)&__dso_handle; +#endif + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName().c_str()); + if (Ptr == 0 && !areDlsymStubsEnabled()) { + cerr << "Could not resolve external global address: " + << GV->getName() << "\n"; + abort(); + } + addGlobalMapping(GV, Ptr); + } else { + // GlobalVariables which are not "constant" cause trouble in a server + // situation: they are returned in the same block of memory as code, + // which may not be writable. + if (isGVCompilationDisabled() && !GV->isConstant()) { + cerr << "Compilation of non-internal GlobalValue is disabled!\n"; + abort(); + } + // If the global hasn't been emitted to memory yet, allocate space and + // emit it into memory. It goes in the same array as the generated + // code, jump tables, etc. + const Type *GlobalType = GV->getType()->getElementType(); + size_t S = getTargetData()->getTypeAllocSize(GlobalType); + size_t A = getTargetData()->getPreferredAlignment(GV); + if (GV->isThreadLocal()) { + MutexGuard locked(lock); + Ptr = TJI.allocateThreadLocalMemory(S); + } else if (TJI.allocateSeparateGVMemory()) { + if (A <= 8) { + Ptr = malloc(S); + } else { + // Allocate S+A bytes of memory, then use an aligned pointer within that + // space. + Ptr = malloc(S+A); + unsigned MisAligned = ((intptr_t)Ptr & (A-1)); + Ptr = (char*)Ptr + (MisAligned ? (A-MisAligned) : 0); + } + } else { + Ptr = JCE->allocateSpace(S, A); + } + addGlobalMapping(GV, Ptr); + EmitGlobalVariable(GV); + } + return Ptr; +} + +/// recompileAndRelinkFunction - This method is used to force a function +/// which has already been compiled, to be compiled again, possibly +/// after it has been modified.
Then the entry to the old copy is overwritten +/// with a branch to the new copy. If there was no old copy, this acts +/// just like JIT::getPointerToFunction(). +/// +void *JIT::recompileAndRelinkFunction(Function *F) { + void *OldAddr = getPointerToGlobalIfAvailable(F); + + // If it's not already compiled there is no reason to patch it up. + if (OldAddr == 0) { return getPointerToFunction(F); } + + // Delete the old function mapping. + addGlobalMapping(F, 0); + + // Recodegen the function + runJITOnFunction(F); + + // Update state, forward the old function to the new function. + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + TJI.replaceMachineCodeForFunction(OldAddr, Addr); + return Addr; +} + +/// getMemoryForGV - This method abstracts memory allocation of global +/// variable so that the JIT can allocate thread local variables depending +/// on the target. +/// +char* JIT::getMemoryForGV(const GlobalVariable* GV) { + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); + if (GV->isThreadLocal()) { + MutexGuard locked(lock); + return TJI.allocateThreadLocalMemory(GVSize); + } else { + return new char[GVSize]; + } +} + +void JIT::addPendingFunction(Function *F) { + MutexGuard locked(lock); + jitstate->getPendingFunctions(locked).push_back(F); +} diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h new file mode 100644 index 000000000000..3ccb2dd8126b --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -0,0 +1,176 @@ +//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the top-level JIT data structure. +// +//===----------------------------------------------------------------------===// + +#ifndef JIT_H +#define JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/PassManager.h" + +namespace llvm { + +class Function; +class TargetMachine; +class TargetJITInfo; +class MachineCodeEmitter; +class MachineCodeInfo; + +class JITState { +private: + FunctionPassManager PM; // Passes to compile a function + ModuleProvider *MP; // ModuleProvider used to create the PM + + /// PendingFunctions - Functions which have not been code generated yet, but + /// were called from a function being code generated. + std::vector PendingFunctions; + +public: + explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {} + + FunctionPassManager &getPM(const MutexGuard &L) { + return PM; + } + + ModuleProvider *getMP() const { return MP; } + std::vector &getPendingFunctions(const MutexGuard &L) { + return PendingFunctions; + } +}; + + +class JIT : public ExecutionEngine { + TargetMachine &TM; // The current target we are compiling to + TargetJITInfo &TJI; // The JITInfo for the target we are compiling to + JITCodeEmitter *JCE; // JCE object + + JITState *jitstate; + + JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, CodeGenOpt::Level OptLevel); +public: + ~JIT(); + + static void Register() { + JITCtor = create; + } + + /// getJITInfo - Return the target JIT information structure. 
+ /// + TargetJITInfo &getJITInfo() const { return TJI; } + + /// create - Create and return a new JIT compiler if there is one available + /// for the current target. Otherwise, return null. + /// + static ExecutionEngine *create(ModuleProvider *MP, std::string *Err, + CodeGenOpt::Level OptLevel = + CodeGenOpt::Default) { + return createJIT(MP, Err, 0, OptLevel); + } + + virtual void addModuleProvider(ModuleProvider *MP); + + /// removeModuleProvider - Remove a ModuleProvider from the list of modules. + /// Releases the Module from the ModuleProvider, materializing it in the + /// process, and returns the materialized Module. + virtual Module *removeModuleProvider(ModuleProvider *MP, + std::string *ErrInfo = 0); + + /// deleteModuleProvider - Remove a ModuleProvider from the list of modules, + /// and delete the ModuleProvider and its owned Module. Avoids materializing + /// the underlying module. + virtual void deleteModuleProvider(ModuleProvider *P, std::string *ErrInfo = 0); + + /// runFunction - Start execution with the specified function and arguments. + /// + virtual GenericValue runFunction(Function *F, + const std::vector<GenericValue> &ArgValues); + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + + // CompilationCallback - Invoked the first time that a call site is found, + // which causes lazy compilation of the target function. + // + static void CompilationCallback(); + + /// getPointerToFunction - This returns the address of the specified function, + /// compiling it if necessary. + /// + void *getPointerToFunction(Function *F); + + /// getOrEmitGlobalVariable - Return the address of the specified global + /// variable, possibly emitting it to memory if needed. This is used by the + /// Emitter. + void *getOrEmitGlobalVariable(const GlobalVariable *GV); + + /// getPointerToFunctionOrStub - If the specified function has been + /// code-gen'd, return a pointer to the function. If not, compile it, or use + /// a stub to implement lazy compilation if available. + /// + void *getPointerToFunctionOrStub(Function *F); + + /// recompileAndRelinkFunction - This method is used to force a function + /// which has already been compiled, to be compiled again, possibly + /// after it has been modified. Then the entry to the old copy is overwritten + /// with a branch to the new copy. If there was no old copy, this acts + /// just like JIT::getPointerToFunction(). + /// + void *recompileAndRelinkFunction(Function *F); + + /// freeMachineCodeForFunction - deallocate memory used to code-generate this + /// Function. + /// + void freeMachineCodeForFunction(Function *F); + + /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd + /// function was encountered. Add it to a pending list to be processed after + /// the current function. + /// + void addPendingFunction(Function *F);
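Clients normally reach this class through the create() hook above rather than by constructing it directly. A minimal bring-up sketch, assuming a hypothetical Module *M and the era's ExistingModuleProvider and cerr spellings:

    // Wrap an existing Module and ask for a JIT-backed ExecutionEngine.
    ModuleProvider *MP = new ExistingModuleProvider(M);
    std::string Err;
    ExecutionEngine *EE = JIT::create(MP, &Err, CodeGenOpt::Default);
    if (!EE)
      cerr << "could not create JIT: " << Err << "\n";

+ + /// getCodeEmitter - Return the code emitter this JIT is emitting into.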
+ JITCodeEmitter *getCodeEmitter() const { return JCE; } + + static ExecutionEngine *createJIT(ModuleProvider *MP, std::string *Err, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel); + + + // Run the JIT on F and return information about the generated code + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0); + +private: + static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM); + void registerMachineCodeInfo(MachineCodeInfo *MCI); + void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); + void updateFunctionStub(Function *F); + void updateDlsymStubTable(); + +protected: + + /// getMemoryforGV - Allocate memory for a global variable. + virtual char* getMemoryForGV(const GlobalVariable* GV); + +}; + +} // End llvm namespace + +#endif diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp new file mode 100644 index 000000000000..e101ef371ed0 --- /dev/null +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -0,0 +1,1056 @@ +//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITDwarfEmitter object that is used by the JIT to +// write dwarf tables to memory. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "JITDwarfEmitter.h" +#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : Jit(theJit) {} + + +unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, + JITCodeEmitter& jce, + unsigned char* StartFunction, + unsigned char* EndFunction) { + const TargetMachine& TM = F.getTarget(); + TD = TM.getTargetData(); + needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding(); + stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection(); + RI = TM.getRegisterInfo(); + JCE = &jce; + + unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, + EndFunction); + + unsigned char* Result = 0; + unsigned char* EHFramePtr = 0; + + const std::vector Personalities = MMI->getPersonalities(); + EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]); + + Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr, + StartFunction, EndFunction, ExceptionTable); + + return Result; +} + + +void +JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, + const std::vector &Moves) const { + unsigned PointerSize = TD->getPointerSize(); + int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? 
PointerSize : -PointerSize; + bool IsLocal = false; + unsigned BaseLabelID = 0; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + unsigned LabelID = Move.getLabelID(); + + if (LabelID) { + LabelID = MMI->MappedLabel(LabelID); + + // Throw out move if the label is invalid. + if (!LabelID) continue; + } + + intptr_t LabelPtr = 0; + if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID); + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // Advance row if new location. + if (BaseLabelPtr && LabelID && (BaseLabelID != LabelID || !IsLocal)) { + JCE->emitByte(dwarf::DW_CFA_advance_loc4); + JCE->emitInt32(LabelPtr - BaseLabelPtr); + + BaseLabelID = LabelID; + BaseLabelPtr = LabelPtr; + IsLocal = true; + } + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (!Src.isReg()) { + if (Src.getReg() == MachineLocation::VirtualFP) { + JCE->emitByte(dwarf::DW_CFA_def_cfa_offset); + } else { + JCE->emitByte(dwarf::DW_CFA_def_cfa); + JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true)); + } + + int Offset = -Src.getOffset(); + + JCE->emitULEB128Bytes(Offset); + } else { + assert(0 && "Machine move not supported yet."); + } + } else if (Src.isReg() && + Src.getReg() == MachineLocation::VirtualFP) { + if (Dst.isReg()) { + JCE->emitByte(dwarf::DW_CFA_def_cfa_register); + JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(0 && "Machine move not supported yet."); + } + } else { + unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); + int Offset = Dst.getOffset() / stackGrowth; + + if (Offset < 0) { + JCE->emitByte(dwarf::DW_CFA_offset_extended_sf); + JCE->emitULEB128Bytes(Reg); + JCE->emitSLEB128Bytes(Offset); + } else if (Reg < 64) { + JCE->emitByte(dwarf::DW_CFA_offset + Reg); + JCE->emitULEB128Bytes(Offset); + } else { + JCE->emitByte(dwarf::DW_CFA_offset_extended); + JCE->emitULEB128Bytes(Reg); + JCE->emitULEB128Bytes(Offset); + } + } + } +} + +/// SharedTypeIds - How many leading type ids two landing pads have in common. +static unsigned SharedTypeIds(const LandingPadInfo *L, + const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + unsigned Count = 0; + + for (; Count != MinSize; ++Count) + if (LIds[Count] != RIds[Count]) + return Count; + + return Count; +} + + +/// PadLT - Order landing pads lexicographically by type id. +static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + + for (unsigned i = 0; i != MinSize; ++i) + if (LIds[i] != RIds[i]) + return LIds[i] < RIds[i]; + + return LSize < RSize; +} + +namespace { + +struct KeyInfo { + static inline unsigned getEmptyKey() { return -1U; } + static inline unsigned getTombstoneKey() { return -2U; } + static unsigned getHashValue(const unsigned &Key) { return Key; } + static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } + static bool isPod() { return true; } +}; + +/// ActionEntry - Structure describing an entry in the actions table. +struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + struct ActionEntry *Previous; +};
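An ActionEntry mirrors one record of the DWARF action table: when the table is emitted (see the "Emit the actions" loop further down), each record is just two SLEB128 fields, and NextAction chains records through a self-relative byte displacement, negative here because chained records were pushed earlier, with 0 terminating the chain. A sketch of that flattening, under those assumptions:

    // Illustrative only: how a chain of ActionEntry records becomes bytes.
    static void EmitActionChain(JITCodeEmitter &JCE,
                                const SmallVectorImpl<ActionEntry> &Actions) {
      for (unsigned i = 0, n = Actions.size(); i != n; ++i) {
        JCE.emitSLEB128Bytes(Actions[i].ValueForTypeID); // type or filter index
        JCE.emitSLEB128Bytes(Actions[i].NextAction);     // 0 = end of chain
      }
    }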
+ +/// PadRange - Structure holding a try-range and the associated landing pad. +struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; +}; + +typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType; + +/// CallSiteEntry - Structure describing an entry in the call-site table. +struct CallSiteEntry { + unsigned BeginLabel; // zero indicates the start of the function. + unsigned EndLabel; // zero indicates the end of the function. + unsigned PadLabel; // zero indicates that there is no landing pad. + unsigned Action; +}; + +} + +unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, + unsigned char* StartFunction, + unsigned char* EndFunction) const { + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + if (PadInfos.empty()) return 0; + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + + // Negative type ids index into FilterIds, positive type ids index into + // TypeInfos. The value written for a positive type id is just the type + // id itself. For a negative type id, however, the value written is the + // (negative) byte offset of the corresponding FilterIds entry. The byte + // offset is usually equal to the type id, because the FilterIds entries + // are written using a variable width encoding which outputs one byte per + // entry as long as the value written is not too large, but can differ. + // This kind of complication does not occur for positive type ids because + // type infos are output using a fixed width encoding. + // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i]. + SmallVector<int, 16> FilterOffsets; + FilterOffsets.reserve(FilterIds.size()); + int Offset = -1; + for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), + E = FilterIds.end(); I != E; ++I) { + FilterOffsets.push_back(Offset); + Offset -= TargetAsmInfo::getULEB128Size(*I); + } + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector<ActionEntry, 32> Actions; + SmallVector<unsigned, 64> FirstActions; + FirstActions.reserve(LandingPads.size()); + + int FirstAction = 0; + unsigned SizeActions = 0; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LP = LandingPads[i]; + const std::vector<int> &TypeIds = LP->TypeIds; + const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; + unsigned SizeSiteActions = 0; + + if (NumShared < TypeIds.size()) { + unsigned SizeAction = 0; + ActionEntry *PrevAction = 0; + + if (NumShared) { + const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); + assert(Actions.size()); + PrevAction = &Actions.back(); + SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + + TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + for (unsigned j = NumShared; j != SizePrevIds; ++j) { + SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction += -PrevAction->NextAction; + PrevAction = PrevAction->Previous; + } + } + + // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { + int TypeID = TypeIds[I]; + assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + + int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; + SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeSiteActions += SizeAction; + + ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; + Actions.push_back(Action); + + PrevAction = &Actions.back(); + } + + // Record the first action of the landing pad site. + FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; + } // else identical - re-use previous FirstAction + + FirstActions.push_back(FirstAction); + + // Compute this sites contribution to size. + SizeActions += SizeSiteActions; + } + + // Compute the call-site table. Entries must be ordered by address. + SmallVector CallSites; + + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + bool MayThrow = false; + unsigned LastLabel = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); + MI != E; ++MI) { + if (!MI->isLabel()) { + MayThrow |= MI->getDesc().isCall(); + continue; + } + + unsigned BeginLabel = MI->getOperand(0).getImm(); + assert(BeginLabel && "Invalid label!"); + + if (BeginLabel == LastLabel) + MayThrow = false; + + RangeMapType::iterator L = PadMap.find(BeginLabel); + + if (L == PadMap.end()) + continue; + + PadRange P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // If some instruction between the previous try-range and this one may + // throw, create a call-site entry with no landing pad for the region + // between the try-ranges. + if (MayThrow) { + CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + CallSites.push_back(Site); + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + CallSiteEntry Site = {BeginLabel, LastLabel, + LandingPad->LandingPadLabel, FirstActions[P.PadIndex]}; + + assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel && + "Invalid landing pad!"); + + // Try to merge with the previous call-site. + if (CallSites.size()) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + CallSites.push_back(Site); + } + } + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (MayThrow) { + CallSiteEntry Site = {LastLabel, 0, 0, 0}; + CallSites.push_back(Site); + } + + // Final tallies. + unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start. + sizeof(int32_t) + // Site length. + sizeof(int32_t)); // Landing pad. 
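The three fixed 32-bit fields tallied above are easy to count; the Action field, by contrast, is LEB128-encoded, which is why the loop below adds a per-entry getULEB128Size term. The size rule is the standard DWARF one, seven payload bits per byte with the high bit as a continuation flag; a minimal sketch of the equivalent computation:

    // Equivalent of TargetAsmInfo::getULEB128Size (standard ULEB128 rule).
    static unsigned ULEB128Size(uint64_t Value) {
      unsigned Size = 0;
      do { Value >>= 7; ++Size; } while (Value); // one byte per 7 payload bits
      return Size;
    }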
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) + SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + + unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); + + unsigned TypeOffset = sizeof(int8_t) + // Call site format + // Call-site table length + TargetAsmInfo::getULEB128Size(SizeSites) + + SizeSites + SizeActions + SizeTypes; + + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset + TypeOffset; + + unsigned SizeAlign = (4 - TotalSize) & 3; + + // Begin the exception table. + JCE->emitAlignment(4); + for (unsigned i = 0; i != SizeAlign; ++i) { + JCE->emitByte(0); + // Asm->EOL("Padding"); + } + + unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue(); + + // Emit the header. + JCE->emitByte(dwarf::DW_EH_PE_omit); + // Asm->EOL("LPStart format (DW_EH_PE_omit)"); + JCE->emitByte(dwarf::DW_EH_PE_absptr); + // Asm->EOL("TType format (DW_EH_PE_absptr)"); + JCE->emitULEB128Bytes(TypeOffset); + // Asm->EOL("TType base offset"); + JCE->emitByte(dwarf::DW_EH_PE_udata4); + // Asm->EOL("Call site format (DW_EH_PE_udata4)"); + JCE->emitULEB128Bytes(SizeSites); + // Asm->EOL("Call-site table length"); + + // Emit the landing pad site information. + for (unsigned i = 0; i < CallSites.size(); ++i) { + CallSiteEntry &S = CallSites[i]; + intptr_t BeginLabelPtr = 0; + intptr_t EndLabelPtr = 0; + + if (!S.BeginLabel) { + BeginLabelPtr = (intptr_t)StartFunction; + JCE->emitInt32(0); + } else { + BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel); + JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction); + } + + // Asm->EOL("Region start"); + + if (!S.EndLabel) { + EndLabelPtr = (intptr_t)EndFunction; + JCE->emitInt32((intptr_t)EndFunction - BeginLabelPtr); + } else { + EndLabelPtr = JCE->getLabelAddress(S.EndLabel); + JCE->emitInt32(EndLabelPtr - BeginLabelPtr); + } + //Asm->EOL("Region length"); + + if (!S.PadLabel) { + JCE->emitInt32(0); + } else { + unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel); + JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction); + } + // Asm->EOL("Landing pad"); + + JCE->emitULEB128Bytes(S.Action); + // Asm->EOL("Action"); + } + + // Emit the actions. + for (unsigned I = 0, N = Actions.size(); I != N; ++I) { + ActionEntry &Action = Actions[I]; + + JCE->emitSLEB128Bytes(Action.ValueForTypeID); + //Asm->EOL("TypeInfo index"); + JCE->emitSLEB128Bytes(Action.NextAction); + //Asm->EOL("Next action"); + } + + // Emit the type ids. + for (unsigned M = TypeInfos.size(); M; --M) { + GlobalVariable *GV = TypeInfos[M - 1]; + + if (GV) { + if (TD->getPointerSize() == sizeof(int32_t)) { + JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); + } else { + JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV)); + } + } else { + if (TD->getPointerSize() == sizeof(int32_t)) + JCE->emitInt32(0); + else + JCE->emitInt64(0); + } + // Asm->EOL("TypeInfo"); + } + + // Emit the filter typeids. + for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { + unsigned TypeID = FilterIds[j]; + JCE->emitULEB128Bytes(TypeID); + //Asm->EOL("Filter TypeInfo index"); + } + + JCE->emitAlignment(4); + + return DwarfExceptionTable; +} + +unsigned char* +JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { + unsigned PointerSize = TD->getPointerSize(); + int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? 
+ PointerSize : -PointerSize; + + unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue(); + // EH Common Frame header + JCE->allocateSpace(4, 0); + unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); + JCE->emitInt32((int)0); + JCE->emitByte(dwarf::DW_CIE_VERSION); + JCE->emitString(Personality ? "zPLR" : "zR"); + JCE->emitULEB128Bytes(1); + JCE->emitSLEB128Bytes(stackGrowth); + JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true)); + + if (Personality) { + // Augmentation Size: 3 small ULEBs of one byte each, and the personality + // function which size is PointerSize. + JCE->emitULEB128Bytes(3 + PointerSize); + + // We set the encoding of the personality as direct encoding because we use + // the function pointer. The encoding is not relative because the current + // PC value may be bigger than the personality function pointer. + if (PointerSize == 4) { + JCE->emitByte(dwarf::DW_EH_PE_sdata4); + JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality))); + } else { + JCE->emitByte(dwarf::DW_EH_PE_sdata8); + JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); + } + + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + + } else { + JCE->emitULEB128Bytes(1); + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + } + + std::vector Moves; + RI->getInitialFrameState(Moves); + EmitFrameMoves(0, Moves); + JCE->emitAlignment(PointerSize); + + JCE->emitInt32At((uintptr_t*)StartCommonPtr, + (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - + FrameCommonBeginPtr)); + + return StartCommonPtr; +} + + +unsigned char* +JITDwarfEmitter::EmitEHFrame(const Function* Personality, + unsigned char* StartCommonPtr, + unsigned char* StartFunction, + unsigned char* EndFunction, + unsigned char* ExceptionTable) const { + unsigned PointerSize = TD->getPointerSize(); + + // EH frame header. + unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); + JCE->allocateSpace(4, 0); + unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); + // FDE CIE Offset + JCE->emitInt32(FrameBeginPtr - StartCommonPtr); + JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue()); + JCE->emitInt32(EndFunction - StartFunction); + + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (MMI->getPersonalityIndex()) { + JCE->emitULEB128Bytes(4); + + if (!MMI->getLandingPads().empty()) { + JCE->emitInt32(ExceptionTable - (unsigned char*)JCE->getCurrentPCValue()); + } else { + JCE->emitInt32((int)0); + } + } else { + JCE->emitULEB128Bytes(0); + } + + // Indicate locations of function specific callee saved registers in + // frame. 
+ EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves()); + + JCE->emitAlignment(PointerSize); + + // Indicate the size of the table + JCE->emitInt32At((uintptr_t*)StartEHPtr, + (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - + StartEHPtr)); + + // Double zeroes for the unwind runtime + if (PointerSize == 8) { + JCE->emitInt64(0); + JCE->emitInt64(0); + } else { + JCE->emitInt32(0); + JCE->emitInt32(0); + } + + + return StartEHPtr; +} + +unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F, + JITCodeEmitter& jce, + unsigned char* StartFunction, + unsigned char* EndFunction) { + const TargetMachine& TM = F.getTarget(); + TD = TM.getTargetData(); + needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding(); + stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection(); + RI = TM.getRegisterInfo(); + JCE = &jce; + unsigned FinalSize = 0; + + FinalSize += GetExceptionTableSizeInBytes(&F); + + const std::vector Personalities = MMI->getPersonalities(); + FinalSize += + GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]); + + FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()], + StartFunction); + + return FinalSize; +} + +/// RoundUpToAlign - Add the specified alignment to FinalSize and returns +/// the new value. +static unsigned RoundUpToAlign(unsigned FinalSize, unsigned Alignment) { + if (Alignment == 0) Alignment = 1; + // Since we do not know where the buffer will be allocated, be pessimistic. + return FinalSize + Alignment; +} + +unsigned +JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality, + unsigned char* StartFunction) const { + unsigned PointerSize = TD->getPointerSize(); + unsigned FinalSize = 0; + // EH frame header. + FinalSize += PointerSize; + // FDE CIE Offset + FinalSize += 3 * PointerSize; + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (MMI->getPersonalityIndex()) { + FinalSize += TargetAsmInfo::getULEB128Size(4); + FinalSize += PointerSize; + } else { + FinalSize += TargetAsmInfo::getULEB128Size(0); + } + + // Indicate locations of function specific callee saved registers in + // frame. + FinalSize += GetFrameMovesSizeInBytes((intptr_t)StartFunction, + MMI->getFrameMoves()); + + FinalSize = RoundUpToAlign(FinalSize, 4); + + // Double zeroes for the unwind runtime + FinalSize += 2 * PointerSize; + + return FinalSize; +} + +unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personality) + const { + + unsigned PointerSize = TD->getPointerSize(); + int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? + PointerSize : -PointerSize; + unsigned FinalSize = 0; + // EH Common Frame header + FinalSize += PointerSize; + FinalSize += 4; + FinalSize += 1; + FinalSize += Personality ? 
5 : 3; // "zPLR" or "zR" + FinalSize += TargetAsmInfo::getULEB128Size(1); + FinalSize += TargetAsmInfo::getSLEB128Size(stackGrowth); + FinalSize += 1; + + if (Personality) { + FinalSize += TargetAsmInfo::getULEB128Size(7); + + // Encoding + FinalSize += 1; + // Personality + FinalSize += PointerSize; + + FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); + FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); + + } else { + FinalSize += TargetAsmInfo::getULEB128Size(1); + FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); + } + + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + FinalSize += GetFrameMovesSizeInBytes(0, Moves); + FinalSize = RoundUpToAlign(FinalSize, 4); + return FinalSize; +} + +unsigned +JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, + const std::vector<MachineMove> &Moves) const { + unsigned PointerSize = TD->getPointerSize(); + int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? + PointerSize : -PointerSize; + bool IsLocal = BaseLabelPtr; + unsigned FinalSize = 0; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + unsigned LabelID = Move.getLabelID(); + + if (LabelID) { + LabelID = MMI->MappedLabel(LabelID); + + // Throw out move if the label is invalid. + if (!LabelID) continue; + } + + intptr_t LabelPtr = 0; + if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID); + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // Advance row if new location. + if (BaseLabelPtr && LabelID && (BaseLabelPtr != LabelPtr || !IsLocal)) { + FinalSize++; + FinalSize += PointerSize; + BaseLabelPtr = LabelPtr; + IsLocal = true; + } + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (!Src.isReg()) { + if (Src.getReg() == MachineLocation::VirtualFP) { + ++FinalSize; + } else { + ++FinalSize; + unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true); + FinalSize += TargetAsmInfo::getULEB128Size(RegNum); + } + + int Offset = -Src.getOffset(); + + FinalSize += TargetAsmInfo::getULEB128Size(Offset); + } else { + assert(0 && "Machine move not supported yet."); + } + } else if (Src.isReg() && + Src.getReg() == MachineLocation::VirtualFP) { + if (Dst.isReg()) { + ++FinalSize; + unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true); + FinalSize += TargetAsmInfo::getULEB128Size(RegNum); + } else { + assert(0 && "Machine move not supported yet."); + } + } else { + unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); + int Offset = Dst.getOffset() / stackGrowth; + + if (Offset < 0) { + ++FinalSize; + FinalSize += TargetAsmInfo::getULEB128Size(Reg); + FinalSize += TargetAsmInfo::getSLEB128Size(Offset); + } else if (Reg < 64) { + ++FinalSize; + FinalSize += TargetAsmInfo::getULEB128Size(Offset); + } else { + ++FinalSize; + FinalSize += TargetAsmInfo::getULEB128Size(Reg); + FinalSize += TargetAsmInfo::getULEB128Size(Offset); + } + } + } + return FinalSize; +} + +unsigned +JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const { + unsigned FinalSize = 0; + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + if (PadInfos.empty()) return 0;
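These Get*SizeInBytes passes run before any bytes exist, so they cannot know where the buffer will land; RoundUpToAlign therefore charges a full Alignment of slack instead of computing exact padding. For contrast, a sketch of the exact rounding an emitter can do once an address is known (power-of-two alignment assumed):

    // Exact alignment of a known address; the sizing passes above cannot do
    // this, so they over-reserve by a whole Alignment instead.
    static uintptr_t AlignAddr(uintptr_t Addr, unsigned Alignment) {
      return (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
    }

+ + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions.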
+ SmallVector LandingPads; + LandingPads.reserve(PadInfos.size()); + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + + // Negative type ids index into FilterIds, positive type ids index into + // TypeInfos. The value written for a positive type id is just the type + // id itself. For a negative type id, however, the value written is the + // (negative) byte offset of the corresponding FilterIds entry. The byte + // offset is usually equal to the type id, because the FilterIds entries + // are written using a variable width encoding which outputs one byte per + // entry as long as the value written is not too large, but can differ. + // This kind of complication does not occur for positive type ids because + // type infos are output using a fixed width encoding. + // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i]. + SmallVector FilterOffsets; + FilterOffsets.reserve(FilterIds.size()); + int Offset = -1; + for(std::vector::const_iterator I = FilterIds.begin(), + E = FilterIds.end(); I != E; ++I) { + FilterOffsets.push_back(Offset); + Offset -= TargetAsmInfo::getULEB128Size(*I); + } + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector Actions; + SmallVector FirstActions; + FirstActions.reserve(LandingPads.size()); + + int FirstAction = 0; + unsigned SizeActions = 0; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LP = LandingPads[i]; + const std::vector &TypeIds = LP->TypeIds; + const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; + unsigned SizeSiteActions = 0; + + if (NumShared < TypeIds.size()) { + unsigned SizeAction = 0; + ActionEntry *PrevAction = 0; + + if (NumShared) { + const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); + assert(Actions.size()); + PrevAction = &Actions.back(); + SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + + TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + for (unsigned j = NumShared; j != SizePrevIds; ++j) { + SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction += -PrevAction->NextAction; + PrevAction = PrevAction->Previous; + } + } + + // Compute the actions. + for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { + int TypeID = TypeIds[I]; + assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + + int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; + SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeSiteActions += SizeAction; + + ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; + Actions.push_back(Action); + + PrevAction = &Actions.back(); + } + + // Record the first action of the landing pad site. + FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; + } // else identical - re-use previous FirstAction + + FirstActions.push_back(FirstAction); + + // Compute this sites contribution to size. + SizeActions += SizeSiteActions; + } + + // Compute the call-site table. Entries must be ordered by address. 
+ SmallVector CallSites; + + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + bool MayThrow = false; + unsigned LastLabel = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); + MI != E; ++MI) { + if (!MI->isLabel()) { + MayThrow |= MI->getDesc().isCall(); + continue; + } + + unsigned BeginLabel = MI->getOperand(0).getImm(); + assert(BeginLabel && "Invalid label!"); + + if (BeginLabel == LastLabel) + MayThrow = false; + + RangeMapType::iterator L = PadMap.find(BeginLabel); + + if (L == PadMap.end()) + continue; + + PadRange P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // If some instruction between the previous try-range and this one may + // throw, create a call-site entry with no landing pad for the region + // between the try-ranges. + if (MayThrow) { + CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + CallSites.push_back(Site); + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + CallSiteEntry Site = {BeginLabel, LastLabel, + LandingPad->LandingPadLabel, FirstActions[P.PadIndex]}; + + assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel && + "Invalid landing pad!"); + + // Try to merge with the previous call-site. + if (CallSites.size()) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + CallSites.push_back(Site); + } + } + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (MayThrow) { + CallSiteEntry Site = {LastLabel, 0, 0, 0}; + CallSites.push_back(Site); + } + + // Final tallies. + unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start. + sizeof(int32_t) + // Site length. + sizeof(int32_t)); // Landing pad. + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) + SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + + unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); + + unsigned TypeOffset = sizeof(int8_t) + // Call site format + // Call-site table length + TargetAsmInfo::getULEB128Size(SizeSites) + + SizeSites + SizeActions + SizeTypes; + + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset + TypeOffset; + + unsigned SizeAlign = (4 - TotalSize) & 3; + + // Begin the exception table. + FinalSize = RoundUpToAlign(FinalSize, 4); + for (unsigned i = 0; i != SizeAlign; ++i) { + ++FinalSize; + } + + unsigned PointerSize = TD->getPointerSize(); + + // Emit the header. 
+ ++FinalSize; + // Asm->EOL("LPStart format (DW_EH_PE_omit)"); + ++FinalSize; + // Asm->EOL("TType format (DW_EH_PE_absptr)"); + ++FinalSize; + // Asm->EOL("TType base offset"); + ++FinalSize; + // Asm->EOL("Call site format (DW_EH_PE_udata4)"); + ++FinalSize; + // Asm->EOL("Call-site table length"); + + // Emit the landing pad site information. + for (unsigned i = 0; i < CallSites.size(); ++i) { + CallSiteEntry &S = CallSites[i]; + + // Asm->EOL("Region start"); + FinalSize += PointerSize; + + //Asm->EOL("Region length"); + FinalSize += PointerSize; + + // Asm->EOL("Landing pad"); + FinalSize += PointerSize; + + FinalSize += TargetAsmInfo::getULEB128Size(S.Action); + // Asm->EOL("Action"); + } + + // Emit the actions. + for (unsigned I = 0, N = Actions.size(); I != N; ++I) { + ActionEntry &Action = Actions[I]; + + //Asm->EOL("TypeInfo index"); + FinalSize += TargetAsmInfo::getSLEB128Size(Action.ValueForTypeID); + //Asm->EOL("Next action"); + FinalSize += TargetAsmInfo::getSLEB128Size(Action.NextAction); + } + + // Emit the type ids. + for (unsigned M = TypeInfos.size(); M; --M) { + // Asm->EOL("TypeInfo"); + FinalSize += PointerSize; + } + + // Emit the filter typeids. + for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { + unsigned TypeID = FilterIds[j]; + FinalSize += TargetAsmInfo::getULEB128Size(TypeID); + //Asm->EOL("Filter TypeInfo index"); + } + + FinalSize = RoundUpToAlign(FinalSize, 4); + + return FinalSize; +} diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h new file mode 100644 index 000000000000..9120ed44e6a6 --- /dev/null +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -0,0 +1,87 @@ +//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITDwarfEmitter object that is used by the JIT to +// write dwarf tables to memory. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H +#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H + +namespace llvm { + +class Function; +class JITCodeEmitter; +class MachineFunction; +class MachineModuleInfo; +class MachineMove; +class TargetData; +class TargetMachine; +class TargetRegisterInfo; + +class JITDwarfEmitter { + const TargetData* TD; + JITCodeEmitter* JCE; + const TargetRegisterInfo* RI; + MachineModuleInfo* MMI; + JIT& Jit; + bool needsIndirectEncoding; + bool stackGrowthDirection; + + unsigned char* EmitExceptionTable(MachineFunction* MF, + unsigned char* StartFunction, + unsigned char* EndFunction) const; + + void EmitFrameMoves(intptr_t BaseLabelPtr, + const std::vector &Moves) const; + + unsigned char* EmitCommonEHFrame(const Function* Personality) const; + + unsigned char* EmitEHFrame(const Function* Personality, + unsigned char* StartBufferPtr, + unsigned char* StartFunction, + unsigned char* EndFunction, + unsigned char* ExceptionTable) const; + + unsigned GetExceptionTableSizeInBytes(MachineFunction* MF) const; + + unsigned + GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, + const std::vector &Moves) const; + + unsigned GetCommonEHFrameSizeInBytes(const Function* Personality) const; + + unsigned GetEHFrameSizeInBytes(const Function* Personality, + unsigned char* StartFunction) const; + +public: + + JITDwarfEmitter(JIT& jit); + + unsigned char* EmitDwarfTable(MachineFunction& F, + JITCodeEmitter& JCE, + unsigned char* StartFunction, + unsigned char* EndFunction); + + + unsigned GetDwarfTableSizeInBytes(MachineFunction& F, + JITCodeEmitter& JCE, + unsigned char* StartFunction, + unsigned char* EndFunction); + + void setModuleInfo(MachineModuleInfo* Info) { + MMI = Info; + } +}; + + +} // end namespace llvm + +#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp new file mode 100644 index 000000000000..d3b0820c5f05 --- /dev/null +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -0,0 +1,1615 @@ +//===-- JITEmitter.cpp - Write machine code to executable memory ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineCodeEmitter object that is used by the JIT to +// write machine code to memory and remember where relocatable values are. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "JIT.h" +#include "JITDwarfEmitter.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/System/Disassembler.h" +#include "llvm/System/Memory.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include +#ifndef NDEBUG +#include +#endif +using namespace llvm; + +STATISTIC(NumBytes, "Number of bytes of machine code compiled"); +STATISTIC(NumRelos, "Number of relocations applied"); +static JIT *TheJIT = 0; + + +//===----------------------------------------------------------------------===// +// JIT lazy compilation code. +// +namespace { + class JITResolverState { + public: + typedef std::map, void*> FunctionToStubMapTy; + typedef std::map StubToFunctionMapTy; + typedef std::map, void*> GlobalToIndirectSymMapTy; + private: + /// FunctionToStubMap - Keep track of the stub created for a particular + /// function so that we can reuse them if necessary. + FunctionToStubMapTy FunctionToStubMap; + + /// StubToFunctionMap - Keep track of the function that each stub + /// corresponds to. + StubToFunctionMapTy StubToFunctionMap; + + /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a + /// particular GlobalVariable so that we can reuse them if necessary. + GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + + public: + FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) { + assert(locked.holds(TheJIT->lock)); + return FunctionToStubMap; + } + + StubToFunctionMapTy& getStubToFunctionMap(const MutexGuard& locked) { + assert(locked.holds(TheJIT->lock)); + return StubToFunctionMap; + } + + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { + assert(locked.holds(TheJIT->lock)); + return GlobalToIndirectSymMap; + } + }; + + /// JITResolver - Keep track of, and resolve, call sites for functions that + /// have not yet been compiled. + class JITResolver { + typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy; + typedef JITResolverState::StubToFunctionMapTy StubToFunctionMapTy; + typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; + + /// LazyResolverFn - The target lazy resolver function that we actually + /// rewrite instructions to use. + TargetJITInfo::LazyResolverFn LazyResolverFn; + + JITResolverState state; + + /// ExternalFnToStubMap - This is the equivalent of FunctionToStubMap for + /// external functions. 
+ std::map ExternalFnToStubMap; + + /// revGOTMap - map addresses to indexes in the GOT + std::map revGOTMap; + unsigned nextGOTIndex; + + static JITResolver *TheJITResolver; + public: + explicit JITResolver(JIT &jit) : nextGOTIndex(0) { + TheJIT = &jit; + + LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); + assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); + TheJITResolver = this; + } + + ~JITResolver() { + TheJITResolver = 0; + } + + /// getFunctionStubIfAvailable - This returns a pointer to a function stub + /// if it has already been created. + void *getFunctionStubIfAvailable(Function *F); + + /// getFunctionStub - This returns a pointer to a function stub, creating + /// one on demand as needed. If empty is true, create a function stub + /// pointing at address 0, to be filled in later. + void *getFunctionStub(Function *F); + + /// getExternalFunctionStub - Return a stub for the function at the + /// specified address, created lazily on demand. + void *getExternalFunctionStub(void *FnAddr); + + /// getGlobalValueIndirectSym - Return an indirect symbol containing the + /// specified GV address. + void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); + + /// AddCallbackAtLocation - If the target is capable of rewriting an + /// instruction without the use of a stub, record the location of the use so + /// we know which function is being used at the location. + void *AddCallbackAtLocation(Function *F, void *Location) { + MutexGuard locked(TheJIT->lock); + /// Get the target-specific JIT resolver function. + state.getStubToFunctionMap(locked)[Location] = F; + return (void*)(intptr_t)LazyResolverFn; + } + + void getRelocatableGVs(SmallVectorImpl &GVs, + SmallVectorImpl &Ptrs); + + GlobalValue *invalidateStub(void *Stub); + + /// getGOTIndexForAddress - Return a new or existing index in the GOT for + /// an address. This function only manages slots, it does not manage the + /// contents of the slots or the memory associated with the GOT. + unsigned getGOTIndexForAddr(void *addr); + + /// JITCompilerFn - This function is called to resolve a stub to a compiled + /// address. If the LLVM Function corresponding to the stub has not yet + /// been compiled, this function compiles it first. + static void *JITCompilerFn(void *Stub); + }; +} + +JITResolver *JITResolver::TheJITResolver = 0; + +/// getFunctionStubIfAvailable - This returns a pointer to a function stub +/// if it has already been created. +void *JITResolver::getFunctionStubIfAvailable(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this function, recycle it. + void *&Stub = state.getFunctionToStubMap(locked)[F]; + return Stub; +} + +/// getFunctionStub - This returns a pointer to a function stub, creating +/// one on demand as needed. +void *JITResolver::getFunctionStub(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this function, recycle it. + void *&Stub = state.getFunctionToStubMap(locked)[F]; + if (Stub) return Stub; + + // Call the lazy resolver function unless we are JIT'ing non-lazily, in which + // case we must resolve the symbol now. + void *Actual = TheJIT->isLazyCompilationDisabled() + ? (void *)0 : (void *)(intptr_t)LazyResolverFn; + + // If this is an external declaration, attempt to resolve the address now + // to place in the stub. + if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) { + Actual = TheJIT->getPointerToFunction(F); + + // If we resolved the symbol to a null address (eg. 
a weak external) + // don't emit a stub. Return a null pointer to the application. If dlsym + // stubs are enabled, not being able to resolve the address is not + // meaningful. + if (!Actual && !TheJIT->areDlsymStubsEnabled()) return 0; + } + + // Codegen a new stub, calling the lazy resolver or the actual address of the + // external function, if it was resolved. + Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, + *TheJIT->getCodeEmitter()); + + if (Actual != (void*)(intptr_t)LazyResolverFn) { + // If we are getting the stub for an external function, we really want the + // address of the stub in the GlobalAddressMap for the JIT, not the address + // of the external function. + TheJIT->updateGlobalMapping(F, Stub); + } + + DOUT << "JIT: Stub emitted at [" << Stub << "] for function '" + << F->getName() << "'\n"; + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.getStubToFunctionMap(locked)[Stub] = F; + + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the stub + // address later. + if (!Actual && TheJIT->isLazyCompilationDisabled()) + if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode()) + TheJIT->addPendingFunction(F); + + return Stub; +} + +/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified +/// GV address. +void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this global variable, recycle it. + void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV]; + if (IndirectSym) return IndirectSym; + + // Otherwise, codegen a new indirect symbol. + IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, + *TheJIT->getCodeEmitter()); + + DOUT << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '" + << GV->getName() << "'\n"; + + return IndirectSym; +} + +/// getExternalFunctionStub - Return a stub for the function at the +/// specified address, created lazily on demand. +void *JITResolver::getExternalFunctionStub(void *FnAddr) { + // If we already have a stub for this function, recycle it. 
+ void *&Stub = ExternalFnToStubMap[FnAddr]; + if (Stub) return Stub; + + Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, + *TheJIT->getCodeEmitter()); + + DOUT << "JIT: Stub emitted at [" << Stub + << "] for external function at '" << FnAddr << "'\n"; + return Stub; +} + +unsigned JITResolver::getGOTIndexForAddr(void* addr) { + unsigned idx = revGOTMap[addr]; + if (!idx) { + idx = ++nextGOTIndex; + revGOTMap[addr] = idx; + DOUT << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n"; + } + return idx; +} + +void JITResolver::getRelocatableGVs(SmallVectorImpl &GVs, + SmallVectorImpl &Ptrs) { + MutexGuard locked(TheJIT->lock); + + FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); + + for (FunctionToStubMapTy::iterator i = FM.begin(), e = FM.end(); i != e; ++i){ + Function *F = i->first; + if (F->isDeclaration() && F->hasExternalLinkage()) { + GVs.push_back(i->first); + Ptrs.push_back(i->second); + } + } + for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); + i != e; ++i) { + GVs.push_back(i->first); + Ptrs.push_back(i->second); + } +} + +GlobalValue *JITResolver::invalidateStub(void *Stub) { + MutexGuard locked(TheJIT->lock); + + FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + StubToFunctionMapTy &SM = state.getStubToFunctionMap(locked); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); + + // Look up the cheap way first, to see if it's a function stub we are + // invalidating. If so, remove it from both the forward and reverse maps. + if (SM.find(Stub) != SM.end()) { + Function *F = SM[Stub]; + SM.erase(Stub); + FM.erase(F); + return F; + } + + // Otherwise, it might be an indirect symbol stub. Find it and remove it. + for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); + i != e; ++i) { + if (i->second != Stub) + continue; + GlobalValue *GV = i->first; + GM.erase(i); + return GV; + } + + // Lastly, check to see if it's in the ExternalFnToStubMap. + for (std::map::iterator i = ExternalFnToStubMap.begin(), + e = ExternalFnToStubMap.end(); i != e; ++i) { + if (i->second != Stub) + continue; + ExternalFnToStubMap.erase(i); + break; + } + + return 0; +} + +/// JITCompilerFn - This function is called when a lazy compilation stub has +/// been entered. It looks up which function this stub corresponds to, compiles +/// it if necessary, then returns the resultant function pointer. +void *JITResolver::JITCompilerFn(void *Stub) { + JITResolver &JR = *TheJITResolver; + + Function* F = 0; + void* ActualPtr = 0; + + { + // Only lock for getting the Function. The call getPointerToFunction made + // in this function might trigger function materializing, which requires + // JIT lock to be unlocked. + MutexGuard locked(TheJIT->lock); + + // The address given to us for the stub may not be exactly right, it might be + // a little bit after the stub. As such, use upper_bound to find it. + StubToFunctionMapTy::iterator I = + JR.state.getStubToFunctionMap(locked).upper_bound(Stub); + assert(I != JR.state.getStubToFunctionMap(locked).begin() && + "This is not a known stub!"); + F = (--I)->second; + ActualPtr = I->first; + } + + // If we have already code generated the function, just return the address. + void *Result = TheJIT->getPointerToGlobalIfAvailable(F); + + if (!Result) { + // Otherwise we don't have it, do lazy compilation now. + + // If lazy compilation is disabled, emit a useful error message and abort. 
+    if (TheJIT->isLazyCompilationDisabled()) {
+      cerr << "LLVM JIT requested to do lazy compilation of function '"
+           << F->getName() << "' when lazy compiles are disabled!\n";
+      abort();
+    }
+
+    // We might like to remove the stub from the StubToFunction map.
+    // We can't do that!  Multiple threads could be stuck, waiting to acquire
+    // the lock above.  As soon as the first thread finishes compiling the
+    // function, the next one will be released, and it needs to be able to
+    // find the function it needs to call.
+    //JR.state.getStubToFunctionMap(locked).erase(I);
+
+    DOUT << "JIT: Lazily resolving function '" << F->getName()
+         << "' in stub ptr = " << Stub << " actual ptr = "
+         << ActualPtr << "\n";
+
+    Result = TheJIT->getPointerToFunction(F);
+  }
+
+  // Reacquire the lock to erase the stub in the map.
+  MutexGuard locked(TheJIT->lock);
+
+  // We don't need to reuse this stub in the future, as F is now compiled.
+  JR.state.getFunctionToStubMap(locked).erase(F);
+
+  // FIXME: We could rewrite all references to this stub if we knew them.
+
+  // What we will do is set the compiled function address to map to the
+  // same GOT entry as the stub so that later clients may update the GOT
+  // if they see it still using the stub address.
+  // Note: this is done so the Resolver doesn't have to manage GOT memory
+  // Do this without allocating map space if the target isn't using a GOT
+  if (JR.revGOTMap.find(Stub) != JR.revGOTMap.end())
+    JR.revGOTMap[Result] = JR.revGOTMap[Stub];
+
+  return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Index Support
+
+// On MacOS we generate an index of currently JIT'd functions so that
+// performance tools can determine a symbol name and accurate code range for a
+// PC value.  Because performance tools are generally asynchronous, the code
+// below is written with the hope that it could be interrupted at any time and
+// have useful answers.  However, we don't go crazy with atomic operations, we
+// just do a "reasonable effort".
+#ifdef __APPLE__
+#define ENABLE_JIT_SYMBOL_TABLE 0
+#endif
+
+/// JitSymbolEntry - Each function that is JIT compiled results in one of these
+/// being added to an array of symbols.  This indicates the name of the function
+/// as well as the address range it occupies.  This allows the client to map
+/// from a PC value to the name of the function.
+struct JitSymbolEntry {
+  const char *FnName;   // FnName - a strdup'd string.
+  void *FnStart;
+  intptr_t FnSize;
+};
+
+
+struct JitSymbolTable {
+  /// NextPtr - This forms a linked list of JitSymbolTable entries.  This
+  /// pointer is not used right now, but might be used in the future.  Consider
+  /// it reserved for future use.
+  JitSymbolTable *NextPtr;
+
+  /// Symbols - This is an array of JitSymbolEntry entries.  Only the first
+  /// 'NumSymbols' symbols are valid.
+  JitSymbolEntry *Symbols;
+
+  /// NumSymbols - This indicates the number of entries in the Symbols array
+  /// that are valid.
+  unsigned NumSymbols;
+
+  /// NumAllocated - This indicates the amount of space we have in the Symbols
+  /// array.  This is a private field that should not be read by external tools.
+  unsigned NumAllocated;
+};
+
+#if ENABLE_JIT_SYMBOL_TABLE
+JitSymbolTable *__jitSymbolTable;
+#endif
+
+static void AddFunctionToSymbolTable(const char *FnName,
+                                     void *FnStart, intptr_t FnSize) {
+  assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
+  JitSymbolTable **SymTabPtrPtr = 0;
+#if !ENABLE_JIT_SYMBOL_TABLE
+  return;
+#else
+  SymTabPtrPtr = &__jitSymbolTable;
+#endif
+
+  // If this is the first entry in the symbol table, add the JitSymbolTable
+  // index.
+  if (*SymTabPtrPtr == 0) {
+    JitSymbolTable *New = new JitSymbolTable();
+    New->NextPtr = 0;
+    New->Symbols = 0;
+    New->NumSymbols = 0;
+    New->NumAllocated = 0;
+    *SymTabPtrPtr = New;
+  }
+
+  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
+
+  // If we don't have enough space in the table, reallocate it.
+  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
+    unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
+    JitSymbolEntry *NewSymbols = new JitSymbolEntry[NewSize];
+    JitSymbolEntry *OldSymbols = SymTabPtr->Symbols;
+
+    // Copy the old entries over.
+    memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
+
+    // Swap the new symbols in, delete the old ones.
+    SymTabPtr->Symbols = NewSymbols;
+    SymTabPtr->NumAllocated = NewSize;
+    delete [] OldSymbols;
+  }
+
+  // We now have enough space; tack the new entry onto the end of the array.
+  JitSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
+  Entry.FnName = strdup(FnName);
+  Entry.FnStart = FnStart;
+  Entry.FnSize = FnSize;
+  ++SymTabPtr->NumSymbols;
+}
+
+static void RemoveFunctionFromSymbolTable(void *FnStart) {
+  assert(FnStart && "Invalid function pointer");
+  JitSymbolTable **SymTabPtrPtr = 0;
+#if !ENABLE_JIT_SYMBOL_TABLE
+  return;
+#else
+  SymTabPtrPtr = &__jitSymbolTable;
+#endif
+
+  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
+  JitSymbolEntry *Symbols = SymTabPtr->Symbols;
+
+  // Scan the table to find its index.  The table is not sorted, so do a
+  // linear scan.
+  unsigned Index;
+  for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
+    assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
+
+  // Once we have the index, nuke this entry by overwriting it with the entry
+  // at the end of the array, making the last entry redundant.
+  const char *OldName = Symbols[Index].FnName;
+  Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
+  free((void*)OldName);
+
+  // Drop the number of symbols in the table.
+  --SymTabPtr->NumSymbols;
+
+  // Finally, if we deleted the final symbol, deallocate the table itself.
+  if (SymTabPtr->NumSymbols != 0)
+    return;
+
+  *SymTabPtrPtr = 0;
+  delete [] Symbols;
+  delete SymTabPtr;
+}
+
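// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// The two routines above maintain the flat array that an external profiling
// tool is expected to read.  As a minimal sketch (assuming
// ENABLE_JIT_SYMBOL_TABLE is enabled so that __jitSymbolTable is populated;
// "LookupJITSymbol" is a hypothetical helper, not LLVM API), a tool could map
// a PC back to a function name like this:
#if 0
static const char *LookupJITSymbol(void *PC) {
  JitSymbolTable *Tab = __jitSymbolTable;
  if (!Tab) return 0;
  for (unsigned i = 0; i != Tab->NumSymbols; ++i) {
    JitSymbolEntry &E = Tab->Symbols[i];
    if ((char*)PC >= (char*)E.FnStart &&
        (char*)PC <  (char*)E.FnStart + E.FnSize)
      return E.FnName;               // PC falls inside this function's range
  }
  return 0;                          // PC is not in JIT'd code
}
#endif
// ----------------------------------------------------------------------------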
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
+//
+namespace {
+  /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+  /// used to output functions to memory for execution.
+  class JITEmitter : public JITCodeEmitter {
+    JITMemoryManager *MemMgr;
+
+    // When outputting a function stub in the context of some other function,
+    // we save BufferBegin/BufferEnd/CurBufferPtr here.
+    uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+    /// Relocations - These are the relocations that the function needs, as
+    /// emitted.
+    std::vector<MachineRelocation> Relocations;
+
+    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+    /// It is filled in by the StartMachineBasicBlock callback and queried by
+    /// the getMachineBasicBlockAddress callback.
+    std::vector<uintptr_t> MBBLocations;
+
+    /// ConstantPool - The constant pool for the current function.
+    ///
+    MachineConstantPool *ConstantPool;
+
+    /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+    ///
+    void *ConstantPoolBase;
+
+    /// ConstPoolAddresses - Addresses of individual constant pool entries.
+    ///
+    SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+    /// JumpTable - The jump tables for the current function.
+    ///
+    MachineJumpTableInfo *JumpTable;
+
+    /// JumpTableBase - A pointer to the first entry in the jump table.
+    ///
+    void *JumpTableBase;
+
+    /// Resolver - This contains info about the currently resolved functions.
+    JITResolver Resolver;
+
+    /// DE - The dwarf emitter for the jit.
+    JITDwarfEmitter *DE;
+
+    /// LabelLocations - This vector is a mapping from Label ID's to their
+    /// address.
+    std::vector<uintptr_t> LabelLocations;
+
+    /// MMI - Machine module info for exception information.
+    MachineModuleInfo* MMI;
+
+    // GVSet - a set to keep track of which globals have been seen
+    SmallPtrSet<const GlobalVariable*, 8> GVSet;
+
+    // CurFn - The llvm function being emitted.  Only valid during
+    // finishFunction().
+    const Function *CurFn;
+
+    // CurFnStubUses - For a given Function, a vector of stubs that it
+    // references.  This facilitates the JIT detecting that a stub is no
+    // longer used, so that it may be deallocated.
+    DenseMap<const Function *, SmallVector<void*, 1> > CurFnStubUses;
+
+    // StubFnRefs - For a given pointer to a stub, a set of Functions which
+    // reference the stub.  When the count of a stub's references drops to
+    // zero, the stub is unused.
+    DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
+
+    // ExtFnStubs - A map of external function names to stubs which have
+    // entries in the JITResolver's ExternalFnToStubMap.
+    StringMap<void *> ExtFnStubs;
+
+    // MCI - A pointer to a MachineCodeInfo object to update with information.
+    MachineCodeInfo *MCI;
+
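// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// CurFnStubUses and StubFnRefs above are a forward and a reverse index over
// the same relation ("function F references stub S").  A self-contained model
// of why both are kept (all names here are hypothetical), mirroring the logic
// of deallocateMemForFunction further below:
#if 0
#include <map>
#include <set>
#include <vector>
typedef std::map<int, std::vector<void*> > FnToStubsMap;  // F -> stubs it uses
typedef std::map<void*, std::set<int> >    StubToFnsMap;  // S -> users of S
static void freeFn(int F, FnToStubsMap &Uses, StubToFnsMap &Refs) {
  std::vector<void*> &Stubs = Uses[F];
  for (size_t i = 0, e = Stubs.size(); i != e; ++i) {
    std::set<int> &R = Refs[Stubs[i]];
    R.erase(F);                        // drop the back-reference
    if (R.empty())
      Refs.erase(Stubs[i]);            // last user gone: stub is reclaimable
  }
  Uses.erase(F);                       // drop the forward entry
}
#endif
// ----------------------------------------------------------------------------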
+  public:
+    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0), MCI(0) {
+      MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+      if (jit.getJITInfo().needsGOT()) {
+        MemMgr->AllocateGOT();
+        DOUT << "JIT is managing a GOT\n";
+      }
+
+      if (ExceptionHandling) DE = new JITDwarfEmitter(jit);
+    }
+    ~JITEmitter() {
+      delete MemMgr;
+      if (ExceptionHandling) delete DE;
+    }
+
+    /// classof - Methods for support type inquiry through isa, cast, and
+    /// dyn_cast:
+    ///
+    static inline bool classof(const JITEmitter*) { return true; }
+    static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+    JITResolver &getJITResolver() { return Resolver; }
+
+    virtual void startFunction(MachineFunction &F);
+    virtual bool finishFunction(MachineFunction &F);
+
+    void emitConstantPool(MachineConstantPool *MCP);
+    void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+    void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+    virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
+                             unsigned Alignment = 1);
+    virtual void startGVStub(const GlobalValue* GV, void *Buffer,
+                             unsigned StubSize);
+    virtual void* finishGVStub(const GlobalValue *GV);
+
+    /// allocateSpace - Reserves space in the current block if any, or
+    /// allocate a new one of the given size.
+    virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+      DOUT << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+           << (void*) getCurrentPCValue() << "]\n";
+    }
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    /// deallocateMemForFunction - Deallocate all memory for the specified
+    /// function body.
+    void deallocateMemForFunction(Function *F);
+
+    /// AddStubToCurrentFunction - Mark the current function being JIT'd as
+    /// using the stub at the specified address.  Allows
+    /// deallocateMemForFunction to also remove stubs no longer referenced.
+    void AddStubToCurrentFunction(void *Stub);
+
+    /// getExternalFnStubs - Accessor for the JIT to find stubs emitted for
+    /// MachineRelocations that reference external functions by name.
+    const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }
+
+    virtual void emitLabel(uint64_t LabelID) {
+      if (LabelLocations.size() <= LabelID)
+        LabelLocations.resize((LabelID+1)*2);
+      LabelLocations[LabelID] = getCurrentPCValue();
+    }
+
+    virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+      assert(LabelLocations.size() > (unsigned)LabelID &&
+             LabelLocations[LabelID] && "Label not emitted!");
+      return LabelLocations[LabelID];
+    }
+
+    virtual void setModuleInfo(MachineModuleInfo* Info) {
+      MMI = Info;
+      if (ExceptionHandling) DE->setModuleInfo(Info);
+    }
+
+    void setMemoryExecutable(void) {
+      MemMgr->setMemoryExecutable();
+    }
+
+    JITMemoryManager *getMemMgr(void) const { return MemMgr; }
+
+    void setMachineCodeInfo(MachineCodeInfo *mci) {
+      MCI = mci;
+    }
+
+  private:
+    void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
+    void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
+                                    bool NoNeedStub);
+    unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
+    unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
+    unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+    unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
+  };
+}
+
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+                                     bool DoesntNeedStub) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return TheJIT->getOrEmitGlobalVariable(GV);
+
+  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+    return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+  // If we have already compiled the function, return a pointer to its body.
+  Function *F = cast<Function>(V);
+  void *ResultPtr;
+  if (!DoesntNeedStub && !TheJIT->isLazyCompilationDisabled()) {
+    // Return the function stub if it's already created.
+    ResultPtr = Resolver.getFunctionStubIfAvailable(F);
+    if (ResultPtr)
+      AddStubToCurrentFunction(ResultPtr);
+  } else {
+    ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+  }
+  if (ResultPtr) return ResultPtr;
+
+  // If this is an external function pointer, we can force the JIT to
+  // 'compile' it, which really just adds it to the map.  In dlsym mode,
+  // external functions are forced through a stub, regardless of reloc type.
+  if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() &&
+      DoesntNeedStub && !TheJIT->areDlsymStubsEnabled())
+    return TheJIT->getPointerToFunction(F);
+
+  // Okay, the function has not been compiled yet; if the target callback
+  // mechanism is capable of rewriting the instruction directly, prefer to do
+  // that instead of emitting a stub.  This uses the lazy resolver, so is not
+  // legal if lazy compilation is disabled.
+  if (DoesntNeedStub && !TheJIT->isLazyCompilationDisabled())
+    return Resolver.AddCallbackAtLocation(F, Reference);
+
+  // Otherwise, we have to emit a stub.
+  void *StubAddr = Resolver.getFunctionStub(F);
+
+  // Add the stub to the current function's list of referenced stubs, so we can
+  // deallocate them if the current function is ever freed.  It's possible to
+  // return null from getFunctionStub in the case of a weak extern that fails
+  // to resolve.
+  if (StubAddr)
+    AddStubToCurrentFunction(StubAddr);
+
+  return StubAddr;
+}
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
+                                            bool NoNeedStub) {
+  // Make sure GV is emitted first, and create a stub containing the fully
+  // resolved address.
+  void *GVAddress = getPointerToGlobal(V, Reference, true);
+  void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+
+  // Add the stub to the current function's list of referenced stubs, so we can
+  // deallocate them if the current function is ever freed.
+  AddStubToCurrentFunction(StubAddr);
+
+  return StubAddr;
+}
+
+void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
+  if (!TheJIT->areDlsymStubsEnabled())
+    return;
+
+  assert(CurFn && "Stub added to current function, but current function is 0!");
+
+  SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
+  StubsUsed.push_back(StubAddr);
+
+  SmallPtrSet<const Function*, 1> &FnRefs = StubFnRefs[StubAddr];
+  FnRefs.insert(CurFn);
+}
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+                                           const TargetData *TD) {
+  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+  if (Constants.empty()) return 0;
+
+  unsigned Size = 0;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Size = (Size + AlignMask) & ~AlignMask;
+    const Type *Ty = CPE.getType();
+    Size += TD->getTypeAllocSize(Ty);
+  }
+  return Size;
+}
+
+static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return 0;
+
+  unsigned NumEntries = 0;
+  for (unsigned i = 0, e = JT.size(); i != e; ++i)
+    NumEntries += JT[i].MBBs.size();
+
+  unsigned EntrySize = MJTI->getEntrySize();
+
+  return NumEntries * EntrySize;
+}
+
+static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
+  if (Alignment == 0) Alignment = 1;
+  // Since we do not know where the buffer will be allocated, be pessimistic.
+  return Size + Alignment;
+}
+
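// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// RoundUpToAlign() above over-reserves on purpose: the buffer's base address
// is unknown at estimation time, and reserving Size + Alignment bytes is
// always enough to place Size bytes at an aligned address within the buffer,
// whatever the base turns out to be.  A standalone check of that claim:
#if 0
#include <cassert>
#include <stdint.h>
int main() {
  uintptr_t B = 0x1003, Size = 10, Align = 16;   // an arbitrary unaligned base
  uintptr_t A = (B + Align - 1) & ~(uintptr_t)(Align - 1); // first aligned addr
  assert(A - B + Size <= Size + Align);          // fits the pessimistic bound
  return 0;
}
#endif
// ----------------------------------------------------------------------------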
+/// addSizeOfGlobal - add the size of the global (plus any alignment padding)
+/// into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
+  const Type *ElTy = GV->getType()->getElementType();
+  size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
+  size_t GVAlign =
+      (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
+  DOUT << "JIT: Adding in size " << GVSize << " alignment " << GVAlign;
+  DEBUG(GV->dump());
+  // Assume code section ends with worst possible alignment, so first
+  // variable needs maximal padding.
+  if (Size==0)
+    Size = 1;
+  Size = ((Size+GVAlign-1)/GVAlign)*GVAlign;
+  Size += GVSize;
+  return Size;
+}
+
+/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
+/// but are referenced from the constant; put them in GVSet and add their
+/// size into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
+                                                   unsigned Size) {
+  // If it's undefined, there is nothing to add.
+  if (isa<UndefValue>(C))
+    return Size;
+
+  // If the value is a ConstantExpr
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+    Constant *Op0 = CE->getOperand(0);
+    switch (CE->getOpcode()) {
+    case Instruction::GetElementPtr:
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::BitCast: {
+      Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+      break;
+    }
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+      Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size);
+      break;
+    }
+    default: {
+      cerr << "ConstantExpr not handled: " << *CE << "\n";
+      abort();
+    }
+    }
+  }
+
+  if (C->getType()->getTypeID() == Type::PointerTyID)
+    if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
+      if (GVSet.insert(GV))
+        Size = addSizeOfGlobal(GV, Size);
+
+  return Size;
+}
+
+/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
+/// but are referenced from the given initializer.
+
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
+                                                   unsigned Size) {
+  if (!isa(Init) &&
+      !isa(Init) &&
+      !isa(Init) &&
+      !isa(Init) &&
+      !isa(Init) &&
+      Init->getType()->isFirstClassType())
+    Size = addSizeOfGlobalsInConstantVal(Init, Size);
+  return Size;
+}
+
+/// GetSizeOfGlobalsInBytes - walk the code for the function, looking for
+/// globals; then walk the initializers of those globals looking for more.
+/// If their size has not been considered yet, add it into the running total
+/// Size.
+
+unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
+  unsigned Size = 0;
+  GVSet.clear();
+
+  for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+       MBB != E; ++MBB) {
+    for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      const TargetInstrDesc &Desc = I->getDesc();
+      const MachineInstr &MI = *I;
+      unsigned NumOps = Desc.getNumOperands();
+      for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
+        const MachineOperand &MO = MI.getOperand(CurOp);
+        if (MO.isGlobal()) {
+          GlobalValue* V = MO.getGlobal();
+          const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+          if (!GV)
+            continue;
+          // If seen in previous function, it will have an entry here.
+          if (TheJIT->getPointerToGlobalIfAvailable(GV))
+            continue;
+          // If seen earlier in this function, it will have an entry here.
+          // FIXME: it should be possible to combine these tables, by
+          // assuming the addresses of the new globals in this module
+          // start at 0 (or something) and adjusting them after codegen
+          // is complete.  Another possibility is to grab a marker bit in GV.
+          if (GVSet.insert(GV))
+            // A variable as yet unseen.  Add in its size.
+            Size = addSizeOfGlobal(GV, Size);
+        }
+      }
+    }
+  }
+  DOUT << "JIT: About to look through initializers\n";
+  // Look for more globals that are referenced only from initializers.
+  // GVSet.end is computed each time because the set can grow as we go.
+  for (SmallPtrSet<const GlobalVariable*, 8>::iterator I = GVSet.begin();
+       I != GVSet.end(); I++) {
+    const GlobalVariable* GV = *I;
+    if (GV->hasInitializer())
+      Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size);
+  }
+
+  return Size;
+}
+
+void JITEmitter::startFunction(MachineFunction &F) {
+  DOUT << "JIT: Starting CodeGen of Function "
+       << F.getFunction()->getName() << "\n";
+
+  uintptr_t ActualSize = 0;
+  // Set the memory writable, if it's not already
+  MemMgr->setMemoryWritable();
+  if (MemMgr->NeedsExactSize()) {
+    DOUT << "JIT: ExactSize\n";
+    const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
+    MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
+    MachineConstantPool *MCP = F.getConstantPool();
+
+    // Ensure the constant pool/jump table info is at least 16-byte aligned.
+    ActualSize = RoundUpToAlign(ActualSize, 16);
+
+    // Add the alignment of the constant pool
+    ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
+
+    // Add the constant pool size
+    ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+
+    // Add the alignment of the jump table info
+    ActualSize = RoundUpToAlign(ActualSize, MJTI->getAlignment());
+
+    // Add the jump table size
+    ActualSize += GetJumpTableSizeInBytes(MJTI);
+
+    // Add the alignment for the function
+    ActualSize = RoundUpToAlign(ActualSize,
+                                std::max(F.getFunction()->getAlignment(), 8U));
+
+    // Add the function size
+    ActualSize += TII->GetFunctionSizeInBytes(F);
+
+    DOUT << "JIT: ActualSize before globals " << ActualSize << "\n";
+    // Add the size of the globals that will be allocated after this function.
+    // These are all the ones referenced from this function that were not
+    // previously allocated.
+    ActualSize += GetSizeOfGlobalsInBytes(F);
+    DOUT << "JIT: ActualSize after globals " << ActualSize << "\n";
+  }
+
+  BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+                                                         ActualSize);
+  BufferEnd = BufferBegin+ActualSize;
+
+  // Ensure the constant pool/jump table info is at least 16-byte aligned.
+ emitAlignment(16); + + emitConstantPool(F.getConstantPool()); + initJumpTableInfo(F.getJumpTableInfo()); + + // About to start emitting the machine code for the function. + emitAlignment(std::max(F.getFunction()->getAlignment(), 8U)); + TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); + + MBBLocations.clear(); +} + +bool JITEmitter::finishFunction(MachineFunction &F) { + if (CurBufferPtr == BufferEnd) { + // FIXME: Allocate more space, then try again. + cerr << "JIT: Ran out of space for generated machine code!\n"; + abort(); + } + + emitJumpTableInfo(F.getJumpTableInfo()); + + // FnStart is the start of the text, not the start of the constant pool and + // other per-function data. + uint8_t *FnStart = + (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); + + // FnEnd is the end of the function's machine code. + uint8_t *FnEnd = CurBufferPtr; + + if (!Relocations.empty()) { + CurFn = F.getFunction(); + NumRelos += Relocations.size(); + + // Resolve the relocations to concrete pointers. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + void *ResultPtr = 0; + if (!MR.letTargetResolve()) { + if (MR.isExternalSymbol()) { + ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), + false); + DOUT << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + << ResultPtr << "]\n"; + + // If the target REALLY wants a stub for this function, emit it now. + if (!MR.doesntNeedStub()) { + if (!TheJIT->areDlsymStubsEnabled()) { + ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); + } else { + void *&Stub = ExtFnStubs[MR.getExternalSymbol()]; + if (!Stub) { + Stub = Resolver.getExternalFunctionStub((void *)&Stub); + AddStubToCurrentFunction(Stub); + } + ResultPtr = Stub; + } + } + } else if (MR.isGlobalValue()) { + ResultPtr = getPointerToGlobal(MR.getGlobalValue(), + BufferBegin+MR.getMachineCodeOffset(), + MR.doesntNeedStub()); + } else if (MR.isIndirectSymbol()) { + ResultPtr = getPointerToGVIndirectSym(MR.getGlobalValue(), + BufferBegin+MR.getMachineCodeOffset(), + MR.doesntNeedStub()); + } else if (MR.isBasicBlock()) { + ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); + } else if (MR.isConstantPoolIndex()) { + ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + } else { + assert(MR.isJumpTableIndex()); + ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); + } + + MR.setResultPointer(ResultPtr); + } + + // if we are managing the GOT and the relocation wants an index, + // give it one + if (MR.isGOTRelative() && MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); + MR.setGOTIndex(idx); + if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { + DOUT << "JIT: GOT was out of date for " << ResultPtr + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"; + ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; + } + } + } + + CurFn = 0; + TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], + Relocations.size(), MemMgr->getGOTBase()); + } + + // Update the GOT entry for F to point to the new code. 
+ if (MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); + if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { + DOUT << "JIT: GOT was out of date for " << (void*)BufferBegin + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"; + ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; + } + } + + // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for + // global variables that were referenced in the relocations. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + + if (CurBufferPtr == BufferEnd) { + // FIXME: Allocate more space, then try again. + cerr << "JIT: Ran out of space for generated machine code!\n"; + abort(); + } + + BufferBegin = CurBufferPtr = 0; + NumBytes += FnEnd-FnStart; + + // Invalidate the icache if necessary. + sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); + + // Add it to the JIT symbol table if the host wants it. + AddFunctionToSymbolTable(F.getFunction()->getNameStart(), + FnStart, FnEnd-FnStart); + + DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart + << "] Function: " << F.getFunction()->getName() + << ": " << (FnEnd-FnStart) << " bytes of text, " + << Relocations.size() << " relocations\n"; + + if (MCI) { + MCI->setAddress(FnStart); + MCI->setSize(FnEnd-FnStart); + } + + Relocations.clear(); + ConstPoolAddresses.clear(); + + // Mark code region readable and executable if it's not so already. + MemMgr->setMemoryExecutable(); + +#ifndef NDEBUG + { + if (sys::hasDisassembler()) { + DOUT << "JIT: Disassembled code:\n"; + DOUT << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart); + } else { + DOUT << "JIT: Binary code:\n"; + DOUT << std::hex; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + DOUT << "JIT: " << std::setw(8) << std::setfill('0') + << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + DOUT << std::setw(2) << std::setfill('0') << (unsigned short)q[j]; + } + if (Done) + break; + DOUT << ' '; + if (i == 3) + DOUT << '\n'; + } + DOUT << std::dec; + DOUT<< '\n'; + } + } +#endif + if (ExceptionHandling) { + uintptr_t ActualSize = 0; + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + if (MemMgr->NeedsExactSize()) { + ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); + } + + BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd); + MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, + FrameRegister); + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; + + TheJIT->RegisterTable(FrameRegister); + } + + if (MMI) + MMI->EndFunction(); + + return false; +} + +/// deallocateMemForFunction - Deallocate all memory for the specified +/// function body. Also drop any references the function has to stubs. +void JITEmitter::deallocateMemForFunction(Function *F) { + MemMgr->deallocateMemForFunction(F); + + // If the function did not reference any stubs, return. + if (CurFnStubUses.find(F) == CurFnStubUses.end()) + return; + + // For each referenced stub, erase the reference to this function, and then + // erase the list of referenced stubs. 
+  SmallVectorImpl<void*> &StubList = CurFnStubUses[F];
+  for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
+    void *Stub = StubList[i];
+
+    // If we already invalidated this stub for this function, continue.
+    if (StubFnRefs.count(Stub) == 0)
+      continue;
+
+    SmallPtrSet<const Function*, 1> &FnRefs = StubFnRefs[Stub];
+    FnRefs.erase(F);
+
+    // If this function was the last reference to the stub, invalidate the stub
+    // in the JITResolver.  Were there a memory manager deallocateStub routine,
+    // we could call that at this point too.
+    if (FnRefs.empty()) {
+      DOUT << "\nJIT: Invalidated Stub at [" << Stub << "]\n";
+      StubFnRefs.erase(Stub);
+
+      // Invalidate the stub.  If it is a GV stub, update the JIT's global
+      // mapping for that GV to zero, otherwise, search the string map of
+      // external function names to stubs and remove the entry for this stub.
+      GlobalValue *GV = Resolver.invalidateStub(Stub);
+      if (GV) {
+        TheJIT->updateGlobalMapping(GV, 0);
+      } else {
+        for (StringMapIterator<void*> i = ExtFnStubs.begin(),
+             e = ExtFnStubs.end(); i != e; ++i) {
+          if (i->second == Stub) {
+            ExtFnStubs.erase(i);
+            break;
+          }
+        }
+      }
+    }
+  }
+  CurFnStubUses.erase(F);
+}
+
+
+void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+  if (BufferBegin)
+    return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+  // Create a new memory block if there is no active one; care must be taken
+  // so that BufferBegin is invalidated when a block is trimmed.
+  BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+  BufferEnd = BufferBegin+Size;
+  return CurBufferPtr;
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  if (TheJIT->getJITInfo().hasCustomConstantPool())
+    return;
+
+  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+  if (Constants.empty()) return;
+
+  unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+  unsigned Align = MCP->getConstantPoolAlignment();
+  ConstantPoolBase = allocateSpace(Size, Align);
+  ConstantPool = MCP;
+
+  if (ConstantPoolBase == 0) return;  // Buffer overflow.
+
+  DOUT << "JIT: Emitted constant pool at [" << ConstantPoolBase
+       << "] (size: " << Size << ", alignment: " << Align << ")\n";
+
+  // Initialize the memory for all of the constant pool entries.
+  unsigned Offset = 0;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Offset = (Offset + AlignMask) & ~AlignMask;
+
+    uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+    ConstPoolAddresses.push_back(CAddr);
+    if (CPE.isMachineConstantPoolEntry()) {
+      // FIXME: add support to lower machine constant pool values into bytes!
+      cerr << "Initializing memory with a machine-specific constant pool"
+           << " entry has not been implemented!\n";
+      abort();
+    }
+    TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+    DOUT << "JIT: CP" << i << " at [0x"
+         << std::hex << CAddr << std::dec << "]\n";
+
+    const Type *Ty = CPE.Val.ConstVal->getType();
+    Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
+  }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+  if (TheJIT->getJITInfo().hasCustomJumpTables())
+    return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  unsigned NumEntries = 0;
+  for (unsigned i = 0, e = JT.size(); i != e; ++i)
+    NumEntries += JT[i].MBBs.size();
+
+  unsigned EntrySize = MJTI->getEntrySize();
+
+  // Just allocate space for all the jump tables now.  We will fix up the
+  // actual MBB entries in the tables after we emit the code for each block,
+  // since then we will know the final locations of the MBBs in memory.
+  JumpTable = MJTI;
+  JumpTableBase = allocateSpace(NumEntries * EntrySize, MJTI->getAlignment());
+}
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+  if (TheJIT->getJITInfo().hasCustomJumpTables())
+    return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty() || JumpTableBase == 0) return;
+
+  if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
+    assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
+    // For each jump table, place the offset from the beginning of the table
+    // to the target address.
+    int *SlotPtr = (int*)JumpTableBase;
+
+    for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+      const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+      // Store the offset of the basic block for this jump table slot in the
+      // memory we allocated for the jump table in 'initJumpTableInfo'
+      uintptr_t Base = (uintptr_t)SlotPtr;
+      for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+        uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+        *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+      }
+    }
+  } else {
+    assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
+
+    // For each jump table, map each target in the jump table to the address of
+    // an emitted MachineBasicBlock.
+    intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+    for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+      const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+      // Store the address of the basic block for this jump table slot in the
+      // memory we allocated for the jump table in 'initJumpTableInfo'
+      for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+        *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+    }
+  }
+}
+
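// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// In the PIC branch above, each 4-byte slot holds "target address minus table
// base" rather than an absolute pointer, so the emitted table stays correct
// wherever the code lands.  A freestanding model of that encoding (the real
// slot value comes from the target's getPICJumpTableEntry()):
#if 0
#include <cassert>
#include <stdint.h>
int main() {
  char Table[16];                                     // stand-in for JumpTableBase
  char Block;                                         // stand-in for an emitted MBB
  uintptr_t Base = (uintptr_t)Table;
  int32_t Slot = (int32_t)((uintptr_t)&Block - Base); // offset-style entry
  assert(Base + (intptr_t)Slot == (uintptr_t)&Block); // recovers the target
  return 0;
}
#endif
// ----------------------------------------------------------------------------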
+void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize,
+                             unsigned Alignment) {
+  SavedBufferBegin = BufferBegin;
+  SavedBufferEnd = BufferEnd;
+  SavedCurBufferPtr = CurBufferPtr;
+
+  BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+  BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
+                             unsigned StubSize) {
+  SavedBufferBegin = BufferBegin;
+  SavedBufferEnd = BufferEnd;
+  SavedCurBufferPtr = CurBufferPtr;
+
+  BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+  BufferEnd = BufferBegin+StubSize+1;
+}
+
+void *JITEmitter::finishGVStub(const GlobalValue* GV) {
+  NumBytes += getCurrentPCOffset();
+  std::swap(SavedBufferBegin, BufferBegin);
+  BufferEnd = SavedBufferEnd;
+  CurBufferPtr = SavedCurBufferPtr;
+  return SavedBufferBegin;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+  assert(ConstantNum < ConstantPool->getConstants().size() &&
+         "Invalid ConstantPoolIndex!");
+  return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the JumpTable with index
+// 'Index' in the jump table that was last initialized with 'initJumpTableInfo'
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+  const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+  assert(Index < JT.size() && "Invalid jump table index!");
+
+  unsigned Offset = 0;
+  unsigned EntrySize = JumpTable->getEntrySize();
+
+  for (unsigned i = 0; i < Index; ++i)
+    Offset += JT[i].MBBs.size();
+
+  Offset *= EntrySize;
+
+  return (uintptr_t)((char *)JumpTableBase + Offset);
+}
+
+//===----------------------------------------------------------------------===//
+//  Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM) {
+  return new JITEmitter(jit, JMM);
+}
+
+// getPointerToNamedFunction - This function is used as a global wrapper to
+// JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+// bugpoint is debugging the JIT.  In that scenario, we are loading an .so and
+// need to resolve function(s) that are being mis-codegenerated, so we need to
+// resolve their addresses at runtime, and this is the way to do it.
+extern "C" {
+  void *getPointerToNamedFunction(const char *Name) {
+    if (Function *F = TheJIT->FindFunctionNamed(Name))
+      return TheJIT->getPointerToFunction(F);
+    return TheJIT->getPointerToNamedFunction(Name);
+  }
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function.  If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+  // If we have already code generated the function, just return the address.
+  if (void *Addr = getPointerToGlobalIfAvailable(F))
+    return Addr;
+
+  // Get a stub if the target supports it.
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+  return JE->getJITResolver().getFunctionStub(F);
+}
+
+void JIT::registerMachineCodeInfo(MachineCodeInfo *mc) {
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+
+  JE->setMachineCodeInfo(mc);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+  // Get the empty stub we generated earlier.
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+  void *Stub = JE->getJITResolver().getFunctionStub(F);
+
+  // Tell the target jit info to rewrite the stub at the specified address,
+  // rather than creating a new one.
+  void *Addr = getPointerToGlobalIfAvailable(F);
+  getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter());
+}
+
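// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// Putting the public interface above together: a client asks for a callable
// address and may get either the compiled body or a lazy stub; the first call
// through a stub lands in JITCompilerFn.  A hypothetical use (assuming F has
// type 'int ()'; "callThroughJIT" is not LLVM API):
#if 0
static int callThroughJIT(JIT *J, Function *F) {
  void *Addr = J->getPointerToFunctionOrStub(F);
  int (*FP)(void) = (int (*)(void))(intptr_t)Addr;
  return FP();                       // may trigger lazy compilation of F
}
#endif
// ----------------------------------------------------------------------------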
+/// updateDlsymStubTable - Emit the data necessary to relocate the stubs
+/// that were emitted during code generation.
+///
+void JIT::updateDlsymStubTable() {
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+
+  SmallVector<GlobalValue*, 8> GVs;
+  SmallVector<void*, 8> Ptrs;
+  const StringMap<void*> &ExtFns = JE->getExternalFnStubs();
+
+  JE->getJITResolver().getRelocatableGVs(GVs, Ptrs);
+
+  unsigned nStubs = GVs.size() + ExtFns.size();
+
+  // If there are no relocatable stubs, return.
+  if (nStubs == 0)
+    return;
+
+  // If there are no new relocatable stubs, return.
+  void *CurTable = JE->getMemMgr()->getDlsymTable();
+  if (CurTable && (*(unsigned *)CurTable == nStubs))
+    return;
+
+  // Calculate the size of the stub info
+  unsigned offset = 4 + 4 * nStubs + sizeof(intptr_t) * nStubs;
+
+  SmallVector<unsigned, 8> Offsets;
+  for (unsigned i = 0; i != GVs.size(); ++i) {
+    Offsets.push_back(offset);
+    offset += GVs[i]->getName().length() + 1;
+  }
+  for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+       i != e; ++i) {
+    Offsets.push_back(offset);
+    offset += strlen(i->first()) + 1;
+  }
+
+  // Allocate space for the new "stub", which contains the dlsym table.
+  JE->startGVStub(0, offset, 4);
+
+  // Emit the number of records
+  JE->emitInt32(nStubs);
+
+  // Emit the string offsets
+  for (unsigned i = 0; i != nStubs; ++i)
+    JE->emitInt32(Offsets[i]);
+
+  // Emit the pointers.  Verify that they are at least 2-byte aligned, and set
+  // the low bit to 0 == GV, 1 == Function, so that the client code doing the
+  // relocation can write the relocated pointer at the appropriate place in
+  // the stub.
+  for (unsigned i = 0; i != GVs.size(); ++i) {
+    intptr_t Ptr = (intptr_t)Ptrs[i];
+    assert((Ptr & 1) == 0 && "Stub pointers must be at least 2-byte aligned!");
+
+    if (isa<Function>(GVs[i]))
+      Ptr |= (intptr_t)1;
+
+    if (sizeof(Ptr) == 8)
+      JE->emitInt64(Ptr);
+    else
+      JE->emitInt32(Ptr);
+  }
+  for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+       i != e; ++i) {
+    intptr_t Ptr = (intptr_t)i->second | 1;
+
+    if (sizeof(Ptr) == 8)
+      JE->emitInt64(Ptr);
+    else
+      JE->emitInt32(Ptr);
+  }
+
+  // Emit the strings.
+  for (unsigned i = 0; i != GVs.size(); ++i)
+    JE->emitString(GVs[i]->getName());
+  for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+       i != e; ++i)
+    JE->emitString(i->first());
+
+  // Tell the JIT memory manager where it is.  The JIT Memory Manager will
+  // deallocate space for the old one, if one existed.
+  JE->getMemMgr()->SetDlsymTable(JE->finishGVStub(0));
+}
+
+/// freeMachineCodeForFunction - Release machine code memory for the given
+/// Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+
+  // Delete translation for this from the ExecutionEngine, so it will get
+  // retranslated next time it is used.
+  void *OldPtr = updateGlobalMapping(F, 0);
+
+  if (OldPtr)
+    RemoveFunctionFromSymbolTable(OldPtr);
+
+  // Free the actual memory for the function body and related stuff.
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+}
+
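// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// For reference, the table emitted by updateDlsymStubTable() above is laid
// out in the order written: a 4-byte record count, then one 4-byte string
// offset per record, then one pointer-sized stub address per record (low bit
// set for Functions), then the NUL-terminated names.  A hypothetical reader
// (the pointer array is not necessarily aligned, hence memcpy):
#if 0
#include <cstring>
#include <stdint.h>
static const char *dlsymStubName(const char *Table, unsigned i) {
  int32_t Off;
  memcpy(&Off, Table + 4 + 4*i, sizeof(Off));   // skip the count, index in
  return Table + Off;                           // offsets are from table start
}
static bool dlsymStubIsFunction(const char *Table, unsigned i, unsigned n) {
  intptr_t P;
  memcpy(&P, Table + 4 + 4*n + sizeof(intptr_t)*i, sizeof(P));
  return (P & 1) != 0;                          // low bit tags Function stubs
}
#endif
// ----------------------------------------------------------------------------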
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 000000000000..70ccdccb8049
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,541 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/GlobalValue.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/System/Memory.h"
+#include <map>
+#include <vector>
+#include <cassert>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// MemoryRangeHeader - For a range of memory, this is the header that we put
+  /// on the block of memory.  It is carefully crafted to be one word of memory.
+  /// Allocated blocks have just this header, free'd blocks have FreeRangeHeader
+  /// which starts with this.
+  struct FreeRangeHeader;
+  struct MemoryRangeHeader {
+    /// ThisAllocated - This is true if this block is currently allocated.  If
+    /// not, this can be converted to a FreeRangeHeader.
+    unsigned ThisAllocated : 1;
+
+    /// PrevAllocated - Keep track of whether the block immediately before us is
+    /// allocated.  If not, the word immediately before this header is the size
+    /// of the previous block.
+    unsigned PrevAllocated : 1;
+
+    /// BlockSize - This is the size in bytes of this memory block,
+    /// including this header.
+    uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+    /// getBlockAfter - Return the memory block immediately after this one.
+    ///
+    MemoryRangeHeader &getBlockAfter() const {
+      return *(MemoryRangeHeader*)((char*)this+BlockSize);
+    }
+
+    /// getFreeBlockBefore - If the block before this one is free, return it,
+    /// otherwise return null.
+    FreeRangeHeader *getFreeBlockBefore() const {
+      if (PrevAllocated) return 0;
+      intptr_t PrevSize = ((intptr_t *)this)[-1];
+      return (FreeRangeHeader*)((char*)this-PrevSize);
+    }
+
+    /// FreeBlock - Turn an allocated block into a free block, adjusting
+    /// bits in the object headers, and adding an end of region memory block.
+    FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
+
+    /// TrimAllocationToSize - If this allocated block is significantly larger
+    /// than NewSize, split it into two pieces (where the former is NewSize
+    /// bytes, including the header), and add the new block to the free list.
+    FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+                                          uint64_t NewSize);
+  };
+
+  /// FreeRangeHeader - For a memory block that isn't already allocated, this
+  /// keeps track of the current block and has a pointer to the next free block.
+  /// Free blocks are kept on a circularly linked list.
+  struct FreeRangeHeader : public MemoryRangeHeader {
+    FreeRangeHeader *Prev;
+    FreeRangeHeader *Next;
+
+    /// getMinBlockSize - Get the minimum size for a memory block.  Blocks
+    /// smaller than this size cannot be created.
+    static unsigned getMinBlockSize() {
+      return sizeof(FreeRangeHeader)+sizeof(intptr_t);
+    }
+
+    /// SetEndOfBlockSizeMarker - The word at the end of every free block is
+    /// known to be the size of the free block.  Set it for this block.
+ void SetEndOfBlockSizeMarker() { + void *EndOfBlock = (char*)this + BlockSize; + ((intptr_t *)EndOfBlock)[-1] = BlockSize; + } + + FreeRangeHeader *RemoveFromFreeList() { + assert(Next->Prev == this && Prev->Next == this && "Freelist broken!"); + Next->Prev = Prev; + return Prev->Next = Next; + } + + void AddToFreeList(FreeRangeHeader *FreeList) { + Next = FreeList; + Prev = FreeList->Prev; + Prev->Next = this; + Next->Prev = this; + } + + /// GrowBlock - The block after this block just got deallocated. Merge it + /// into the current block. + void GrowBlock(uintptr_t NewSize); + + /// AllocateBlock - Mark this entire block allocated, updating freelists + /// etc. This returns a pointer to the circular free-list. + FreeRangeHeader *AllocateBlock(); + }; +} + + +/// AllocateBlock - Mark this entire block allocated, updating freelists +/// etc. This returns a pointer to the circular free-list. +FreeRangeHeader *FreeRangeHeader::AllocateBlock() { + assert(!ThisAllocated && !getBlockAfter().PrevAllocated && + "Cannot allocate an allocated block!"); + // Mark this block allocated. + ThisAllocated = 1; + getBlockAfter().PrevAllocated = 1; + + // Remove it from the free list. + return RemoveFromFreeList(); +} + +/// FreeBlock - Turn an allocated block into a free block, adjusting +/// bits in the object headers, and adding an end of region memory block. +/// If possible, coalesce this block with neighboring blocks. Return the +/// FreeRangeHeader to allocate from. +FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { + MemoryRangeHeader *FollowingBlock = &getBlockAfter(); + assert(ThisAllocated && "This block is already allocated!"); + assert(FollowingBlock->PrevAllocated && "Flags out of sync!"); + + FreeRangeHeader *FreeListToReturn = FreeList; + + // If the block after this one is free, merge it into this block. + if (!FollowingBlock->ThisAllocated) { + FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock; + // "FreeList" always needs to be a valid free block. If we're about to + // coalesce with it, update our notion of what the free list is. + if (&FollowingFreeBlock == FreeList) { + FreeList = FollowingFreeBlock.Next; + FreeListToReturn = 0; + assert(&FollowingFreeBlock != FreeList && "No tombstone block?"); + } + FollowingFreeBlock.RemoveFromFreeList(); + + // Include the following block into this one. + BlockSize += FollowingFreeBlock.BlockSize; + FollowingBlock = &FollowingFreeBlock.getBlockAfter(); + + // Tell the block after the block we are coalescing that this block is + // allocated. + FollowingBlock->PrevAllocated = 1; + } + + assert(FollowingBlock->ThisAllocated && "Missed coalescing?"); + + if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) { + PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize); + return FreeListToReturn ? FreeListToReturn : PrevFreeBlock; + } + + // Otherwise, mark this block free. + FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this; + FollowingBlock->PrevAllocated = 0; + FreeBlock.ThisAllocated = 0; + + // Link this into the linked list of free blocks. + FreeBlock.AddToFreeList(FreeList); + + // Add a marker at the end of the block, indicating the size of this free + // block. + FreeBlock.SetEndOfBlockSizeMarker(); + return FreeListToReturn ? FreeListToReturn : &FreeBlock; +} + +/// GrowBlock - The block after this block just got deallocated. Merge it +/// into the current block. 
+void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
+  assert(NewSize > BlockSize && "Not growing block?");
+  BlockSize = NewSize;
+  SetEndOfBlockSizeMarker();
+  getBlockAfter().PrevAllocated = 0;
+}
+
+/// TrimAllocationToSize - If this allocated block is significantly larger
+/// than NewSize, split it into two pieces (where the former is NewSize
+/// bytes, including the header), and add the new block to the free list.
+FreeRangeHeader *MemoryRangeHeader::
+TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
+  assert(ThisAllocated && getBlockAfter().PrevAllocated &&
+         "Cannot deallocate part of an allocated block!");
+
+  // Don't allow blocks to be trimmed below minimum required size
+  NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
+
+  // Round up size for alignment of header.
+  unsigned HeaderAlign = __alignof(FreeRangeHeader);
+  NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
+
+  // Size is now the size of the block we will remove from the start of the
+  // current block.
+  assert(NewSize <= BlockSize &&
+         "Allocating more space from this block than exists!");
+
+  // If splitting this block will cause the remainder to be too small, do not
+  // split the block.
+  if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
+    return FreeList;
+
+  // Otherwise, we splice the required number of bytes out of this block, form
+  // a new block immediately after it, then mark this block allocated.
+  MemoryRangeHeader &FormerNextBlock = getBlockAfter();
+
+  // Change the size of this block.
+  BlockSize = NewSize;
+
+  // Get the new block we just sliced out and turn it into a free block.
+  FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+  NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+  NewNextBlock.ThisAllocated = 0;
+  NewNextBlock.PrevAllocated = 1;
+  NewNextBlock.SetEndOfBlockSizeMarker();
+  FormerNextBlock.PrevAllocated = 0;
+  NewNextBlock.AddToFreeList(FreeList);
+  return &NewNextBlock;
+}
+
+//===----------------------------------------------------------------------===//
+// DefaultJITMemoryManager Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+  /// This splits a large block of MAP_NORESERVE'd memory into two
+  /// sections, one for function stubs, one for the functions themselves.  We
+  /// have to do this because we may need to emit a function stub while in the
+  /// middle of emitting a function, and we don't know how large the function we
+  /// are emitting is.
+  class VISIBILITY_HIDDEN DefaultJITMemoryManager : public JITMemoryManager {
+    std::vector<sys::MemoryBlock> Blocks; // Memory blocks allocated by the JIT
+    FreeRangeHeader *FreeMemoryList;      // Circular list of free blocks.
+
+    // When emitting code into a memory block, this is the block.
+    MemoryRangeHeader *CurBlock;
+
+    uint8_t *CurStubPtr, *StubBase;
+    uint8_t *GOTBase;     // Target Specific reserved memory
+    void *DlsymTable;     // Stub external symbol information
+
+    // Centralize memory block allocation.
+    sys::MemoryBlock getNewMemoryBlock(unsigned size);
+
+    std::map<const Function*, MemoryRangeHeader*> FunctionBlocks;
+    std::map<const Function*, MemoryRangeHeader*> TableBlocks;
+  public:
+    DefaultJITMemoryManager();
+    ~DefaultJITMemoryManager();
+
+    void AllocateGOT();
+    void SetDlsymTable(void *);
+
+    uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+                          unsigned Alignment);
+
+    /// startFunctionBody - When a function starts, allocate a block of free
+    /// executable memory, returning a pointer to it and its actual size.
+    uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+
+      FreeRangeHeader* candidateBlock = FreeMemoryList;
+      FreeRangeHeader* head = FreeMemoryList;
+      FreeRangeHeader* iter = head->Next;
+
+      uintptr_t largest = candidateBlock->BlockSize;
+
+      // Search for the largest free block
+      while (iter != head) {
+        if (iter->BlockSize > largest) {
+          largest = iter->BlockSize;
+          candidateBlock = iter;
+        }
+        iter = iter->Next;
+      }
+
+      // Select this candidate block for allocation
+      CurBlock = candidateBlock;
+
+      // Allocate the entire memory block.
+      FreeMemoryList = candidateBlock->AllocateBlock();
+      ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader);
+      return (uint8_t *)(CurBlock+1);
+    }
+
+    /// endFunctionBody - The function F is now allocated, and takes the memory
+    /// in the range [FunctionStart,FunctionEnd).
+    void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                         uint8_t *FunctionEnd) {
+      assert(FunctionEnd > FunctionStart);
+      assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+             "Mismatched function start/end!");
+
+      uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+      FunctionBlocks[F] = CurBlock;
+
+      // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+    }
+
+    /// allocateSpace - Allocate a memory block of the given size.
+    uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+      CurBlock = FreeMemoryList;
+      FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+      uint8_t *result = (uint8_t *)(CurBlock+1);
+
+      if (Alignment == 0) Alignment = 1;
+      result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+                          ~(intptr_t)(Alignment-1));
+
+      uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+
+      return result;
+    }
+
+    /// startExceptionTable - Use startFunctionBody to allocate memory for the
+    /// function's exception table.
+    uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+      return startFunctionBody(F, ActualSize);
+    }
+
+    /// endExceptionTable - The exception table of F is now allocated,
+    /// and takes the memory in the range [TableStart,TableEnd).
+    void endExceptionTable(const Function *F, uint8_t *TableStart,
+                           uint8_t *TableEnd, uint8_t* FrameRegister) {
+      assert(TableEnd > TableStart);
+      assert(TableStart == (uint8_t *)(CurBlock+1) &&
+             "Mismatched table start/end!");
+
+      uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+      TableBlocks[F] = CurBlock;
+
+      // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+    }
+
+    uint8_t *getGOTBase() const {
+      return GOTBase;
+    }
+
+    void *getDlsymTable() const {
+      return DlsymTable;
+    }
+
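// --[ Editor's aside: illustrative sketch, not part of the imported file ]----
// startFunctionBody() above deliberately picks the LARGEST free block (worst
// fit): unless NeedsExactSize() is set, the emitter does not know how big the
// function will be, so a tight fit could strand it mid-emission.  The
// selection policy, reduced to its core (hypothetical helper):
#if 0
#include <algorithm>
#include <vector>
static size_t pickWorstFit(const std::vector<size_t> &FreeBlockSizes) {
  // The largest block gives the emitter the most headroom.
  return *std::max_element(FreeBlockSizes.begin(), FreeBlockSizes.end());
}
#endif
// ----------------------------------------------------------------------------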
+    /// deallocateMemForFunction - Deallocate all memory for the specified
+    /// function body.
+    void deallocateMemForFunction(const Function *F) {
+      std::map<const Function*, MemoryRangeHeader*>::iterator
+        I = FunctionBlocks.find(F);
+      if (I == FunctionBlocks.end()) return;
+
+      // Find the block that is allocated for this function.
+      MemoryRangeHeader *MemRange = I->second;
+      assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+      // Fill the buffer with garbage!
+#ifndef NDEBUG
+      memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+#endif
+
+      // Free the memory.
+      FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+
+      // Finally, remove this entry from FunctionBlocks.
+      FunctionBlocks.erase(I);
+
+      I = TableBlocks.find(F);
+      if (I == TableBlocks.end()) return;
+
+      // Find the block that is allocated for this function.
+      MemRange = I->second;
+      assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+      // Fill the buffer with garbage!
+#ifndef NDEBUG
+      memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+#endif
+
+      // Free the memory.
+      FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+
+      // Finally, remove this entry from TableBlocks.
+      TableBlocks.erase(I);
+    }
+
+    /// setMemoryWritable - When code generation is in progress,
+    /// the code pages may need permissions changed.
+    void setMemoryWritable(void)
+    {
+      for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
+        sys::Memory::setWritable(Blocks[i]);
+    }
+    /// setMemoryExecutable - When code generation is done and we're ready to
+    /// start execution, the code pages may need permissions changed.
+    void setMemoryExecutable(void)
+    {
+      for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
+        sys::Memory::setExecutable(Blocks[i]);
+    }
+  };
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager() {
+  // Allocate a 16M block of memory for functions.
+#if defined(__APPLE__) && defined(__arm__)
+  sys::MemoryBlock MemBlock = getNewMemoryBlock(4 << 20);
+#else
+  sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20);
+#endif
+
+  uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base());
+
+  // Allocate stubs backwards from the base, allocate functions forward
+  // from the base.
+  StubBase   = MemBase;
+  CurStubPtr = MemBase + 512*1024; // Use 512k for stubs, working backwards.
+
+  // We set up the memory chunk with 4 mem regions, like this:
+  //  [ START
+  //    [ Free      #0 ] -> Large space to allocate functions from.
+  //    [ Allocated #1 ] -> Tiny space to separate regions.
+  //    [ Free      #2 ] -> Tiny space so there is always at least 1 free block.
+  //    [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
+  //  END ]
+  //
+  // The last three blocks are never deallocated or touched.
+
+  // Add MemoryRangeHeader to the end of the memory region, indicating that
+  // the space after the block of memory is allocated.  This is block #3.
+  MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
+  Mem3->ThisAllocated = 1;
+  Mem3->PrevAllocated = 0;
+  Mem3->BlockSize     = 0;
+
+  /// Add a tiny free region so that the free list always has one entry.
+  FreeRangeHeader *Mem2 =
+    (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
+  Mem2->ThisAllocated = 0;
+  Mem2->PrevAllocated = 1;
+  Mem2->BlockSize     = FreeRangeHeader::getMinBlockSize();
+  Mem2->SetEndOfBlockSizeMarker();
+  Mem2->Prev = Mem2;   // Mem2 *is* the free list for now.
+  Mem2->Next = Mem2;
+
+  /// Add a tiny allocated region so that Mem2 is never coalesced away.
+ MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1; + Mem1->ThisAllocated = 1; + Mem1->PrevAllocated = 0; + Mem1->BlockSize = (char*)Mem2 - (char*)Mem1; + + // Add a FreeRangeHeader to the start of the function body region, indicating + // that the space is free. Mark the previous block allocated so we never look + // at it. + FreeRangeHeader *Mem0 = (FreeRangeHeader*)CurStubPtr; + Mem0->ThisAllocated = 0; + Mem0->PrevAllocated = 1; + Mem0->BlockSize = (char*)Mem1-(char*)Mem0; + Mem0->SetEndOfBlockSizeMarker(); + Mem0->AddToFreeList(Mem2); + + // Start out with the freelist pointing to Mem0. + FreeMemoryList = Mem0; + + GOTBase = NULL; + DlsymTable = NULL; +} + +void DefaultJITMemoryManager::AllocateGOT() { + assert(GOTBase == 0 && "Cannot allocate the got multiple times"); + GOTBase = new uint8_t[sizeof(void*) * 8192]; + HasGOT = true; +} + +void DefaultJITMemoryManager::SetDlsymTable(void *ptr) { + DlsymTable = ptr; +} + +DefaultJITMemoryManager::~DefaultJITMemoryManager() { + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) + sys::Memory::ReleaseRWX(Blocks[i]); + + delete[] GOTBase; + Blocks.clear(); +} + +uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F, + unsigned StubSize, + unsigned Alignment) { + CurStubPtr -= StubSize; + CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) & + ~(intptr_t)(Alignment-1)); + if (CurStubPtr < StubBase) { + // FIXME: allocate a new block + fprintf(stderr, "JIT ran out of memory for function stubs!\n"); + abort(); + } + return CurStubPtr; +} + +sys::MemoryBlock DefaultJITMemoryManager::getNewMemoryBlock(unsigned size) { + // Allocate a new block close to the last one. + const sys::MemoryBlock *BOld = Blocks.empty() ? 0 : &Blocks.front(); + std::string ErrMsg; + sys::MemoryBlock B = sys::Memory::AllocateRWX(size, BOld, &ErrMsg); + if (B.base() == 0) { + fprintf(stderr, + "Allocation failed when allocating new memory in the JIT\n%s\n", + ErrMsg.c_str()); + abort(); + } + Blocks.push_back(B); + return B; +} + + +JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { + return new DefaultJITMemoryManager(); +} diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile new file mode 100644 index 000000000000..e2c9c61e88fa --- /dev/null +++ b/lib/ExecutionEngine/JIT/Makefile @@ -0,0 +1,37 @@ +##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+LIBRARYNAME = LLVMJIT
+
+# Get the $(ARCH) setting
+include $(LEVEL)/Makefile.config
+
+# Enable the X86 JIT if compiling on X86
+ifeq ($(ARCH), x86)
+  ENABLE_X86_JIT = 1
+endif
+
+# This flag can also be used on the command line to force inclusion
+# of the X86 JIT on non-X86 hosts
+ifdef ENABLE_X86_JIT
+  CPPFLAGS += -DENABLE_X86_JIT
+endif
+
+# Enable the Sparc JIT if compiling on Sparc
+ifeq ($(ARCH), Sparc)
+  ENABLE_SPARC_JIT = 1
+endif
+
+# This flag can also be used on the command line to force inclusion
+# of the Sparc JIT on non-Sparc hosts
+ifdef ENABLE_SPARC_JIT
+  CPPFLAGS += -DENABLE_SPARC_JIT
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp
new file mode 100644
index 000000000000..0f208193075b
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -0,0 +1,83 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetMachineRegistry for the appropriate JIT to use, and
+// allows the user to specify a specific one on the commandline with -march=x.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Support/RegistryParser.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+static cl::opt<const TargetMachineRegistry::entry*, false,
+               RegistryParser<TargetMachine> >
+MArch("march", cl::desc("Architecture to generate assembly for:"));
+
+static cl::opt<std::string>
+MCPU("mcpu",
+     cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+     cl::value_desc("cpu-name"),
+     cl::init(""));
+
+static cl::list<std::string>
+MAttrs("mattr",
+       cl::CommaSeparated,
+       cl::desc("Target specific attributes (-mattr=help for details)"),
+       cl::value_desc("a1,+a2,-a3,..."));
+
+/// createJIT - Create and return a new JIT compiler if there is one available
+/// for the current target. Otherwise, return null.
+///
+ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr,
+                                JITMemoryManager *JMM,
+                                CodeGenOpt::Level OptLevel) {
+  const TargetMachineRegistry::entry *TheArch = MArch;
+  if (TheArch == 0) {
+    std::string Error;
+    TheArch = TargetMachineRegistry::getClosestTargetForJIT(Error);
+    if (TheArch == 0) {
+      if (ErrorStr)
+        *ErrorStr = Error;
+      return 0;
+    }
+  } else if (TheArch->JITMatchQualityFn() == 0) {
+    cerr << "WARNING: This target JIT is not designed for the host you are"
+         << " running. If bad things happen, please choose a different "
+         << "-march switch.\n";
+  }
+
+  // Package up features to be passed to target/subtarget
+  std::string FeaturesStr;
+  if (!MCPU.empty() || !MAttrs.empty()) {
+    SubtargetFeatures Features;
+    Features.setCPU(MCPU);
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  // Allocate a target...
+  TargetMachine *Target = TheArch->CtorFn(*MP->getModule(), FeaturesStr);
+  assert(Target && "Could not allocate target machine!");
+
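// [editorial note] The block above folds -mcpu and -mattr into one feature
// string handed to the target constructor. The real encoding lives in
// SubtargetFeatures::getString(); the standalone sketch below shows only the
// comma-joining idea and is not the actual format. packageFeatures is a
// hypothetical name.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

std::string packageFeatures(const std::string &CPU,
                            const std::vector<std::string> &Attrs) {
  std::string S = CPU;               // CPU name first...
  for (std::size_t i = 0; i != Attrs.size(); ++i) {
    if (!S.empty()) S += ',';
    S += Attrs[i];                   // ...then "+feat" / "-feat" attributes
  }
  return S;
}

int main() {
  // Roughly what "-mcpu=core2 -mattr=+sse2,-3dnow" would contribute.
  std::cout << packageFeatures("core2",
                               std::vector<std::string>{"+sse2", "-3dnow"})
            << "\n";
  return 0;
}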
+  // If the target supports JIT code generation, return a new JIT now.
+  if (TargetJITInfo *TJ = Target->getJITInfo())
+    return new JIT(MP, *Target, *TJ, JMM, OptLevel);
+
+  if (ErrorStr)
+    *ErrorStr = "target does not support JIT code generation";
+  return 0;
+}
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
new file mode 100644
index 000000000000..e0e050e89728
--- /dev/null
+++ b/lib/ExecutionEngine/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+LIBRARYNAME = LLVMExecutionEngine
+PARALLEL_DIRS = Interpreter JIT
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
new file mode 100644
index 000000000000..0b6d2f4218e3
--- /dev/null
+++ b/lib/Linker/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMLinker
+  LinkArchives.cpp
+  LinkItems.cpp
+  LinkModules.cpp
+  Linker.cpp
+  )
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
new file mode 100644
index 000000000000..551cc8c390d8
--- /dev/null
+++ b/lib/Linker/LinkArchives.cpp
@@ -0,0 +1,201 @@
+//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Config/config.h"
+#include <memory>
+#include <set>
+using namespace llvm;
+
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat it as resolved).
+///
+/// Inputs:
+///  M - The module in which to find undefined symbols.
+///
+/// Outputs:
+///  UndefinedSymbols - A set of C++ strings containing the name of all
+///                     undefined symbols.
+///
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+  std::set<std::string> DefinedSymbols;
+  UndefinedSymbols.clear();
+
+  // If the program doesn't define a main, try pulling one in from a .a file.
+  // This is needed for programs where the main function is defined in an
+  // archive, such as f2c'd programs.
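// [editorial note] GetAllUndefinedSymbols boils down to set arithmetic:
// collect the names that are only declared, collect the names that are
// defined, then remove the second set from the first. A standalone analogue,
// not LLVM code; undefinedSymbols is a hypothetical name.

#include <iostream>
#include <set>
#include <string>

std::set<std::string> undefinedSymbols(const std::set<std::string> &Declared,
                                       const std::set<std::string> &Defined) {
  std::set<std::string> Undef;
  for (const std::string &Name : Declared)
    if (!Defined.count(Name))   // declared somewhere, defined nowhere
      Undef.insert(Name);
  return Undef;
}

int main() {
  std::set<std::string> Decl = {"main", "printf", "helper"};
  std::set<std::string> Def  = {"helper"};
  for (const std::string &S : undefinedSymbols(Decl, Def))
    std::cout << S << "\n";     // prints "main" then "printf"
  return 0;
}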
+ Function *Main = M->getFunction("main"); + if (Main == 0 || Main->isDeclaration()) + UndefinedSymbols.insert("main"); + + for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) + if (I->hasName()) { + if (I->isDeclaration()) + UndefinedSymbols.insert(I->getName()); + else if (!I->hasLocalLinkage()) { + assert(!I->hasDLLImportLinkage() + && "Found dllimported non-external symbol!"); + DefinedSymbols.insert(I->getName()); + } + } + + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + if (I->hasName()) { + if (I->isDeclaration()) + UndefinedSymbols.insert(I->getName()); + else if (!I->hasLocalLinkage()) { + assert(!I->hasDLLImportLinkage() + && "Found dllimported non-external symbol!"); + DefinedSymbols.insert(I->getName()); + } + } + + for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + if (I->hasName()) + DefinedSymbols.insert(I->getName()); + + // Prune out any defined symbols from the undefined symbols set... + for (std::set::iterator I = UndefinedSymbols.begin(); + I != UndefinedSymbols.end(); ) + if (DefinedSymbols.count(*I)) + UndefinedSymbols.erase(I++); // This symbol really is defined! + else + ++I; // Keep this symbol in the undefined symbols list +} + +/// LinkInArchive - opens an archive library and link in all objects which +/// provide symbols that are currently undefined. +/// +/// Inputs: +/// Filename - The pathname of the archive. +/// +/// Return Value: +/// TRUE - An error occurred. +/// FALSE - No errors. +bool +Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { + // Make sure this is an archive file we're dealing with + if (!Filename.isArchive()) + return error("File '" + Filename.toString() + "' is not an archive."); + + // Open the archive file + verbose("Linking archive file '" + Filename.toString() + "'"); + + // Find all of the symbols currently undefined in the bitcode program. + // If all the symbols are defined, the program is complete, and there is + // no reason to link in any archive files. + std::set UndefinedSymbols; + GetAllUndefinedSymbols(Composite, UndefinedSymbols); + + if (UndefinedSymbols.empty()) { + verbose("No symbols undefined, skipping library '" + + Filename.toString() + "'"); + return false; // No need to link anything in! + } + + std::string ErrMsg; + std::auto_ptr AutoArch ( + Archive::OpenAndLoadSymbols(Filename,&ErrMsg)); + + Archive* arch = AutoArch.get(); + + if (!arch) + return error("Cannot read archive '" + Filename.toString() + + "': " + ErrMsg); + if (!arch->isBitcodeArchive()) { + is_native = true; + return false; + } + is_native = false; + + // Save a set of symbols that are not defined by the archive. Since we're + // entering a loop, there's no point searching for these multiple times. This + // variable is used to "set_subtract" from the set of undefined symbols. + std::set NotDefinedByArchive; + + // Save the current set of undefined symbols, because we may have to make + // multiple passes over the archive: + std::set CurrentlyUndefinedSymbols; + + do { + CurrentlyUndefinedSymbols = UndefinedSymbols; + + // Find the modules we need to link into the target module + std::set Modules; + if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) + return error("Cannot find symbols in '" + Filename.toString() + + "': " + ErrMsg); + + // If we didn't find any more modules to link this time, we are done + // searching this archive. 
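// [editorial note] The do/while above is a classic fixed-point search: keep
// pulling archive members that define still-missing symbols until a full pass
// makes no progress. A minimal standalone model of that control flow;
// resolveOnePass is a hypothetical stand-in for one scan of the archive and
// is deliberately left abstract here.

#include <set>
#include <string>

// Returns the symbols still undefined after linking in every archive member
// that defines something in `Undef`.
std::set<std::string> resolveOnePass(const std::set<std::string> &Undef);

void linkUntilFixedPoint(std::set<std::string> &Undef) {
  std::set<std::string> Before;
  do {
    Before = Undef;               // snapshot, like CurrentlyUndefinedSymbols
    Undef = resolveOnePass(Undef);
    if (Undef.empty()) break;     // everything resolved, stop early
  } while (Undef != Before);      // no progress means a fixed point
}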
+ if (Modules.empty()) + break; + + // Any symbols remaining in UndefinedSymbols after + // findModulesDefiningSymbols are ones that the archive does not define. So + // we add them to the NotDefinedByArchive variable now. + NotDefinedByArchive.insert(UndefinedSymbols.begin(), + UndefinedSymbols.end()); + + // Loop over all the ModuleProviders that we got back from the archive + for (std::set::iterator I=Modules.begin(), E=Modules.end(); + I != E; ++I) { + + // Get the module we must link in. + std::string moduleErrorMsg; + std::auto_ptr AutoModule((*I)->releaseModule( &moduleErrorMsg )); + if (!moduleErrorMsg.empty()) + return error("Could not load a module: " + moduleErrorMsg); + + Module* aModule = AutoModule.get(); + + if (aModule != NULL) { + verbose(" Linking in module: " + aModule->getModuleIdentifier()); + + // Link it in + if (LinkInModule(aModule, &moduleErrorMsg)) { + return error("Cannot link in module '" + + aModule->getModuleIdentifier() + "': " + moduleErrorMsg); + } + } + } + + // Get the undefined symbols from the aggregate module. This recomputes the + // symbols we still need after the new modules have been linked in. + GetAllUndefinedSymbols(Composite, UndefinedSymbols); + + // At this point we have two sets of undefined symbols: UndefinedSymbols + // which holds the undefined symbols from all the modules, and + // NotDefinedByArchive which holds symbols we know the archive doesn't + // define. There's no point searching for symbols that we won't find in the + // archive so we subtract these sets. + set_subtract(UndefinedSymbols, NotDefinedByArchive); + + // If there's no symbols left, no point in continuing to search the + // archive. + if (UndefinedSymbols.empty()) + break; + } while (CurrentlyUndefinedSymbols != UndefinedSymbols); + + return false; +} diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp new file mode 100644 index 000000000000..7c888aa498a3 --- /dev/null +++ b/lib/Linker/LinkItems.cpp @@ -0,0 +1,238 @@ +//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines to handle linking together LLVM bitcode files, +// and to handle annoying things like static libraries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Bitcode/ReaderWriter.h" + +using namespace llvm; + +// LinkItems - This function is the main entry point into linking. It takes a +// list of LinkItem which indicates the order the files should be linked and +// how each file should be treated (plain file or with library search). The +// function only links bitcode and produces a result list of items that are +// native objects. +bool +Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) { + // Clear the NativeItems just in case + NativeItems.clear(); + + // For each linkage item ... + for (ItemList::const_iterator I = Items.begin(), E = Items.end(); + I != E; ++I) { + if (I->second) { + // Link in the library suggested. 
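// [editorial note] set_subtract above is simply "erase everything in B from
// A"; pruning NotDefinedByArchive keeps later passes from re-searching
// symbols the archive can never satisfy. The LLVM helper lives in
// llvm/ADT/SetOperations.h and is equivalent to this standalone loop:

#include <set>
#include <string>

template <class S1, class S2>
void set_subtract_sketch(S1 &A, const S2 &B) {
  for (typename S2::const_iterator I = B.begin(), E = B.end(); I != E; ++I)
    A.erase(*I);   // erasing a missing element is a harmless no-op
}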
+ bool is_native = false; + if (LinkInLibrary(I->first, is_native)) + return true; + if (is_native) + NativeItems.push_back(*I); + } else { + // Link in the file suggested + bool is_native = false; + if (LinkInFile(sys::Path(I->first), is_native)) + return true; + if (is_native) + NativeItems.push_back(*I); + } + } + + // At this point we have processed all the link items provided to us. Since + // we have an aggregated module at this point, the dependent libraries in + // that module should also be aggregated with duplicates eliminated. This is + // now the time to process the dependent libraries to resolve any remaining + // symbols. + bool is_native; + for (Module::lib_iterator I = Composite->lib_begin(), + E = Composite->lib_end(); I != E; ++I) { + if(LinkInLibrary(*I, is_native)) + return true; + if (is_native) + NativeItems.push_back(std::make_pair(*I, true)); + } + + return false; +} + + +/// LinkInLibrary - links one library into the HeadModule. +/// +bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) { + is_native = false; + // Determine where this library lives. + sys::Path Pathname = FindLib(Lib); + if (Pathname.isEmpty()) + return error("Cannot find library '" + Lib + "'"); + + // If its an archive, try to link it in + std::string Magic; + Pathname.getMagicNumber(Magic, 64); + switch (sys::IdentifyFileType(Magic.c_str(), 64)) { + default: assert(0 && "Bad file type identification"); + case sys::Unknown_FileType: + return warning("Supposed library '" + Lib + "' isn't a library."); + + case sys::Bitcode_FileType: + // LLVM ".so" file. + if (LinkInFile(Pathname, is_native)) + return true; + break; + + case sys::Archive_FileType: + if (LinkInArchive(Pathname, is_native)) + return error("Cannot link archive '" + Pathname.toString() + "'"); + break; + + case sys::ELF_Relocatable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::Mach_O_Object_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case sys::COFF_FileType: + is_native = true; + break; + } + return false; +} + +/// LinkLibraries - takes the specified library files and links them into the +/// main bitcode object file. +/// +/// Inputs: +/// Libraries - The list of libraries to link into the module. +/// +/// Return value: +/// FALSE - No error. +/// TRUE - Error. +/// +bool Linker::LinkInLibraries(const std::vector &Libraries) { + + // Process the set of libraries we've been provided. + bool is_native = false; + for (unsigned i = 0; i < Libraries.size(); ++i) + if (LinkInLibrary(Libraries[i], is_native)) + return true; + + // At this point we have processed all the libraries provided to us. Since + // we have an aggregated module at this point, the dependent libraries in + // that module should also be aggregated with duplicates eliminated. This is + // now the time to process the dependent libraries to resolve any remaining + // symbols. + const Module::LibraryListType& DepLibs = Composite->getLibraries(); + for (Module::LibraryListType::const_iterator I = DepLibs.begin(), + E = DepLibs.end(); I != E; ++I) + if (LinkInLibrary(*I, is_native)) + return true; + + return false; +} + +/// LinkInFile - opens a bitcode file and links in all objects which +/// provide symbols that are currently undefined. +/// +/// Inputs: +/// File - The pathname of the bitcode file. +/// +/// Outputs: +/// ErrorMessage - A C++ string detailing what error occurred, if any. 
+/// +/// Return Value: +/// TRUE - An error occurred. +/// FALSE - No errors. +/// +bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { + is_native = false; + + // Check for a file of name "-", which means "read standard input" + if (File.toString() == "-") { + std::auto_ptr M; + if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) { + M.reset(ParseBitcodeFile(Buffer, &Error)); + delete Buffer; + if (M.get()) + if (!LinkInModule(M.get(), &Error)) + return false; + } else + Error = "standard input is empty"; + return error("Cannot link stdin: " + Error); + } + + // Make sure we can at least read the file + if (!File.canRead()) + return error("Cannot find linker input '" + File.toString() + "'"); + + // If its an archive, try to link it in + std::string Magic; + File.getMagicNumber(Magic, 64); + switch (sys::IdentifyFileType(Magic.c_str(), 64)) { + default: assert(0 && "Bad file type identification"); + case sys::Unknown_FileType: + return warning("Ignoring file '" + File.toString() + + "' because does not contain bitcode."); + + case sys::Archive_FileType: + // A user may specify an ar archive without -l, perhaps because it + // is not installed as a library. Detect that and link the archive. + verbose("Linking archive file '" + File.toString() + "'"); + if (LinkInArchive(File, is_native)) + return true; + break; + + case sys::Bitcode_FileType: { + verbose("Linking bitcode file '" + File.toString() + "'"); + std::auto_ptr M(LoadObject(File)); + if (M.get() == 0) + return error("Cannot load file '" + File.toString() + "': " + Error); + if (LinkInModule(M.get(), &Error)) + return error("Cannot link file '" + File.toString() + "': " + Error); + + verbose("Linked in file '" + File.toString() + "'"); + break; + } + + case sys::ELF_Relocatable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::Mach_O_Object_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case sys::COFF_FileType: + is_native = true; + break; + } + return false; +} + +/// LinkFiles - takes a module and a list of files and links them all together. +/// It locates the file either in the current directory, as its absolute +/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH. +/// +/// Inputs: +/// Files - A vector of sys::Path indicating the LLVM bitcode filenames +/// to be linked. The names can refer to a mixture of pure LLVM +/// bitcode files and archive (ar) formatted files. +/// +/// Return value: +/// FALSE - No errors. +/// TRUE - Some error occurred. +/// +bool Linker::LinkInFiles(const std::vector &Files) { + bool is_native; + for (unsigned i = 0; i < Files.size(); ++i) + if (LinkInFile(Files[i], is_native)) + return true; + return false; +} diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp new file mode 100644 index 000000000000..4a15d88d8f36 --- /dev/null +++ b/lib/Linker/LinkModules.cpp @@ -0,0 +1,1328 @@ +//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM module linker. 
+// +// Specifically, this: +// * Merges global variables between the two modules +// * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if != +// * Merges functions between two modules +// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Instructions.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Streams.h" +#include "llvm/System/Path.h" +#include "llvm/ADT/DenseMap.h" +#include +using namespace llvm; + +// Error - Simple wrapper function to conditionally assign to E and return true. +// This just makes error return conditions a little bit simpler... +static inline bool Error(std::string *E, const std::string &Message) { + if (E) *E = Message; + return true; +} + +// Function: ResolveTypes() +// +// Description: +// Attempt to link the two specified types together. +// +// Inputs: +// DestTy - The type to which we wish to resolve. +// SrcTy - The original type which we want to resolve. +// +// Outputs: +// DestST - The symbol table in which the new type should be placed. +// +// Return value: +// true - There is an error and the types cannot yet be linked. +// false - No errors. +// +static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) { + if (DestTy == SrcTy) return false; // If already equal, noop + assert(DestTy && SrcTy && "Can't handle null types"); + + if (const OpaqueType *OT = dyn_cast(DestTy)) { + // Type _is_ in module, just opaque... + const_cast(OT)->refineAbstractTypeTo(SrcTy); + } else if (const OpaqueType *OT = dyn_cast(SrcTy)) { + const_cast(OT)->refineAbstractTypeTo(DestTy); + } else { + return true; // Cannot link types... not-equal and neither is opaque. + } + return false; +} + +/// LinkerTypeMap - This implements a map of types that is stable +/// even if types are resolved/refined to other types. This is not a general +/// purpose map, it is specific to the linker's use. +namespace { +class LinkerTypeMap : public AbstractTypeUser { + typedef DenseMap TheMapTy; + TheMapTy TheMap; + + LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT + void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT +public: + LinkerTypeMap() {} + ~LinkerTypeMap() { + for (DenseMap::iterator I = TheMap.begin(), + E = TheMap.end(); I != E; ++I) + I->first->removeAbstractTypeUser(this); + } + + /// lookup - Return the value for the specified type or null if it doesn't + /// exist. + const Type *lookup(const Type *Ty) const { + TheMapTy::const_iterator I = TheMap.find(Ty); + if (I != TheMap.end()) return I->second; + return 0; + } + + /// erase - Remove the specified type, returning true if it was in the set. + bool erase(const Type *Ty) { + if (!TheMap.erase(Ty)) + return false; + if (Ty->isAbstract()) + Ty->removeAbstractTypeUser(this); + return true; + } + + /// insert - This returns true if the pointer was new to the set, false if it + /// was already in the set. + bool insert(const Type *Src, const Type *Dst) { + if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second) + return false; // Already in map. + if (Src->isAbstract()) + Src->addAbstractTypeUser(this); + return true; + } + +protected: + /// refineAbstractType - The callback method invoked when an abstract type is + /// resolved to another type. 
An object must override this method to update + /// its internal state to reference NewType instead of OldType. + /// + virtual void refineAbstractType(const DerivedType *OldTy, + const Type *NewTy) { + TheMapTy::iterator I = TheMap.find(OldTy); + const Type *DstTy = I->second; + + TheMap.erase(I); + if (OldTy->isAbstract()) + OldTy->removeAbstractTypeUser(this); + + // Don't reinsert into the map if the key is concrete now. + if (NewTy->isAbstract()) + insert(NewTy, DstTy); + } + + /// The other case which AbstractTypeUsers must be aware of is when a type + /// makes the transition from being abstract (where it has clients on it's + /// AbstractTypeUsers list) to concrete (where it does not). This method + /// notifies ATU's when this occurs for a type. + virtual void typeBecameConcrete(const DerivedType *AbsTy) { + TheMap.erase(AbsTy); + AbsTy->removeAbstractTypeUser(this); + } + + // for debugging... + virtual void dump() const { + cerr << "AbstractTypeSet!\n"; + } +}; +} + + +// RecursiveResolveTypes - This is just like ResolveTypes, except that it +// recurses down into derived types, merging the used types if the parent types +// are compatible. +static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy, + LinkerTypeMap &Pointers) { + if (DstTy == SrcTy) return false; // If already equal, noop + + // If we found our opaque type, resolve it now! + if (isa(DstTy) || isa(SrcTy)) + return ResolveTypes(DstTy, SrcTy); + + // Two types cannot be resolved together if they are of different primitive + // type. For example, we cannot resolve an int to a float. + if (DstTy->getTypeID() != SrcTy->getTypeID()) return true; + + // If neither type is abstract, then they really are just different types. + if (!DstTy->isAbstract() && !SrcTy->isAbstract()) + return true; + + // Otherwise, resolve the used type used by this derived type... + switch (DstTy->getTypeID()) { + default: + return true; + case Type::FunctionTyID: { + const FunctionType *DstFT = cast(DstTy); + const FunctionType *SrcFT = cast(SrcTy); + if (DstFT->isVarArg() != SrcFT->isVarArg() || + DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes()) + return true; + + // Use TypeHolder's so recursive resolution won't break us. 
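// [editorial note] ResolveTypes above can only make progress when one side is
// an OpaqueType, a named placeholder whose body is not yet known; refining it
// mutates the placeholder in place so every existing user sees the concrete
// type. A standalone caricature of placeholder refinement, not the LLVM type
// system; Ty, refineTo and resolve are hypothetical names.

#include <cassert>
#include <string>

struct Ty {
  bool Opaque;        // true while the body is still unknown
  std::string Body;   // meaningful only once Opaque is false

  void refineTo(const Ty &Other) {
    assert(Opaque && "can only refine a placeholder");
    Opaque = Other.Opaque;
    Body = Other.Body;  // all existing users of *this now see the body
  }
};

// Mirrors ResolveTypes: returns true only on a genuine conflict.
bool resolve(Ty &A, Ty &B) {
  if (A.Opaque) { A.refineTo(B); return false; }  // fill in the placeholder
  if (B.Opaque) { B.refineTo(A); return false; }
  return A.Body != B.Body;  // both concrete: error iff they disagree
}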
+ PATypeHolder ST(SrcFT), DT(DstFT); + for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) { + const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); + if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) + return true; + } + return false; + } + case Type::StructTyID: { + const StructType *DstST = cast(DstTy); + const StructType *SrcST = cast(SrcTy); + if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes()) + return true; + + PATypeHolder ST(SrcST), DT(DstST); + for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) { + const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); + if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) + return true; + } + return false; + } + case Type::ArrayTyID: { + const ArrayType *DAT = cast(DstTy); + const ArrayType *SAT = cast(SrcTy); + if (DAT->getNumElements() != SAT->getNumElements()) return true; + return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(), + Pointers); + } + case Type::VectorTyID: { + const VectorType *DVT = cast(DstTy); + const VectorType *SVT = cast(SrcTy); + if (DVT->getNumElements() != SVT->getNumElements()) return true; + return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(), + Pointers); + } + case Type::PointerTyID: { + const PointerType *DstPT = cast(DstTy); + const PointerType *SrcPT = cast(SrcTy); + + if (DstPT->getAddressSpace() != SrcPT->getAddressSpace()) + return true; + + // If this is a pointer type, check to see if we have already seen it. If + // so, we are in a recursive branch. Cut off the search now. We cannot use + // an associative container for this search, because the type pointers (keys + // in the container) change whenever types get resolved. + if (SrcPT->isAbstract()) + if (const Type *ExistingDestTy = Pointers.lookup(SrcPT)) + return ExistingDestTy != DstPT; + + if (DstPT->isAbstract()) + if (const Type *ExistingSrcTy = Pointers.lookup(DstPT)) + return ExistingSrcTy != SrcPT; + // Otherwise, add the current pointers to the vector to stop recursion on + // this pair. + if (DstPT->isAbstract()) + Pointers.insert(DstPT, SrcPT); + if (SrcPT->isAbstract()) + Pointers.insert(SrcPT, DstPT); + + return RecursiveResolveTypesI(DstPT->getElementType(), + SrcPT->getElementType(), Pointers); + } + } +} + +static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) { + LinkerTypeMap PointerTypes; + return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes); +} + + +// LinkTypes - Go through the symbol table of the Src module and see if any +// types are named in the src module that are not named in the Dst module. +// Make sure there are no type name conflicts. +static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { + TypeSymbolTable *DestST = &Dest->getTypeSymbolTable(); + const TypeSymbolTable *SrcST = &Src->getTypeSymbolTable(); + + // Look for a type plane for Type's... + TypeSymbolTable::const_iterator TI = SrcST->begin(); + TypeSymbolTable::const_iterator TE = SrcST->end(); + if (TI == TE) return false; // No named types, do nothing. + + // Some types cannot be resolved immediately because they depend on other + // types being resolved to each other first. This contains a list of types we + // are waiting to recheck. + std::vector DelayedTypesToResolve; + + for ( ; TI != TE; ++TI ) { + const std::string &Name = TI->first; + const Type *RHS = TI->second; + + // Check to see if this type name is already in the dest module. 
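// [editorial note] The Pointers map above exists to cut off infinite
// recursion when two recursive types (say, linked-list nodes) are compared:
// once a pair has been visited, meeting it again counts as success. The same
// trick in a standalone structural-equality check over a toy graph; Node and
// equalRec are hypothetical names.

#include <set>
#include <utility>

struct Node { const Node *Next; int Tag; };

bool equalRec(const Node *A, const Node *B,
              std::set<std::pair<const Node*, const Node*> > &Seen) {
  if (A == B) return true;
  if (!A || !B || A->Tag != B->Tag) return false;
  if (!Seen.insert(std::make_pair(A, B)).second)
    return true;                 // already comparing this pair: assume equal
  return equalRec(A->Next, B->Next, Seen);  // recurse without looping forever
}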
+ Type *Entry = DestST->lookup(Name); + + // If the name is just in the source module, bring it over to the dest. + if (Entry == 0) { + if (!Name.empty()) + DestST->insert(Name, const_cast(RHS)); + } else if (ResolveTypes(Entry, RHS)) { + // They look different, save the types 'till later to resolve. + DelayedTypesToResolve.push_back(Name); + } + } + + // Iteratively resolve types while we can... + while (!DelayedTypesToResolve.empty()) { + // Loop over all of the types, attempting to resolve them if possible... + unsigned OldSize = DelayedTypesToResolve.size(); + + // Try direct resolution by name... + for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) { + const std::string &Name = DelayedTypesToResolve[i]; + Type *T1 = SrcST->lookup(Name); + Type *T2 = DestST->lookup(Name); + if (!ResolveTypes(T2, T1)) { + // We are making progress! + DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); + --i; + } + } + + // Did we not eliminate any types? + if (DelayedTypesToResolve.size() == OldSize) { + // Attempt to resolve subelements of types. This allows us to merge these + // two types: { int* } and { opaque* } + for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) { + const std::string &Name = DelayedTypesToResolve[i]; + if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) { + // We are making progress! + DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); + + // Go back to the main loop, perhaps we can resolve directly by name + // now... + break; + } + } + + // If we STILL cannot resolve the types, then there is something wrong. + if (DelayedTypesToResolve.size() == OldSize) { + // Remove the symbol name from the destination. + DelayedTypesToResolve.pop_back(); + } + } + } + + + return false; +} + +#ifndef NDEBUG +static void PrintMap(const std::map &M) { + for (std::map::const_iterator I = M.begin(), E =M.end(); + I != E; ++I) { + cerr << " Fr: " << (void*)I->first << " "; + I->first->dump(); + cerr << " To: " << (void*)I->second << " "; + I->second->dump(); + cerr << "\n"; + } +} +#endif + + +// RemapOperand - Use ValueMap to convert constants from one module to another. +static Value *RemapOperand(const Value *In, + std::map &ValueMap) { + std::map::const_iterator I = ValueMap.find(In); + if (I != ValueMap.end()) + return I->second; + + // Check to see if it's a constant that we are interested in transforming. + Value *Result = 0; + if (const Constant *CPV = dyn_cast(In)) { + if ((!isa(CPV->getType()) && !isa(CPV)) || + isa(CPV) || isa(CPV)) + return const_cast(CPV); // Simple constants stay identical. 
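// [editorial note] The DelayedTypesToResolve loop above is a standard
// "worklist until quiescence" shape: retry every pending item, and only fall
// back to a more expensive strategy (or drop one item) when a full pass
// removes nothing. A standalone skeleton of that shape; runToQuiescence is a
// hypothetical name.

#include <cstddef>
#include <vector>

template <class Item, class TryFn, class FallbackFn>
void runToQuiescence(std::vector<Item> &Work, TryFn TryOne,
                     FallbackFn Fallback) {
  while (!Work.empty()) {
    std::size_t OldSize = Work.size();
    for (std::size_t i = 0; i < Work.size(); )
      if (TryOne(Work[i]))
        Work.erase(Work.begin() + i);   // progress: item resolved
      else
        ++i;
    if (Work.size() == OldSize)         // stuck: no item made progress
      Fallback(Work);                   // must shrink Work, as pop_back does
  }                                     // above, or the loop would spin
}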
+ + if (const ConstantArray *CPA = dyn_cast(CPV)) { + std::vector Operands(CPA->getNumOperands()); + for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) + Operands[i] =cast(RemapOperand(CPA->getOperand(i), ValueMap)); + Result = ConstantArray::get(cast(CPA->getType()), Operands); + } else if (const ConstantStruct *CPS = dyn_cast(CPV)) { + std::vector Operands(CPS->getNumOperands()); + for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) + Operands[i] =cast(RemapOperand(CPS->getOperand(i), ValueMap)); + Result = ConstantStruct::get(cast(CPS->getType()), Operands); + } else if (isa(CPV) || isa(CPV)) { + Result = const_cast(CPV); + } else if (const ConstantVector *CP = dyn_cast(CPV)) { + std::vector Operands(CP->getNumOperands()); + for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) + Operands[i] = cast(RemapOperand(CP->getOperand(i), ValueMap)); + Result = ConstantVector::get(Operands); + } else if (const ConstantExpr *CE = dyn_cast(CPV)) { + std::vector Ops; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) + Ops.push_back(cast(RemapOperand(CE->getOperand(i),ValueMap))); + Result = CE->getWithOperands(Ops); + } else { + assert(!isa(CPV) && "Unmapped global?"); + assert(0 && "Unknown type of derived type constant value!"); + } + } else if (isa(In)) { + Result = const_cast(In); + } + + // Cache the mapping in our local map structure + if (Result) { + ValueMap[In] = Result; + return Result; + } + +#ifndef NDEBUG + cerr << "LinkModules ValueMap: \n"; + PrintMap(ValueMap); + + cerr << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; + assert(0 && "Couldn't remap value!"); +#endif + return 0; +} + +/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict +/// in the symbol table. This is good for all clients except for us. Go +/// through the trouble to force this back. +static void ForceRenaming(GlobalValue *GV, const std::string &Name) { + assert(GV->getName() != Name && "Can't force rename to self"); + ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable(); + + // If there is a conflict, rename the conflict. + if (GlobalValue *ConflictGV = cast_or_null(ST.lookup(Name))) { + assert(ConflictGV->hasLocalLinkage() && + "Not conflicting with a static global, should link instead!"); + GV->takeName(ConflictGV); + ConflictGV->setName(Name); // This will cause ConflictGV to get renamed + assert(ConflictGV->getName() != Name && "ForceRenaming didn't work"); + } else { + GV->setName(Name); // Force the name back + } +} + +/// CopyGVAttributes - copy additional attributes (those not needed to construct +/// a GlobalValue) from the SrcGV to the DestGV. +static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) { + // Use the maximum alignment, rather than just copying the alignment of SrcGV. + unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment()); + DestGV->copyAttributesFrom(SrcGV); + DestGV->setAlignment(Alignment); +} + +/// GetLinkageResult - This analyzes the two global values and determines what +/// the result will look like in the destination module. In particular, it +/// computes the resultant linkage type, computes whether the global in the +/// source should be copied over to the destination (replacing the existing +/// one), and computes whether this linkage is an error or not. It also performs +/// visibility checks: we cannot link together two symbols with different +/// visibilities. 
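// [editorial note] RemapOperand above is a memoized recursive rewrite: check
// the cache first, rebuild aggregates from remapped pieces, then cache the
// result so shared subexpressions are translated exactly once. The same shape
// over a toy expression DAG, not LLVM code; Expr, remap, Memo and Pool are
// hypothetical names.

#include <cstddef>
#include <map>
#include <memory>
#include <utility>
#include <vector>

struct Expr {
  int Leaf;                         // payload for leaves
  std::vector<const Expr*> Kids;    // empty for leaves
};

const Expr *remap(const Expr *E, std::map<const Expr*, const Expr*> &Memo,
                  std::vector<std::unique_ptr<Expr> > &Pool) {
  std::map<const Expr*, const Expr*>::iterator I = Memo.find(E);
  if (I != Memo.end()) return I->second;     // already translated
  std::unique_ptr<Expr> N(new Expr);
  N->Leaf = E->Leaf + 100;                   // stand-in for "translate leaf"
  for (std::size_t i = 0; i != E->Kids.size(); ++i)
    N->Kids.push_back(remap(E->Kids[i], Memo, Pool));  // rebuild children
  Memo[E] = N.get();                         // cache before returning
  Pool.push_back(std::move(N));
  return Pool.back().get();
}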
+static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                             GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
+                             std::string *Err) {
+  assert((!Dest || !Src->hasLocalLinkage()) &&
+         "If Src has internal linkage, Dest shouldn't be set!");
+  if (!Dest) {
+    // Linking something to nothing.
+    LinkFromSrc = true;
+    LT = Src->getLinkage();
+  } else if (Src->isDeclaration()) {
+    // If Src is external or if both Src & Dest are external, just link the
+    // external globals, we aren't adding anything.
+    if (Src->hasDLLImportLinkage()) {
+      // If one of GVs has DLLImport linkage, result should be dllimport'ed.
+      if (Dest->isDeclaration()) {
+        LinkFromSrc = true;
+        LT = Src->getLinkage();
+      }
+    } else if (Dest->hasExternalWeakLinkage()) {
+      // If the Dest is weak, use the source linkage.
+      LinkFromSrc = true;
+      LT = Src->getLinkage();
+    } else {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    }
+  } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) {
+    // If Dest is external but Src is not:
+    LinkFromSrc = true;
+    LT = Src->getLinkage();
+  } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) {
+    if (Src->getLinkage() != Dest->getLinkage())
+      return Error(Err, "Linking globals named '" + Src->getName() +
+          "': can only link appending global with another appending global!");
+    LinkFromSrc = true; // Special cased.
+    LT = Src->getLinkage();
+  } else if (Src->isWeakForLinker()) {
+    // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+    // or DLL* linkage.
+    if (Dest->hasExternalWeakLinkage() ||
+        Dest->hasAvailableExternallyLinkage() ||
+        (Dest->hasLinkOnceLinkage() &&
+         (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+      LinkFromSrc = true;
+      LT = Src->getLinkage();
+    } else {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    }
+  } else if (Dest->isWeakForLinker()) {
+    // At this point we know that Src has External* or DLL* linkage.
+    if (Src->hasExternalWeakLinkage()) {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    } else {
+      LinkFromSrc = true;
+      LT = GlobalValue::ExternalLinkage;
+    }
+  } else {
+    assert((Dest->hasExternalLinkage() ||
+            Dest->hasDLLImportLinkage() ||
+            Dest->hasDLLExportLinkage() ||
+            Dest->hasExternalWeakLinkage()) &&
+           (Src->hasExternalLinkage() ||
+            Src->hasDLLImportLinkage() ||
+            Src->hasDLLExportLinkage() ||
+            Src->hasExternalWeakLinkage()) &&
+           "Unexpected linkage type!");
+    return Error(Err, "Linking globals named '" + Src->getName() +
+                 "': symbol multiply defined!");
+  }
+
+  // Check visibility
+  if (Dest && Src->getVisibility() != Dest->getVisibility())
+    if (!Src->isDeclaration() && !Dest->isDeclaration())
+      return Error(Err, "Linking globals named '" + Src->getName() +
+                   "': symbols have different visibilities!");
+  return false;
+}
+
+// LinkGlobals - Loop through the global variables in the src module and merge
+// them into the dest module.
+static bool LinkGlobals(Module *Dest, const Module *Src,
+                        std::map<const Value*, Value*> &ValueMap,
+                        std::multimap<std::string, GlobalVariable*> &AppendingVars,
+                        std::string *Err) {
+  ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
+
+  // Loop over all of the globals in the src module, mapping them over as we go
+  for (Module::const_global_iterator I = Src->global_begin(),
+       E = Src->global_end(); I != E; ++I) {
+    const GlobalVariable *SGV = I;
+    GlobalValue *DGV = 0;
+
+    // Check to see if we may have to link the global with the global, alias or
+    // function.
+ if (SGV->hasName() && !SGV->hasLocalLinkage()) + DGV = cast_or_null(DestSymTab.lookup(SGV->getNameStart(), + SGV->getNameEnd())); + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV && DGV->hasLocalLinkage()) + DGV = 0; + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGV->getType()) + RecursiveResolveTypes(SGV->getType(), DGV->getType()); + + assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() || + SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) && + "Global must either be external or have an initializer!"); + + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + bool LinkFromSrc = false; + if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err)) + return true; + + if (DGV == 0) { + // No linking to be performed, simply create an identical version of the + // symbol over in the dest module... the initializer will be filled in + // later by LinkGlobalInits. + GlobalVariable *NewDGV = + new GlobalVariable(SGV->getType()->getElementType(), + SGV->isConstant(), SGV->getLinkage(), /*init*/0, + SGV->getName(), Dest, false, + SGV->getType()->getAddressSpace()); + // Propagate alignment, visibility and section info. + CopyGVAttributes(NewDGV, SGV); + + // If the LLVM runtime renamed the global, but it is an externally visible + // symbol, DGV must be an existing global with internal linkage. Rename + // it. + if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName()) + ForceRenaming(NewDGV, SGV->getName()); + + // Make sure to remember this mapping. + ValueMap[SGV] = NewDGV; + + // Keep track that this is an appending variable. + if (SGV->hasAppendingLinkage()) + AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); + continue; + } + + // If the visibilities of the symbols disagree and the destination is a + // prototype, take the visibility of its input. + if (DGV->isDeclaration()) + DGV->setVisibility(SGV->getVisibility()); + + if (DGV->hasAppendingLinkage()) { + // No linking is performed yet. Just insert a new copy of the global, and + // keep track of the fact that it is an appending variable in the + // AppendingVars map. The name is cleared out so that no linkage is + // performed. + GlobalVariable *NewDGV = + new GlobalVariable(SGV->getType()->getElementType(), + SGV->isConstant(), SGV->getLinkage(), /*init*/0, + "", Dest, false, + SGV->getType()->getAddressSpace()); + + // Set alignment allowing CopyGVAttributes merge it with alignment of SGV. + NewDGV->setAlignment(DGV->getAlignment()); + // Propagate alignment, section and visibility info. + CopyGVAttributes(NewDGV, SGV); + + // Make sure to remember this mapping... + ValueMap[SGV] = NewDGV; + + // Keep track that this is an appending variable... + AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); + continue; + } + + if (LinkFromSrc) { + if (isa(DGV)) + return Error(Err, "Global-Alias Collision on '" + SGV->getName() + + "': symbol multiple defined"); + + // If the types don't match, and if we are to link from the source, nuke + // DGV and create a new one of the appropriate type. Note that the thing + // we are replacing may be a function (if a prototype, weak, etc) or a + // global variable. 
+ GlobalVariable *NewDGV = + new GlobalVariable(SGV->getType()->getElementType(), SGV->isConstant(), + NewLinkage, /*init*/0, DGV->getName(), Dest, false, + SGV->getType()->getAddressSpace()); + + // Propagate alignment, section, and visibility info. + CopyGVAttributes(NewDGV, SGV); + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType())); + + // DGV will conflict with NewDGV because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DGV might not have internal linkage. + if (GlobalVariable *Var = dyn_cast(DGV)) + Var->eraseFromParent(); + else + cast(DGV)->eraseFromParent(); + DGV = NewDGV; + + // If the symbol table renamed the global, but it is an externally visible + // symbol, DGV must be an existing global with internal linkage. Rename. + if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage()) + ForceRenaming(NewDGV, SGV->getName()); + + // Inherit const as appropriate. + NewDGV->setConstant(SGV->isConstant()); + + // Make sure to remember this mapping. + ValueMap[SGV] = NewDGV; + continue; + } + + // Not "link from source", keep the one in the DestModule and remap the + // input onto it. + + // Special case for const propagation. + if (GlobalVariable *DGVar = dyn_cast(DGV)) + if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant()) + DGVar->setConstant(true); + + // SGV is global, but DGV is alias. + if (isa(DGV)) { + // The only valid mappings are: + // - SGV is external declaration, which is effectively a no-op. + // - SGV is weak, when we just need to throw SGV out. + if (!SGV->isDeclaration() && !SGV->isWeakForLinker()) + return Error(Err, "Global-Alias Collision on '" + SGV->getName() + + "': symbol multiple defined"); + } + + // Set calculated linkage + DGV->setLinkage(NewLinkage); + + // Make sure to remember this mapping... + ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType()); + } + return false; +} + +static GlobalValue::LinkageTypes +CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) { + GlobalValue::LinkageTypes SL = SGV->getLinkage(); + GlobalValue::LinkageTypes DL = DGV->getLinkage(); + if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage) + return GlobalValue::ExternalLinkage; + else if (SL == GlobalValue::WeakAnyLinkage || + DL == GlobalValue::WeakAnyLinkage) + return GlobalValue::WeakAnyLinkage; + else if (SL == GlobalValue::WeakODRLinkage || + DL == GlobalValue::WeakODRLinkage) + return GlobalValue::WeakODRLinkage; + else if (SL == GlobalValue::InternalLinkage && + DL == GlobalValue::InternalLinkage) + return GlobalValue::InternalLinkage; + else { + assert (SL == GlobalValue::PrivateLinkage && + DL == GlobalValue::PrivateLinkage && "Unexpected linkage type"); + return GlobalValue::PrivateLinkage; + } +} + +// LinkAlias - Loop through the alias in the src module and link them into the +// dest module. We're assuming, that all functions/global variables were already +// linked in. +static bool LinkAlias(Module *Dest, const Module *Src, + std::map &ValueMap, + std::string *Err) { + // Loop over all alias in the src module + for (Module::const_alias_iterator I = Src->alias_begin(), + E = Src->alias_end(); I != E; ++I) { + const GlobalAlias *SGA = I; + const GlobalValue *SAliasee = SGA->getAliasedGlobal(); + GlobalAlias *NewGA = NULL; + + // Globals were already linked, thus we can just query ValueMap for variant + // of SAliasee in Dest. 
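// [editorial note] The replaceAllUsesWith(ConstantExpr::getBitCast(...))
// idiom above swaps a global out from under its users even when the
// replacement has a different pointer type: every use is rewritten to "new
// value, cast back to the old type", so no user has to change its own type.
// In miniature, with the cast step elided because these toy values are
// untyped; Value, Use and replaceAllUsesWith here are hypothetical names.

#include <cstddef>
#include <vector>

struct Value;
struct Use { Value *V; };                 // one operand slot in some user

// Point every recorded use of the old value at New.
void replaceAllUsesWith(std::vector<Use*> &UsesOfOld, Value *New) {
  for (std::size_t i = 0; i != UsesOfOld.size(); ++i)
    UsesOfOld[i]->V = New;                // users never knew the old one by name
  UsesOfOld.clear();                      // old value is now unreferenced and
}                                         // safe to erase from its parent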
+ std::map::const_iterator VMI = ValueMap.find(SAliasee); + assert(VMI != ValueMap.end() && "Aliasee not linked"); + GlobalValue* DAliasee = cast(VMI->second); + GlobalValue* DGV = NULL; + + // Try to find something 'similar' to SGA in destination module. + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getNamedAlias(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getGlobalVariable(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getFunction(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + // No linking to be performed on internal stuff. + if (DGV && DGV->hasLocalLinkage()) + DGV = NULL; + + if (GlobalAlias *DGA = dyn_cast_or_null(DGV)) { + // Types are known to be the same, check whether aliasees equal. As + // globals are already linked we just need query ValueMap to find the + // mapping. + if (DAliasee == DGA->getAliasedGlobal()) { + // This is just two copies of the same alias. Propagate linkage, if + // necessary. + DGA->setLinkage(CalculateAliasLinkage(SGA, DGA)); + + NewGA = DGA; + // Proceed to 'common' steps + } else + return Error(Err, "Alias Collision on '" + SGA->getName()+ + "': aliases have different aliasees"); + } else if (GlobalVariable *DGVar = dyn_cast_or_null(DGV)) { + // The only allowed way is to link alias with external declaration or weak + // symbol.. + if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) { + // But only if aliasee is global too... + if (!isa(DAliasee)) + return Error(Err, "Global-Alias Collision on '" + SGA->getName() + + "': aliasee is not global variable"); + + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliasee, Dest); + CopyGVAttributes(NewGA, SGA); + + // Any uses of DGV need to change to NewGA, with cast, if needed. + if (SGA->getType() != DGVar->getType()) + DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, + DGVar->getType())); + else + DGVar->replaceAllUsesWith(NewGA); + + // DGVar will conflict with NewGA because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DGV might not have internal linkage. + DGVar->eraseFromParent(); + + // Proceed to 'common' steps + } else + return Error(Err, "Global-Alias Collision on '" + SGA->getName() + + "': symbol multiple defined"); + } else if (Function *DF = dyn_cast_or_null(DGV)) { + // The only allowed way is to link alias with external declaration or weak + // symbol... + if (DF->isDeclaration() || DF->isWeakForLinker()) { + // But only if aliasee is function too... + if (!isa(DAliasee)) + return Error(Err, "Function-Alias Collision on '" + SGA->getName() + + "': aliasee is not function"); + + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliasee, Dest); + CopyGVAttributes(NewGA, SGA); + + // Any uses of DF need to change to NewGA, with cast, if needed. 
+ if (SGA->getType() != DF->getType()) + DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, + DF->getType())); + else + DF->replaceAllUsesWith(NewGA); + + // DF will conflict with NewGA because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DF might not have internal linkage. + DF->eraseFromParent(); + + // Proceed to 'common' steps + } else + return Error(Err, "Function-Alias Collision on '" + SGA->getName() + + "': symbol multiple defined"); + } else { + // No linking to be performed, simply create an identical version of the + // alias over in the dest module... + + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliasee, Dest); + CopyGVAttributes(NewGA, SGA); + + // Proceed to 'common' steps + } + + assert(NewGA && "No alias was created in destination module!"); + + // If the symbol table renamed the alias, but it is an externally visible + // symbol, DGA must be an global value with internal linkage. Rename it. + if (NewGA->getName() != SGA->getName() && + !NewGA->hasLocalLinkage()) + ForceRenaming(NewGA, SGA->getName()); + + // Remember this mapping so uses in the source module get remapped + // later by RemapOperand. + ValueMap[SGA] = NewGA; + } + + return false; +} + + +// LinkGlobalInits - Update the initializers in the Dest module now that all +// globals that may be referenced are in Dest. +static bool LinkGlobalInits(Module *Dest, const Module *Src, + std::map &ValueMap, + std::string *Err) { + // Loop over all of the globals in the src module, mapping them over as we go + for (Module::const_global_iterator I = Src->global_begin(), + E = Src->global_end(); I != E; ++I) { + const GlobalVariable *SGV = I; + + if (SGV->hasInitializer()) { // Only process initialized GV's + // Figure out what the initializer looks like in the dest module... + Constant *SInit = + cast(RemapOperand(SGV->getInitializer(), ValueMap)); + // Grab destination global variable or alias. + GlobalValue *DGV = cast(ValueMap[SGV]->stripPointerCasts()); + + // If dest if global variable, check that initializers match. + if (GlobalVariable *DGVar = dyn_cast(DGV)) { + if (DGVar->hasInitializer()) { + if (SGV->hasExternalLinkage()) { + if (DGVar->getInitializer() != SInit) + return Error(Err, "Global Variable Collision on '" + + SGV->getName() + + "': global variables have different initializers"); + } else if (DGVar->isWeakForLinker()) { + // Nothing is required, mapped values will take the new global + // automatically. + } else if (SGV->isWeakForLinker()) { + // Nothing is required, mapped values will take the new global + // automatically. + } else if (DGVar->hasAppendingLinkage()) { + assert(0 && "Appending linkage unimplemented!"); + } else { + assert(0 && "Unknown linkage!"); + } + } else { + // Copy the initializer over now... + DGVar->setInitializer(SInit); + } + } else { + // Destination is alias, the only valid situation is when source is + // weak. Also, note, that we already checked linkage in LinkGlobals(), + // thus we assert here. + // FIXME: Should we weaken this assumption, 'dereference' alias and + // check for initializer of aliasee? + assert(SGV->isWeakForLinker()); + } + } + } + return false; +} + +// LinkFunctionProtos - Link the functions together between the two modules, +// without doing function bodies... this just adds external function prototypes +// to the Dest function... 
+// +static bool LinkFunctionProtos(Module *Dest, const Module *Src, + std::map &ValueMap, + std::string *Err) { + ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable(); + + // Loop over all of the functions in the src module, mapping them over + for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) { + const Function *SF = I; // SrcFunction + GlobalValue *DGV = 0; + + // Check to see if may have to link the function with the global, alias or + // function. + if (SF->hasName() && !SF->hasLocalLinkage()) + DGV = cast_or_null(DestSymTab.lookup(SF->getNameStart(), + SF->getNameEnd())); + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV && DGV->hasLocalLinkage()) + DGV = 0; + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SF->getType()) + RecursiveResolveTypes(SF->getType(), DGV->getType()); + + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + bool LinkFromSrc = false; + if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err)) + return true; + + // If there is no linkage to be performed, just bring over SF without + // modifying it. + if (DGV == 0) { + // Function does not already exist, simply insert an function signature + // identical to SF into the dest module. + Function *NewDF = Function::Create(SF->getFunctionType(), + SF->getLinkage(), + SF->getName(), Dest); + CopyGVAttributes(NewDF, SF); + + // If the LLVM runtime renamed the function, but it is an externally + // visible symbol, DF must be an existing function with internal linkage. + // Rename it. + if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName()) + ForceRenaming(NewDF, SF->getName()); + + // ... and remember this mapping... + ValueMap[SF] = NewDF; + continue; + } + + // If the visibilities of the symbols disagree and the destination is a + // prototype, take the visibility of its input. + if (DGV->isDeclaration()) + DGV->setVisibility(SF->getVisibility()); + + if (LinkFromSrc) { + if (isa(DGV)) + return Error(Err, "Function-Alias Collision on '" + SF->getName() + + "': symbol multiple defined"); + + // We have a definition of the same name but different type in the + // source module. Copy the prototype to the destination and replace + // uses of the destination's prototype with the new prototype. + Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage, + SF->getName(), Dest); + CopyGVAttributes(NewDF, SF); + + // Any uses of DF need to change to NewDF, with cast + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType())); + + // DF will conflict with NewDF because they both had the same. We must + // erase this now so ForceRenaming doesn't assert because DF might + // not have internal linkage. + if (GlobalVariable *Var = dyn_cast(DGV)) + Var->eraseFromParent(); + else + cast(DGV)->eraseFromParent(); + + // If the symbol table renamed the function, but it is an externally + // visible symbol, DF must be an existing function with internal + // linkage. Rename it. + if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage()) + ForceRenaming(NewDF, SF->getName()); + + // Remember this mapping so uses in the source module get remapped + // later by RemapOperand. + ValueMap[SF] = NewDF; + continue; + } + + // Not "link from source", keep the one in the DestModule and remap the + // input onto it. 
+
+    if (isa<GlobalAlias>(DGV)) {
+      // The only valid mappings are:
+      // - SF is an external declaration, which is effectively a no-op.
+      // - SF is weak, when we just need to throw SF out.
+      if (!SF->isDeclaration() && !SF->isWeakForLinker())
+        return Error(Err, "Function-Alias Collision on '" + SF->getName() +
+                     "': symbol multiply defined");
+    }
+
+    // Set calculated linkage
+    DGV->setLinkage(NewLinkage);
+
+    // Make sure to remember this mapping.
+    ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType());
+  }
+  return false;
+}
+
+// LinkFunctionBody - Copy the source function over into the dest function and
+// fix up references to values. At this point we know that Dest is an external
+// function, and that Src is not.
+static bool LinkFunctionBody(Function *Dest, Function *Src,
+                             std::map<const Value*, Value*> &ValueMap,
+                             std::string *Err) {
+  assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
+
+  // Go through and convert function arguments over, remembering the mapping.
+  Function::arg_iterator DI = Dest->arg_begin();
+  for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+       I != E; ++I, ++DI) {
+    DI->setName(I->getName());  // Copy the name information over...
+
+    // Add a mapping to our local map
+    ValueMap[I] = DI;
+  }
+
+  // Splice the body of the source function into the dest function.
+  Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList());
+
+  // At this point, all of the instructions and values of the function are now
+  // copied over. The only problem is that they are still referencing values in
+  // the Source function as operands. Loop through all of the operands of the
+  // functions and patch them up to point to the local versions...
+  //
+  for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+           OI != OE; ++OI)
+        if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
+          *OI = RemapOperand(*OI, ValueMap);
+
+  // There is no need to map the arguments anymore.
+  for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+       I != E; ++I)
+    ValueMap.erase(I);
+
+  return false;
+}
+
+
+// LinkFunctionBodies - Link in the function bodies that are defined in the
+// source module into the DestModule. This consists basically of copying the
+// function over and fixing up references to values.
+static bool LinkFunctionBodies(Module *Dest, Module *Src,
+                               std::map<const Value*, Value*> &ValueMap,
+                               std::string *Err) {
+
+  // Loop over all of the functions in the src module, mapping them over as we
+  // go
+  for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) {
+    if (!SF->isDeclaration()) {              // No body if function is external
+      Function *DF = dyn_cast<Function>(ValueMap[SF]); // Destination function
+
+      // DF not external SF external?
+      if (DF && DF->isDeclaration())
+        // Only provide the function body if there isn't one already.
+        if (LinkFunctionBody(DF, SF, ValueMap, Err))
+          return true;
+    }
+  }
+  return false;
+}
+
+// LinkAppendingVars - If there were any appending global variables, link them
+// together now. Return true on error.
+static bool LinkAppendingVars(Module *M,
+                  std::multimap<std::string, GlobalVariable*> &AppendingVars,
+                              std::string *ErrorMsg) {
+  if (AppendingVars.empty()) return false; // Nothing to do.
+
+  // Loop over the multimap of appending vars, processing any variables with
+  // the same name, forming a new appending global variable with both of the
+  // initializers merged together, then rewrite references to the old variables
+  // and delete them.
+  std::vector<Constant*> Inits;
+  while (AppendingVars.size() > 1) {
+    // Get the first two elements in the map...
+    std::multimap<std::string, GlobalVariable*>::iterator Second =
+      AppendingVars.begin(), First=Second++;
+
+    // If the first two elements are for different names, there is no pair...
+    // Otherwise there is a pair, so link them together...
+    if (First->first == Second->first) {
+      GlobalVariable *G1 = First->second, *G2 = Second->second;
+      const ArrayType *T1 = cast<ArrayType>(G1->getType()->getElementType());
+      const ArrayType *T2 = cast<ArrayType>(G2->getType()->getElementType());
+
+      // Check to see that the two arrays agree on type...
+      if (T1->getElementType() != T2->getElementType())
+        return Error(ErrorMsg,
+         "Appending variables with different element types need to be linked!");
+      if (G1->isConstant() != G2->isConstant())
+        return Error(ErrorMsg,
+                     "Appending variables linked with different const'ness!");
+
+      if (G1->getAlignment() != G2->getAlignment())
+        return Error(ErrorMsg,
+         "Appending variables with different alignment need to be linked!");
+
+      if (G1->getVisibility() != G2->getVisibility())
+        return Error(ErrorMsg,
+         "Appending variables with different visibility need to be linked!");
+
+      if (G1->getSection() != G2->getSection())
+        return Error(ErrorMsg,
+         "Appending variables with different section name need to be linked!");
+
+      unsigned NewSize = T1->getNumElements() + T2->getNumElements();
+      ArrayType *NewType = ArrayType::get(T1->getElementType(), NewSize);
+
+      G1->setName("");   // Clear G1's name in case of a conflict!
+
+      // Create the new global variable...
+      GlobalVariable *NG =
+        new GlobalVariable(NewType, G1->isConstant(), G1->getLinkage(),
+                           /*init*/0, First->first, M, G1->isThreadLocal(),
+                           G1->getType()->getAddressSpace());
+
+      // Propagate alignment, visibility and section info.
+      CopyGVAttributes(NG, G1);
+
+      // Merge the initializer...
+      Inits.reserve(NewSize);
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(I->getOperand(i));
+      } else {
+        assert(isa<ConstantAggregateZero>(G1->getInitializer()));
+        Constant *CV = Constant::getNullValue(T1->getElementType());
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(I->getOperand(i));
+      } else {
+        assert(isa<ConstantAggregateZero>(G2->getInitializer()));
+        Constant *CV = Constant::getNullValue(T2->getElementType());
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
+      NG->setInitializer(ConstantArray::get(NewType, Inits));
+      Inits.clear();
+
+      // Replace any uses of the two global variables with uses of the new
+      // global...
+
+      // FIXME: This should rewrite simple/straight-forward uses such as
+      // getelementptr instructions to not use the Cast!
+      G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G1->getType()));
+      G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G2->getType()));
+
+      // Remove the two globals from the module now...
+      M->getGlobalList().erase(G1);
+      M->getGlobalList().erase(G2);
+
+      // Put the new global into the AppendingVars map so that we can handle
+      // linking of more than two vars...
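+      //
+      // (Illustrative aside, not part of the original patch: the merge is
+      // plain array concatenation. Modelled with standard C++ in place of
+      // Constant*, linking a [10 x i32] with a [5 x i32] behaves like:
+      //
+      //   std::vector<int> a(10, 1), b(5, 2), merged;  // hypothetical data
+      //   merged.reserve(a.size() + b.size());         // NewSize = 10 + 5
+      //   merged.insert(merged.end(), a.begin(), a.end());
+      //   merged.insert(merged.end(), b.begin(), b.end());
+      //   assert(merged.size() == 15);                 // yields [15 x i32]
+      //
+      // A ConstantAggregateZero initializer is first expanded into
+      // getNumElements() null values, which is what the two else branches
+      // above do.)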
+      Second->second = NG;
+    }
+    AppendingVars.erase(First);
+  }
+
+  return false;
+}
+
+static bool ResolveAliases(Module *Dest) {
+  for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
+       I != E; ++I)
+    if (const GlobalValue *GV = I->resolveAliasedGlobal())
+      if (GV != I && !GV->isDeclaration())
+        I->replaceAllUsesWith(const_cast<GlobalValue*>(GV));
+
+  return false;
+}
+
+// LinkModules - This function links two modules together, with the resulting
+// left module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem. Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool
+Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+  assert(Dest != 0 && "Invalid Destination module");
+  assert(Src != 0 && "Invalid Source Module");
+
+  if (Dest->getDataLayout().empty()) {
+    if (!Src->getDataLayout().empty()) {
+      Dest->setDataLayout(Src->getDataLayout());
+    } else {
+      std::string DataLayout;
+
+      if (Dest->getEndianness() == Module::AnyEndianness) {
+        if (Src->getEndianness() == Module::BigEndian)
+          DataLayout.append("E");
+        else if (Src->getEndianness() == Module::LittleEndian)
+          DataLayout.append("e");
+      }
+
+      if (Dest->getPointerSize() == Module::AnyPointerSize) {
+        if (Src->getPointerSize() == Module::Pointer64)
+          DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64");
+        else if (Src->getPointerSize() == Module::Pointer32)
+          DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32");
+      }
+      Dest->setDataLayout(DataLayout);
+    }
+  }
+
+  // Copy the target triple from the source to dest if the dest's is empty.
+  if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty())
+    Dest->setTargetTriple(Src->getTargetTriple());
+
+  if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
+      Src->getDataLayout() != Dest->getDataLayout())
+    cerr << "WARNING: Linking two modules of different data layouts!\n";
+  if (!Src->getTargetTriple().empty() &&
+      Dest->getTargetTriple() != Src->getTargetTriple())
+    cerr << "WARNING: Linking two modules of different target triples!\n";
+
+  // Append the module inline asm string.
+  if (!Src->getModuleInlineAsm().empty()) {
+    if (Dest->getModuleInlineAsm().empty())
+      Dest->setModuleInlineAsm(Src->getModuleInlineAsm());
+    else
+      Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+
+                               Src->getModuleInlineAsm());
+  }
+
+  // Update the destination module's dependent libraries list with the
+  // libraries from the source module. There's no opportunity for duplicates
+  // here as the Module ensures that duplicate insertions are discarded.
+  for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end();
+       SI != SE; ++SI)
+    Dest->addLibrary(*SI);
+
+  // LinkTypes - Go through the symbol table of the Src module and see if any
+  // types are named in the src module that are not named in the Dst module.
+  // Make sure there are no type name conflicts.
+  if (LinkTypes(Dest, Src, ErrorMsg))
+    return true;
+
+  // ValueMap - Mapping of values from what they used to be in Src, to what
+  // they are now in Dest.
+  std::map<const Value*, Value*> ValueMap;
+
+  // AppendingVars - Keep track of global variables in the destination module
+  // with appending linkage. After the module is linked together, they are
+  // appended and the module is rewritten.
+  std::multimap<std::string, GlobalVariable*> AppendingVars;
+  for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end();
+       I != E; ++I) {
+    // Add all of the appending globals already in the Dest module to
+    // AppendingVars.
+    if (I->hasAppendingLinkage())
+      AppendingVars.insert(std::make_pair(I->getName(), I));
+  }
+
+  // Insert all of the globals in src into the Dest module... without linking
+  // initializers (which could refer to functions not yet mapped over).
+  if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
+    return true;
+
+  // Link the functions together between the two modules, without doing
+  // function bodies... this just adds external function prototypes to the
+  // Dest module... We do this so that when we begin processing function
+  // bodies, all of the global values that may be referenced are available in
+  // our ValueMap.
+  if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
+    return true;
+
+  // If there were any aliases, link them now. We really need to do this now,
+  // because all of the aliases that may be referenced need to be available in
+  // ValueMap.
+  if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // Update the initializers in the Dest module now that all globals that may
+  // be referenced are in Dest.
+  if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // Link in the function bodies that are defined in the source module into
+  // the DestModule. This consists basically of copying the function over and
+  // fixing up references to values.
+  if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // If there were any appending global variables, link them together now.
+  if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+
+  // Resolve all uses of aliases with their aliasees.
+  if (ResolveAliases(Dest)) return true;
+
+  // If the source library's module id is in the dependent library list of the
+  // destination library, remove it since that module is now linked in.
+  sys::Path modId;
+  modId.set(Src->getModuleIdentifier());
+  if (!modId.isEmpty())
+    Dest->removeLibrary(modId.getBasename());
+
+  return false;
+}
+
+// vim: sw=2
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
new file mode 100644
index 000000000000..d6737721d25f
--- /dev/null
+++ b/lib/Linker/Linker.cpp
@@ -0,0 +1,178 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
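+//
+// For illustration (not part of the original patch), a minimal driver built
+// on this class might look like the following sketch; it is hypothetical
+// code and assumes only the constructors and methods defined in this file
+// plus the LinkModules entry point from LinkModules.cpp:
+//
+//   #include "llvm/Linker.h"
+//   using namespace llvm;
+//
+//   Module *linkTwo(Module *A, Module *B, std::string &Err) {
+//     Linker L("mydriver", A);            // wrap the composite module
+//     if (Linker::LinkModules(A, B, &Err))
+//       return 0;                         // Err describes the failure
+//     return L.releaseModule();           // caller now owns the result
+//   }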
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+Linker::Linker(const std::string& progname, const std::string& modname,
+               unsigned flags)
+  : Composite(0)
+  , LibPaths()
+  , Flags(flags)
+  , Error()
+  , ProgramName(progname)
+{
+  Composite = new Module(modname);
+}
+
+Linker::Linker(const std::string& progname, Module* aModule, unsigned flags)
+  : Composite(aModule)
+  , LibPaths()
+  , Flags(flags)
+  , Error()
+  , ProgramName(progname)
+{
+}
+
+Linker::~Linker() {
+  delete Composite;
+}
+
+bool
+Linker::error(const std::string& message) {
+  Error = message;
+  if (!(Flags&QuietErrors))
+    cerr << ProgramName << ": error: " << message << "\n";
+  return true;
+}
+
+bool
+Linker::warning(const std::string& message) {
+  Error = message;
+  if (!(Flags&QuietWarnings))
+    cerr << ProgramName << ": warning: " << message << "\n";
+  return false;
+}
+
+void
+Linker::verbose(const std::string& message) {
+  if (Flags&Verbose)
+    cerr << "  " << message << "\n";
+}
+
+void
+Linker::addPath(const sys::Path& path) {
+  LibPaths.push_back(path);
+}
+
+void
+Linker::addPaths(const std::vector<std::string>& paths) {
+  for (unsigned i = 0; i != paths.size(); ++i) {
+    sys::Path aPath;
+    aPath.set(paths[i]);
+    LibPaths.push_back(aPath);
+  }
+}
+
+void
+Linker::addSystemPaths() {
+  sys::Path::GetBitcodeLibraryPaths(LibPaths);
+  LibPaths.insert(LibPaths.begin(),sys::Path("./"));
+}
+
+Module*
+Linker::releaseModule() {
+  Module* result = Composite;
+  LibPaths.clear();
+  Error.clear();
+  Composite = 0;
+  Flags = 0;
+  return result;
+}
+
+// LoadObject - Read in and parse the bitcode file named by FN and return the
+// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
+// Error if an error occurs.
+std::auto_ptr<Module>
+Linker::LoadObject(const sys::Path &FN) {
+  std::string ParseErrorMessage;
+  Module *Result = 0;
+
+  const std::string &FNS = FN.toString();
+  std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FNS.c_str()));
+  if (Buffer.get())
+    Result = ParseBitcodeFile(Buffer.get(), &ParseErrorMessage);
+  else
+    ParseErrorMessage = "Error reading file '" + FNS + "'";
+
+  if (Result)
+    return std::auto_ptr<Module>(Result);
+  Error = "Bitcode file '" + FN.toString() + "' could not be loaded";
+  if (ParseErrorMessage.size())
+    Error += ": " + ParseErrorMessage;
+  return std::auto_ptr<Module>();
+}
+
+// IsLibrary - Determine if "Name" is a library in "Directory". Return
+// a non-empty sys::Path if it's found, an empty one otherwise.
+static inline sys::Path IsLibrary(const std::string& Name,
+                                  const sys::Path& Directory) {
+
+  sys::Path FullPath(Directory);
+
+  // Try the libX.a form
+  FullPath.appendComponent("lib" + Name);
+  FullPath.appendSuffix("a");
+  if (FullPath.isArchive())
+    return FullPath;
+
+  // Try the libX.bca form
+  FullPath.eraseSuffix();
+  FullPath.appendSuffix("bca");
+  if (FullPath.isArchive())
+    return FullPath;
+
+  // Try the libX.so (or .dylib) form
+  FullPath.eraseSuffix();
+  FullPath.appendSuffix(&(LTDL_SHLIB_EXT[1]));
+  if (FullPath.isDynamicLibrary())  // Native shared library?
+    return FullPath;
+  if (FullPath.isBitcodeFile())     // .so file containing bitcode?
+    return FullPath;
+
+  // Not found .. fall through
+
+  // Indicate that the library was not found in the directory.
+  FullPath.clear();
+  return FullPath;
+}
+
+/// FindLib - Try to convert Filename into the name of a file that we can open,
+/// if it does not already name a file we can open, by first trying to open
+/// Filename, then libFilename.[suffix] for each of a set of several common
+/// library suffixes, in each of the directories in LibPaths. Returns an empty
+/// Path if no matching file can be found.
+///
+sys::Path
+Linker::FindLib(const std::string &Filename) {
+  // Determine if the pathname can be found as it stands.
+  sys::Path FilePath(Filename);
+  if (FilePath.canRead() &&
+      (FilePath.isArchive() || FilePath.isDynamicLibrary()))
+    return FilePath;
+
+  // Iterate over the directories in Paths to see if we can find the library
+  // there.
+  for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
+    sys::Path Directory(LibPaths[Index]);
+    sys::Path FullPath = IsLibrary(Filename,Directory);
+    if (!FullPath.isEmpty())
+      return FullPath;
+  }
+  return sys::Path();
+}
diff --git a/lib/Linker/Makefile b/lib/Linker/Makefile
new file mode 100644
index 000000000000..19e646b74830
--- /dev/null
+++ b/lib/Linker/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMLinker
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 000000000000..8dd67d9957de
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Makefile ----------------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ..
+
+PARALLEL_DIRS = VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
+                Target ExecutionEngine Debugger Linker CompilerDriver
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
new file mode 100644
index 000000000000..3b03c54e9764
--- /dev/null
+++ b/lib/Support/APFloat.cpp
@@ -0,0 +1,2950 @@
+//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstring>
+
+using namespace llvm;
+
+#define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
+
+/* Assumed in hexadecimal significand parsing, and conversion to
+   hexadecimal strings. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+
+namespace llvm {
+
+  /* Represents floating point arithmetic semantics.
*/ + struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. */ + exponent_t maxExponent; + + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + exponent_t minExponent; + + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; + + /* True if arithmetic is supported. */ + unsigned int arithmeticOK; + }; + + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; + const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; + + // The PowerPC format consists of two doubles. It does not map cleanly + // onto the usual format above. For now only storage of constants of + // this type is supported, no arithmetic. + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; + + /* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. */ + const unsigned int maxExponent = 16383; + const unsigned int maxPrecision = 113; + const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) + / (351 * integerPartWidth)); +} + +/* A bunch of private, handy routines. */ + +static inline unsigned int +partCountForBits(unsigned int bits) +{ + return ((bits) + integerPartWidth - 1) / integerPartWidth; +} + +/* Returns 0U-9U. Return values >= 10U are not digits. */ +static inline unsigned int +decDigitValue(unsigned int c) +{ + return c - '0'; +} + +static unsigned int +hexDigitValue(unsigned int c) +{ + unsigned int r; + + r = c - '0'; + if(r <= 9) + return r; + + r = c - 'A'; + if(r <= 5) + return r + 10; + + r = c - 'a'; + if(r <= 5) + return r + 10; + + return -1U; +} + +static inline void +assertArithmeticOK(const llvm::fltSemantics &semantics) { + assert(semantics.arithmeticOK + && "Compile-time arithmetic does not support these semantics"); +} + +/* Return the value of a decimal exponent of the form + [+-]ddddddd. + + If the exponent overflows, returns a large exponent with the + appropriate sign. */ +static int +readExponent(const char *p) +{ + bool isNegative; + unsigned int absExponent; + const unsigned int overlargeExponent = 24000; /* FIXME. */ + + isNegative = (*p == '-'); + if (*p == '-' || *p == '+') + p++; + + absExponent = decDigitValue(*p++); + assert (absExponent < 10U); + + for (;;) { + unsigned int value; + + value = decDigitValue(*p); + if (value >= 10U) + break; + + p++; + value += absExponent * 10; + if (absExponent >= overlargeExponent) { + absExponent = overlargeExponent; + break; + } + absExponent = value; + } + + if (isNegative) + return -(int) absExponent; + else + return (int) absExponent; +} + +/* This is ugly and needs cleaning up, but I don't immediately see + how whilst remaining safe. 
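+
+   (Illustrative aside, not part of the original patch: the clamping below
+   saturates rather than wraps, so an exponent string like "e99999999" comes
+   back as 65535 and later overflows cleanly to infinity instead of aliasing
+   a small exponent:
+
+     int e = totalExponent("e99999999", 0);   // hypothetical call
+     assert(e == 65535);                      // saturated, not wrapped
+   )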
*/
+static int
+totalExponent(const char *p, int exponentAdjustment)
+{
+  int unsignedExponent;
+  bool negative, overflow;
+  int exponent;
+
+  /* Move past the exponent letter and sign to the digits.  */
+  p++;
+  negative = *p == '-';
+  if(*p == '-' || *p == '+')
+    p++;
+
+  unsignedExponent = 0;
+  overflow = false;
+  for(;;) {
+    unsigned int value;
+
+    value = decDigitValue(*p);
+    if(value >= 10U)
+      break;
+
+    p++;
+    unsignedExponent = unsignedExponent * 10 + value;
+    if(unsignedExponent > 65535)
+      overflow = true;
+  }
+
+  if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
+    overflow = true;
+
+  if(!overflow) {
+    exponent = unsignedExponent;
+    if(negative)
+      exponent = -exponent;
+    exponent += exponentAdjustment;
+    if(exponent > 65535 || exponent < -65536)
+      overflow = true;
+  }
+
+  if(overflow)
+    exponent = negative ? -65536: 65535;
+
+  return exponent;
+}
+
+static const char *
+skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
+{
+  *dot = 0;
+  while(*p == '0')
+    p++;
+
+  if(*p == '.') {
+    *dot = p++;
+    while(*p == '0')
+      p++;
+  }
+
+  return p;
+}
+
+/* Given a normal decimal floating point number of the form
+
+     dddd.dddd[eE][+-]ddd
+
+   where the decimal point and exponent are optional, fill out the
+   structure D.  Exponent is appropriate if the significand is
+   treated as an integer, and normalizedExponent if the significand
+   is taken to have the decimal point after a single leading
+   non-zero digit.
+
+   If the value is zero, V->firstSigDigit points to a non-digit, and
+   the return exponent is zero.
+*/
+struct decimalInfo {
+  const char *firstSigDigit;
+  const char *lastSigDigit;
+  int exponent;
+  int normalizedExponent;
+};
+
+static void
+interpretDecimal(const char *p, decimalInfo *D)
+{
+  const char *dot;
+
+  p = skipLeadingZeroesAndAnyDot (p, &dot);
+
+  D->firstSigDigit = p;
+  D->exponent = 0;
+  D->normalizedExponent = 0;
+
+  for (;;) {
+    if (*p == '.') {
+      assert(dot == 0);
+      dot = p++;
+    }
+    if (decDigitValue(*p) >= 10U)
+      break;
+    p++;
+  }
+
+  /* If the number is all zeroes, accept any exponent.  */
+  if (p != D->firstSigDigit) {
+    if (*p == 'e' || *p == 'E')
+      D->exponent = readExponent(p + 1);
+
+    /* Implied decimal point?  */
+    if (!dot)
+      dot = p;
+
+    /* Drop insignificant trailing zeroes.  */
+    do
+      do
+        p--;
+      while (*p == '0');
+    while (*p == '.');
+
+    /* Adjust the exponents for any decimal point.  */
+    D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
+    D->normalizedExponent = (D->exponent +
+              static_cast<exponent_t>((p - D->firstSigDigit)
+                                      - (dot > D->firstSigDigit && dot < p)));
+  }
+
+  D->lastSigDigit = p;
+}
+
+/* Return the trailing fraction of a hexadecimal number.
+   DIGITVALUE is the first hex digit of the fraction, P points to
+   the next digit.  */
+static lostFraction
+trailingHexadecimalFraction(const char *p, unsigned int digitValue)
+{
+  unsigned int hexDigit;
+
+  /* If the first trailing digit isn't 0 or 8 we can work out the
+     fraction immediately.  */
+  if(digitValue > 8)
+    return lfMoreThanHalf;
+  else if(digitValue < 8 && digitValue > 0)
+    return lfLessThanHalf;
+
+  /* Otherwise we need to find the first non-zero digit.  */
+  while(*p == '0')
+    p++;
+
+  hexDigit = hexDigitValue(*p);
+
+  /* If we ran off the end it is exactly zero or one-half, otherwise
+     a little more.  */
+  if(hexDigit == -1U)
+    return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
+  else
+    return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
+}
+
+/* Return the fraction lost were a bignum truncated losing the least
+   significant BITS bits.
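+
+   (Worked example, not part of the original patch: truncating the 8-bit
+   value 0xB4 == 0b10110100 by three bits discards 0b100, which is exactly
+   half of the truncated range, so the classification is lfExactlyHalf;
+   discarding 0b101 would be lfMoreThanHalf and 0b011 lfLessThanHalf:
+
+     unsigned v = 0xB4;
+     unsigned lost = v & 0x7;                 // low three bits = 0b100
+     bool exactlyHalf = (lost == 0x4);        // half of 2^3, so true
+   )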
*/ +static lostFraction +lostFractionThroughTruncation(const integerPart *parts, + unsigned int partCount, + unsigned int bits) +{ + unsigned int lsb; + + lsb = APInt::tcLSB(parts, partCount); + + /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + if(bits <= lsb) + return lfExactlyZero; + if(bits == lsb + 1) + return lfExactlyHalf; + if(bits <= partCount * integerPartWidth + && APInt::tcExtractBit(parts, bits - 1)) + return lfMoreThanHalf; + + return lfLessThanHalf; +} + +/* Shift DST right BITS bits noting lost fraction. */ +static lostFraction +shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) +{ + lostFraction lost_fraction; + + lost_fraction = lostFractionThroughTruncation(dst, parts, bits); + + APInt::tcShiftRight(dst, parts, bits); + + return lost_fraction; +} + +/* Combine the effect of two lost fractions. */ +static lostFraction +combineLostFractions(lostFraction moreSignificant, + lostFraction lessSignificant) +{ + if(lessSignificant != lfExactlyZero) { + if(moreSignificant == lfExactlyZero) + moreSignificant = lfLessThanHalf; + else if(moreSignificant == lfExactlyHalf) + moreSignificant = lfMoreThanHalf; + } + + return moreSignificant; +} + +/* The error from the true value, in half-ulps, on multiplying two + floating point numbers, which differ from the value they + approximate by at most HUE1 and HUE2 half-ulps, is strictly less + than the returned value. + + See "How to Read Floating Point Numbers Accurately" by William D + Clinger. */ +static unsigned int +HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) +{ + assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); + + if (HUerr1 + HUerr2 == 0) + return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ + else + return inexactMultiply + 2 * (HUerr1 + HUerr2); +} + +/* The number of ulps from the boundary (zero, or half if ISNEAREST) + when the least significant BITS are truncated. BITS cannot be + zero. */ +static integerPart +ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) +{ + unsigned int count, partBits; + integerPart part, boundary; + + assert (bits != 0); + + bits--; + count = bits / integerPartWidth; + partBits = bits % integerPartWidth + 1; + + part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + + if (isNearest) + boundary = (integerPart) 1 << (partBits - 1); + else + boundary = 0; + + if (count == 0) { + if (part - boundary <= boundary - part) + return part - boundary; + else + return boundary - part; + } + + if (part == boundary) { + while (--count) + if (parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return parts[0]; + } else if (part == boundary - 1) { + while (--count) + if (~parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return -parts[0]; + } + + return ~(integerPart) 0; /* A lot. */ +} + +/* Place pow(5, power) in DST, and return the number of parts used. + DST must be at least one part larger than size of the answer. 
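+
+   (Editorial aside, not part of the original patch: the sizing arithmetic
+   behind maxPowerOfFiveParts can be checked directly; 815/351 ~= 2.32194 is
+   a rational upper bound on log2(5) ~= 2.32193, and with 64-bit parts the
+   bound works out to 600 parts:
+
+     enum { partWidth = 64, maxExp = 16383, maxPrec = 113 };   // assumptions
+     enum { maxP5Exp = maxExp + maxPrec - 1 };                 // 16495
+     enum { maxP5Parts = 2 + (maxP5Exp * 815) / (351 * partWidth) };
+     extern int check[maxP5Parts == 600 ? 1 : -1];             // compile-time
+   )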
*/ +static unsigned int +powerOf5(integerPart *dst, unsigned int power) +{ + static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, + 15625, 78125 }; + integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; + pow5s[0] = 78125 * 5; + + unsigned int partsCount[16] = { 1 }; + integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + unsigned int result; + assert(power <= maxExponent); + + p1 = dst; + p2 = scratch; + + *p1 = firstEightPowers[power & 7]; + power >>= 3; + + result = 1; + pow5 = pow5s; + + for (unsigned int n = 0; power; power >>= 1, n++) { + unsigned int pc; + + pc = partsCount[n]; + + /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ + if (pc == 0) { + pc = partsCount[n - 1]; + APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); + pc *= 2; + if (pow5[pc - 1] == 0) + pc--; + partsCount[n] = pc; + } + + if (power & 1) { + integerPart *tmp; + + APInt::tcFullMultiply(p2, p1, pow5, result, pc); + result += pc; + if (p2[result - 1] == 0) + result--; + + /* Now result is in p1 with partsCount parts and p2 is scratch + space. */ + tmp = p1, p1 = p2, p2 = tmp; + } + + pow5 += pc; + } + + if (p1 != dst) + APInt::tcAssign(dst, p1, result); + + return result; +} + +/* Zero at the end to avoid modular arithmetic when adding one; used + when rounding up during hexadecimal output. */ +static const char hexDigitsLower[] = "0123456789abcdef0"; +static const char hexDigitsUpper[] = "0123456789ABCDEF0"; +static const char infinityL[] = "infinity"; +static const char infinityU[] = "INFINITY"; +static const char NaNL[] = "nan"; +static const char NaNU[] = "NAN"; + +/* Write out an integerPart in hexadecimal, starting with the most + significant nibble. Write out exactly COUNT hexdigits, return + COUNT. */ +static unsigned int +partAsHex (char *dst, integerPart part, unsigned int count, + const char *hexDigitChars) +{ + unsigned int result = count; + + assert (count != 0 && count <= integerPartWidth / 4); + + part >>= (integerPartWidth - 4 * count); + while (count--) { + dst[count] = hexDigitChars[part & 0xf]; + part >>= 4; + } + + return result; +} + +/* Write out an unsigned decimal integer. */ +static char * +writeUnsignedDecimal (char *dst, unsigned int n) +{ + char buff[40], *p; + + p = buff; + do + *p++ = '0' + n % 10; + while (n /= 10); + + do + *dst++ = *--p; + while (p != buff); + + return dst; +} + +/* Write out a signed decimal integer. */ +static char * +writeSignedDecimal (char *dst, int value) +{ + if (value < 0) { + *dst++ = '-'; + dst = writeUnsignedDecimal(dst, -(unsigned) value); + } else + dst = writeUnsignedDecimal(dst, value); + + return dst; +} + +/* Constructors. 
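+
+   (Editorial aside, not part of the original patch: several constructors
+   call makeNaN() below; for IEEE single -- precision 24, maxExponent 127 --
+   it ORs in 0x7fc00000, all exponent bits set plus the top fraction bit,
+   the standard quiet-NaN pattern:
+
+     uint32_t bits = 0x7fc00000U;             // hypothetical check; assumes
+     float f;                                 // 32-bit float
+     std::memcpy(&f, &bits, sizeof f);        // needs <cstring>
+     assert(std::isnan(f));                   // a quiet NaN, per <cmath>
+   )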
*/ +void +APFloat::initialize(const fltSemantics *ourSemantics) +{ + unsigned int count; + + semantics = ourSemantics; + count = partCount(); + if(count > 1) + significand.parts = new integerPart[count]; +} + +void +APFloat::freeSignificand() +{ + if(partCount() > 1) + delete [] significand.parts; +} + +void +APFloat::assign(const APFloat &rhs) +{ + assert(semantics == rhs.semantics); + + sign = rhs.sign; + category = rhs.category; + exponent = rhs.exponent; + sign2 = rhs.sign2; + exponent2 = rhs.exponent2; + if(category == fcNormal || category == fcNaN) + copySignificand(rhs); +} + +void +APFloat::copySignificand(const APFloat &rhs) +{ + assert(category == fcNormal || category == fcNaN); + assert(rhs.partCount() >= partCount()); + + APInt::tcAssign(significandParts(), rhs.significandParts(), + partCount()); +} + +/* Make this number a NaN, with an arbitrary but deterministic value + for the significand. If double or longer, this is a signalling NaN, + which may not be ideal. If float, this is QNaN(0). */ +void +APFloat::makeNaN(unsigned type) +{ + category = fcNaN; + // FIXME: Add double and long double support for QNaN(0). + if (semantics->precision == 24 && semantics->maxExponent == 127) { + type |= 0x7fc00000U; + type &= ~0x80000000U; + } else + type = ~0U; + APInt::tcSet(significandParts(), type, partCount()); +} + +APFloat & +APFloat::operator=(const APFloat &rhs) +{ + if(this != &rhs) { + if(semantics != rhs.semantics) { + freeSignificand(); + initialize(rhs.semantics); + } + assign(rhs); + } + + return *this; +} + +bool +APFloat::bitwiseIsEqual(const APFloat &rhs) const { + if (this == &rhs) + return true; + if (semantics != rhs.semantics || + category != rhs.category || + sign != rhs.sign) + return false; + if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + sign2 != rhs.sign2) + return false; + if (category==fcZero || category==fcInfinity) + return true; + else if (category==fcNormal && exponent!=rhs.exponent) + return false; + else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + exponent2!=rhs.exponent2) + return false; + else { + int i= partCount(); + const integerPart* p=significandParts(); + const integerPart* q=rhs.significandParts(); + for (; i>0; i--, p++, q++) { + if (*p != *q) + return false; + } + return true; + } +} + +APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) +{ + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + sign = 0; + zeroSignificand(); + exponent = ourSemantics.precision - 1; + significandParts()[0] = value; + normalize(rmNearestTiesToEven, lfExactlyZero); +} + +APFloat::APFloat(const fltSemantics &ourSemantics, + fltCategory ourCategory, bool negative, unsigned type) +{ + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + category = ourCategory; + sign = negative; + if (category == fcNormal) + category = fcZero; + else if (ourCategory == fcNaN) + makeNaN(type); +} + +APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) +{ + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + convertFromString(text, rmNearestTiesToEven); +} + +APFloat::APFloat(const APFloat &rhs) +{ + initialize(rhs.semantics); + assign(rhs); +} + +APFloat::~APFloat() +{ + freeSignificand(); +} + +// Profile - This method 'profiles' an APFloat for use with FoldingSet. 
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+  ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+  return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+  return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+  return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+  assert(category == fcNormal || category == fcNaN);
+
+  if(partCount() > 1)
+    return significand.parts;
+  else
+    return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+  category = fcNormal;
+  APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand.  */
+void
+APFloat::incrementSignificand()
+{
+  integerPart carry;
+
+  carry = APInt::tcIncrement(significandParts(), partCount());
+
+  /* Our callers should never cause us to overflow.  */
+  assert(carry == 0);
+}
+
+/* Add the significand of the RHS.  Returns the carry flag.  */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+  integerPart *parts;
+
+  parts = significandParts();
+
+  assert(semantics == rhs.semantics);
+  assert(exponent == rhs.exponent);
+
+  return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag.  Returns
+   the borrow flag.  */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+  integerPart *parts;
+
+  parts = significandParts();
+
+  assert(semantics == rhs.semantics);
+  assert(exponent == rhs.exponent);
+
+  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+                           partCount());
+}
+
+/* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
+   on to the full-precision result of the multiplication.  Returns the
+   lost fraction.  */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+  unsigned int omsb;      // One, not zero, based MSB.
+  unsigned int partsCount, newPartsCount, precision;
+  integerPart *lhsSignificand;
+  integerPart scratch[4];
+  integerPart *fullSignificand;
+  lostFraction lost_fraction;
+  bool ignored;
+
+  assert(semantics == rhs.semantics);
+
+  precision = semantics->precision;
+  newPartsCount = partCountForBits(precision * 2);
+
+  if(newPartsCount > 4)
+    fullSignificand = new integerPart[newPartsCount];
+  else
+    fullSignificand = scratch;
+
+  lhsSignificand = significandParts();
+  partsCount = partCount();
+
+  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+                        rhs.significandParts(), partsCount, partsCount);
+
+  lost_fraction = lfExactlyZero;
+  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+  exponent += rhs.exponent;
+
+  if(addend) {
+    Significand savedSignificand = significand;
+    const fltSemantics *savedSemantics = semantics;
+    fltSemantics extendedSemantics;
+    opStatus status;
+    unsigned int extendedPrecision;
+
+    /* Normalize our MSB.  */
+    extendedPrecision = precision + precision - 1;
+    if(omsb != extendedPrecision)
+      {
+        APInt::tcShiftLeft(fullSignificand, newPartsCount,
+                           extendedPrecision - omsb);
+        exponent -= extendedPrecision - omsb;
+      }
+
+    /* Create new semantics.
*/ + extendedSemantics = *semantics; + extendedSemantics.precision = extendedPrecision; + + if(newPartsCount == 1) + significand.part = fullSignificand[0]; + else + significand.parts = fullSignificand; + semantics = &extendedSemantics; + + APFloat extendedAddend(*addend); + status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); + assert(status == opOK); + lost_fraction = addOrSubtractSignificand(extendedAddend, false); + + /* Restore our state. */ + if(newPartsCount == 1) + fullSignificand[0] = significand.part; + significand = savedSignificand; + semantics = savedSemantics; + + omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; + } + + exponent -= (precision - 1); + + if(omsb > precision) { + unsigned int bits, significantParts; + lostFraction lf; + + bits = omsb - precision; + significantParts = partCountForBits(omsb); + lf = shiftRight(fullSignificand, significantParts, bits); + lost_fraction = combineLostFractions(lf, lost_fraction); + exponent += bits; + } + + APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); + + if(newPartsCount > 4) + delete [] fullSignificand; + + return lost_fraction; +} + +/* Multiply the significands of LHS and RHS to DST. */ +lostFraction +APFloat::divideSignificand(const APFloat &rhs) +{ + unsigned int bit, i, partsCount; + const integerPart *rhsSignificand; + integerPart *lhsSignificand, *dividend, *divisor; + integerPart scratch[4]; + lostFraction lost_fraction; + + assert(semantics == rhs.semantics); + + lhsSignificand = significandParts(); + rhsSignificand = rhs.significandParts(); + partsCount = partCount(); + + if(partsCount > 2) + dividend = new integerPart[partsCount * 2]; + else + dividend = scratch; + + divisor = dividend + partsCount; + + /* Copy the dividend and divisor as they will be modified in-place. */ + for(i = 0; i < partsCount; i++) { + dividend[i] = lhsSignificand[i]; + divisor[i] = rhsSignificand[i]; + lhsSignificand[i] = 0; + } + + exponent -= rhs.exponent; + + unsigned int precision = semantics->precision; + + /* Normalize the divisor. */ + bit = precision - APInt::tcMSB(divisor, partsCount) - 1; + if(bit) { + exponent += bit; + APInt::tcShiftLeft(divisor, partsCount, bit); + } + + /* Normalize the dividend. */ + bit = precision - APInt::tcMSB(dividend, partsCount) - 1; + if(bit) { + exponent -= bit; + APInt::tcShiftLeft(dividend, partsCount, bit); + } + + /* Ensure the dividend >= divisor initially for the loop below. + Incidentally, this means that the division loop below is + guaranteed to set the integer bit to one. */ + if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { + exponent--; + APInt::tcShiftLeft(dividend, partsCount, 1); + assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); + } + + /* Long division. */ + for(bit = precision; bit; bit -= 1) { + if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { + APInt::tcSubtract(dividend, divisor, 0, partsCount); + APInt::tcSetBit(lhsSignificand, bit - 1); + } + + APInt::tcShiftLeft(dividend, partsCount, 1); + } + + /* Figure out the lost fraction. 
*/ + int cmp = APInt::tcCompare(dividend, divisor, partsCount); + + if(cmp > 0) + lost_fraction = lfMoreThanHalf; + else if(cmp == 0) + lost_fraction = lfExactlyHalf; + else if(APInt::tcIsZero(dividend, partsCount)) + lost_fraction = lfExactlyZero; + else + lost_fraction = lfLessThanHalf; + + if(partsCount > 2) + delete [] dividend; + + return lost_fraction; +} + +unsigned int +APFloat::significandMSB() const +{ + return APInt::tcMSB(significandParts(), partCount()); +} + +unsigned int +APFloat::significandLSB() const +{ + return APInt::tcLSB(significandParts(), partCount()); +} + +/* Note that a zero result is NOT normalized to fcZero. */ +lostFraction +APFloat::shiftSignificandRight(unsigned int bits) +{ + /* Our exponent should not overflow. */ + assert((exponent_t) (exponent + bits) >= exponent); + + exponent += bits; + + return shiftRight(significandParts(), partCount(), bits); +} + +/* Shift the significand left BITS bits, subtract BITS from its exponent. */ +void +APFloat::shiftSignificandLeft(unsigned int bits) +{ + assert(bits < semantics->precision); + + if(bits) { + unsigned int partsCount = partCount(); + + APInt::tcShiftLeft(significandParts(), partsCount, bits); + exponent -= bits; + + assert(!APInt::tcIsZero(significandParts(), partsCount)); + } +} + +APFloat::cmpResult +APFloat::compareAbsoluteValue(const APFloat &rhs) const +{ + int compare; + + assert(semantics == rhs.semantics); + assert(category == fcNormal); + assert(rhs.category == fcNormal); + + compare = exponent - rhs.exponent; + + /* If exponents are equal, do an unsigned bignum comparison of the + significands. */ + if(compare == 0) + compare = APInt::tcCompare(significandParts(), rhs.significandParts(), + partCount()); + + if(compare > 0) + return cmpGreaterThan; + else if(compare < 0) + return cmpLessThan; + else + return cmpEqual; +} + +/* Handle overflow. Sign is preserved. We either become infinity or + the largest finite number. */ +APFloat::opStatus +APFloat::handleOverflow(roundingMode rounding_mode) +{ + /* Infinity? */ + if(rounding_mode == rmNearestTiesToEven + || rounding_mode == rmNearestTiesToAway + || (rounding_mode == rmTowardPositive && !sign) + || (rounding_mode == rmTowardNegative && sign)) + { + category = fcInfinity; + return (opStatus) (opOverflow | opInexact); + } + + /* Otherwise we become the largest finite number. */ + category = fcNormal; + exponent = semantics->maxExponent; + APInt::tcSetLeastSignificantBits(significandParts(), partCount(), + semantics->precision); + + return opInexact; +} + +/* Returns TRUE if, when truncating the current number, with BIT the + new LSB, with the given lost fraction and rounding mode, the result + would need to be rounded away from zero (i.e., by increasing the + signficand). This routine must work for fcZero of both signs, and + fcNormal numbers. */ +bool +APFloat::roundAwayFromZero(roundingMode rounding_mode, + lostFraction lost_fraction, + unsigned int bit) const +{ + /* NaNs and infinities should not have lost fractions. */ + assert(category == fcNormal || category == fcZero); + + /* Current callers never pass this so we don't handle it. */ + assert(lost_fraction != lfExactlyZero); + + switch (rounding_mode) { + default: + assert(0); + + case rmNearestTiesToAway: + return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; + + case rmNearestTiesToEven: + if(lost_fraction == lfMoreThanHalf) + return true; + + /* Our zeroes don't have a significand to test. 
*/ + if(lost_fraction == lfExactlyHalf && category != fcZero) + return APInt::tcExtractBit(significandParts(), bit); + + return false; + + case rmTowardZero: + return false; + + case rmTowardPositive: + return sign == false; + + case rmTowardNegative: + return sign == true; + } +} + +APFloat::opStatus +APFloat::normalize(roundingMode rounding_mode, + lostFraction lost_fraction) +{ + unsigned int omsb; /* One, not zero, based MSB. */ + int exponentChange; + + if(category != fcNormal) + return opOK; + + /* Before rounding normalize the exponent of fcNormal numbers. */ + omsb = significandMSB() + 1; + + if(omsb) { + /* OMSB is numbered from 1. We want to place it in the integer + bit numbered PRECISON if possible, with a compensating change in + the exponent. */ + exponentChange = omsb - semantics->precision; + + /* If the resulting exponent is too high, overflow according to + the rounding mode. */ + if(exponent + exponentChange > semantics->maxExponent) + return handleOverflow(rounding_mode); + + /* Subnormal numbers have exponent minExponent, and their MSB + is forced based on that. */ + if(exponent + exponentChange < semantics->minExponent) + exponentChange = semantics->minExponent - exponent; + + /* Shifting left is easy as we don't lose precision. */ + if(exponentChange < 0) { + assert(lost_fraction == lfExactlyZero); + + shiftSignificandLeft(-exponentChange); + + return opOK; + } + + if(exponentChange > 0) { + lostFraction lf; + + /* Shift right and capture any new lost fraction. */ + lf = shiftSignificandRight(exponentChange); + + lost_fraction = combineLostFractions(lf, lost_fraction); + + /* Keep OMSB up-to-date. */ + if(omsb > (unsigned) exponentChange) + omsb -= exponentChange; + else + omsb = 0; + } + } + + /* Now round the number according to rounding_mode given the lost + fraction. */ + + /* As specified in IEEE 754, since we do not trap we do not report + underflow for exact results. */ + if(lost_fraction == lfExactlyZero) { + /* Canonicalize zeroes. */ + if(omsb == 0) + category = fcZero; + + return opOK; + } + + /* Increment the significand if we're rounding away from zero. */ + if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { + if(omsb == 0) + exponent = semantics->minExponent; + + incrementSignificand(); + omsb = significandMSB() + 1; + + /* Did the significand increment overflow? */ + if(omsb == (unsigned) semantics->precision + 1) { + /* Renormalize by incrementing the exponent and shifting our + significand right one. However if we already have the + maximum exponent we overflow to infinity. */ + if(exponent == semantics->maxExponent) { + category = fcInfinity; + + return (opStatus) (opOverflow | opInexact); + } + + shiftSignificandRight(1); + + return opInexact; + } + } + + /* The normal case - we were and are not denormal, and any + significand increment above didn't overflow. */ + if(omsb == semantics->precision) + return opInexact; + + /* We have a non-zero denormal. */ + assert(omsb < semantics->precision); + + /* Canonicalize zeroes. */ + if(omsb == 0) + category = fcZero; + + /* The fcZero case is a denormal that underflowed to zero. 
*/ + return (opStatus) (opUnderflow | opInexact); +} + +APFloat::opStatus +APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) +{ + switch (convolve(category, rhs.category)) { + default: + assert(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcNormal, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcZero): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + case convolve(fcZero, fcInfinity): + category = fcInfinity; + sign = rhs.sign ^ subtract; + return opOK; + + case convolve(fcZero, fcNormal): + assign(rhs); + sign = rhs.sign ^ subtract; + return opOK; + + case convolve(fcZero, fcZero): + /* Sign depends on rounding mode; handled by caller. */ + return opOK; + + case convolve(fcInfinity, fcInfinity): + /* Differently signed infinities can only be validly + subtracted. */ + if(((sign ^ rhs.sign)!=0) != subtract) { + makeNaN(); + return opInvalidOp; + } + + return opOK; + + case convolve(fcNormal, fcNormal): + return opDivByZero; + } +} + +/* Add or subtract two normal numbers. */ +lostFraction +APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) +{ + integerPart carry; + lostFraction lost_fraction; + int bits; + + /* Determine if the operation on the absolute values is effectively + an addition or subtraction. */ + subtract ^= (sign ^ rhs.sign) ? true : false; + + /* Are we bigger exponent-wise than the RHS? */ + bits = exponent - rhs.exponent; + + /* Subtraction is more subtle than one might naively expect. */ + if(subtract) { + APFloat temp_rhs(rhs); + bool reverse; + + if (bits == 0) { + reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; + lost_fraction = lfExactlyZero; + } else if (bits > 0) { + lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); + shiftSignificandLeft(1); + reverse = false; + } else { + lost_fraction = shiftSignificandRight(-bits - 1); + temp_rhs.shiftSignificandLeft(1); + reverse = true; + } + + if (reverse) { + carry = temp_rhs.subtractSignificand + (*this, lost_fraction != lfExactlyZero); + copySignificand(temp_rhs); + sign = !sign; + } else { + carry = subtractSignificand + (temp_rhs, lost_fraction != lfExactlyZero); + } + + /* Invert the lost fraction - it was on the RHS and + subtracted. */ + if(lost_fraction == lfLessThanHalf) + lost_fraction = lfMoreThanHalf; + else if(lost_fraction == lfMoreThanHalf) + lost_fraction = lfLessThanHalf; + + /* The code above is intended to ensure that no borrow is + necessary. */ + assert(!carry); + } else { + if(bits > 0) { + APFloat temp_rhs(rhs); + + lost_fraction = temp_rhs.shiftSignificandRight(bits); + carry = addSignificand(temp_rhs); + } else { + lost_fraction = shiftSignificandRight(-bits); + carry = addSignificand(rhs); + } + + /* We have a guard bit; generating a carry cannot happen. 
*/ + assert(!carry); + } + + return lost_fraction; +} + +APFloat::opStatus +APFloat::multiplySpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + assert(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcInfinity): + category = fcInfinity; + return opOK; + + case convolve(fcZero, fcNormal): + case convolve(fcNormal, fcZero): + case convolve(fcZero, fcZero): + category = fcZero; + return opOK; + + case convolve(fcZero, fcInfinity): + case convolve(fcInfinity, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +APFloat::opStatus +APFloat::divideSpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + assert(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcInfinity, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcZero, fcInfinity): + case convolve(fcZero, fcNormal): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + category = fcZero; + return opOK; + + case convolve(fcNormal, fcZero): + category = fcInfinity; + return opDivByZero; + + case convolve(fcInfinity, fcInfinity): + case convolve(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +APFloat::opStatus +APFloat::modSpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + assert(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcZero, fcInfinity): + case convolve(fcZero, fcNormal): + case convolve(fcNormal, fcInfinity): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcZero): + case convolve(fcInfinity, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcInfinity): + case convolve(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +/* Change sign. */ +void +APFloat::changeSign() +{ + /* Look mummy, this one's easy. */ + sign = !sign; +} + +void +APFloat::clearSign() +{ + /* So is this one. */ + sign = 0; +} + +void +APFloat::copySign(const APFloat &rhs) +{ + /* And this one. */ + sign = rhs.sign; +} + +/* Normalized addition or subtraction. */ +APFloat::opStatus +APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, + bool subtract) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + + fs = addOrSubtractSpecials(rhs, subtract); + + /* This return code means it was not a simple case. 
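+
+   (Editorial aside, not part of the original patch: no division happens
+   here; addOrSubtractSpecials borrows opDivByZero as a sentinel meaning
+   "both operands are fcNormal, so do the real significand arithmetic". A
+   more explicit spelling of the same protocol would be something like:
+
+     enum SpecialsResult { Handled, NeedsArithmetic };   // hypothetical
+     SpecialsResult classify(bool lhsNormal, bool rhsNormal) {
+       return (lhsNormal && rhsNormal) ? NeedsArithmetic : Handled;
+     }
+
+   at the cost of widening the opStatus plumbing.)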
*/ + if(fs == opDivByZero) { + lostFraction lost_fraction; + + lost_fraction = addOrSubtractSignificand(rhs, subtract); + fs = normalize(rounding_mode, lost_fraction); + + /* Can only be zero if we lost no fraction. */ + assert(category != fcZero || lost_fraction == lfExactlyZero); + } + + /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a + positive zero unless rounding to minus infinity, except that + adding two like-signed zeroes gives that zero. */ + if(category == fcZero) { + if(rhs.category != fcZero || (sign == rhs.sign) == subtract) + sign = (rounding_mode == rmTowardNegative); + } + + return fs; +} + +/* Normalized addition. */ +APFloat::opStatus +APFloat::add(const APFloat &rhs, roundingMode rounding_mode) +{ + return addOrSubtract(rhs, rounding_mode, false); +} + +/* Normalized subtraction. */ +APFloat::opStatus +APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) +{ + return addOrSubtract(rhs, rounding_mode, true); +} + +/* Normalized multiply. */ +APFloat::opStatus +APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + sign ^= rhs.sign; + fs = multiplySpecials(rhs); + + if(category == fcNormal) { + lostFraction lost_fraction = multiplySignificand(rhs, 0); + fs = normalize(rounding_mode, lost_fraction); + if(lost_fraction != lfExactlyZero) + fs = (opStatus) (fs | opInexact); + } + + return fs; +} + +/* Normalized divide. */ +APFloat::opStatus +APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + sign ^= rhs.sign; + fs = divideSpecials(rhs); + + if(category == fcNormal) { + lostFraction lost_fraction = divideSignificand(rhs); + fs = normalize(rounding_mode, lost_fraction); + if(lost_fraction != lfExactlyZero) + fs = (opStatus) (fs | opInexact); + } + + return fs; +} + +/* Normalized remainder. This is not currently correct in all cases. */ +APFloat::opStatus +APFloat::remainder(const APFloat &rhs) +{ + opStatus fs; + APFloat V = *this; + unsigned int origSign = sign; + + assertArithmeticOK(*semantics); + fs = V.divide(rhs, rmNearestTiesToEven); + if (fs == opDivByZero) + return fs; + + int parts = partCount(); + integerPart *x = new integerPart[parts]; + bool ignored; + fs = V.convertToInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven, &ignored); + if (fs==opInvalidOp) + return fs; + + fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + assert(fs==opOK); // should always work + + fs = V.multiply(rhs, rmNearestTiesToEven); + assert(fs==opOK || fs==opInexact); // should not overflow or underflow + + fs = subtract(V, rmNearestTiesToEven); + assert(fs==opOK || fs==opInexact); // likewise + + if (isZero()) + sign = origSign; // IEEE754 requires this + delete[] x; + return fs; +} + +/* Normalized llvm frem (C fmod). + This is not currently correct in all cases. 
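+
+   (Illustrative aside, not part of the original patch: the only difference
+   from remainder() above is the rounding of the implicit quotient --
+   ties-to-even there, toward-zero here -- mirroring the C library split
+   between remainder() and fmod():
+
+     #include <cmath>
+     #include <cstdio>
+     int main() {
+       std::printf("%g\n", std::remainder(5.0, 3.0)); // -1: 5/3 rounds to 2
+       std::printf("%g\n", std::fmod(5.0, 3.0));      //  2: 5/3 truncates to 1
+       return 0;
+     }
+
+   Both give a zero result the sign of the dividend, which is the IEEE754
+   requirement the code restores by hand via origSign.)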
+APFloat::opStatus
+APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+{
+  opStatus fs;
+  assertArithmeticOK(*semantics);
+  fs = modSpecials(rhs);
+
+  if (category == fcNormal && rhs.category == fcNormal) {
+    APFloat V = *this;
+    unsigned int origSign = sign;
+
+    fs = V.divide(rhs, rmNearestTiesToEven);
+    if (fs == opDivByZero)
+      return fs;
+
+    int parts = partCount();
+    integerPart *x = new integerPart[parts];
+    bool ignored;
+    fs = V.convertToInteger(x, parts * integerPartWidth, true,
+                            rmTowardZero, &ignored);
+    if (fs==opInvalidOp) {
+      delete[] x;   // don't leak the scratch buffer on this error path
+      return fs;
+    }
+
+    fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+                                          rmNearestTiesToEven);
+    assert(fs==opOK);   // should always work
+
+    fs = V.multiply(rhs, rounding_mode);
+    assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
+
+    fs = subtract(V, rounding_mode);
+    assert(fs==opOK || fs==opInexact); // likewise
+
+    if (isZero())
+      sign = origSign;    // IEEE754 requires this
+    delete[] x;
+  }
+  return fs;
+}
+
+/* Normalized fused-multiply-add.  */
+APFloat::opStatus
+APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
+                          const APFloat &addend,
+                          roundingMode rounding_mode)
+{
+  opStatus fs;
+
+  assertArithmeticOK(*semantics);
+
+  /* Post-multiplication sign, before addition.  */
+  sign ^= multiplicand.sign;
+
+  /* If and only if all arguments are normal do we need to do an
+     extended-precision calculation.  */
+  if(category == fcNormal
+     && multiplicand.category == fcNormal
+     && addend.category == fcNormal) {
+    lostFraction lost_fraction;
+
+    lost_fraction = multiplySignificand(multiplicand, &addend);
+    fs = normalize(rounding_mode, lost_fraction);
+    if(lost_fraction != lfExactlyZero)
+      fs = (opStatus) (fs | opInexact);
+
+    /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+       positive zero unless rounding to minus infinity, except that
+       adding two like-signed zeroes gives that zero.  */
+    if(category == fcZero && sign != addend.sign)
+      sign = (rounding_mode == rmTowardNegative);
+  } else {
+    fs = multiplySpecials(multiplicand);
+
+    /* FS can only be opOK or opInvalidOp.  There is no more work
+       to do in the latter case.  The IEEE-754R standard says it is
+       implementation-defined in this case whether, if ADDEND is a
+       quiet NaN, we raise invalid op; this implementation does so.
+
+       If we need to do the addition we can do so with normal
+       precision.  */
+    if(fs == opOK)
+      fs = addOrSubtract(addend, rounding_mode, false);
+  }
+
+  return fs;
+}
+
+/* Comparison requires normalized numbers.  */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+  cmpResult result;
+
+  assertArithmeticOK(*semantics);
+  assert(semantics == rhs.semantics);
+
+  switch (convolve(category, rhs.category)) {
+  default:
+    assert(0);
+
+  case convolve(fcNaN, fcZero):
+  case convolve(fcNaN, fcNormal):
+  case convolve(fcNaN, fcInfinity):
+  case convolve(fcNaN, fcNaN):
+  case convolve(fcZero, fcNaN):
+  case convolve(fcNormal, fcNaN):
+  case convolve(fcInfinity, fcNaN):
+    return cmpUnordered;
+
+  case convolve(fcInfinity, fcNormal):
+  case convolve(fcInfinity, fcZero):
+  case convolve(fcNormal, fcZero):
+    if(sign)
+      return cmpLessThan;
+    else
+      return cmpGreaterThan;
+
+  case convolve(fcNormal, fcInfinity):
+  case convolve(fcZero, fcInfinity):
+  case convolve(fcZero, fcNormal):
+    if(rhs.sign)
+      return cmpGreaterThan;
+    else
+      return cmpLessThan;
+
+  case convolve(fcInfinity, fcInfinity):
+    if(sign == rhs.sign)
+      return cmpEqual;
+    else if(sign)
+      return cmpLessThan;
+    else
+      return cmpGreaterThan;
+
+  case convolve(fcZero, fcZero):
+    return cmpEqual;
+
+  case convolve(fcNormal, fcNormal):
+    break;
+  }
+
+  /* Two normal numbers.  Do they have the same sign?  */
+  if(sign != rhs.sign) {
+    if(sign)
+      result = cmpLessThan;
+    else
+      result = cmpGreaterThan;
+  } else {
+    /* Compare absolute values; invert result if negative.  */
+    result = compareAbsoluteValue(rhs);
+
+    if(sign) {
+      if(result == cmpLessThan)
+        result = cmpGreaterThan;
+      else if(result == cmpGreaterThan)
+        result = cmpLessThan;
+    }
+  }
+
+  return result;
+}
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will not produce the
+/// original value (this is almost the same as return value!=opOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+                 roundingMode rounding_mode, bool *losesInfo)
+{
+  lostFraction lostFraction;
+  unsigned int newPartCount, oldPartCount;
+  opStatus fs;
+
+  assertArithmeticOK(*semantics);
+  assertArithmeticOK(toSemantics);
+  lostFraction = lfExactlyZero;
+  newPartCount = partCountForBits(toSemantics.precision + 1);
+  oldPartCount = partCount();
+
+  /* Handle storage complications.  If our new form is wider,
+     re-allocate our bit pattern into wider storage.  If it is
+     narrower, we ignore the excess parts, but if narrowing to a
+     single part we need to free the old storage.
+     Be careful not to reference significandParts for zeroes
+     and infinities, since it aborts.  */
+  if (newPartCount > oldPartCount) {
+    integerPart *newParts;
+    newParts = new integerPart[newPartCount];
+    APInt::tcSet(newParts, 0, newPartCount);
+    if (category==fcNormal || category==fcNaN)
+      APInt::tcAssign(newParts, significandParts(), oldPartCount);
+    freeSignificand();
+    significand.parts = newParts;
+  } else if (newPartCount < oldPartCount) {
+    /* Capture any lost fraction through truncation of parts so we get
+       correct rounding whilst normalizing.  */
+    if (category==fcNormal)
+      lostFraction = lostFractionThroughTruncation
+        (significandParts(), oldPartCount, toSemantics.precision);
+    if (newPartCount == 1) {
+      integerPart newPart = 0;
+      if (category==fcNormal || category==fcNaN)
+        newPart = significandParts()[0];
+      freeSignificand();
+      significand.part = newPart;
+    }
+  }
+
+  if(category == fcNormal) {
+    /* Re-interpret our bit-pattern.  */
+    exponent += toSemantics.precision - semantics->precision;
+    semantics = &toSemantics;
+    fs = normalize(rounding_mode, lostFraction);
+    *losesInfo = (fs != opOK);
+  } else if (category == fcNaN) {
+    int shift = toSemantics.precision - semantics->precision;
+    // Do this now so significandParts gets the right answer
+    const fltSemantics *oldSemantics = semantics;
+    semantics = &toSemantics;
+    *losesInfo = false;
+    // No normalization here, just truncate
+    if (shift>0)
+      APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+    else if (shift < 0) {
+      unsigned ushift = -shift;
+      // Figure out if we are losing information.  This happens
+      // if we are shifting out something other than 0s, or if the x87 long
+      // double input did not have its integer bit set (pseudo-NaN), or if the
+      // x87 long double input did not have its QNaN bit set (because the x87
+      // hardware sets this bit when converting a lower-precision NaN to
+      // x87 long double).
+      if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
+        *losesInfo = true;
+      if (oldSemantics == &APFloat::x87DoubleExtended &&
+          (!(*significandParts() & 0x8000000000000000ULL) ||
+           !(*significandParts() & 0x4000000000000000ULL)))
+        *losesInfo = true;
+      APInt::tcShiftRight(significandParts(), newPartCount, ushift);
+    }
+    // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+    // does not give you back the same bits.  This is dubious, and we
+    // don't currently do it.  You're really supposed to get
+    // an invalid operation signal at runtime, but nobody does that.
+    fs = opOK;
+  } else {
+    semantics = &toSemantics;
+    fs = opOK;
+    *losesInfo = false;
+  }
+
+  return fs;
+}
+
+/* Convert a floating point number to an integer according to the
+   rounding mode.  If the rounded integer value is out of range this
+   returns an invalid operation exception and the contents of the
+   destination parts are unspecified.  If the rounded value is in
+   range but the floating point number is not the exact integer, the C
+   standard doesn't require an inexact exception to be raised.  IEEE
+   854 does require it so we do that.
+
+   Note that for conversions to integer type the C standard requires
+   round-to-zero to always be used.  */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+                                      bool isSigned,
+                                      roundingMode rounding_mode,
+                                      bool *isExact) const
+{
+  lostFraction lost_fraction;
+  const integerPart *src;
+  unsigned int dstPartsCount, truncatedBits;
+
+  assertArithmeticOK(*semantics);
+
+  *isExact = false;
+
+  /* Handle the three special cases first.  */
+  if(category == fcInfinity || category == fcNaN)
+    return opInvalidOp;
+
+  dstPartsCount = partCountForBits(width);
+
+  if(category == fcZero) {
+    APInt::tcSet(parts, 0, dstPartsCount);
+    // Negative zero can't be represented as an int.
+    *isExact = !sign;
+    return opOK;
+  }
+
+  src = significandParts();
+
+  /* Step 1: place our absolute value, with any fraction truncated, in
+     the destination.  */
+  if (exponent < 0) {
+    /* Our absolute value is less than one; truncate everything.  */
+    APInt::tcSet(parts, 0, dstPartsCount);
+    /* For exponent -1 the integer bit represents .5, look at that.
+       For smaller exponents leftmost truncated bit is 0. */
+    truncatedBits = semantics->precision -1U - exponent;
+  } else {
+    /* We want the most significant (exponent + 1) bits; the rest are
+       truncated.  */
+    unsigned int bits = exponent + 1U;
+
+    /* Hopelessly large in magnitude?
*/ + if (bits > width) + return opInvalidOp; + + if (bits < semantics->precision) { + /* We truncate (semantics->precision - bits) bits. */ + truncatedBits = semantics->precision - bits; + APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); + } else { + /* We want at least as many bits as are available. */ + APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); + APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); + truncatedBits = 0; + } + } + + /* Step 2: work out any lost fraction, and increment the absolute + value if we would round away from zero. */ + if (truncatedBits) { + lost_fraction = lostFractionThroughTruncation(src, partCount(), + truncatedBits); + if (lost_fraction != lfExactlyZero + && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { + if (APInt::tcIncrement(parts, dstPartsCount)) + return opInvalidOp; /* Overflow. */ + } + } else { + lost_fraction = lfExactlyZero; + } + + /* Step 3: check if we fit in the destination. */ + unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; + + if (sign) { + if (!isSigned) { + /* Negative numbers cannot be represented as unsigned. */ + if (omsb != 0) + return opInvalidOp; + } else { + /* It takes omsb bits to represent the unsigned integer value. + We lose a bit for the sign, but care is needed as the + maximally negative integer is a special case. */ + if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) + return opInvalidOp; + + /* This case can happen because of rounding. */ + if (omsb > width) + return opInvalidOp; + } + + APInt::tcNegate (parts, dstPartsCount); + } else { + if (omsb >= width + !isSigned) + return opInvalidOp; + } + + if (lost_fraction == lfExactlyZero) { + *isExact = true; + return opOK; + } else + return opInexact; +} + +/* Same as convertToSignExtendedInteger, except we provide + deterministic values in case of an invalid operation exception, + namely zero for NaNs and the minimal or maximal value respectively + for underflow or overflow. + The *isExact output tells whether the result is exact, in the sense + that converting it back to the original floating point type produces + the original value. This is almost equivalent to result==opOK, + except for negative zeroes. +*/ +APFloat::opStatus +APFloat::convertToInteger(integerPart *parts, unsigned int width, + bool isSigned, + roundingMode rounding_mode, bool *isExact) const +{ + opStatus fs; + + fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, + isExact); + + if (fs == opInvalidOp) { + unsigned int bits, dstPartsCount; + + dstPartsCount = partCountForBits(width); + + if (category == fcNaN) + bits = 0; + else if (sign) + bits = isSigned; + else + bits = width - isSigned; + + APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); + if (sign && isSigned) + APInt::tcShiftLeft(parts, dstPartsCount, width - 1); + } + + return fs; +} + +/* Convert an unsigned integer SRC to a floating point number, + rounding according to ROUNDING_MODE. The sign of the floating + point number is not modified. 
*/
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+                                  unsigned int srcCount,
+                                  roundingMode rounding_mode)
+{
+  unsigned int omsb, precision, dstCount;
+  integerPart *dst;
+  lostFraction lost_fraction;
+
+  assertArithmeticOK(*semantics);
+  category = fcNormal;
+  omsb = APInt::tcMSB(src, srcCount) + 1;
+  dst = significandParts();
+  dstCount = partCount();
+  precision = semantics->precision;
+
+  /* We want the most significant PRECISION bits of SRC.  There may not
+     be that many; extract what we can.  */
+  if (precision <= omsb) {
+    exponent = omsb - 1;
+    lost_fraction = lostFractionThroughTruncation(src, srcCount,
+                                                  omsb - precision);
+    APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+  } else {
+    exponent = precision - 1;
+    lost_fraction = lfExactlyZero;
+    APInt::tcExtract(dst, dstCount, src, omsb, 0);
+  }
+
+  return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+                          bool isSigned,
+                          roundingMode rounding_mode)
+{
+  unsigned int partCount = Val.getNumWords();
+  APInt api = Val;
+
+  sign = false;
+  if (isSigned && api.isNegative()) {
+    sign = true;
+    api = -api;
+  }
+
+  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+   rounding according to ROUNDING_MODE.  ISSIGNED is true if the
+   integer is signed, in which case it must be sign-extended.  */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+                                        unsigned int srcCount,
+                                        bool isSigned,
+                                        roundingMode rounding_mode)
+{
+  opStatus status;
+
+  assertArithmeticOK(*semantics);
+  if (isSigned
+      && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+    integerPart *copy;
+
+    /* If we're signed and negative, negate a copy.  */
+    sign = true;
+    copy = new integerPart[srcCount];
+    APInt::tcAssign(copy, src, srcCount);
+    APInt::tcNegate(copy, srcCount);
+    status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+    delete [] copy;
+  } else {
+    sign = false;
+    status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+  }
+
+  return status;
+}
+
+/* FIXME: should this just take a const APInt reference?  */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+                                        unsigned int width, bool isSigned,
+                                        roundingMode rounding_mode)
+{
+  unsigned int partCount = partCountForBits(width);
+  APInt api = APInt(width, partCount, parts);
+
+  sign = false;
+  if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
+    sign = true;
+    api = -api;
+  }
+
+  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(const char *p,
+                                      roundingMode rounding_mode)
+{
+  lostFraction lost_fraction;
+  integerPart *significand;
+  unsigned int bitPos, partsCount;
+  const char *dot, *firstSignificantDigit;
+
+  zeroSignificand();
+  exponent = 0;
+  category = fcNormal;
+
+  significand = significandParts();
+  partsCount = partCount();
+  bitPos = partsCount * integerPartWidth;
+
+  /* Skip leading zeroes and any (hexa)decimal point.  */
+  p = skipLeadingZeroesAndAnyDot(p, &dot);
+  firstSignificantDigit = p;
+
+  for(;;) {
+    integerPart hex_value;
+
+    if(*p == '.') {
+      assert(dot == 0);
+      dot = p++;
+    }
+
+    hex_value = hexDigitValue(*p);
+    if(hex_value == -1U) {
+      lost_fraction = lfExactlyZero;
+      break;
+    }
+
+    p++;
+
+    /* Store the number whilst 4-bit nibbles remain.  */
+    if(bitPos) {
+      bitPos -= 4;
+      hex_value <<= bitPos % integerPartWidth;
+      significand[bitPos / integerPartWidth] |= hex_value;
+    } else {
+      lost_fraction = trailingHexadecimalFraction(p, hex_value);
+      while(hexDigitValue(*p) != -1U)
+        p++;
+      break;
+    }
+  }
+
+  /* Hex floats require an exponent but not a hexadecimal point.  */
+  assert(*p == 'p' || *p == 'P');
+
+  /* Ignore the exponent if we are zero.  */
+  if(p != firstSignificantDigit) {
+    int expAdjustment;
+
+    /* Implicit hexadecimal point?  */
+    if(!dot)
+      dot = p;
+
+    /* Calculate the exponent adjustment implicit in the number of
+       significant digits.  */
+    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+    if(expAdjustment < 0)
+      expAdjustment++;
+    expAdjustment = expAdjustment * 4 - 1;
+
+    /* Adjust for writing the significand starting at the most
+       significant nibble.  */
+    expAdjustment += semantics->precision;
+    expAdjustment -= partsCount * integerPartWidth;
+
+    /* Adjust for the given exponent.  */
+    exponent = totalExponent(p, expAdjustment);
+  }
+
+  return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+                                      unsigned sigPartCount, int exp,
+                                      roundingMode rounding_mode)
+{
+  unsigned int parts, pow5PartCount;
+  fltSemantics calcSemantics = { 32767, -32767, 0, true };
+  integerPart pow5Parts[maxPowerOfFiveParts];
+  bool isNearest;
+
+  isNearest = (rounding_mode == rmNearestTiesToEven
+               || rounding_mode == rmNearestTiesToAway);
+
+  parts = partCountForBits(semantics->precision + 11);
+
+  /* Calculate pow(5, abs(exp)).  */
+  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+  for (;; parts *= 2) {
+    opStatus sigStatus, powStatus;
+    unsigned int excessPrecision, truncatedBits;
+
+    calcSemantics.precision = parts * integerPartWidth - 1;
+    excessPrecision = calcSemantics.precision - semantics->precision;
+    truncatedBits = excessPrecision;
+
+    APFloat decSig(calcSemantics, fcZero, sign);
+    APFloat pow5(calcSemantics, fcZero, false);
+
+    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+                                                rmNearestTiesToEven);
+    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+                                              rmNearestTiesToEven);
+    /* Add exp, as 10^n = 5^n * 2^n.  */
+    decSig.exponent += exp;
+
+    lostFraction calcLostFraction;
+    integerPart HUerr, HUdistance;
+    unsigned int powHUerr;
+
+    if (exp >= 0) {
+      /* multiplySignificand leaves the precision-th bit set to 1.  */
+      calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+      powHUerr = powStatus != opOK;
+    } else {
+      calcLostFraction = decSig.divideSignificand(pow5);
+      /* Denormal numbers have less precision.  */
+      if (decSig.exponent < semantics->minExponent) {
+        excessPrecision += (semantics->minExponent - decSig.exponent);
+        truncatedBits = excessPrecision;
+        if (excessPrecision > calcSemantics.precision)
+          excessPrecision = calcSemantics.precision;
+      }
+      /* Extra half-ulp lost in reciprocal of exponent.  */
+      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
+    }
+
+    /* Both multiplySignificand and divideSignificand return the
+       result with the integer bit set.  */
+    assert (APInt::tcExtractBit
+            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+                       powHUerr);
+    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+                                      excessPrecision, isNearest);
+
+    /* Are we guaranteed to round correctly if we truncate?  */
+    if (HUdistance >= HUerr) {
+      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+                       calcSemantics.precision - excessPrecision,
+                       excessPrecision);
+      /* Take the exponent of decSig.  If we tcExtract-ed less bits
+         above we must adjust our exponent to compensate for the
+         implicit right shift.  */
+      exponent = (decSig.exponent + semantics->precision
+                  - (calcSemantics.precision - excessPrecision));
+      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+                                                       decSig.partCount(),
+                                                       truncatedBits);
+      return normalize(rounding_mode, calcLostFraction);
+    }
+  }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
+{
+  decimalInfo D;
+  opStatus fs;
+
+  /* Scan the text.  */
+  interpretDecimal(p, &D);
+
+  /* Handle the quick cases.  First the case of no significant digits,
+     i.e. zero, and then exponents that are obviously too large or too
+     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
+     definitely overflows if
+
+           (exp - 1) * L >= maxExponent
+
+     and definitely underflows to zero where
+
+           (exp + 1) * L <= minExponent - precision
+
+     With integer arithmetic the tightest bounds for L are
+
+           93/28 < L < 196/59            [ numerator <= 256 ]
+           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
+  */
+
+  if (decDigitValue(*D.firstSigDigit) >= 10U) {
+    category = fcZero;
+    fs = opOK;
+  } else if ((D.normalizedExponent + 1) * 28738
+             <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
+    /* Underflow to zero and round.  */
+    zeroSignificand();
+    fs = normalize(rounding_mode, lfLessThanHalf);
+  } else if ((D.normalizedExponent - 1) * 42039
+             >= 12655 * semantics->maxExponent) {
+    /* Overflow and round.  */
+    fs = handleOverflow(rounding_mode);
+  } else {
+    integerPart *decSignificand;
+    unsigned int partCount;
+
+    /* A tight upper bound on number of bits required to hold an
+       N-digit decimal integer is N * 196 / 59.  Allocate enough space
+       to hold the full significand, and an extra part required by
+       tcMultiplyPart.  */
+    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+    partCount = partCountForBits(1 + 196 * partCount / 59);
+    decSignificand = new integerPart[partCount + 1];
+    partCount = 0;
+
+    /* Convert to binary efficiently - we do almost all multiplication
+       in an integerPart.  When this would overflow we do a single
+       bignum multiplication, and then revert again to multiplication
+       in an integerPart.  */
+    do {
+      integerPart decValue, val, multiplier;
+
+      val = 0;
+      multiplier = 1;
+
+      do {
+        if (*p == '.')
+          p++;
+
+        decValue = decDigitValue(*p++);
+        multiplier *= 10;
+        val = val * 10 + decValue;
+        /* The maximum number that can be multiplied by ten with any
+           digit added without overflowing an integerPart.  */
+      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+      /* Multiply out the current part.  */
+      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+                            partCount, partCount + 1, false);
+
+      /* If we used another part (likely but not guaranteed), increase
+         the count.  */
+      if (decSignificand[partCount])
+        partCount++;
+    } while (p <= D.lastSigDigit);
+
+    category = fcNormal;
+    fs = roundSignificandWithExponent(decSignificand, partCount,
+                                      D.exponent, rounding_mode);
+
+    delete [] decSignificand;
+  }
+
+  return fs;
+}
+
+APFloat::opStatus
+APFloat::convertFromString(const char *p, roundingMode rounding_mode)
+{
+  assertArithmeticOK(*semantics);
+
+  /* Handle a leading minus sign.  */
+  if(*p == '-')
+    sign = 1, p++;
+  else
+    sign = 0;
+
+  if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
+    return convertFromHexadecimalString(p + 2, rounding_mode);
+
+  return convertFromDecimalString(p, rounding_mode);
+}
+
+/* Write out a hexadecimal representation of the floating point value
+   to DST, which must be of sufficient size, in the C99 form
+   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
+   excluding the terminating NUL.
+
+   If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+   HEXDIGITS digits appear altogether, rounding the value if
+   necessary.  If HEXDIGITS is 0, the minimal precision to display the
+   number precisely is used instead.  If nothing would appear after
+   the decimal point it is suppressed.
+
+   The decimal exponent is always printed and has at least one digit.
+   Zero values display an exponent of zero.  Infinities and NaNs
+   appear as "infinity" or "nan" respectively.
+
+   The above rules are as specified by C99.  There is ambiguity about
+   what the leading hexadecimal digit should be.  This implementation
+   uses whatever is necessary so that the exponent is displayed as
+   stored.  This implies the exponent will fall within the IEEE format
+   range, and the leading hexadecimal digit will be 0 (for denormals),
+   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+   any other digits zero).
+*/
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+                            bool upperCase, roundingMode rounding_mode) const
+{
+  char *p;
+
+  assertArithmeticOK(*semantics);
+
+  p = dst;
+  if (sign)
+    *dst++ = '-';
+
+  switch (category) {
+  case fcInfinity:
+    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
+    dst += sizeof infinityL - 1;
+    break;
+
+  case fcNaN:
+    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+    dst += sizeof NaNU - 1;
+    break;
+
+  case fcZero:
+    *dst++ = '0';
+    *dst++ = upperCase ? 'X': 'x';
+    *dst++ = '0';
+    if (hexDigits > 1) {
+      *dst++ = '.';
+      memset (dst, '0', hexDigits - 1);
+      dst += hexDigits - 1;
+    }
+    *dst++ = upperCase ? 'P': 'p';
+    *dst++ = '0';
+    break;
+
+  case fcNormal:
+    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+    break;
+  }
+
+  *dst = 0;
+
+  return static_cast<unsigned int>(dst - p);
+}
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+   form of a normal floating point number with the specified number of
+   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
+   digits necessary to print the value precisely is output.  */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+                                  bool upperCase,
+                                  roundingMode rounding_mode) const
+{
+  unsigned int count, valueBits, shift, partsCount, outputDigits;
+  const char *hexDigitChars;
+  const integerPart *significand;
+  char *p;
+  bool roundUp;
+
+  *dst++ = '0';
+  *dst++ = upperCase ? 'X': 'x';
+
+  roundUp = false;
+  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+  significand = significandParts();
+  partsCount = partCount();
+
+  /* +3 because the first digit only uses the single integer bit, so
+     we have 3 virtual zero most-significant-bits.  */
+  valueBits = semantics->precision + 3;
+  shift = integerPartWidth - valueBits % integerPartWidth;
+
+  /* The natural number of digits required ignoring trailing
+     insignificant zeroes.  */
+  outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+  /* hexDigits of zero means use the required number for the
+     precision.  Otherwise, see if we are truncating.  If we are,
+     find out if we need to round away from zero.  */
+  if (hexDigits) {
+    if (hexDigits < outputDigits) {
+      /* We are dropping non-zero bits, so need to check how to round.
+         "bits" is the number of dropped bits.  */
+      unsigned int bits;
+      lostFraction fraction;
+
+      bits = valueBits - hexDigits * 4;
+      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+    }
+    outputDigits = hexDigits;
+  }
+
+  /* Write the digits consecutively, and start writing in the location
+     of the hexadecimal point.  We move the most significant digit
+     left and add the hexadecimal point later.  */
+  p = ++dst;
+
+  count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+  while (outputDigits && count) {
+    integerPart part;
+
+    /* Put the most significant integerPartWidth bits in "part".  */
+    if (--count == partsCount)
+      part = 0;  /* An imaginary higher zero part.  */
+    else
+      part = significand[count] << shift;
+
+    if (count && shift)
+      part |= significand[count - 1] >> (integerPartWidth - shift);
+
+    /* Convert as much of "part" to hexdigits as we can.  */
+    unsigned int curDigits = integerPartWidth / 4;
+
+    if (curDigits > outputDigits)
+      curDigits = outputDigits;
+    dst += partAsHex (dst, part, curDigits, hexDigitChars);
+    outputDigits -= curDigits;
+  }
+
+  if (roundUp) {
+    char *q = dst;
+
+    /* Note that hexDigitChars has a trailing '0'.  */
+    do {
+      q--;
+      *q = hexDigitChars[hexDigitValue (*q) + 1];
+    } while (*q == '0');
+    assert (q >= p);
+  } else {
+    /* Add trailing zeroes.  */
+    memset (dst, '0', outputDigits);
+    dst += outputDigits;
+  }
+
+  /* Move the most significant digit to before the point, and if there
+     is something after the decimal point add it.  This must come
+     after rounding above.  */
+  p[-1] = p[0];
+  if (dst -1 == p)
+    dst--;
+  else
+    p[0] = '.';
+
+  /* Finally output the exponent.  */
+  *dst++ = upperCase ? 'P': 'p';
+
+  return writeSignedDecimal (dst, exponent);
+}
+
+// For good performance it is desirable for different APFloats
+// to produce different integers.
+uint32_t
+APFloat::getHashValue() const
+{
+  if (category==fcZero) return sign<<8 | semantics->precision ;
+  else if (category==fcInfinity) return sign<<9 | semantics->precision;
+  else if (category==fcNaN) return 1<<10 | semantics->precision;
+  else {
+    uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
+    const integerPart* p = significandParts();
+    for (int i=partCount(); i>0; i--, p++)
+      hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
+    return hash;
+  }
+}
+
+// Conversion from APFloat to/from host float/double.  It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while.  This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE representations.  We compensate for that here.
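+
+// Illustrative sketch, not part of the imported r72732 source: a worked
+// example of that compensation using IEEEdouble.  The smallest positive
+// denormal, 2^-1074, is encoded with exponent field 0 and significand 1;
+// APFloat holds it with exponent == minExponent (-1022) and the integer bit
+// (1ULL << 52) clear.  Writing it out, convertDoubleAPFloatToAPInt() computes
+// myexponent = -1022 + 1023 = 1 and, seeing no integer bit, rewrites the
+// field to 0; reading it back, initFromDoubleAPInt() sees myexponent == 0 and
+// forces exponent to -1022 rather than the naive 0 - 1023 = -1023.  The
+// helper name below is hypothetical.
+#if 0 /* illustrative only; guarded out so the import is unchanged */
+static void exampleDenormalRoundTrip() {
+  APInt bits(64, 1);                   // exponent field 0, significand 1
+  APFloat f(bits, false);              // initFromDoubleAPInt: exponent -1022
+  assert(f.bitcastToAPInt() == bits);  // the compensation round-trips exactly
+}
+#endif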
+ +APInt +APFloat::convertF80LongDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); + assert (partCount()==2); + + uint64_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = 0x8000000000000000ULL; + } else { + assert(category == fcNaN && "Unknown category"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + } + + uint64_t words[2]; + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 15) | + (myexponent & 0x7fffLL); + return APInt(80, 2, words); +} + +APInt +APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); + assert (partCount()==2); + + uint64_t myexponent, mysignificand, myexponent2, mysignificand2; + + if (category==fcNormal) { + myexponent = exponent + 1023; //bias + myexponent2 = exponent2 + 1023; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal + if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) + myexponent2 = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + myexponent2 = 0; + mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7ff; + myexponent2 = 0; + mysignificand = 0; + mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category"); + myexponent = 0x7ff; + mysignificand = significandParts()[0]; + myexponent2 = exponent2; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL); + words[1] = ((uint64_t)(sign2 & 1) << 63) | + ((myexponent2 & 0x7ff) << 52) | + (mysignificand2 & 0xfffffffffffffLL); + return APInt(128, 2, words); +} + +APInt +APFloat::convertDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + assert (partCount()==1); + + uint64_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+1023; //bias + mysignificand = *significandParts(); + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x7ff; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7ff; + mysignificand = *significandParts(); + } + + return APInt(64, ((((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL)))); +} + +APInt +APFloat::convertFloatAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + assert (partCount()==1); + + uint32_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+127; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x800000)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0xff; + mysignificand = 0; + } else { + 
assert(category == fcNaN && "Unknown category!");
+    myexponent = 0xff;
+    mysignificand = (uint32_t)*significandParts();
+  }
+
+  return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
+                    (mysignificand & 0x7fffff)));
+}
+
+// This function creates an APInt that is just a bit map of the floating
+// point constant as it would appear in memory.  It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+  if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+    return convertFloatAPFloatToAPInt();
+
+  if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+    return convertDoubleAPFloatToAPInt();
+
+  if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+    return convertPPCDoubleDoubleAPFloatToAPInt();
+
+  assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+         "unknown format!");
+  return convertF80LongDoubleAPFloatToAPInt();
+}
+
+float
+APFloat::convertToFloat() const
+{
+  assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+  APInt api = bitcastToAPInt();
+  return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+  assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+  APInt api = bitcastToAPInt();
+  return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format.  Intel hardware (387 and later)
+/// does not support these bit patterns:
+///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+///  exponent = 0, integer bit 1 ("pseudodenormal")
+///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+  assert(api.getBitWidth()==80);
+  uint64_t i1 = api.getRawData()[0];
+  uint64_t i2 = api.getRawData()[1];
+  uint64_t myexponent = (i2 & 0x7fff);
+  uint64_t mysignificand = i1;
+
+  initialize(&APFloat::x87DoubleExtended);
+  assert(partCount()==2);
+
+  sign = static_cast<unsigned int>(i2>>15);
+  if (myexponent==0 && mysignificand==0) {
+    // exponent, significand meaningless
+    category = fcZero;
+  } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+    // exponent, significand meaningless
+    category = fcInfinity;
+  } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+    // exponent meaningless
+    category = fcNaN;
+    significandParts()[0] = mysignificand;
+    significandParts()[1] = 0;
+  } else {
+    category = fcNormal;
+    exponent = myexponent - 16383;
+    significandParts()[0] = mysignificand;
+    significandParts()[1] = 0;
+    if (myexponent==0)          // denormal
+      exponent = -16382;
+  }
+}
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+  assert(api.getBitWidth()==128);
+  uint64_t i1 = api.getRawData()[0];
+  uint64_t i2 = api.getRawData()[1];
+  uint64_t myexponent = (i1 >> 52) & 0x7ff;
+  uint64_t mysignificand = i1 & 0xfffffffffffffLL;
+  uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
+  uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
+
+  initialize(&APFloat::PPCDoubleDouble);
+  assert(partCount()==2);
+
+  sign = static_cast<unsigned int>(i1>>63);
+  sign2 = static_cast<unsigned int>(i2>>63);
+  if (myexponent==0 && mysignificand==0) {
+    // exponent, significand meaningless
+    // exponent2 and significand2 are required to be 0; we don't check
+    category = fcZero;
+  } else if (myexponent==0x7ff && mysignificand==0) {
+    // exponent, significand meaningless
+    // exponent2 and significand2 are required to be 0; we don't check
+    category = fcInfinity;
+  } else if (myexponent==0x7ff && mysignificand!=0) {
+    // exponent meaningless.  So is the whole second word, but keep it
+    // for determinism.
+    category = fcNaN;
+    exponent2 = myexponent2;
+    significandParts()[0] = mysignificand;
+    significandParts()[1] = mysignificand2;
+  } else {
+    category = fcNormal;
+    // Note there is no category2; the second word is treated as if it is
+    // fcNormal, although it might be something else considered by itself.
+    exponent = myexponent - 1023;
+    exponent2 = myexponent2 - 1023;
+    significandParts()[0] = mysignificand;
+    significandParts()[1] = mysignificand2;
+    if (myexponent==0)          // denormal
+      exponent = -1022;
+    else
+      significandParts()[0] |= 0x10000000000000LL;  // integer bit
+    if (myexponent2==0)
+      exponent2 = -1022;
+    else
+      significandParts()[1] |= 0x10000000000000LL;  // integer bit
+  }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+  assert(api.getBitWidth()==64);
+  uint64_t i = *api.getRawData();
+  uint64_t myexponent = (i >> 52) & 0x7ff;
+  uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+  initialize(&APFloat::IEEEdouble);
+  assert(partCount()==1);
+
+  sign = static_cast<unsigned int>(i>>63);
+  if (myexponent==0 && mysignificand==0) {
+    // exponent, significand meaningless
+    category = fcZero;
+  } else if (myexponent==0x7ff && mysignificand==0) {
+    // exponent, significand meaningless
+    category = fcInfinity;
+  } else if (myexponent==0x7ff && mysignificand!=0) {
+    // exponent meaningless
+    category = fcNaN;
+    *significandParts() = mysignificand;
+  } else {
+    category = fcNormal;
+    exponent = myexponent - 1023;
+    *significandParts() = mysignificand;
+    if (myexponent==0)          // denormal
+      exponent = -1022;
+    else
+      *significandParts() |= 0x10000000000000LL;  // integer bit
+  }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+  assert(api.getBitWidth()==32);
+  uint32_t i = (uint32_t)*api.getRawData();
+  uint32_t myexponent = (i >> 23) & 0xff;
+  uint32_t mysignificand = i & 0x7fffff;
+
+  initialize(&APFloat::IEEEsingle);
+  assert(partCount()==1);
+
+  sign = i >> 31;
+  if (myexponent==0 && mysignificand==0) {
+    // exponent, significand meaningless
+    category = fcZero;
+  } else if (myexponent==0xff && mysignificand==0) {
+    // exponent, significand meaningless
+    category = fcInfinity;
+  } else if (myexponent==0xff && mysignificand!=0) {
+    // sign, exponent, significand meaningless
+    category = fcNaN;
+    *significandParts() = mysignificand;
+  } else {
+    category = fcNormal;
+    exponent = myexponent - 127;  //bias
+    *significandParts() = mysignificand;
+    if (myexponent==0)    // denormal
+      exponent = -126;
+    else
+      *significandParts() |= 0x800000; // integer bit
+  }
+}
+
+/// Treat api as containing the bits of a floating point number.  Currently
+/// we infer the floating point type from the size of the APInt.  The
+/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
+/// when the size is anything else).
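+///
+/// Illustrative sketch, not part of the imported r72732 source: how the
+/// width-based dispatch below is typically exercised.  0x3f800000 encodes
+/// 1.0f (sign 0, exponent field 127, significand 0) and 0x3ff0000000000000
+/// encodes 1.0; the helper name is hypothetical.
+#if 0 /* illustrative only; guarded out so the import is unchanged */
+static void exampleInitFromAPIntDispatch() {
+  APFloat f(APInt(32, 0x3f800000ULL), false);          // initFromFloatAPInt
+  APFloat d(APInt(64, 0x3ff0000000000000ULL), false);  // initFromDoubleAPInt
+  assert(f.convertToFloat() == 1.0f && d.convertToDouble() == 1.0);
+}
+#endif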
+void
+APFloat::initFromAPInt(const APInt& api, bool isIEEE)
+{
+  if (api.getBitWidth() == 32)
+    return initFromFloatAPInt(api);
+  else if (api.getBitWidth()==64)
+    return initFromDoubleAPInt(api);
+  else if (api.getBitWidth()==80)
+    return initFromF80LongDoubleAPInt(api);
+  else if (api.getBitWidth()==128 && !isIEEE)
+    return initFromPPCDoubleDoubleAPInt(api);
+  else
+    assert(0);
+}
+
+APFloat::APFloat(const APInt& api, bool isIEEE)
+{
+  initFromAPInt(api, isIEEE);
+}
+
+APFloat::APFloat(float f)
+{
+  APInt api = APInt(32, 0);
+  initFromAPInt(api.floatToBits(f));
+}
+
+APFloat::APFloat(double d)
+{
+  APInt api = APInt(64, 0);
+  initFromAPInt(api.doubleToBits(d));
+}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
new file mode 100644
index 000000000000..73bf774b1717
--- /dev/null
+++ b/lib/Support/APInt.cpp
@@ -0,0 +1,2816 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <limits>
+#include <cstring>
+#include <cstdlib>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+  uint64_t * result = new uint64_t[numWords];
+  assert(result && "APInt memory allocation fails!");
+  memset(result, 0, numWords * sizeof(uint64_t));
+  return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure.  The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) { + uint64_t * result = new uint64_t[numWords]; + assert(result && "APInt memory allocation fails!"); + return result; +} + +void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { + pVal = getClearedMemory(getNumWords()); + pVal[0] = val; + if (isSigned && int64_t(val) < 0) + for (unsigned i = 1; i < getNumWords(); ++i) + pVal[i] = -1ULL; +} + +void APInt::initSlowCase(const APInt& that) { + pVal = getMemory(getNumWords()); + memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); +} + + +APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) + : BitWidth(numBits), VAL(0) { + assert(BitWidth && "bitwidth too small"); + assert(bigVal && "Null pointer detected!"); + if (isSingleWord()) + VAL = bigVal[0]; + else { + // Get memory, cleared to 0 + pVal = getClearedMemory(getNumWords()); + // Calculate the number of words to copy + unsigned words = std::min(numWords, getNumWords()); + // Copy the words from bigVal to pVal + memcpy(pVal, bigVal, words * APINT_WORD_SIZE); + } + // Make sure unused high bits are cleared + clearUnusedBits(); +} + +APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen, + uint8_t radix) + : BitWidth(numbits), VAL(0) { + assert(BitWidth && "bitwidth too small"); + fromString(numbits, StrStart, slen, radix); +} + +APInt& APInt::AssignSlowCase(const APInt& RHS) { + // Don't do anything for X = X + if (this == &RHS) + return *this; + + if (BitWidth == RHS.getBitWidth()) { + // assume same bit-width single-word case is already handled + assert(!isSingleWord()); + memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); + return *this; + } + + if (isSingleWord()) { + // assume case where both are single words is already handled + assert(!RHS.isSingleWord()); + VAL = 0; + pVal = getMemory(RHS.getNumWords()); + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + } else if (getNumWords() == RHS.getNumWords()) + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + else if (RHS.isSingleWord()) { + delete [] pVal; + VAL = RHS.VAL; + } else { + delete [] pVal; + pVal = getMemory(RHS.getNumWords()); + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + } + BitWidth = RHS.BitWidth; + return clearUnusedBits(); +} + +APInt& APInt::operator=(uint64_t RHS) { + if (isSingleWord()) + VAL = RHS; + else { + pVal[0] = RHS; + memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + } + return clearUnusedBits(); +} + +/// Profile - This method 'profiles' an APInt for use with FoldingSet. +void APInt::Profile(FoldingSetNodeID& ID) const { + ID.AddInteger(BitWidth); + + if (isSingleWord()) { + ID.AddInteger(VAL); + return; + } + + unsigned NumWords = getNumWords(); + for (unsigned i = 0; i < NumWords; ++i) + ID.AddInteger(pVal[i]); +} + +/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// "digit" integer array, x[]. x[] is modified to reflect the addition and +/// 1 is returned if there is a carry out, otherwise 0 is returned. +/// @returns the carry of the addition. +static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { + for (unsigned i = 0; i < len; ++i) { + dest[i] = y + x[i]; + if (dest[i] < y) + y = 1; // Carry one to next digit. + else { + y = 0; // No need to carry so exit early + break; + } + } + return y; +} + +/// @brief Prefix increment operator. Increments the APInt by one. 
+APInt& APInt::operator++() {
+  if (isSingleWord())
+    ++VAL;
+  else
+    add_1(pVal, pVal, getNumWords(), 1);
+  return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "digits" in x.  The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+  for (unsigned i = 0; i < len; ++i) {
+    uint64_t X = x[i];
+    x[i] -= y;
+    if (y > X)
+      y = 1;  // We have to "borrow 1" from next "digit"
+    else {
+      y = 0;  // No need to borrow
+      break;  // Remaining digits are unchanged so exit early
+    }
+  }
+  return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+  if (isSingleWord())
+    --VAL;
+  else
+    sub_1(pVal, getNumWords(), 1);
+  return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array Y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+                unsigned len) {
+  bool carry = false;
+  for (unsigned i = 0; i< len; ++i) {
+    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+    dest[i] = x[i] + y[i] + carry;
+    carry = dest[i] < limit || (carry && dest[i] == limit);
+  }
+  return carry;
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  if (isSingleWord())
+    VAL += RHS.VAL;
+  else {
+    add(pVal, pVal, RHS.pVal, getNumWords());
+  }
+  return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+                unsigned len) {
+  bool borrow = false;
+  for (unsigned i = 0; i < len; ++i) {
+    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+    dest[i] = x_tmp - y[i];
+  }
+  return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  if (isSingleWord())
+    VAL -= RHS.VAL;
+  else
+    sub(pVal, pVal, RHS.pVal, getNumWords());
+  return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+  // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+  uint64_t carry = 0;
+
+  // For each digit of x.
+  for (unsigned i = 0; i < len; ++i) {
+    // Split x into high and low words
+    uint64_t lx = x[i] & 0xffffffffULL;
+    uint64_t hx = x[i] >> 32;
+    // hasCarry - A flag to indicate if there is a carry to the next digit.
+    // hasCarry == 0, no carry
+    // hasCarry == 1, has carry
+    // hasCarry == 2, no carry and the calculation result == 0.
+    uint8_t hasCarry = 0;
+    dest[i] = carry + lx * ly;
+    // Determine if the add above introduces carry.
+    hasCarry = (dest[i] < carry) ? 1 : 0;
+    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+    // (2^32 - 1) + 2^32 = 2^64.
+    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+    carry += (lx * hy) & 0xffffffffULL;
+    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+  }
+  return carry;
+}
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplication of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+                unsigned ylen) {
+  dest[xlen] = mul_1(dest, x, xlen, y[0]);
+  for (unsigned i = 1; i < ylen; ++i) {
+    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+    uint64_t carry = 0, lx = 0, hx = 0;
+    for (unsigned j = 0; j < xlen; ++j) {
+      lx = x[j] & 0xffffffffULL;
+      hx = x[j] >> 32;
+      // hasCarry - A flag to indicate if there is a carry to the next digit.
+      // hasCarry == 0, no carry
+      // hasCarry == 1, has carry
+      // hasCarry == 2, no carry and the calculation result == 0.
+      uint8_t hasCarry = 0;
+      uint64_t resul = carry + lx * ly;
+      hasCarry = (resul < carry) ? 1 : 0;
+      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
+      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+      carry += (lx * hy) & 0xffffffffULL;
+      resul = (carry << 32) | (resul & 0xffffffffULL);
+      dest[i+j] += resul;
+      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
+              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
+              ((lx * hy) >> 32) + hx * hy;
+    }
+    dest[i+xlen] = carry;
+  }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  if (isSingleWord()) {
+    VAL *= RHS.VAL;
+    clearUnusedBits();
+    return *this;
+  }
+
+  // Get some bit facts about LHS and check for zero
+  unsigned lhsBits = getActiveBits();
+  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+  if (!lhsWords)
+    // 0 * X ===> 0
+    return *this;
+
+  // Get some bit facts about RHS and check for zero
+  unsigned rhsBits = RHS.getActiveBits();
+  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+  if (!rhsWords) {
+    // X * 0 ===> 0
+    clear();
+    return *this;
+  }
+
+  // Allocate space for the result
+  unsigned destWords = rhsWords + lhsWords;
+  uint64_t *dest = getMemory(destWords);
+
+  // Perform the long multiply
+  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+  // Copy result back into *this
+  clear();
+  unsigned wordsToCopy = destWords >= getNumWords() ?
getNumWords() : destWords; + memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); + + // delete dest array and return + delete[] dest; + return *this; +} + +APInt& APInt::operator&=(const APInt& RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL &= RHS.VAL; + return *this; + } + unsigned numWords = getNumWords(); + for (unsigned i = 0; i < numWords; ++i) + pVal[i] &= RHS.pVal[i]; + return *this; +} + +APInt& APInt::operator|=(const APInt& RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL |= RHS.VAL; + return *this; + } + unsigned numWords = getNumWords(); + for (unsigned i = 0; i < numWords; ++i) + pVal[i] |= RHS.pVal[i]; + return *this; +} + +APInt& APInt::operator^=(const APInt& RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL ^= RHS.VAL; + this->clearUnusedBits(); + return *this; + } + unsigned numWords = getNumWords(); + for (unsigned i = 0; i < numWords; ++i) + pVal[i] ^= RHS.pVal[i]; + return clearUnusedBits(); +} + +APInt APInt::AndSlowCase(const APInt& RHS) const { + unsigned numWords = getNumWords(); + uint64_t* val = getMemory(numWords); + for (unsigned i = 0; i < numWords; ++i) + val[i] = pVal[i] & RHS.pVal[i]; + return APInt(val, getBitWidth()); +} + +APInt APInt::OrSlowCase(const APInt& RHS) const { + unsigned numWords = getNumWords(); + uint64_t *val = getMemory(numWords); + for (unsigned i = 0; i < numWords; ++i) + val[i] = pVal[i] | RHS.pVal[i]; + return APInt(val, getBitWidth()); +} + +APInt APInt::XorSlowCase(const APInt& RHS) const { + unsigned numWords = getNumWords(); + uint64_t *val = getMemory(numWords); + for (unsigned i = 0; i < numWords; ++i) + val[i] = pVal[i] ^ RHS.pVal[i]; + + // 0^0==1 so clear the high bits in case they got set. + return APInt(val, getBitWidth()).clearUnusedBits(); +} + +bool APInt::operator !() const { + if (isSingleWord()) + return !VAL; + + for (unsigned i = 0; i < getNumWords(); ++i) + if (pVal[i]) + return false; + return true; +} + +APInt APInt::operator*(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return APInt(BitWidth, VAL * RHS.VAL); + APInt Result(*this); + Result *= RHS; + return Result.clearUnusedBits(); +} + +APInt APInt::operator+(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return APInt(BitWidth, VAL + RHS.VAL); + APInt Result(BitWidth, 0); + add(Result.pVal, this->pVal, RHS.pVal, getNumWords()); + return Result.clearUnusedBits(); +} + +APInt APInt::operator-(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return APInt(BitWidth, VAL - RHS.VAL); + APInt Result(BitWidth, 0); + sub(Result.pVal, this->pVal, RHS.pVal, getNumWords()); + return Result.clearUnusedBits(); +} + +bool APInt::operator[](unsigned bitPosition) const { + return (maskBit(bitPosition) & + (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0; +} + +bool APInt::EqualSlowCase(const APInt& RHS) const { + // Get some facts about the number of bits used in the two operands. + unsigned n1 = getActiveBits(); + unsigned n2 = RHS.getActiveBits(); + + // If the number of bits isn't the same, they aren't equal + if (n1 != n2) + return false; + + // If the number of bits fits in a word, we only need to compare the low word. 
+  if (n1 <= APINT_BITS_PER_WORD)
+    return pVal[0] == RHS.pVal[0];
+
+  // Otherwise, compare everything
+  for (int i = whichWord(n1 - 1); i >= 0; --i)
+    if (pVal[i] != RHS.pVal[i])
+      return false;
+  return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+  unsigned n = getActiveBits();
+  if (n <= APINT_BITS_PER_WORD)
+    return pVal[0] == Val;
+  else
+    return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+  if (isSingleWord())
+    return VAL < RHS.VAL;
+
+  // Get active bit length of both operands
+  unsigned n1 = getActiveBits();
+  unsigned n2 = RHS.getActiveBits();
+
+  // If magnitude of LHS is less than RHS, return true.
+  if (n1 < n2)
+    return true;
+
+  // If magnitude of RHS is greater than LHS, return false.
+  if (n2 < n1)
+    return false;
+
+  // If they both fit in a word, just compare the low order word
+  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+    return pVal[0] < RHS.pVal[0];
+
+  // Otherwise, compare all words
+  unsigned topWord = whichWord(std::max(n1,n2)-1);
+  for (int i = topWord; i >= 0; --i) {
+    if (pVal[i] > RHS.pVal[i])
+      return false;
+    if (pVal[i] < RHS.pVal[i])
+      return true;
+  }
+  return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+  if (isSingleWord()) {
+    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+    return lhsSext < rhsSext;
+  }
+
+  APInt lhs(*this);
+  APInt rhs(RHS);
+  bool lhsNeg = isNegative();
+  bool rhsNeg = rhs.isNegative();
+  if (lhsNeg) {
+    // Sign bit is set so perform two's complement to make it positive
+    lhs.flip();
+    lhs++;
+  }
+  if (rhsNeg) {
+    // Sign bit is set so perform two's complement to make it positive
+    rhs.flip();
+    rhs++;
+  }
+
+  // Now we have unsigned values to compare so do the comparison if necessary
+  // based on the negativeness of the values.
+  if (lhsNeg)
+    if (rhsNeg)
+      return lhs.ugt(rhs);
+    else
+      return true;
+  else if (rhsNeg)
+    return false;
+  else
+    return lhs.ult(rhs);
+}
+
+APInt& APInt::set(unsigned bitPosition) {
+  if (isSingleWord())
+    VAL |= maskBit(bitPosition);
+  else
+    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+  return *this;
+}
+
+/// Set the given bit to 0 whose position is given as "bitPosition".
+/// @brief Set a given bit to 0.
+APInt& APInt::clear(unsigned bitPosition) {
+  if (isSingleWord())
+    VAL &= ~maskBit(bitPosition);
+  else
+    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+  return *this;
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+APInt& APInt::flip(unsigned bitPosition) { + assert(bitPosition < BitWidth && "Out of the bit-width range!"); + if ((*this)[bitPosition]) clear(bitPosition); + else set(bitPosition); + return *this; +} + +unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) { + assert(str != 0 && "Invalid value string"); + assert(slen > 0 && "Invalid string length"); + + // Each computation below needs to know if it's negative + unsigned isNegative = str[0] == '-'; + if (isNegative) { + slen--; + str++; + } + // For radixes of power-of-two values, the bits required are accurately and + // easily computed + if (radix == 2) + return slen + isNegative; + if (radix == 8) + return slen * 3 + isNegative; + if (radix == 16) + return slen * 4 + isNegative; + + // Otherwise it must be radix == 10, the hard case + assert(radix == 10 && "Invalid radix"); + + // This is grossly inefficient but accurate. We could probably do something + // with a computation of roughly slen*64/20 and then adjust by the value of + // the first few digits. But, I'm not sure how accurate that could be. + + // Compute a sufficient number of bits that is always large enough but might + // be too large. This avoids the assertion in the constructor. + unsigned sufficient = slen*64/18; + + // Convert to the actual binary value. + APInt tmp(sufficient, str, slen, radix); + + // Compute how many bits are required. + return isNegative + tmp.logBase2() + 1; +} + +// From http://www.burtleburtle.net, by Bob Jenkins. +// When targeting x86, both GCC and LLVM seem to recognize this as a +// rotate instruction. +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +// From http://www.burtleburtle.net, by Bob Jenkins. +#define mix(a,b,c) \ + { \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ + } + +// From http://www.burtleburtle.net, by Bob Jenkins. +#define final(a,b,c) \ + { \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ + } + +// hashword() was adapted from http://www.burtleburtle.net, by Bob +// Jenkins. k is a pointer to an array of uint32_t values; length is +// the length of the key, in 32-bit chunks. This version only handles +// keys that are a multiple of 32 bits in size. +static inline uint32_t hashword(const uint64_t *k64, size_t length) +{ + const uint32_t *k = reinterpret_cast<const uint32_t *>(k64); + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2); + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch (length) { /* all the case statements fall through */ + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + +// hashword8() was adapted from http://www.burtleburtle.net, by Bob +// Jenkins. This computes a 32-bit hash from one 64-bit word. When +// targeting x86 (32 or 64 bit), both LLVM and GCC compile this +// function into about 35 instructions when inlined.
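+// (A single 64-bit key needs no mix() rounds: its two 32-bit halves are +// added into the initial state and run through final() once.)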
+static inline uint32_t hashword8(const uint64_t k64) +{ + uint32_t a,b,c; + a = b = c = 0xdeadbeef + 4; + b += k64 >> 32; + a += k64 & 0xffffffff; + final(a,b,c); + return c; +} +#undef final +#undef mix +#undef rot + +uint64_t APInt::getHashValue() const { + uint64_t hash; + if (isSingleWord()) + hash = hashword8(VAL); + else + hash = hashword(pVal, getNumWords()*2); + return hash; +} + +/// HiBits - This function returns the high "numBits" bits of this APInt. +APInt APInt::getHiBits(unsigned numBits) const { + return APIntOps::lshr(*this, BitWidth - numBits); +} + +/// LoBits - This function returns the low "numBits" bits of this APInt. +APInt APInt::getLoBits(unsigned numBits) const { + return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), + BitWidth - numBits); +} + +bool APInt::isPowerOf2() const { + return (!!*this) && !(*this & (*this - APInt(BitWidth,1))); +} + +unsigned APInt::countLeadingZerosSlowCase() const { + unsigned Count = 0; + for (unsigned i = getNumWords(); i > 0u; --i) { + if (pVal[i-1] == 0) + Count += APINT_BITS_PER_WORD; + else { + Count += CountLeadingZeros_64(pVal[i-1]); + break; + } + } + unsigned remainder = BitWidth % APINT_BITS_PER_WORD; + if (remainder) + Count -= APINT_BITS_PER_WORD - remainder; + return std::min(Count, BitWidth); +} + +static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) { + unsigned Count = 0; + if (skip) + V <<= skip; + while (V && (V & (1ULL << 63))) { + Count++; + V <<= 1; + } + return Count; +} + +unsigned APInt::countLeadingOnes() const { + if (isSingleWord()) + return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth); + + unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; + unsigned shift; + if (!highWordBits) { + highWordBits = APINT_BITS_PER_WORD; + shift = 0; + } else { + shift = APINT_BITS_PER_WORD - highWordBits; + } + int i = getNumWords() - 1; + unsigned Count = countLeadingOnes_64(pVal[i], shift); + if (Count == highWordBits) { + for (i--; i >= 0; --i) { + if (pVal[i] == -1ULL) + Count += APINT_BITS_PER_WORD; + else { + Count += countLeadingOnes_64(pVal[i], 0); + break; + } + } + } + return Count; +} + +unsigned APInt::countTrailingZeros() const { + if (isSingleWord()) + return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth); + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && pVal[i] == 0; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += CountTrailingZeros_64(pVal[i]); + return std::min(Count, BitWidth); +} + +unsigned APInt::countTrailingOnesSlowCase() const { + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += CountTrailingOnes_64(pVal[i]); + return std::min(Count, BitWidth); +} + +unsigned APInt::countPopulationSlowCase() const { + unsigned Count = 0; + for (unsigned i = 0; i < getNumWords(); ++i) + Count += CountPopulation_64(pVal[i]); + return Count; +} + +APInt APInt::byteSwap() const { + assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); + if (BitWidth == 16) + return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); + else if (BitWidth == 32) + return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); + else if (BitWidth == 48) { + unsigned Tmp1 = unsigned(VAL >> 16); + Tmp1 = ByteSwap_32(Tmp1); + uint16_t Tmp2 = uint16_t(VAL); + Tmp2 = ByteSwap_16(Tmp2); + return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); + } else if (BitWidth == 64) + return APInt(BitWidth, ByteSwap_64(VAL)); + else { + APInt Result(BitWidth, 0); + 
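+ // General case: treat the value as a byte array and reverse it in place, + // swapping bytes from opposite ends toward the middle.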
char *pByte = (char*)Result.pVal; + for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) { + char Tmp = pByte[i]; + pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i]; + pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp; + } + return Result; + } +} + +APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, + const APInt& API2) { + APInt A = API1, B = API2; + while (!!B) { + APInt T = B; + B = APIntOps::urem(A, B); + A = T; + } + return A; +} + +APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { + union { + double D; + uint64_t I; + } T; + T.D = Double; + + // Get the sign bit from the highest order bit + bool isNeg = T.I >> 63; + + // Get the 11-bit exponent and adjust for the bias of 1023 + int64_t exp = ((T.I >> 52) & 0x7ff) - 1023; + + // If the exponent is negative, the magnitude is < 1 so just return 0. + if (exp < 0) + return APInt(width, 0u); + + // Extract the mantissa by clearing the top 12 bits (sign + exponent). + uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52; + + // If the exponent doesn't shift all bits out of the mantissa + if (exp < 52) + return isNeg ? -APInt(width, mantissa >> (52 - exp)) : + APInt(width, mantissa >> (52 - exp)); + + // If the client didn't provide enough bits for us to shift the mantissa into + // then the result is undefined, just return 0 + if (width <= exp - 52) + return APInt(width, 0); + + // Otherwise, we have to shift the mantissa bits up to the right location + APInt Tmp(width, mantissa); + Tmp = Tmp.shl((unsigned)exp - 52); + return isNeg ? -Tmp : Tmp; +} + +/// RoundToDouble - This function converts this APInt to a double. +/// The layout for double is as following (IEEE Standard 754): +/// -------------------------------------- +/// | Sign Exponent Fraction Bias | +/// |--------------------------------------| +/// | 1[63] 11[62-52] 52[51-00] 1023 | +/// -------------------------------------- +double APInt::roundToDouble(bool isSigned) const { + + // Handle the simple case where the value is contained in one uint64_t. + if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { + if (isSigned) { + int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth); + return double(sext); + } else + return double(VAL); + } + + // Determine if the value is negative. + bool isNeg = isSigned ? (*this)[BitWidth-1] : false; + + // Construct the absolute value if we're negative. + APInt Tmp(isNeg ? -(*this) : (*this)); + + // Figure out how many bits we're using. + unsigned n = Tmp.getActiveBits(); + + // The exponent (without bias normalization) is just the number of bits + // we are using. Note that the sign bit is gone since we constructed the + // absolute value. + uint64_t exp = n; + + // Return infinity for exponent overflow + if (exp > 1023) { + if (!isSigned || !isNeg) + return std::numeric_limits<double>::infinity(); + else + return -std::numeric_limits<double>::infinity(); + } + exp += 1023; // Increment for 1023 bias + + // Number of bits in mantissa is 52. To obtain the mantissa value, we must + // extract the high 52 bits from the correct words in pVal. + uint64_t mantissa; + unsigned hiWord = whichWord(n-1); + if (hiWord == 0) { + mantissa = Tmp.pVal[0]; + if (n > 52) + mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else { + assert(hiWord > 0 && "huh?"); + uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); + mantissa = hibits | lobits; + } + + // The leading bit of mantissa is implicit, so get rid of it. + uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; + union { + double D; + uint64_t I; + } T; + T.I = sign | (exp << 52) | mantissa; + return T.D; +} + +// Truncate to new width. +APInt &APInt::trunc(unsigned width) { + assert(width < BitWidth && "Invalid APInt Truncate request"); + assert(width && "Can't truncate to 0 bits"); + unsigned wordsBefore = getNumWords(); + BitWidth = width; + unsigned wordsAfter = getNumWords(); + if (wordsBefore != wordsAfter) { + if (wordsAfter == 1) { + uint64_t *tmp = pVal; + VAL = pVal[0]; + delete [] tmp; + } else { + uint64_t *newVal = getClearedMemory(wordsAfter); + for (unsigned i = 0; i < wordsAfter; ++i) + newVal[i] = pVal[i]; + delete [] pVal; + pVal = newVal; + } + } + return clearUnusedBits(); +} + +// Sign extend to a new width. +APInt &APInt::sext(unsigned width) { + assert(width > BitWidth && "Invalid APInt SignExtend request"); + // If the sign bit isn't set, this is the same as zext. + if (!isNegative()) { + zext(width); + return *this; + } + + // The sign bit is set. First, get some facts + unsigned wordsBefore = getNumWords(); + unsigned wordBits = BitWidth % APINT_BITS_PER_WORD; + BitWidth = width; + unsigned wordsAfter = getNumWords(); + + // Mask the high order word appropriately + if (wordsBefore == wordsAfter) { + unsigned newWordBits = width % APINT_BITS_PER_WORD; + // The extension is contained to the wordsBefore-1th word. + uint64_t mask = ~0ULL; + if (newWordBits) + mask >>= APINT_BITS_PER_WORD - newWordBits; + mask <<= wordBits; + if (wordsBefore == 1) + VAL |= mask; + else + pVal[wordsBefore-1] |= mask; + return clearUnusedBits(); + } + + uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits; + uint64_t *newVal = getMemory(wordsAfter); + if (wordsBefore == 1) + newVal[0] = VAL | mask; + else { + for (unsigned i = 0; i < wordsBefore; ++i) + newVal[i] = pVal[i]; + newVal[wordsBefore-1] |= mask; + } + for (unsigned i = wordsBefore; i < wordsAfter; i++) + newVal[i] = -1ULL; + if (wordsBefore != 1) + delete [] pVal; + pVal = newVal; + return clearUnusedBits(); +} + +// Zero extend to a new width. +APInt &APInt::zext(unsigned width) { + assert(width > BitWidth && "Invalid APInt ZeroExtend request"); + unsigned wordsBefore = getNumWords(); + BitWidth = width; + unsigned wordsAfter = getNumWords(); + if (wordsBefore != wordsAfter) { + uint64_t *newVal = getClearedMemory(wordsAfter); + if (wordsBefore == 1) + newVal[0] = VAL; + else + for (unsigned i = 0; i < wordsBefore; ++i) + newVal[i] = pVal[i]; + if (wordsBefore != 1) + delete [] pVal; + pVal = newVal; + } + return *this; +} + +APInt &APInt::zextOrTrunc(unsigned width) { + if (BitWidth < width) + return zext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +APInt &APInt::sextOrTrunc(unsigned width) { + if (BitWidth < width) + return sext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// @brief Arithmetic right-shift function. +APInt APInt::ashr(const APInt &shiftAmt) const { + return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// @brief Arithmetic right-shift function. 
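+/// (Illustrative: an 8-bit APInt holding 0xF0, i.e. -16, ashr'd by 2 yields +/// 0xFC, i.e. -4, because the sign bit is replicated into the vacated bits.)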
+APInt APInt::ashr(unsigned shiftAmt) const { + assert(shiftAmt <= BitWidth && "Invalid shift amount"); + // Handle a degenerate case + if (shiftAmt == 0) + return *this; + + // Handle single word shifts with built-in ashr + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); // undefined + else { + unsigned SignBit = APINT_BITS_PER_WORD - BitWidth; + return APInt(BitWidth, + (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt)); + } + } + + // If all the bits were shifted out, the result is, technically, undefined. + // We return -1 if it was negative, 0 otherwise. We check this early to avoid + // issues in the algorithm below. + if (shiftAmt == BitWidth) { + if (isNegative()) + return APInt(BitWidth, -1ULL, true); + else + return APInt(BitWidth, 0); + } + + // Create some space for the result. + uint64_t * val = new uint64_t[getNumWords()]; + + // Compute some values needed by the following shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift + unsigned breakWord = getNumWords() - 1 - offset; // last word affected + unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? + if (bitsInWord == 0) + bitsInWord = APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + // Move the words containing significant bits + for (unsigned i = 0; i <= breakWord; ++i) + val[i] = pVal[i+offset]; // move whole word + + // Adjust the top significant word for sign bit fill, if negative + if (isNegative()) + if (bitsInWord < APINT_BITS_PER_WORD) + val[breakWord] |= ~0ULL << bitsInWord; // set high bits + } else { + // Shift the low order words + for (unsigned i = 0; i < breakWord; ++i) { + // This combines the shifted corresponding word with the low bits from + // the next word (shifted into this word's high bits). + val[i] = (pVal[i+offset] >> wordShift) | + (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); + } + + // Shift the break word. In this case there are no bits from the next word + // to include in this word. + val[breakWord] = pVal[breakWord+offset] >> wordShift; + + // Deal with sign extension in the break word, and possibly the word before + // it. + if (isNegative()) { + if (wordShift > bitsInWord) { + if (breakWord > 0) + val[breakWord-1] |= + ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); + val[breakWord] |= ~0ULL; + } else + val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + } + } + + // Remaining words are 0 or -1, just assign them. + uint64_t fillValue = (isNegative() ? -1ULL : 0); + for (unsigned i = breakWord+1; i < getNumWords(); ++i) + val[i] = fillValue; + return APInt(val, BitWidth).clearUnusedBits(); +} + +/// Logical right-shift this APInt by shiftAmt. +/// @brief Logical right-shift function. +APInt APInt::lshr(const APInt &shiftAmt) const { + return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Logical right-shift this APInt by shiftAmt. +/// @brief Logical right-shift function. +APInt APInt::lshr(unsigned shiftAmt) const { + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + else + return APInt(BitWidth, this->VAL >> shiftAmt); + } + + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0.
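+ // (For a native 8-bit type a shift by 8 would be undefined behavior; here + // it is simply defined to produce zero.)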
+ if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + + // If none of the bits are shifted out, the result is *this. This avoids + // issues with shifting by the size of the integer type, which produces + // undefined results in the code below. This is also an optimization. + if (shiftAmt == 0) + return *this; + + // Create some space for the result. + uint64_t * val = new uint64_t[getNumWords()]; + + // If we are shifting less than a word, compute the shift with a simple carry + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (int i = getNumWords()-1; i >= 0; --i) { + val[i] = (pVal[i] >> shiftAmt) | carry; + carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt); + } + return APInt(val, BitWidth).clearUnusedBits(); + } + + // Compute some values needed by the remaining shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (unsigned i = 0; i < getNumWords() - offset; ++i) + val[i] = pVal[i+offset]; + for (unsigned i = getNumWords()-offset; i < getNumWords(); i++) + val[i] = 0; + return APInt(val,BitWidth).clearUnusedBits(); + } + + // Shift the low order words + unsigned breakWord = getNumWords() - offset -1; + for (unsigned i = 0; i < breakWord; ++i) + val[i] = (pVal[i+offset] >> wordShift) | + (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); + // Shift the break word. + val[breakWord] = pVal[breakWord+offset] >> wordShift; + + // Remaining words are 0 + for (unsigned i = breakWord+1; i < getNumWords(); ++i) + val[i] = 0; + return APInt(val, BitWidth).clearUnusedBits(); +} + +/// Left-shift this APInt by shiftAmt. +/// @brief Left-shift function. +APInt APInt::shl(const APInt &shiftAmt) const { + // It's undefined behavior in C to shift by BitWidth or greater. + return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::shlSlowCase(unsigned shiftAmt) const { + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0. + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + + // If none of the bits are shifted out, the result is *this. This avoids a + // lshr by the word size in the loop below which can produce incorrect + // results. It also avoids the expensive computation below for a common case. + if (shiftAmt == 0) + return *this; + + // Create some space for the result. + uint64_t * val = new uint64_t[getNumWords()]; + + // If we are shifting less than a word, do it the easy way + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (unsigned i = 0; i < getNumWords(); i++) { + val[i] = pVal[i] << shiftAmt | carry; + carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt); + } + return APInt(val, BitWidth).clearUnusedBits(); + } + + // Compute some values needed by the remaining shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (unsigned i = 0; i < offset; i++) + val[i] = 0; + for (unsigned i = offset; i < getNumWords(); i++) + val[i] = pVal[i-offset]; + return APInt(val,BitWidth).clearUnusedBits(); + } + + // Copy whole words from this to Result.
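+ // Each destination word combines the low bits of its source word, shifted + // up, with the high bits of the source word just below it.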
+ unsigned i = getNumWords() - 1; + for (; i > offset; --i) + val[i] = pVal[i-offset] << wordShift | + pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift); + val[offset] = pVal[0] << wordShift; + for (i = 0; i < offset; ++i) + val[i] = 0; + return APInt(val, BitWidth).clearUnusedBits(); +} + +APInt APInt::rotl(const APInt &rotateAmt) const { + return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::rotl(unsigned rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities. Note that + // shl/lshr return new values, so the results must be assigned back. + APInt hi(*this); + APInt lo(*this); + hi = hi.shl(rotateAmt); + lo = lo.lshr(BitWidth - rotateAmt); + return hi | lo; +} + +APInt APInt::rotr(const APInt &rotateAmt) const { + return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::rotr(unsigned rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities. Note that + // shl/lshr return new values, so the results must be assigned back. + APInt hi(*this); + APInt lo(*this); + lo = lo.lshr(rotateAmt); + hi = hi.shl(BitWidth - rotateAmt); + return hi | lo; +} + +// Square Root - this method computes and returns the square root of "this". +// Three mechanisms are used for computation. For small values (<= 5 bits), +// a table lookup is done. This gets some performance for common cases. For +// values using less than 52 bits, the value is converted to double and then +// the libc sqrt function is called. The result is rounded and then converted +// back to a uint64_t which is then used to construct the result. Finally, +// the Babylonian method for computing square roots is used. +APInt APInt::sqrt() const { + + // Determine the magnitude of the value. + unsigned magnitude = getActiveBits(); + + // Use a fast table for some small values. This also gets rid of some + // rounding errors in libc sqrt for small values. + if (magnitude <= 5) { + static const uint8_t results[32] = { + /* 0 */ 0, + /* 1- 2 */ 1, 1, + /* 3- 6 */ 2, 2, 2, 2, + /* 7-12 */ 3, 3, 3, 3, 3, 3, + /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, + /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + /* 31 */ 6 + }; + return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); + } + + // If the magnitude of the value fits in less than 52 bits (the precision of + // an IEEE double precision floating point value), then we can use the + // libc sqrt function which will probably use a hardware sqrt computation. + // This should be faster than the algorithm below. + if (magnitude < 52) { +#ifdef _MSC_VER + // Amazingly, VC++ doesn't have round(); add 0.5 before truncating instead. + return APInt(BitWidth, + uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5)); +#else + return APInt(BitWidth, + uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); +#endif + } + + // Okay, all the short cuts are exhausted. We must compute it. The following + // is a classical Babylonian method for computing the square root. This code + // was adapted to APInt from a Wikipedia article on such computations. + // See http://www.wikipedia.org/ and go to the page named + // Calculate_an_integer_square_root. + unsigned nbits = BitWidth, i = 4; + APInt testy(BitWidth, 16); + APInt x_old(BitWidth, 1); + APInt x_new(BitWidth, 0); + APInt two(BitWidth, 2); + + // Select a good starting value using binary logarithms.
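+ // The loop below walks testy through 2^4, 2^6, 2^8, ... until testy is at + // least *this (or i reaches the bit width); x_old = 2^(i/2) then serves as + // a rough upper estimate of the root for the Babylonian iteration to refine.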
+ for (;; i += 2, testy = testy.shl(2)) + if (i >= nbits || this->ule(testy)) { + x_old = x_old.shl(i / 2); + break; + } + + // Use the Babylonian method to arrive at the integer square root: + for (;;) { + x_new = (this->udiv(x_old) + x_old).udiv(two); + if (x_old.ule(x_new)) + break; + x_old = x_new; + } + + // Make sure we return the closest approximation + // NOTE: The rounding calculation below is correct. It will produce an + // off-by-one discrepancy with results from pari/gp. That discrepancy has been + // determined to be a rounding issue with pari/gp as it begins to use a + // floating point representation after 192 bits. There are no discrepancies + // between this algorithm and pari/gp for bit widths < 192 bits. + APInt square(x_old * x_old); + APInt nextSquare((x_old + 1) * (x_old + 1)); + if (this->ult(square)) + return x_old; + else if (this->ule(nextSquare)) { + APInt midpoint((nextSquare - square).udiv(two)); + APInt offset(*this - square); + if (offset.ult(midpoint)) + return x_old; + else + return x_old + 1; + } else + assert(0 && "Error in APInt::sqrt computation"); + return x_old + 1; +} + +/// Computes the multiplicative inverse of this APInt for a given modulo. The +/// iterative extended Euclidean algorithm is used to solve for this value, +/// however we simplify it to speed up calculating only the inverse, and take +/// advantage of div+rem calculations. We also use some tricks to avoid copying +/// (potentially large) APInts around. +APInt APInt::multiplicativeInverse(const APInt& modulo) const { + assert(ult(modulo) && "This APInt must be smaller than the modulo"); + + // Using the properties listed at the following web page (accessed 06/21/08): + // http://www.numbertheory.org/php/euclid.html + // (especially the properties numbered 3, 4 and 9) it can be proved that + // BitWidth bits suffice for all the computations in the algorithm implemented + // below. More precisely, this number of bits suffice if the multiplicative + // inverse exists, but may not suffice for the general extended Euclidean + // algorithm. + + APInt r[2] = { modulo, *this }; + APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; + APInt q(BitWidth, 0); + + unsigned i; + for (i = 0; r[i^1] != 0; i ^= 1) { + // An overview of the math without the confusing bit-flipping: + // q = r[i-2] / r[i-1] + // r[i] = r[i-2] % r[i-1] + // t[i] = t[i-2] - t[i-1] * q + udivrem(r[i], r[i^1], q, r[i]); + t[i] -= t[i^1] * q; + } + + // If this APInt and the modulo are not coprime, there is no multiplicative + // inverse, so return 0. We check this by looking at the next-to-last + // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean + // algorithm. + if (r[i] != 1) + return APInt(BitWidth, 0); + + // The next-to-last t is the multiplicative inverse. However, we are + // interested in a positive inverse. Calculate a positive one from a negative + // one if necessary. A simple addition of the modulo suffices because + // abs(t[i]) is known to be less than *this/2 (see the link above). + return t[i].isNegative() ? t[i] + modulo : t[i]; +} + +/// Calculate the magic numbers required to implement a signed integer division +/// by a constant as a sequence of multiplies, adds and shifts. Requires that +/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. +/// Warren, Jr., chapter 10.
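+/// (Illustrative use by a hypothetical caller, not part of this file: to +/// lower n/7 for 32-bit n, codegen would take ms mag = APInt(32, 7).magic(), +/// form the high 32 bits of the 64-bit signed product n*mag.m, add n when +/// mag.m is negative, shift right by mag.s, and add the result's sign bit, +/// per Hacker's Delight chapter 10.)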
+APInt::ms APInt::magic() const { + const APInt& d = *this; + unsigned p; + APInt ad, anc, delta, q1, r1, q2, r2, t; + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); + struct ms mag; + + ad = d.abs(); + t = signedMin + (d.lshr(d.getBitWidth() - 1)); + anc = t - 1 - t.urem(ad); // absolute value of nc + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) + r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) + q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) + r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) + do { + p = p + 1; + q1 = q1<<1; // update q1 = 2p/abs(nc) + r1 = r1<<1; // update r1 = rem(2p/abs(nc)) + if (r1.uge(anc)) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = q2<<1; // update q2 = 2p/abs(d) + r2 = r2<<1; // update r2 = rem(2p/abs(d)) + if (r2.uge(ad)) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1.ule(delta) || (q1 == delta && r1 == 0)); + + mag.m = q2 + 1; + if (d.isNegative()) mag.m = -mag.m; // resulting magic number + mag.s = p - d.getBitWidth(); // resulting shift + return mag; +} + +/// Calculate the magic numbers required to implement an unsigned integer +/// division by a constant as a sequence of multiplies, adds and shifts. +/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry +/// S. Warren, Jr., chapter 10. +APInt::mu APInt::magicu() const { + const APInt& d = *this; + unsigned p; + APInt nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); + + nc = allOnes - (-d).urem(d); + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc + r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) + q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d + r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1.uge(nc - r1)) { + q1 = q1 + q1 + 1; // update q1 + r1 = r1 + r1 - nc; // update r1 + } + else { + q1 = q1+q1; // update q1 + r1 = r1+r1; // update r1 + } + if ((r2 + 1).uge(d - r2)) { + if (q2.uge(signedMax)) magu.a = 1; + q2 = q2+q2 + 1; // update q2 + r2 = r2+r2 + 1 - d; // update r2 + } + else { + if (q2.uge(signedMin)) magu.a = 1; + q2 = q2+q2; // update q2 + r2 = r2+r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < d.getBitWidth()*2 && + (q1.ult(delta) || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - d.getBitWidth(); // resulting shift + return magu; +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. Comments explain +/// the algorithm and any deviation from it. +static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, +  unsigned m, unsigned n) { + assert(u && "Must provide dividend"); + assert(v && "Must provide divisor"); + assert(q && "Must provide quotient"); + assert(u != v && u != q && v != q && "Must use different memory"); + assert(n>1 && "n must be > 1"); + + // Knuth uses the value b as the base of the number system.
In our case b + // is 2^32. + uint64_t b = uint64_t(1) << 32; + +#if 0 + DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(cerr << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); + DEBUG(cerr << " by"); + DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); + DEBUG(cerr << '\n'); +#endif + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the + // shift amount from the leading zeros. We are basically normalizing the u + // and v so that their high bits are shifted to the top of v's range without + // overflow. Note that this can require an extra word in u so that u must + // be of length m+n+1. + unsigned shift = CountLeadingZeros_32(v[n-1]); + unsigned v_carry = 0; + unsigned u_carry = 0; + if (shift) { + for (unsigned i = 0; i < m+n; ++i) { + unsigned u_tmp = u[i] >> (32 - shift); + u[i] = (u[i] << shift) | u_carry; + u_carry = u_tmp; + } + for (unsigned i = 0; i < n; ++i) { + unsigned v_tmp = v[i] >> (32 - shift); + v[i] = (v[i] << shift) | v_carry; + v_carry = v_tmp; + } + } + u[m+n] = u_carry; +#if 0 + DEBUG(cerr << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); + DEBUG(cerr << " by"); + DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); + DEBUG(cerr << '\n'); +#endif + + // D2. [Initialize j.] Set j to m. This is the loop counter over the places. + int j = m; + do { + DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. + // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') + // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') + // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease + // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test + // on v[n-2] determines at high speed most of the cases in which the trial + // value qp is one too large, and it eliminates all cases where qp is two + // too large. + uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); + DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n'); + uint64_t qp = dividend / v[n-1]; + uint64_t rp = dividend % v[n-1]; + if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { + qp--; + rp += v[n-1]; + if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) + qp--; + } + DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + + // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with + // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation + // consists of a simple multiplication by a one-place number, combined with + // a subtraction.
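+ // (Each digit step below uses 64-bit temporaries so the 32-bit multiply + // and subtract are exact; any borrow is walked leftward just as in manual + // long division.)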
+ bool isNeg = false; + for (unsigned i = 0; i < n; ++i) { + uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); + uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); + bool borrow = subtrahend > u_tmp; + DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n'); + + uint64_t result = u_tmp - subtrahend; + unsigned k = j + i; + u[k++] = (unsigned)(result & (b-1)); // subtract low word + u[k++] = (unsigned)(result >> 32); // subtract high word + while (borrow && k <= m+n) { // deal with borrow to the left + borrow = u[k] == 0; + u[k]--; + k++; + } + isNeg |= borrow; + DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n'); + } + DEBUG(cerr << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n'); + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + // + if (isNeg) { + bool carry = true; // true because b's complement is "complement + 1" + for (unsigned i = 0; i <= m+n; ++i) { + u[i] = ~u[i] + carry; // b's complement + carry = carry && u[i] == 0; + } + } + DEBUG(cerr << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n'); + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = (unsigned)qp; + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (unsigned i = 0; i < n; i++) { + unsigned limit = std::min(u[j+i],v[i]); + u[j+i] += v[i] + carry; + carry = u[j+i] < limit || (carry && u[j+i] == limit); + } + u[j+n] += carry; + } + DEBUG(cerr << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]); + DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n'); + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. + } while (--j >= 0); + + DEBUG(cerr << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]); + DEBUG(cerr << '\n'); + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. 
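+ // Shifting u right by "shift" bits undoes the D1 normalization and + // recovers the true remainder digits.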
+ if (shift) { + unsigned carry = 0; + DEBUG(cerr << "KnuthDiv: remainder:"); + for (int i = n-1; i >= 0; i--) { + r[i] = (u[i] >> shift) | carry; + carry = u[i] << (32 - shift); + DEBUG(cerr << " " << r[i]); + } + } else { + for (int i = n-1; i >= 0; i--) { + r[i] = u[i]; + DEBUG(cerr << " " << r[i]); + } + } + DEBUG(cerr << '\n'); + } +#if 0 + DEBUG(cerr << std::setbase(10) << '\n'); +#endif +} + +void APInt::divide(const APInt LHS, unsigned lhsWords, + const APInt &RHS, unsigned rhsWords, + APInt *Quotient, APInt *Remainder) +{ + assert(lhsWords >= rhsWords && "Fractional result"); + + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't + // work on big-endian machines. + uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); + unsigned n = rhsWords * 2; + unsigned m = (lhsWords * 2) - n; + + // Allocate space for the temporary values we need either on the stack, if + // it will fit, or on the heap if it won't. + unsigned SPACE[128]; + unsigned *U = 0; + unsigned *V = 0; + unsigned *Q = 0; + unsigned *R = 0; + if ((Remainder?4:3)*n+2*m+1 <= 128) { + U = &SPACE[0]; + V = &SPACE[m+n+1]; + Q = &SPACE[(m+n+1) + n]; + if (Remainder) + R = &SPACE[(m+n+1) + n + (m+n)]; + } else { + U = new unsigned[m + n + 1]; + V = new unsigned[n]; + Q = new unsigned[m+n]; + if (Remainder) + R = new unsigned[n]; + } + + // Initialize the dividend + memset(U, 0, (m+n+1)*sizeof(unsigned)); + for (unsigned i = 0; i < lhsWords; ++i) { + uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); + U[i * 2] = (unsigned)(tmp & mask); + U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + } + U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. + + // Initialize the divisor + memset(V, 0, (n)*sizeof(unsigned)); + for (unsigned i = 0; i < rhsWords; ++i) { + uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); + V[i * 2] = (unsigned)(tmp & mask); + V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + } + + // initialize the quotient and remainder + memset(Q, 0, (m+n) * sizeof(unsigned)); + if (Remainder) + memset(R, 0, n * sizeof(unsigned)); + + // Now, adjust m and n for the Knuth division. n is the number of words in + // the divisor. m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10.
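+ // (Illustrative: each step below divides remainder*2^32 + U[i] by the + // one-word divisor to produce the single quotient digit Q[i], exactly as + // in pencil-and-paper short division.)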
+ assert(n != 0 && "Divide by zero?"); + if (n == 1) { + unsigned divisor = V[0]; + unsigned remainder = 0; + for (int i = m+n-1; i >= 0; i--) { + uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i]; + if (partial_dividend == 0) { + Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + Q[i] = 0; + remainder = (unsigned)partial_dividend; + } else if (partial_dividend == divisor) { + Q[i] = 1; + remainder = 0; + } else { + Q[i] = (unsigned)(partial_dividend / divisor); + remainder = (unsigned)(partial_dividend - (Q[i] * divisor)); + } + } + if (R) + R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(U, V, Q, R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) + Quotient->VAL = 0; + else + delete [] Quotient->pVal; + Quotient->BitWidth = LHS.BitWidth; + if (!Quotient->isSingleWord()) + Quotient->pVal = getClearedMemory(Quotient->getNumWords()); + } else + Quotient->clear(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. + if (lhsWords == 1) { + uint64_t tmp = + uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); + if (Quotient->isSingleWord()) + Quotient->VAL = tmp; + else + Quotient->pVal[0] = tmp; + } else { + assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->pVal[i] = + uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); + } + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != RHS.BitWidth) { + if (Remainder->isSingleWord()) + Remainder->VAL = 0; + else + delete [] Remainder->pVal; + Remainder->BitWidth = RHS.BitWidth; + if (!Remainder->isSingleWord()) + Remainder->pVal = getClearedMemory(Remainder->getNumWords()); + } else + Remainder->clear(); + + // The remainder is in R. Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); + if (Remainder->isSingleWord()) + Remainder->VAL = tmp; + else + Remainder->pVal[0] = tmp; + } else { + assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->pVal[i] = + uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); + } + } + + // Clean up the memory we allocated. + if (U != &SPACE[0]) { + delete [] U; + delete [] V; + delete [] Q; + delete [] R; + } +} + +APInt APInt::udiv(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + + // First, deal with the easy case + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, VAL / RHS.VAL); + } + + // Get some facts about the LHS and RHS number of bits and words + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Divided by zero???"); + unsigned lhsBits = this->getActiveBits(); + unsigned lhsWords = !lhsBits ? 
0 : (APInt::whichWord(lhsBits - 1) + 1); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + else if (lhsWords < rhsWords || this->ult(RHS)) { + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + } else if (*this == RHS) { + // X / X ===> 1 + return APInt(BitWidth, 1); + } else if (lhsWords == 1 && rhsWords == 1) { + // All high words are zero, just use native divide + return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(1,0); // to hold result. + divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0); + return Quotient; +} + +APInt APInt::urem(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, VAL % RHS.VAL); + } + + // Get some facts about the LHS + unsigned lhsBits = getActiveBits(); + unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + + // Get some facts about the RHS + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Performing remainder operation by zero ???"); + + // Check the degenerate cases + if (lhsWords == 0) { + // 0 % Y ===> 0 + return APInt(BitWidth, 0); + } else if (lhsWords < rhsWords || this->ult(RHS)) { + // X % Y ===> X, iff X < Y + return *this; + } else if (*this == RHS) { + // X % X == 0; + return APInt(BitWidth, 0); + } else if (lhsWords == 1) { + // All high words are zero, just use native remainder + return APInt(BitWidth, pVal[0] % RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Remainder(1,0); + divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder); + return Remainder; +} + +void APInt::udivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + // Get some size facts about the dividend and divisor + unsigned lhsBits = LHS.getActiveBits(); + unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = 0; // 0 / Y ===> 0 + Remainder = 0; // 0 % Y ===> 0 + return; + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { + Quotient = 0; // X / Y ===> 0, iff X < Y + Remainder = LHS; // X % Y ===> X, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = 1; // X / X ===> 1 + Remainder = 0; // X % X ===> 0; + return; + } + + if (lhsWords == 1 && rhsWords == 1) { + // There is only one word to consider so use the native versions. + uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; + uint64_t rhsValue = RHS.isSingleWord() ? 
RHS.VAL : RHS.pVal[0]; + Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); + Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); + return; + } + + // Okay, let's do it the long way + divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); +} + +void APInt::fromString(unsigned numbits, const char *str, unsigned slen, + uint8_t radix) { + // Check our assumptions here + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + assert(str && "String is null?"); + bool isNeg = str[0] == '-'; + if (isNeg) + str++, slen--; + assert((slen <= numbits || radix != 2) && "Insufficient bit width"); + assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); + assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); + + // Allocate memory + if (!isSingleWord()) + pVal = getClearedMemory(getNumWords()); + + // Figure out if we can shift instead of multiply + unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); + + // Set up an APInt for the digit to add outside the loop so we don't + // constantly construct/destruct it. + APInt apdigit(getBitWidth(), 0); + APInt apradix(getBitWidth(), radix); + + // Enter digit traversal loop + for (unsigned i = 0; i < slen; i++) { + // Get a digit + unsigned digit = 0; + char cdigit = str[i]; + if (radix == 16) { + if (!isxdigit(cdigit)) + assert(0 && "Invalid hex digit in string"); + if (isdigit(cdigit)) + digit = cdigit - '0'; + else if (cdigit >= 'a') + digit = cdigit - 'a' + 10; + else if (cdigit >= 'A') + digit = cdigit - 'A' + 10; + else + assert(0 && "huh? we shouldn't get here"); + } else if (isdigit(cdigit)) { + digit = cdigit - '0'; + assert((radix == 10 || + (radix == 8 && digit != 8 && digit != 9) || + (radix == 2 && (digit == 0 || digit == 1))) && + "Invalid digit in string for given radix"); + } else { + assert(0 && "Invalid character in digit string"); + } + + // Shift or multiply the value by the radix + if (slen > 1) { + if (shift) + *this <<= shift; + else + *this *= apradix; + } + + // Add in the digit we just interpreted + if (apdigit.isSingleWord()) + apdigit.VAL = digit; + else + apdigit.pVal[0] = digit; + *this += apdigit; + } + // If it's negative, put it in two's complement form + if (isNeg) { + (*this)--; + this->flip(); + } +} + +void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, + bool Signed) const { + assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + + // First, check for a zero value and just short circuit the logic below. + if (*this == 0) { + Str.push_back('0'); + return; + } + + static const char Digits[] = "0123456789ABCDEF"; + + if (isSingleWord()) { + char Buffer[65]; + char *BufPtr = Buffer+65; + + uint64_t N; + if (Signed) { + int64_t I = getSExtValue(); + if (I < 0) { + Str.push_back('-'); + I = -I; + } + N = I; + } else { + N = getZExtValue(); + } + + while (N) { + *--BufPtr = Digits[N % Radix]; + N /= Radix; + } + Str.append(BufPtr, Buffer+65); + return; + } + + APInt Tmp(*this); + + if (Signed && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + Tmp.flip(); + Tmp++; + Str.push_back('-'); + } + + // We insert the digits backward, then reverse them to get the right order.
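+ // (For example, the value 255 in radix 16 pushes 'F' then 'F'; the reverse + // below leaves "FF" in the buffer.)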
+ unsigned StartDig = Str.size(); + + // For the radix 2, 8 and 16 cases, we can just shift instead of divide + // because the number of bits per digit (1, 3 and 4 respectively) divides + // evenly. We just shift until the value is zero. + if (Radix != 10) { + // Just shift tmp right for each digit width until it becomes zero + unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); + unsigned MaskAmt = Radix - 1; + + while (Tmp != 0) { + unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; + Str.push_back(Digits[Digit]); + Tmp = Tmp.lshr(ShiftAmt); + } + } else { + APInt divisor(4, 10); + while (Tmp != 0) { + APInt APdigit(1, 0); + APInt tmp2(Tmp.getBitWidth(), 0); + divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, + &APdigit); + unsigned Digit = (unsigned)APdigit.getZExtValue(); + assert(Digit < Radix && "divide failed"); + Str.push_back(Digits[Digit]); + Tmp = tmp2; + } + } + + // Reverse the digits before returning. + std::reverse(Str.begin()+StartDig, Str.end()); +} + +/// toString - This returns the APInt as a std::string. Note that this is an +/// inefficient method. It is better to pass in a SmallVector/SmallString +/// to the methods above. +std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { + SmallString<40> S; + toString(S, Radix, Signed); + return S.c_str(); +} + + +void APInt::dump() const { + SmallString<40> S, U; + this->toStringUnsigned(U); + this->toStringSigned(S); + fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str()); +} + +void APInt::print(raw_ostream &OS, bool isSigned) const { + SmallString<40> S; + this->toString(S, 10, isSigned); + OS << S.c_str(); +} + +// This implements a variety of operations on a representation of +// arbitrary precision, two's-complement, bignum integer values. + +/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe + and unrestricting assumption. */ +#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] +COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0); + +/* Some handy functions local to this file. */ +namespace { + + /* Returns the integer part with the least significant BITS set. + BITS cannot be zero. */ + static inline integerPart + lowBitMask(unsigned int bits) + { + assert (bits != 0 && bits <= integerPartWidth); + + return ~(integerPart) 0 >> (integerPartWidth - bits); + } + + /* Returns the value of the lower half of PART. */ + static inline integerPart + lowHalf(integerPart part) + { + return part & lowBitMask(integerPartWidth / 2); + } + + /* Returns the value of the upper half of PART. */ + static inline integerPart + highHalf(integerPart part) + { + return part >> (integerPartWidth / 2); + } + + /* Returns the bit number of the most significant set bit of a part. + If the input number has no bits set -1U is returned. */ + static unsigned int + partMSB(integerPart value) + { + unsigned int n, msb; + + if (value == 0) + return -1U; + + n = integerPartWidth / 2; + + msb = 0; + do { + if (value >> n) { + value >>= n; + msb += n; + } + + n >>= 1; + } while (n); + + return msb; + } + + /* Returns the bit number of the least significant set bit of a + part. If the input number has no bits set -1U is returned.
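+ For example, partLSB(0x8) returns 3, the index of the lowest set bit.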
*/ + static unsigned int + partLSB(integerPart value) + { + unsigned int n, lsb; + + if (value == 0) + return -1U; + + lsb = integerPartWidth - 1; + n = integerPartWidth / 2; + + do { + if (value << n) { + value <<= n; + lsb -= n; + } + + n >>= 1; + } while (n); + + return lsb; + } +} + +/* Sets the least significant part of a bignum to the input value, and + zeroes out higher parts. */ +void +APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts) +{ + unsigned int i; + + assert (parts > 0); + + dst[0] = part; + for(i = 1; i < parts; i++) + dst[i] = 0; +} + +/* Assign one bignum to another. */ +void +APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + dst[i] = src[i]; +} + +/* Returns true if a bignum is zero, false otherwise. */ +bool +APInt::tcIsZero(const integerPart *src, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + if (src[i]) + return false; + + return true; +} + +/* Extract the given bit of a bignum; returns 0 or 1. */ +int +APInt::tcExtractBit(const integerPart *parts, unsigned int bit) +{ + return(parts[bit / integerPartWidth] + & ((integerPart) 1 << bit % integerPartWidth)) != 0; +} + +/* Set the given bit of a bignum. */ +void +APInt::tcSetBit(integerPart *parts, unsigned int bit) +{ + parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth); +} + +/* Returns the bit number of the least significant set bit of a + number. If the input number has no bits set -1U is returned. */ +unsigned int +APInt::tcLSB(const integerPart *parts, unsigned int n) +{ + unsigned int i, lsb; + + for(i = 0; i < n; i++) { + if (parts[i] != 0) { + lsb = partLSB(parts[i]); + + return lsb + i * integerPartWidth; + } + } + + return -1U; +} + +/* Returns the bit number of the most significant set bit of a number. + If the input number has no bits set -1U is returned. */ +unsigned int +APInt::tcMSB(const integerPart *parts, unsigned int n) +{ + unsigned int msb; + + do { + --n; + + if (parts[n] != 0) { + msb = partMSB(parts[n]); + + return msb + n * integerPartWidth; + } + } while (n); + + return -1U; +} + +/* Copy the bit vector of width srcBITS from SRC, starting at bit + srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes + the least significant bit of DST. All high bits above srcBITS in + DST are zero-filled. */ +void +APInt::tcExtract(integerPart *dst, unsigned int dstCount, const integerPart *src, + unsigned int srcBits, unsigned int srcLSB) +{ + unsigned int firstSrcPart, dstParts, shift, n; + + dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth; + assert (dstParts <= dstCount); + + firstSrcPart = srcLSB / integerPartWidth; + tcAssign (dst, src + firstSrcPart, dstParts); + + shift = srcLSB % integerPartWidth; + tcShiftRight (dst, dstParts, shift); + + /* We now have (dstParts * integerPartWidth - shift) bits from SRC + in DST. If this is less than srcBits, append the rest, else + clear the high bits. */ + n = dstParts * integerPartWidth - shift; + if (n < srcBits) { + integerPart mask = lowBitMask (srcBits - n); + dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) + << n % integerPartWidth); + } else if (n > srcBits) { + if (srcBits % integerPartWidth) + dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth); + } + + /* Clear high parts. */ + while (dstParts < dstCount) + dst[dstParts++] = 0; +} + +/* DST += RHS + C where C is zero or one. Returns the carry flag.
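+ A carry out of each part is detected by unsigned wraparound: after the + addition the part compares below (or, with an incoming carry, not above) + its old value exactly when a carry occurred.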
*/ +integerPart +APInt::tcAdd(integerPart *dst, const integerPart *rhs, + integerPart c, unsigned int parts) +{ + unsigned int i; + + assert(c <= 1); + + for(i = 0; i < parts; i++) { + integerPart l; + + l = dst[i]; + if (c) { + dst[i] += rhs[i] + 1; + c = (dst[i] <= l); + } else { + dst[i] += rhs[i]; + c = (dst[i] < l); + } + } + + return c; +} + +/* DST -= RHS + C where C is zero or one. Returns the carry flag. */ +integerPart +APInt::tcSubtract(integerPart *dst, const integerPart *rhs, + integerPart c, unsigned int parts) +{ + unsigned int i; + + assert(c <= 1); + + for(i = 0; i < parts; i++) { + integerPart l; + + l = dst[i]; + if (c) { + dst[i] -= rhs[i] + 1; + c = (dst[i] >= l); + } else { + dst[i] -= rhs[i]; + c = (dst[i] > l); + } + } + + return c; +} + +/* Negate a bignum in-place. */ +void +APInt::tcNegate(integerPart *dst, unsigned int parts) +{ + tcComplement(dst, parts); + tcIncrement(dst, parts); +} + +/* DST += SRC * MULTIPLIER + CARRY if add is true + DST = SRC * MULTIPLIER + CARRY if add is false + + Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC + they must start at the same point, i.e. DST == SRC. + + If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is + returned. Otherwise DST is filled with the least significant + DSTPARTS parts of the result, and if all of the omitted higher + parts were zero return zero, otherwise overflow occurred and + return one. */ +int +APInt::tcMultiplyPart(integerPart *dst, const integerPart *src, + integerPart multiplier, integerPart carry, + unsigned int srcParts, unsigned int dstParts, + bool add) +{ + unsigned int i, n; + + /* Otherwise our writes of DST kill our later reads of SRC. */ + assert(dst <= src || dst >= src + srcParts); + assert(dstParts <= srcParts + 1); + + /* N loops; minimum of dstParts and srcParts. */ + n = dstParts < srcParts ? dstParts: srcParts; + + for(i = 0; i < n; i++) { + integerPart low, mid, high, srcPart; + + /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. + + This cannot overflow, because + + (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) + + which is less than n^2. */ + + srcPart = src[i]; + + if (multiplier == 0 || srcPart == 0) { + low = carry; + high = 0; + } else { + low = lowHalf(srcPart) * lowHalf(multiplier); + high = highHalf(srcPart) * highHalf(multiplier); + + mid = lowHalf(srcPart) * highHalf(multiplier); + high += highHalf(mid); + mid <<= integerPartWidth / 2; + if (low + mid < low) + high++; + low += mid; + + mid = highHalf(srcPart) * lowHalf(multiplier); + high += highHalf(mid); + mid <<= integerPartWidth / 2; + if (low + mid < low) + high++; + low += mid; + + /* Now add carry. */ + if (low + carry < low) + high++; + low += carry; + } + + if (add) { + /* And now DST[i], and store the new low part there. */ + if (low + dst[i] < low) + high++; + dst[i] += low; + } else + dst[i] = low; + + carry = high; + } + + if (i < dstParts) { + /* Full multiplication, there is no overflow. */ + assert(i + 1 == dstParts); + dst[i] = carry; + return 0; + } else { + /* We overflowed if there is carry. */ + if (carry) + return 1; + + /* We would overflow if any significant unwritten parts would be + non-zero. This is true if any remaining src parts are non-zero + and the multiplier is non-zero. */ + if (multiplier) + for(; i < srcParts; i++) + if (src[i]) + return 1; + + /* We fitted in the narrow destination. */ + return 0; + } +} + +/* DST = LHS * RHS, where DST has the same width as the operands and + is filled with the least significant parts of the result. 
Returns + one if overflow occurred, otherwise zero. DST must be disjoint + from both operands. */ +int +APInt::tcMultiply(integerPart *dst, const integerPart *lhs, + const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + int overflow; + + assert(dst != lhs && dst != rhs); + + overflow = 0; + tcSet(dst, 0, parts); + + for(i = 0; i < parts; i++) + overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, + parts - i, true); + + return overflow; +} + +/* DST = LHS * RHS, where DST has width the sum of the widths of the + operands. No overflow occurs. DST must be disjoint from both + operands. Returns the number of parts required to hold the + result. */ +unsigned int +APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs, + const integerPart *rhs, unsigned int lhsParts, + unsigned int rhsParts) +{ + /* Put the narrower number on the LHS for less loops below. */ + if (lhsParts > rhsParts) { + return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); + } else { + unsigned int n; + + assert(dst != lhs && dst != rhs); + + tcSet(dst, 0, rhsParts); + + for(n = 0; n < lhsParts; n++) + tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true); + + n = lhsParts + rhsParts; + + return n - (dst[n - 1] == 0); + } +} + +/* If RHS is zero LHS and REMAINDER are left unchanged, return one. + Otherwise set LHS to LHS / RHS with the fractional part discarded, + set REMAINDER to the remainder, return zero. i.e. + + OLD_LHS = RHS * LHS + REMAINDER + + SCRATCH is a bignum of the same size as the operands and result for + use by the routine; its contents need not be initialized and are + destroyed. LHS, REMAINDER and SCRATCH must be distinct. +*/ +int +APInt::tcDivide(integerPart *lhs, const integerPart *rhs, + integerPart *remainder, integerPart *srhs, + unsigned int parts) +{ + unsigned int n, shiftCount; + integerPart mask; + + assert(lhs != remainder && lhs != srhs && remainder != srhs); + + shiftCount = tcMSB(rhs, parts) + 1; + if (shiftCount == 0) + return true; + + shiftCount = parts * integerPartWidth - shiftCount; + n = shiftCount / integerPartWidth; + mask = (integerPart) 1 << (shiftCount % integerPartWidth); + + tcAssign(srhs, rhs, parts); + tcShiftLeft(srhs, parts, shiftCount); + tcAssign(remainder, lhs, parts); + tcSet(lhs, 0, parts); + + /* Loop, subtracting SRHS if REMAINDER is greater and adding that to + the total. */ + for(;;) { + int compare; + + compare = tcCompare(remainder, srhs, parts); + if (compare >= 0) { + tcSubtract(remainder, srhs, 0, parts); + lhs[n] |= mask; + } + + if (shiftCount == 0) + break; + shiftCount--; + tcShiftRight(srhs, parts, 1); + if ((mask >>= 1) == 0) + mask = (integerPart) 1 << (integerPartWidth - 1), n--; + } + + return false; +} + +/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero. + There are no restrictions on COUNT. */ +void +APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count) +{ + if (count) { + unsigned int jump, shift; + + /* Jump is the inter-part jump; shift is is intra-part shift. */ + jump = count / integerPartWidth; + shift = count % integerPartWidth; + + while (parts > jump) { + integerPart part; + + parts--; + + /* dst[i] comes from the two parts src[i - jump] and, if we have + an intra-part shift, src[i - jump - 1]. 
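
tcDivide above is textbook restoring division: left-align the divisor so its most significant bit reaches the top of the operand width, then walk down one bit per iteration, subtracting wherever the shifted divisor still fits and setting the matching quotient bit. The identical loop on a single 64-bit word (a simplified sketch, not the multi-part routine):

#include <cassert>
#include <cstdint>

static bool udiv64(std::uint64_t &lhs, std::uint64_t rhs, std::uint64_t &rem) {
  if (rhs == 0) return true;          // like tcDivide: leave inputs unchanged
  unsigned shift = 0;
  while (!(rhs & (1ULL << 63))) {     // align divisor MSB with the top bit
    rhs <<= 1;
    ++shift;
  }
  rem = lhs;
  lhs = 0;
  for (;;) {
    if (rem >= rhs) {                 // shifted divisor fits: subtract,
      rem -= rhs;                     // record the quotient bit
      lhs |= 1ULL << shift;
    }
    if (shift == 0) break;
    --shift;
    rhs >>= 1;
  }
  return false;
}

int main() {
  std::uint64_t q = 1000, r;
  assert(!udiv64(q, 7, r) && q == 142 && r == 6);   // 1000 = 142*7 + 6
}
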
*/ + part = dst[parts - jump]; + if (shift) { + part <<= shift; + if (parts >= jump + 1) + part |= dst[parts - jump - 1] >> (integerPartWidth - shift); + } + + dst[parts] = part; + } + + while (parts > 0) + dst[--parts] = 0; + } +} + +/* Shift a bignum right COUNT bits in-place. Shifted in bits are + zero. There are no restrictions on COUNT. */ +void +APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count) +{ + if (count) { + unsigned int i, jump, shift; + + /* Jump is the inter-part jump; shift is is intra-part shift. */ + jump = count / integerPartWidth; + shift = count % integerPartWidth; + + /* Perform the shift. This leaves the most significant COUNT bits + of the result at zero. */ + for(i = 0; i < parts; i++) { + integerPart part; + + if (i + jump >= parts) { + part = 0; + } else { + part = dst[i + jump]; + if (shift) { + part >>= shift; + if (i + jump + 1 < parts) + part |= dst[i + jump + 1] << (integerPartWidth - shift); + } + } + + dst[i] = part; + } + } +} + +/* Bitwise and of two bignums. */ +void +APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + dst[i] &= rhs[i]; +} + +/* Bitwise inclusive or of two bignums. */ +void +APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + dst[i] |= rhs[i]; +} + +/* Bitwise exclusive or of two bignums. */ +void +APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + dst[i] ^= rhs[i]; +} + +/* Complement a bignum in-place. */ +void +APInt::tcComplement(integerPart *dst, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + dst[i] = ~dst[i]; +} + +/* Comparison (unsigned) of two bignums. */ +int +APInt::tcCompare(const integerPart *lhs, const integerPart *rhs, + unsigned int parts) +{ + while (parts) { + parts--; + if (lhs[parts] == rhs[parts]) + continue; + + if (lhs[parts] > rhs[parts]) + return 1; + else + return -1; + } + + return 0; +} + +/* Increment a bignum in-place, return the carry flag. */ +integerPart +APInt::tcIncrement(integerPart *dst, unsigned int parts) +{ + unsigned int i; + + for(i = 0; i < parts; i++) + if (++dst[i] != 0) + break; + + return i == parts; +} + +/* Set the least significant BITS bits of a bignum, clear the + rest. */ +void +APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts, + unsigned int bits) +{ + unsigned int i; + + i = 0; + while (bits > integerPartWidth) { + dst[i++] = ~(integerPart) 0; + bits -= integerPartWidth; + } + + if (bits) + dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits); + + while (i < parts) + dst[i++] = 0; +} diff --git a/lib/Support/APSInt.cpp b/lib/Support/APSInt.cpp new file mode 100644 index 000000000000..73acafa690c7 --- /dev/null +++ b/lib/Support/APSInt.cpp @@ -0,0 +1,23 @@ +//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the APSInt class, which is a simple class that +// represents an arbitrary sized integer that knows its signedness. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/FoldingSet.h" + +using namespace llvm; + +void APSInt::Profile(FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0)); + APInt::Profile(ID); +} diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp new file mode 100644 index 000000000000..db0d8f31e55d --- /dev/null +++ b/lib/Support/Allocator.cpp @@ -0,0 +1,141 @@ +//===--- Allocator.cpp - Simple memory allocation abstraction -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BumpPtrAllocator interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Recycler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Streams.h" +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MemRegion class implementation +//===----------------------------------------------------------------------===// + +namespace { +/// MemRegion - This is one chunk of the BumpPtrAllocator. +class MemRegion { + unsigned RegionSize; + MemRegion *Next; + char *NextPtr; +public: + void Init(unsigned size, unsigned Alignment, MemRegion *next) { + RegionSize = size; + Next = next; + NextPtr = (char*)(this+1); + + // Align NextPtr. + NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) & + ~(intptr_t)(Alignment-1)); + } + + const MemRegion *getNext() const { return Next; } + unsigned getNumBytesAllocated() const { + return NextPtr-(const char*)this; + } + + /// Allocate - Allocate and return at least the specified number of bytes. + /// + void *Allocate(size_t AllocSize, size_t Alignment, MemRegion **RegPtr) { + + char* Result = (char*) (((uintptr_t) (NextPtr+Alignment-1)) + & ~((uintptr_t) Alignment-1)); + + // Speculate the new value of NextPtr. + char* NextPtrTmp = Result + AllocSize; + + // If we are still within the current region, return Result. + if (unsigned (NextPtrTmp - (char*) this) <= RegionSize) { + NextPtr = NextPtrTmp; + return Result; + } + + // Otherwise, we have to allocate a new chunk. Create one twice as big as + // this one. + MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2); + NewRegion->Init(RegionSize*2, Alignment, this); + + // Update the current "first region" pointer to point to the new region. + *RegPtr = NewRegion; + + // Try allocating from it now. + return NewRegion->Allocate(AllocSize, Alignment, RegPtr); + } + + /// Deallocate - Recursively release all memory for this and its next regions + /// to the system. + void Deallocate() { + MemRegion *next = Next; + free(this); + if (next) + next->Deallocate(); + } + + /// DeallocateAllButLast - Recursively release all memory for this and its + /// next regions to the system stopping at the last region in the list. + /// Returns the pointer to the last region. 
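
MemRegion::Init and MemRegion::Allocate both round pointers up with the classic power-of-two trick: adding Alignment-1 carries the address past the next boundary unless it is already on one, and masking with ~(Alignment-1) snaps it back down. In isolation:

#include <cassert>
#include <cstdint>

// Round p up to the next multiple of a power-of-two alignment a.
static std::uintptr_t alignUp(std::uintptr_t p, std::uintptr_t a) {
  return (p + a - 1) & ~(a - 1);
}

int main() {
  assert(alignUp(17, 8) == 24);
  assert(alignUp(24, 8) == 24);   // already aligned: unchanged
  assert(alignUp(1, 16) == 16);
}
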
+  MemRegion *DeallocateAllButLast() {
+    MemRegion *next = Next;
+    if (!next)
+      return this;
+    free(this);
+    return next->DeallocateAllButLast();
+  }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// BumpPtrAllocator class implementation
+//===----------------------------------------------------------------------===//
+
+BumpPtrAllocator::BumpPtrAllocator() {
+  TheMemory = malloc(4096);
+  ((MemRegion*)TheMemory)->Init(4096, 1, 0);
+}
+
+BumpPtrAllocator::~BumpPtrAllocator() {
+  ((MemRegion*)TheMemory)->Deallocate();
+}
+
+void BumpPtrAllocator::Reset() {
+  MemRegion *MRP = (MemRegion*)TheMemory;
+  MRP = MRP->DeallocateAllButLast();
+  MRP->Init(4096, 1, 0);
+  TheMemory = MRP;
+}
+
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Align) {
+  MemRegion *MRP = (MemRegion*)TheMemory;
+  void *Ptr = MRP->Allocate(Size, Align, &MRP);
+  TheMemory = MRP;
+  return Ptr;
+}
+
+void BumpPtrAllocator::PrintStats() const {
+  unsigned BytesUsed = 0;
+  unsigned NumRegions = 0;
+  const MemRegion *R = (MemRegion*)TheMemory;
+  for (; R; R = R->getNext(), ++NumRegions)
+    BytesUsed += R->getNumBytesAllocated();
+
+  cerr << "\nNumber of memory regions: " << NumRegions << "\n";
+  cerr << "Bytes allocated: " << BytesUsed << "\n";
+}
+
+void llvm::PrintRecyclerStats(size_t Size,
+                              size_t Align,
+                              size_t FreeListSize) {
+  cerr << "Recycler element size: " << Size << '\n';
+  cerr << "Recycler element alignment: " << Align << '\n';
+  cerr << "Number of elements free for recycling: " << FreeListSize << '\n';
+}
diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp
new file mode 100644
index 000000000000..9764b5e829dc
--- /dev/null
+++ b/lib/Support/Annotation.cpp
@@ -0,0 +1,115 @@
+//===-- Annotation.cpp - Implement the Annotation Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AnnotationManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Annotation.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <map>
+#include <cstring>
+using namespace llvm;
+
+Annotation::~Annotation() {} // Designed to be subclassed
+
+Annotable::~Annotable() { // Virtual because it's designed to be subclassed...
+  Annotation *A = AnnotationList;
+  while (A) {
+    Annotation *Next = A->getNext();
+    delete A;
+    A = Next;
+  }
+}
+
+namespace {
+  class StrCmp {
+  public:
+    bool operator()(const char *a, const char *b) const {
+      return strcmp(a, b) < 0;
+    }
+  };
+}
+
+typedef std::map<const char*, unsigned, StrCmp> IDMapType;
+static unsigned IDCounter = 0; // Unique ID counter
+
+// Static member to ensure initialization on demand.
+static ManagedStatic<IDMapType> IDMap;
+
+// On demand annotation creation support...
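
AnnotationManager::getID (below) is a name-interning table: one map lookup, and on a miss the next counter value is recorded, so names receive dense IDs in first-come order. A sketch using std::string keys for self-containment; the real map keys raw const char* through the StrCmp comparator above:

#include <cassert>
#include <map>
#include <string>

static std::map<std::string, unsigned> IDs;
static unsigned Counter = 0;

// First lookup registers the name; later lookups return the same ID.
static unsigned getID(const std::string &Name) {
  std::map<std::string, unsigned>::iterator I = IDs.find(Name);
  if (I == IDs.end())
    return IDs[Name] = Counter++;
  return I->second;
}

int main() {
  unsigned a = getID("gc"), b = getID("dsa"), c = getID("gc");
  assert(a == 0 && b == 1 && c == a);
}
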
+typedef Annotation *(*AnnFactory)(AnnotationID, const Annotable *, void *); +typedef std::map > FactMapType; + +static FactMapType *TheFactMap = 0; +static FactMapType &getFactMap() { + if (TheFactMap == 0) + TheFactMap = new FactMapType(); + return *TheFactMap; +} + +static void eraseFromFactMap(unsigned ID) { + assert(TheFactMap && "No entries found!"); + TheFactMap->erase(ID); + if (TheFactMap->empty()) { // Delete when empty + delete TheFactMap; + TheFactMap = 0; + } +} + +AnnotationID AnnotationManager::getID(const char *Name) { // Name -> ID + IDMapType::iterator I = IDMap->find(Name); + if (I == IDMap->end()) { + (*IDMap)[Name] = IDCounter++; // Add a new element + return AnnotationID(IDCounter-1); + } + return AnnotationID(I->second); +} + +// getID - Name -> ID + registration of a factory function for demand driven +// annotation support. +AnnotationID AnnotationManager::getID(const char *Name, Factory Fact, + void *Data) { + AnnotationID Result(getID(Name)); + registerAnnotationFactory(Result, Fact, Data); + return Result; +} + +// getName - This function is especially slow, but that's okay because it should +// only be used for debugging. +// +const char *AnnotationManager::getName(AnnotationID ID) { // ID -> Name + IDMapType &TheMap = *IDMap; + for (IDMapType::iterator I = TheMap.begin(); ; ++I) { + assert(I != TheMap.end() && "Annotation ID is unknown!"); + if (I->second == ID.ID) return I->first; + } +} + +// registerAnnotationFactory - This method is used to register a callback +// function used to create an annotation on demand if it is needed by the +// Annotable::findOrCreateAnnotation method. +// +void AnnotationManager::registerAnnotationFactory(AnnotationID ID, AnnFactory F, + void *ExtraData) { + if (F) + getFactMap()[ID.ID] = std::make_pair(F, ExtraData); + else + eraseFromFactMap(ID.ID); +} + +// createAnnotation - Create an annotation of the specified ID for the +// specified object, using a register annotation creation function. +// +Annotation *AnnotationManager::createAnnotation(AnnotationID ID, + const Annotable *Obj) { + FactMapType::iterator I = getFactMap().find(ID.ID); + if (I == getFactMap().end()) return 0; + return I->second.first(ID, Obj, I->second.second); +} diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt new file mode 100644 index 000000000000..7c8ce706b610 --- /dev/null +++ b/lib/Support/CMakeLists.txt @@ -0,0 +1,31 @@ +add_llvm_library(LLVMSupport + APFloat.cpp + APInt.cpp + APSInt.cpp + Allocator.cpp + Annotation.cpp + CommandLine.cpp + ConstantRange.cpp + Debug.cpp + Dwarf.cpp + FileUtilities.cpp + FoldingSet.cpp + GraphWriter.cpp + IsInf.cpp + IsNAN.cpp + ManagedStatic.cpp + MemoryBuffer.cpp + PluginLoader.cpp + PrettyStackTrace.cpp + SlowOperationInformer.cpp + SmallPtrSet.cpp + Statistic.cpp + Streams.cpp + StringExtras.cpp + StringMap.cpp + StringPool.cpp + SystemUtils.cpp + Timer.cpp + Triple.cpp + raw_ostream.cpp + ) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp new file mode 100644 index 000000000000..4922560200a0 --- /dev/null +++ b/lib/Support/CommandLine.cpp @@ -0,0 +1,1184 @@ +//===-- CommandLine.cpp - Command line parser implementation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class implements a command line argument processor that is useful when +// creating a tool. It provides a simple, minimalistic interface that is easily +// extensible and supports nonlocal (library) command line options. +// +// Note that rather than trying to figure out what this code does, you could try +// reading the library documentation located in docs/CommandLine.html +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Streams.h" +#include "llvm/System/Path.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace llvm; +using namespace cl; + +//===----------------------------------------------------------------------===// +// Template instantiations and anchors. +// +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); +TEMPLATE_INSTANTIATION(class basic_parser); + +TEMPLATE_INSTANTIATION(class opt); +TEMPLATE_INSTANTIATION(class opt); +TEMPLATE_INSTANTIATION(class opt); +TEMPLATE_INSTANTIATION(class opt); +TEMPLATE_INSTANTIATION(class opt); + +void Option::anchor() {} +void basic_parser_impl::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} +void parser::anchor() {} + +//===----------------------------------------------------------------------===// + +// Globals for name and overview of program. Program name is not a string to +// avoid static ctor/dtor issues. +static char ProgramName[80] = ""; +static const char *ProgramOverview = 0; + +// This collects additional help to be printed. +static ManagedStatic > MoreHelp; + +extrahelp::extrahelp(const char *Help) + : morehelp(Help) { + MoreHelp->push_back(Help); +} + +static bool OptionListChanged = false; + +// MarkOptionsChanged - Internal helper function. +void cl::MarkOptionsChanged() { + OptionListChanged = true; +} + +/// RegisteredOptionList - This is the list of the command line options that +/// have statically constructed themselves. +static Option *RegisteredOptionList = 0; + +void Option::addArgument() { + assert(NextRegistered == 0 && "argument multiply registered!"); + + NextRegistered = RegisteredOptionList; + RegisteredOptionList = this; + MarkOptionsChanged(); +} + + +//===----------------------------------------------------------------------===// +// Basic, shared command line option processing machinery. +// + +/// GetOptionInfo - Scan the list of registered options, turning them into data +/// structures that are easier to handle. +static void GetOptionInfo(std::vector &PositionalOpts, + std::vector &SinkOpts, + std::map &OptionsMap) { + std::vector OptionNames; + Option *CAOpt = 0; // The ConsumeAfter option if it exists. + for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { + // If this option wants to handle multiple option names, get the full set. + // This handles enum options like "-O1 -O2" etc. 
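
The RegisteredOptionList that GetOptionInfo walks is built entirely by static constructors: each option links itself onto a global intrusive singly linked list via addArgument, which is also why GetOptionInfo must reverse the positional list to recover registration order. The idiom in miniature:

#include <cassert>

struct Opt {
  const char *Name;
  Opt *Next;
  explicit Opt(const char *n);
};
static Opt *List = 0;

// Constructor pushes onto the global list, newest first.
Opt::Opt(const char *n) : Name(n), Next(List) { List = this; }

static Opt A("alpha"), B("beta");   // registered before main runs

int main() {
  int count = 0;
  for (Opt *o = List; o; o = o->Next)
    ++count;
  assert(count == 2);
  assert(List == &B);               // reverse of declaration order
}
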
+ O->getExtraOptionNames(OptionNames); + if (O->ArgStr[0]) + OptionNames.push_back(O->ArgStr); + + // Handle named options. + for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { + // Add argument to the argument map! + if (!OptionsMap.insert(std::pair(OptionNames[i], + O)).second) { + cerr << ProgramName << ": CommandLine Error: Argument '" + << OptionNames[i] << "' defined more than once!\n"; + } + } + + OptionNames.clear(); + + // Remember information about positional options. + if (O->getFormattingFlag() == cl::Positional) + PositionalOpts.push_back(O); + else if (O->getMiscFlags() & cl::Sink) // Remember sink options + SinkOpts.push_back(O); + else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) { + if (CAOpt) + O->error("Cannot specify more than one option with cl::ConsumeAfter!"); + CAOpt = O; + } + } + + if (CAOpt) + PositionalOpts.push_back(CAOpt); + + // Make sure that they are in order of registration not backwards. + std::reverse(PositionalOpts.begin(), PositionalOpts.end()); +} + + +/// LookupOption - Lookup the option specified by the specified option on the +/// command line. If there is a value specified (after an equal sign) return +/// that as well. +static Option *LookupOption(const char *&Arg, const char *&Value, + std::map &OptionsMap) { + while (*Arg == '-') ++Arg; // Eat leading dashes + + const char *ArgEnd = Arg; + while (*ArgEnd && *ArgEnd != '=') + ++ArgEnd; // Scan till end of argument name. + + if (*ArgEnd == '=') // If we have an equals sign... + Value = ArgEnd+1; // Get the value, not the equals + + + if (*Arg == 0) return 0; + + // Look up the option. + std::map::iterator I = + OptionsMap.find(std::string(Arg, ArgEnd)); + return I != OptionsMap.end() ? I->second : 0; +} + +static inline bool ProvideOption(Option *Handler, const char *ArgName, + const char *Value, int argc, char **argv, + int &i) { + // Is this a multi-argument option? + unsigned NumAdditionalVals = Handler->getNumAdditionalVals(); + + // Enforce value requirements + switch (Handler->getValueExpectedFlag()) { + case ValueRequired: + if (Value == 0) { // No value specified? + if (i+1 < argc) { // Steal the next argument, like for '-o filename' + Value = argv[++i]; + } else { + return Handler->error(" requires a value!"); + } + } + break; + case ValueDisallowed: + if (NumAdditionalVals > 0) + return Handler->error(": multi-valued option specified" + " with ValueDisallowed modifier!"); + + if (Value) + return Handler->error(" does not allow a value! '" + + std::string(Value) + "' specified."); + break; + case ValueOptional: + break; + default: + cerr << ProgramName + << ": Bad ValueMask flag! CommandLine usage error:" + << Handler->getValueExpectedFlag() << "\n"; + abort(); + break; + } + + // If this isn't a multi-arg option, just run the handler. + if (NumAdditionalVals == 0) { + return Handler->addOccurrence(i, ArgName, Value ? Value : ""); + } + // If it is, run the handle several times. 
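
LookupOption's scan reduces to: strip leading dashes, find the first '=', and treat everything after it as the value. Equivalent logic with strchr standing in for the hand-rolled loop:

#include <cassert>
#include <cstring>

// Split "--name=value" into name and optional value.
static void split(const char *arg, const char *&name, const char *&value) {
  while (*arg == '-') ++arg;               // eat leading dashes
  name = arg;
  const char *eq = std::strchr(arg, '=');
  value = eq ? eq + 1 : 0;
}

int main() {
  const char *n, *v;
  split("--mcpu=cortex-a8", n, v);
  assert(std::strncmp(n, "mcpu", 4) == 0);
  assert(std::strcmp(v, "cortex-a8") == 0);
}
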
+ else { + bool MultiArg = false; + + if (Value) { + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + --NumAdditionalVals; + MultiArg = true; + } + + while (NumAdditionalVals > 0) { + + if (i+1 < argc) { + Value = argv[++i]; + } else { + return Handler->error(": not enough values!"); + } + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + MultiArg = true; + --NumAdditionalVals; + } + return false; + } +} + +static bool ProvidePositionalOption(Option *Handler, const std::string &Arg, + int i) { + int Dummy = i; + return ProvideOption(Handler, Handler->ArgStr, Arg.c_str(), 0, 0, Dummy); +} + + +// Option predicates... +static inline bool isGrouping(const Option *O) { + return O->getFormattingFlag() == cl::Grouping; +} +static inline bool isPrefixedOrGrouping(const Option *O) { + return isGrouping(O) || O->getFormattingFlag() == cl::Prefix; +} + +// getOptionPred - Check to see if there are any options that satisfy the +// specified predicate with names that are the prefixes in Name. This is +// checked by progressively stripping characters off of the name, checking to +// see if there options that satisfy the predicate. If we find one, return it, +// otherwise return null. +// +static Option *getOptionPred(std::string Name, size_t &Length, + bool (*Pred)(const Option*), + std::map &OptionsMap) { + + std::map::iterator OMI = OptionsMap.find(Name); + if (OMI != OptionsMap.end() && Pred(OMI->second)) { + Length = Name.length(); + return OMI->second; + } + + if (Name.size() == 1) return 0; + do { + Name.erase(Name.end()-1, Name.end()); // Chop off the last character... + OMI = OptionsMap.find(Name); + + // Loop while we haven't found an option and Name still has at least two + // characters in it (so that the next iteration will not be the empty + // string... + } while ((OMI == OptionsMap.end() || !Pred(OMI->second)) && Name.size() > 1); + + if (OMI != OptionsMap.end() && Pred(OMI->second)) { + Length = Name.length(); + return OMI->second; // Found one! + } + return 0; // No option found! +} + +static bool RequiresValue(const Option *O) { + return O->getNumOccurrencesFlag() == cl::Required || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +static bool EatsUnboundedNumberOfValues(const Option *O) { + return O->getNumOccurrencesFlag() == cl::ZeroOrMore || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +/// ParseCStringVector - Break INPUT up wherever one or more +/// whitespace characters are found, and store the resulting tokens in +/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated +/// using strdup (), so it is the caller's responsibility to free () +/// them later. +/// +static void ParseCStringVector(std::vector &output, + const char *input) { + // Characters which will be treated as token separators: + static const char *const delims = " \v\f\t\r\n"; + + std::string work (input); + // Skip past any delims at head of input string. + size_t pos = work.find_first_not_of (delims); + // If the string consists entirely of delims, then exit early. + if (pos == std::string::npos) return; + // Otherwise, jump forward to beginning of first word. + work = work.substr (pos); + // Find position of first delimiter. + pos = work.find_first_of (delims); + + while (!work.empty() && pos != std::string::npos) { + // Everything from 0 to POS is the next word to copy. + output.push_back (strdup (work.substr (0,pos).c_str ())); + // Is there another word in the string? 
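
getOptionPred's backoff, stripped of the predicate plumbing, is: chop characters off the end of the name until some registered option matches. That is what lets a prefix or grouping option such as "O" resolve inside "-O2". A toy model:

#include <cassert>
#include <map>
#include <string>

static std::string findPrefix(std::string name,
                              const std::map<std::string, int> &opts) {
  // Strip trailing characters until a registered name matches.
  while (name.size() > 1 && opts.find(name) == opts.end())
    name.erase(name.end() - 1);
  return opts.count(name) ? name : std::string();
}

int main() {
  std::map<std::string, int> opts;
  opts["O"] = 1;
  assert(findPrefix("O2", opts) == "O");   // "-O2" resolves to option "O"
}
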
+ size_t nextpos = work.find_first_not_of (delims, pos + 1); + if (nextpos != std::string::npos) { + // Yes? Then remove delims from beginning ... + work = work.substr (work.find_first_not_of (delims, pos + 1)); + // and find the end of the word. + pos = work.find_first_of (delims); + } else { + // No? (Remainder of string is delims.) End the loop. + work = ""; + pos = std::string::npos; + } + } + + // If `input' ended with non-delim char, then we'll get here with + // the last word of `input' in `work'; copy it now. + if (!work.empty ()) { + output.push_back (strdup (work.c_str ())); + } +} + +/// ParseEnvironmentOptions - An alternative entry point to the +/// CommandLine library, which allows you to read the program's name +/// from the caller (as PROGNAME) and its command-line arguments from +/// an environment variable (whose name is given in ENVVAR). +/// +void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, + const char *Overview, bool ReadResponseFiles) { + // Check args. + assert(progName && "Program name not specified"); + assert(envVar && "Environment variable name missing"); + + // Get the environment variable they want us to parse options out of. + const char *envValue = getenv(envVar); + if (!envValue) + return; + + // Get program's "name", which we wouldn't know without the caller + // telling us. + std::vector newArgv; + newArgv.push_back(strdup(progName)); + + // Parse the value of the environment variable into a "command line" + // and hand it off to ParseCommandLineOptions(). + ParseCStringVector(newArgv, envValue); + int newArgc = static_cast(newArgv.size()); + ParseCommandLineOptions(newArgc, &newArgv[0], Overview, ReadResponseFiles); + + // Free all the strdup()ed strings. + for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); + i != e; ++i) + free (*i); +} + + +/// ExpandResponseFiles - Copy the contents of argv into newArgv, +/// substituting the contents of the response files for the arguments +/// of type @file. +static void ExpandResponseFiles(int argc, char** argv, + std::vector& newArgv) { + for (int i = 1; i != argc; ++i) { + char* arg = argv[i]; + + if (arg[0] == '@') { + + sys::PathWithStatus respFile(++arg); + + // Check that the response file is not empty (mmap'ing empty + // files can be problematic). + const sys::FileStatus *FileStat = respFile.getFileStatus(); + if (FileStat && FileStat->getSize() != 0) { + + // Mmap the response file into memory. + OwningPtr + respFilePtr(MemoryBuffer::getFile(respFile.c_str())); + + // If we could open the file, parse its contents, otherwise + // pass the @file option verbatim. + + // TODO: we should also support recursive loading of response files, + // since this is how gcc behaves. (From their man page: "The file may + // itself contain additional @file options; any such options will be + // processed recursively.") + + if (respFilePtr != 0) { + ParseCStringVector(newArgv, respFilePtr->getBufferStart()); + continue; + } + } + } + newArgv.push_back(strdup(arg)); + } +} + +void cl::ParseCommandLineOptions(int argc, char **argv, + const char *Overview, bool ReadResponseFiles) { + // Process all registered options. + std::vector PositionalOpts; + std::vector SinkOpts; + std::map Opts; + GetOptionInfo(PositionalOpts, SinkOpts, Opts); + + assert((!Opts.empty() || !PositionalOpts.empty()) && + "No options specified!"); + + // Expand response files. 
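
Both the environment-variable entry point above and the response-file expansion below feed raw text through ParseCStringVector. Its delimiter walk, restated with std::string in place of strdup'ed C strings:

#include <cassert>
#include <string>
#include <vector>

static std::vector<std::string> tokenize(const std::string &in) {
  static const char *const delims = " \v\f\t\r\n";
  std::vector<std::string> out;
  size_t pos = in.find_first_not_of(delims);
  while (pos != std::string::npos) {
    size_t end = in.find_first_of(delims, pos);   // end of this word
    out.push_back(in.substr(pos, end - pos));
    pos = in.find_first_not_of(delims, end);      // start of the next
  }
  return out;
}

int main() {
  std::vector<std::string> t = tokenize("  -O2\t-o out  ");
  assert(t.size() == 3 && t[0] == "-O2" && t[2] == "out");
}
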
+ std::vector newArgv; + if (ReadResponseFiles) { + newArgv.push_back(strdup(argv[0])); + ExpandResponseFiles(argc, argv, newArgv); + argv = &newArgv[0]; + argc = static_cast(newArgv.size()); + } + + // Copy the program name into ProgName, making sure not to overflow it. + std::string ProgName = sys::Path(argv[0]).getLast(); + if (ProgName.size() > 79) ProgName.resize(79); + strcpy(ProgramName, ProgName.c_str()); + + ProgramOverview = Overview; + bool ErrorParsing = false; + + // Check out the positional arguments to collect information about them. + unsigned NumPositionalRequired = 0; + + // Determine whether or not there are an unlimited number of positionals + bool HasUnlimitedPositionals = false; + + Option *ConsumeAfterOpt = 0; + if (!PositionalOpts.empty()) { + if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) { + assert(PositionalOpts.size() > 1 && + "Cannot specify cl::ConsumeAfter without a positional argument!"); + ConsumeAfterOpt = PositionalOpts[0]; + } + + // Calculate how many positional values are _required_. + bool UnboundedFound = false; + for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size(); + i != e; ++i) { + Option *Opt = PositionalOpts[i]; + if (RequiresValue(Opt)) + ++NumPositionalRequired; + else if (ConsumeAfterOpt) { + // ConsumeAfter cannot be combined with "optional" positional options + // unless there is only one positional argument... + if (PositionalOpts.size() > 2) + ErrorParsing |= + Opt->error(" error - this positional option will never be matched, " + "because it does not Require a value, and a " + "cl::ConsumeAfter option is active!"); + } else if (UnboundedFound && !Opt->ArgStr[0]) { + // This option does not "require" a value... Make sure this option is + // not specified after an option that eats all extra arguments, or this + // one will never get any! + // + ErrorParsing |= Opt->error(" error - option can never match, because " + "another positional argument will match an " + "unbounded number of values, and this option" + " does not require a value!"); + } + UnboundedFound |= EatsUnboundedNumberOfValues(Opt); + } + HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt; + } + + // PositionalVals - A vector of "positional" arguments we accumulate into + // the process at the end... + // + std::vector > PositionalVals; + + // If the program has named positional arguments, and the name has been run + // across, keep track of which positional argument was named. Otherwise put + // the positional args into the PositionalVals list... + Option *ActivePositionalArg = 0; + + // Loop over all of the arguments... processing them. + bool DashDashFound = false; // Have we read '--'? + for (int i = 1; i < argc; ++i) { + Option *Handler = 0; + const char *Value = 0; + const char *ArgName = ""; + + // If the option list changed, this means that some command line + // option has just been registered or deregistered. This can occur in + // response to things like -load, etc. If this happens, rescan the options. + if (OptionListChanged) { + PositionalOpts.clear(); + SinkOpts.clear(); + Opts.clear(); + GetOptionInfo(PositionalOpts, SinkOpts, Opts); + OptionListChanged = false; + } + + // Check to see if this is a positional argument. This argument is + // considered to be positional if it doesn't start with '-', if it is "-" + // itself, or if we have seen "--" already. + // + if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) { + // Positional argument! 
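
ExpandResponseFiles substitutes each @file argument with the tokens inside the file, one level deep; the TODO above notes that gcc expands recursively. A hypothetical in-memory model of the recursive behaviour (the FileMap and the space-only tokenizing are illustrative stand-ins, not the real MemoryBuffer path):

#include <cassert>
#include <map>
#include <string>
#include <vector>

typedef std::map<std::string, std::string> FileMap;   // name -> contents

static void expand(const std::vector<std::string> &in, const FileMap &fs,
                   std::vector<std::string> &out) {
  for (size_t i = 0; i < in.size(); ++i) {
    if (!in[i].empty() && in[i][0] == '@') {
      FileMap::const_iterator f = fs.find(in[i].substr(1));
      if (f != fs.end()) {
        std::vector<std::string> nested;
        std::string::size_type p = 0, e;             // split on spaces
        while ((e = f->second.find(' ', p)) != std::string::npos) {
          nested.push_back(f->second.substr(p, e - p));
          p = e + 1;
        }
        nested.push_back(f->second.substr(p));
        expand(nested, fs, out);                     // recurse into @files
        continue;
      }
    }
    out.push_back(in[i]);   // pass verbatim if not expandable
  }
}

int main() {
  FileMap fs;
  fs["a.rsp"] = "-O2 @b.rsp";
  fs["b.rsp"] = "-g";
  std::vector<std::string> in(1, "@a.rsp"), out;
  expand(in, fs, out);
  assert(out.size() == 2 && out[0] == "-O2" && out[1] == "-g");
}
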
+ if (ActivePositionalArg) { + ProvidePositionalOption(ActivePositionalArg, argv[i], i); + continue; // We are done! + } else if (!PositionalOpts.empty()) { + PositionalVals.push_back(std::make_pair(argv[i],i)); + + // All of the positional arguments have been fulfulled, give the rest to + // the consume after option... if it's specified... + // + if (PositionalVals.size() >= NumPositionalRequired && + ConsumeAfterOpt != 0) { + for (++i; i < argc; ++i) + PositionalVals.push_back(std::make_pair(argv[i],i)); + break; // Handle outside of the argument processing loop... + } + + // Delay processing positional arguments until the end... + continue; + } + } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 && + !DashDashFound) { + DashDashFound = true; // This is the mythical "--"? + continue; // Don't try to process it as an argument itself. + } else if (ActivePositionalArg && + (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) { + // If there is a positional argument eating options, check to see if this + // option is another positional argument. If so, treat it as an argument, + // otherwise feed it to the eating positional. + ArgName = argv[i]+1; + Handler = LookupOption(ArgName, Value, Opts); + if (!Handler || Handler->getFormattingFlag() != cl::Positional) { + ProvidePositionalOption(ActivePositionalArg, argv[i], i); + continue; // We are done! + } + + } else { // We start with a '-', must be an argument... + ArgName = argv[i]+1; + Handler = LookupOption(ArgName, Value, Opts); + + // Check to see if this "option" is really a prefixed or grouped argument. + if (Handler == 0) { + std::string RealName(ArgName); + if (RealName.size() > 1) { + size_t Length = 0; + Option *PGOpt = getOptionPred(RealName, Length, isPrefixedOrGrouping, + Opts); + + // If the option is a prefixed option, then the value is simply the + // rest of the name... so fall through to later processing, by + // setting up the argument name flags and value fields. + // + if (PGOpt && PGOpt->getFormattingFlag() == cl::Prefix) { + Value = ArgName+Length; + assert(Opts.find(std::string(ArgName, Value)) != Opts.end() && + Opts.find(std::string(ArgName, Value))->second == PGOpt); + Handler = PGOpt; + } else if (PGOpt) { + // This must be a grouped option... handle them now. + assert(isGrouping(PGOpt) && "Broken getOptionPred!"); + + do { + // Move current arg name out of RealName into RealArgName... + std::string RealArgName(RealName.begin(), + RealName.begin() + Length); + RealName.erase(RealName.begin(), RealName.begin() + Length); + + // Because ValueRequired is an invalid flag for grouped arguments, + // we don't need to pass argc/argv in... + // + assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && + "Option can not be cl::Grouping AND cl::ValueRequired!"); + int Dummy; + ErrorParsing |= ProvideOption(PGOpt, RealArgName.c_str(), + 0, 0, 0, Dummy); + + // Get the next grouping option... + PGOpt = getOptionPred(RealName, Length, isGrouping, Opts); + } while (PGOpt && Length != RealName.size()); + + Handler = PGOpt; // Ate all of the options. + } + } + } + } + + if (Handler == 0) { + if (SinkOpts.empty()) { + cerr << ProgramName << ": Unknown command line argument '" + << argv[i] << "'. Try: '" << argv[0] << " --help'\n"; + ErrorParsing = true; + } else { + for (std::vector::iterator I = SinkOpts.begin(), + E = SinkOpts.end(); I != E ; ++I) + (*I)->addOccurrence(i, "", argv[i]); + } + continue; + } + + // Check to see if this option accepts a comma separated list of values. 
If + // it does, we have to split up the value into multiple values... + if (Value && Handler->getMiscFlags() & CommaSeparated) { + std::string Val(Value); + std::string::size_type Pos = Val.find(','); + + while (Pos != std::string::npos) { + // Process the portion before the comma... + ErrorParsing |= ProvideOption(Handler, ArgName, + std::string(Val.begin(), + Val.begin()+Pos).c_str(), + argc, argv, i); + // Erase the portion before the comma, AND the comma... + Val.erase(Val.begin(), Val.begin()+Pos+1); + Value += Pos+1; // Increment the original value pointer as well... + + // Check for another comma... + Pos = Val.find(','); + } + } + + // If this is a named positional argument, just remember that it is the + // active one... + if (Handler->getFormattingFlag() == cl::Positional) + ActivePositionalArg = Handler; + else + ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i); + } + + // Check and handle positional arguments now... + if (NumPositionalRequired > PositionalVals.size()) { + cerr << ProgramName + << ": Not enough positional command line arguments specified!\n" + << "Must specify at least " << NumPositionalRequired + << " positional arguments: See: " << argv[0] << " --help\n"; + + ErrorParsing = true; + } else if (!HasUnlimitedPositionals + && PositionalVals.size() > PositionalOpts.size()) { + cerr << ProgramName + << ": Too many positional arguments specified!\n" + << "Can specify at most " << PositionalOpts.size() + << " positional arguments: See: " << argv[0] << " --help\n"; + ErrorParsing = true; + + } else if (ConsumeAfterOpt == 0) { + // Positional args have already been handled if ConsumeAfter is specified... + unsigned ValNo = 0, NumVals = static_cast(PositionalVals.size()); + for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { + if (RequiresValue(PositionalOpts[i])) { + ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + --NumPositionalRequired; // We fulfilled our duty... + } + + // If we _can_ give this option more arguments, do so now, as long as we + // do not give it values that others need. 'Done' controls whether the + // option even _WANTS_ any more. + // + bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required; + while (NumVals-ValNo > NumPositionalRequired && !Done) { + switch (PositionalOpts[i]->getNumOccurrencesFlag()) { + case cl::Optional: + Done = true; // Optional arguments want _at most_ one value + // FALL THROUGH + case cl::ZeroOrMore: // Zero or more will take all they can get... + case cl::OneOrMore: // One or more will take all they can get... + ProvidePositionalOption(PositionalOpts[i], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + break; + default: + assert(0 && "Internal error, unexpected NumOccurrences flag in " + "positional argument processing!"); + } + } + } + } else { + assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size()); + unsigned ValNo = 0; + for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j) + if (RequiresValue(PositionalOpts[j])) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[j], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle the case where there is just one positional option, and it's + // optional. In this case, we want to give JUST THE FIRST option to the + // positional option and keep the rest for the consume after. The above + // loop would have assigned no values to positional options in this case. 
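
The cl::CommaSeparated loop hands the option one occurrence per comma-delimited piece, with the text after the final comma flowing through the ordinary ProvideOption call. The splitting on its own:

#include <cassert>
#include <string>
#include <vector>

// "a,b,c" becomes three separate occurrences.
static std::vector<std::string> splitCommas(std::string val) {
  std::vector<std::string> out;
  std::string::size_type pos;
  while ((pos = val.find(',')) != std::string::npos) {
    out.push_back(val.substr(0, pos));
    val.erase(0, pos + 1);
  }
  out.push_back(val);    // the trailing piece
  return out;
}

int main() {
  std::vector<std::string> v = splitCommas("x86,arm,ppc");
  assert(v.size() == 3 && v[1] == "arm");
}
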
+ // + if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[1], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle over all of the rest of the arguments to the + // cl::ConsumeAfter command line option... + for (; ValNo != PositionalVals.size(); ++ValNo) + ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt, + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + } + + // Loop over args and make sure all required args are specified! + for (std::map::iterator I = Opts.begin(), + E = Opts.end(); I != E; ++I) { + switch (I->second->getNumOccurrencesFlag()) { + case Required: + case OneOrMore: + if (I->second->getNumOccurrences() == 0) { + I->second->error(" must be specified at least once!"); + ErrorParsing = true; + } + // Fall through + default: + break; + } + } + + // Free all of the memory allocated to the map. Command line options may only + // be processed once! + Opts.clear(); + PositionalOpts.clear(); + MoreHelp->clear(); + + // Free the memory allocated by ExpandResponseFiles. + if (ReadResponseFiles) { + // Free all the strdup()ed strings. + for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); + i != e; ++i) + free (*i); + } + + // If we had an error processing our arguments, don't let the program execute + if (ErrorParsing) exit(1); +} + +//===----------------------------------------------------------------------===// +// Option Base class implementation +// + +bool Option::error(std::string Message, const char *ArgName) { + if (ArgName == 0) ArgName = ArgStr; + if (ArgName[0] == 0) + cerr << HelpStr; // Be nice for positional arguments + else + cerr << ProgramName << ": for the -" << ArgName; + + cerr << " option: " << Message << "\n"; + return true; +} + +bool Option::addOccurrence(unsigned pos, const char *ArgName, + const std::string &Value, + bool MultiArg) { + if (!MultiArg) + NumOccurrences++; // Increment the number of times we have been seen + + switch (getNumOccurrencesFlag()) { + case Optional: + if (NumOccurrences > 1) + return error(": may only occur zero or one times!", ArgName); + break; + case Required: + if (NumOccurrences > 1) + return error(": must occur exactly one time!", ArgName); + // Fall through + case OneOrMore: + case ZeroOrMore: + case ConsumeAfter: break; + default: return error(": bad num occurrences flag value!"); + } + + return handleOccurrence(pos, ArgName, Value); +} + + +// getValueStr - Get the value description string, using "DefaultMsg" if nothing +// has been specified yet. +// +static const char *getValueStr(const Option &O, const char *DefaultMsg) { + if (O.ValueStr[0] == 0) return DefaultMsg; + return O.ValueStr; +} + +//===----------------------------------------------------------------------===// +// cl::alias class implementation +// + +// Return the width of the option tag for printing... +size_t alias::getOptionWidth() const { + return std::strlen(ArgStr)+6; +} + +// Print out the option for the alias. +void alias::printOptionInfo(size_t GlobalWidth) const { + size_t L = std::strlen(ArgStr); + cout << " -" << ArgStr << std::string(GlobalWidth-L-6, ' ') << " - " + << HelpStr << "\n"; +} + + + +//===----------------------------------------------------------------------===// +// Parser Implementation code... +// + +// basic_parser implementation +// + +// Return the width of the option tag for printing... 
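
addOccurrence's switch below boils down to a small policy table: cl::Optional tolerates at most one occurrence, cl::Required exactly one, and the *OrMore flavours any number. Reduced to a predicate:

#include <cassert>

enum Flag { Optional, Required, ZeroOrMore, OneOrMore };

// True when another occurrence would violate the option's policy.
static bool tooMany(Flag f, unsigned seen) {
  return (f == Optional || f == Required) && seen > 1;
}

int main() {
  assert(!tooMany(Optional, 1));
  assert(tooMany(Required, 2));
  assert(!tooMany(OneOrMore, 5));
}
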
+size_t basic_parser_impl::getOptionWidth(const Option &O) const { + size_t Len = std::strlen(O.ArgStr); + if (const char *ValName = getValueName()) + Len += std::strlen(getValueStr(O, ValName))+3; + + return Len + 6; +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. +// +void basic_parser_impl::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + cout << " -" << O.ArgStr; + + if (const char *ValName = getValueName()) + cout << "=<" << getValueStr(O, ValName) << ">"; + + cout << std::string(GlobalWidth-getOptionWidth(O), ' ') << " - " + << O.HelpStr << "\n"; +} + + + + +// parser implementation +// +bool parser::parse(Option &O, const char *ArgName, + const std::string &Arg, bool &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = true; + } else if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + Value = false; + } else { + return O.error(": '" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); + } + return false; +} + +// parser implementation +// +bool parser::parse(Option &O, const char *ArgName, + const std::string &Arg, boolOrDefault &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = BOU_TRUE; + } else if (Arg == "false" || Arg == "FALSE" + || Arg == "False" || Arg == "0") { + Value = BOU_FALSE; + } else { + return O.error(": '" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); + } + return false; +} + +// parser implementation +// +bool parser::parse(Option &O, const char *ArgName, + const std::string &Arg, int &Value) { + char *End; + Value = (int)strtol(Arg.c_str(), &End, 0); + if (*End != 0) + return O.error(": '" + Arg + "' value invalid for integer argument!"); + return false; +} + +// parser implementation +// +bool parser::parse(Option &O, const char *ArgName, + const std::string &Arg, unsigned &Value) { + char *End; + errno = 0; + unsigned long V = strtoul(Arg.c_str(), &End, 0); + Value = (unsigned)V; + if (((V == ULONG_MAX) && (errno == ERANGE)) + || (*End != 0) + || (Value != V)) + return O.error(": '" + Arg + "' value invalid for uint argument!"); + return false; +} + +// parser/parser implementation +// +static bool parseDouble(Option &O, const std::string &Arg, double &Value) { + const char *ArgStart = Arg.c_str(); + char *End; + Value = strtod(ArgStart, &End); + if (*End != 0) + return O.error(": '" +Arg+ "' value invalid for floating point argument!"); + return false; +} + +bool parser::parse(Option &O, const char *AN, + const std::string &Arg, double &Val) { + return parseDouble(O, Arg, Val); +} + +bool parser::parse(Option &O, const char *AN, + const std::string &Arg, float &Val) { + double dVal; + if (parseDouble(O, Arg, dVal)) + return true; + Val = (float)dVal; + return false; +} + + + +// generic_parser_base implementation +// + +// findOption - Return the option number corresponding to the specified +// argument string. If the option is not found, getNumOptions() is returned. +// +unsigned generic_parser_base::findOption(const char *Name) { + unsigned i = 0, e = getNumOptions(); + std::string N(Name); + + while (i != e) + if (getOption(i) == N) + return i; + else + ++i; + return e; +} + + +// Return the width of the option tag for printing... 
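
Every numeric parser above follows the same strtol/strtoul/strtod contract: the conversion succeeded only if the end pointer landed on the terminating NUL. The int case in isolation, with an added guard against empty input (an assumption of this sketch, not present in the code above):

#include <cassert>
#include <cstdlib>

static bool parseInt(const char *s, int &out) {
  char *end;
  out = (int)std::strtol(s, &end, 0);
  return *end == 0 && end != s;   // whole string consumed, and non-empty
}

int main() {
  int v;
  assert(parseInt("42", v) && v == 42);
  assert(parseInt("0x10", v) && v == 16);   // base 0 accepts hex and octal
  assert(!parseInt("12abc", v));
}
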
+size_t generic_parser_base::getOptionWidth(const Option &O) const { + if (O.hasArgStr()) { + size_t Size = std::strlen(O.ArgStr)+6; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + Size = std::max(Size, std::strlen(getOption(i))+8); + return Size; + } else { + size_t BaseSize = 0; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8); + return BaseSize; + } +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. +// +void generic_parser_base::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + if (O.hasArgStr()) { + size_t L = std::strlen(O.ArgStr); + cout << " -" << O.ArgStr << std::string(GlobalWidth-L-6, ' ') + << " - " << O.HelpStr << "\n"; + + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; + cout << " =" << getOption(i) << std::string(NumSpaces, ' ') + << " - " << getDescription(i) << "\n"; + } + } else { + if (O.HelpStr[0]) + cout << " " << O.HelpStr << "\n"; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + size_t L = std::strlen(getOption(i)); + cout << " -" << getOption(i) << std::string(GlobalWidth-L-8, ' ') + << " - " << getDescription(i) << "\n"; + } + } +} + + +//===----------------------------------------------------------------------===// +// --help and --help-hidden option implementation +// + +namespace { + +class HelpPrinter { + size_t MaxArgLen; + const Option *EmptyArg; + const bool ShowHidden; + + // isHidden/isReallyHidden - Predicates to be used to filter down arg lists. + inline static bool isHidden(std::pair &OptPair) { + return OptPair.second->getOptionHiddenFlag() >= Hidden; + } + inline static bool isReallyHidden(std::pair &OptPair) { + return OptPair.second->getOptionHiddenFlag() == ReallyHidden; + } + +public: + explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) { + EmptyArg = 0; + } + + void operator=(bool Value) { + if (Value == false) return; + + // Get all the options. + std::vector PositionalOpts; + std::vector SinkOpts; + std::map OptMap; + GetOptionInfo(PositionalOpts, SinkOpts, OptMap); + + // Copy Options into a vector so we can sort them as we like... + std::vector > Opts; + copy(OptMap.begin(), OptMap.end(), std::back_inserter(Opts)); + + // Eliminate Hidden or ReallyHidden arguments, depending on ShowHidden + Opts.erase(std::remove_if(Opts.begin(), Opts.end(), + std::ptr_fun(ShowHidden ? isReallyHidden : isHidden)), + Opts.end()); + + // Eliminate duplicate entries in table (from enum flags options, f.e.) + { // Give OptionSet a scope + std::set OptionSet; + for (unsigned i = 0; i != Opts.size(); ++i) + if (OptionSet.count(Opts[i].second) == 0) + OptionSet.insert(Opts[i].second); // Add new entry to set + else + Opts.erase(Opts.begin()+i--); // Erase duplicate + } + + if (ProgramOverview) + cout << "OVERVIEW: " << ProgramOverview << "\n"; + + cout << "USAGE: " << ProgramName << " [options]"; + + // Print out the positional options. + Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists... + if (!PositionalOpts.empty() && + PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter) + CAOpt = PositionalOpts[0]; + + for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) { + if (PositionalOpts[i]->ArgStr[0]) + cout << " --" << PositionalOpts[i]->ArgStr; + cout << " " << PositionalOpts[i]->HelpStr; + } + + // Print the consume after option info if it exists... 
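
HelpPrinter prunes its sorted copy rather than filtering while printing: the erase(remove_if(...)) idiom above compacts the surviving entries, and a follow-up std::set pass drops duplicate Option* values that arrived under several names. The pruning step alone:

#include <algorithm>
#include <cassert>
#include <vector>

struct O { const char *name; bool hidden; };
static bool isHidden(const O &o) { return o.hidden; }

int main() {
  std::vector<O> opts;
  O a = { "o", false }, b = { "debug-only", true };
  opts.push_back(a);
  opts.push_back(b);
  // remove_if shuffles survivors to the front; erase trims the tail.
  opts.erase(std::remove_if(opts.begin(), opts.end(), isHidden), opts.end());
  assert(opts.size() == 1);
}
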
+ if (CAOpt) cout << " " << CAOpt->HelpStr; + + cout << "\n\n"; + + // Compute the maximum argument length... + MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + cout << "OPTIONS:\n"; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionInfo(MaxArgLen); + + // Print any extra help the user has declared. + for (std::vector::iterator I = MoreHelp->begin(), + E = MoreHelp->end(); I != E; ++I) + cout << *I; + MoreHelp->clear(); + + // Halt the program since help information was printed + exit(1); + } +}; +} // End anonymous namespace + +// Define the two HelpPrinter instances that are used to print out help, or +// help-hidden... +// +static HelpPrinter NormalPrinter(false); +static HelpPrinter HiddenPrinter(true); + +static cl::opt > +HOp("help", cl::desc("Display available options (--help-hidden for more)"), + cl::location(NormalPrinter), cl::ValueDisallowed); + +static cl::opt > +HHOp("help-hidden", cl::desc("Display all available options"), + cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed); + +static void (*OverrideVersionPrinter)() = 0; + +namespace { +class VersionPrinter { +public: + void print() { + cout << "Low Level Virtual Machine (http://llvm.org/):\n"; + cout << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; +#ifdef LLVM_VERSION_INFO + cout << LLVM_VERSION_INFO; +#endif + cout << "\n "; +#ifndef __OPTIMIZE__ + cout << "DEBUG build"; +#else + cout << "Optimized build"; +#endif +#ifndef NDEBUG + cout << " with assertions"; +#endif + cout << ".\n"; + cout << " Built " << __DATE__ << "(" << __TIME__ << ").\n"; + } + void operator=(bool OptionWasSpecified) { + if (OptionWasSpecified) { + if (OverrideVersionPrinter == 0) { + print(); + exit(1); + } else { + (*OverrideVersionPrinter)(); + exit(1); + } + } + } +}; +} // End anonymous namespace + + +// Define the --version option that prints out the LLVM version for the tool +static VersionPrinter VersionPrinterInstance; + +static cl::opt > +VersOp("version", cl::desc("Display the version of this program"), + cl::location(VersionPrinterInstance), cl::ValueDisallowed); + +// Utility function for printing the help message. +void cl::PrintHelpMessage() { + // This looks weird, but it actually prints the help message. The + // NormalPrinter variable is a HelpPrinter and the help gets printed when + // its operator= is invoked. That's because the "normal" usages of the + // help printer is to be assigned true/false depending on whether the + // --help option was given or not. Since we're circumventing that we have + // to make it look like --help was given, so we assign true. + NormalPrinter = true; +} + +/// Utility function for printing version number. +void cl::PrintVersionMessage() { + VersionPrinterInstance.print(); +} + +void cl::SetVersionPrinter(void (*func)()) { + OverrideVersionPrinter = func; +} diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp new file mode 100644 index 000000000000..cb8c4b013c32 --- /dev/null +++ b/lib/Support/ConstantRange.cpp @@ -0,0 +1,472 @@ +//===-- ConstantRange.cpp - ConstantRange implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Represent a range of possible values that may occur when the program is run +// for an integral value. This keeps track of a lower and upper bound for the +// constant, which MAY wrap around the end of the numeric range. To do this, it +// keeps track of a [lower, upper) bound, which specifies an interval just like +// STL iterators. When used with boolean values, the following are important +// ranges (other integral ranges use min/max values for special range values): +// +// [F, F) = {} = Empty set +// [T, F) = {T} +// [F, T) = {F} +// [T, T) = {F, T} = Full set +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// Initialize a full (the default) or empty set for the specified type. +/// +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) : + Lower(BitWidth, 0), Upper(BitWidth, 0) { + if (Full) + Lower = Upper = APInt::getMaxValue(BitWidth); + else + Lower = Upper = APInt::getMinValue(BitWidth); +} + +/// Initialize a range to hold the single specified value. +/// +ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) { } + +ConstantRange::ConstantRange(const APInt &L, const APInt &U) : + Lower(L), Upper(U) { + assert(L.getBitWidth() == U.getBitWidth() && + "ConstantRange with unequal bit widths"); + assert((L != U || (L.isMaxValue() || L.isMinValue())) && + "Lower == Upper, but they aren't min or max value!"); +} + +/// isFullSet - Return true if this set contains all of the elements possible +/// for this data-type +bool ConstantRange::isFullSet() const { + return Lower == Upper && Lower.isMaxValue(); +} + +/// isEmptySet - Return true if this set contains no members. +/// +bool ConstantRange::isEmptySet() const { + return Lower == Upper && Lower.isMinValue(); +} + +/// isWrappedSet - Return true if this set wraps around the top of the range, +/// for example: [100, 8) +/// +bool ConstantRange::isWrappedSet() const { + return Lower.ugt(Upper); +} + +/// getSetSize - Return the number of elements in this set. +/// +APInt ConstantRange::getSetSize() const { + if (isEmptySet()) + return APInt(getBitWidth(), 0); + if (getBitWidth() == 1) { + if (Lower != Upper) // One of T or F in the set... + return APInt(2, 1); + return APInt(2, 2); // Must be full set... + } + + // Simply subtract the bounds... + return Upper - Lower; +} + +/// getUnsignedMax - Return the largest unsigned value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getUnsignedMax() const { + if (isFullSet() || isWrappedSet()) + return APInt::getMaxValue(getBitWidth()); + else + return getUpper() - 1; +} + +/// getUnsignedMin - Return the smallest unsigned value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getUnsignedMin() const { + if (isFullSet() || (isWrappedSet() && getUpper() != 0)) + return APInt::getMinValue(getBitWidth()); + else + return getLower(); +} + +/// getSignedMax - Return the largest signed value contained in the +/// ConstantRange. 
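
A wrapped range such as [250, 5) on 8 bits covers {250..255, 0..4}, which is why getUnsignedMax must answer 255 the moment the set wraps. Membership under that encoding, ignoring the Lower == Upper full/empty convention the real class uses:

#include <cassert>
#include <cstdint>

static bool inRange(std::uint8_t lo, std::uint8_t hi, std::uint8_t v) {
  if (lo == hi) return false;              // toy: treat as empty
  if (lo < hi)  return lo <= v && v < hi;  // ordinary interval
  return lo <= v || v < hi;                // wrapped: either side of the seam
}

int main() {
  assert(inRange(250, 5, 252));
  assert(inRange(250, 5, 3));
  assert(!inRange(250, 5, 100));
}
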
+/// +APInt ConstantRange::getSignedMax() const { + APInt SignedMax(APInt::getSignedMaxValue(getBitWidth())); + if (!isWrappedSet()) { + if (getLower().sle(getUpper() - 1)) + return getUpper() - 1; + else + return SignedMax; + } else { + if ((getUpper() - 1).slt(getLower())) { + if (getLower() != SignedMax) + return SignedMax; + else + return getUpper() - 1; + } else { + return getUpper() - 1; + } + } +} + +/// getSignedMin - Return the smallest signed value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getSignedMin() const { + APInt SignedMin(APInt::getSignedMinValue(getBitWidth())); + if (!isWrappedSet()) { + if (getLower().sle(getUpper() - 1)) + return getLower(); + else + return SignedMin; + } else { + if ((getUpper() - 1).slt(getLower())) { + if (getUpper() != SignedMin) + return SignedMin; + else + return getLower(); + } else { + return getLower(); + } + } +} + +/// contains - Return true if the specified value is in the set. +/// +bool ConstantRange::contains(const APInt &V) const { + if (Lower == Upper) + return isFullSet(); + + if (!isWrappedSet()) + return Lower.ule(V) && V.ult(Upper); + else + return Lower.ule(V) || V.ult(Upper); +} + +/// subtract - Subtract the specified constant from the endpoints of this +/// constant range. +ConstantRange ConstantRange::subtract(const APInt &Val) const { + assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width"); + // If the set is empty or full, don't modify the endpoints. + if (Lower == Upper) + return *this; + return ConstantRange(Lower - Val, Upper - Val); +} + + +// intersect1Wrapped - This helper function is used to intersect two ranges when +// it is known that LHS is wrapped and RHS isn't. +// +ConstantRange +ConstantRange::intersect1Wrapped(const ConstantRange &LHS, + const ConstantRange &RHS) { + assert(LHS.isWrappedSet() && !RHS.isWrappedSet()); + + // Check to see if we overlap on the Left side of RHS... + // + if (RHS.Lower.ult(LHS.Upper)) { + // We do overlap on the left side of RHS, see if we overlap on the right of + // RHS... + if (RHS.Upper.ugt(LHS.Lower)) { + // Ok, the result overlaps on both the left and right sides. See if the + // resultant interval will be smaller if we wrap or not... + // + if (LHS.getSetSize().ult(RHS.getSetSize())) + return LHS; + else + return RHS; + + } else { + // No overlap on the right, just on the left. + return ConstantRange(RHS.Lower, LHS.Upper); + } + } else { + // We don't overlap on the left side of RHS, see if we overlap on the right + // of RHS... + if (RHS.Upper.ugt(LHS.Lower)) { + // Simple overlap... + return ConstantRange(LHS.Lower, RHS.Upper); + } else { + // No overlap... + return ConstantRange(LHS.getBitWidth(), false); + } + } +} + +/// intersectWith - Return the range that results from the intersection of this +/// range with another range. +/// +ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { + assert(getBitWidth() == CR.getBitWidth() && + "ConstantRange types don't agree!"); + // Handle common special cases + if (isEmptySet() || CR.isFullSet()) + return *this; + if (isFullSet() || CR.isEmptySet()) + return CR; + + if (!isWrappedSet()) { + if (!CR.isWrappedSet()) { + using namespace APIntOps; + APInt L = umax(Lower, CR.Lower); + APInt U = umin(Upper, CR.Upper); + + if (L.ult(U)) // If range isn't empty... + return ConstantRange(L, U); + else + return ConstantRange(getBitWidth(), false);// Otherwise, empty set + } else + return intersect1Wrapped(CR, *this); + } else { // We know "this" is wrapped... 
+ if (!CR.isWrappedSet()) + return intersect1Wrapped(*this, CR); + else { + // Both ranges are wrapped... + using namespace APIntOps; + APInt L = umax(Lower, CR.Lower); + APInt U = umin(Upper, CR.Upper); + return ConstantRange(L, U); + } + } + return *this; +} + +/// maximalIntersectWith - Return the range that results from the intersection +/// of this range with another range. The resultant range is guaranteed to +/// include all elements contained in both input ranges, and to have the +/// smallest possible set size that does so. Because there may be two +/// intersections with the same set size, A.maximalIntersectWith(B) might not +/// be equal to B.maximalIntersect(A). +ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const { + assert(getBitWidth() == CR.getBitWidth() && + "ConstantRange types don't agree!"); + + // Handle common cases. + if ( isEmptySet() || CR.isFullSet()) return *this; + if (CR.isEmptySet() || isFullSet()) return CR; + + if (!isWrappedSet() && CR.isWrappedSet()) + return CR.maximalIntersectWith(*this); + + if (!isWrappedSet() && !CR.isWrappedSet()) { + if (Lower.ult(CR.Lower)) { + if (Upper.ule(CR.Lower)) + return ConstantRange(getBitWidth(), false); + + if (Upper.ult(CR.Upper)) + return ConstantRange(CR.Lower, Upper); + + return CR; + } else { + if (Upper.ult(CR.Upper)) + return *this; + + if (Lower.ult(CR.Upper)) + return ConstantRange(Lower, CR.Upper); + + return ConstantRange(getBitWidth(), false); + } + } + + if (isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Lower.ult(Upper)) { + if (CR.Upper.ult(Upper)) + return CR; + + if (CR.Upper.ult(Lower)) + return ConstantRange(CR.Lower, Upper); + + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; + } else if (CR.Lower.ult(Lower)) { + if (CR.Upper.ule(Lower)) + return ConstantRange(getBitWidth(), false); + + return ConstantRange(Lower, CR.Upper); + } + return CR; + } + + if (CR.Upper.ult(Upper)) { + if (CR.Lower.ult(Upper)) { + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; + } + + if (CR.Lower.ult(Lower)) + return ConstantRange(Lower, CR.Upper); + + return CR; + } else if (CR.Upper.ult(Lower)) { + if (CR.Lower.ult(Lower)) + return *this; + + return ConstantRange(CR.Lower, Upper); + } + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; +} + + +/// unionWith - Return the range that results from the union of this range with +/// another range. The resultant range is guaranteed to include the elements of +/// both sets, but may contain more. For example, [3, 9) union [12,15) is +/// [3, 15), which includes 9, 10, and 11, which were not included in either +/// set before. 
+/// +ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { + assert(getBitWidth() == CR.getBitWidth() && + "ConstantRange types don't agree!"); + + if ( isFullSet() || CR.isEmptySet()) return *this; + if (CR.isFullSet() || isEmptySet()) return CR; + + if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this); + + APInt L = Lower, U = Upper; + + if (!isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Lower.ult(L)) + L = CR.Lower; + + if (CR.Upper.ugt(U)) + U = CR.Upper; + } + + if (isWrappedSet() && !CR.isWrappedSet()) { + if ((CR.Lower.ult(Upper) && CR.Upper.ult(Upper)) || + (CR.Lower.ugt(Lower) && CR.Upper.ugt(Lower))) { + return *this; + } + + if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) { + return ConstantRange(getBitWidth()); + } + + if (CR.Lower.ule(Upper) && CR.Upper.ule(Lower)) { + APInt d1 = CR.Upper - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) { + U = CR.Upper; + } else { + L = CR.Upper; + } + } + + if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower)) { + APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) { + U = CR.Lower + 1; + } else { + L = CR.Upper - 1; + } + } + + if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) { + APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Lower; + + if (d1.ult(d2)) { + U = CR.Lower + 1; + } else { + L = CR.Lower; + } + } + } + + if (isWrappedSet() && CR.isWrappedSet()) { + if (Lower.ult(CR.Upper) || CR.Lower.ult(Upper)) + return ConstantRange(getBitWidth()); + + if (CR.Upper.ugt(U)) { + U = CR.Upper; + } + + if (CR.Lower.ult(L)) { + L = CR.Lower; + } + + if (L == U) return ConstantRange(getBitWidth()); + } + + return ConstantRange(L, U); +} + +/// zeroExtend - Return a new range in the specified integer type, which must +/// be strictly larger than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// zero extended. +ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize < DstTySize && "Not a value extension"); + if (isFullSet()) + // Change a source full set into [0, 1 << 8*numbytes) + return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize)); + + APInt L = Lower; L.zext(DstTySize); + APInt U = Upper; U.zext(DstTySize); + return ConstantRange(L, U); +} + +/// signExtend - Return a new range in the specified integer type, which must +/// be strictly larger than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// sign extended. +ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize < DstTySize && "Not a value extension"); + if (isFullSet()) { + return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), + APInt::getLowBitsSet(DstTySize, SrcTySize-1)); + } + + APInt L = Lower; L.sext(DstTySize); + APInt U = Upper; U.sext(DstTySize); + return ConstantRange(L, U); +} + +/// truncate - Return a new range in the specified integer type, which must be +/// strictly smaller than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// truncated to the specified type. 
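// --[editor's aside, not part of the patch]-----------------------------------
// Worked check of the unionWith contract stated above: the result must cover
// both inputs but may cover more, because it has to stay a single interval.
#include <cassert>

int main() {
  // [3, 9) union [12, 15) cannot be represented exactly as one interval;
  // the tightest single interval covering both is [3, 15).
  int Lo = 3, Hi = 15;
  for (int V = 3; V < 9; ++V)   assert(Lo <= V && V < Hi);
  for (int V = 12; V < 15; ++V) assert(Lo <= V && V < Hi);
  // 9, 10, and 11 are the "extra" elements the union picks up:
  assert(Lo <= 9 && 11 < Hi);
  return 0;
}
// --[end editor's aside]-------------------------------------------------------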
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize > DstTySize && "Not a value truncation"); + APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize)); + if (isFullSet() || getSetSize().ugt(Size)) + return ConstantRange(DstTySize); + + APInt L = Lower; L.trunc(DstTySize); + APInt U = Upper; U.trunc(DstTySize); + return ConstantRange(L, U); +} + +/// print - Print out the bounds to a stream... +/// +void ConstantRange::print(raw_ostream &OS) const { + OS << "[" << Lower << "," << Upper << ")"; +} + +/// dump - Allow printing from a debugger easily... +/// +void ConstantRange::dump() const { + print(errs()); +} diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp new file mode 100644 index 000000000000..a09cddf9022a --- /dev/null +++ b/lib/Support/Debug.cpp @@ -0,0 +1,77 @@ +//===-- Debug.cpp - An easy way to add debug output to your code ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a handy way of adding debugging information to your +// code, without it being enabled all of the time, and without having to add +// command line options to enable it. +// +// In particular, just wrap your code with the DEBUG() macro, and it will be +// enabled automatically if you specify '-debug' on the command-line. +// Alternatively, you can also use the SET_DEBUG_TYPE("foo") macro to specify +// that your debug code belongs to class "foo". Then, on the command line, you +// can specify '-debug-only=foo' to enable JUST the debug information for the +// foo class. +// +// When compiling in release mode, the -debug-* options and all code in DEBUG() +// statements disappears, so it does not affect the runtime of the code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option + +namespace { +#ifndef NDEBUG + // -debug - Command line option to enable the DEBUG statements in the passes. + // This flag may only be enabled in debug builds. + static cl::opt<bool, true> + Debug("debug", cl::desc("Enable debug output"), cl::Hidden, + cl::location(DebugFlag)); + + static std::string CurrentDebugType; + static struct DebugOnlyOpt { + void operator=(const std::string &Val) const { + DebugFlag |= !Val.empty(); + CurrentDebugType = Val; + } + } DebugOnlyOptLoc; + + static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > + DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), + cl::Hidden, cl::value_desc("debug string"), + cl::location(DebugOnlyOptLoc), cl::ValueRequired); +#endif +} + +// isCurrentDebugType - Return true if the specified string is the debug type +// specified on the command line, or if none was specified on the command line +// with the -debug-only=X option. +// +bool llvm::isCurrentDebugType(const char *DebugType) { +#ifndef NDEBUG + return CurrentDebugType.empty() || DebugType == CurrentDebugType; +#else + return false; +#endif +} + +// getErrorOutputStream - Returns the error output stream (std::cerr). This +// places the std::c* I/O streams into one .cpp file and relieves the whole +// program from having to have hundreds of static c'tor/d'tors for them.
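// --[editor's aside, not part of the patch]-----------------------------------
// Sketch of the stream-selection idea described above: hand back a real
// stream when -debug is on (and the -debug-only type matches), otherwise a
// stream that swallows output. Uses std::ostream instead of LLVM's OStream.
#include <iostream>
#include <string>

static bool DebugFlagSketch = false;
static std::string DebugTypeSketch;          // empty means "all types"

static std::ostream &debugs(const char *Type) {
  static std::ostream Null(nullptr);         // null streambuf => output dropped
  if (DebugFlagSketch &&
      (DebugTypeSketch.empty() || DebugTypeSketch == Type))
    return std::cerr;
  return Null;
}

int main() {
  DebugFlagSketch = true; DebugTypeSketch = "foo";
  debugs("foo") << "printed\n";              // matches -debug-only=foo
  debugs("bar") << "silently dropped\n";     // filtered out
  return 0;
}
// --[end editor's aside]-------------------------------------------------------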
+// +OStream &llvm::getErrorOutputStream(const char *DebugType) { + static OStream cnoout(0); + if (DebugFlag && isCurrentDebugType(DebugType)) + return cerr; + else + return cnoout; +} diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp new file mode 100644 index 000000000000..fa99035b679f --- /dev/null +++ b/lib/Support/Dwarf.cpp @@ -0,0 +1,589 @@ +//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for generic dwarf information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Dwarf.h" + +#include <cassert> + +namespace llvm { + +namespace dwarf { + +/// TagString - Return the string for the specified tag. +/// +const char *TagString(unsigned Tag) { + switch (Tag) { + case DW_TAG_array_type: return "DW_TAG_array_type"; + case DW_TAG_class_type: return "DW_TAG_class_type"; + case DW_TAG_entry_point: return "DW_TAG_entry_point"; + case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type"; + case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter"; + case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration"; + case DW_TAG_label: return "DW_TAG_label"; + case DW_TAG_lexical_block: return "DW_TAG_lexical_block"; + case DW_TAG_member: return "DW_TAG_member"; + case DW_TAG_pointer_type: return "DW_TAG_pointer_type"; + case DW_TAG_reference_type: return "DW_TAG_reference_type"; + case DW_TAG_compile_unit: return "DW_TAG_compile_unit"; + case DW_TAG_string_type: return "DW_TAG_string_type"; + case DW_TAG_structure_type: return "DW_TAG_structure_type"; + case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type"; + case DW_TAG_typedef: return "DW_TAG_typedef"; + case DW_TAG_union_type: return "DW_TAG_union_type"; + case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters"; + case DW_TAG_variant: return "DW_TAG_variant"; + case DW_TAG_common_block: return "DW_TAG_common_block"; + case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion"; + case DW_TAG_inheritance: return "DW_TAG_inheritance"; + case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine"; + case DW_TAG_module: return "DW_TAG_module"; + case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type"; + case DW_TAG_set_type: return "DW_TAG_set_type"; + case DW_TAG_subrange_type: return "DW_TAG_subrange_type"; + case DW_TAG_with_stmt: return "DW_TAG_with_stmt"; + case DW_TAG_access_declaration: return "DW_TAG_access_declaration"; + case DW_TAG_base_type: return "DW_TAG_base_type"; + case DW_TAG_catch_block: return "DW_TAG_catch_block"; + case DW_TAG_const_type: return "DW_TAG_const_type"; + case DW_TAG_constant: return "DW_TAG_constant"; + case DW_TAG_enumerator: return "DW_TAG_enumerator"; + case DW_TAG_file_type: return "DW_TAG_file_type"; + case DW_TAG_friend: return "DW_TAG_friend"; + case DW_TAG_namelist: return "DW_TAG_namelist"; + case DW_TAG_namelist_item: return "DW_TAG_namelist_item"; + case DW_TAG_packed_type: return "DW_TAG_packed_type"; + case DW_TAG_subprogram: return "DW_TAG_subprogram"; + case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter"; + case DW_TAG_thrown_type: return
"DW_TAG_thrown_type"; + case DW_TAG_try_block: return "DW_TAG_try_block"; + case DW_TAG_variant_part: return "DW_TAG_variant_part"; + case DW_TAG_variable: return "DW_TAG_variable"; + case DW_TAG_volatile_type: return "DW_TAG_volatile_type"; + case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure"; + case DW_TAG_restrict_type: return "DW_TAG_restrict_type"; + case DW_TAG_interface_type: return "DW_TAG_interface_type"; + case DW_TAG_namespace: return "DW_TAG_namespace"; + case DW_TAG_imported_module: return "DW_TAG_imported_module"; + case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type"; + case DW_TAG_partial_unit: return "DW_TAG_partial_unit"; + case DW_TAG_imported_unit: return "DW_TAG_imported_unit"; + case DW_TAG_condition: return "DW_TAG_condition"; + case DW_TAG_shared_type: return "DW_TAG_shared_type"; + case DW_TAG_lo_user: return "DW_TAG_lo_user"; + case DW_TAG_hi_user: return "DW_TAG_hi_user"; + } + assert(0 && "Unknown Dwarf Tag"); + return ""; +} + +/// ChildrenString - Return the string for the specified children flag. +/// +const char *ChildrenString(unsigned Children) { + switch (Children) { + case DW_CHILDREN_no: return "CHILDREN_no"; + case DW_CHILDREN_yes: return "CHILDREN_yes"; + } + assert(0 && "Unknown Dwarf ChildrenFlag"); + return ""; +} + +/// AttributeString - Return the string for the specified attribute. +/// +const char *AttributeString(unsigned Attribute) { + switch (Attribute) { + case DW_AT_sibling: return "DW_AT_sibling"; + case DW_AT_location: return "DW_AT_location"; + case DW_AT_name: return "DW_AT_name"; + case DW_AT_ordering: return "DW_AT_ordering"; + case DW_AT_byte_size: return "DW_AT_byte_size"; + case DW_AT_bit_offset: return "DW_AT_bit_offset"; + case DW_AT_bit_size: return "DW_AT_bit_size"; + case DW_AT_stmt_list: return "DW_AT_stmt_list"; + case DW_AT_low_pc: return "DW_AT_low_pc"; + case DW_AT_high_pc: return "DW_AT_high_pc"; + case DW_AT_language: return "DW_AT_language"; + case DW_AT_discr: return "DW_AT_discr"; + case DW_AT_discr_value: return "DW_AT_discr_value"; + case DW_AT_visibility: return "DW_AT_visibility"; + case DW_AT_import: return "DW_AT_import"; + case DW_AT_string_length: return "DW_AT_string_length"; + case DW_AT_common_reference: return "DW_AT_common_reference"; + case DW_AT_comp_dir: return "DW_AT_comp_dir"; + case DW_AT_const_value: return "DW_AT_const_value"; + case DW_AT_containing_type: return "DW_AT_containing_type"; + case DW_AT_default_value: return "DW_AT_default_value"; + case DW_AT_inline: return "DW_AT_inline"; + case DW_AT_is_optional: return "DW_AT_is_optional"; + case DW_AT_lower_bound: return "DW_AT_lower_bound"; + case DW_AT_producer: return "DW_AT_producer"; + case DW_AT_prototyped: return "DW_AT_prototyped"; + case DW_AT_return_addr: return "DW_AT_return_addr"; + case DW_AT_start_scope: return "DW_AT_start_scope"; + case DW_AT_bit_stride: return "DW_AT_bit_stride"; + case DW_AT_upper_bound: return "DW_AT_upper_bound"; + case DW_AT_abstract_origin: return "DW_AT_abstract_origin"; + case DW_AT_accessibility: return "DW_AT_accessibility"; + case DW_AT_address_class: return "DW_AT_address_class"; + case DW_AT_artificial: return "DW_AT_artificial"; + case DW_AT_base_types: return "DW_AT_base_types"; + case DW_AT_calling_convention: return "DW_AT_calling_convention"; + case DW_AT_count: return "DW_AT_count"; + case DW_AT_data_member_location: return "DW_AT_data_member_location"; + case DW_AT_decl_column: return "DW_AT_decl_column"; + case DW_AT_decl_file: return "DW_AT_decl_file"; + case 
DW_AT_decl_line: return "DW_AT_decl_line"; + case DW_AT_declaration: return "DW_AT_declaration"; + case DW_AT_discr_list: return "DW_AT_discr_list"; + case DW_AT_encoding: return "DW_AT_encoding"; + case DW_AT_external: return "DW_AT_external"; + case DW_AT_frame_base: return "DW_AT_frame_base"; + case DW_AT_friend: return "DW_AT_friend"; + case DW_AT_identifier_case: return "DW_AT_identifier_case"; + case DW_AT_macro_info: return "DW_AT_macro_info"; + case DW_AT_namelist_item: return "DW_AT_namelist_item"; + case DW_AT_priority: return "DW_AT_priority"; + case DW_AT_segment: return "DW_AT_segment"; + case DW_AT_specification: return "DW_AT_specification"; + case DW_AT_static_link: return "DW_AT_static_link"; + case DW_AT_type: return "DW_AT_type"; + case DW_AT_use_location: return "DW_AT_use_location"; + case DW_AT_variable_parameter: return "DW_AT_variable_parameter"; + case DW_AT_virtuality: return "DW_AT_virtuality"; + case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location"; + case DW_AT_allocated: return "DW_AT_allocated"; + case DW_AT_associated: return "DW_AT_associated"; + case DW_AT_data_location: return "DW_AT_data_location"; + case DW_AT_byte_stride: return "DW_AT_byte_stride"; + case DW_AT_entry_pc: return "DW_AT_entry_pc"; + case DW_AT_use_UTF8: return "DW_AT_use_UTF8"; + case DW_AT_extension: return "DW_AT_extension"; + case DW_AT_ranges: return "DW_AT_ranges"; + case DW_AT_trampoline: return "DW_AT_trampoline"; + case DW_AT_call_column: return "DW_AT_call_column"; + case DW_AT_call_file: return "DW_AT_call_file"; + case DW_AT_call_line: return "DW_AT_call_line"; + case DW_AT_description: return "DW_AT_description"; + case DW_AT_binary_scale: return "DW_AT_binary_scale"; + case DW_AT_decimal_scale: return "DW_AT_decimal_scale"; + case DW_AT_small: return "DW_AT_small"; + case DW_AT_decimal_sign: return "DW_AT_decimal_sign"; + case DW_AT_digit_count: return "DW_AT_digit_count"; + case DW_AT_picture_string: return "DW_AT_picture_string"; + case DW_AT_mutable: return "DW_AT_mutable"; + case DW_AT_threads_scaled: return "DW_AT_threads_scaled"; + case DW_AT_explicit: return "DW_AT_explicit"; + case DW_AT_object_pointer: return "DW_AT_object_pointer"; + case DW_AT_endianity: return "DW_AT_endianity"; + case DW_AT_elemental: return "DW_AT_elemental"; + case DW_AT_pure: return "DW_AT_pure"; + case DW_AT_recursive: return "DW_AT_recursive"; + case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name"; + case DW_AT_sf_names: return "DW_AT_sf_names"; + case DW_AT_src_info: return "DW_AT_src_info"; + case DW_AT_mac_info: return "DW_AT_mac_info"; + case DW_AT_src_coords: return "DW_AT_src_coords"; + case DW_AT_body_begin: return "DW_AT_body_begin"; + case DW_AT_body_end: return "DW_AT_body_end"; + case DW_AT_GNU_vector: return "DW_AT_GNU_vector"; + case DW_AT_lo_user: return "DW_AT_lo_user"; + case DW_AT_hi_user: return "DW_AT_hi_user"; + case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized"; + case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; + case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; + case DW_AT_APPLE_block: return "DW_AT_APPLE_block"; + case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; + case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; + } + assert(0 && "Unknown Dwarf Attribute"); + return ""; +} + +/// FormEncodingString - Return the string for the specified form encoding. 
+/// +const char *FormEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_FORM_addr: return "FORM_addr"; + case DW_FORM_block2: return "FORM_block2"; + case DW_FORM_block4: return "FORM_block4"; + case DW_FORM_data2: return "FORM_data2"; + case DW_FORM_data4: return "FORM_data4"; + case DW_FORM_data8: return "FORM_data8"; + case DW_FORM_string: return "FORM_string"; + case DW_FORM_block: return "FORM_block"; + case DW_FORM_block1: return "FORM_block1"; + case DW_FORM_data1: return "FORM_data1"; + case DW_FORM_flag: return "FORM_flag"; + case DW_FORM_sdata: return "FORM_sdata"; + case DW_FORM_strp: return "FORM_strp"; + case DW_FORM_udata: return "FORM_udata"; + case DW_FORM_ref_addr: return "FORM_ref_addr"; + case DW_FORM_ref1: return "FORM_ref1"; + case DW_FORM_ref2: return "FORM_ref2"; + case DW_FORM_ref4: return "FORM_ref4"; + case DW_FORM_ref8: return "FORM_ref8"; + case DW_FORM_ref_udata: return "FORM_ref_udata"; + case DW_FORM_indirect: return "FORM_indirect"; + } + assert(0 && "Unknown Dwarf Form Encoding"); + return ""; +} + +/// OperationEncodingString - Return the string for the specified operation +/// encoding. +const char *OperationEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_OP_addr: return "OP_addr"; + case DW_OP_deref: return "OP_deref"; + case DW_OP_const1u: return "OP_const1u"; + case DW_OP_const1s: return "OP_const1s"; + case DW_OP_const2u: return "OP_const2u"; + case DW_OP_const2s: return "OP_const2s"; + case DW_OP_const4u: return "OP_const4u"; + case DW_OP_const4s: return "OP_const4s"; + case DW_OP_const8u: return "OP_const8u"; + case DW_OP_const8s: return "OP_const8s"; + case DW_OP_constu: return "OP_constu"; + case DW_OP_consts: return "OP_consts"; + case DW_OP_dup: return "OP_dup"; + case DW_OP_drop: return "OP_drop"; + case DW_OP_over: return "OP_over"; + case DW_OP_pick: return "OP_pick"; + case DW_OP_swap: return "OP_swap"; + case DW_OP_rot: return "OP_rot"; + case DW_OP_xderef: return "OP_xderef"; + case DW_OP_abs: return "OP_abs"; + case DW_OP_and: return "OP_and"; + case DW_OP_div: return "OP_div"; + case DW_OP_minus: return "OP_minus"; + case DW_OP_mod: return "OP_mod"; + case DW_OP_mul: return "OP_mul"; + case DW_OP_neg: return "OP_neg"; + case DW_OP_not: return "OP_not"; + case DW_OP_or: return "OP_or"; + case DW_OP_plus: return "OP_plus"; + case DW_OP_plus_uconst: return "OP_plus_uconst"; + case DW_OP_shl: return "OP_shl"; + case DW_OP_shr: return "OP_shr"; + case DW_OP_shra: return "OP_shra"; + case DW_OP_xor: return "OP_xor"; + case DW_OP_skip: return "OP_skip"; + case DW_OP_bra: return "OP_bra"; + case DW_OP_eq: return "OP_eq"; + case DW_OP_ge: return "OP_ge"; + case DW_OP_gt: return "OP_gt"; + case DW_OP_le: return "OP_le"; + case DW_OP_lt: return "OP_lt"; + case DW_OP_ne: return "OP_ne"; + case DW_OP_lit0: return "OP_lit0"; + case DW_OP_lit1: return "OP_lit1"; + case DW_OP_lit31: return "OP_lit31"; + case DW_OP_reg0: return "OP_reg0"; + case DW_OP_reg1: return "OP_reg1"; + case DW_OP_reg31: return "OP_reg31"; + case DW_OP_breg0: return "OP_breg0"; + case DW_OP_breg1: return "OP_breg1"; + case DW_OP_breg31: return "OP_breg31"; + case DW_OP_regx: return "OP_regx"; + case DW_OP_fbreg: return "OP_fbreg"; + case DW_OP_bregx: return "OP_bregx"; + case DW_OP_piece: return "OP_piece"; + case DW_OP_deref_size: return "OP_deref_size"; + case DW_OP_xderef_size: return "OP_xderef_size"; + case DW_OP_nop: return "OP_nop"; + case DW_OP_push_object_address: return "OP_push_object_address"; + case DW_OP_call2: return "OP_call2"; 
+ case DW_OP_call4: return "OP_call4"; + case DW_OP_call_ref: return "OP_call_ref"; + case DW_OP_form_tls_address: return "OP_form_tls_address"; + case DW_OP_call_frame_cfa: return "OP_call_frame_cfa"; + case DW_OP_lo_user: return "OP_lo_user"; + case DW_OP_hi_user: return "OP_hi_user"; + } + assert(0 && "Unknown Dwarf Operation Encoding"); + return ""; +} + +/// AttributeEncodingString - Return the string for the specified attribute +/// encoding. +const char *AttributeEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_ATE_address: return "ATE_address"; + case DW_ATE_boolean: return "ATE_boolean"; + case DW_ATE_complex_float: return "ATE_complex_float"; + case DW_ATE_float: return "ATE_float"; + case DW_ATE_signed: return "ATE_signed"; + case DW_ATE_signed_char: return "ATE_signed_char"; + case DW_ATE_unsigned: return "ATE_unsigned"; + case DW_ATE_unsigned_char: return "ATE_unsigned_char"; + case DW_ATE_imaginary_float: return "ATE_imaginary_float"; + case DW_ATE_packed_decimal: return "ATE_packed_decimal"; + case DW_ATE_numeric_string: return "ATE_numeric_string"; + case DW_ATE_edited: return "ATE_edited"; + case DW_ATE_signed_fixed: return "ATE_signed_fixed"; + case DW_ATE_unsigned_fixed: return "ATE_unsigned_fixed"; + case DW_ATE_decimal_float: return "ATE_decimal_float"; + case DW_ATE_lo_user: return "ATE_lo_user"; + case DW_ATE_hi_user: return "ATE_hi_user"; + } + assert(0 && "Unknown Dwarf Attribute Encoding"); + return ""; +} + +/// DecimalSignString - Return the string for the specified decimal sign +/// attribute. +const char *DecimalSignString(unsigned Sign) { + switch (Sign) { + case DW_DS_unsigned: return "DS_unsigned"; + case DW_DS_leading_overpunch: return "DS_leading_overpunch"; + case DW_DS_trailing_overpunch: return "DS_trailing_overpunch"; + case DW_DS_leading_separate: return "DS_leading_separate"; + case DW_DS_trailing_separate: return "DS_trailing_separate"; + } + assert(0 && "Unknown Dwarf Decimal Sign Attribute"); + return ""; +} + +/// EndianityString - Return the string for the specified endianity. +/// +const char *EndianityString(unsigned Endian) { + switch (Endian) { + case DW_END_default: return "END_default"; + case DW_END_big: return "END_big"; + case DW_END_little: return "END_little"; + case DW_END_lo_user: return "END_lo_user"; + case DW_END_hi_user: return "END_hi_user"; + } + assert(0 && "Unknown Dwarf Endianity"); + return ""; +} + +/// AccessibilityString - Return the string for the specified accessibility. +/// +const char *AccessibilityString(unsigned Access) { + switch (Access) { + // Accessibility codes + case DW_ACCESS_public: return "ACCESS_public"; + case DW_ACCESS_protected: return "ACCESS_protected"; + case DW_ACCESS_private: return "ACCESS_private"; + } + assert(0 && "Unknown Dwarf Accessibility"); + return ""; +} + +/// VisibilityString - Return the string for the specified visibility. +/// +const char *VisibilityString(unsigned Visibility) { + switch (Visibility) { + case DW_VIS_local: return "VIS_local"; + case DW_VIS_exported: return "VIS_exported"; + case DW_VIS_qualified: return "VIS_qualified"; + } + assert(0 && "Unknown Dwarf Visibility"); + return ""; +} + +/// VirtualityString - Return the string for the specified virtuality. 
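// --[editor's aside, not part of the patch]-----------------------------------
// All of these *String helpers follow one pattern: a total switch over the
// known enumerators, plus assert(0) for anything else. A miniature of the
// pattern with a hypothetical enum, used the same way one would call e.g.
// dwarf::TagString(DW_TAG_subprogram):
#include <cassert>
#include <cstring>

enum MiniTag { TAG_alpha = 0x01, TAG_beta = 0x02 };

static const char *MiniTagString(unsigned Tag) {
  switch (Tag) {
  case TAG_alpha: return "TAG_alpha";
  case TAG_beta:  return "TAG_beta";
  }
  assert(0 && "Unknown tag");          // same convention as the code above
  return "";
}

int main() {
  assert(std::strcmp(MiniTagString(TAG_beta), "TAG_beta") == 0);
  return 0;
}
// --[end editor's aside]-------------------------------------------------------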
+/// +const char *VirtualityString(unsigned Virtuality) { + switch (Virtuality) { + case DW_VIRTUALITY_none: return "VIRTUALITY_none"; + case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual"; + case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual"; + } + assert(0 && "Unknown Dwarf Virtuality"); + return ""; +} + +/// LanguageString - Return the string for the specified language. +/// +const char *LanguageString(unsigned Language) { + switch (Language) { + case DW_LANG_C89: return "LANG_C89"; + case DW_LANG_C: return "LANG_C"; + case DW_LANG_Ada83: return "LANG_Ada83"; + case DW_LANG_C_plus_plus: return "LANG_C_plus_plus"; + case DW_LANG_Cobol74: return "LANG_Cobol74"; + case DW_LANG_Cobol85: return "LANG_Cobol85"; + case DW_LANG_Fortran77: return "LANG_Fortran77"; + case DW_LANG_Fortran90: return "LANG_Fortran90"; + case DW_LANG_Pascal83: return "LANG_Pascal83"; + case DW_LANG_Modula2: return "LANG_Modula2"; + case DW_LANG_Java: return "LANG_Java"; + case DW_LANG_C99: return "LANG_C99"; + case DW_LANG_Ada95: return "LANG_Ada95"; + case DW_LANG_Fortran95: return "LANG_Fortran95"; + case DW_LANG_PLI: return "LANG_PLI"; + case DW_LANG_ObjC: return "LANG_ObjC"; + case DW_LANG_ObjC_plus_plus: return "LANG_ObjC_plus_plus"; + case DW_LANG_UPC: return "LANG_UPC"; + case DW_LANG_D: return "LANG_D"; + case DW_LANG_lo_user: return "LANG_lo_user"; + case DW_LANG_hi_user: return "LANG_hi_user"; + } + assert(0 && "Unknown Dwarf Language"); + return ""; +} + +/// CaseString - Return the string for the specified identifier case. +/// +const char *CaseString(unsigned Case) { + switch (Case) { + case DW_ID_case_sensitive: return "ID_case_sensitive"; + case DW_ID_up_case: return "ID_up_case"; + case DW_ID_down_case: return "ID_down_case"; + case DW_ID_case_insensitive: return "ID_case_insensitive"; + } + assert(0 && "Unknown Dwarf Identifier Case"); + return ""; +} + +/// ConventionString - Return the string for the specified calling convention. +/// +const char *ConventionString(unsigned Convention) { + switch (Convention) { + case DW_CC_normal: return "CC_normal"; + case DW_CC_program: return "CC_program"; + case DW_CC_nocall: return "CC_nocall"; + case DW_CC_lo_user: return "CC_lo_user"; + case DW_CC_hi_user: return "CC_hi_user"; + } + assert(0 && "Unknown Dwarf Calling Convention"); + return ""; +} + +/// InlineCodeString - Return the string for the specified inline code. +/// +const char *InlineCodeString(unsigned Code) { + switch (Code) { + case DW_INL_not_inlined: return "INL_not_inlined"; + case DW_INL_inlined: return "INL_inlined"; + case DW_INL_declared_not_inlined: return "INL_declared_not_inlined"; + case DW_INL_declared_inlined: return "INL_declared_inlined"; + } + assert(0 && "Unknown Dwarf Inline Code"); + return ""; +} + +/// ArrayOrderString - Return the string for the specified array order. +/// +const char *ArrayOrderString(unsigned Order) { + switch (Order) { + case DW_ORD_row_major: return "ORD_row_major"; + case DW_ORD_col_major: return "ORD_col_major"; + } + assert(0 && "Unknown Dwarf Array Order"); + return ""; +} + +/// DiscriminantString - Return the string for the specified discriminant +/// descriptor. +const char *DiscriminantString(unsigned Discriminant) { + switch (Discriminant) { + case DW_DSC_label: return "DSC_label"; + case DW_DSC_range: return "DSC_range"; + } + assert(0 && "Unknown Dwarf Discriminant Descriptor"); + return ""; +} + +/// LNStandardString - Return the string for the specified line number standard. 
+/// +const char *LNStandardString(unsigned Standard) { + switch (Standard) { + case DW_LNS_copy: return "LNS_copy"; + case DW_LNS_advance_pc: return "LNS_advance_pc"; + case DW_LNS_advance_line: return "LNS_advance_line"; + case DW_LNS_set_file: return "LNS_set_file"; + case DW_LNS_set_column: return "LNS_set_column"; + case DW_LNS_negate_stmt: return "LNS_negate_stmt"; + case DW_LNS_set_basic_block: return "LNS_set_basic_block"; + case DW_LNS_const_add_pc: return "LNS_const_add_pc"; + case DW_LNS_fixed_advance_pc: return "LNS_fixed_advance_pc"; + case DW_LNS_set_prologue_end: return "LNS_set_prologue_end"; + case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin"; + case DW_LNS_set_isa: return "LNS_set_isa"; + } + assert(0 && "Unknown Dwarf Line Number Standard"); + return ""; +} + +/// LNExtendedString - Return the string for the specified line number extended +/// opcode encodings. +const char *LNExtendedString(unsigned Encoding) { + switch (Encoding) { + // Line Number Extended Opcode Encodings + case DW_LNE_end_sequence: return "LNE_end_sequence"; + case DW_LNE_set_address: return "LNE_set_address"; + case DW_LNE_define_file: return "LNE_define_file"; + case DW_LNE_lo_user: return "LNE_lo_user"; + case DW_LNE_hi_user: return "LNE_hi_user"; + } + assert(0 && "Unknown Dwarf Line Number Extended Opcode Encoding"); + return ""; +} + +/// MacinfoString - Return the string for the specified macinfo type encodings. +/// +const char *MacinfoString(unsigned Encoding) { + switch (Encoding) { + // Macinfo Type Encodings + case DW_MACINFO_define: return "MACINFO_define"; + case DW_MACINFO_undef: return "MACINFO_undef"; + case DW_MACINFO_start_file: return "MACINFO_start_file"; + case DW_MACINFO_end_file: return "MACINFO_end_file"; + case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext"; + } + assert(0 && "Unknown Dwarf Macinfo Type Encodings"); + return ""; +} + +/// CallFrameString - Return the string for the specified call frame instruction +/// encodings. 
+const char *CallFrameString(unsigned Encoding) { + switch (Encoding) { + case DW_CFA_advance_loc: return "CFA_advance_loc"; + case DW_CFA_offset: return "CFA_offset"; + case DW_CFA_restore: return "CFA_restore"; + case DW_CFA_set_loc: return "CFA_set_loc"; + case DW_CFA_advance_loc1: return "CFA_advance_loc1"; + case DW_CFA_advance_loc2: return "CFA_advance_loc2"; + case DW_CFA_advance_loc4: return "CFA_advance_loc4"; + case DW_CFA_offset_extended: return "CFA_offset_extended"; + case DW_CFA_restore_extended: return "CFA_restore_extended"; + case DW_CFA_undefined: return "CFA_undefined"; + case DW_CFA_same_value: return "CFA_same_value"; + case DW_CFA_register: return "CFA_register"; + case DW_CFA_remember_state: return "CFA_remember_state"; + case DW_CFA_restore_state: return "CFA_restore_state"; + case DW_CFA_def_cfa: return "CFA_def_cfa"; + case DW_CFA_def_cfa_register: return "CFA_def_cfa_register"; + case DW_CFA_def_cfa_offset: return "CFA_def_cfa_offset"; + case DW_CFA_def_cfa_expression: return "CFA_def_cfa_expression"; + case DW_CFA_expression: return "CFA_expression"; + case DW_CFA_offset_extended_sf: return "CFA_offset_extended_sf"; + case DW_CFA_def_cfa_sf: return "CFA_def_cfa_sf"; + case DW_CFA_def_cfa_offset_sf: return "CFA_def_cfa_offset_sf"; + case DW_CFA_val_offset: return "CFA_val_offset"; + case DW_CFA_val_offset_sf: return "CFA_val_offset_sf"; + case DW_CFA_val_expression: return "CFA_val_expression"; + case DW_CFA_lo_user: return "CFA_lo_user"; + case DW_CFA_hi_user: return "CFA_hi_user"; + } + assert(0 && "Unknown Dwarf Call Frame Instruction Encodings"); + return ""; +} + +} // End of namespace dwarf. + +} // End of namespace llvm. diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp new file mode 100644 index 000000000000..21080b64399e --- /dev/null +++ b/lib/Support/FileUtilities.cpp @@ -0,0 +1,263 @@ +//===- Support/FileUtilities.cpp - File System Utilities ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a family of utility functions which are useful for doing +// various things with files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileUtilities.h" +#include "llvm/System/Path.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include <cstdlib> +#include <cstring> +#include <cctype> +using namespace llvm; + +static bool isSignedChar(char C) { + return (C == '+' || C == '-'); +} + +static bool isExponentChar(char C) { + switch (C) { + case 'D': // Strange exponential notation. + case 'd': // Strange exponential notation. + case 'e': + case 'E': return true; + default: return false; + } +} + +static bool isNumberChar(char C) { + switch (C) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '.': return true; + default: return isSignedChar(C) || isExponentChar(C); + } +} + +static const char *BackupNumber(const char *Pos, const char *FirstChar) { + // If we didn't stop in the middle of a number, don't backup. + if (!isNumberChar(*Pos)) return Pos; + + // Otherwise, return to the start of the number.
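// --[editor's aside, not part of the patch]-----------------------------------
// How the predicates above carve a number out of a byte stream, including the
// Fortran-style 'D' exponent that the comparison code later rewrites to 'e'
// before calling strtod. Simplified predicate, same backup loop shape:
#include <cassert>
#include <cctype>

static bool isNumCharSketch(char C) {
  return std::isdigit((unsigned char)C) || C == '.' || C == '+' || C == '-' ||
         C == 'e' || C == 'E' || C == 'd' || C == 'D';
}

int main() {
  const char *Buf = "x=1.234D45;";
  const char *P = Buf + 7;                  // stopped mid-number, at 'D'
  while (P > Buf && isNumCharSketch(P[-1])) // BackupNumber's core loop
    --P;
  assert(P == Buf + 2);                     // start of "1.234D45"
  return 0;
}
// --[end editor's aside]-------------------------------------------------------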
+ while (Pos > FirstChar && isNumberChar(Pos[-1])) { + --Pos; + if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1])) + break; + } + return Pos; +} + +/// EndOfNumber - Return the first character that is not part of the specified +/// number. This assumes that the buffer is null terminated, so it won't fall +/// off the end. +static const char *EndOfNumber(const char *Pos) { + while (isNumberChar(*Pos)) + ++Pos; + return Pos; +} + +/// CompareNumbers - compare two numbers, returning true if they are different. +static bool CompareNumbers(const char *&F1P, const char *&F2P, + const char *F1End, const char *F2End, + double AbsTolerance, double RelTolerance, + std::string *ErrorMsg) { + const char *F1NumEnd, *F2NumEnd; + double V1 = 0.0, V2 = 0.0; + + // If one of the positions is at a space and the other isn't, chomp up 'til + // the end of the space. + while (isspace(*F1P) && F1P != F1End) + ++F1P; + while (isspace(*F2P) && F2P != F2End) + ++F2P; + + // If we stop on numbers, compare their difference. + if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) { + // The diff failed. + F1NumEnd = F1P; + F2NumEnd = F2P; + } else { + // Note that some ugliness is built into this to permit support for numbers + // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This + // occurs in 200.sixtrack in spec2k. + V1 = strtod(F1P, const_cast<char**>(&F1NumEnd)); + V2 = strtod(F2P, const_cast<char**>(&F2NumEnd)); + + if (*F1NumEnd == 'D' || *F1NumEnd == 'd') { + // Copy string into tmp buffer to replace the 'D' with an 'e'. + SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1); + // Strange exponential notation! + StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e'; + + V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd)); + F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]); + } + + if (*F2NumEnd == 'D' || *F2NumEnd == 'd') { + // Copy string into tmp buffer to replace the 'D' with an 'e'. + SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1); + // Strange exponential notation! + StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e'; + + V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd)); + F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]); + } + } + + if (F1NumEnd == F1P || F2NumEnd == F2P) { + if (ErrorMsg) { + *ErrorMsg = "FP Comparison failed, not a numeric difference between '"; + *ErrorMsg += F1P[0]; + *ErrorMsg += "' and '"; + *ErrorMsg += F2P[0]; + *ErrorMsg += "'"; + } + return true; + } + + // Check to see if these are inside the absolute tolerance + if (AbsTolerance < std::abs(V1-V2)) { + // Nope, check the relative tolerance... + double Diff; + if (V2) + Diff = std::abs(V1/V2 - 1.0); + else if (V1) + Diff = std::abs(V2/V1 - 1.0); + else + Diff = 0; // Both zero. + if (Diff > RelTolerance) { + if (ErrorMsg) { + *ErrorMsg = "Compared: " + ftostr(V1) + " and " + ftostr(V2) + "\n"; + *ErrorMsg += "abs. diff = " + ftostr(std::abs(V1-V2)) + + " rel.diff = " + ftostr(Diff) + "\n"; + *ErrorMsg += "Out of tolerance: rel/abs: " + ftostr(RelTolerance) + + "/" + ftostr(AbsTolerance); + } + return true; + } + } + + // Otherwise, advance our read pointers to the end of the numbers. + F1P = F1NumEnd; F2P = F2NumEnd; + return false; +} + +/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the +/// files match, 1 if they are different, and 2 if there is a file error. This +/// function differs from DiffFiles in that you can specify an absolute and +/// relative FP error that is allowed to exist.
If you specify a string to fill +/// in for the error option, it will set the string to an error message if an +/// error occurs, allowing the caller to distinguish between a failed diff and a +/// file system error. +/// +int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, + const sys::PathWithStatus &FileB, + double AbsTol, double RelTol, + std::string *Error) { + const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error); + if (!FileAStat) + return 2; + const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error); + if (!FileBStat) + return 2; + + // Check for zero length files because some systems croak when you try to + // mmap an empty file. + size_t A_size = FileAStat->getSize(); + size_t B_size = FileBStat->getSize(); + + // If they are both zero sized then they're the same + if (A_size == 0 && B_size == 0) + return 0; + + // If only one of them is zero sized then they can't be the same + if ((A_size == 0 || B_size == 0)) { + if (Error) + *Error = "Files differ: one is zero-sized, the other isn't"; + return 1; + } + + // Now it's safe to mmap the files into memory because both files + // have a non-zero size. + OwningPtr<MemoryBuffer> F1(MemoryBuffer::getFile(FileA.c_str(), Error)); + OwningPtr<MemoryBuffer> F2(MemoryBuffer::getFile(FileB.c_str(), Error)); + if (F1 == 0 || F2 == 0) + return 2; + + // Okay, now that we opened the files, scan them for the first difference. + const char *File1Start = F1->getBufferStart(); + const char *File2Start = F2->getBufferStart(); + const char *File1End = F1->getBufferEnd(); + const char *File2End = F2->getBufferEnd(); + const char *F1P = File1Start; + const char *F2P = File2Start; + + if (A_size == B_size) { + // Are the buffers identical? Common case: Handle this efficiently. + if (std::memcmp(File1Start, File2Start, A_size) == 0) + return 0; + + if (AbsTol == 0 && RelTol == 0) { + if (Error) + *Error = "Files differ without tolerance allowance"; + return 1; // Files different! + } + } + + bool CompareFailed = false; + while (1) { + // Scan for the end of file or next difference. + while (F1P < File1End && F2P < File2End && *F1P == *F2P) + ++F1P, ++F2P; + + if (F1P >= File1End || F2P >= File2End) break; + + // Okay, we must have found a difference. Backup to the start of the + // current number each stream is at so that we can compare from the + // beginning. + F1P = BackupNumber(F1P, File1Start); + F2P = BackupNumber(F2P, File2Start); + + // Now that we are at the start of the numbers, compare them, exiting if + // they don't match. + if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) { + CompareFailed = true; + break; + } + } + + // Okay, we reached the end of file. If both files are at the end, we + // succeeded. + bool F1AtEnd = F1P >= File1End; + bool F2AtEnd = F2P >= File2End; + if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) { + // Else, we might have run off the end due to a number: backup and retry. + if (F1AtEnd && isNumberChar(F1P[-1])) --F1P; + if (F2AtEnd && isNumberChar(F2P[-1])) --F2P; + F1P = BackupNumber(F1P, File1Start); + F2P = BackupNumber(F2P, File2Start); + + // Now that we are at the start of the numbers, compare them, exiting if + // they don't match. + if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) + CompareFailed = true; + + // If we found the end, we succeeded.
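// --[editor's aside, not part of the patch]-----------------------------------
// The accept/reject rule used by CompareNumbers above: pass if the absolute
// difference is within AbsTol, otherwise fall back to the relative error.
#include <cassert>
#include <cmath>

static bool withinTolerance(double V1, double V2,
                            double AbsTol, double RelTol) {
  if (std::abs(V1 - V2) <= AbsTol)
    return true;                               // inside absolute slack
  double Diff = V2 != 0 ? std::abs(V1 / V2 - 1.0)
              : V1 != 0 ? std::abs(V2 / V1 - 1.0)
              : 0.0;                           // both zero
  return Diff <= RelTol;                       // inside relative slack
}

int main() {
  assert(withinTolerance(100.0, 100.4, 0.5, 0.0));    // absolute tolerance
  assert(withinTolerance(1000.0, 1001.0, 0.0, 0.01)); // ~0.1% relative error
  assert(!withinTolerance(1.0, 2.0, 0.1, 0.1));       // genuinely different
  return 0;
}
// --[end editor's aside]-------------------------------------------------------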
+ if (F1P < File1End || F2P < File2End) + CompareFailed = true; + } + + return CompareFailed; +} diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp new file mode 100644 index 000000000000..41c730e3e1e6 --- /dev/null +++ b/lib/Support/FoldingSet.cpp @@ -0,0 +1,378 @@ +//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a hash set that can be used to remove duplication of +// nodes in a graph. This code was originally created by Chris Lattner for use +// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code +// set. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstring> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// FoldingSetNodeID Implementation + +/// Add* - Add various data types to Bit data. +/// +void FoldingSetNodeID::AddPointer(const void *Ptr) { + // Note: this adds pointers to the hash using sizes and endianness that + // depend on the host. It doesn't matter however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the + // ordering of nodes in the folding set. + intptr_t PtrI = (intptr_t)Ptr; + Bits.push_back(unsigned(PtrI)); + if (sizeof(intptr_t) > sizeof(unsigned)) + Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); +} +void FoldingSetNodeID::AddInteger(signed I) { + Bits.push_back(I); +} +void FoldingSetNodeID::AddInteger(unsigned I) { + Bits.push_back(I); +} +void FoldingSetNodeID::AddInteger(long I) { + AddInteger((unsigned long)I); +} +void FoldingSetNodeID::AddInteger(unsigned long I) { + if (sizeof(long) == sizeof(int)) + AddInteger(unsigned(I)); + else if (sizeof(long) == sizeof(long long)) { + AddInteger((unsigned long long)I); + } else { + assert(0 && "unexpected sizeof(long)"); + } +} +void FoldingSetNodeID::AddInteger(long long I) { + AddInteger((unsigned long long)I); +} +void FoldingSetNodeID::AddInteger(unsigned long long I) { + AddInteger(unsigned(I)); + if ((uint64_t)(int)I != I) + Bits.push_back(unsigned(I >> 32)); +} + +void FoldingSetNodeID::AddString(const char *String, const char *End) { + unsigned Size = static_cast<unsigned>(End - String); + Bits.push_back(Size); + if (!Size) return; + + unsigned Units = Size / 4; + unsigned Pos = 0; + const unsigned *Base = (const unsigned *)String; + + // If the string is aligned do a bulk transfer. + if (!((intptr_t)Base & 3)) { + Bits.append(Base, Base + Units); + Pos = (Units + 1) * 4; + } else { + // Otherwise do it the hard way. + for (Pos += 4; Pos <= Size; Pos += 4) { + unsigned V = ((unsigned char)String[Pos - 4] << 24) | + ((unsigned char)String[Pos - 3] << 16) | + ((unsigned char)String[Pos - 2] << 8) | + (unsigned char)String[Pos - 1]; + Bits.push_back(V); + } + } + + // With the leftover bits. + unsigned V = 0; + // Pos will have overshot size by 4 - #bytes left over. + switch (Pos - Size) { + case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. + case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. + case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; + default: return; // Nothing left.
+ } + + Bits.push_back(V); +} + +void FoldingSetNodeID::AddString(const char *String) { + AddString(String, String + strlen(String)); +} + +void FoldingSetNodeID::AddString(const std::string &String) { + AddString(&*String.begin(), &*String.end()); +} + +/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to +/// lookup the node in the FoldingSetImpl. +unsigned FoldingSetNodeID::ComputeHash() const { + // This is adapted from SuperFastHash by Paul Hsieh. + unsigned Hash = static_cast<unsigned>(Bits.size()); + for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { + unsigned Data = *BP; + Hash += Data & 0xFFFF; + unsigned Tmp = ((Data >> 16) << 11) ^ Hash; + Hash = (Hash << 16) ^ Tmp; + Hash += Hash >> 11; + } + + // Force "avalanching" of final 127 bits. + Hash ^= Hash << 3; + Hash += Hash >> 5; + Hash ^= Hash << 4; + Hash += Hash >> 17; + Hash ^= Hash << 25; + Hash += Hash >> 6; + return Hash; +} + +/// operator== - Used to compare two nodes to each other. +/// +bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ + if (Bits.size() != RHS.Bits.size()) return false; + return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; +} + + +//===----------------------------------------------------------------------===// +/// Helper functions for FoldingSetImpl. + +/// GetNextPtr - In order to save space, each bucket is a +/// singly-linked-list. In order to make deletion more efficient, we make +/// the list circular, so we can delete a node without computing its hash. +/// The problem with this is that the start of the hash buckets are not +/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: +/// use GetBucketPtr when this happens. +static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { + // The low bit is set if this is the pointer back to the bucket. + if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) + return 0; + + return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); +} + + +/// GetBucketPtr - Provides a casting of a bucket pointer for isNode +/// testing. +static void **GetBucketPtr(void *NextInBucketPtr) { + intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); + assert((Ptr & 1) && "Not a bucket pointer"); + return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); +} + +/// GetBucketFor - Hash the specified node ID and return the hash bucket for +/// the specified ID. +static void **GetBucketFor(const FoldingSetNodeID &ID, + void **Buckets, unsigned NumBuckets) { + // NumBuckets is always a power of 2. + unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); + return Buckets + BucketNum; +} + +//===----------------------------------------------------------------------===// +// FoldingSetImpl Implementation + +FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { + assert(5 < Log2InitSize && Log2InitSize < 32 && + "Initial hash table size out of range"); + NumBuckets = 1 << Log2InitSize; + Buckets = new void*[NumBuckets+1]; + clear(); +} +FoldingSetImpl::~FoldingSetImpl() { + delete [] Buckets; +} +void FoldingSetImpl::clear() { + // Set all but the last bucket to null pointers. + memset(Buckets, 0, NumBuckets*sizeof(void*)); + + // Set the very last bucket to be a non-null "pointer". + Buckets[NumBuckets] = reinterpret_cast<void*>(-1); + + // Reset the node count to zero. + NumNodes = 0; +} + +/// GrowHashTable - Double the size of the hash table and rehash everything. +/// +void FoldingSetImpl::GrowHashTable() { + void **OldBuckets = Buckets; + unsigned OldNumBuckets = NumBuckets; + NumBuckets <<= 1; + + // Clear out new buckets.
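// --[editor's aside, not part of the patch]-----------------------------------
// The low-bit tag that GetNextPtr / GetBucketPtr rely on: buckets are void**
// and nodes are at least 2-byte aligned, so bit 0 distinguishes "pointer back
// to the bucket" from "next node in the chain".
#include <cassert>
#include <cstdint>

int main() {
  void *Bucket[1] = { nullptr };
  // Tag the bucket address, as InsertNode does for a first insertion.
  void *Tagged = reinterpret_cast<void*>(
      reinterpret_cast<intptr_t>(Bucket) | 1);

  assert(reinterpret_cast<intptr_t>(Tagged) & 1);        // seen as a bucket
  void **Back = reinterpret_cast<void**>(
      reinterpret_cast<intptr_t>(Tagged) & ~intptr_t(1)); // strip the tag
  assert(Back == Bucket);
  return 0;
}
// --[end editor's aside]-------------------------------------------------------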
+ Buckets = new void*[NumBuckets+1]; + clear(); + + // Walk the old buckets, rehashing nodes into their new place. + FoldingSetNodeID ID; + for (unsigned i = 0; i != OldNumBuckets; ++i) { + void *Probe = OldBuckets[i]; + if (!Probe) continue; + while (Node *NodeInBucket = GetNextPtr(Probe)) { + // Figure out the next link, remove NodeInBucket from the old link. + Probe = NodeInBucket->getNextInBucket(); + NodeInBucket->SetNextInBucket(0); + + // Insert the node into the new bucket, after recomputing the hash. + GetNodeProfile(ID, NodeInBucket); + InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); + ID.clear(); + } + } + + delete[] OldBuckets; +} + +/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, +/// return it. If not, return the insertion token that will make insertion +/// faster. +FoldingSetImpl::Node +*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + + void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); + void *Probe = *Bucket; + + InsertPos = 0; + + FoldingSetNodeID OtherID; + while (Node *NodeInBucket = GetNextPtr(Probe)) { + GetNodeProfile(OtherID, NodeInBucket); + if (OtherID == ID) + return NodeInBucket; + + Probe = NodeInBucket->getNextInBucket(); + OtherID.clear(); + } + + // Didn't find the node, return null with the bucket as the InsertPos. + InsertPos = Bucket; + return 0; +} + +/// InsertNode - Insert the specified node into the folding set, knowing that it +/// is not already in the map. InsertPos must be obtained from +/// FindNodeOrInsertPos. +void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { + assert(N->getNextInBucket() == 0); + // Do we need to grow the hashtable? + if (NumNodes+1 > NumBuckets*2) { + GrowHashTable(); + FoldingSetNodeID ID; + GetNodeProfile(ID, N); + InsertPos = GetBucketFor(ID, Buckets, NumBuckets); + } + + ++NumNodes; + + /// The insert position is actually a bucket pointer. + void **Bucket = static_cast<void**>(InsertPos); + + void *Next = *Bucket; + + // If this is the first insertion into this bucket, its next pointer will be + // null. Pretend as if it pointed to itself, setting the low bit to indicate + // that it is a pointer to the bucket. + if (Next == 0) + Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); + + // Set the node's next pointer, and make the bucket point to the node. + N->SetNextInBucket(Next); + *Bucket = N; +} + +/// RemoveNode - Remove a node from the folding set, returning true if one was +/// removed or false if the node was not in the folding set. +bool FoldingSetImpl::RemoveNode(Node *N) { + // Because each bucket is a circular list, we don't need to compute N's hash + // to remove it. + void *Ptr = N->getNextInBucket(); + if (Ptr == 0) return false; // Not in folding set. + + --NumNodes; + N->SetNextInBucket(0); + + // Remember what N originally pointed to, either a bucket or another node. + void *NodeNextPtr = Ptr; + + // Chase around the list until we find the node (or bucket) which points to N. + while (true) { + if (Node *NodeInBucket = GetNextPtr(Ptr)) { + // Advance pointer. + Ptr = NodeInBucket->getNextInBucket(); + + // We found a node that points to N, change it to point to N's next node, + // removing N from the list. + if (Ptr == N) { + NodeInBucket->SetNextInBucket(NodeNextPtr); + return true; + } + } else { + void **Bucket = GetBucketPtr(Ptr); + Ptr = *Bucket; + + // If we found that the bucket points to N, update the bucket to point to + // whatever is next.
+ if (Ptr == N) { + *Bucket = NodeNextPtr; + return true; + } + } + } +} + +/// GetOrInsertNode - If there is an existing simple Node exactly +/// equal to the specified node, return it. Otherwise, insert 'N' and return it +/// instead. +FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { + FoldingSetNodeID ID; + GetNodeProfile(ID, N); + void *IP; + if (Node *E = FindNodeOrInsertPos(ID, IP)) + return E; + InsertNode(N, IP); + return N; +} + +//===----------------------------------------------------------------------===// + // FoldingSetIteratorImpl Implementation + +FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { + // Skip to the first non-null non-self-cycle bucket. + while (*Bucket != reinterpret_cast<void*>(-1) && + (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) + ++Bucket; + + NodePtr = static_cast<FoldingSetNode*>(*Bucket); +} + +void FoldingSetIteratorImpl::advance() { + // If there is another link within this bucket, go to it. + void *Probe = NodePtr->getNextInBucket(); + + if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) + NodePtr = NextNodeInBucket; + else { + // Otherwise, this is the last link in this bucket. + void **Bucket = GetBucketPtr(Probe); + + // Skip to the next non-null non-self-cycle bucket. + do { + ++Bucket; + } while (*Bucket != reinterpret_cast<void*>(-1) && + (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); + + NodePtr = static_cast<FoldingSetNode*>(*Bucket); + } +} + +//===----------------------------------------------------------------------===// +// FoldingSetBucketIteratorImpl Implementation + +FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { + Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; +} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp new file mode 100644 index 000000000000..c359dfb82ea7 --- /dev/null +++ b/lib/Support/GraphWriter.cpp @@ -0,0 +1,89 @@ +//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements misc. GraphWriter support routines. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Streams.h" +#include "llvm/System/Path.h" +#include "llvm/System/Program.h" +#include "llvm/Config/config.h" +using namespace llvm; + +void llvm::DisplayGraph(const sys::Path &Filename) { + std::string ErrMsg; +#if HAVE_GRAPHVIZ + sys::Path Graphviz(LLVM_PATH_GRAPHVIZ); + + std::vector<const char*> args; + args.push_back(Graphviz.c_str()); + args.push_back(Filename.c_str()); + args.push_back(0); + + cerr << "Running 'Graphviz' program... " << std::flush; + if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) { + cerr << "Error viewing graph: " << ErrMsg << "\n"; + } +#elif (HAVE_GV && HAVE_DOT) + sys::Path PSFilename = Filename; + PSFilename.appendSuffix("ps"); + + sys::Path dot(LLVM_PATH_DOT); + + std::vector<const char*> args; + args.push_back(dot.c_str()); + args.push_back("-Tps"); + args.push_back("-Nfontname=Courier"); + args.push_back("-Gsize=7.5,10"); + args.push_back(Filename.c_str()); + args.push_back("-o"); + args.push_back(PSFilename.c_str()); + args.push_back(0); + + cerr << "Running 'dot' program... " << std::flush;
" << std::flush; + if (sys::Program::ExecuteAndWait(dot, &args[0],0,0,0,0,&ErrMsg)) { + cerr << "Error viewing graph: '" << ErrMsg << "\n"; + } else { + cerr << " done. \n"; + + sys::Path gv(LLVM_PATH_GV); + args.clear(); + args.push_back(gv.c_str()); + args.push_back(PSFilename.c_str()); + args.push_back("-spartan"); + args.push_back(0); + + ErrMsg.clear(); + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) { + cerr << "Error viewing graph: " << ErrMsg << "\n"; + } + } + PSFilename.eraseFromDisk(); +#elif HAVE_DOTTY + sys::Path dotty(LLVM_PATH_DOTTY); + + std::vector args; + args.push_back(dotty.c_str()); + args.push_back(Filename.c_str()); + args.push_back(0); + + cerr << "Running 'dotty' program... " << std::flush; + if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) { + cerr << "Error viewing graph: " << ErrMsg << "\n"; + } else { +#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns + return; +#endif + } +#endif + + Filename.eraseFromDisk(); +} diff --git a/lib/Support/IsInf.cpp b/lib/Support/IsInf.cpp new file mode 100644 index 000000000000..d6da0c99e8d8 --- /dev/null +++ b/lib/Support/IsInf.cpp @@ -0,0 +1,49 @@ +//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Platform-independent wrapper around C99 isinf() +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#if HAVE_ISINF_IN_MATH_H +# include +#elif HAVE_ISINF_IN_CMATH +# include +#elif HAVE_STD_ISINF_IN_CMATH +# include +using std::isinf; +#elif HAVE_FINITE_IN_IEEEFP_H +// A handy workaround I found at http://www.unixguide.net/sun/faq ... +// apparently this has been a problem with Solaris for years. +# include +static int isinf(double x) { return !finite(x) && x==x; } +#elif defined(_MSC_VER) +#include +#define isinf(X) (!_finite(X)) +#elif defined(_AIX) && defined(__GNUC__) +// GCC's fixincludes seems to be removing the isinf() declaration from the +// system header /usr/include/math.h +# include +static int isinf(double x) { return !finite(x) && x==x; } +#elif defined(__hpux) +// HP-UX is "special" +#include +static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); } +#else +# error "Don't know how to get isinf()" +#endif + +namespace llvm { + +int IsInf(float f) { return isinf(f); } +int IsInf(double d) { return isinf(d); } + +} // end namespace llvm; diff --git a/lib/Support/IsNAN.cpp b/lib/Support/IsNAN.cpp new file mode 100644 index 000000000000..bdfdfbf3155d --- /dev/null +++ b/lib/Support/IsNAN.cpp @@ -0,0 +1,33 @@ +//===-- IsNAN.cpp ---------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Platform-independent wrapper around C99 isnan(). 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISNAN_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISNAN_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISNAN_IN_CMATH
+# include <cmath>
+using std::isnan;
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isnan _isnan
+#else
+# error "Don't know how to get isnan()"
+#endif
+
+namespace llvm {
+  int IsNAN(float f)  { return isnan(f); }
+  int IsNAN(double d) { return isnan(d); }
+} // end namespace llvm;
diff --git a/lib/Support/Makefile b/lib/Support/Makefile
new file mode 100644
index 000000000000..48c21f4fd9e0
--- /dev/null
+++ b/lib/Support/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Support/Makefile --------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMSupport
+BUILD_ARCHIVE = 1
+
+## FIXME: This only requires RTTI because tblgen uses it.  Fix that.
+REQUIRES_RTTI = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
new file mode 100644
index 000000000000..6de65752b3d6
--- /dev/null
+++ b/lib/Support/ManagedStatic.cpp
@@ -0,0 +1,91 @@
+//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ManagedStatic class and llvm_shutdown().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Config/config.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
+#include <cassert>
+using namespace llvm;
+
+static const ManagedStaticBase *StaticList = 0;
+
+static sys::Mutex* ManagedStaticMutex = 0;
+
+void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
+                                              void (*Deleter)(void*)) const {
+  if (ManagedStaticMutex) {
+    ManagedStaticMutex->acquire();
+
+    if (Ptr == 0) {
+      void* tmp = Creator ? Creator() : 0;
+
+      sys::MemoryFence();
+      Ptr = tmp;
+      DeleterFn = Deleter;
+
+      // Add to list of managed statics.
+      Next = StaticList;
+      StaticList = this;
+    }
+
+    ManagedStaticMutex->release();
+  } else {
+    assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+           "Partially initialized ManagedStatic!?");
+    Ptr = Creator ? Creator() : 0;
+    DeleterFn = Deleter;
+
+    // Add to list of managed statics.
+    Next = StaticList;
+    StaticList = this;
+  }
+}
+
+void ManagedStaticBase::destroy() const {
+  assert(DeleterFn && "ManagedStatic not initialized correctly!");
+  assert(StaticList == this &&
+         "Not destroyed in reverse order of construction?");
+  // Unlink from list.
+  StaticList = Next;
+  Next = 0;
+
+  // Destroy memory.
+  DeleterFn(Ptr);
+
+  // Cleanup.
+  Ptr = 0;
+  DeleterFn = 0;
+}
+
+bool llvm::llvm_start_multithreaded() {
+#if LLVM_MULTITHREADED
+  assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!");
+  ManagedStaticMutex = new sys::Mutex(true);
+  return true;
+#else
+  return false;
+#endif
+}
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
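+/// A minimal usage sketch (illustrative only; the Counter object below is
+/// hypothetical, not part of this file):
+///
+///   static ManagedStatic<unsigned> Counter;
+///   ++*Counter;        // first dereference lazily constructs the unsigned
+///   llvm_shutdown();   // destroys every ManagedStatic, in reverse order of
+///                      // construction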
+void llvm::llvm_shutdown() {
+  while (StaticList)
+    StaticList->destroy();
+
+  if (ManagedStaticMutex) {
+    delete ManagedStaticMutex;
+    ManagedStaticMutex = 0;
+  }
+}
+
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
new file mode 100644
index 000000000000..e35c626c4086
--- /dev/null
+++ b/lib/Support/MemoryBuffer.cpp
@@ -0,0 +1,279 @@
+//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/System/Path.h"
+#include "llvm/System/Process.h"
+#include "llvm/System/Program.h"
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#include <sys/uio.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+MemoryBuffer::~MemoryBuffer() {
+  if (MustDeleteBuffer)
+    free((void*)BufferStart);
+}
+
+/// initCopyOf - Initialize this source buffer with a copy of the specified
+/// memory range.  We make the copy so that we can null terminate it
+/// successfully.
+void MemoryBuffer::initCopyOf(const char *BufStart, const char *BufEnd) {
+  size_t Size = BufEnd-BufStart;
+  BufferStart = (char *)malloc((Size+1) * sizeof(char));
+  BufferEnd = BufferStart+Size;
+  memcpy(const_cast<char*>(BufferStart), BufStart, Size);
+  *const_cast<char*>(BufferEnd) = 0;   // Null terminate buffer.
+  MustDeleteBuffer = true;
+}
+
+/// init - Initialize this MemoryBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd) {
+  assert(BufEnd[0] == 0 && "Buffer is not null terminated!");
+  BufferStart = BufStart;
+  BufferEnd = BufEnd;
+  MustDeleteBuffer = false;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class MemoryBufferMem : public MemoryBuffer {
+  std::string FileID;
+public:
+  MemoryBufferMem(const char *Start, const char *End, const char *FID,
+                  bool Copy = false)
+  : FileID(FID) {
+    if (!Copy)
+      init(Start, End);
+    else
+      initCopyOf(Start, End);
+  }
+
+  virtual const char *getBufferIdentifier() const {
+    return FileID.c_str();
+  }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
+/// that EndPtr[0] must be a null byte and be accessible!
+MemoryBuffer *MemoryBuffer::getMemBuffer(const char *StartPtr,
+                                         const char *EndPtr,
+                                         const char *BufferName) {
+  return new MemoryBufferMem(StartPtr, EndPtr, BufferName);
+}
+
+/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+/// copying the contents and taking ownership of it.  This has no requirements
+/// on EndPtr[0].
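+///
+/// For example (a sketch; Data, Len, and the buffer name are hypothetical):
+///
+///   const char *Data = ...;   // need not be null terminated
+///   MemoryBuffer *MB =
+///     MemoryBuffer::getMemBufferCopy(Data, Data+Len, "my buffer");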
+MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr,
+                                             const char *EndPtr,
+                                             const char *BufferName) {
+  return new MemoryBufferMem(StartPtr, EndPtr, BufferName, true);
+}
+
+/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+/// that is completely initialized to zeros.  Note that the caller should
+/// initialize the memory allocated by this method.  The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
+                                                  const char *BufferName) {
+  char *Buf = (char *)malloc((Size+1) * sizeof(char));
+  if (!Buf) return 0;
+  Buf[Size] = 0;
+  MemoryBufferMem *SB = new MemoryBufferMem(Buf, Buf+Size, BufferName);
+  // The memory for this buffer is owned by the MemoryBuffer.
+  SB->MustDeleteBuffer = true;
+  return SB;
+}
+
+/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+/// is completely initialized to zeros.  Note that the caller should
+/// initialize the memory allocated by this method.  The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
+                                            const char *BufferName) {
+  MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
+  if (!SB) return 0;
+  memset(const_cast<char*>(SB->getBufferStart()), 0, Size+1);
+  return SB;
+}
+
+
+/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+/// if the Filename is "-".  If an error occurs, this returns null and fills
+/// in *ErrStr with a reason.  If stdin is empty, this API (unlike getSTDIN)
+/// returns an empty buffer.
+MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
+                                           std::string *ErrStr,
+                                           int64_t FileSize) {
+  if (Filename[0] != '-' || Filename[1] != 0)
+    return getFile(Filename, ErrStr, FileSize);
+  MemoryBuffer *M = getSTDIN();
+  if (M) return M;
+
+  // If stdin was empty, M is null.  Cons up an empty memory buffer now.
+  const char *EmptyStr = "";
+  return MemoryBuffer::getMemBuffer(EmptyStr, EmptyStr, "<stdin>");
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MemoryBufferMMapFile - This represents a file that was mapped in with the
+/// sys::Path::MapInFilePages method.  When destroyed, it calls the
+/// sys::Path::UnMapFilePages method.
+class MemoryBufferMMapFile : public MemoryBuffer {
+  std::string Filename;
+public:
+  MemoryBufferMMapFile(const char *filename, const char *Pages, uint64_t Size)
+    : Filename(filename) {
+    init(Pages, Pages+Size);
+  }
+
+  virtual const char *getBufferIdentifier() const {
+    return Filename.c_str();
+  }
+
+  ~MemoryBufferMMapFile() {
+    sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
+  }
+};
+}
+
+MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
+                                    int64_t FileSize) {
+  int OpenFlags = 0;
+#ifdef O_BINARY
+  OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
+#endif
+  int FD = ::open(Filename, O_RDONLY|OpenFlags);
+  if (FD == -1) {
+    if (ErrStr) *ErrStr = "could not open file";
+    return 0;
+  }
+
+  // If we don't know the file size, use fstat to find out.  fstat on an open
+  // file descriptor is cheaper than stat on a random path.
+  if (FileSize == -1) {
+    struct stat FileInfo;
+    // TODO: This should use fstat64 when available.
+    if (fstat(FD, &FileInfo) == -1) {
+      if (ErrStr) *ErrStr = "could not get file length";
+      ::close(FD);
+      return 0;
+    }
+    FileSize = FileInfo.st_size;
+  }
+
+
+  // If the file is large, try to use mmap to read it in.  We don't use mmap
+  // for small files, because this can severely fragment our address space. Also
+  // don't try to map files that are exactly a multiple of the system page size,
+  // as the file would not have the required null terminator.
+  if (FileSize >= 4096*4 &&
+      (FileSize & (sys::Process::GetPageSize()-1)) != 0) {
+    if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
+      // Close the file descriptor, now that the whole file is in memory.
+      ::close(FD);
+      return new MemoryBufferMMapFile(Filename, Pages, FileSize);
+    }
+  }
+
+  MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename);
+  if (!Buf) {
+    // Failed to create a buffer.
+    if (ErrStr) *ErrStr = "could not allocate buffer";
+    ::close(FD);
+    return 0;
+  }
+
+  OwningPtr<MemoryBuffer> SB(Buf);
+  char *BufPtr = const_cast<char*>(SB->getBufferStart());
+
+  size_t BytesLeft = FileSize;
+  while (BytesLeft) {
+    ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+    if (NumRead != -1) {
+      BytesLeft -= NumRead;
+      BufPtr += NumRead;
+    } else if (errno == EINTR) {
+      // try again
+    } else {
+      // error reading.
+      close(FD);
+      if (ErrStr) *ErrStr = "error reading file data";
+      return 0;
+    }
+  }
+  close(FD);
+
+  return SB.take();
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class STDINBufferFile : public MemoryBuffer {
+public:
+  virtual const char *getBufferIdentifier() const {
+    return "<stdin>";
+  }
+};
+}
+
+MemoryBuffer *MemoryBuffer::getSTDIN() {
+  char Buffer[4096*4];
+
+  std::vector<char> FileData;
+
+  // Read in all of the data from stdin, we cannot mmap stdin.
+  sys::Program::ChangeStdinToBinary();
+  size_t ReadBytes;
+  do {
+    ReadBytes = fread(Buffer, sizeof(char), sizeof(Buffer), stdin);
+    FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes);
+  } while (ReadBytes == sizeof(Buffer));
+
+  FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
+  size_t Size = FileData.size();
+  if (Size <= 1)
+    return 0;
+  MemoryBuffer *B = new STDINBufferFile();
+  B->initCopyOf(&FileData[0], &FileData[Size-1]);
+  return B;
+}
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
new file mode 100644
index 000000000000..5acf1d13ee9c
--- /dev/null
+++ b/lib/Support/PluginLoader.cpp
@@ -0,0 +1,43 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load command line option handler.
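+//
+// For instance, an illustrative invocation (the plugin path is hypothetical):
+//
+//   opt -load ./MyPlugin.so -my-pass input.bc
+//
+// pulls the shared object in before the registered passes run.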
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/DynamicLibrary.h"
+#include <ostream>
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+
+void PluginLoader::operator=(const std::string &Filename) {
+  std::string Error;
+  if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+    cerr << "Error opening '" << Filename << "': " << Error
+         << "\n  -load request ignored.\n";
+  } else {
+    Plugins->push_back(Filename);
+  }
+}
+
+unsigned PluginLoader::getNumPlugins() {
+  return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+  assert(Plugins.isConstructed() && num < Plugins->size() &&
+         "Asking for an out of bounds plugin");
+  return (*Plugins)[num];
+}
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 000000000000..c111c5e6be24
--- /dev/null
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,108 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+// FIXME: This should be thread local when llvm supports threads.
+static const PrettyStackTraceEntry *PrettyStackTraceHead = 0;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+  unsigned NextID = 0;
+  if (Entry->getNextEntry())
+    NextID = PrintStack(Entry->getNextEntry(), OS);
+  OS << NextID << ".\t";
+  Entry->print(OS);
+
+  return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+  // Don't print an empty trace.
+  if (PrettyStackTraceHead == 0) return;
+
+  // If there are pretty stack frames registered, walk and emit them.
+  OS << "Stack dump:\n";
+
+  PrintStack(PrettyStackTraceHead, OS);
+  OS.flush();
+}
+
+// Integrate with crash reporter.
+#ifdef __APPLE__
+extern "C" const char *__crashreporter_info__;
+const char *__crashreporter_info__ = 0;
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process, it prints the pretty stack trace.
+static void CrashHandler(void *Cookie) {
+#ifndef __APPLE__
+  // On non-apple systems, just emit the crash stack trace to stderr.
+  PrintCurStackTrace(errs());
+#else
+  // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+  // put it into __crashreporter_info__.
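+  // (The duplicated copy is, presumably, what the OS crash reporter harvests
+  // post-mortem from the __crashreporter_info__ global; stderr output alone
+  // would not end up in the crash report.)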
+  SmallString<2048> TmpStr;
+  {
+    raw_svector_ostream Stream(TmpStr);
+    PrintCurStackTrace(Stream);
+  }
+
+  if (!TmpStr.empty()) {
+    __crashreporter_info__ = strdup(TmpStr.c_str());
+    errs() << __crashreporter_info__;
+  }
+
+#endif
+}
+
+static bool RegisterCrashPrinter() {
+  sys::AddSignalHandler(CrashHandler, 0);
+  return false;
+}
+
+PrettyStackTraceEntry::PrettyStackTraceEntry() {
+  // The first time this is called, we register the crash printer.
+  static bool HandlerRegistered = RegisterCrashPrinter();
+  HandlerRegistered = HandlerRegistered;
+
+  // Link ourselves.
+  NextEntry = PrettyStackTraceHead;
+  PrettyStackTraceHead = this;
+}
+
+PrettyStackTraceEntry::~PrettyStackTraceEntry() {
+  assert(PrettyStackTraceHead == this &&
+         "Pretty stack trace entry destruction is out of order");
+  PrettyStackTraceHead = getNextEntry();
+}
+
+void PrettyStackTraceString::print(raw_ostream &OS) const {
+  OS << Str << "\n";
+}
+
+void PrettyStackTraceProgram::print(raw_ostream &OS) const {
+  OS << "Program arguments: ";
+  // Print the argument list.
+  for (unsigned i = 0, e = ArgC; i != e; ++i)
+    OS << ArgV[i] << ' ';
+  OS << '\n';
+}
+
diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp
new file mode 100644
index 000000000000..d5ffff9d937f
--- /dev/null
+++ b/lib/Support/SlowOperationInformer.cpp
@@ -0,0 +1,66 @@
+//===-- SlowOperationInformer.cpp - Keep the user informed ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SlowOperationInformer class for the LLVM debugger.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SlowOperationInformer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Alarm.h"
+#include <iostream>
+#include <sstream>
+using namespace llvm;
+
+SlowOperationInformer::SlowOperationInformer(const std::string &Name)
+  : OperationName(Name), LastPrintAmount(0) {
+  sys::SetupAlarm(1);
+}
+
+SlowOperationInformer::~SlowOperationInformer() {
+  sys::TerminateAlarm();
+  if (LastPrintAmount) {
+    // If we have printed something, make _sure_ we print the 100% amount, and
+    // also print a newline.
+    cout << std::string(LastPrintAmount, '\b') << "Progress "
+         << OperationName << ": 100% \n";
+  }
+}
+
+/// progress - Clients should periodically call this method when they are in
+/// an exception-safe state.  The Amount variable should indicate how far
+/// along the operation is, given in 1/10ths of a percent (in other words,
+/// Amount should range from 0 to 1000).
+bool SlowOperationInformer::progress(unsigned Amount) {
+  int status = sys::AlarmStatus();
+  if (status == -1) {
+    cout << "\n";
+    LastPrintAmount = 0;
+    return true;
+  }
+
+  // If we haven't spent enough time in this operation to warrant displaying the
+  // progress bar, don't do so yet.
+  if (status == 0)
+    return false;
+
+  // Delete whatever we printed last time.
+  std::string ToPrint = std::string(LastPrintAmount, '\b');
+
+  std::ostringstream OS;
+  OS << "Progress " << OperationName << ": " << Amount/10;
+  if (unsigned Rem = Amount % 10)
+    OS << "." << Rem << "%";
<< Rem << "%"; + else + OS << "% "; + + LastPrintAmount = OS.str().size(); + cout << ToPrint+OS.str() << std::flush; + return false; +} diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp new file mode 100644 index 000000000000..68938fa5a571 --- /dev/null +++ b/lib/Support/SmallPtrSet.cpp @@ -0,0 +1,223 @@ +//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallPtrSet class. See SmallPtrSet.h for an +// overview of the algorithm. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/MathExtras.h" +#include + +using namespace llvm; + +void SmallPtrSetImpl::shrink_and_clear() { + assert(!isSmall() && "Can't shrink a small set!"); + free(CurArray); + + // Reduce the number of buckets. + CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32; + NumElements = NumTombstones = 0; + + // Install the new array. Clear all the buckets to empty. + CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1)); + assert(CurArray && "Failed to allocate memory?"); + memset(CurArray, -1, CurArraySize*sizeof(void*)); + + // The end pointer, always valid, is set to a valid element to help the + // iterator. + CurArray[CurArraySize] = 0; +} + +bool SmallPtrSetImpl::insert_imp(const void * Ptr) { + if (isSmall()) { + // Check to see if it is already in the set. + for (const void **APtr = SmallArray, **E = SmallArray+NumElements; + APtr != E; ++APtr) + if (*APtr == Ptr) + return false; + + // Nope, there isn't. If we stay small, just 'pushback' now. + if (NumElements < CurArraySize-1) { + SmallArray[NumElements++] = Ptr; + return true; + } + // Otherwise, hit the big set case, which will call grow. + } + + // If more than 3/4 of the array is full, grow. + if (NumElements*4 >= CurArraySize*3 || + CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) + Grow(); + + // Okay, we know we have space. Find a hash bucket. + const void **Bucket = const_cast(FindBucketFor(Ptr)); + if (*Bucket == Ptr) return false; // Already inserted, good. + + // Otherwise, insert it! + if (*Bucket == getTombstoneMarker()) + --NumTombstones; + *Bucket = Ptr; + ++NumElements; // Track density. + return true; +} + +bool SmallPtrSetImpl::erase_imp(const void * Ptr) { + if (isSmall()) { + // Check to see if it is in the set. + for (const void **APtr = SmallArray, **E = SmallArray+NumElements; + APtr != E; ++APtr) + if (*APtr == Ptr) { + // If it is in the set, replace this element. + *APtr = E[-1]; + E[-1] = getEmptyMarker(); + --NumElements; + return true; + } + + return false; + } + + // Okay, we know we have space. Find a hash bucket. + void **Bucket = const_cast(FindBucketFor(Ptr)); + if (*Bucket != Ptr) return false; // Not in the set? + + // Set this as a tombstone. + *Bucket = getTombstoneMarker(); + --NumElements; + ++NumTombstones; + return true; +} + +const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const { + unsigned Bucket = Hash(Ptr); + unsigned ArraySize = CurArraySize; + unsigned ProbeAmt = 1; + const void *const *Array = CurArray; + const void *const *Tombstone = 0; + while (1) { + // Found Ptr's bucket? 
+    if (Array[Bucket] == Ptr)
+      return Array+Bucket;
+
+    // If we found an empty bucket, the pointer doesn't exist in the set.
+    // Return a tombstone if we've seen one so far, or the empty bucket if
+    // not.
+    if (Array[Bucket] == getEmptyMarker())
+      return Tombstone ? Tombstone : Array+Bucket;
+
+    // If this is a tombstone, remember it.  If Ptr ends up not in the set, we
+    // prefer to return it than something that would require more probing.
+    if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+      Tombstone = Array+Bucket;  // Remember the first tombstone found.
+
+    // It's a hash collision or a tombstone. Reprobe.
+    Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+  }
+}
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow() {
+  // Allocate at twice as many buckets, but at least 128.
+  unsigned OldSize = CurArraySize;
+  unsigned NewSize = OldSize < 64 ? 128 : OldSize*2;
+
+  const void **OldBuckets = CurArray;
+  bool WasSmall = isSmall();
+
+  // Install the new array.  Clear all the buckets to empty.
+  CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+  assert(CurArray && "Failed to allocate memory?");
+  CurArraySize = NewSize;
+  memset(CurArray, -1, NewSize*sizeof(void*));
+
+  // The end pointer, always valid, is set to a valid element to help the
+  // iterator.
+  CurArray[NewSize] = 0;
+
+  // Copy over all the elements.
+  if (WasSmall) {
+    // Small sets store their elements in order.
+    for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+         BucketPtr != E; ++BucketPtr) {
+      const void *Elt = *BucketPtr;
+      *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+    }
+  } else {
+    // Copy over all valid entries.
+    for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+         BucketPtr != E; ++BucketPtr) {
+      // Copy over the element if it is valid.
+      const void *Elt = *BucketPtr;
+      if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+        *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+    }
+
+    free(OldBuckets);
+    NumTombstones = 0;
+  }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const SmallPtrSetImpl& that) {
+  // If we're becoming small, prepare to insert into our stack space
+  if (that.isSmall()) {
+    CurArray = &SmallArray[0];
+  // Otherwise, allocate new heap space (unless we were the same size)
+  } else {
+    CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+    assert(CurArray && "Failed to allocate memory?");
+  }
+
+  // Copy over the new array size
+  CurArraySize = that.CurArraySize;
+
+  // Copy over the contents from the other set
+  memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+
+  NumElements = that.NumElements;
+  NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
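+///
+/// A sketch of the intended use (the element type here is hypothetical):
+///
+///   SmallPtrSet<Instruction*, 8> A, B;
+///   A = B;   // routed through CopyFrom; small sizes must match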
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+  if (isSmall() && RHS.isSmall())
+    assert(CurArraySize == RHS.CurArraySize &&
+           "Cannot assign sets with different small sizes");
+
+  // If we're becoming small, prepare to insert into our stack space
+  if (RHS.isSmall()) {
+    if (!isSmall())
+      free(CurArray);
+    CurArray = &SmallArray[0];
+  // Otherwise, allocate new heap space (unless we were the same size)
+  } else if (CurArraySize != RHS.CurArraySize) {
+    if (isSmall())
+      CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+    else
+      CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+    assert(CurArray && "Failed to allocate memory?");
+  }
+
+  // Copy over the new array size
+  CurArraySize = RHS.CurArraySize;
+
+  // Copy over the contents from the other set
+  memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+
+  NumElements = RHS.NumElements;
+  NumTombstones = RHS.NumTombstones;
+}
+
+SmallPtrSetImpl::~SmallPtrSetImpl() {
+  if (!isSmall())
+    free(CurArray);
+}
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
new file mode 100644
index 000000000000..13acc1b0fa1e
--- /dev/null
+++ b/lib/Support/Statistic.cpp
@@ -0,0 +1,126 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes.  These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <ostream>
+#include <cstring>
+using namespace llvm;
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled("stats", cl::desc("Enable statistics output from program"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called.  We print statistics from the destructor.
+class StatisticInfo {
+  std::vector<const Statistic*> Stats;
+public:
+  ~StatisticInfo();
+
+  void addStatistic(const Statistic *S) {
+    Stats.push_back(S);
+  }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+  // If stats are enabled, inform StatInfo that this statistic should be
+  // printed.
+  if (Enabled)
+    StatInfo->addStatistic(this);
+  // Remember we have been registered.
+  Initialized = true;
+}
+
+namespace {
+
+struct NameCompare {
+  bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+    int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+    if (Cmp != 0) return Cmp < 0;
+
+    // Secondary key is the description.
+    return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+  }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+  // Statistics not enabled?
+  if (Stats.empty()) return;
+
+  // Get the stream to write to.
+  std::ostream &OutStream = *GetLibSupportInfoOutputFile();
+
+  // Figure out how long the biggest Value and Name fields are.
+  unsigned MaxNameLen = 0, MaxValLen = 0;
+  for (size_t i = 0, e = Stats.size(); i != e; ++i) {
+    MaxValLen = std::max(MaxValLen,
+                         (unsigned)utostr(Stats[i]->getValue()).size());
+    MaxNameLen = std::max(MaxNameLen,
+                          (unsigned)std::strlen(Stats[i]->getName()));
+  }
+
+  // Sort the fields by name.
+  std::stable_sort(Stats.begin(), Stats.end(), NameCompare());
+
+  // Print out the statistics header...
+  OutStream << "===" << std::string(73, '-') << "===\n"
+            << "                          ... Statistics Collected ...\n"
+            << "===" << std::string(73, '-') << "===\n\n";
+
+  // Print all of the statistics.
+  for (size_t i = 0, e = Stats.size(); i != e; ++i) {
+    std::string CountStr = utostr(Stats[i]->getValue());
+    OutStream << std::string(MaxValLen-CountStr.size(), ' ')
+              << CountStr << " " << Stats[i]->getName()
+              << std::string(MaxNameLen-std::strlen(Stats[i]->getName()), ' ')
+              << " - " << Stats[i]->getDesc() << "\n";
+
+  }
+
+  OutStream << std::endl;  // Flush the output stream...
+
+  if (&OutStream != cerr.stream() && &OutStream != cout.stream())
+    delete &OutStream;   // Close the file.
+}
diff --git a/lib/Support/Streams.cpp b/lib/Support/Streams.cpp
new file mode 100644
index 000000000000..cf6cfeb7fd04
--- /dev/null
+++ b/lib/Support/Streams.cpp
@@ -0,0 +1,30 @@
+//===-- Streams.cpp - Wrappers for iostreams ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper for the std::cout and std::cerr I/O streams.
+// It prevents the need to include <iostream> in each file just to get I/O.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Streams.h"
+#include <iostream>
+using namespace llvm;
+
+OStream llvm::cout(std::cout);
+OStream llvm::cerr(std::cerr);
+IStream llvm::cin(std::cin);
+
+namespace llvm {
+
+/// FlushStream - Function called by BaseStream to flush an ostream.
+void FlushStream(std::ostream &S) {
+  S << std::flush;
+}
+
+} // end namespace llvm
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
new file mode 100644
index 000000000000..1618086e602e
--- /dev/null
+++ b/lib/Support/StringExtras.cpp
@@ -0,0 +1,114 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include <cstring>
+using namespace llvm;
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string.  If
+/// there are no tokens in the source string, an empty string is returned.
+/// The Source source string is updated in place to remove the returned string
+/// and any delimiter prefix from it.
+std::string llvm::getToken(std::string &Source, const char *Delimiters) {
+  size_t NumDelimiters = std::strlen(Delimiters);
+
+  // Figure out where the token starts.
+  std::string::size_type Start =
+    Source.find_first_not_of(Delimiters, 0, NumDelimiters);
+  if (Start == std::string::npos) Start = Source.size();
+
+  // Find the next occurrence of the delimiter.
+  std::string::size_type End =
+    Source.find_first_of(Delimiters, Start, NumDelimiters);
+  if (End == std::string::npos) End = Source.size();
+
+  // Create the return token.
+  std::string Result = std::string(Source.begin()+Start, Source.begin()+End);
+
+  // Erase the token that we read in.
+  Source.erase(Source.begin(), Source.begin()+End);
+
+  return Result;
+}
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void llvm::SplitString(const std::string &Source,
+                       std::vector<std::string> &OutFragments,
+                       const char *Delimiters) {
+  std::string S = Source;
+
+  std::string S2 = getToken(S, Delimiters);
+  while (!S2.empty()) {
+    OutFragments.push_back(S2);
+    S2 = getToken(S, Delimiters);
+  }
+}
+
+
+
+/// UnescapeString - Modify the argument string, turning two character sequences
+/// @verbatim
+/// like '\\' 'n' into '\n'.  This handles: \e \a \b \f \n \r \t \v \' \ and
+/// \num (where num is a 1-3 byte octal value).
+/// @endverbatim
+void llvm::UnescapeString(std::string &Str) {
+  for (unsigned i = 0; i != Str.size(); ++i) {
+    if (Str[i] == '\\' && i != Str.size()-1) {
+      switch (Str[i+1]) {
+      default: continue;  // Don't execute the code after the switch.
+      case 'a': Str[i] = '\a'; break;
+      case 'b': Str[i] = '\b'; break;
+      case 'e': Str[i] = 27; break;
+      case 'f': Str[i] = '\f'; break;
+      case 'n': Str[i] = '\n'; break;
+      case 'r': Str[i] = '\r'; break;
+      case 't': Str[i] = '\t'; break;
+      case 'v': Str[i] = '\v'; break;
+      case '"': Str[i] = '\"'; break;
+      case '\'': Str[i] = '\''; break;
+      case '\\': Str[i] = '\\'; break;
+      }
+      // Nuke the second character.
+      Str.erase(Str.begin()+i+1);
+    }
+  }
+}
+
+/// EscapeString - Modify the argument string, turning '\\' and anything that
+/// doesn't satisfy std::isprint into an escape sequence.
+void llvm::EscapeString(std::string &Str) {
+  for (unsigned i = 0; i != Str.size(); ++i) {
+    if (Str[i] == '\\') {
+      ++i;
+      Str.insert(Str.begin()+i, '\\');
+    } else if (Str[i] == '\t') {
+      Str[i++] = '\\';
+      Str.insert(Str.begin()+i, 't');
+    } else if (Str[i] == '"') {
+      Str.insert(Str.begin()+i++, '\\');
+    } else if (Str[i] == '\n') {
+      Str[i++] = '\\';
+      Str.insert(Str.begin()+i, 'n');
+    } else if (!std::isprint(Str[i])) {
+      // Always expand to a 3-digit octal escape.
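+      // (For example, an unprintable 0x07 byte is rewritten as the four
+      // characters "\007".)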
+      unsigned Char = Str[i];
+      Str[i++] = '\\';
+      Str.insert(Str.begin()+i++, '0'+((Char/64) & 7));
+      Str.insert(Str.begin()+i++, '0'+((Char/8)  & 7));
+      Str.insert(Str.begin()+i  , '0'+( Char     & 7));
+    }
+  }
+}
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
new file mode 100644
index 000000000000..0c61732a61b3
--- /dev/null
+++ b/lib/Support/StringMap.cpp
@@ -0,0 +1,234 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+  ItemSize = itemSize;
+
+  // If a size is specified, initialize the table with that many buckets.
+  if (InitSize) {
+    init(InitSize);
+    return;
+  }
+
+  // Otherwise, initialize it with zero buckets to avoid the allocation.
+  TheTable = 0;
+  NumBuckets = 0;
+  NumItems = 0;
+  NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+  assert((InitSize & (InitSize-1)) == 0 &&
+         "Init Size must be a power of 2 or zero!");
+  NumBuckets = InitSize ? InitSize : 16;
+  NumItems = 0;
+  NumTombstones = 0;
+
+  TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
+
+  // Allocate one extra bucket, set it to look filled so the iterators stop at
+  // end.
+  TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+}
+
+
+/// HashString - Compute a hash code for the specified string.
+///
+static unsigned HashString(const char *Start, const char *End) {
+  // Bernstein hash function.
+  unsigned int Result = 0;
+  // TODO: investigate whether a modified bernstein hash function performs
+  // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+  //   X*33+c -> X*33^c
+  while (Start != End)
+    Result = Result * 33 + *Start++;
+  Result = Result + (Result >> 5);
+  return Result;
+}
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in.  If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null.  Otherwise, it will be null.  In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
+                                        const char *NameEnd) {
+  unsigned HTSize = NumBuckets;
+  if (HTSize == 0) {  // Hash table unallocated so far?
+    init(16);
+    HTSize = NumBuckets;
+  }
+  unsigned FullHashValue = HashString(NameStart, NameEnd);
+  unsigned BucketNo = FullHashValue & (HTSize-1);
+
+  unsigned ProbeAmt = 1;
+  int FirstTombstone = -1;
+  while (1) {
+    ItemBucket &Bucket = TheTable[BucketNo];
+    StringMapEntryBase *BucketItem = Bucket.Item;
+    // If we found an empty bucket, this key isn't in the table yet, return it.
+    if (BucketItem == 0) {
+      // If we found a tombstone, we want to reuse the tombstone instead of an
+      // empty bucket.  This reduces probing.
+      if (FirstTombstone != -1) {
+        TheTable[FirstTombstone].FullHashValue = FullHashValue;
+        return FirstTombstone;
+      }
+
+      Bucket.FullHashValue = FullHashValue;
+      return BucketNo;
+    }
+
+    if (BucketItem == getTombstoneVal()) {
+      // Skip over tombstones.  However, remember the first one we see.
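+      // (Reusing that first tombstone on insertion keeps probe chains from
+      // growing after heavy erase/insert churn.)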
+      if (FirstTombstone == -1) FirstTombstone = BucketNo;
+    } else if (Bucket.FullHashValue == FullHashValue) {
+      // If the full hash value matches, check deeply for a match.  The common
+      // case here is that we are only looking at the buckets (for item info
+      // being non-null and for the full hash value) not at the items.  This
+      // is important for cache locality.
+
+      // Do the comparison like this because NameStart isn't necessarily
+      // null-terminated!
+      char *ItemStr = (char*)BucketItem+ItemSize;
+      unsigned ItemStrLen = BucketItem->getKeyLength();
+      if (unsigned(NameEnd-NameStart) == ItemStrLen &&
+          memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
+        // We found a match!
+        return BucketNo;
+      }
+    }
+
+    // Okay, we didn't find the item.  Probe to the next bucket.
+    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+    // Use quadratic probing, it has fewer clumping artifacts than linear
+    // probing and has good cache behavior in the common case.
+    ++ProbeAmt;
+  }
+}
+
+
+/// FindKey - Look up the bucket that contains the specified key. If it exists
+/// in the map, return the bucket number of the key.  Otherwise return -1.
+/// This does not modify the map.
+int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
+  unsigned HTSize = NumBuckets;
+  if (HTSize == 0) return -1;  // Really empty table?
+  unsigned FullHashValue = HashString(KeyStart, KeyEnd);
+  unsigned BucketNo = FullHashValue & (HTSize-1);
+
+  unsigned ProbeAmt = 1;
+  while (1) {
+    ItemBucket &Bucket = TheTable[BucketNo];
+    StringMapEntryBase *BucketItem = Bucket.Item;
+    // If we found an empty bucket, this key isn't in the table yet, return.
+    if (BucketItem == 0)
+      return -1;
+
+    if (BucketItem == getTombstoneVal()) {
+      // Ignore tombstones.
+    } else if (Bucket.FullHashValue == FullHashValue) {
+      // If the full hash value matches, check deeply for a match.  The common
+      // case here is that we are only looking at the buckets (for item info
+      // being non-null and for the full hash value) not at the items.  This
+      // is important for cache locality.
+
+      // Do the comparison like this because NameStart isn't necessarily
+      // null-terminated!
+      char *ItemStr = (char*)BucketItem+ItemSize;
+      unsigned ItemStrLen = BucketItem->getKeyLength();
+      if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
+          memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
+        // We found a match!
+        return BucketNo;
+      }
+    }
+
+    // Okay, we didn't find the item.  Probe to the next bucket.
+    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+    // Use quadratic probing, it has fewer clumping artifacts than linear
+    // probing and has good cache behavior in the common case.
+    ++ProbeAmt;
+  }
+}
+
+/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
+/// delete it.  This aborts if the value isn't in the table.
+void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
+  const char *VStr = (char*)V + ItemSize;
+  StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
+  V2 = V2;
+  assert(V == V2 && "Didn't find key?");
+}
+
+/// RemoveKey - Remove the StringMapEntry for the specified key from the
+/// table, returning it.  If the key is not in the table, this returns null.
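+/// (An illustrative call, assuming KeyStart/KeyEnd delimit a key that need
+/// not be null terminated, and Map is a hypothetical StringMapImpl-derived
+/// map:
+///   StringMapEntryBase *E = Map.RemoveKey(KeyStart, KeyEnd);)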
+StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
+                                             const char *KeyEnd) {
+  int Bucket = FindKey(KeyStart, KeyEnd);
+  if (Bucket == -1) return 0;
+
+  StringMapEntryBase *Result = TheTable[Bucket].Item;
+  TheTable[Bucket].Item = getTombstoneVal();
+  --NumItems;
+  ++NumTombstones;
+  return Result;
+}
+
+
+
+/// RehashTable - Grow the table, redistributing values into the buckets with
+/// the appropriate mod-of-hashtable-size.
+void StringMapImpl::RehashTable() {
+  unsigned NewSize = NumBuckets*2;
+  // Allocate one extra bucket which will always be non-empty.  This allows the
+  // iterators to stop at end.
+  ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
+  NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
+
+  // Rehash all the items into their new buckets.  Luckily :) we already have
+  // the hash values available, so we don't have to rehash any strings.
+  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
+    if (IB->Item && IB->Item != getTombstoneVal()) {
+      // Fast case, bucket available.
+      unsigned FullHash = IB->FullHashValue;
+      unsigned NewBucket = FullHash & (NewSize-1);
+      if (NewTableArray[NewBucket].Item == 0) {
+        NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
+        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+        continue;
+      }
+
+      // Otherwise probe for a spot.
+      unsigned ProbeSize = 1;
+      do {
+        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
+      } while (NewTableArray[NewBucket].Item);
+
+      // Finally found a slot.  Fill it in.
+      NewTableArray[NewBucket].Item = IB->Item;
+      NewTableArray[NewBucket].FullHashValue = FullHash;
+    }
+  }
+
+  free(TheTable);
+
+  TheTable = NewTableArray;
+  NumBuckets = NewSize;
+}
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
new file mode 100644
index 000000000000..b9c1fd0465bd
--- /dev/null
+++ b/lib/Support/StringPool.cpp
@@ -0,0 +1,35 @@
+//===-- StringPool.cpp - Interned string pool -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringPool class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/Streams.h"
+
+using namespace llvm;
+
+StringPool::StringPool() {}
+
+StringPool::~StringPool() {
+  assert(InternTable.empty() && "PooledStringPtr leaked!");
+}
+
+PooledStringPtr StringPool::intern(const char *Begin, const char *End) {
+  table_t::iterator I = InternTable.find(Begin, End);
+  if (I != InternTable.end())
+    return PooledStringPtr(&*I);
+
+  entry_t *S = entry_t::Create(Begin, End);
+  S->getValue().Pool = this;
+  InternTable.insert(S);
+
+  return PooledStringPtr(S);
+}
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
new file mode 100644
index 000000000000..80d6e4cba9fb
--- /dev/null
+++ b/lib/Support/SystemUtils.cpp
@@ -0,0 +1,58 @@
+//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/System/Process.h"
+#include "llvm/System/Program.h"
+#include <ostream>
+using namespace llvm;
+
+bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check,
+                                       bool print_warning) {
+  if (stream_to_check == cout.stream() &&
+      sys::Process::StandardOutIsDisplayed()) {
+    if (print_warning) {
+      cerr << "WARNING: You're attempting to print out a bitcode file.\n"
+           << "This is inadvisable as it may cause display problems. If\n"
+           << "you REALLY want to taste LLVM bitcode first-hand, you\n"
+           << "can force output with the `-f' option.\n\n";
+    }
+    return true;
+  }
+  return false;
+}
+
+/// FindExecutable - Find a named executable, giving the argv[0] of program
+/// being executed. This allows us to find another LLVM tool if it is built
+/// into the same directory, but that directory is neither the current
+/// directory, nor in the PATH.  If the executable cannot be found, return an
+/// empty string.
+///
+#undef FindExecutable   // needed on windows :(
+sys::Path llvm::FindExecutable(const std::string &ExeName,
+                               const std::string &ProgramPath) {
+  // First check the directory that the calling program is in.  We can do this
+  // if ProgramPath contains at least one / character, indicating that it is a
+  // relative path to bugpoint itself.
+  sys::Path Result ( ProgramPath );
+  Result.eraseComponent();
+  if (!Result.isEmpty()) {
+    Result.appendComponent(ExeName);
+    if (Result.canExecute())
+      return Result;
+  }
+
+  return sys::Program::FindProgramByName(ExeName);
+}
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
new file mode 100644
index 000000000000..3c8879bd06e3
--- /dev/null
+++ b/lib/Support/Timer.cpp
@@ -0,0 +1,387 @@
+//===-- Timer.cpp - Interval Timing Support -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interval Timing implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Process.h"
+#include <algorithm>
+#include <fstream>
+#include <functional>
+#include <map>
+using namespace llvm;
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+
+// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
+// of constructor/destructor ordering being unspecified by C++.  Basically the
+// problem is that a Statistic object gets destroyed, which ends up calling
+// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
+// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
+// would get destroyed before the Statistic, causing havoc to ensue.  We "fix"
+// this by creating the string the first time it is needed and never destroying
+// it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+  return *LibSupportInfoOutputFilename;
+}
+
+namespace {
+  static cl::opt<bool>
+  TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+                                      "tracking (this may be slow)"),
+             cl::Hidden);
+
+  static cl::opt<std::string, true>
+  InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+                     cl::desc("File to append -stats and -timer output to"),
+                     cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+  if (DefaultTimerGroup) return DefaultTimerGroup;
+  return DefaultTimerGroup = new TimerGroup("Miscellaneous Ungrouped Timers");
+}
+
+Timer::Timer(const std::string &N)
+  : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
+    Started(false), TG(getDefaultTimerGroup()) {
+  TG->addTimer();
+}
+
+Timer::Timer(const std::string &N, TimerGroup &tg)
+  : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
+    Started(false), TG(&tg) {
+  TG->addTimer();
+}
+
+Timer::Timer(const Timer &T) {
+  TG = T.TG;
+  if (TG) TG->addTimer();
+  operator=(T);
+}
+
+
+// Copy ctor, initialize with no TG member.
+Timer::Timer(bool, const Timer &T) {
+  TG = T.TG;     // Avoid assertion in operator=
+  operator=(T);  // Copy contents
+  TG = 0;
+}
+
+
+Timer::~Timer() {
+  if (TG) {
+    if (Started) {
+      Started = false;
+      TG->addTimerToPrint(*this);
+    }
+    TG->removeTimer();
+  }
+}
+
+static inline size_t getMemUsage() {
+  if (TrackSpace)
+    return sys::Process::GetMallocUsage();
+  return 0;
+}
+
+struct TimeRecord {
+  double Elapsed, UserTime, SystemTime;
+  ssize_t MemUsed;
+};
+
+static TimeRecord getTimeRecord(bool Start) {
+  TimeRecord Result;
+
+  sys::TimeValue now(0,0);
+  sys::TimeValue user(0,0);
+  sys::TimeValue sys(0,0);
+
+  ssize_t MemUsed = 0;
+  if (Start) {
+    MemUsed = getMemUsage();
+    sys::Process::GetTimeUsage(now,user,sys);
+  } else {
+    sys::Process::GetTimeUsage(now,user,sys);
+    MemUsed = getMemUsage();
+  }
+
+  Result.Elapsed    = now.seconds()  + now.microseconds()  / 1000000.0;
+  Result.UserTime   = user.seconds() + user.microseconds() / 1000000.0;
+  Result.SystemTime = sys.seconds()  + sys.microseconds()  / 1000000.0;
+  Result.MemUsed    = MemUsed;
+
+  return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+  Started = true;
+  ActiveTimers->push_back(this);
+  TimeRecord TR = getTimeRecord(true);
+  Elapsed    -= TR.Elapsed;
+  UserTime   -= TR.UserTime;
+  SystemTime -= TR.SystemTime;
+  MemUsed    -= TR.MemUsed;
+  PeakMemBase = TR.MemUsed;
+}
+
+void Timer::stopTimer() {
+  TimeRecord TR = getTimeRecord(false);
+  Elapsed    += TR.Elapsed;
+  UserTime   += TR.UserTime;
+  SystemTime += TR.SystemTime;
+  MemUsed    += TR.MemUsed;
+
+  if (ActiveTimers->back() == this) {
+    ActiveTimers->pop_back();
+  } else {
+    std::vector<Timer*>::iterator I =
+      std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+    assert(I != ActiveTimers->end() && "stop but no startTimer?");
+    ActiveTimers->erase(I);
+  }
+}
+
+void Timer::sum(const Timer &T) {
+  Elapsed    += T.Elapsed;
+  UserTime   += T.UserTime;
+  SystemTime += T.SystemTime;
+  MemUsed    += T.MemUsed;
+  PeakMem    += T.PeakMem;
+}
+
+/// addPeakMemoryMeasurement - This method should be called whenever memory
+/// usage needs to be checked.
+/// It adds a peak memory measurement to the
+/// currently active timers, which will be printed when the timer group prints
+///
+void Timer::addPeakMemoryMeasurement() {
+  size_t MemUsed = getMemUsage();
+
+  for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
+         E = ActiveTimers->end(); I != E; ++I)
+    (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
+}
+
+//===----------------------------------------------------------------------===//
+//   NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef std::map<std::string, Timer> Name2Timer;
+typedef std::map<std::string, std::pair<TimerGroup, Name2Timer> > Name2Pair;
+
+}
+
+static ManagedStatic<Name2Timer> NamedTimers;
+
+static ManagedStatic<Name2Pair> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(const std::string &Name) {
+  Name2Timer::iterator I = NamedTimers->find(Name);
+  if (I != NamedTimers->end())
+    return I->second;
+
+  return NamedTimers->insert(I, std::make_pair(Name, Timer(Name)))->second;
+}
+
+static Timer &getNamedRegionTimer(const std::string &Name,
+                                  const std::string &GroupName) {
+
+  Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
+  if (I == NamedGroupedTimers->end()) {
+    TimerGroup TG(GroupName);
+    std::pair<TimerGroup, Name2Timer> Pair(TG, Name2Timer());
+    I = NamedGroupedTimers->insert(I, std::make_pair(GroupName, Pair));
+  }
+
+  Name2Timer::iterator J = I->second.second.find(Name);
+  if (J == I->second.second.end())
+    J = I->second.second.insert(J,
+                                std::make_pair(Name,
+                                               Timer(Name,
+                                                     I->second.first)));
+
+  return J->second;
+}
+
+NamedRegionTimer::NamedRegionTimer(const std::string &Name)
+  : TimeRegion(getNamedRegionTimer(Name)) {}
+
+NamedRegionTimer::NamedRegionTimer(const std::string &Name,
+                                   const std::string &GroupName)
+  : TimeRegion(getNamedRegionTimer(Name, GroupName)) {}
+
+//===----------------------------------------------------------------------===//
+//   TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+// printAlignedFP - Simulate the printf "%A.Bf" format, where A is the
+// TotalWidth size, and B is the AfterDec size.
+//
+static void printAlignedFP(double Val, unsigned AfterDec, unsigned TotalWidth,
+                           std::ostream &OS) {
+  assert(TotalWidth >= AfterDec+1 && "Bad FP Format!");
+  OS.width(TotalWidth-AfterDec-1);
+  char OldFill = OS.fill();
+  OS.fill(' ');
+  OS << (int)Val;  // Integer part;
+  OS << ".";
+  OS.width(AfterDec);
+  OS.fill('0');
+  unsigned ResultFieldSize = 1;
+  while (AfterDec--) ResultFieldSize *= 10;
+  OS << (int)(Val*ResultFieldSize) % ResultFieldSize;
+  OS.fill(OldFill);
+}
+
+static void printVal(double Val, double Total, std::ostream &OS) {
+  if (Total < 1e-7)   // Avoid dividing by zero...
+    OS << "        -----     ";
+  else {
+    OS << "  ";
+    printAlignedFP(Val, 4, 7, OS);
+    OS << " (";
+    printAlignedFP(Val*100/Total, 1, 5, OS);
+    OS << "%)";
+  }
+}
+
+void Timer::print(const Timer &Total, std::ostream &OS) {
+  if (Total.UserTime)
+    printVal(UserTime, Total.UserTime, OS);
+  if (Total.SystemTime)
+    printVal(SystemTime, Total.SystemTime, OS);
+  if (Total.getProcessTime())
+    printVal(getProcessTime(), Total.getProcessTime(), OS);
+  printVal(Elapsed, Total.Elapsed, OS);
+
+  OS << "  ";
+
+  if (Total.MemUsed) {
+    OS.width(9);
+    OS << MemUsed << "  ";
+  }
+  if (Total.PeakMem) {
+    if (PeakMem) {
+      OS.width(9);
+      OS << PeakMem << "  ";
+    } else
+      OS << "           ";
+  }
+  OS << Name << "\n";
+
+  Started = false;  // Once printed, don't print again
}
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
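+// By default this is cerr; the -info-output-file flag declared above
+// redirects it to a file, and "-" selects cout.  An illustrative use
+// (tool and input are hypothetical):
+//   llc -time-passes -info-output-file=timers.txt input.bc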
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
+std::ostream *
+llvm::GetLibSupportInfoOutputFile() {
+  std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename();
+  if (LibSupportInfoOutputFilename.empty())
+    return cerr.stream();
+  if (LibSupportInfoOutputFilename == "-")
+    return cout.stream();
+
+  std::ostream *Result = new std::ofstream(LibSupportInfoOutputFilename.c_str(),
+                                           std::ios::app);
+  if (!Result->good()) {
+    cerr << "Error opening info-output-file '"
+         << LibSupportInfoOutputFilename << "' for appending!\n";
+    delete Result;
+    return cerr.stream();
+  }
+  return Result;
+}
+
+
+void TimerGroup::removeTimer() {
+  if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
+    // Sort the timers in descending order by amount of time taken...
+    std::sort(TimersToPrint.begin(), TimersToPrint.end(),
+              std::greater<Timer>());
+
+    // Figure out how many spaces to indent TimerGroup name...
+    unsigned Padding = (80-Name.length())/2;
+    if (Padding > 80) Padding = 0;         // Don't allow "negative" numbers
+
+    std::ostream *OutStream = GetLibSupportInfoOutputFile();
+
+    ++NumTimers;
+    {  // Scope to contain Total timer... don't allow total timer to drop us to
+       // zero timers...
+      Timer Total("TOTAL");
+
+      for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+        Total.sum(TimersToPrint[i]);
+
+      // Print out timing header...
+      *OutStream << "===" << std::string(73, '-') << "===\n"
+                 << std::string(Padding, ' ') << Name << "\n"
+                 << "===" << std::string(73, '-')
+                 << "===\n";
+
+      // If this is not a collection of ungrouped times, print the total time.
+      // Ungrouped timers don't really make sense to add up. We still print the
+      // TOTAL line to make the percentages make sense.
+      if (this != DefaultTimerGroup) {
+        *OutStream << "  Total Execution Time: ";
+
+        printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream);
+        *OutStream << " seconds (";
+        printAlignedFP(Total.getWallTime(), 4, 5, *OutStream);
+        *OutStream << " wall clock)\n";
+      }
+      *OutStream << "\n";
+
+      if (Total.UserTime)
+        *OutStream << "   ---User Time---";
+      if (Total.SystemTime)
+        *OutStream << "   --System Time--";
+      if (Total.getProcessTime())
+        *OutStream << "   --User+System--";
+      *OutStream << "   ---Wall Time---";
+      if (Total.getMemUsed())
+        *OutStream << "  ---Mem---";
+      if (Total.getPeakMem())
+        *OutStream << "  -PeakMem-";
+      *OutStream << "  --- Name ---\n";
+
+      // Loop through all of the timing data, printing it out...
+      for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+        TimersToPrint[i].print(Total, *OutStream);
+
+      Total.print(Total, *OutStream);
+      *OutStream << std::endl;  // Flush output
+    }
+    --NumTimers;
+
+    TimersToPrint.clear();
+
+    if (OutStream != cerr.stream() && OutStream != cout.stream())
+      delete OutStream;   // Close the file...
+  }
+
+  // Delete default timer group!
+  if (NumTimers == 0 && this == DefaultTimerGroup) {
+    delete DefaultTimerGroup;
+    DefaultTimerGroup = 0;
+  }
+}
+
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
new file mode 100644
index 000000000000..e8cf69d81ff1
--- /dev/null
+++ b/lib/Support/Triple.cpp
@@ -0,0 +1,187 @@
+//===--- Triple.cpp - Target triple helper class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//
+
+const char *Triple::getArchTypeName(ArchType Kind) {
+  switch (Kind) {
+  case InvalidArch: return "<invalid>";
+  case UnknownArch: return "unknown";
+
+  case x86:    return "i386";
+  case x86_64: return "x86_64";
+  case ppc:    return "powerpc";
+  case ppc64:  return "powerpc64";
+  }
+
+  return "<invalid>";
+}
+
+const char *Triple::getVendorTypeName(VendorType Kind) {
+  switch (Kind) {
+  case UnknownVendor: return "unknown";
+
+  case Apple: return "apple";
+  case PC: return "PC";
+  }
+
+  return "<invalid>";
+}
+
+const char *Triple::getOSTypeName(OSType Kind) {
+  switch (Kind) {
+  case UnknownOS: return "unknown";
+
+  case Darwin: return "darwin";
+  case DragonFly: return "dragonfly";
+  case FreeBSD: return "freebsd";
+  case Linux: return "linux";
+  }
+
+  return "<invalid>";
+}
+
+//
+
+void Triple::Parse() const {
+  assert(!isInitialized() && "Invalid parse call.");
+
+  std::string ArchName = getArchName();
+  if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+      ArchName[2] == '8' && ArchName[3] == '6')
+    Arch = x86;
+  else if (ArchName == "amd64" || ArchName == "x86_64")
+    Arch = x86_64;
+  else if (ArchName == "powerpc")
+    Arch = ppc;
+  else if (ArchName == "powerpc64")
+    Arch = ppc64;
+  else
+    Arch = UnknownArch;
+
+  std::string VendorName = getVendorName();
+  if (VendorName == "apple")
+    Vendor = Apple;
+  else if (VendorName == "pc")
+    Vendor = PC;
+  else
+    Vendor = UnknownVendor;
+
+  std::string OSName = getOSName();
+  if (memcmp(&OSName[0], "darwin", 6) == 0)
+    OS = Darwin;
+  else if (memcmp(&OSName[0], "dragonfly", 9) == 0)
+    OS = DragonFly;
+  else if (memcmp(&OSName[0], "freebsd", 7) == 0)
+    OS = FreeBSD;
+  else if (memcmp(&OSName[0], "linux", 5) == 0)
+    OS = Linux;
+  else
+    OS = UnknownOS;
+
+  assert(isInitialized() && "Failed to initialize!");
+}
+
+static std::string extract(const std::string &A,
+                           std::string::size_type begin,
+                           std::string::size_type end) {
+  if (begin == std::string::npos)
+    return "";
+  if (end == std::string::npos)
+    return A.substr(begin);
+  return A.substr(begin, end - begin);
+}
+
+static std::string extract1(const std::string &A,
+                            std::string::size_type begin,
+                            std::string::size_type end) {
+  if (begin == std::string::npos || begin == end)
+    return "";
+  return extract(A, begin + 1, end);
+}
+
+std::string Triple::getArchName() const {
+  std::string Tmp = Data;
+  return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getVendorName() const {
+  std::string Tmp = Data;
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getOSName() const {
+  std::string Tmp = Data;
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getEnvironmentName() const {
+  std::string Tmp = Data;
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  return extract(Tmp, 0, std::string::npos);
+}
+
+std::string Triple::getOSAndEnvironmentName() const {
+  std::string Tmp = Data;
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+  return extract(Tmp, 0, std::string::npos);
+}
+
+void Triple::setTriple(const std::string &Str) {
+  Data = Str;
+  Arch = InvalidArch;
+}
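Each accessor peels components off the front of the triple string one '-' at a time, so a conventional four-part triple decomposes as in the sketch below (the triple value is only an example, and the accessors are the ones declared in llvm/ADT/Triple.h):

// Illustrative walkthrough of the component accessors.
llvm::Triple T;
T.setTriple("x86_64-apple-darwin9-gnu");
// getArchName()             == "x86_64"
// getVendorName()           == "apple"
// getOSName()               == "darwin9"
// getEnvironmentName()      == "gnu"
// getOSAndEnvironmentName() == "darwin9-gnu"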
+
+void Triple::setArch(ArchType Kind) {
+  setArchName(getArchTypeName(Kind));
+}
+
+void Triple::setVendor(VendorType Kind) {
+  setVendorName(getVendorTypeName(Kind));
+}
+
+void Triple::setOS(OSType Kind) {
+  setOSName(getOSTypeName(Kind));
+}
+
+void Triple::setArchName(const std::string &Str) {
+  setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setVendorName(const std::string &Str) {
+  setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(const std::string &Str) {
+  if (hasEnvironment())
+    setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+              "-" + getEnvironmentName());
+  else
+    setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(const std::string &Str) {
+  setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+            "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(const std::string &Str) {
+  setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
new file mode 100644
index 000000000000..6ac37bc840c6
--- /dev/null
+++ b/lib/Support/raw_ostream.cpp
@@ -0,0 +1,376 @@
+//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for bulk buffered stream output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/System/Program.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include <ostream>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(HAVE_FCNTL_H)
+# include <fcntl.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <io.h>
+#include <fcntl.h>
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+# define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+# define STDERR_FILENO 2
+#endif
+#endif
+
+using namespace llvm;
+
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_ostream::handle() {}
+
+raw_ostream &raw_ostream::operator<<(unsigned long N) {
+  // Zero is a special case.
+  if (N == 0)
+    return *this << '0';
+
+  char NumberBuffer[20];
+  char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+  char *CurPtr = EndPtr;
+
+  while (N) {
+    *--CurPtr = '0' + char(N % 10);
+    N /= 10;
+  }
+  return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long N) {
+  if (N < 0) {
+    *this << '-';
+    N = -N;
+  }
+
+  return this->operator<<(static_cast<unsigned long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long long N) {
+  // Zero is a special case.
+  if (N == 0)
+    return *this << '0';
+
+  char NumberBuffer[20];
+  char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+  char *CurPtr = EndPtr;
+
+  while (N) {
+    *--CurPtr = '0' + char(N % 10);
+    N /= 10;
+  }
+  return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long long N) {
+  if (N < 0) {
+    *this << '-';
+    N = -N;
+  }
+
+  return this->operator<<(static_cast<unsigned long long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(const void *P) {
+  uintptr_t N = (uintptr_t) P;
+  *this << '0' << 'x';
+
+  // Zero is a special case.
+  if (N == 0)
+    return *this << '0';
+
+  char NumberBuffer[20];
+  char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+  char *CurPtr = EndPtr;
+
+  while (N) {
+    unsigned x = N % 16;
+    *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
+    N /= 16;
+  }
+
+  return write(CurPtr, EndPtr-CurPtr);
+}
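All of the integer inserters use the same trick: format digits backwards into the tail of a small stack buffer and emit only the used suffix, avoiding any heap traffic. A 20-byte buffer suffices because 2^64-1 has 20 decimal digits. A standalone sketch of the same loop (toDecimal is illustrative):

#include <string>

// Illustrative: the reversed-buffer formatting used by the inserters above.
static std::string toDecimal(unsigned long long N) {
  if (N == 0) return "0";
  char Buf[20];                      // 2^64-1 needs at most 20 digits
  char *End = Buf + sizeof(Buf), *Cur = End;
  while (N) {
    *--Cur = '0' + char(N % 10);     // emit least-significant digit first
    N /= 10;
  }
  return std::string(Cur, End);      // only the used tail of the buffer
}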
+
+void raw_ostream::flush_nonempty() {
+  assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
+  write_impl(OutBufStart, OutBufCur - OutBufStart);
+  OutBufCur = OutBufStart;
+}
+
+raw_ostream &raw_ostream::write(unsigned char C) {
+  // Group exceptional cases into a single branch.
+  if (OutBufCur >= OutBufEnd) {
+    if (Unbuffered) {
+      write_impl(reinterpret_cast<char*>(&C), 1);
+      return *this;
+    }
+
+    if (!OutBufStart)
+      SetBufferSize();
+    else
+      flush_nonempty();
+  }
+
+  *OutBufCur++ = C;
+  return *this;
+}
+
+raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) {
+  // Group exceptional cases into a single branch.
+  if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
+    if (Unbuffered) {
+      write_impl(Ptr, Size);
+      return *this;
+    }
+
+    if (!OutBufStart)
+      SetBufferSize();
+    else
+      flush_nonempty();
+  }
+
+  // Handle short strings specially, memcpy isn't very good at very short
+  // strings.
+  switch (Size) {
+  case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
+  case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
+  case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
+  case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+  case 0: break;
+  default:
+    // Normally the string to emit is shorter than the buffer.
+    if (Size <= unsigned(OutBufEnd-OutBufStart)) {
+      memcpy(OutBufCur, Ptr, Size);
+      break;
+    }
+
+    // Otherwise we are emitting a string larger than our buffer. We
+    // know we already flushed, so just write it out directly.
+    write_impl(Ptr, Size);
+    Size = 0;
+    break;
+  }
+  OutBufCur += Size;
+
+  return *this;
+}
+
+// Formatted output.
+raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
+  // If we have more than a few bytes left in our output buffer, try
+  // formatting directly onto its end.
+  //
+  // FIXME: This test is a bit silly, since if we don't have enough
+  // space in the buffer we will have to flush the formatted output
+  // anyway. We should just flush upfront in such cases, and use the
+  // whole buffer as our scratch pad. Note, however, that this case is
+  // also necessary for correctness on unbuffered streams.
+  unsigned NextBufferSize = 127;
+  if (OutBufEnd-OutBufCur > 3) {
+    unsigned BufferBytesLeft = OutBufEnd-OutBufCur;
+    unsigned BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+
+    // Common case is that we have plenty of space.
+    if (BytesUsed < BufferBytesLeft) {
+      OutBufCur += BytesUsed;
+      return *this;
+    }
+
+    // Otherwise, we overflowed and the return value tells us the size to try
+    // again with.
+    NextBufferSize = BytesUsed;
+  }
+
+  // If we got here, we didn't have enough space in the output buffer for the
+  // string. Try printing into a SmallVector that is resized to have enough
+  // space. Iterate until we win.
+  SmallVector<char, 128> V;
+
+  while (1) {
+    V.resize(NextBufferSize);
+
+    // Try formatting into the SmallVector.
+    unsigned BytesUsed = Fmt.print(&V[0], NextBufferSize);
+
+    // If BytesUsed fit into the vector, we win.
+    if (BytesUsed <= NextBufferSize)
+      return write(&V[0], BytesUsed);
+
+    // Otherwise, try again with a new size.
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); + NextBufferSize = BytesUsed; + } +} + +//===----------------------------------------------------------------------===// +// Formatted Output +//===----------------------------------------------------------------------===// + +// Out of line virtual method. +void format_object_base::home() { +} + +//===----------------------------------------------------------------------===// +// raw_fd_ostream +//===----------------------------------------------------------------------===// + +/// raw_fd_ostream - Open the specified file for writing. If an error +/// occurs, information about the error is put into ErrorInfo, and the +/// stream should be immediately destroyed; the string will be empty +/// if no error occurred. +raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, + std::string &ErrorInfo) : pos(0) { + ErrorInfo.clear(); + + // Handle "-" as stdout. + if (Filename[0] == '-' && Filename[1] == 0) { + FD = STDOUT_FILENO; + // If user requested binary then put stdout into binary mode if + // possible. + if (Binary) + sys::Program::ChangeStdoutToBinary(); + ShouldClose = false; + return; + } + + int Flags = O_WRONLY|O_CREAT|O_TRUNC; +#ifdef O_BINARY + if (Binary) + Flags |= O_BINARY; +#endif + FD = open(Filename, Flags, 0644); + if (FD < 0) { + ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; + ShouldClose = false; + } else { + ShouldClose = true; + } +} + +raw_fd_ostream::~raw_fd_ostream() { + if (FD >= 0) { + flush(); + if (ShouldClose) + ::close(FD); + } +} + +void raw_fd_ostream::write_impl(const char *Ptr, unsigned Size) { + assert (FD >= 0 && "File already closed."); + pos += Size; + ::write(FD, Ptr, Size); +} + +void raw_fd_ostream::close() { + assert (ShouldClose); + ShouldClose = false; + flush(); + ::close(FD); + FD = -1; +} + +uint64_t raw_fd_ostream::seek(uint64_t off) { + flush(); + pos = lseek(FD, off, SEEK_SET); + return pos; +} + +//===----------------------------------------------------------------------===// +// raw_stdout/err_ostream +//===----------------------------------------------------------------------===// + +raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {} +raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false, + true) {} + +// An out of line virtual method to provide a home for the class vtable. +void raw_stdout_ostream::handle() {} +void raw_stderr_ostream::handle() {} + +/// outs() - This returns a reference to a raw_ostream for standard output. +/// Use it like: outs() << "foo" << "bar"; +raw_ostream &llvm::outs() { + static raw_stdout_ostream S; + return S; +} + +/// errs() - This returns a reference to a raw_ostream for standard error. 
+/// Use it like: errs() << "foo" << "bar"; +raw_ostream &llvm::errs() { + static raw_stderr_ostream S; + return S; +} + +//===----------------------------------------------------------------------===// +// raw_os_ostream +//===----------------------------------------------------------------------===// + +raw_os_ostream::~raw_os_ostream() { + flush(); +} + +void raw_os_ostream::write_impl(const char *Ptr, unsigned Size) { + OS.write(Ptr, Size); +} + +uint64_t raw_os_ostream::current_pos() { return OS.tellp(); } + +uint64_t raw_os_ostream::tell() { + return (uint64_t)OS.tellp() + GetNumBytesInBuffer(); +} + +//===----------------------------------------------------------------------===// +// raw_string_ostream +//===----------------------------------------------------------------------===// + +raw_string_ostream::~raw_string_ostream() { + flush(); +} + +void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) { + OS.append(Ptr, Size); +} + +//===----------------------------------------------------------------------===// +// raw_svector_ostream +//===----------------------------------------------------------------------===// + +raw_svector_ostream::~raw_svector_ostream() { + flush(); +} + +void raw_svector_ostream::write_impl(const char *Ptr, unsigned Size) { + OS.append(Ptr, Ptr + Size); +} + +uint64_t raw_svector_ostream::current_pos() { return OS.size(); } + +uint64_t raw_svector_ostream::tell() { + return OS.size() + GetNumBytesInBuffer(); +} diff --git a/lib/System/Alarm.cpp b/lib/System/Alarm.cpp new file mode 100644 index 000000000000..0014ca716b33 --- /dev/null +++ b/lib/System/Alarm.cpp @@ -0,0 +1,33 @@ +//===- Alarm.cpp - Alarm Generation Support ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Alarm functionality +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Alarm.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Alarm.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Win32/Alarm.inc" +#endif diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp new file mode 100644 index 000000000000..cefd0bbf5538 --- /dev/null +++ b/lib/System/Atomic.cpp @@ -0,0 +1,53 @@ +//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements atomic operations. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Atomic.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+void sys::MemoryFence() {
+#if LLVM_MULTITHREADED==0
+  return;
+#else
+# if defined(__GNUC__)
+  __sync_synchronize();
+# elif defined(_MSC_VER)
+  MemoryBarrier();
+# else
+#  error No memory fence implementation for your platform!
+# endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+                                  sys::cas_flag new_value,
+                                  sys::cas_flag old_value) {
+#if LLVM_MULTITHREADED==0
+  sys::cas_flag result = *ptr;
+  if (result == old_value)
+    *ptr = new_value;
+  return result;
+#elif defined(__GNUC__)
+  return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+  return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+#  error No compare-and-swap implementation for your platform!
+#endif
+}
\ No newline at end of file
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
new file mode 100644
index 000000000000..5415dd6e300c
--- /dev/null
+++ b/lib/System/CMakeLists.txt
@@ -0,0 +1,19 @@
+add_llvm_library(LLVMSystem
+  Alarm.cpp
+  Atomic.cpp
+  Disassembler.cpp
+  DynamicLibrary.cpp
+  Host.cpp
+  IncludeFile.cpp
+  Memory.cpp
+  Mutex.cpp
+  Path.cpp
+  Process.cpp
+  Program.cpp
+  Signals.cpp
+  TimeValue.cpp
+  )
+
+if( BUILD_SHARED_LIBS AND NOT WIN32 )
+  target_link_libraries(LLVMSystem dl)
+endif()
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
new file mode 100644
index 000000000000..378fe262bc10
--- /dev/null
+++ b/lib/System/Disassembler.cpp
@@ -0,0 +1,79 @@
+//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler(void)
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+  // We have option to enable udis86 library.
+# if USE_UDIS86
+  return true;
+#else
+  return false;
+#endif
+#else
+  return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+                                         uint64_t pc) {
+  std::stringstream res;
+
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+  unsigned bits;
+# if defined(__i386__)
+  bits = 32;
+# else
+  bits = 64;
+# endif
+
+# if USE_UDIS86
+  ud_t ud_obj;
+
+  ud_init(&ud_obj);
+  ud_set_input_buffer(&ud_obj, start, length);
+  ud_set_mode(&ud_obj, bits);
+  ud_set_pc(&ud_obj, pc);
+  ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+  res << std::setbase(16)
+      << std::setw(bits/4);
+
+  while (ud_disassemble(&ud_obj)) {
+    res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+  }
+# else
+  res << "No disassembler available. See configure help for options.\n";
+# endif
+
+#else
+  res << "No disassembler available. See configure help for options.\n";
+#endif
+
+  return res.str();
+}
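CompareAndSwap above returns the value that was actually in *ptr, so a caller can tell whether its swap won by comparing the result against old_value. A minimal sketch of a spin-lock built on that contract (illustrative only; a production lock would also need MemoryFence for ordering and some backoff):

#include "llvm/System/Atomic.h"

// Illustrative: 0 = unlocked, 1 = locked.
static volatile llvm::sys::cas_flag LockWord = 0;

static void spinAcquire() {
  // The swap succeeded only if the returned prior value was 0 (unlocked).
  while (llvm::sys::CompareAndSwap(&LockWord, 1, 0) != 0) {
    // spin until the holder stores 0 again
  }
}

static void spinRelease() {
  LockWord = 0;
}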
See configure help for options.\n"; +#endif + + return res.str(); +} diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp new file mode 100644 index 000000000000..3bf172c22b3f --- /dev/null +++ b/lib/System/DynamicLibrary.cpp @@ -0,0 +1,165 @@ +//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system DynamicLibrary concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Config/config.h" +#include +#include +#include + +// Collection of symbol name/value pairs to be searched prior to any libraries. +std::map &g_symbols() { + static std::map symbols; + return symbols; +} + +void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, + void *symbolValue) { + g_symbols()[symbolName] = symbolValue; +} + +// It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL +// license and special exception would cause all of LLVM to be placed under +// the LGPL. This is because the exception applies only when libtool is +// used, and obviously libtool is not used with Visual Studio. An entirely +// separate implementation is provided in win32/DynamicLibrary.cpp. + +#ifdef LLVM_ON_WIN32 + +#include "Win32/DynamicLibrary.inc" + +#else + +//#include "ltdl.h" +#include +#include +using namespace llvm; +using namespace llvm::sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +//static std::vector OpenedHandles; +static std::vector OpenedHandles; + +DynamicLibrary::DynamicLibrary() {} + +DynamicLibrary::~DynamicLibrary() { + while(!OpenedHandles.empty()) { + void *H = OpenedHandles.back(); OpenedHandles.pop_back(); + dlclose(H); + } +} + +bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, + std::string *ErrMsg) { + void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL); + if (H == 0) { + if (ErrMsg) + *ErrMsg = dlerror(); + return true; + } + OpenedHandles.push_back(H); + return false; +} + +void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { + // check_ltdl_initialization(); + + // First check symbols added via AddSymbol(). + std::map::iterator I = g_symbols().find(symbolName); + if (I != g_symbols().end()) + return I->second; + + // Now search the libraries. + for (std::vector::iterator I = OpenedHandles.begin(), + E = OpenedHandles.end(); I != E; ++I) { + //lt_ptr ptr = lt_dlsym(*I, symbolName); + void *ptr = dlsym(*I, symbolName); + if (ptr) + return ptr; + } + +#define EXPLICIT_SYMBOL(SYM) \ + extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM + + // If this is darwin, it has some funky issues, try to solve them here. Some + // important symbols are marked 'private external' which doesn't allow + // SearchForAddressOfSymbol to find them. As such, we special case them here, + // there is only a small handful of them. 
+ +#ifdef __APPLE__ + { + EXPLICIT_SYMBOL(__ashldi3); + EXPLICIT_SYMBOL(__ashrdi3); + EXPLICIT_SYMBOL(__cmpdi2); + EXPLICIT_SYMBOL(__divdi3); + EXPLICIT_SYMBOL(__eprintf); + EXPLICIT_SYMBOL(__fixdfdi); + EXPLICIT_SYMBOL(__fixsfdi); + EXPLICIT_SYMBOL(__fixunsdfdi); + EXPLICIT_SYMBOL(__fixunssfdi); + EXPLICIT_SYMBOL(__floatdidf); + EXPLICIT_SYMBOL(__floatdisf); + EXPLICIT_SYMBOL(__lshrdi3); + EXPLICIT_SYMBOL(__moddi3); + EXPLICIT_SYMBOL(__udivdi3); + EXPLICIT_SYMBOL(__umoddi3); + } +#endif + +#ifdef __CYGWIN__ + { + EXPLICIT_SYMBOL(_alloca); + EXPLICIT_SYMBOL(__main); + } +#endif + +#undef EXPLICIT_SYMBOL + +// This macro returns the address of a well-known, explicit symbol +#define EXPLICIT_SYMBOL(SYM) \ + if (!strcmp(symbolName, #SYM)) return &SYM + +// On linux we have a weird situation. The stderr/out/in symbols are both +// macros and global variables because of standards requirements. So, we +// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. +#if defined(__linux__) + { + EXPLICIT_SYMBOL(stderr); + EXPLICIT_SYMBOL(stdout); + EXPLICIT_SYMBOL(stdin); + } +#else + // For everything else, we want to check to make sure the symbol isn't defined + // as a macro before using EXPLICIT_SYMBOL. + { +#ifndef stdin + EXPLICIT_SYMBOL(stdin); +#endif +#ifndef stdout + EXPLICIT_SYMBOL(stdout); +#endif +#ifndef stderr + EXPLICIT_SYMBOL(stderr); +#endif + } +#endif +#undef EXPLICIT_SYMBOL + + return 0; +} + +#endif // LLVM_ON_WIN32 diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp new file mode 100644 index 000000000000..fd2d9527db62 --- /dev/null +++ b/lib/System/Host.cpp @@ -0,0 +1,24 @@ +//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Host concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Host.h" +#include "llvm/Config/config.h" + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Host.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Win32/Host.inc" +#endif + diff --git a/lib/System/IncludeFile.cpp b/lib/System/IncludeFile.cpp new file mode 100644 index 000000000000..8258d40326f9 --- /dev/null +++ b/lib/System/IncludeFile.cpp @@ -0,0 +1,20 @@ +//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IncludeFile constructor. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/IncludeFile.h" + +using namespace llvm; + +// This constructor is used to ensure linking of other modules. See the +// llvm/System/IncludeFile.h header for details. 
+IncludeFile::IncludeFile(const void*) {} diff --git a/lib/System/LICENSE.TXT b/lib/System/LICENSE.TXT new file mode 100644 index 000000000000..f569da205289 --- /dev/null +++ b/lib/System/LICENSE.TXT @@ -0,0 +1,6 @@ +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. diff --git a/lib/System/Makefile b/lib/System/Makefile new file mode 100644 index 000000000000..49704c3c625a --- /dev/null +++ b/lib/System/Makefile @@ -0,0 +1,19 @@ +##===- lib/System/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMSystem +BUILD_ARCHIVE = 1 + +EXTRA_DIST = Unix Win32 README.txt + +include $(LEVEL)/Makefile.common + +CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) +CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp new file mode 100644 index 000000000000..375c73cf0204 --- /dev/null +++ b/lib/System/Memory.cpp @@ -0,0 +1,62 @@ +//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for allocating memory and dealing +// with memory mapped files +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Memory.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Memory.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Win32/Memory.inc" +#endif + +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); + +/// InvalidateInstructionCache - Before the JIT can run a block of code +/// that has been emitted it must invalidate the instruction cache on some +/// platforms. +void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, + size_t Len) { + +// icache invalidation for PPC and ARM. 
+#if defined(__APPLE__)
+#if (defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+  sys_icache_invalidate(Addr, Len);
+#endif
+#else
+#if (defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+  const size_t LineSize = 32;
+
+  const intptr_t Mask = ~(LineSize - 1);
+  const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+  const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("dcbf 0, %0" : : "r"(Line));
+  asm volatile("sync");
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("icbi 0, %0" : : "r"(Line));
+  asm volatile("isync");
+#endif
+#endif  // end apple
+}
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
new file mode 100644
index 000000000000..d95c25b39eef
--- /dev/null
+++ b/lib/System/Mutex.cpp
@@ -0,0 +1,160 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+Mutex::Mutex( bool recursive) { }
+Mutex::~Mutex() { }
+bool Mutex::acquire() { return true; }
+bool Mutex::release() { return true; }
+bool Mutex::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_mutex_init does have an address, then mutex support is enabled.
+// Note: all LLVM tools will link against -lpthread if its available since it
+//       is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_mutex_init is not
+//       declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
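The comment describes a weak-linkage probe that this revision has simplified to a constant true: with the pthread symbols declared weak, taking the address of pthread_mutex_init yields 0 when no pthread library is linked in. A hedged sketch of what the probing variant looks like (not the code imported here):

#include <pthread.h>

// Illustrative probe only -- the imported file hard-codes the value instead.
// With weakly linked pthreads, an unlinked pthread_mutex_init has address 0,
// so this evaluates to false at runtime and all mutex calls become no-ops.
static const bool pthread_enabled_probe = (pthread_mutex_init != 0);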
+
+// Construct a Mutex using pthread calls
+Mutex::Mutex( bool recursive)
+  : data_(0)
+{
+  if (pthread_enabled)
+  {
+    // Declare the pthread_mutex data structures
+    pthread_mutex_t* mutex =
+      static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+    pthread_mutexattr_t attr;
+
+    // Initialize the mutex attributes
+    int errorcode = pthread_mutexattr_init(&attr);
+    assert(errorcode == 0);
+
+    // Initialize the mutex as a recursive mutex, if requested, or normal
+    // otherwise.
+    int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+    errorcode = pthread_mutexattr_settype(&attr, kind);
+    assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+    // Make it a process local mutex
+    errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+#endif
+
+    // Initialize the mutex
+    errorcode = pthread_mutex_init(mutex, &attr);
+    assert(errorcode == 0);
+
+    // Destroy the attributes
+    errorcode = pthread_mutexattr_destroy(&attr);
+    assert(errorcode == 0);
+
+    // Assign the data member
+    data_ = mutex;
+  }
+}
+
+// Destruct a Mutex
+Mutex::~Mutex()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+    pthread_mutex_destroy(mutex);
+    free(mutex);
+  }
+}
+
+bool
+Mutex::acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_lock(mutex);
+    return errorcode == 0;
+  }
+  return false;
+}
+
+bool
+Mutex::release()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_unlock(mutex);
+    return errorcode == 0;
+  }
+  return false;
+}
+
+bool
+Mutex::tryacquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_trylock(mutex);
+    return errorcode == 0;
+  }
+  return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Win32/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
+
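acquire(), release() and tryacquire() all report success as true, so callers typically bracket a critical section with an acquire/release pair. A small usage sketch (the guarded counter is illustrative):

#include "llvm/System/Mutex.h"

static llvm::sys::Mutex CounterLock(false);  // non-recursive
static unsigned Counter = 0;

void bumpCounter() {
  CounterLock.acquire();   // blocks until the lock is held
  ++Counter;
  CounterLock.release();
}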
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
new file mode 100644
index 000000000000..72bd7ad6f046
--- /dev/null
+++ b/lib/System/Path.cpp
@@ -0,0 +1,287 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Path.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+  return path == that.path;
+}
+
+bool Path::operator!=(const Path &that) const {
+  return path != that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+  return path < that.path;
+}
+
+std::ostream& llvm::operator<<(std::ostream &strm, const sys::Path &aPath) {
+  strm << aPath.toString();
+  return strm;
+}
+
+Path
+Path::GetLLVMConfigDir() {
+  Path result;
+#ifdef LLVM_ETCDIR
+  if (result.set(LLVM_ETCDIR))
+    return result;
+#endif
+  return GetLLVMDefaultConfigDir();
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+  assert(magic && "Invalid magic number string");
+  assert(length >= 4 && "Invalid magic number length");
+  switch ((unsigned char)magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wrapper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return Bitcode_FileType;
+      break;
+    case 'B':
+      if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+        return Bitcode_FileType;
+      break;
+    case '!':
+      if (length >= 8)
+        if (memcmp(magic,"!<arch>\n",8) == 0)
+          return Archive_FileType;
+      break;
+
+    case '\177':
+      if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+        if (length >= 18 && magic[17] == 0)
+          switch (magic[16]) {
+            default: break;
+            case 1: return ELF_Relocatable_FileType;
+            case 2: return ELF_Executable_FileType;
+            case 3: return ELF_SharedObject_FileType;
+            case 4: return ELF_Core_FileType;
+          }
+      }
+      break;
+
+    case 0xCA:
+      if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+          magic[3] == char(0xBE)) {
+        // This is complicated by an overlap with Java class files.
+        // See the Mach-O section in /usr/share/file/magic for details.
+        if (length >= 8 && magic[7] < 43)
+          // FIXME: Universal Binary of any type.
+          return Mach_O_DynamicallyLinkedSharedLib_FileType;
+      }
+      break;
+
+    case 0xFE:
+    case 0xCE: {
+      uint16_t type = 0;
+      if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+          magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
+        /* Native endian */
+        if (length >= 16) type = magic[14] << 8 | magic[15];
+      } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
+                 magic[2] == char(0xED) && magic[3] == char(0xFE)) {
+        /* Reverse endian */
+        if (length >= 14) type = magic[13] << 8 | magic[12];
+      }
+      switch (type) {
+        default: break;
+        case 1: return Mach_O_Object_FileType;
+        case 2: return Mach_O_Executable_FileType;
+        case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+        case 4: return Mach_O_Core_FileType;
+        case 5: return Mach_O_PreloadExectuable_FileType;
+        case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+        case 7: return Mach_O_DynamicLinker_FileType;
+        case 8: return Mach_O_Bundle_FileType;
+        case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+        case 10: break; // FIXME: MH_DSYM companion file with only debug.
+      }
+      break;
+    }
+    case 0xF0: // PowerPC Windows
+    case 0x83: // Alpha 32-bit
+    case 0x84: // Alpha 64-bit
+    case 0x66: // MIPS R4000 Windows
+    case 0x50: // mc68K
+    case 0x4c: // 80386 Windows
+      if (magic[1] == 0x01)
+        return COFF_FileType;
+
+    case 0x90: // PA-RISC Windows
+    case 0x68: // mc68K Windows
+      if (magic[1] == 0x02)
+        return COFF_FileType;
+      break;
+
+    default: break;
+  }
+  return Unknown_FileType;
+}
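The 0xDE and 'B' arms together accept bitcode in either framing: a raw bitcode file starts with the bytes 'B' 'C' 0xC0 0xDE, while the Darwin wrapper stores the little-endian word 0x0B17C0DE, whose first byte on disk is 0xDE. A small illustrative check (assuming the LLVMFileType enum declared in llvm/System/Path.h):

#include "llvm/System/Path.h"
#include <cassert>
using namespace llvm;
using namespace llvm::sys;

// Illustrative: both on-disk framings identify LLVM bitcode.
void checkMagic() {
  const char RawBC[4]     = { 'B', 'C', (char)0xC0, (char)0xDE };
  const char WrappedBC[4] = { (char)0xDE, (char)0xC0, (char)0x17, (char)0x0B };
  assert(IdentifyFileType(RawBC, 4)     == Bitcode_FileType);
  assert(IdentifyFileType(WrappedBC, 4) == Bitcode_FileType);
}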
+
+bool
+Path::isArchive() const {
+  if (canRead())
+    return hasMagicNumber("!<arch>\012");
+  return false;
+}
+
+bool
+Path::isDynamicLibrary() const {
+  if (canRead()) {
+    std::string Magic;
+    if (getMagicNumber(Magic, 64))
+      switch (IdentifyFileType(Magic.c_str(),
+                               static_cast<unsigned>(Magic.length()))) {
+        default: return false;
+        case Mach_O_FixedVirtualMemorySharedLib_FileType:
+        case Mach_O_DynamicallyLinkedSharedLib_FileType:
+        case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+        case ELF_SharedObject_FileType:
+        case COFF_FileType:  return true;
+      }
+  }
+  return false;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+  std::vector<sys::Path> LibPaths;
+  GetSystemLibraryPaths(LibPaths);
+  for (unsigned i = 0; i < LibPaths.size(); ++i) {
+    sys::Path FullPath(LibPaths[i]);
+    FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+    if (FullPath.isDynamicLibrary())
+      return FullPath;
+    FullPath.eraseSuffix();
+    FullPath.appendSuffix("a");
+    if (FullPath.isArchive())
+      return FullPath;
+  }
+  return sys::Path();
+}
+
+std::string Path::GetDLLSuffix() {
+  return LTDL_SHLIB_EXT;
+}
+
+bool
+Path::isBitcodeFile() const {
+  std::string actualMagic;
+  if (!getMagicNumber(actualMagic, 4))
+    return false;
+  LLVMFileType FT =
+    IdentifyFileType(actualMagic.c_str(),
+                     static_cast<unsigned>(actualMagic.length()));
+  return FT == Bitcode_FileType;
+}
+
+bool Path::hasMagicNumber(const std::string &Magic) const {
+  std::string actualMagic;
+  if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+    return Magic == actualMagic;
+  return false;
+}
+
+void Path::makeAbsolute() {
+  if (isAbsolute())
+    return;
+
+  Path CWD = Path::GetCurrentDirectory();
+  assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
+
+  CWD.appendComponent(path);
+
+  path = CWD.toString();
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+  const char* at = path;
+  const char* delim = strchr(at, PathSeparator);
+  Path tmpPath;
+  while (delim != 0) {
+    std::string tmp(at, size_t(delim-at));
+    if (tmpPath.set(tmp))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+    at = delim + 1;
+    delim = strchr(at, PathSeparator);
+  }
+
+  if (*at != 0)
+    if (tmpPath.set(std::string(at)))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+}
+
+static std::string getDirnameCharSep(const std::string& path, char Sep) {
+
+  if (path.empty())
+    return ".";
+
+  // If the path is all slashes, return a single slash.
+  // Otherwise, remove all trailing slashes.
+
+  signed pos = static_cast<signed>(path.size()) - 1;
+
+  while (pos >= 0 && path[pos] == Sep)
+    --pos;
+
+  if (pos < 0)
+    return path[0] == Sep ? std::string(1, Sep) : std::string(".");
+
+  // Any slashes left?
+  signed i = 0;
+
+  while (i < pos && path[i] != Sep)
+    ++i;
+
+  if (i == pos) // No slashes? Return "."
+    return ".";
+
+  // There is at least one slash left. Remove all trailing non-slashes.
+  while (pos >= 0 && path[pos] != Sep)
+    --pos;
+
+  // Remove any trailing slashes.
+  while (pos >= 0 && path[pos] == Sep)
+    --pos;
+
+  if (pos < 0)
+    return path[0] == Sep ?
std::string(1, Sep) : std::string("."); + + return path.substr(0, pos+1); +} + +// Include the truly platform-specific parts of this class. +#if defined(LLVM_ON_UNIX) +#include "Unix/Path.inc" +#endif +#if defined(LLVM_ON_WIN32) +#include "Win32/Path.inc" +#endif + diff --git a/lib/System/Process.cpp b/lib/System/Process.cpp new file mode 100644 index 000000000000..e93b2af4c12b --- /dev/null +++ b/lib/System/Process.cpp @@ -0,0 +1,33 @@ +//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Process concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Process.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Process.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Win32/Process.inc" +#endif diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp new file mode 100644 index 000000000000..eb289d81b2e1 --- /dev/null +++ b/lib/System/Program.cpp @@ -0,0 +1,33 @@ +//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Program concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Program.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Program.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Win32/Program.inc" +#endif diff --git a/lib/System/README.txt b/lib/System/README.txt new file mode 100644 index 000000000000..eacb20094a61 --- /dev/null +++ b/lib/System/README.txt @@ -0,0 +1,43 @@ +Design Of lib/System +==================== + +The software in this directory is designed to completely shield LLVM from any +and all operating system specific functionality. It is not intended to be a +complete operating system wrapper (such as ACE), but only to provide the +functionality necessary to support LLVM. + +The software located here, of necessity, has very specific and stringent design +rules. Violation of these rules means that cracks in the shield could form and +the primary goal of the library is defeated. By consistently using this library, +LLVM becomes more easily ported to new platforms since the only thing requiring +porting is this library. 
+
+Complete documentation for the library can be found in the file:
+  llvm/docs/SystemLibrary.html
+or at this URL:
+  http://llvm.org/docs/SystemLibrary.html
+
+While we recommend that you read the more detailed documentation, for the
+impatient, here's a high level summary of the library's requirements.
+
+ 1. No system header files are to be exposed through the interface.
+ 2. Std C++ and Std C header files are okay to be exposed through the interface.
+ 3. No exposed system-specific functions.
+ 4. No exposed system-specific data.
+ 5. Data in lib/System classes must use only simple C++ intrinsic types.
+ 6. Errors are handled by returning "true" and setting an optional std::string
+ 7. Library must not throw any exceptions, period.
+ 8. Interface functions must not have throw() specifications.
+ 9. No duplicate function implementations are permitted within an operating
+    system class.
+
+To accomplish these requirements, the library has numerous design criteria that
+must be satisfied. Here's a high level summary of the library's design criteria:
+
+ 1. No unused functionality (only what LLVM needs)
+ 2. High-Level Interfaces
+ 3. Use Opaque Classes
+ 4. Common Implementations
+ 5. Multiple Implementations
+ 6. Minimize Memory Allocation
+ 7. No Virtual Methods
diff --git a/lib/System/Signals.cpp b/lib/System/Signals.cpp
new file mode 100644
index 000000000000..d345b0a9aed4
--- /dev/null
+++ b/lib/System/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Signals.inc"
+#endif
diff --git a/lib/System/TimeValue.cpp b/lib/System/TimeValue.cpp
new file mode 100644
index 000000000000..cf4984cc4d1b
--- /dev/null
+++ b/lib/System/TimeValue.cpp
@@ -0,0 +1,58 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+
+void
+TimeValue::normalize( void ) {
+  if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+    do {
+      seconds_++;
+      nanos_ -= NANOSECONDS_PER_SECOND;
+    } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+  } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+    do {
+      seconds_--;
+      nanos_ += NANOSECONDS_PER_SECOND;
+    } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+  }
+
+  if (seconds_ >= 1 && nanos_ < 0) {
+    seconds_--;
+    nanos_ += NANOSECONDS_PER_SECOND;
+  } else if (seconds_ < 0 && nanos_ > 0) {
+    seconds_++;
+    nanos_ -= NANOSECONDS_PER_SECOND;
+  }
+}
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/TimeValue.inc"
+#endif
+
diff --git a/lib/System/Unix/Alarm.inc b/lib/System/Unix/Alarm.inc
new file mode 100644
index 000000000000..28ff1b8a6368
--- /dev/null
+++ b/lib/System/Unix/Alarm.inc
@@ -0,0 +1,72 @@
+//===-- Alarm.inc - Implement Unix Alarm Support ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Alarm support.
+//
+//===----------------------------------------------------------------------===//
+
+#include <signal.h>
+#include <unistd.h>
+#include <cassert>
+using namespace llvm;
+
+/// AlarmCancelled - This flag is set by the SIGINT signal handler if the
+/// user presses CTRL-C.
+static volatile bool AlarmCancelled = false;
+
+/// AlarmTriggered - This flag is set by the SIGALRM signal handler if the
+/// alarm was triggered.
+static volatile bool AlarmTriggered = false;
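TimeValue::normalize() above first folds whole seconds out of nanos_ and then fixes mismatched signs so the two fields always agree in sign. A worked example with illustrative values (assuming the usual two-argument constructor, which normalizes):

// Illustrative: 1 s and -2*10^8 ns have mismatched signs, so normalize()
// borrows a second: (1 s, -200000000 ns) -> (0 s, 800000000 ns).
// Symmetrically, (-1 s, +300000000 ns) -> (0 s, -700000000 ns).
llvm::sys::TimeValue TV(1, -200000000);
// TV.seconds() == 0 and TV.nanoseconds() == 800000000 after normalization.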
+
+/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel.
+/// This ensures that they never do.
+static bool NestedSOI = false;
+
+static RETSIGTYPE SigIntHandler(int Sig) {
+  AlarmCancelled = true;
+  signal(SIGINT, SigIntHandler);
+}
+
+static RETSIGTYPE SigAlarmHandler(int Sig) {
+  AlarmTriggered = true;
+}
+
+static void (*OldSigIntHandler) (int);
+
+void sys::SetupAlarm(unsigned seconds) {
+  assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
+  NestedSOI = true;
+  AlarmCancelled = false;
+  AlarmTriggered = false;
+  ::signal(SIGALRM, SigAlarmHandler);
+  OldSigIntHandler = ::signal(SIGINT, SigIntHandler);
+  ::alarm(seconds);
+}
+
+void sys::TerminateAlarm() {
+  assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
+  ::alarm(0);
+  ::signal(SIGALRM, SIG_DFL);
+  ::signal(SIGINT, OldSigIntHandler);
+  AlarmCancelled = false;
+  AlarmTriggered = false;
+  NestedSOI = false;
+}
+
+int sys::AlarmStatus() {
+  if (AlarmCancelled)
+    return -1;
+  if (AlarmTriggered)
+    return 1;
+  return 0;
+}
+
+void Sleep(unsigned n) {
+  ::sleep(n);
+}
diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc
new file mode 100644
index 000000000000..fb319fd09e1c
--- /dev/null
+++ b/lib/System/Unix/Host.inc
@@ -0,0 +1,58 @@
+ //===- llvm/System/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+  struct utsname info;
+
+  if (uname(&info))
+    return "";
+
+  return info.release;
+}
+
+std::string sys::getHostTriple() {
+  // FIXME: Derive more directly instead of relying on the autoconf
+  // generated variable.
+
+  std::string Triple = LLVM_HOSTTRIPLE;
+
+  // Force i<N>86 to i386.
+  if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+      Triple[2] == '8' && Triple[3] == '6')
+    Triple[1] = '3';
+
+  // On darwin, we want to update the version to match that of the
+  // host.
+  std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+  if (DarwinDashIdx != std::string::npos) {
+    Triple.resize(DarwinDashIdx + strlen("-darwin"));
+
+    // Only add the major part of the os version.
+    std::string Version = getOSVersion();
+    Triple += Version.substr(0, Version.find('.'));
+  }
+
+  return Triple;
+}
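getHostTriple performs two small normalizations on the configure-time triple, which a concrete example makes clearer (the input triples below are illustrative):

#include "llvm/System/Host.h"
#include <string>

// Illustrative results of the normalization above:
//   LLVM_HOSTTRIPLE "i686-pc-linux-gnu"     -> "i386-pc-linux-gnu"
//   LLVM_HOSTTRIPLE "powerpc-apple-darwin8" -> "powerpc-apple-darwin9"
//     (version suffix rebuilt from the running kernel's major release,
//      e.g. a uname release of "9.6.0")
std::string HostTriple = llvm::sys::getHostTriple();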
+// +//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include "llvm/System/Process.h" + +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +#ifdef __APPLE__ +#include +#endif + +/// AllocateRWX - Allocate a slab of memory with read/write/execute +/// permissions. This is typically used for JIT applications where we want +/// to emit code to the memory then jump to it. Getting this type of memory +/// is very OS specific. +/// +llvm::sys::MemoryBlock +llvm::sys::Memory::AllocateRWX(unsigned NumBytes, const MemoryBlock* NearBlock, + std::string *ErrMsg) { + if (NumBytes == 0) return MemoryBlock(); + + unsigned pageSize = Process::GetPageSize(); + unsigned NumPages = (NumBytes+pageSize-1)/pageSize; + + int fd = -1; +#ifdef NEED_DEV_ZERO_FOR_MMAP + static int zero_fd = open("/dev/zero", O_RDWR); + if (zero_fd == -1) { + MakeErrMsg(ErrMsg, "Can't open /dev/zero device"); + return MemoryBlock(); + } + fd = zero_fd; +#endif + + int flags = MAP_PRIVATE | +#ifdef HAVE_MMAP_ANONYMOUS + MAP_ANONYMOUS +#else + MAP_ANON +#endif + ; + + void* start = NearBlock ? (unsigned char*)NearBlock->base() + + NearBlock->size() : 0; + +#if defined(__APPLE__) && defined(__arm__) + void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC, + flags, fd, 0); +#else + void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC, + flags, fd, 0); +#endif + if (pa == MAP_FAILED) { + if (NearBlock) //Try again without a near hint + return AllocateRWX(NumBytes, 0); + + MakeErrMsg(ErrMsg, "Can't allocate RWX Memory"); + return MemoryBlock(); + } + +#if defined(__APPLE__) && defined(__arm__) + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa, + (vm_size_t)(pageSize*NumPages), 0, + VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); + if (KERN_SUCCESS != kr) { + MakeErrMsg(ErrMsg, "vm_protect max RX failed"); + return sys::MemoryBlock(); + } + + kr = vm_protect(mach_task_self(), (vm_address_t)pa, + (vm_size_t)(pageSize*NumPages), 0, + VM_PROT_READ | VM_PROT_WRITE); + if (KERN_SUCCESS != kr) { + MakeErrMsg(ErrMsg, "vm_protect RW failed"); + return sys::MemoryBlock(); + } +#endif + + MemoryBlock result; + result.Address = pa; + result.Size = NumPages*pageSize; + + return result; +} + +bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { + if (M.Address == 0 || M.Size == 0) return false; + if (0 != ::munmap(M.Address, M.Size)) + return MakeErrMsg(ErrMsg, "Can't release RWX Memory"); + return false; +} + +bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) { +#if defined(__APPLE__) && defined(__arm__) + if (M.Address == 0 || M.Size == 0) return false; + sys::Memory::InvalidateInstructionCache(M.Address, M.Size); + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address, + (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE); + return KERN_SUCCESS == kr; +#else + return true; +#endif +} + +bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) { +#if defined(__APPLE__) && defined(__arm__) + if (M.Address == 0 || M.Size == 0) return false; + sys::Memory::InvalidateInstructionCache(M.Address, M.Size); + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address, + (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); + return KERN_SUCCESS == kr; +#else + return false; +#endif +} + +bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) { +#if defined(__APPLE__) && defined(__arm__) + kern_return_t kr = vm_protect(mach_task_self(), 
+                                        (vm_address_t)Addr,
+                                        (vm_size_t)Size, 0,
+                                        VM_PROT_READ | VM_PROT_WRITE);
+  return KERN_SUCCESS == kr;
+#else
+  return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+                                (vm_size_t)Size, 0,
+                                VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+  return KERN_SUCCESS == kr;
+#else
+  return true;
+#endif
+}
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
new file mode 100644
index 000000000000..4a015a676fc7
--- /dev/null
+++ b/lib/System/Unix/Mutex.inc
@@ -0,0 +1,49 @@
+//===- llvm/System/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+Mutex::Mutex( bool recursive)
+{
+}
+
+Mutex::~Mutex()
+{
+}
+
+bool
+Mutex::acquire()
+{
+  return true;
+}
+
+bool
+Mutex::release()
+{
+  return true;
+}
+
+bool
+Mutex::tryacquire( void )
+{
+  return true;
+}
+
+}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
new file mode 100644
index 000000000000..d5edee1b03b3
--- /dev/null
+++ b/lib/System/Unix/Path.inc
@@ -0,0 +1,876 @@
+//===- llvm/System/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/alloca.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
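+#if 0
+// A short sketch of the portable directory-iteration idiom the dirent
+// feature tests above enable (Path::getDirectoryContents below relies on
+// it); the directory name is illustrative.
+static int countEntries(const char *DirName) {
+  int N = 0;
+  if (DIR *D = ::opendir(DirName)) {
+    while (struct dirent *DE = ::readdir(D))
+      N += (NAMLEN(DE) != 0);   // NAMLEN works with either dirent flavor
+    ::closedir(D);
+  }
+  return N;
+}
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.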
+#ifdef __CYGWIN__ +# undef HAVE_MKDTEMP +#endif + +namespace { +inline bool lastIsSlash(const std::string& path) { + return !path.empty() && path[path.length() - 1] == '/'; +} + +} + +namespace llvm { +using namespace sys; + +extern const char sys::PathSeparator = ':'; + +Path::Path(const std::string& p) + : path(p) {} + +Path::Path(const char *StrStart, unsigned StrLen) + : path(StrStart, StrLen) {} + +Path& +Path::operator=(const std::string &that) { + path = that; + return *this; +} + +bool +Path::isValid() const { + // Check some obvious things + if (path.empty()) + return false; + else if (path.length() >= MAXPATHLEN) + return false; + + // Check that the characters are ascii chars + size_t len = path.length(); + unsigned i = 0; + while (i < len && isascii(path[i])) + ++i; + return i >= len; +} + +bool +Path::isAbsolute() const { + if (path.empty()) + return false; + return path[0] == '/'; +} +Path +Path::GetRootDirectory() { + Path result; + result.set("/"); + return result; +} + +Path +Path::GetTemporaryDirectory(std::string *ErrMsg) { +#if defined(HAVE_MKDTEMP) + // The best way is with mkdtemp but that's not available on many systems, + // Linux and FreeBSD have it. Others probably won't. + char pathname[MAXPATHLEN]; + strcpy(pathname,"/tmp/llvm_XXXXXX"); + if (0 == mkdtemp(pathname)) { + MakeErrMsg(ErrMsg, + std::string(pathname) + ": can't create temporary directory"); + return Path(); + } + Path result; + result.set(pathname); + assert(result.isValid() && "mkdtemp didn't create a valid pathname!"); + return result; +#elif defined(HAVE_MKSTEMP) + // If no mkdtemp is available, mkstemp can be used to create a temporary file + // which is then removed and created as a directory. We prefer this over + // mktemp because of mktemp's inherent security and threading risks. We still + // have a slight race condition from the time the temporary file is created to + // the time it is re-created as a directoy. + char pathname[MAXPATHLEN]; + strcpy(pathname, "/tmp/llvm_XXXXXX"); + int fd = 0; + if (-1 == (fd = mkstemp(pathname))) { + MakeErrMsg(ErrMsg, + std::string(pathname) + ": can't create temporary directory"); + return Path(); + } + ::close(fd); + ::unlink(pathname); // start race condition, ignore errors + if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition + MakeErrMsg(ErrMsg, + std::string(pathname) + ": can't create temporary directory"); + return Path(); + } + Path result; + result.set(pathname); + assert(result.isValid() && "mkstemp didn't create a valid pathname!"); + return result; +#elif defined(HAVE_MKTEMP) + // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have + // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable + // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing + // the XXXXXX with the pid of the process and a letter. That leads to only + // twenty six temporary files that can be generated. + char pathname[MAXPATHLEN]; + strcpy(pathname, "/tmp/llvm_XXXXXX"); + char *TmpName = ::mktemp(pathname); + if (TmpName == 0) { + MakeErrMsg(ErrMsg, + std::string(TmpName) + ": can't create unique directory name"); + return Path(); + } + if (-1 == ::mkdir(TmpName, S_IRWXU)) { + MakeErrMsg(ErrMsg, + std::string(TmpName) + ": can't create temporary directory"); + return Path(); + } + Path result; + result.set(TmpName); + assert(result.isValid() && "mktemp didn't create a valid pathname!"); + return result; +#else + // This is the worst case implementation. 
tempnam(3) leaks memory unless it's
+  // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+  // issues. The mktemp(3) function doesn't have enough variability in the
+  // temporary name generated. So, we provide our own implementation that
+  // increments an integer from a random number seeded by the current time.
+  // This should be sufficiently unique that we don't have many collisions
+  // between processes. Generally LLVM processes don't run very long and don't
+  // use very many temporary files so this shouldn't be a big issue for LLVM.
+  static time_t num = ::time(0);
+  char pathname[MAXPATHLEN];
+  do {
+    num++;
+    sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+  } while ( 0 == access(pathname, F_OK ) );
+  if (-1 == ::mkdir(pathname, S_IRWXU)) {
+    MakeErrMsg(ErrMsg,
+      std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  Path result;
+  result.set(pathname);
+  assert(result.isValid() && "mkstemp didn't create a valid pathname!");
+  return result;
+#endif
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+  char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#endif
+  // FIXME: Should this look at LD_LIBRARY_PATH too?
+  Paths.push_back(sys::Path("/usr/local/lib/"));
+  Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+  Paths.push_back(sys::Path("/usr/lib/"));
+  Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  const char* home = getenv("HOME");
+  if (home) {
+    Path result;
+    if (result.set(home))
+      return result;
+  }
+  return GetRootDirectory();
+}
+
+Path
+Path::GetCurrentDirectory() {
+  char pathname[MAXPATHLEN];
+  if (!getcwd(pathname,MAXPATHLEN)) {
+    assert (false && "Could not query current working directory.");
+    return Path("");
+  }
+
+  return Path(pathname);
+}
+
+#ifdef __FreeBSD__
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+    const char *dir, const char *bin)
+{
+  struct stat sb;
+
+  snprintf(buf, PATH_MAX, "%s//%s", dir, bin);
+  if (realpath(buf, ret) == NULL)
+    return (1);
+  if (stat(buf, &sb) != 0)
+    return (1);
+
+  return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+  char *pv, *s, *t, buf[PATH_MAX];
+
+  /* First approach: absolute path. */
+  if (bin[0] == '/') {
+    if (test_dir(buf, ret, "/", bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Second approach: relative path. */
+  if (strchr(bin, '/') != NULL) {
+    if (getcwd(buf, PATH_MAX) == NULL)
+      return (NULL);
+    if (test_dir(buf, ret, buf, bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Third approach: $PATH */
+  if ((pv = getenv("PATH")) == NULL)
+    return (NULL);
+  s = pv = strdup(pv);
+  if (pv == NULL)
+    return (NULL);
+  while ((t = strsep(&s, ":")) != NULL) {
+    if (test_dir(buf, ret, t, bin) == 0) {
+      free(pv);
+      return (ret);
+    }
+  }
+  free(pv);
+  return (NULL);
+}
+#endif
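+#if 0
+// A usage sketch tying together the Path factories defined above, assuming
+// the interface declared in "llvm/System/Path.h"; the file name and error
+// handling are abbreviated and illustrative.
+#include "llvm/System/Path.h"
+
+static llvm::sys::Path makeScratchFile() {
+  std::string Err;
+  llvm::sys::Path P = llvm::sys::Path::GetTemporaryDirectory(&Err);
+  P.appendComponent("scratch.txt");      // now <tmpdir>/scratch.txt
+  if (P.createFileOnDisk(&Err))
+    return llvm::sys::Path();            // empty path signals failure
+  return P;
+}
+#endif
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.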
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { +#if defined(__FreeBSD__) + char exe_path[PATH_MAX]; + + if (getprogpath(exe_path, argv0) != NULL) + return Path(std::string(exe_path)); +#elif defined(__linux__) || defined(__CYGWIN__) + char exe_path[MAXPATHLEN]; + ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path)); + if (len > 0 && len < MAXPATHLEN - 1) { + exe_path[len] = '\0'; + return Path(std::string(exe_path)); + } +#elif defined(HAVE_DLFCN_H) + // Use dladdr to get executable path if available. + Dl_info DLInfo; + int err = dladdr(MainAddr, &DLInfo); + if (err == 0) + return Path(); + + // If the filename is a symlink, we need to resolve and return the location of + // the actual executable. + char link_path[MAXPATHLEN]; + return Path(std::string(realpath(DLInfo.dli_fname, link_path))); +#endif + return Path(); +} + + +std::string Path::getDirname() const { + return getDirnameCharSep(path, '/'); +} + +std::string +Path::getBasename() const { + // Find the last slash + std::string::size_type slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + std::string::size_type dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return path.substr(slash); + else + return path.substr(slash, dot - slash); +} + +std::string +Path::getSuffix() const { + // Find the last slash + std::string::size_type slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + std::string::size_type dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return std::string(); + else + return path.substr(dot + 1); +} + +bool Path::getMagicNumber(std::string& Magic, unsigned len) const { + assert(len < 1024 && "Request for magic string too long"); + char* buf = (char*) alloca(1 + len); + int fd = ::open(path.c_str(), O_RDONLY); + if (fd < 0) + return false; + ssize_t bytes_read = ::read(fd, buf, len); + ::close(fd); + if (ssize_t(len) != bytes_read) { + Magic.clear(); + return false; + } + Magic.assign(buf,len); + return true; +} + +bool +Path::exists() const { + return 0 == access(path.c_str(), F_OK ); +} + +bool +Path::isDirectory() const { + struct stat buf; + if (0 != stat(path.c_str(), &buf)) + return false; + return buf.st_mode & S_IFDIR ? 
true : false; +} + +bool +Path::canRead() const { + return 0 == access(path.c_str(), F_OK | R_OK ); +} + +bool +Path::canWrite() const { + return 0 == access(path.c_str(), F_OK | W_OK ); +} + +bool +Path::canExecute() const { + if (0 != access(path.c_str(), R_OK | X_OK )) + return false; + struct stat buf; + if (0 != stat(path.c_str(), &buf)) + return false; + if (!S_ISREG(buf.st_mode)) + return false; + return true; +} + +std::string +Path::getLast() const { + // Find the last slash + size_t pos = path.rfind('/'); + + // Handle the corner cases + if (pos == std::string::npos) + return path; + + // If the last character is a slash + if (pos == path.length()-1) { + // Find the second to last slash + size_t pos2 = path.rfind('/', pos-1); + if (pos2 == std::string::npos) + return path.substr(0,pos); + else + return path.substr(pos2+1,pos-pos2-1); + } + // Return everything after the last slash + return path.substr(pos+1); +} + +const FileStatus * +PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { + if (!fsIsValid || update) { + struct stat buf; + if (0 != stat(path.c_str(), &buf)) { + MakeErrMsg(ErrStr, path + ": can't get status of file"); + return 0; + } + status.fileSize = buf.st_size; + status.modTime.fromEpochTime(buf.st_mtime); + status.mode = buf.st_mode; + status.user = buf.st_uid; + status.group = buf.st_gid; + status.uniqueID = uint64_t(buf.st_ino); + status.isDir = S_ISDIR(buf.st_mode); + status.isFile = S_ISREG(buf.st_mode); + fsIsValid = true; + } + return &status; +} + +static bool AddPermissionBits(const Path &File, int bits) { + // Get the umask value from the operating system. We want to use it + // when changing the file's permissions. Since calling umask() sets + // the umask and returns its old value, we must call it a second + // time to reset it to the user's preference. + int mask = umask(0777); // The arg. to umask is arbitrary. + umask(mask); // Restore the umask. + + // Get the file's current mode. + struct stat buf; + if (0 != stat(File.toString().c_str(), &buf)) + return false; + // Change the file to have whichever permissions bits from 'bits' + // that the umask would not disable. 
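+#if 0
+// The umask round trip used just above, in isolation: the process umask can
+// only be read by setting it, so it is set to a throwaway value and
+// immediately restored, then used to mask the requested bits.
+#include <sys/stat.h>
+
+static mode_t effectiveBits(mode_t requested) {
+  mode_t mask = umask(0777);   // read the current umask...
+  umask(mask);                 // ...and restore it right away
+  return requested & ~mask;    // e.g. 0666 with umask 022 yields 0644
+}
+#endif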
+ if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1) + return false; + return true; +} + +bool Path::makeReadableOnDisk(std::string* ErrMsg) { + if (!AddPermissionBits(*this, 0444)) + return MakeErrMsg(ErrMsg, path + ": can't make file readable"); + return false; +} + +bool Path::makeWriteableOnDisk(std::string* ErrMsg) { + if (!AddPermissionBits(*this, 0222)) + return MakeErrMsg(ErrMsg, path + ": can't make file writable"); + return false; +} + +bool Path::makeExecutableOnDisk(std::string* ErrMsg) { + if (!AddPermissionBits(*this, 0111)) + return MakeErrMsg(ErrMsg, path + ": can't make file executable"); + return false; +} + +bool +Path::getDirectoryContents(std::set& result, std::string* ErrMsg) const { + DIR* direntries = ::opendir(path.c_str()); + if (direntries == 0) + return MakeErrMsg(ErrMsg, path + ": can't open directory"); + + std::string dirPath = path; + if (!lastIsSlash(dirPath)) + dirPath += '/'; + + result.clear(); + struct dirent* de = ::readdir(direntries); + for ( ; de != 0; de = ::readdir(direntries)) { + if (de->d_name[0] != '.') { + Path aPath(dirPath + (const char*)de->d_name); + struct stat st; + if (0 != lstat(aPath.path.c_str(), &st)) { + if (S_ISLNK(st.st_mode)) + continue; // dangling symlink -- ignore + return MakeErrMsg(ErrMsg, + aPath.path + ": can't determine file object type"); + } + result.insert(aPath); + } + } + + closedir(direntries); + return false; +} + +bool +Path::set(const std::string& a_path) { + if (a_path.empty()) + return false; + std::string save(path); + path = a_path; + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::appendComponent(const std::string& name) { + if (name.empty()) + return false; + std::string save(path); + if (!lastIsSlash(path)) + path += '/'; + path += name; + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::eraseComponent() { + size_t slashpos = path.rfind('/',path.size()); + if (slashpos == 0 || slashpos == std::string::npos) { + path.erase(); + return true; + } + if (slashpos == path.size() - 1) + slashpos = path.rfind('/',slashpos-1); + if (slashpos == std::string::npos) { + path.erase(); + return true; + } + path.erase(slashpos); + return true; +} + +bool +Path::appendSuffix(const std::string& suffix) { + std::string save(path); + path.append("."); + path.append(suffix); + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::eraseSuffix() { + std::string save = path; + size_t dotpos = path.rfind('.',path.size()); + size_t slashpos = path.rfind('/',path.size()); + if (dotpos != std::string::npos) { + if (slashpos == std::string::npos || dotpos > slashpos+1) { + path.erase(dotpos, path.size()-dotpos); + return true; + } + } + if (!isValid()) + path = save; + return false; +} + +static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { + + if (access(beg, F_OK | R_OK | W_OK) == 0) + return false; + + if (create_parents) { + + char* c = end; + + for (; c != beg; --c) + if (*c == '/') { + + // Recurse to handling the parent directory. + *c = '\0'; + bool x = createDirectoryHelper(beg, c, create_parents); + *c = '/'; + + // Return if we encountered an error. 
+ if (x) + return true; + + break; + } + } + + return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +} + +bool +Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { + // Get a writeable copy of the path name + char pathname[MAXPATHLEN]; + path.copy(pathname,MAXPATHLEN); + + // Null-terminate the last component + size_t lastchar = path.length() - 1 ; + + if (pathname[lastchar] != '/') + ++lastchar; + + pathname[lastchar] = 0; + + if (createDirectoryHelper(pathname, pathname+lastchar, create_parents)) + return MakeErrMsg(ErrMsg, + std::string(pathname) + ": can't create directory"); + + return false; +} + +bool +Path::createFileOnDisk(std::string* ErrMsg) { + // Create the file + int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); + if (fd < 0) + return MakeErrMsg(ErrMsg, path + ": can't create file"); + ::close(fd); + return false; +} + +bool +Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { + // Make this into a unique file name + if (makeUnique( reuse_current, ErrMsg )) + return true; + + // create the file + int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + if (fd < 0) + return MakeErrMsg(ErrMsg, path + ": can't create temporary file"); + ::close(fd); + return false; +} + +bool +Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { + // Get the status so we can determin if its a file or directory + struct stat buf; + if (0 != stat(path.c_str(), &buf)) { + MakeErrMsg(ErrStr, path + ": can't get status of file"); + return true; + } + + // Note: this check catches strange situations. In all cases, LLVM should + // only be involved in the creation and deletion of regular files. This + // check ensures that what we're trying to erase is a regular file. It + // effectively prevents LLVM from erasing things like /dev/null, any block + // special file, or other things that aren't "regular" files. + if (S_ISREG(buf.st_mode)) { + if (unlink(path.c_str()) != 0) + return MakeErrMsg(ErrStr, path + ": can't destroy file"); + return false; + } + + if (!S_ISDIR(buf.st_mode)) { + if (ErrStr) *ErrStr = "not a file or directory"; + return true; + } + + if (remove_contents) { + // Recursively descend the directory to remove its contents. + std::string cmd = "/bin/rm -rf " + path; + if (system(cmd.c_str()) != 0) { + MakeErrMsg(ErrStr, path + ": failed to recursively remove directory."); + return true; + } + return false; + } + + // Otherwise, try to just remove the one directory. 
+ char pathname[MAXPATHLEN]; + path.copy(pathname, MAXPATHLEN); + size_t lastchar = path.length() - 1; + if (pathname[lastchar] == '/') + pathname[lastchar] = 0; + else + pathname[lastchar+1] = 0; + + if (rmdir(pathname) != 0) + return MakeErrMsg(ErrStr, + std::string(pathname) + ": can't erase directory"); + return false; +} + +bool +Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { + if (0 != ::rename(path.c_str(), newName.c_str())) + return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + + newName.toString() + "'"); + return false; +} + +bool +Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { + struct utimbuf utb; + utb.actime = si.modTime.toPosixTime(); + utb.modtime = utb.actime; + if (0 != ::utime(path.c_str(),&utb)) + return MakeErrMsg(ErrStr, path + ": can't set file modification time"); + if (0 != ::chmod(path.c_str(),si.mode)) + return MakeErrMsg(ErrStr, path + ": can't set mode"); + return false; +} + +bool +sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){ + int inFile = -1; + int outFile = -1; + inFile = ::open(Src.c_str(), O_RDONLY); + if (inFile == -1) + return MakeErrMsg(ErrMsg, Src.toString() + + ": can't open source file to copy"); + + outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666); + if (outFile == -1) { + ::close(inFile); + return MakeErrMsg(ErrMsg, Dest.toString() + + ": can't create destination file for copy"); + } + + char Buffer[16*1024]; + while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) { + if (Amt == -1) { + if (errno != EINTR && errno != EAGAIN) { + ::close(inFile); + ::close(outFile); + return MakeErrMsg(ErrMsg, Src.toString()+": can't read source file"); + } + } else { + char *BufPtr = Buffer; + while (Amt) { + ssize_t AmtWritten = ::write(outFile, BufPtr, Amt); + if (AmtWritten == -1) { + if (errno != EINTR && errno != EAGAIN) { + ::close(inFile); + ::close(outFile); + return MakeErrMsg(ErrMsg, Dest.toString() + + ": can't write destination file"); + } + } else { + Amt -= AmtWritten; + BufPtr += AmtWritten; + } + } + } + } + ::close(inFile); + ::close(outFile); + return false; +} + +bool +Path::makeUnique(bool reuse_current, std::string* ErrMsg) { + if (reuse_current && !exists()) + return false; // File doesn't exist already, just use it! + + // Append an XXXXXX pattern to the end of the file for use with mkstemp, + // mktemp or our own implementation. + char *FNBuffer = (char*) alloca(path.size()+8); + path.copy(FNBuffer,path.size()); + if (isDirectory()) + strcpy(FNBuffer+path.size(), "/XXXXXX"); + else + strcpy(FNBuffer+path.size(), "-XXXXXX"); + +#if defined(HAVE_MKSTEMP) + int TempFD; + if ((TempFD = mkstemp(FNBuffer)) == -1) + return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); + + // We don't need to hold the temp file descriptor... we will trust that no one + // will overwrite/delete the file before we can open it again. + close(TempFD); + + // Save the name + path = FNBuffer; +#elif defined(HAVE_MKTEMP) + // If we don't have mkstemp, use the old and obsolete mktemp function. + if (mktemp(FNBuffer) == 0) + return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); + + // Save the name + path = FNBuffer; +#else + // Okay, looks like we have to do it all by our lonesome. 
+  static unsigned FCounter = 0;
+  unsigned offset = path.size() + 1;
+  while ( FCounter < 999999 && exists()) {
+    sprintf(FNBuffer+offset,"%06u",++FCounter);
+    path = FNBuffer;
+  }
+  if (FCounter > 999999)
+    return MakeErrMsg(ErrMsg,
+      path + ": can't make unique filename: too many files");
+#endif
+  return false;
+}
+
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+  Flags |= MAP_FILE;
+#endif
+  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
+  if (BasePtr == MAP_FAILED)
+    return 0;
+  return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
+  ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
new file mode 100644
index 000000000000..74b9bb8b142a
--- /dev/null
+++ b/lib/System/Unix/Process.inc
@@ -0,0 +1,237 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#  include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+#  include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+  // On Cygwin, getpagesize() returns 64k but the page size for the purposes
+  // of memory protection and mmap() is 4k.
+  // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+  static const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+  static const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+  static long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+  return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi;
+  mi = ::mallinfo();
+  return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_in_use;   // darwin
+#elif defined(HAVE_SBRK)
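+#if 0
+// The page-rounding arithmetic GetPageSize() above feeds into
+// Memory::AllocateRWX() earlier in this patch: a plain integer
+// ceiling-divide, shown on its own.
+static unsigned pagesNeeded(unsigned NumBytes, unsigned PageSize) {
+  return (NumBytes + PageSize - 1) / PageSize;   // e.g. 1 byte -> 1 page
+}
+#endif
+  // Note this is only an approximation and more closely resembles
+  // the value returned by mallinfo in the arena field.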
+  static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+  char *EndOfMemory = (char*)sbrk(0);
+  if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+    return EndOfMemory - StartOfMemory;
+  else
+    return 0;
+#else
+#warning Cannot get malloc info on this platform
+  return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi = ::mallinfo();
+  return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_allocated;   // darwin
+#elif defined(HAVE_GETRUSAGE)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+  return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+                      TimeValue& sys_time)
+{
+  elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  user_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+  sys_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+  user_time.seconds(0);
+  user_time.microseconds(0);
+  sys_time.seconds(0);
+  sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+  return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+  return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+  struct rlimit rlim;
+  rlim.rlim_cur = rlim.rlim_max = 0;
+  setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+  // Disable crash reporting on Mac OS X 10.0-10.4
+
+  // get information about the original set of exception ports for the task
+  mach_msg_type_number_t Count = 0;
+  exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+  exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+  exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+  thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+  kern_return_t err =
+    task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+                             &Count, OriginalPorts, OriginalBehaviors,
+                             OriginalFlavors);
+  if (err == KERN_SUCCESS) {
+    // replace each with MACH_PORT_NULL.
+    for (unsigned i = 0; i != Count; ++i)
+      task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+                               MACH_PORT_NULL, OriginalBehaviors[i],
+                               OriginalFlavors[i]);
+  }
+
+  // Disable crash reporting on Mac OS X 10.5
+  signal(SIGABRT, _exit);
+  signal(SIGILL, _exit);
+  signal(SIGFPE, _exit);
+  signal(SIGSEGV, _exit);
+  signal(SIGBUS, _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+#if HAVE_ISATTY
+  return isatty(0);
+#endif
+  // If we don't have isatty, just return false.
+  return false;
+}
+
+bool Process::StandardOutIsDisplayed() {
+#if HAVE_ISATTY
+  return isatty(1);
+#endif
+  // If we don't have isatty, just return false.
+  return false;
+}
+
+bool Process::StandardErrIsDisplayed() {
+#if HAVE_ISATTY
+  return isatty(2);
+#endif
+  // If we don't have isatty, just return false.
+ return false; +} + +static unsigned getColumns(int FileID) { + // If COLUMNS is defined in the environment, wrap to that many columns. + if (const char *ColumnsStr = std::getenv("COLUMNS")) { + int Columns = std::atoi(ColumnsStr); + if (Columns > 0) + return Columns; + } + + unsigned Columns = 0; + +#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) + // Try to determine the width of the terminal. + struct winsize ws; + if (ioctl(FileID, TIOCGWINSZ, &ws) == 0) + Columns = ws.ws_col; +#endif + + return Columns; +} + +unsigned Process::StandardOutColumns() { + if (!StandardOutIsDisplayed()) + return 0; + + return getColumns(1); +} + +unsigned Process::StandardErrColumns() { + if (!StandardErrIsDisplayed()) + return 0; + + return getColumns(2); +} diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc new file mode 100644 index 000000000000..cdc6fee60949 --- /dev/null +++ b/lib/System/Unix/Program.inc @@ -0,0 +1,287 @@ +//===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific portion of the Program class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. +//===----------------------------------------------------------------------===// + +#include +#include "Unix.h" +#include +#if HAVE_SYS_STAT_H +#include +#endif +#if HAVE_SYS_RESOURCE_H +#include +#endif +#if HAVE_SIGNAL_H +#include +#endif +#if HAVE_FCNTL_H +#include +#endif + +namespace llvm { +using namespace sys; + +// This function just uses the PATH environment variable to find the program. +Path +Program::FindProgramByName(const std::string& progName) { + + // Check some degenerate cases + if (progName.length() == 0) // no program + return Path(); + Path temp; + if (!temp.set(progName)) // invalid name + return Path(); + // FIXME: have to check for absolute filename - we cannot assume anything + // about "." being in $PATH + if (temp.canExecute()) // already executable as is + return temp; + + // At this point, the file name is valid and its not executable + + // Get the path. If its empty, we can't do anything to find it. + const char *PathStr = getenv("PATH"); + if (PathStr == 0) + return Path(); + + // Now we have a colon separated list of directories to search; try them. + size_t PathLen = strlen(PathStr); + while (PathLen) { + // Find the first colon... + const char *Colon = std::find(PathStr, PathStr+PathLen, ':'); + + // Check to see if this first directory contains the executable... + Path FilePath; + if (FilePath.set(std::string(PathStr,Colon))) { + FilePath.appendComponent(progName); + if (FilePath.canExecute()) + return FilePath; // Found the executable! + } + + // Nope it wasn't in this directory, check the next path in the list! 
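+#if 0
+// A usage sketch for Program::FindProgramByName, assuming the declarations
+// in "llvm/System/Program.h"; "cc" is an arbitrary example program name.
+#include "llvm/System/Program.h"
+
+static bool haveSystemCompiler() {
+  llvm::sys::Path CC = llvm::sys::Program::FindProgramByName("cc");
+  return !CC.isEmpty() && CC.canExecute();  // empty Path means "not found"
+}
+#endif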
+ PathLen -= Colon-PathStr; + PathStr = Colon; + + // Advance past duplicate colons + while (*PathStr == ':') { + PathStr++; + PathLen--; + } + } + return Path(); +} + +static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { + if (Path == 0) + // Noop + return false; + std::string File; + if (Path->isEmpty()) + // Redirect empty paths to /dev/null + File = "/dev/null"; + else + File = Path->toString(); + + // Open the file + int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); + if (InFD == -1) { + MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for " + + (FD == 0 ? "input" : "output")); + return true; + } + + // Install it as the requested FD + if (-1 == dup2(InFD, FD)) { + MakeErrMsg(ErrMsg, "Cannot dup2"); + return true; + } + close(InFD); // Close the original FD + return false; +} + +static bool Timeout = false; +static void TimeOutHandler(int Sig) { + Timeout = true; +} + +static void SetMemoryLimits (unsigned size) +{ +#if HAVE_SYS_RESOURCE_H + struct rlimit r; + __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; + + // Heap size + getrlimit (RLIMIT_DATA, &r); + r.rlim_cur = limit; + setrlimit (RLIMIT_DATA, &r); +#ifdef RLIMIT_RSS + // Resident set size. + getrlimit (RLIMIT_RSS, &r); + r.rlim_cur = limit; + setrlimit (RLIMIT_RSS, &r); +#endif +#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. + // Virtual memory. + getrlimit (RLIMIT_AS, &r); + r.rlim_cur = limit; + setrlimit (RLIMIT_AS, &r); +#endif +#endif +} + +int +Program::ExecuteAndWait(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned secondsToWait, + unsigned memoryLimit, + std::string* ErrMsg) +{ + if (!path.canExecute()) { + if (ErrMsg) + *ErrMsg = path.toString() + " is not executable"; + return -1; + } + +#ifdef HAVE_SYS_WAIT_H + // Create a child process. + int child = fork(); + switch (child) { + // An error occured: Return to the caller. + case -1: + MakeErrMsg(ErrMsg, "Couldn't fork"); + return -1; + + // Child process: Execute the program. + case 0: { + // Redirect file descriptors... + if (redirects) { + // Redirect stdin + if (RedirectIO(redirects[0], 0, ErrMsg)) { return -1; } + // Redirect stdout + if (RedirectIO(redirects[1], 1, ErrMsg)) { return -1; } + if (redirects[1] && redirects[2] && + *(redirects[1]) == *(redirects[2])) { + // If stdout and stderr should go to the same place, redirect stderr + // to the FD already open for stdout. + if (-1 == dup2(1,2)) { + MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout"); + return -1; + } + } else { + // Just redirect stderr + if (RedirectIO(redirects[2], 2, ErrMsg)) { return -1; } + } + } + + // Set memory limits + if (memoryLimit!=0) { + SetMemoryLimits(memoryLimit); + } + + // Execute! + if (envp != 0) + execve (path.c_str(), (char**)args, (char**)envp); + else + execv (path.c_str(), (char**)args); + // If the execve() failed, we should exit and let the parent pick up + // our non-zero exit status. + exit (errno); + } + + // Parent process: Break out of the switch to do our processing. + default: + break; + } + + // Make sure stderr and stdout have been flushed + std::cerr << std::flush; + std::cout << std::flush; + fsync(1); + fsync(2); + + struct sigaction Act, Old; + + // Install a timeout handler. 
+  if (secondsToWait) {
+    Timeout = false;
+    Act.sa_sigaction = 0;
+    Act.sa_handler = TimeOutHandler;
+    sigemptyset(&Act.sa_mask);
+    Act.sa_flags = 0;
+    sigaction(SIGALRM, &Act, &Old);
+    alarm(secondsToWait);
+  }
+
+  // Parent process: Wait for the child process to terminate.
+  int status;
+  while (wait(&status) != child)
+    if (secondsToWait && errno == EINTR) {
+      // Kill the child.
+      kill(child, SIGKILL);
+
+      // Turn off the alarm and restore the signal handler
+      alarm(0);
+      sigaction(SIGALRM, &Old, 0);
+
+      // Wait for child to die
+      if (wait(&status) != child)
+        MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+      else
+        MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+      return -1;   // Timeout detected
+    } else if (errno != EINTR) {
+      MakeErrMsg(ErrMsg, "Error waiting for child process");
+      return -1;
+    }
+
+  // We exited normally without timeout, so turn off the timer.
+  if (secondsToWait) {
+    alarm(0);
+    sigaction(SIGALRM, &Old, 0);
+  }
+
+  // Return the proper exit status. 0=success, >0 is the program's exit status,
+  // <0 means a signal was returned, -9999999 means the program dumped core.
+  int result = 0;
+  if (WIFEXITED(status))
+    result = WEXITSTATUS(status);
+  else if (WIFSIGNALED(status))
+    result = 0 - WTERMSIG(status);
+#ifdef WCOREDUMP
+  else if (WCOREDUMP(status))
+    result |= 0x01000000;
+#endif
+  return result;
+#else
+  return -99;
+#endif
+
+}
+
+bool Program::ChangeStdinToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+}
diff --git a/lib/System/Unix/README.txt b/lib/System/Unix/README.txt
new file mode 100644
index 000000000000..b3bace483e5d
--- /dev/null
+++ b/lib/System/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/System/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+  Posix - code that is specific to Posix variants of UNIX
+    SUS - code that is specific to the Single Unix Specification
+      SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
new file mode 100644
index 000000000000..e385e0c55662
--- /dev/null
+++ b/lib/System/Unix/Signals.inc
@@ -0,0 +1,230 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility
+// of Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h>        // For backtrace().
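+#if 0
+// A usage sketch for Program::ExecuteAndWait from Program.inc above,
+// assuming the declarations in "llvm/System/Program.h"; the argv values and
+// timeout are illustrative.
+#include "llvm/System/Program.h"
+#include <string>
+
+static int runEcho() {
+  using namespace llvm::sys;
+  Path Echo = Program::FindProgramByName("echo");
+  const char *Args[] = { "echo", "hello", 0 };   // argv, NULL-terminated
+  std::string Err;
+  // No environment overrides, no redirects, 10s timeout, no memory limit.
+  return Program::ExecuteAndWait(Echo, Args, 0, 0, 10, 0, &Err);
+}
+#endif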
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig);  // defined below.
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> *FilesToRemove = 0;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+  SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+  IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+  SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ
+#ifdef SIGEMT
+  , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+  KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+  struct sigaction SA;
+  int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+  assert(NumRegisteredSignals <
+         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+         "Out of space for signal handlers!");
+
+  struct sigaction NewHandler;
+
+  NewHandler.sa_handler = SignalHandler;
+  NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+  sigemptyset(&NewHandler.sa_mask);
+
+  // Install the new handler, save the old one in RegisteredSignalInfo.
+  sigaction(Signal, &NewHandler,
+            &RegisteredSignalInfo[NumRegisteredSignals].SA);
+  RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+  ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+  // If the handlers are already registered, we're done.
+  if (NumRegisteredSignals != 0) return;
+
+  std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+  std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+  // Restore all of the signal handlers to how they were before we showed up.
+  for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+    sigaction(RegisteredSignalInfo[i].SigNo,
+              &RegisteredSignalInfo[i].SA, 0);
+  NumRegisteredSignals = 0;
+}
+
+
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+  // Restore the signal behavior to default, so that the program actually
+  // crashes when we return and the signal reissues. This also ensures that if
+  // we crash in our signal handler that the program will terminate immediately
+  // instead of recursing in the signal handler.
+  UnregisterHandlers();
+
+  // Unmask all potentially blocked kill signals.
+  sigset_t SigMask;
+  sigfillset(&SigMask);
+  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+  if (FilesToRemove != 0)
+    while (!FilesToRemove->empty()) {
+      FilesToRemove->back().eraseFromDisk(true);
+      FilesToRemove->pop_back();
+    }
+
+  if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+    if (InterruptFunction) {
+      void (*IF)() = InterruptFunction;
+      InterruptFunction = 0;
+      IF();        // run the interrupt function.
+      return;
+    }
+    raise(Sig);    // Execute the default handler.
+    return;
+  }
+
+  // Otherwise if it is a fault (like SEGV) run any handler.
+  if (CallBacksToRun)
+    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+      (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+}
+
+
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+  InterruptFunction = IF;
+  RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+                                   std::string* ErrMsg) {
+  if (FilesToRemove == 0)
+    FilesToRemove = new std::vector<sys::Path>();
+
+  FilesToRemove->push_back(Filename);
+
+  RegisterHandlers();
+  return false;
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  if (CallBacksToRun == 0)
+    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+  static void* StackTrace[256];
+  // Use backtrace() to output a backtrace on Linux systems with glibc.
+  int depth = backtrace(StackTrace,
+                        static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+  int width = 0;
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+
+    int nwidth;
+    if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+    else              nwidth = strlen(name) - 1;
+
+    if (nwidth > width) width = nwidth;
+  }
+
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+
+    fprintf(stderr, "%-3d", i);
+
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+    if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+    else              fprintf(stderr, " %-*s", width, name+1);
+
+    fprintf(stderr, " %#0*lx",
+            (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+    if (dlinfo.dli_sname != NULL) {
+      int res;
+      fputc(' ', stderr);
+      char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+      if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+      else           fputs(d, stderr);
+      free(d);
+
+      fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+    }
+    fputc('\n', stderr);
+  }
+#else
+  backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+  AddSignalHandler(PrintStackTrace, 0);
+}
+
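+#if 0
+// A usage sketch for the signal utilities implemented above, assuming the
+// declarations in "llvm/System/Signals.h"; the scratch file name is
+// illustrative.
+#include "llvm/System/Path.h"
+#include "llvm/System/Signals.h"
+
+static void setUpCrashHandling() {
+  // Print a stack trace if the process takes a fatal signal...
+  llvm::sys::PrintStackTraceOnErrorSignal();
+  // ...and make sure a scratch file is deleted on interrupt or crash.
+  llvm::sys::RemoveFileOnSignal(llvm::sys::Path("/tmp/llvm_scratch"));
+}
+#endif
+
diff --git a/lib/System/Unix/TimeValue.inc b/lib/System/Unix/TimeValue.inc
new file mode 100644
index 000000000000..8dd30b9322f9
--- /dev/null
+++ b/lib/System/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.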
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. +//===----------------------------------------------------------------------===// + +#include "Unix.h" + +namespace llvm { + using namespace sys; + +std::string TimeValue::toString() const { + char buffer[32]; + + time_t ourTime = time_t(this->toEpochTime()); +#ifdef __hpux +// note that the following line needs -D_REENTRANT on HP-UX to be picked up + asctime_r(localtime(&ourTime), buffer); +#else + ::asctime_r(::localtime(&ourTime), buffer); +#endif + + std::string result(buffer); + return result.substr(0,24); +} + +TimeValue TimeValue::now() { + struct timeval the_time; + timerclear(&the_time); + if (0 != ::gettimeofday(&the_time,0)) { + // This is *really* unlikely to occur because the only gettimeofday + // errors concern the timezone parameter which we're passing in as 0. + // In the unlikely case it does happen, just return MinTime, no error + // message needed. + return MinTime; + } + + return TimeValue( + static_cast( the_time.tv_sec ), + static_cast( the_time.tv_usec * + NANOSECONDS_PER_MICROSECOND ) ); +} + +} diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h new file mode 100644 index 000000000000..452226f4f79a --- /dev/null +++ b/lib/System/Unix/Unix.h @@ -0,0 +1,104 @@ +//===- llvm/System/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines things specific to Unix implementations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_UNIX_UNIX_H +#define LLVM_SYSTEM_UNIX_UNIX_H + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on all UNIX variants. +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" // Get autoconf configuration settings +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +#ifdef HAVE_ASSERT_H +#include +#endif + +#ifdef TIME_WITH_SYS_TIME +# include +# include +#else +# ifdef HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +#ifdef HAVE_SYS_WAIT_H +# include +#endif + +#ifndef WEXITSTATUS +# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) +#endif + +#ifndef WIFEXITED +# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) +#endif + +/// This function builds an error message into \p ErrMsg using the \p prefix +/// string and the Unix error number given by \p errnum. If errnum is -1, the +/// default then the value of errno is used. +/// @brief Make an error message +/// +/// If the error number can be converted to a string, it will be +/// separated from prefix by ": ". 
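+#if 0
+// A usage sketch for the MakeErrMsg helper defined just below; it formats
+// "<prefix>: <strerror text>" into *ErrMsg and returns true, so call sites
+// can report failure in a single statement. The fopen call is illustrative.
+#include <cstdio>
+
+static bool openLog(const char *Name, FILE *&F, std::string *ErrMsg) {
+  if ((F = ::fopen(Name, "a")) == 0)
+    return MakeErrMsg(ErrMsg, std::string(Name) + ": can't open log file");
+  return false;   // false means success, following lib/System conventions
+}
+#endif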
+static inline bool MakeErrMsg( + std::string* ErrMsg, const std::string& prefix, int errnum = -1) { + if (!ErrMsg) + return true; + char buffer[MAXPATHLEN]; + buffer[0] = 0; + if (errnum == -1) + errnum = errno; +#ifdef HAVE_STRERROR_R + // strerror_r is thread-safe. + if (errnum) + strerror_r(errnum,buffer,MAXPATHLEN-1); +#elif HAVE_STRERROR + // Copy the thread un-safe result of strerror into + // the buffer as fast as possible to minimize impact + // of collision of strerror in multiple threads. + if (errnum) + strncpy(buffer,strerror(errnum),MAXPATHLEN-1); + buffer[MAXPATHLEN-1] = 0; +#else + // Strange that this system doesn't even have strerror + // but, oh well, just use a generic message + sprintf(buffer, "Error #%d", errnum); +#endif + *ErrMsg = prefix + ": " + buffer; + return true; +} + +#endif diff --git a/lib/System/Win32/Alarm.inc b/lib/System/Win32/Alarm.inc new file mode 100644 index 000000000000..e0d00a0142bf --- /dev/null +++ b/lib/System/Win32/Alarm.inc @@ -0,0 +1,43 @@ +//===-- Alarm.inc - Implement Win32 Alarm Support ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 Alarm support. +// +//===----------------------------------------------------------------------===// + +#include +using namespace llvm; + +/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel. +/// This ensures that they never do. +static bool NestedSOI = false; + +void sys::SetupAlarm(unsigned seconds) { + assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!"); + NestedSOI = true; + // FIXME: Implement for Win32 +} + +void sys::TerminateAlarm() { + assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!"); + // FIXME: Implement for Win32 + NestedSOI = false; +} + +int sys::AlarmStatus() { + // FIXME: Implement for Win32 + return 0; +} + +// Don't pull in all of the Windows headers. +extern "C" void __stdcall Sleep(unsigned long); + +void sys::Sleep(unsigned n) { + ::Sleep(n*1000); +} diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc new file mode 100644 index 000000000000..1ddf6cea0b32 --- /dev/null +++ b/lib/System/Win32/DynamicLibrary.inc @@ -0,0 +1,219 @@ +//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of DynamicLibrary. +// +//===----------------------------------------------------------------------===// + +#include "Win32.h" + +#ifdef __MINGW32__ + #include +#else + #include +#endif + +#ifdef _MSC_VER + #include +#endif + +#ifdef __MINGW32__ + #if (HAVE_LIBIMAGEHLP != 1) + #error "libimagehlp.a should be present" + #endif +#else + #pragma comment(lib, "dbghelp.lib") +#endif + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code. 
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+#ifdef _WIN64
+  typedef DWORD64 ModuleBaseType;
+#else
+  typedef ULONG ModuleBaseType;
+#endif
+
+extern "C" {
+// Use old callback if:
+//  - Not using Visual Studio
+//  - Visual Studio 2005 or earlier, but only if we are not using the Windows
+//    SDK or the Windows SDK version is older than 6.0
+// Use new callback if:
+//  - Newer Visual Studio (comes with newer SDK).
+//  - Visual Studio 2005 with Windows SDK 6.0+
+#if !defined(_MSC_VER) || _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+  static BOOL CALLBACK ELM_Callback(PSTR ModuleName,
+                                    ModuleBaseType ModuleBase,
+                                    ULONG ModuleSize,
+                                    PVOID UserContext)
+#else
+  static BOOL CALLBACK ELM_Callback(PCSTR ModuleName,
+                                    ModuleBaseType ModuleBase,
+                                    ULONG ModuleSize,
+                                    PVOID UserContext)
+#endif
+  {
+    // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
+    // into the process.
+    if (stricmp(ModuleName, "msvci70") != 0 &&
+        stricmp(ModuleName, "msvcirt") != 0 &&
+        stricmp(ModuleName, "msvcp50") != 0 &&
+        stricmp(ModuleName, "msvcp60") != 0 &&
+        stricmp(ModuleName, "msvcp70") != 0 &&
+        stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+        // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+        // Otherwise, the user should be aware of what he is doing :)
+        stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+        stricmp(ModuleName, "msvcrt20") != 0 &&
+        stricmp(ModuleName, "msvcrt40") != 0) {
+      OpenedHandles.push_back((HMODULE)ModuleBase);
+    }
+    return TRUE;
+  }
+}
+
+DynamicLibrary::DynamicLibrary() : handle(0) {
+  handle = GetModuleHandle(NULL);
+  OpenedHandles.push_back((HMODULE)handle);
+}
+
+DynamicLibrary::~DynamicLibrary() {
+  if (handle == 0)
+    return;
+
+  // GetModuleHandle() does not increment the ref count, so we must not free
+  // the handle to the executable.
+  if (handle != GetModuleHandle(NULL))
+    FreeLibrary((HMODULE)handle);
+
+  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+       E = OpenedHandles.end(); I != E; ++I) {
+    if (*I == handle) {
+      // Note: don't use the swap/pop_back trick here. Order is important.
+      OpenedHandles.erase(I);
+      break;  // The iterator is invalid after erase(); stop scanning.
+    }
+  }
+  handle = 0;   // Clear only after the lookup above has used the old value.
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+                                            std::string *ErrMsg) {
+  if (filename) {
+    HMODULE a_handle = LoadLibrary(filename);
+
+    if (a_handle == 0)
+      return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+    OpenedHandles.push_back(a_handle);
+  } else {
+    // When no file is specified, enumerate all DLLs and EXEs in the
+    // process.
+    EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+  }
+
+  // Because we don't remember the handle, we will never free it; hence,
+  // it is loaded permanently.
+  return false;
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we
+// don't have dynamic linking on Windows. Provide a hook.
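// To make the EXPLICIT_SYMBOL machinery below concrete: it amounts to a
// small, fixed name-to-address table for compiler-runtime entry points,
// consulted before any module search. A minimal standalone sketch of the
// same idea (illustrative only; not code from the import):

#include <cstring>

extern "C" { extern void *__main; } // runtime-provided, as with the DEFs below

static void *LookupRuntimeSymbol(const char *symbolName) {
  if (!std::strcmp(symbolName, "__main"))
    return (void*)&__main;  // same shape as EXPLICIT_SYMBOL(__main)
  return 0;                 // fall through to the GetProcAddress search
}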
+#if defined(__MINGW32__) || defined (_MSC_VER)
+  #define EXPLICIT_SYMBOL(SYM)                    \
+    if (!strcmp(symbolName, #SYM)) return (void*)&SYM
+  #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)        \
+    if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO
+  #define EXPLICIT_SYMBOL_DEF(SYM)                \
+    extern "C" { extern void *SYM; }
+
+  #if defined(__MINGW32__)
+    EXPLICIT_SYMBOL_DEF(_alloca);
+    EXPLICIT_SYMBOL_DEF(__main);
+    EXPLICIT_SYMBOL_DEF(__ashldi3);
+    EXPLICIT_SYMBOL_DEF(__ashrdi3);
+    EXPLICIT_SYMBOL_DEF(__cmpdi2);
+    EXPLICIT_SYMBOL_DEF(__divdi3);
+    EXPLICIT_SYMBOL_DEF(__fixdfdi);
+    EXPLICIT_SYMBOL_DEF(__fixsfdi);
+    EXPLICIT_SYMBOL_DEF(__fixunsdfdi);
+    EXPLICIT_SYMBOL_DEF(__fixunssfdi);
+    EXPLICIT_SYMBOL_DEF(__floatdidf);
+    EXPLICIT_SYMBOL_DEF(__floatdisf);
+    EXPLICIT_SYMBOL_DEF(__lshrdi3);
+    EXPLICIT_SYMBOL_DEF(__moddi3);
+    EXPLICIT_SYMBOL_DEF(__udivdi3);
+    EXPLICIT_SYMBOL_DEF(__umoddi3);
+  #elif defined(_MSC_VER)
+    EXPLICIT_SYMBOL_DEF(_alloca_probe);
+  #endif
+#endif
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+  // First check symbols added via AddSymbol().
+  std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
+  if (I != g_symbols().end())
+    return I->second;
+
+  // Now search the libraries.
+  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+       E = OpenedHandles.end(); I != E; ++I) {
+    FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+    if (ptr)
+      return (void *) ptr;
+  }
+
+#if defined(__MINGW32__)
+  {
+    EXPLICIT_SYMBOL(_alloca);
+    EXPLICIT_SYMBOL(__main);
+    EXPLICIT_SYMBOL(__ashldi3);
+    EXPLICIT_SYMBOL(__ashrdi3);
+    EXPLICIT_SYMBOL(__cmpdi2);
+    EXPLICIT_SYMBOL(__divdi3);
+    EXPLICIT_SYMBOL(__fixdfdi);
+    EXPLICIT_SYMBOL(__fixsfdi);
+    EXPLICIT_SYMBOL(__fixunsdfdi);
+    EXPLICIT_SYMBOL(__fixunssfdi);
+    EXPLICIT_SYMBOL(__floatdidf);
+    EXPLICIT_SYMBOL(__floatdisf);
+    EXPLICIT_SYMBOL(__lshrdi3);
+    EXPLICIT_SYMBOL(__moddi3);
+    EXPLICIT_SYMBOL(__udivdi3);
+    EXPLICIT_SYMBOL(__umoddi3);
+
+    EXPLICIT_SYMBOL2(alloca, _alloca);
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+#undef EXPLICIT_SYMBOL_DEF
+  }
+#elif defined(_MSC_VER)
+  {
+    EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+    EXPLICIT_SYMBOL2(_alloca, _alloca_probe);
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+#undef EXPLICIT_SYMBOL_DEF
+  }
+#endif
+
+  return 0;
+}
+
+}
+
diff --git a/lib/System/Win32/Host.inc b/lib/System/Win32/Host.inc
new file mode 100644
index 000000000000..18f00f8bc07b
--- /dev/null
+++ b/lib/System/Win32/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/System/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+  // FIXME: Adapt to running version.
+  return LLVM_HOSTTRIPLE;
+}
diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc
new file mode 100644
index 000000000000..5e5cf7a6762d
--- /dev/null
+++ b/lib/System/Win32/Memory.inc
@@ -0,0 +1,72 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of various Memory +// management utilities +// +//===----------------------------------------------------------------------===// + +#include "Win32.h" +#include "llvm/System/Process.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +MemoryBlock Memory::AllocateRWX(unsigned NumBytes, + const MemoryBlock *NearBlock, + std::string *ErrMsg) { + if (NumBytes == 0) return MemoryBlock(); + + static const long pageSize = Process::GetPageSize(); + unsigned NumPages = (NumBytes+pageSize-1)/pageSize; + + //FIXME: support NearBlock if ever needed on Win64. + + void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + if (pa == NULL) { + MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: "); + return MemoryBlock(); + } + + MemoryBlock result; + result.Address = pa; + result.Size = NumPages*pageSize; + return result; +} + +bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { + if (M.Address == 0 || M.Size == 0) return false; + if (!VirtualFree(M.Address, 0, MEM_RELEASE)) + return MakeErrMsg(ErrMsg, "Can't release RWX Memory: "); + return false; +} + +bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) { + return true; +} + +bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) { + return false; +} + +bool Memory::setRangeWritable(const void *Addr, size_t Size) { + return true; +} + +bool Memory::setRangeExecutable(const void *Addr, size_t Size) { + return false; +} + +} diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc new file mode 100644 index 000000000000..7c1723be73fc --- /dev/null +++ b/lib/System/Win32/Mutex.inc @@ -0,0 +1,58 @@ +//===- llvm/System/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) Mutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. 
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include "llvm/System/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+Mutex::Mutex(bool /*recursive*/)
+{
+  data_ = new CRITICAL_SECTION;
+  InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+Mutex::~Mutex()
+{
+  DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+  delete (LPCRITICAL_SECTION)data_;
+  data_ = 0;
+}
+
+bool
+Mutex::acquire()
+{
+  EnterCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+Mutex::release()
+{
+  LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+Mutex::tryacquire()
+{
+  return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
new file mode 100644
index 000000000000..fbf8f6688a57
--- /dev/null
+++ b/lib/System/Win32/Path.inc
@@ -0,0 +1,825 @@
+//===- llvm/System/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Modified by Henrik Bach to comply with at least MinGW.
+// Ported to Win32 by Jeff Cohen.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes.  As LLVM code basically
+// assumes forward slashes are used, backward slashes are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory.  Any other use of a trailing slash is stripped.  Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simplify the code.
+
+static void FlipBackSlashes(std::string& s) {
+  for (size_t i = 0; i < s.size(); i++)
+    if (s[i] == '\\')
+      s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+const char PathSeparator = ';';
+
+Path::Path(const std::string& p)
+  : path(p) {
+  FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+  : path(StrStart, StrLen) {
+  FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(const std::string &that) {
+  path = that;
+  FlipBackSlashes(path);
+  return *this;
+}
+
+bool
+Path::isValid() const {
+  if (path.empty())
+    return false;
+
+  // If there is a colon, it must be the second character, preceded by a letter
+  // and followed by something.
+  size_t len = path.size();
+  size_t pos = path.rfind(':',len);
+  size_t rootslash = 0;
+  if (pos != std::string::npos) {
+    if (pos != 1 || !isalpha(path[0]) || len < 3)
+      return false;
+    rootslash = 2;
+  }
+
+  // Look for a UNC path, and if found adjust our notion of the root slash.
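// For concreteness, the UNC adjustment performed just below can be read as a
// standalone helper (an illustrative sketch, not code from the import; it
// assumes backslashes were already flipped to forward slashes):

#include <string>

static size_t UNCRootSlash(const std::string &p) {
  // "//server/share/dir" -- treat the slash ending the server name as the
  // root slash, mirroring the find('/', 2) in the code that follows.
  if (p.size() > 3 && p[0] == '/' && p[1] == '/') {
    size_t slash = p.find('/', 2);
    return slash == std::string::npos ? 0 : slash;
  }
  return 0;
}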
+  if (len > 3 && path[0] == '/' && path[1] == '/') {
+    rootslash = path.find('/', 2);
+    if (rootslash == std::string::npos)
+      rootslash = 0;
+  }
+
+  // Check for illegal characters.
+  if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+                         "\013\014\015\016\017\020\021\022\023\024\025\026"
+                         "\027\030\031\032\033\034\035\036\037")
+      != std::string::npos)
+    return false;
+
+  // Remove trailing slash, unless it's a root slash.
+  if (len > rootslash+1 && path[len-1] == '/')
+    path.erase(--len);
+
+  // Check each component for legality.
+  for (pos = 0; pos < len; ++pos) {
+    // A component may not end in a space.
+    if (path[pos] == ' ') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0')
+        return false;
+    }
+
+    // A component may not end in a period.
+    if (path[pos] == '.') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0') {
+        // Unless it is the pseudo-directory "."...
+        if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+          return true;
+        // or "..".
+        if (pos > 0 && path[pos-1] == '.') {
+          if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+            return true;
+        }
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool
+Path::isAbsolute() const {
+  switch (path.length()) {
+    case 0:
+      return false;
+    case 1:
+    case 2:
+      return path[0] == '/';
+    default:
+      return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+  }
+}
+
+static Path *TempDirectory = NULL;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+  if (TempDirectory)
+    return *TempDirectory;
+
+  char pathname[MAX_PATH];
+  if (!GetTempPath(MAX_PATH, pathname)) {
+    if (ErrMsg)
+      *ErrMsg = "Can't determine temporary directory";
+    return Path();
+  }
+
+  Path result;
+  result.set(pathname);
+
+  // Append a subdirectory based on our process id so multiple LLVMs don't
+  // step on each other's toes.
+#ifdef __MINGW32__
+  // Mingw's Win32 header files are broken.
+  sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+  sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+  result.appendComponent(pathname);
+
+  // If there's a directory left over from a previous LLVM execution that
+  // happened to have the same process id, get rid of it.
+  result.eraseFromDisk(true);
+
+  // And finally (re-)create the empty directory.
+  result.createDirectoryOnDisk(false);
+  TempDirectory = new Path(result);
+  return *TempDirectory;
+}
+
+// FIXME: the following set of functions don't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+  Path result;
+  result.set("C:/");
+  return result;
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+  Paths.push_back(sys::Path("C:/WINDOWS/SYSTEM32"));
+  Paths.push_back(sys::Path("C:/WINDOWS"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  // TODO: this isn't going to fly on Windows
+  return Path("/etc/llvm");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  // TODO: Typical Windows setup doesn't define HOME; see the sketch below
+  // for the conventional equivalent.
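// The conventional Windows stand-in for HOME is USERPROFILE, which the
// system sets for interactive logons. A sketch of that fallback (an
// alternative spelling, not what the code below does; helper name invented):

#include <cstdlib>
#include <string>

static std::string HomeDirectoryOrEmpty() {
  if (const char *profile = std::getenv("USERPROFILE"))
    return profile;   // typical native Windows session
  if (const char *home = std::getenv("HOME"))
    return home;      // present under MSYS/Cygwin-style shells
  return std::string();
}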
+ const char* home = getenv("HOME"); + if (home) { + Path result; + if (result.set(home)) + return result; + } + return GetRootDirectory(); +} + +Path +Path::GetCurrentDirectory() { + char pathname[MAX_PATH]; + ::GetCurrentDirectoryA(MAX_PATH,pathname); + return Path(pathname); +} + +/// GetMainExecutable - Return the path to the main executable, given the +/// value of argv[0] from program startup. +Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { + return Path(); +} + + +// FIXME: the above set of functions don't map to Windows very well. + + +bool +Path::isRootDirectory() const { + size_t len = path.size(); + return len > 0 && path[len-1] == '/'; +} + +std::string Path::getDirname() const { + return getDirnameCharSep(path, '/'); +} + +std::string +Path::getBasename() const { + // Find the last slash + size_t slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + size_t dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return path.substr(slash); + else + return path.substr(slash, dot - slash); +} + +std::string +Path::getSuffix() const { + // Find the last slash + size_t slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + size_t dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return std::string(); + else + return path.substr(dot + 1); +} + +bool +Path::exists() const { + DWORD attr = GetFileAttributes(path.c_str()); + return attr != INVALID_FILE_ATTRIBUTES; +} + +bool +Path::isDirectory() const { + DWORD attr = GetFileAttributes(path.c_str()); + return (attr != INVALID_FILE_ATTRIBUTES) && + (attr & FILE_ATTRIBUTE_DIRECTORY); +} + +bool +Path::canRead() const { + // FIXME: take security attributes into account. + DWORD attr = GetFileAttributes(path.c_str()); + return attr != INVALID_FILE_ATTRIBUTES; +} + +bool +Path::canWrite() const { + // FIXME: take security attributes into account. + DWORD attr = GetFileAttributes(path.c_str()); + return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY); +} + +bool +Path::canExecute() const { + // FIXME: take security attributes into account. + DWORD attr = GetFileAttributes(path.c_str()); + return attr != INVALID_FILE_ATTRIBUTES; +} + +std::string +Path::getLast() const { + // Find the last slash + size_t pos = path.rfind('/'); + + // Handle the corner cases + if (pos == std::string::npos) + return path; + + // If the last character is a slash, we have a root directory + if (pos == path.length()-1) + return path; + + // Return everything after the last slash + return path.substr(pos+1); +} + +const FileStatus * +PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { + if (!fsIsValid || update) { + WIN32_FILE_ATTRIBUTE_DATA fi; + if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) { + MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) + + ": Can't get status: "); + return 0; + } + + status.fileSize = fi.nFileSizeHigh; + status.fileSize <<= sizeof(fi.nFileSizeHigh)*8; + status.fileSize += fi.nFileSizeLow; + + status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777; + status.user = 9999; // Not applicable to Windows, so... + status.group = 9999; // Not applicable to Windows, so... + + // FIXME: this is only unique if the file is accessed by the same file path. + // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode + // numbers, but the concept doesn't exist in Windows. 
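// A sketch of the sturdier identity the FIXME above asks for: Windows does
// expose an inode-like (volume serial, file index) pair through
// GetFileInformationByHandle, at the cost of opening a handle first.
// Illustrative only; the helper name is invented:

static bool GetFileIdentity(HANDLE File, DWORD &Volume, uint64_t &Index) {
  BY_HANDLE_FILE_INFORMATION Info;
  if (!GetFileInformationByHandle(File, &Info))
    return false;
  Volume = Info.dwVolumeSerialNumber;
  Index = (uint64_t(Info.nFileIndexHigh) << 32) | Info.nFileIndexLow;
  return true; // (Volume, Index) is stable regardless of the path spelling
}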
+    status.uniqueID = 0;
+    for (unsigned i = 0; i < path.length(); ++i)
+      status.uniqueID += path[i];
+
+    __int64 ft = *reinterpret_cast<__int64*>(&fi.ftLastWriteTime);
+    status.modTime.fromWin32Time(ft);
+
+    status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+    fsIsValid = true;
+  }
+  return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+  // All files are readable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+  DWORD attr = GetFileAttributes(path.c_str());
+
+  // If it doesn't exist, we're done.
+  if (attr == INVALID_FILE_ATTRIBUTES)
+    return false;
+
+  if (attr & FILE_ATTRIBUTE_READONLY) {
+    if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+      MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+  // All files are executable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+    MakeErrMsg(ErrMsg, path + ": can't get status of file");
+    return true;
+  }
+
+  if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+    if (ErrMsg)
+      *ErrMsg = path + ": not a directory";
+    return true;
+  }
+
+  result.clear();
+  WIN32_FIND_DATA fd;
+  std::string searchpath = path;
+  if (path.size() == 0 || searchpath[path.size()-1] == '/')
+    searchpath += "*";
+  else
+    searchpath += "/*";
+
+  HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+  if (h == INVALID_HANDLE_VALUE) {
+    if (GetLastError() == ERROR_FILE_NOT_FOUND)
+      return true; // not really an error, now is it?
+    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+    return true;
+  }
+
+  do {
+    if (fd.cFileName[0] == '.')
+      continue;
+    Path aPath(path);
+    aPath.appendComponent(&fd.cFileName[0]);
+    result.insert(aPath);
+  } while (FindNextFile(h, &fd));
+
+  DWORD err = GetLastError();
+  FindClose(h);
+  if (err != ERROR_NO_MORE_FILES) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+    return true;
+  }
+  return false;
+}
+
+bool
+Path::set(const std::string& a_path) {
+  if (a_path.empty())
+    return false;
+  std::string save(path);
+  path = a_path;
+  FlipBackSlashes(path);
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::appendComponent(const std::string& name) {
+  if (name.empty())
+    return false;
+  std::string save(path);
+  if (!path.empty()) {
+    size_t last = path.size() - 1;
+    if (path[last] != '/')
+      path += '/';
+  }
+  path += name;
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::eraseComponent() {
+  size_t slashpos = path.rfind('/',path.size());
+  if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+    return false;
+  std::string save(path);
+  path.erase(slashpos);
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::appendSuffix(const std::string& suffix) {
+  std::string save(path);
+  path.append(".");
+  path.append(suffix);
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::eraseSuffix() {
+  size_t dotpos = path.rfind('.',path.size());
+  size_t slashpos = path.rfind('/',path.size());
+  if (dotpos != std::string::npos) {
+    if (slashpos == std::string::npos || dotpos > slashpos+1) {
+      std::string save(path);
+      path.erase(dotpos, path.size()-dotpos);
+      if (!isValid()) {
+        path = save;
+        return false;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char* msg) {
+  if (ErrMsg)
+    *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+  return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+  // Get a writeable copy of the path name.
+  size_t len = path.length();
+  char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+  path.copy(pathname, len);
+  pathname[len] = 0;
+
+  // Make sure it ends with a slash.
+  if (len == 0 || pathname[len - 1] != '/') {
+    pathname[len] = '/';
+    pathname[++len] = 0;
+  }
+
+  // Determine starting point for initial / search.
+  char *next = pathname;
+  if (pathname[0] == '/' && pathname[1] == '/') {
+    // Skip host name.
+    next = strchr(pathname+2, '/');
+    if (next == NULL)
+      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+    // Skip share name.
+    next = strchr(next+1, '/');
+    if (next == NULL)
+      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+    next++;
+    if (*next == 0)
+      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+  } else {
+    if (pathname[1] == ':')
+      next += 2;    // skip drive letter
+    if (*next == '/')
+      next++;       // skip root directory
+  }
+
+  // If we're supposed to create intermediate directories
+  if (create_parents) {
+    // Loop through the directory components until we're done.
+    while (*next) {
+      next = strchr(next, '/');
+      *next = 0;
+      if (!CreateDirectory(pathname, NULL))
+        return MakeErrMsg(ErrMsg,
+          std::string(pathname) + ": Can't create directory: ");
+      *next++ = '/';
+    }
+  } else {
+    // Drop trailing slash.
+    pathname[len-1] = 0;
+    if (!CreateDirectory(pathname, NULL)) {
+      return MakeErrMsg(ErrMsg,
+        std::string(pathname) + ": Can't create directory: ");
+    }
+  }
+  return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+  // Create the file.
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+  CloseHandle(h);
+  return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+    return true;
+
+  if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+    // If it doesn't exist, we're done.
+    if (!exists())
+      return false;
+
+    char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+    int lastchar = path.length() - 1;
+    path.copy(pathname, lastchar+1);
+
+    // Make path end with '/*'.
+    if (pathname[lastchar] != '/')
+      pathname[++lastchar] = '/';
+    pathname[lastchar+1] = '*';
+    pathname[lastchar+2] = 0;
+
+    if (remove_contents) {
+      WIN32_FIND_DATA fd;
+      HANDLE h = FindFirstFile(pathname, &fd);
+
+      // It's a bad idea to alter the contents of a directory while enumerating
+      // its contents. So build a list of its contents first, then destroy them.
+
+      if (h != INVALID_HANDLE_VALUE) {
+        std::vector<Path> list;
+
+        do {
+          if (strcmp(fd.cFileName, ".") == 0)
+            continue;
+          if (strcmp(fd.cFileName, "..") == 0)
+            continue;
+
+          Path aPath(path);
+          aPath.appendComponent(&fd.cFileName[0]);
+          list.push_back(aPath);
+        } while (FindNextFile(h, &fd));
+
+        DWORD err = GetLastError();
+        FindClose(h);
+        if (err != ERROR_NO_MORE_FILES) {
+          SetLastError(err);
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+        }
+
+        for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+             ++I) {
+          Path &aPath = *I;
+          aPath.eraseFromDisk(true);
+        }
+      } else {
+        if (GetLastError() != ERROR_FILE_NOT_FOUND)
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+      }
+    }
+
+    pathname[lastchar] = 0;
+    if (!RemoveDirectory(pathname))
+      return MakeErrMsg(ErrStr,
+        std::string(pathname) + ": Can't destroy directory: ");
+    return false;
+  } else {
+    // Read-only files cannot be deleted on Windows.  Must remove the read-only
+    // attribute first.
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { + if (!SetFileAttributes(path.c_str(), + fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); + } + + if (!DeleteFile(path.c_str())) + return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); + return false; + } +} + +bool Path::getMagicNumber(std::string& Magic, unsigned len) const { + assert(len < 1024 && "Request for magic string too long"); + char* buf = (char*) alloca(1 + len); + + HANDLE h = CreateFile(path.c_str(), + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (h == INVALID_HANDLE_VALUE) + return false; + + DWORD nRead = 0; + BOOL ret = ReadFile(h, buf, len, &nRead, NULL); + CloseHandle(h); + + if (!ret || nRead != len) + return false; + + buf[len] = '\0'; + Magic = buf; + return true; +} + +bool +Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { + if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING)) + return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path + + "': "); + return false; +} + +bool +Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const { + // FIXME: should work on directories also. + if (!si.isFile) { + return true; + } + + HANDLE h = CreateFile(path.c_str(), + FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (h == INVALID_HANDLE_VALUE) + return true; + + BY_HANDLE_FILE_INFORMATION bhfi; + if (!GetFileInformationByHandle(h, &bhfi)) { + DWORD err = GetLastError(); + CloseHandle(h); + SetLastError(err); + return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: "); + } + + FILETIME ft; + (uint64_t&)ft = si.modTime.toWin32Time(); + BOOL ret = SetFileTime(h, NULL, &ft, &ft); + DWORD err = GetLastError(); + CloseHandle(h); + if (!ret) { + SetLastError(err); + return MakeErrMsg(ErrMsg, path + ": SetFileTime: "); + } + + // Best we can do with Unix permission bits is to interpret the owner + // writable bit. + if (si.mode & 0200) { + if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { + if (!SetFileAttributes(path.c_str(), + bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); + } + } else { + if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) { + if (!SetFileAttributes(path.c_str(), + bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); + } + } + + return false; +} + +bool +CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) { + // Can't use CopyFile macro defined in Windows.h because it would mess up the + // above line. We use the expansion it would have in a non-UNICODE build. + if (!::CopyFileA(Src.c_str(), Dest.c_str(), false)) + return MakeErrMsg(ErrMsg, "Can't copy '" + Src.toString() + + "' to '" + Dest.toString() + "': "); + return false; +} + +bool +Path::makeUnique(bool reuse_current, std::string* ErrMsg) { + if (reuse_current && !exists()) + return false; // File doesn't exist already, just use it! + + // Reserve space for -XXXXXX at the end. + char *FNBuffer = (char*) alloca(path.size()+8); + unsigned offset = path.size(); + path.copy(FNBuffer, offset); + + // Find a numeric suffix that isn't used by an existing file. Assume there + // won't be more than 1 million files with the same prefix. Probably a safe + // bet. 
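// For reference, Win32 can reserve a unique file atomically, avoiding the
// probe-then-create race inherent in any scan like the one below. A sketch
// using GetTempFileName (illustrative; not what makeUnique does, and the
// helper name is invented):

static bool ReserveUniqueFile(const char *Dir, std::string &Result) {
  char Buf[MAX_PATH];
  // A uUnique argument of 0 asks the API to pick a free number *and* create
  // the (empty) file in one step.
  if (!GetTempFileNameA(Dir, "llv", 0, Buf))
    return true;  // failed
  Result = Buf;
  return false;
}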
+  static unsigned FCounter = 0;
+  do {
+    sprintf(FNBuffer+offset, "-%06u", FCounter);
+    if (++FCounter > 999999)
+      FCounter = 0;
+    path = FNBuffer;
+  } while (exists());
+  return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+  // Make this into a unique file name.
+  makeUnique(reuse_current, ErrMsg);
+
+  // Now go and create it.
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+  CloseHandle(h);
+  return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  return 0;
+}
+
+/// UnMapFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
+  assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
new file mode 100644
index 000000000000..e1d7a9222f75
--- /dev/null
+++ b/lib/System/Win32/Process.inc
@@ -0,0 +1,150 @@
+//===- Win32/Process.cpp - Win32 Process Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <psapi.h>
+#include <malloc.h>
+#include <io.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+  #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This ban should be lifted when MinGW 1.0+ has defined this value.
+#  define _HEAPOK (-2)
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once in Process::GetPageSize to initialize the
+// static variable PageSize.
+inline unsigned GetPageSizeOnce() {
+  // NOTE: A 32-bit application running under WOW64 is supposed to use
+  // GetNativeSystemInfo.  However, this interface is not present prior
+  // to Windows XP so to use it requires dynamic linking.  It is not clear
+  // how this affects the reported page size, if at all.  One could argue
+  // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
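// The dynamic-linking route mentioned in the NOTE above would look roughly
// like this (a file-scope sketch, shown beside the comment it illustrates;
// the fallback is the plain GetSystemInfo call that follows):

typedef void (WINAPI *GetNativeSystemInfoFn)(LPSYSTEM_INFO);

static void QuerySystemInfoPreferNative(SYSTEM_INFO *SI) {
  GetNativeSystemInfoFn GNSI = (GetNativeSystemInfoFn)
    GetProcAddress(GetModuleHandleA("kernel32.dll"), "GetNativeSystemInfo");
  if (GNSI)
    GNSI(SI);           // WOW64-aware: reports the native system parameters
  else
    GetSystemInfo(SI);  // pre-XP systems lack the native variant
}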
+ SYSTEM_INFO info; + GetSystemInfo(&info); + return static_cast(info.dwPageSize); +} + +unsigned +Process::GetPageSize() { + static const unsigned PageSize = GetPageSizeOnce(); + return PageSize; +} + +size_t +Process::GetMallocUsage() +{ + _HEAPINFO hinfo; + hinfo._pentry = NULL; + + size_t size = 0; + + while (_heapwalk(&hinfo) == _HEAPOK) + size += hinfo._size; + + return size; +} + +size_t +Process::GetTotalMemoryUsage() +{ + PROCESS_MEMORY_COUNTERS pmc; + GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); + return pmc.PagefileUsage; +} + +void +Process::GetTimeUsage( + TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time) +{ + elapsed = TimeValue::now(); + + uint64_t ProcCreate, ProcExit, KernelTime, UserTime; + GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate, + (FILETIME*)&ProcExit, (FILETIME*)&KernelTime, + (FILETIME*)&UserTime); + + // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond) + user_time.seconds( UserTime / 10000000 ); + user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 ); + sys_time.seconds( KernelTime / 10000000 ); + sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 ); +} + +int Process::GetCurrentUserId() +{ + return 65536; +} + +int Process::GetCurrentGroupId() +{ + return 65536; +} + +// Some LLVM programs such as bugpoint produce core files as a normal part of +// their operation. To prevent the disk from filling up, this configuration item +// does what's necessary to prevent their generation. +void Process::PreventCoreFiles() { + // Windows doesn't do core files, but it does do modal pop-up message + // boxes. As this method is used by bugpoint, preventing these pop-ups + // is the moral equivalent of suppressing core files. + SetErrorMode(SEM_FAILCRITICALERRORS | + SEM_NOGPFAULTERRORBOX | + SEM_NOOPENFILEERRORBOX); +} + +bool Process::StandardInIsUserInput() { + return GetFileType((HANDLE)_get_osfhandle(0)) == FILE_TYPE_CHAR; +} + +bool Process::StandardOutIsDisplayed() { + return GetFileType((HANDLE)_get_osfhandle(1)) == FILE_TYPE_CHAR; +} + +bool Process::StandardErrIsDisplayed() { + return GetFileType((HANDLE)_get_osfhandle(2)) == FILE_TYPE_CHAR; +} + +unsigned Process::StandardOutColumns() { + unsigned Columns = 0; + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +unsigned Process::StandardErrColumns() { + unsigned Columns = 0; + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +} diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc new file mode 100644 index 000000000000..49086b8348e6 --- /dev/null +++ b/lib/System/Win32/Program.inc @@ -0,0 +1,316 @@ +//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Program class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <cstdio>
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+  // Check some degenerate cases
+  if (progName.length() == 0) // no program
+    return Path();
+  Path temp;
+  if (!temp.set(progName)) // invalid name
+    return Path();
+  if (temp.canExecute()) // already executable as is
+    return temp;
+
+  // At this point, the file name is valid and it's not executable.
+  // Let Windows search for it.
+  char buffer[MAX_PATH];
+  char *dummy = NULL;
+  DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+                         buffer, &dummy);
+
+  // See if it wasn't found.
+  if (len == 0)
+    return Path();
+
+  // See if we got the entire path.
+  if (len < MAX_PATH)
+    return Path(buffer);
+
+  // Buffer was too small; grow and retry.
+  while (true) {
+    char *b = reinterpret_cast<char *>(_alloca(len+1));
+    DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+    // It is unlikely the search failed, but it's always possible some file
+    // was added or removed since the last search, so be paranoid...
+    if (len2 == 0)
+      return Path();
+    else if (len2 <= len)
+      return Path(b);
+
+    len = len2;
+  }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
+  HANDLE h;
+  if (path == 0) {
+    DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+                    GetCurrentProcess(), &h,
+                    0, TRUE, DUPLICATE_SAME_ACCESS);
+    return h;
+  }
+
+  const char *fname;
+  if (path->isEmpty())
+    fname = "NUL";
+  else
+    fname = path->toString().c_str();
+
+  SECURITY_ATTRIBUTES sa;
+  sa.nLength = sizeof(sa);
+  sa.lpSecurityDescriptor = 0;
+  sa.bInheritHandle = TRUE;
+
+  h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+                 &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+                 FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE) {
+    MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+        (fd ? "input: " : "output: "));
+  }
+
+  return h;
+}
+
+#ifdef __MINGW32__
+  // Due to unknown reasons, mingw32's w32api doesn't have this declaration.
+  extern "C"
+  BOOL WINAPI SetInformationJobObject(HANDLE hJob,
+                                      JOBOBJECTINFOCLASS JobObjectInfoClass,
+                                      LPVOID lpJobObjectInfo,
+                                      DWORD cbJobObjectInfoLength);
+#endif
+
+int
+Program::ExecuteAndWait(const Path& path,
+                        const char** args,
+                        const char** envp,
+                        const Path** redirects,
+                        unsigned secondsToWait,
+                        unsigned memoryLimit,
+                        std::string* ErrMsg) {
+  if (!path.canExecute()) {
+    if (ErrMsg)
+      *ErrMsg = "program not executable";
+    return -1;
+  }
+
+  // Windows wants a command line, not an array of args, to pass to the new
+  // process.  We have to concatenate them all, while quoting the args that
+  // have embedded spaces.
+
+  // First, determine the length of the command line.
+  unsigned len = 0;
+  for (unsigned i = 0; args[i]; i++) {
+    len += strlen(args[i]) + 1;
+    if (strchr(args[i], ' '))
+      len += 2;
+  }
+
+  // Now build the command line.
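// Note the simplification in the loop below: arguments containing spaces are
// wrapped in quotes, but embedded double quotes are not escaped. A sketch of
// a stricter quoting helper (invented name; still ignoring the full MSVCRT
// backslash-run rules):

#include <string>

static std::string QuoteArgument(const char *Arg) {
  std::string S(Arg);
  if (S.find_first_of(" \t\"") == std::string::npos)
    return S;                       // nothing to protect
  std::string Out("\"");
  for (size_t i = 0; i < S.size(); ++i) {
    if (S[i] == '"')
      Out += '\\';                  // escape an embedded quote
    Out += S[i];
  }
  Out += '"';
  return Out;
}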
+  char *command = reinterpret_cast<char *>(_alloca(len+1));
+  char *p = command;
+
+  for (unsigned i = 0; args[i]; i++) {
+    const char *arg = args[i];
+    size_t len = strlen(arg);
+    bool needsQuoting = strchr(arg, ' ') != 0;
+    if (needsQuoting)
+      *p++ = '"';
+    memcpy(p, arg, len);
+    p += len;
+    if (needsQuoting)
+      *p++ = '"';
+    *p++ = ' ';
+  }
+
+  *p = 0;
+
+  // The pointer to the environment block for the new process.
+  char *envblock = 0;
+
+  if (envp) {
+    // An environment block consists of a null-terminated block of
+    // null-terminated strings.  Convert the array of environment variables to
+    // an environment block by concatenating them.
+
+    // First, determine the length of the environment block.
+    len = 0;
+    for (unsigned i = 0; envp[i]; i++)
+      len += strlen(envp[i]) + 1;
+
+    // Now build the environment block.
+    envblock = reinterpret_cast<char *>(_alloca(len+1));
+    p = envblock;
+
+    for (unsigned i = 0; envp[i]; i++) {
+      const char *ev = envp[i];
+      size_t len = strlen(ev) + 1;
+      memcpy(p, ev, len);
+      p += len;
+    }
+
+    *p = 0;
+  }
+
+  // Create a child process.
+  STARTUPINFO si;
+  memset(&si, 0, sizeof(si));
+  si.cb = sizeof(si);
+  si.hStdInput = INVALID_HANDLE_VALUE;
+  si.hStdOutput = INVALID_HANDLE_VALUE;
+  si.hStdError = INVALID_HANDLE_VALUE;
+
+  if (redirects) {
+    si.dwFlags = STARTF_USESTDHANDLES;
+
+    si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+    if (si.hStdInput == INVALID_HANDLE_VALUE) {
+      MakeErrMsg(ErrMsg, "can't redirect stdin");
+      return -1;
+    }
+    si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+    if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+      CloseHandle(si.hStdInput);
+      MakeErrMsg(ErrMsg, "can't redirect stdout");
+      return -1;
+    }
+    if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+      // If stdout and stderr should go to the same place, redirect stderr
+      // to the handle already open for stdout.
+      DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+                      GetCurrentProcess(), &si.hStdError,
+                      0, TRUE, DUPLICATE_SAME_ACCESS);
+    } else {
+      // Just redirect stderr.
+      si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+      if (si.hStdError == INVALID_HANDLE_VALUE) {
+        CloseHandle(si.hStdInput);
+        CloseHandle(si.hStdOutput);
+        MakeErrMsg(ErrMsg, "can't redirect stderr");
+        return -1;
+      }
+    }
+  }
+
+  PROCESS_INFORMATION pi;
+  memset(&pi, 0, sizeof(pi));
+
+  fflush(stdout);
+  fflush(stderr);
+  BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+                          envblock, NULL, &si, &pi);
+  DWORD err = GetLastError();
+
+  // Regardless of whether the process got created or not, we are done with
+  // the handles we created for it to inherit.
+  CloseHandle(si.hStdInput);
+  CloseHandle(si.hStdOutput);
+  CloseHandle(si.hStdError);
+
+  // Now return an error if the process didn't get created.
+  if (!rc) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+        path.toString() + "'");
+    return -1;
+  }
+
+  // Make sure these get closed no matter what.
+  AutoHandle hProcess(pi.hProcess);
+  AutoHandle hThread(pi.hThread);
+
+  // Assign the process to a job if a memory limit is defined.
+  AutoHandle hJob(0);
+  if (memoryLimit != 0) {
+    hJob = CreateJobObject(0, 0);
+    bool success = false;
+    if (hJob != 0) {
+      JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+      memset(&jeli, 0, sizeof(jeli));
+      jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
+      jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+      if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+                                  &jeli, sizeof(jeli))) {
+        if (AssignProcessToJobObject(hJob, pi.hProcess))
+          success = true;
+      }
+    }
+    if (!success) {
+      SetLastError(GetLastError());
+      MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+      TerminateProcess(pi.hProcess, 1);
+      WaitForSingleObject(pi.hProcess, INFINITE);
+      return -1;
+    }
+  }
+
+  // Wait for it to terminate.
+  DWORD millisecondsToWait = INFINITE;
+  if (secondsToWait > 0)
+    millisecondsToWait = secondsToWait * 1000;
+
+  if (WaitForSingleObject(pi.hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+    if (!TerminateProcess(pi.hProcess, 1)) {
+      MakeErrMsg(ErrMsg, std::string("Failed to terminate timed-out program '")
+          + path.toString() + "'");
+      return -1;
+    }
+    WaitForSingleObject(pi.hProcess, INFINITE);
+  }
+
+  // Get its exit status.
+  DWORD status;
+  rc = GetExitCodeProcess(pi.hProcess, &status);
+  err = GetLastError();
+
+  if (!rc) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, std::string("Failed getting status for program '") +
+        path.toString() + "'");
+    return -1;
+  }
+
+  return status;
+}
+
+bool Program::ChangeStdinToBinary(){
+  int result = _setmode( _fileno(stdin), _O_BINARY );
+  return result == -1;
+}
+
+bool Program::ChangeStdoutToBinary(){
+  int result = _setmode( _fileno(stdout), _O_BINARY );
+  return result == -1;
+}
+
+}
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
new file mode 100644
index 000000000000..3a8f77e3cdb9
--- /dev/null
+++ b/lib/System/Win32/Signals.inc
@@ -0,0 +1,270 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+  #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs.  This causes concurrency issues with the above
+// globals which this critical section addresses.
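// The Enter/Leave pairs threaded through the functions below could also be
// written as a small RAII guard -- a sketch of the idiom in the C++98 style
// of this file, not code the file actually uses:

class ScopedCritSec {
  CRITICAL_SECTION &CS;
public:
  explicit ScopedCritSec(CRITICAL_SECTION &cs) : CS(cs) {
    EnterCriticalSection(&CS);
  }
  ~ScopedCritSec() { LeaveCriticalSection(&CS); } // released on every exit path
private:
  ScopedCritSec(const ScopedCritSec&);   // not copyable
  void operator=(const ScopedCritSec&);
};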
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+
+static void RegisterHandler() {
+  if (RegisteredUnhandledExceptionFilter) {
+    EnterCriticalSection(&CriticalSection);
+    return;
+  }
+
+  // Now's the time to create the critical section.  This is the first time
+  // through here, and there's only one thread.
+  InitializeCriticalSection(&CriticalSection);
+
+  // Enter it immediately.  Now if someone hits CTRL/C, the console handler
+  // can't proceed until the globals are updated.
+  EnterCriticalSection(&CriticalSection);
+
+  RegisteredUnhandledExceptionFilter = true;
+  OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+  SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+  // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+  // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+  RegisterHandler();
+
+  if (CleanupExecuted) {
+    if (ErrMsg)
+      *ErrMsg = "Process terminating -- cannot register for removal";
+    return true;
+  }
+
+  if (FilesToRemove == NULL)
+    FilesToRemove = new std::vector<sys::Path>;
+
+  FilesToRemove->push_back(Filename);
+
+  LeaveCriticalSection(&CriticalSection);
+  return false;
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+  RegisterHandler();
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+  RegisterHandler();
+  InterruptFunction = IF;
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process.  The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  if (CallBacksToRun == 0)
+    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandler();
+}
+}
+
+static void Cleanup() {
+  EnterCriticalSection(&CriticalSection);
+
+  // Prevent other thread from registering new files and directories for
+  // removal, should we be executing because of the console handler callback.
+  CleanupExecuted = true;
+
+  // FIXME: open files cannot be deleted.
+
+  if (FilesToRemove != NULL)
+    while (!FilesToRemove->empty()) {
+      try {
+        FilesToRemove->back().eraseFromDisk();
+      } catch (...) {
+      }
+      FilesToRemove->pop_back();
+    }
+
+  if (CallBacksToRun)
+    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+      (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+  LeaveCriticalSection(&CriticalSection);
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+  try {
+    Cleanup();
+
+#ifdef _WIN64
+  // TODO: provide a x64 friendly version of the following
+#else
+
+    // Initialize the STACKFRAME structure.
+    STACKFRAME StackFrame;
+    memset(&StackFrame, 0, sizeof(StackFrame));
+
+    StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+    StackFrame.AddrPC.Mode = AddrModeFlat;
+    StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+    StackFrame.AddrStack.Mode = AddrModeFlat;
+    StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+    StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+    HANDLE hProcess = GetCurrentProcess();
+    HANDLE hThread = GetCurrentThread();
+
+    // Initialize the symbol handler.
+    SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+    SymInitialize(hProcess, NULL, TRUE);
+
+    while (true) {
+      if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+                     ep->ContextRecord, NULL, SymFunctionTableAccess,
+                     SymGetModuleBase, NULL)) {
+        break;
+      }
+
+      if (StackFrame.AddrFrame.Offset == 0)
+        break;
+
+      // Print the PC in hexadecimal.
+      DWORD PC = StackFrame.AddrPC.Offset;
+      fprintf(stderr, "%08lX", PC);
+
+      // Print the parameters.  Assume there are four.
+      fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", StackFrame.Params[0],
+              StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+      // Verify the PC belongs to a module in this process.
+      if (!SymGetModuleBase(hProcess, PC)) {
+        fputs(" <unknown module>\n", stderr);
+        continue;
+      }
+
+      // Print the symbol name.
+      char buffer[512];
+      IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+      memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+      symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+      symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+      DWORD dwDisp;
+      if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+        fputc('\n', stderr);
+        continue;
+      }
+
+      buffer[511] = 0;
+      if (dwDisp > 0)
+        fprintf(stderr, ", %s()+%04lu byte(s)", symbol->Name, dwDisp);
+      else
+        fprintf(stderr, ", %s", symbol->Name);
+
+      // Print the source file and line number information.
+      IMAGEHLP_LINE line;
+      memset(&line, 0, sizeof(line));
+      line.SizeOfStruct = sizeof(line);
+      if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+        fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+        if (dwDisp > 0)
+          fprintf(stderr, "+%04lu byte(s)", dwDisp);
+      }
+
+      fputc('\n', stderr);
+    }
+
+#endif
+
+  } catch (...) {
+    assert(0 && "Crashed in LLVMUnhandledExceptionFilter");
+  }
+
+  // Allow dialog box to pop up allowing choice to start debugger.
+  if (OldFilter)
+    return (*OldFilter)(ep);
+  else
+    return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+  // We are running in our very own thread, courtesy of Windows.
+  EnterCriticalSection(&CriticalSection);
+  Cleanup();
+
+  // If an interrupt function has been set, go and run it; otherwise,
+  // the process dies.
+  void (*IF)() = InterruptFunction;
+  InterruptFunction = 0;      // Don't run it on another CTRL-C.
+
+  if (IF) {
+    // Note: if the interrupt function throws an exception, there is nothing
+    // to catch it in this thread so it will kill the process.
+    IF();                     // Run it now.
+    LeaveCriticalSection(&CriticalSection);
+    return TRUE;              // Don't kill the process.
+  }
+
+  // Allow normal processing to take place; i.e., the process dies.
+  LeaveCriticalSection(&CriticalSection);
+  return FALSE;
+}
+
diff --git a/lib/System/Win32/TimeValue.inc b/lib/System/Win32/TimeValue.inc
new file mode 100644
index 000000000000..0ca87d423325
--- /dev/null
+++ b/lib/System/Win32/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+  uint64_t ft;
+  GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+  TimeValue t(0, 0);
+  t.fromWin32Time(ft);
+  return t;
+}
+
+std::string TimeValue::toString() const {
+#ifdef __MINGW32__
+  // This ban may be lifted by either:
+  // (i) a future MinGW version other than 1.0 inheriting the __time64_t type,
+  //     or
+  // (ii) configure tests for either the time_t or __time64_t type.
+  time_t ourTime = time_t(this->toEpochTime());
+  struct tm *lt = ::localtime(&ourTime);
+#else
+  __time64_t ourTime = this->toEpochTime();
+  struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+  char buffer[25];
+  strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+  return std::string(buffer);
+}
+
+
+}
diff --git a/lib/System/Win32/Win32.h b/lib/System/Win32/Win32.h
new file mode 100644
index 000000000000..8f505b1a6cdb
--- /dev/null
+++ b/lib/System/Win32/Win32.h
@@ -0,0 +1,57 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// Require at least Windows 2000 API.
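// For reference: _WIN32_WINNT gates which declarations <windows.h> exposes;
// common values are 0x0400 (NT 4.0), 0x0500 (Windows 2000), 0x0501 (XP) and
// 0x0600 (Vista). A more defensive spelling of the define that follows would
// guard against a build system that has already set it (a sketch, not the
// imported line):

#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0500
#endif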
+#define _WIN32_WINNT 0x0500
+
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "windows.h"
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+  if (!ErrMsg)
+    return true;
+  char *buffer = NULL;
+  FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+                NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+  *ErrMsg = prefix + buffer;
+  LocalFree(buffer);
+  return true;
+}
+
+class AutoHandle {
+  HANDLE handle;
+
+public:
+  AutoHandle(HANDLE h) : handle(h) {}
+
+  ~AutoHandle() {
+    if (handle)
+      CloseHandle(handle);
+  }
+
+  operator HANDLE() {
+    return handle;
+  }
+
+  AutoHandle &operator=(HANDLE h) {
+    handle = h;
+    return *this;
+  }
+};
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 000000000000..ac7de911ceec
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,121 @@
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions; see the sketch that follows.
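// Concretely, the identity relied on by the enum below is the ARM
// architectural encoding: EQ=0b0000, NE=0b0001, ... AL=0b1110, stored in
// bits 31-28 of every predicated instruction. A sketch of an encoder using
// it (illustrative; this helper is not part of the header):

static inline unsigned ApplyCondField(unsigned Insn, ARMCC::CondCodes CC) {
  // Clear the condition field, then install the 4-bit code.
  return (Insn & 0x0FFFFFFFU) | (unsigned(CC) << 28);
}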
+ enum CondCodes { + EQ, + NE, + HS, + LO, + MI, + PL, + VS, + VC, + HI, + LS, + GE, + LT, + GT, + LE, + AL + }; + + inline static CondCodes getOppositeCondition(CondCodes CC){ + switch (CC) { + default: assert(0 && "Unknown condition code"); + case EQ: return NE; + case NE: return EQ; + case HS: return LO; + case LO: return HS; + case MI: return PL; + case PL: return MI; + case VS: return VC; + case VC: return VS; + case HI: return LS; + case LS: return HI; + case GE: return LT; + case LT: return GE; + case GT: return LE; + case LE: return GT; + } + } +} + +inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case ARMCC::EQ: return "eq"; + case ARMCC::NE: return "ne"; + case ARMCC::HS: return "hs"; + case ARMCC::LO: return "lo"; + case ARMCC::MI: return "mi"; + case ARMCC::PL: return "pl"; + case ARMCC::VS: return "vs"; + case ARMCC::VC: return "vc"; + case ARMCC::HI: return "hi"; + case ARMCC::LS: return "ls"; + case ARMCC::GE: return "ge"; + case ARMCC::LT: return "lt"; + case ARMCC::GT: return "gt"; + case ARMCC::LE: return "le"; + case ARMCC::AL: return "al"; + } +} + +FunctionPass *createARMISelDag(ARMTargetMachine &TM); +FunctionPass *createARMCodePrinterPass(raw_ostream &O, + ARMTargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE); + +FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM, + MachineCodeEmitter &MCE); +FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM, + JITCodeEmitter &JCE); + +FunctionPass *createARMLoadStoreOptimizationPass(); +FunctionPass *createARMConstantIslandPass(); + +} // end namespace llvm; + +// Defines symbolic names for ARM registers. This defines a mapping from +// register name to register number. +// +#include "ARMGenRegisterNames.inc" + +// Defines symbolic names for the ARM instructions. +// +#include "ARMGenInstrNames.inc" + + +#endif diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td new file mode 100644 index 000000000000..4ac6857d22d4 --- /dev/null +++ b/lib/Target/ARM/ARM.td @@ -0,0 +1,136 @@ +//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// ARM Subtarget features. 
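+// Editorial sketch (not in the upstream source): each SubtargetFeature below
+// surfaces as an -mattr flag and each Proc as an -mcpu name, so plausible
+// invocations exercising them might be:
+//   llc -march=arm -mcpu=arm1136jf-s foo.bc   (selects ArchV6 + FeatureVFP2)
+//   llc -march=arm -mattr=+v5te,+vfp2 foo.bc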
+//
+
+def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+                                   "ARM v4T">;
+def ArchV5T     : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+                                   "ARM v5T">;
+def ArchV5TE    : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+                                   "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6      : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+                                   "ARM v6">;
+def ArchV7A     : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+                                   "ARM v7A">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+                                   "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
+                                   "Enable VFP3 instructions">;
+def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
+                                   "Enable NEON instructions">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+                                     "Enable Thumb2 instructions">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic",       []>;
+def : Proc<"arm8",          []>;
+def : Proc<"arm810",        []>;
+def : Proc<"strongarm",     []>;
+def : Proc<"strongarm110",  []>;
+def : Proc<"strongarm1100", []>;
+def : Proc<"strongarm1110", []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi",   [ArchV4T]>;
+def : Proc<"arm7tdmi-s", [ArchV4T]>;
+def : Proc<"arm710t",    [ArchV4T]>;
+def : Proc<"arm720t",    [ArchV4T]>;
+def : Proc<"arm9",       [ArchV4T]>;
+def : Proc<"arm9tdmi",   [ArchV4T]>;
+def : Proc<"arm920",     [ArchV4T]>;
+def : Proc<"arm920t",    [ArchV4T]>;
+def : Proc<"arm922t",    [ArchV4T]>;
+def : Proc<"arm940t",    [ArchV4T]>;
+def : Proc<"ep9312",     [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi",  [ArchV5T]>;
+def : Proc<"arm1020t",   [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e",      [ArchV5TE]>;
+def : Proc<"arm926ej-s", [ArchV5TE]>;
+def : Proc<"arm946e-s",  [ArchV5TE]>;
+def : Proc<"arm966e-s",  [ArchV5TE]>;
+def : Proc<"arm968e-s",  [ArchV5TE]>;
+def : Proc<"arm10e",     [ArchV5TE]>;
+def : Proc<"arm1020e",   [ArchV5TE]>;
+def : Proc<"arm1022e",   [ArchV5TE]>;
+def : Proc<"xscale",     [ArchV5TE]>;
+def : Proc<"iwmmxt",     [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s",   [ArchV6]>;
+def : Proc<"arm1136jf-s",  [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s",  [ArchV6]>;
+def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp",  [ArchV6]>;
+def : Proc<"mpcore",       [ArchV6, FeatureVFP2]>;
+
+def : Proc<"arm1156t2-s",  [ArchV6, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>;
+
+def : Proc<"cortex-a8",    [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Proc<"cortex-a9",    [ArchV7A, FeatureThumb2, FeatureNEON]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo {
+  // Define how we want to lay out our target-specific information field.
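+  // Editorial note (not in the upstream source): given the shifts below, the
+  // resulting TSFlags layout is
+  //   bits 0-3 AddrModeBits, bits 4-6 SizeFlag, bits 7-8 IndexModeBits,
+  //   bit 9 isUnaryDataProc, bits 10+ Form,
+  // which is what masks like ARMII::FormMask decode on the C++ side.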
+ let TSFlagsFields = ["AddrModeBits", + "SizeFlag", + "IndexModeBits", + "isUnaryDataProc", + "Form"]; + let TSFlagsShifts = [0, + 4, + 7, + 9, + 10]; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARM : Target { + // Pull in Instruction Info: + let InstructionSet = ARMInstrInfo; +} diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h new file mode 100644 index 000000000000..6d9b9ee88000 --- /dev/null +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -0,0 +1,394 @@ +//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM addressing mode implementation stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace llvm { + +/// ARM_AM - ARM Addressing Mode Stuff +namespace ARM_AM { + enum ShiftOpc { + no_shift = 0, + asr, + lsl, + lsr, + ror, + rrx + }; + + enum AddrOpc { + add = '+', sub = '-' + }; + + static inline const char *getShiftOpcStr(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return "asr"; + case ARM_AM::lsl: return "lsl"; + case ARM_AM::lsr: return "lsr"; + case ARM_AM::ror: return "ror"; + case ARM_AM::rrx: return "rrx"; + } + } + + static inline ShiftOpc getShiftOpcForNode(SDValue N) { + switch (N.getOpcode()) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. + //case ARMISD::RRX: return ARM_AM::rrx; + } + } + + enum AMSubMode { + bad_am_submode = 0, + ia, + ib, + da, + db + }; + + static inline const char *getAMSubModeStr(AMSubMode Mode) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return "ia"; + case ARM_AM::ib: return "ib"; + case ARM_AM::da: return "da"; + case ARM_AM::db: return "db"; + } + } + + static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return isLD ? "fd" : "ea"; + case ARM_AM::ib: return isLD ? "ed" : "fa"; + case ARM_AM::da: return isLD ? "fa" : "ed"; + case ARM_AM::db: return isLD ? "ea" : "fd"; + } + } + + /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. + /// + static inline unsigned rotr32(unsigned Val, unsigned Amt) { + assert(Amt < 32 && "Invalid rotate amount"); + return (Val >> Amt) | (Val << ((32-Amt)&31)); + } + + /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. 
+  ///
+  static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+    assert(Amt < 32 && "Invalid rotate amount");
+    return (Val << Amt) | (Val >> ((32-Amt)&31));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #1: shift_operand with registers
+  //===--------------------------------------------------------------------===//
+  //
+  // This 'addressing mode' is used for arithmetic instructions. It can
+  // represent things like:
+  //   reg
+  //   reg [asr|lsl|lsr|ror|rrx] reg
+  //   reg [asr|lsl|lsr|ror|rrx] imm
+  //
+  // This is stored as three operands [rega, regb, opc]. The first is the base
+  // reg, the second is the shift amount (or reg0 if not present or imm). The
+  // third operand encodes the shift opcode and the imm if a reg isn't present.
+  //
+  static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+    return ShOp | (Imm << 3);
+  }
+  static inline unsigned getSORegOffset(unsigned Op) {
+    return Op >> 3;
+  }
+  static inline ShiftOpc getSORegShOp(unsigned Op) {
+    return (ShiftOpc)(Op & 7);
+  }
+
+  /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+  /// the 8-bit imm value.
+  static inline unsigned getSOImmValImm(unsigned Imm) {
+    return Imm & 0xFF;
+  }
+  /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+  /// the rotate amount.
+  static inline unsigned getSOImmValRot(unsigned Imm) {
+    return (Imm >> 8) * 2;
+  }
+
+  /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+  /// computing the rotate amount to use. If this immediate value cannot be
+  /// handled with a single shifter-op, determine a good rotate amount that will
+  /// take a maximal chunk of bits out of the immediate.
+  static inline unsigned getSOImmValRotate(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+    // of zero.
+    if ((Imm & ~255U) == 0) return 0;
+
+    // Use CTZ to compute the rotate amount.
+    unsigned TZ = CountTrailingZeros_32(Imm);
+
+    // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+    // not 9.
+    unsigned RotAmt = TZ & ~1;
+
+    // If we can handle this spread, return it.
+    if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+      return (32-RotAmt)&31; // HW rotates right, not left.
+
+    // For values like 0xF000000F, we should skip the first run of ones, then
+    // retry the hunt.
+    if (Imm & 1) {
+      unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+      if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
+        // Restart the search for a high-order bit after the initial run of
+        // ones.
+        unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
+
+        // Rotate amount must be even.
+        unsigned RotAmt2 = TZ2 & ~1;
+
+        // If this fits, use it.
+        if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+          return (32-RotAmt2)&31; // HW rotates right, not left.
+      }
+    }
+
+    // Otherwise, we have no way to cover this span of bits with a single
+    // shifter_op immediate. Return a chunk of bits that will be useful to
+    // handle.
+    return (32-RotAmt)&31; // HW rotates right, not left.
+  }
+
+  /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+  /// into a shifter_operand immediate operand, return the 12-bit encoding for
+  /// it. If not, return -1.
+  static inline int getSOImmVal(unsigned Arg) {
+    // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+    // of zero.
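+    // Worked example (editorial, not in the upstream source): for
+    // Arg = 0x0000F000, getSOImmValRotate returns 20 (rotating right by 20
+    // maps 0xF000 onto 0xF), so the encoding below evaluates to
+    // rotl32(0xF000, 20) | ((20 >> 1) << 8) == 0x0F | 0xA00 == 0xA0F,
+    // i.e. immed_8 = 0x0F with rotate_imm = 10.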
+    if ((Arg & ~255U) == 0) return Arg;
+
+    unsigned RotAmt = getSOImmValRotate(Arg);
+
+    // If this cannot be handled with a single shifter_op, bail out.
+    if (rotr32(~255U, RotAmt) & Arg)
+      return -1;
+
+    // Encode this correctly.
+    return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+  }
+
+  /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+  /// or'ing together two SOImmVal's.
+  static inline bool isSOImmTwoPartVal(unsigned V) {
+    // If this can be handled with a single shifter_op, bail out.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+    if (V == 0)
+      return false;
+
+    // If this can be handled with two shifter_op's, accept.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+    return V == 0;
+  }
+
+  /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+  /// return the first chunk of it.
+  static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+    return rotr32(255U, getSOImmValRotate(V)) & V;
+  }
+
+  /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+  /// return the second chunk of it.
+  static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+    // Mask out the first hunk.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+    // Take what's left.
+    assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+    return V;
+  }
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+  /// by a left shift. Returns the shift amount to use.
+  static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially immediate operands with a shift
+    // of zero.
+    if ((Imm & ~255U) == 0) return 0;
+
+    // Use CTZ to compute the shift amount.
+    return CountTrailingZeros_32(Imm);
+  }
+
+  /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+  static inline bool isThumbImmShiftedVal(unsigned V) {
+    // If this can be handled with an 8-bit immediate shifted left, masking off
+    // that immediate leaves nothing behind.
+    V = (~255U << getThumbImmValShift(V)) & V;
+    return V == 0;
+  }
+
+  /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+  static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+    return V >> getThumbImmValShift(V);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #2
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for most simple load/store instructions.
+  //
+  // addrmode2 := reg +/- reg shop imm
+  // addrmode2 := reg +/- imm12
+  //
+  // The first operand is always a Reg. The second operand is a reg if in
+  // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+  //
+  // If this addressing mode is a frame index (before prolog/epilog insertion
+  // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+  // with no shift amount for the frame offset.
+  //
+  static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+    assert(Imm12 < (1 << 12) && "Imm too large!");
+    bool isSub = Opc == sub;
+    return Imm12 | ((int)isSub << 12) | (SO << 13);
+  }
+  static inline unsigned getAM2Offset(unsigned AM2Opc) {
+    return AM2Opc & ((1 << 12)-1);
+  }
+  static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+    return ((AM2Opc >> 12) & 1) ? sub : add;
+  }
+  static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+    return (ShiftOpc)(AM2Opc >> 13);
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #3
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for sign-extending loads, and load/store-pair instructions.
+  //
+  // addrmode3 := reg +/- reg
+  // addrmode3 := reg +/- imm8
+  //
+  // The first operand is always a Reg. The second operand is a reg if in
+  // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 8, the immediate in bits 0-7.
+
+  /// getAM3Opc - This function encodes the addrmode3 opc field.
+  static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+    bool isSub = Opc == sub;
+    return ((int)isSub << 8) | Offset;
+  }
+  static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+    return AM3Opc & 0xFF;
+  }
+  static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+    return ((AM3Opc >> 8) & 1) ? sub : add;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #4
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for load / store multiple instructions.
+  //
+  // addrmode4 := reg, <mode>
+  //
+  // The four modes are:
+  //    IA - Increment after
+  //    IB - Increment before
+  //    DA - Decrement after
+  //    DB - Decrement before
+  //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+  // the memory transfer.
+
+  static inline AMSubMode getAM4SubMode(unsigned Mode) {
+    return (AMSubMode)(Mode & 0x7);
+  }
+
+  static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+    return (int)SubMode | ((int)WB << 3);
+  }
+
+  static inline bool getAM4WBFlag(unsigned Mode) {
+    return (Mode >> 3) & 1;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #5
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for coprocessor instructions, such as FP load/stores.
+  //
+  // addrmode5 := reg +/- imm8*4
+  //
+  // The first operand is always a Reg. The second operand encodes the
+  // operation in bit 8 and the immediate in bits 0-7.
+  //
+  // This can also be used for FP load/store multiple ops. The second operand
+  // then encodes the writeback mode in bit 8 and the number of registers (or 2
+  // times the number of registers for DPR ops) in bits 0-7. In addition, bits
+  // 9-11 encode one of the following two sub-modes:
+  //
+  //    IA - Increment after
+  //    DB - Decrement before
+
+  /// getAM5Opc - This function encodes the addrmode5 opc field.
+  static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+    bool isSub = Opc == sub;
+    return ((int)isSub << 8) | Offset;
+  }
+  static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+    return AM5Opc & 0xFF;
+  }
+  static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+    return ((AM5Opc >> 8) & 1) ? sub : add;
+  }
+
+  /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+  /// FSTM instructions.
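+  // Editorial example (not in the upstream source): per the helpers above,
+  // getAM4ModeImm(ARM_AM::ia, /*WB=*/true) encodes as 0b1001 (sub-mode in
+  // bits 0-2, writeback in bit 3), and the AddrOpc form getAM5Opc(sub, 4)
+  // encodes as 0x104, i.e. "reg - 4*4 bytes" once imm8 is scaled by 4.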
+ static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, + unsigned char Offset) { + assert((SubMode == ia || SubMode == db) && + "Illegal addressing mode 5 sub-mode!"); + return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + } + static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { + return (AMSubMode)((AM5Opc >> 9) & 0x7); + } + static inline bool getAM5WBFlag(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1); + } + +} // end namespace ARM_AM +} // end namespace llvm + +#endif + diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h new file mode 100644 index 000000000000..3b38375fbc71 --- /dev/null +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -0,0 +1,64 @@ +//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains enumerations and support routines for ARM build attributes +// as defined in ARM ABI addenda document (ABI release 2.07). +// +//===----------------------------------------------------------------------===// + +#ifndef __TARGET_ARMBUILDATTRS_H__ +#define __TARGET_ARMBUILDATTRS_H__ + +namespace ARMBuildAttrs { + enum { + File = 1, + Section = 2, + Symbol = 3, + CPU_raw_name = 4, + CPU_name = 5, + CPU_arch = 6, + CPU_arch_profile = 7, + ARM_ISA_use = 8, + THUMB_ISA_use = 9, + VFP_arch = 10, + WMMX_arch = 11, + Advanced_SIMD_arch = 12, + PCS_config = 13, + ABI_PCS_R9_use = 14, + ABI_PCS_RW_data = 15, + ABI_PCS_RO_data = 16, + ABI_PCS_GOT_use = 17, + ABI_PCS_wchar_t = 18, + ABI_FP_rounding = 19, + ABI_FP_denormal = 20, + ABI_FP_exceptions = 21, + ABI_FP_user_exceptions = 22, + ABI_FP_number_model = 23, + ABI_align8_needed = 24, + ABI_align8_preserved = 25, + ABI_enum_size = 26, + ABI_HardFP_use = 27, + ABI_VFP_args = 28, + ABI_WMMX_args = 29, + ABI_optimization_goals = 30, + ABI_FP_optimization_goals = 31, + compatibility = 32, + CPU_unaligned_access = 34, + VFP_HP_extension = 36, + ABI_FP_16bit_format = 38, + nodefaults = 64, + also_compatible_with = 65, + T2EE_use = 66, + conformance = 67, + Virtualization_use = 68, + MPextension_use = 70 + }; +} + +#endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td new file mode 100644 index 000000000000..6cd786eed4d5 --- /dev/null +++ b/lib/Target/ARM/ARMCallingConv.td @@ -0,0 +1,87 @@ +//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for ARM architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. 
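+// Editorial example (not in the upstream source): under AAPCS an i64
+// argument is lowered as two 8-byte-aligned i32 pieces, so for
+//   void f(int a, long long b);
+// a lands in R0, R1 is skipped as a pad register, and b occupies R2+R3 --
+// that is what the CCIfAlign<"8"> rule below implements.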
+class CCIfSubtarget<string F, CCAction A>:
+  CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match if the original alignment of the arg is the given value.
+class CCIfAlign<string Align, CCAction A>:
+  CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // f64 is passed in pairs of GPRs, possibly split onto the stack
+  CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_AAPCS : CallingConv<[
+
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // i64/f64 is passed in even pairs of GPRs
+  // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+  // (and the same is true for f64 if VFP is not enabled)
+  CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+  CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+                       "ArgFlags.getOrigAlign() != 8",
+                       CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
+  CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetCC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
+  CCDelegateTo<RetCC_ARM_APCS>
+]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 000000000000..44fac12019b3
--- /dev/null
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1411 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions
+// into relocatable machine code.
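+// Editorial note (not in the upstream source): "relocatable" here means each
+// emitted word may carry a MachineRelocation (constant pool, jump table,
+// global, or branch target) that the JIT resolves after layout; the
+// emit*Address helpers below record those fixups.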
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+  class ARMCodeEmitter {
+  public:
+    /// getBinaryCodeForInstr - This function, generated by the
+    /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+    /// machine instructions.
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+  };
+
+  template<class CodeEmitter>
+  class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+                                    public ARMCodeEmitter {
+    ARMJITInfo *JTI;
+    const ARMInstrInfo *II;
+    const TargetData *TD;
+    TargetMachine &TM;
+    CodeEmitter &MCE;
+    const std::vector<MachineConstantPoolEntry> *MCPEs;
+    const std::vector<MachineJumpTableEntry> *MJTEs;
+    bool IsPIC;
+
+  public:
+    static char ID;
+    explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+      : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm),
+        MCE(mce), MCPEs(0), MJTEs(0),
+        IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+    Emitter(TargetMachine &tm, CodeEmitter &mce,
+            const ARMInstrInfo &ii, const TargetData &td)
+      : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm),
+        MCE(mce), MCPEs(0), MJTEs(0),
+        IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const {
+      return "ARM Machine Code Emitter";
+    }
+
+    void emitInstruction(const MachineInstr &MI);
+
+  private:
+
+    void emitWordLE(unsigned Binary);
+
+    void emitDWordLE(uint64_t Binary);
+
+    void emitConstPoolInstruction(const MachineInstr &MI);
+
+    void emitMOVi2piecesInstruction(const MachineInstr &MI);
+
+    void emitLEApcrelJTInstruction(const MachineInstr &MI);
+
+    void emitPseudoMoveInstruction(const MachineInstr &MI);
+
+    void addPCLabel(unsigned LabelID);
+
+    void emitPseudoInstruction(const MachineInstr &MI);
+
+    unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+                                    const TargetInstrDesc &TID,
+                                    const MachineOperand &MO,
+                                    unsigned OpIdx);
+
+    unsigned getMachineSoImmOpValue(unsigned SoImm);
+
+    unsigned getAddrModeSBit(const MachineInstr &MI,
+                             const TargetInstrDesc &TID) const;
+
+    void emitDataProcessingInstruction(const MachineInstr &MI,
+                                       unsigned ImplicitRd = 0,
+                                       unsigned ImplicitRn = 0);
+
+    void emitLoadStoreInstruction(const MachineInstr &MI,
+                                  unsigned ImplicitRd = 0,
+                                  unsigned ImplicitRn = 0);
+
+    void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+                                      unsigned ImplicitRn = 0);
+
+    void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+    void emitMulFrmInstruction(const MachineInstr &MI);
+
+    void emitExtendInstruction(const MachineInstr &MI);
+
+    void emitMiscArithInstruction(const MachineInstr &MI);
+
+    void emitBranchInstruction(const MachineInstr &MI);
+
+    void emitInlineJumpTable(unsigned
JTIndex); + + void emitMiscBranchInstruction(const MachineInstr &MI); + + void emitVFPArithInstruction(const MachineInstr &MI); + + void emitVFPConversionInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitMiscInstruction(const MachineInstr &MI); + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. + /// + unsigned getShiftOp(unsigned Imm) const ; + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV = 0); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc); + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, + intptr_t JTBase = 0); + }; + template + char Emitter::ID = 0; +} + +/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code +/// to the specified MCE object. + +namespace llvm { + +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE) { + return new Emitter(TM, MCE); +} +FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter(TM, JCE); +} + +} // end namespace llvm + +template +bool Emitter::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + JTI->Initialize(MF, IsPIC); + + do { + DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. +/// +template +unsigned Emitter::getShiftOp(unsigned Imm) const { + switch (ARM_AM::getAM2ShiftOpc(Imm)) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return 2; + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::ror: + case ARM_AM::rrx: return 3; + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
+template +unsigned Emitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { + if (MO.isReg()) + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); + else if (MO.isCPI()) { + const TargetInstrDesc &TID = MI.getDesc(); + // For VFP load, the immediate offset is multiplied by 4. + unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; + emitConstPoolAddress(MO.getIndex(), Reloc); + } else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); + else { + cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; + abort(); + } + return 0; +} + +/// emitGlobalAddress - Emit the specified address to the code stream. +/// +template +void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV) { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + GV, ACPV, NeedStub)); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES)); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitConstPoolAddress(unsigned CPI, + unsigned Reloc) { + // Tell JIT emitter we'll resolve the address. + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, true)); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitJumpTableAddress(unsigned JTIndex, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, true)); +} + +/// emitMachineBasicBlock - Emit the specified address basic block. 
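+// Editorial example (not in the upstream source): the net effect of these
+// helpers is that a branch to a global is emitted with a zero displacement
+// field plus a recorded fixup, e.g. an unconditional BL comes out as the
+// word 0xEB000000 and is patched via ARM::reloc_arm_branch once the callee's
+// address is known.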
+template +void Emitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, intptr_t JTBase) { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB, JTBase)); +} + +template +void Emitter::emitWordLE(unsigned Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << Binary << std::dec << "\n"; +#endif + MCE.emitWordLE(Binary); +} + +template +void Emitter::emitDWordLE(uint64_t Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)Binary << std::dec << "\n"; + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)(Binary >> 32) << std::dec << "\n"; +#endif + MCE.emitDWordLE(Binary); +} + +template +void Emitter::emitInstruction(const MachineInstr &MI) { + DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI; + + NumEmitted++; // Keep track of the # of mi's emitted + switch (MI.getDesc().TSFlags & ARMII::FormMask) { + default: { + assert(0 && "Unhandled instruction encoding format!"); + break; + } + case ARMII::Pseudo: + emitPseudoInstruction(MI); + break; + case ARMII::DPFrm: + case ARMII::DPSoRegFrm: + emitDataProcessingInstruction(MI); + break; + case ARMII::LdFrm: + case ARMII::StFrm: + emitLoadStoreInstruction(MI); + break; + case ARMII::LdMiscFrm: + case ARMII::StMiscFrm: + emitMiscLoadStoreInstruction(MI); + break; + case ARMII::LdStMulFrm: + emitLoadStoreMultipleInstruction(MI); + break; + case ARMII::MulFrm: + emitMulFrmInstruction(MI); + break; + case ARMII::ExtFrm: + emitExtendInstruction(MI); + break; + case ARMII::ArithMiscFrm: + emitMiscArithInstruction(MI); + break; + case ARMII::BrFrm: + emitBranchInstruction(MI); + break; + case ARMII::BrMiscFrm: + emitMiscBranchInstruction(MI); + break; + // VFP instructions. + case ARMII::VFPUnaryFrm: + case ARMII::VFPBinaryFrm: + emitVFPArithInstruction(MI); + break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + emitVFPConversionInstruction(MI); + break; + case ARMII::VFPLdStFrm: + emitVFPLoadStoreInstruction(MI); + break; + case ARMII::VFPLdStMulFrm: + emitVFPLoadStoreMultipleInstruction(MI); + break; + case ARMII::VFPMiscFrm: + emitMiscInstruction(MI); + break; + } +} + +template +void Emitter::emitConstPoolInstruction(const MachineInstr &MI) { + unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. + unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. + const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; + + // Remember the CONSTPOOL_ENTRY address for later relocation. + JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); + + // Emit constpool island entry. In most cases, the actual values will be + // resolved and relocated after code emission. 
+ if (MCPE.isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = + static_cast(MCPE.Val.MachineCPVal); + + DOUT << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'; + + GlobalValue *GV = ACPV->getGV(); + if (GV) { + assert(!ACPV->isStub() && "Don't know how to deal this yet!"); + if (ACPV->isNonLazyPointer()) + MCE.addRelocation(MachineRelocation::getIndirectSymbol( + MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV, + (intptr_t)ACPV, false)); + else + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + ACPV->isStub() || isa(GV), (intptr_t)ACPV); + } else { + assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!"); + emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); + } + emitWordLE(0); + } else { + Constant *CV = MCPE.Val.ConstVal; + +#ifndef NDEBUG + DOUT << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast(CV)) + DOUT << F->getName(); + else + DOUT << *CV; + DOUT << '\n'; +#endif + + if (GlobalValue *GV = dyn_cast(CV)) { + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa(GV)); + emitWordLE(0); + } else if (const ConstantInt *CI = dyn_cast(CV)) { + uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); + emitWordLE(Val); + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + if (CFP->getType() == Type::FloatTy) + emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else if (CFP->getType() == Type::DoubleTy) + emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } +} + +template +void Emitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + assert(MO1.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); + + // Emit the 'mov' instruction. + unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1)); + emitWordLE(Binary); + + // Now the 'orr' instruction. + Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2)); + emitWordLE(Binary); +} + +template +void Emitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { + // It's basically add r, pc, (LJTI - $+8) + + const TargetInstrDesc &TID = MI.getDesc(); + + // Emit the 'add' instruction. 
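+  // Editorial example for emitMOVi2piecesInstruction above (not part of the
+  // upstream source): a constant like 0x00FF00FF is not a single
+  // shifter_operand immediate, but getSOImmTwoPartFirst/Second split it into
+  //   mov rd, #0x000000FF
+  //   orr rd, rd, #0x00FF0000
+  // where each piece is individually encodable.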
+ unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100 + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode Rd. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode Rn which is PC. + Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + + // Encode the displacement. + // Set bit I(25) to identify this is the immediate form of . + Binary |= 1 << ARMII::I_BitShift; + emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); + + emitWordLE(Binary); +} + +template +void Emitter::emitPseudoMoveInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) + Binary |= 1 << ARMII::S_BitShift; + + // Encode register def if there is one. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode the shift operation. + switch (Opcode) { + default: break; + case ARM::MOVrx: + // rrx + Binary |= 0x6 << 4; + break; + case ARM::MOVsrl_flag: + // lsr #1 + Binary |= (0x2 << 4) | (1 << 7); + break; + case ARM::MOVsra_flag: + // asr #1 + Binary |= (0x4 << 4) | (1 << 7); + break; + } + + // Encode register Rm. + Binary |= getMachineOpValue(MI, 1); + + emitWordLE(Binary); +} + +template +void Emitter::addPCLabel(unsigned LabelID) { + DOUT << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'; + JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); +} + +template +void Emitter::emitPseudoInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + switch (Opcode) { + default: + abort(); // FIXME: + case TargetInstrInfo::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + assert(0 && "JIT does not support inline asm!\n"); + abort(); + } + break; + } + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getImm()); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case ARM::DWARF_LOC: + // Do nothing. + break; + case ARM::CONSTPOOL_ENTRY: + emitConstPoolInstruction(MI); + break; + case ARM::PICADD: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // PICADD is just an add instruction that implicitly read pc. + emitDataProcessingInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICSTR: + case ARM::PICSTRB: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitLoadStoreInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDRH: + case ARM::PICLDRSH: + case ARM::PICLDRSB: + case ARM::PICSTRH: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. 
+ emitMiscLoadStoreInstruction(MI, ARM::PC); + break; + } + case ARM::MOVi2pieces: + // Two instructions to materialize a constant. + emitMOVi2piecesInstruction(MI); + break; + case ARM::LEApcrelJT: + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + case ARM::MOVrx: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: + emitPseudoMoveInstruction(MI); + break; + } +} + +template +unsigned Emitter::getMachineSoRegOpValue( + const MachineInstr &MI, + const TargetInstrDesc &TID, + const MachineOperand &MO, + unsigned OpIdx) { + unsigned Binary = getMachineOpValue(MI, MO); + + const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); + const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | + (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +template +unsigned Emitter::getMachineSoImmOpValue(unsigned SoImm) { + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm(SoImm); + return Binary; +} + +template +unsigned Emitter::getAddrModeSBit(const MachineInstr &MI, + const TargetInstrDesc &TID) const { + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + const MachineOperand &MO = MI.getOperand(i-1); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) + return 1 << ARMII::S_BitShift; + } + return 0; +} + +template +void Emitter::emitDataProcessingInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode register def if there is one. + unsigned NumDefs = TID.getNumDefs(); + unsigned OpIdx = 0; + if (NumDefs) + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + else if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. 
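+  // Editorial example (not in the upstream source): for a shifter operand
+  // "rm, lsl #2" (no Rs register), getMachineSoRegOpValue above produces
+  // Rm | (0x0 << 4) | (2 << 7): shift type in bits 6:4, shift_imm in bits
+  // 11:7.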
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + // Encode first non-shifter register operand if there is one. + bool isUnary = TID.TSFlags & ARMII::UnaryDP; + if (!isUnary) { + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else { + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; + ++OpIdx; + } + } + + // Encode shifter operand. + const MachineOperand &MO = MI.getOperand(OpIdx); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + // Encode SoReg. + emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx)); + return; + } + + if (MO.isReg()) { + // Encode register Rm. + emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg())); + return; + } + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of . + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(MO.getImm()); + + emitWordLE(Binary); +} + +template +void Emitter::emitLoadStoreInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDR_PRE. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM2Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative). + Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + if (!MO2.getReg()) { // is immediate + if (ARM_AM::getAM2Offset(AM2Opc)) + // Set the value of offset_12 field + Binary |= ARM_AM::getAM2Offset(AM2Opc); + emitWordLE(Binary); + return; + } + + // Set bit I(25), because this is not in immediate enconding. + Binary |= 1 << ARMII::I_BitShift; + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + // Set bit[3:0] to the corresponding Rm register + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + + // If this instr is in scaled register offset/index instruction, set + // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. 
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { + Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift + Binary |= ShImm << ARMII::ShiftShift; // shift_immed + } + + emitWordLE(Binary); +} + +template +void Emitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StMiscFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDRH_POST. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM3Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative) + Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + + // If this instr is in register offset/index encoding, set bit[3:0] + // to the corresponding Rm register. + if (MO2.getReg()) { + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + emitWordLE(Binary); + return; + } + + // This instr is in immediate offset/index encoding, set bit 22 to 1. + Binary |= 1 << ARMII::AM3_I_BitShift; + if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { + // Set operands + Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH + Binary |= (ImmOffs & 0xF); // immedL + } + + emitWordLE(Binary); +} + +static unsigned getAddrModeUPBits(unsigned Mode) { + unsigned Binary = 0; + + // Set addressing mode by modifying bits U(23) and P(24) + // IA - Increment after - bit U = 1 and bit P = 0 + // IB - Increment before - bit U = 1 and bit P = 1 + // DA - Decrement after - bit U = 0 and bit P = 0 + // DB - Decrement before - bit U = 0 and bit P = 1 + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::da: break; + case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; + case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; + case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; + } + + return Binary; +} + +template +void Emitter::emitLoadStoreMultipleInstruction( + const MachineInstr &MI) { + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set base address operand + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + const MachineOperand &MO = MI.getOperand(1); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); + + // Set bit W(21) + if (ARM_AM::getAM4WBFlag(MO.getImm())) + Binary |= 0x1 << ARMII::W_BitShift; + + // Set registers + for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + RegNum < 16); + Binary |= 0x1 << RegNum; + } + + emitWordLE(Binary); +} + +template +void Emitter::emitMulFrmInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // 32x32->64bit operations have two destination registers. The number + // of register definitions will tell us if that's what we're dealing with. + unsigned OpIdx = 0; + if (TID.getNumDefs() == 2) + Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; + + // Encode Rm + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode Rs + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; + + // Many multiple instructions (e.g. MLA) have three src operands. Encode + // it as Rn (for multiply, that's in the same offset as RdLo. + if (TID.getNumOperands() > OpIdx && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; + + emitWordLE(Binary); +} + +template +void Emitter::emitExtendInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO1 = MI.getOperand(OpIdx++); + const MachineOperand &MO2 = MI.getOperand(OpIdx); + if (MO2.isReg()) { + // Two register operand form. + // Encode Rn. + Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, MO2); + ++OpIdx; + } else { + Binary |= getMachineOpValue(MI, MO1); + } + + // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. + if (MI.getOperand(OpIdx).isImm() && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; + + emitWordLE(Binary); +} + +template +void Emitter::emitMiscArithInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. 
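+  // Editorial example (not in the upstream source): in
+  // emitLoadStoreMultipleInstruction above, an "ldmia sp!, {r0, r2, r7}" sets
+  // register_list bits 0, 2 and 7 (Binary |= 0x85), with U=1/P=0 selecting
+  // "ia" and W=1 encoding the writeback "!".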
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  const MachineOperand &MO = MI.getOperand(OpIdx++);
+  if (OpIdx == TID.getNumOperands() ||
+      TID.OpInfo[OpIdx].isPredicate() ||
+      TID.OpInfo[OpIdx].isOptionalDef()) {
+    // Encode Rm and it's done.
+    Binary |= getMachineOpValue(MI, MO);
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Rn.
+  Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+  // Encode Rm.
+  Binary |= getMachineOpValue(MI, OpIdx++);
+
+  // Encode shift_imm.
+  unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+  assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+  Binary |= ShiftAmt << ARMII::ShiftShift;
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  if (TID.Opcode == ARM::TPsoft)
+    abort(); // FIXME
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Set signed_immed_24 field
+  Binary |= getMachineOpValue(MI, 0);
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) {
+  // Remember the base address of the inline jump table.
+  uintptr_t JTBase = MCE.getCurrentPCValue();
+  JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+  DOUT << "  ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n';
+
+  // Now emit the jump table entries.
+  const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+    if (IsPIC)
+      // DestBB address - JT base.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+    else
+      // Absolute DestBB address.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+    emitWordLE(0);
+  }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Handle jump tables.
+  if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+    // First emit a ldr pc, [] instruction.
+    emitDataProcessingInstruction(MI, ARM::PC);
+
+    // Then emit the inline jump table.
+    unsigned JTIndex = (TID.Opcode == ARM::BR_JTr)
+      ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+    emitInlineJumpTable(JTIndex);
+    return;
+  } else if (TID.Opcode == ARM::BR_JTm) {
+    // First emit a ldr pc, [] instruction.
+    emitLoadStoreInstruction(MI, ARM::PC);
+
+    // Then emit the inline jump table.
+    emitInlineJumpTable(MI.getOperand(3).getIndex());
+    return;
+  }
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  if (TID.Opcode == ARM::BX_RET)
+    // The return register is LR.
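+    // (Editorial note, not part of the original import: BX_RET is "bx lr",
+    // so the Rm field in bits 3-0 receives LR's register number, 14; the
+    // final word for an unconditional "bx lr" is 0xE12FFF1E.)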
+    Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
+  else
+    // otherwise, set the return register
+    Binary |= getMachineOpValue(MI, 0);
+
+  emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegD = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = false;
+  RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP);
+  if (!isSPVFP)
+    Binary |= RegD << ARMII::RegRdShift;
+  else {
+    Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+    Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+  }
+  return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegN = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = false;
+  RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP);
+  if (!isSPVFP)
+    Binary |= RegN << ARMII::RegRnShift;
+  else {
+    Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+    Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+  }
+  return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegM = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = false;
+  RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP);
+  if (!isSPVFP)
+    Binary |= RegM;
+  else {
+    Binary |= ((RegM & 0x1E) >> 1);
+    Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+  }
+  return Binary;
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+  assert((Binary & (1 << ARMII::D_BitShift)) == 0 &&
+         (Binary & (1 << ARMII::N_BitShift)) == 0 &&
+         (Binary & (1 << ARMII::M_BitShift)) == 0 && "VFP encoding bug!");
+
+  // Encode Dd / Sd.
+  Binary |= encodeVFPRd(MI, OpIdx++);
+
+  // If this is a two-address operand, skip it, e.g. FMACD.
+  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  // Encode Dn / Sn.
+  if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+    Binary |= encodeVFPRn(MI, OpIdx++);
+
+  if (OpIdx == TID.getNumOperands() ||
+      TID.OpInfo[OpIdx].isPredicate() ||
+      TID.OpInfo[OpIdx].isOptionalDef()) {
+    // FCMPEZD etc. has only one operand.
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Dm / Sm.
+  Binary |= encodeVFPRm(MI, OpIdx);
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPConversionInstruction(
+      const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned Form = TID.TSFlags & ARMII::FormMask;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  switch (Form) {
+  default: break;
+  case ARMII::VFPConv1Frm:
+  case ARMII::VFPConv2Frm:
+  case ARMII::VFPConv3Frm:
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 0);
+    break;
+  case ARMII::VFPConv4Frm:
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 0);
+    break;
+  case ARMII::VFPConv5Frm:
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 0);
+    break;
+  }
+
+  switch (Form) {
+  default: break;
+  case ARMII::VFPConv1Frm:
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 1);
+    break;
+  case ARMII::VFPConv2Frm:
+  case ARMII::VFPConv3Frm:
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 1);
+    break;
+  case ARMII::VFPConv4Frm:
+  case ARMII::VFPConv5Frm:
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 1);
+    break;
+  }
+
+  if (Form == ARMII::VFPConv5Frm)
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 2);
+  else if (Form == ARMII::VFPConv3Frm)
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 2);
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Dd / Sd.
+  Binary |= encodeVFPRd(MI, OpIdx++);
+
+  // Encode address base.
+  const MachineOperand &Base = MI.getOperand(OpIdx++);
+  Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+  // If there is a non-zero immediate offset, encode it.
+  if (Base.isReg()) {
+    const MachineOperand &Offset = MI.getOperand(OpIdx);
+    if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+      if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+        Binary |= 1 << ARMII::U_BitShift;
+      Binary |= ImmOffs;
+      emitWordLE(Binary);
+      return;
+    }
+  }
+
+  // If immediate offset is omitted, default to +0.
+  Binary |= 1 << ARMII::U_BitShift;
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction(
+                                                      const MachineInstr &MI) {
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Set base address operand
+  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+
+  // Set addressing mode by modifying bits U(23) and P(24)
+  const MachineOperand &MO = MI.getOperand(1);
+  Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+
+  // Set bit W(21)
+  if (ARM_AM::getAM5WBFlag(MO.getImm()))
+    Binary |= 0x1 << ARMII::W_BitShift;
+
+  // First register is encoded in Dd.
+  Binary |= encodeVFPRd(MI, 4);
+
+  // The number of registers is encoded in the offset field.
+  unsigned NumRegs = 1;
+  for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      break;
+    ++NumRegs;
+  }
+  Binary |= NumRegs * 2;
+
+  emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) {
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
+
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 000000000000..db723fe8325e
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1285 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function.  This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumCPEs,      "Number of constpool entries");
+STATISTIC(NumSplit,     "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed,  "Number of cond branches fixed");
+STATISTIC(NumUBrFixed,  "Number of uncond branches fixed");
+
+namespace {
+  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+  /// requires constant pool entries to be scattered among the instructions
+  /// inside a function.  To do this, it completely ignores the normal LLVM
+  /// constant pool; instead, it places constants wherever it feels like with
+  /// special instructions.
+  ///
+  /// The terminology used in this pass includes:
+  ///   Islands - Clumps of constants placed in the function.
+  ///   Water   - Potential places where an island could be formed.
+  ///   CPE     - A constant pool entry that has been placed somewhere, which
+  ///             tracks a list of users.
+  class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+    /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+    /// by MBB Number.  The two-byte pads required for Thumb alignment are
+    /// counted as part of the following block (i.e., the offset and size for
+    /// a padded block will both be ==2 mod 4).
+    std::vector<unsigned> BBSizes;
+
+    /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+    /// The two-byte pads required for Thumb alignment are counted as part of
+    /// the following block.
+    std::vector<unsigned> BBOffsets;
+
+    /// WaterList - A sorted list of basic blocks where islands could be placed
+    /// (i.e. blocks that don't fall through to the following block, due
+    /// to a return, unreachable, or unconditional branch).
+    std::vector<MachineBasicBlock*> WaterList;
+
+    /// CPUser - One user of a constant pool, keeping the machine instruction
+    /// pointer, the constant pool being referenced, and the max displacement
+    /// allowed from the instruction to the CP.
+    struct CPUser {
+      MachineInstr *MI;
+      MachineInstr *CPEMI;
+      unsigned MaxDisp;
+      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+    };
+
+    /// CPUsers - Keep track of all of the machine instructions that use
+    /// various constant pools and their max displacement.
+    std::vector<CPUser> CPUsers;
+
+    /// CPEntry - One per constant pool entry, keeping the machine instruction
+    /// pointer, the constpool index, and the number of CPUser's which
+    /// reference this entry.
+    struct CPEntry {
+      MachineInstr *CPEMI;
+      unsigned CPI;
+      unsigned RefCount;
+      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+    };
+
+    /// CPEntries - Keep track of all of the constant pool entry machine
+    /// instructions. For each original constpool index (i.e. those that
+    /// existed upon entry to this pass), it keeps a vector of entries.
+    /// Original elements are cloned as we go along; the clones are
+    /// put in the vector of the original element, but have distinct CPIs.
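+    /// (Editorial illustration, not part of the original import: after one
+    /// clone of original entry 0 has been placed, the structure might read
+    ///   CPEntries[0] = { CPEntry(CPI 0), CPEntry(CPI 5, a clone) }
+    /// where 5 is a fresh UID handed out by createConstPoolEntryUId().)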
+    std::vector<std::vector<CPEntry> > CPEntries;
+
+    /// ImmBranch - One per immediate branch, keeping the machine instruction
+    /// pointer, conditional or unconditional, the max displacement,
+    /// and (if isCond is true) the corresponding unconditional branch
+    /// opcode.
+    struct ImmBranch {
+      MachineInstr *MI;
+      unsigned MaxDisp : 31;
+      bool isCond : 1;
+      int UncondBr;
+      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+    };
+
+    /// ImmBranches - Keep track of all the immediate branch instructions.
+    ///
+    std::vector<ImmBranch> ImmBranches;
+
+    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+    ///
+    SmallVector<MachineInstr*, 4> PushPopMIs;
+
+    /// HasFarJump - True if any far jump instruction has been emitted during
+    /// the branch fix up pass.
+    bool HasFarJump;
+
+    const TargetInstrInfo *TII;
+    ARMFunctionInfo *AFI;
+    bool isThumb;
+  public:
+    static char ID;
+    ARMConstantIslands() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM constant island placement and branch shortening pass";
+    }
+
+  private:
+    void DoInitialPlacement(MachineFunction &Fn,
+                            std::vector<MachineInstr*> &CPEMIs);
+    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+    void InitialFunctionScan(MachineFunction &Fn,
+                             const std::vector<MachineInstr*> &CPEMIs);
+    MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+    void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+    void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+    bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+    int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+    bool LookForWater(CPUser &U, unsigned UserOffset,
+                      MachineBasicBlock **NewMBB);
+    MachineBasicBlock *AcceptWater(MachineBasicBlock *WaterBB,
+                          std::vector<MachineBasicBlock*>::iterator IP);
+    void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+                        MachineBasicBlock **NewMBB);
+    bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex);
+    void RemoveDeadCPEMI(MachineInstr *CPEMI);
+    bool RemoveUnusedCPEntries();
+    bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+                      MachineInstr *CPEMI, unsigned Disp,
+                      bool DoDump);
+    bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+                        CPUser &U);
+    bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+                         unsigned Disp, bool NegativeOK);
+    bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+    bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br);
+    bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br);
+    bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br);
+    bool UndoLRSpillRestore();
+
+    unsigned GetOffsetOf(MachineInstr *MI) const;
+    void dumpBBs();
+    void verify(MachineFunction &Fn);
+  };
+  char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &Fn) {
+  assert(BBOffsets.size() == BBSizes.size());
+  for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+    assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+  if (isThumb) {
+    for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+         MBBI != E; ++MBBI) {
+      MachineBasicBlock *MBB = MBBI;
+      if (!MBB->empty() &&
+          MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY)
+        assert((BBOffsets[MBB->getNumber()]%4 == 0 &&
+                BBSizes[MBB->getNumber()]%4 == 0) ||
+               (BBOffsets[MBB->getNumber()]%4 != 0 &&
+                BBSizes[MBB->getNumber()]%4 != 0));
+    }
+  }
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+  for (unsigned J = 0, E = BBOffsets.size(); J != E; ++J) {
+    DOUT << "block " << J << " offset " << BBOffsets[J] <<
+            " size " << BBSizes[J] << "\n";
+  }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+  return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+  MachineConstantPool &MCP = *Fn.getConstantPool();
+
+  TII = Fn.getTarget().getInstrInfo();
+  AFI = Fn.getInfo<ARMFunctionInfo>();
+  isThumb = AFI->isThumbFunction();
+
+  HasFarJump = false;
+
+  // Renumber all of the machine basic blocks in the function, guaranteeing
+  // that the numbers agree with the position of the block in the function.
+  Fn.RenumberBlocks();
+
+  /// Thumb functions containing constant pools get 2-byte alignment.
+  /// This is so we can keep exact track of where the alignment padding goes.
+  /// Set default.
+  AFI->setAlign(isThumb ? 1U : 2U);
+
+  // Perform the initial placement of the constant pool entries.  To start
+  // with, we put them all at the end of the function.
+  std::vector<MachineInstr*> CPEMIs;
+  if (!MCP.isEmpty()) {
+    DoInitialPlacement(Fn, CPEMIs);
+    if (isThumb)
+      AFI->setAlign(2U);
+  }
+
+  /// The next UID to take is the first unused one.
+  AFI->initConstPoolEntryUId(CPEMIs.size());
+
+  // Do the initial scan of the function, building up information about the
+  // sizes of each block, the location of all the water, and finding all of
+  // the constant pool users.
+  InitialFunctionScan(Fn, CPEMIs);
+  CPEMIs.clear();
+
+  /// Remove dead constant pool entries.
+  RemoveUnusedCPEntries();
+
+  // Iteratively place constant pool entries and fix up branches until there
+  // is no change.
+  bool MadeChange = false;
+  while (true) {
+    bool Change = false;
+    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+      Change |= HandleConstantPoolUser(Fn, i);
+    DEBUG(dumpBBs());
+    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+      Change |= FixUpImmediateBr(Fn, ImmBranches[i]);
+    DEBUG(dumpBBs());
+    if (!Change)
+      break;
+    MadeChange = true;
+  }
+
+  // After a while, this might be made debug-only, but it is not expensive.
+  verify(Fn);
+
+  // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+  // undo the spill / restore of LR if possible.
+  if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb)
+    MadeChange |= UndoLRSpillRestore();
+
+  BBSizes.clear();
+  BBOffsets.clear();
+  WaterList.clear();
+  CPUsers.clear();
+  CPEntries.clear();
+  ImmBranches.clear();
+  PushPopMIs.clear();
+
+  return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries.  To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+                                       std::vector<MachineInstr*> &CPEMIs) {
+  // Create the basic block to hold the CPE's.
+  MachineBasicBlock *BB = Fn.CreateMachineBasicBlock();
+  Fn.push_back(BB);
+
+  // Add all of the constants from the constant pool to the end block, use an
+  // identity mapping of CPI's to CPE's.
+  const std::vector<MachineConstantPoolEntry> &CPs =
+    Fn.getConstantPool()->getConstants();
+
+  const TargetData &TD = *Fn.getTarget().getTargetData();
+  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+    unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+    // Verify that all constant pool entries are a multiple of 4 bytes.  If
+    // not, we would have to pad them out or something so that instructions
+    // stay aligned.
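+    // (Editorial note, not part of the original import: e.g. an 8-byte
+    // double entry passes the check below since (8 & 3) == 0, while a
+    // hypothetical 2-byte entry would trip the assert.)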
+    assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+    MachineInstr *CPEMI =
+      BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+                          .addImm(i).addConstantPoolIndex(i).addImm(Size);
+    CPEMIs.push_back(CPEMI);
+
+    // Add a new CPEntry, but no corresponding CPUser yet.
+    std::vector<CPEntry> CPEs;
+    CPEs.push_back(CPEntry(CPEMI, i));
+    CPEntries.push_back(CPEs);
+    NumCPEs++;
+    DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n";
+  }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+  // Get the next machine basic block in the function.
+  MachineFunction::iterator MBBI = MBB;
+  if (next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
+    return false;
+
+  MachineBasicBlock *NextBB = next(MBBI);
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I == NextBB)
+      return true;
+
+  return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+                                        const MachineInstr *CPEMI) {
+  std::vector<CPEntry> &CPEs = CPEntries[CPI];
+  // Number of entries per constpool index should be small, just do a
+  // linear search.
+  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+    if (CPEs[i].CPEMI == CPEMI)
+      return &CPEs[i];
+  }
+  return NULL;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+                                const std::vector<MachineInstr*> &CPEMIs) {
+  unsigned Offset = 0;
+  for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock &MBB = *MBBI;
+
+    // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+    if (!BBHasFallthrough(&MBB))
+      WaterList.push_back(&MBB);
+
+    unsigned MBBSize = 0;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      // Add instruction size to MBBSize.
+      MBBSize += TII->GetInstSizeInBytes(I);
+
+      int Opc = I->getOpcode();
+      if (I->getDesc().isBranch()) {
+        bool isCond = false;
+        unsigned Bits = 0;
+        unsigned Scale = 1;
+        int UOpc = Opc;
+        switch (Opc) {
+        case ARM::tBR_JTr:
+          // A Thumb table jump may involve padding; for the offsets to
+          // be right, functions containing these must be 4-byte aligned.
+          AFI->setAlign(2U);
+          if ((Offset+MBBSize)%4 != 0)
+            MBBSize += 2;           // padding
+          continue;   // Does not get an entry in ImmBranches
+        default:
+          continue;   // Ignore other JT branches
+        case ARM::Bcc:
+          isCond = true;
+          UOpc = ARM::B;
+          // Fallthrough
+        case ARM::B:
+          Bits = 24;
+          Scale = 4;
+          break;
+        case ARM::tBcc:
+          isCond = true;
+          UOpc = ARM::tB;
+          Bits = 8;
+          Scale = 2;
+          break;
+        case ARM::tB:
+          Bits = 11;
+          Scale = 2;
+          break;
+        }
+
+        // Record this immediate branch.
+        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+        ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+      }
+
+      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+        PushPopMIs.push_back(I);
+
+      // Scan the instructions for constant pool operands.
+      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+        if (I->getOperand(op).isCPI()) {
+          // We found one.
+          // The addressing mode tells us the max displacement from the PC
+          // that this instruction permits.
+
+          // Basic size info comes from the TSFlags field.
+          unsigned Bits = 0;
+          unsigned Scale = 1;
+          unsigned TSFlags = I->getDesc().TSFlags;
+          switch (TSFlags & ARMII::AddrModeMask) {
+          default:
+            // Constant pool entries can reach anything.
+            if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+              continue;
+            if (I->getOpcode() == ARM::tLEApcrel) {
+              Bits = 8;   // Taking the address of a CP entry.
+              break;
+            }
+            assert(0 && "Unknown addressing mode for CP reference!");
+          case ARMII::AddrMode1:  // AM1: 8 bits << 2
+            Bits = 8;
+            Scale = 4;  // Taking the address of a CP entry.
+            break;
+          case ARMII::AddrMode2:
+            Bits = 12;  // +-offset_12
+            break;
+          case ARMII::AddrMode3:
+            Bits = 8;   // +-offset_8
+            break;
+            // addrmode4 has no immediate offset.
+          case ARMII::AddrMode5:
+            Bits = 8;
+            Scale = 4;  // +-(offset_8*4)
+            break;
+          case ARMII::AddrModeT1:
+            Bits = 5;   // +offset_5
+            break;
+          case ARMII::AddrModeT2:
+            Bits = 5;
+            Scale = 2;  // +(offset_5*2)
+            break;
+          case ARMII::AddrModeT4:
+            Bits = 5;
+            Scale = 4;  // +(offset_5*4)
+            break;
+          case ARMII::AddrModeTs:
+            Bits = 8;
+            Scale = 4;  // +(offset_8*4)
+            break;
+          }
+
+          // Remember that this is a user of a CP entry.
+          unsigned CPI = I->getOperand(op).getIndex();
+          MachineInstr *CPEMI = CPEMIs[CPI];
+          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+          CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+          // Increment corresponding CPEntry reference count.
+          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+          assert(CPE && "Cannot find a corresponding CPEntry!");
+          CPE->RefCount++;
+
+          // Instructions can only use one CP entry, don't bother scanning the
+          // rest of the operands.
+          break;
+        }
+    }
+
+    // In thumb mode, if this block is a constpool island, we may need padding
+    // so it's aligned on a 4-byte boundary.
+    if (isThumb &&
+        !MBB.empty() &&
+        MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+        (Offset%4) != 0)
+      MBBSize += 2;
+
+    BBSizes.push_back(MBBSize);
+    BBOffsets.push_back(Offset);
+    Offset += MBBSize;
+  }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine
+/// instruction from the start of the function.  This offset changes as stuff
+/// is moved around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the
+  // block it is in.
+  unsigned Offset = BBOffsets[MBB->getNumber()];
+
+  // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+  // alignment padding, and compensate if so.
+  if (isThumb &&
+      MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+      Offset%4 != 0)
+    Offset += 2;
+
+  // Sum instructions before MI in MBB.
+  for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    if (&*I == MI) return Offset;
+    Offset += TII->GetInstSizeInBytes(I);
+  }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+                              const MachineBasicBlock *RHS) {
+  return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers.  Renumber the blocks
+/// and update the arrays that parallel this numbering.
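+/// (Editorial note, not part of the original import: the parallel arrays are
+/// BBSizes and BBOffsets, both indexed by MBB number, so every renumbering
+/// below is mirrored by inserting a slot at the new block's index.)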
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+  // Renumber the MBB's to keep them consecutive.
+  NewBB->getParent()->RenumberBlocks(NewBB);
+
+  // Insert a size into BBSizes to align it properly with the (newly
+  // renumbered) block numbers.
+  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+  // Likewise for BBOffsets.
+  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList.  Specifically, we need to add NewMBB as having
+  // available water after it.
+  std::vector<MachineBasicBlock*>::iterator IP =
+    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+                     CompareMBBNumbers);
+  WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch.  Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+  MachineBasicBlock *OrigBB = MI->getParent();
+  MachineFunction &MF = *OrigBB->getParent();
+
+  // Create a new MBB for the code after the OrigBB.
+  MachineBasicBlock *NewBB =
+    MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+  MF.insert(MBBI, NewBB);
+
+  // Splice the instructions starting with MI over to NewBB.
+  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+  // Add an unconditional branch from OrigBB to NewBB.
+  // Note the new unconditional branch is not being recorded.
+  // There doesn't seem to be meaningful DebugInfo available; this doesn't
+  // correspond to anything in the source.
+  BuildMI(OrigBB, DebugLoc::getUnknownLoc(),
+          TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB);
+  NumSplit++;
+
+  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
+  while (!OrigBB->succ_empty()) {
+    MachineBasicBlock *Succ = *OrigBB->succ_begin();
+    OrigBB->removeSuccessor(Succ);
+    NewBB->addSuccessor(Succ);
+
+    // This pass should be run after register allocation, so there should be
+    // no PHI nodes to update.
+    assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+           && "PHI nodes should be eliminated by now!");
+  }
+
+  // OrigBB branches to NewBB.
+  OrigBB->addSuccessor(NewBB);
+
+  // Update internal data structures to account for the newly inserted MBB.
+  // This is almost the same as UpdateForInsertedWaterBlock, except that
+  // the Water goes after OrigBB, not NewBB.
+  MF.RenumberBlocks(NewBB);
+
+  // Insert a size into BBSizes to align it properly with the (newly
+  // renumbered) block numbers.
+  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+  // Likewise for BBOffsets.
+  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList.  Specifically, we need to add OrigMBB as having
+  // available water after it (but not if it's already there, which happens
+  // when splitting before a conditional branch that is followed by an
+  // unconditional branch - in that case we want to insert NewBB).
+  std::vector<MachineBasicBlock*>::iterator IP =
+    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+                     CompareMBBNumbers);
+  MachineBasicBlock* WaterBB = *IP;
+  if (WaterBB == OrigBB)
+    WaterList.insert(next(IP), NewBB);
+  else
+    WaterList.insert(IP, OrigBB);
+
+  // Figure out how large the first NewMBB is.  (It cannot
+  // contain a constpool_entry or tablejump.)
+  unsigned NewBBSize = 0;
+  for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+       I != E; ++I)
+    NewBBSize += TII->GetInstSizeInBytes(I);
+
+  unsigned OrigBBI = OrigBB->getNumber();
+  unsigned NewBBI = NewBB->getNumber();
+  // Set the size of NewBB in BBSizes.
+  BBSizes[NewBBI] = NewBBSize;
+
+  // We removed instructions from UserMBB, subtract that off from its size.
+  // Add 2 or 4 to the block to count the unconditional branch we added to it.
+  unsigned delta = isThumb ? 2 : 4;
+  BBSizes[OrigBBI] -= NewBBSize - delta;
+
+  // ...and adjust BBOffsets for NewBB accordingly.
+  BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+  // All BBOffsets following these blocks must be modified.
+  AdjustBBOffsetsAfter(NewBB, delta);
+
+  return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant
+/// pool reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+                     unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) {
+  // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+  // purposes of the displacement computation; compensate for that here.
+  // Effectively, the valid range of displacements is 2 bytes smaller for such
+  // references.
+  if (isThumb && UserOffset%4 != 0)
+    UserOffset -= 2;
+  // CPEs will be rounded up to a multiple of 4.
+  if (isThumb && TrialOffset%4 != 0)
+    TrialOffset += 2;
+
+  if (UserOffset <= TrialOffset) {
+    // User before the Trial.
+    if (TrialOffset-UserOffset <= MaxDisp)
+      return true;
+  } else if (NegativeOK) {
+    if (UserOffset-TrialOffset <= MaxDisp)
+      return true;
+  }
+  return false;
+}
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+                                        MachineBasicBlock* Water, CPUser &U) {
+  unsigned MaxDisp = U.MaxDisp;
+  MachineFunction::iterator I = next(MachineFunction::iterator(Water));
+  unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+                       BBSizes[Water->getNumber()];
+
+  // If the CPE is to be inserted before the instruction, that will raise
+  // the offset of the instruction.  (Currently applies only to ARM, so
+  // no alignment compensation attempted here.)
+  if (CPEOffset < UserOffset)
+    UserOffset += U.CPEMI->getOperand(2).getImm();
+
+  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+/// CPEIsInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+                                      MachineInstr *CPEMI,
+                                      unsigned MaxDisp, bool DoDump) {
+  unsigned CPEOffset = GetOffsetOf(CPEMI);
+  assert(CPEOffset%4 == 0 && "Misaligned CPE");
+
+  if (DoDump) {
+    DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm()
+         << " max delta=" << MaxDisp
+         << " insn address=" << UserOffset
+         << " CPE address=" << CPEOffset
+         << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI;
+  }
+
+  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only
+/// predecessor unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+    return false;
+
+  MachineBasicBlock *Succ = *MBB->succ_begin();
+  MachineBasicBlock *Pred = *MBB->pred_begin();
+  MachineInstr *PredMI = &Pred->back();
+  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB)
+    return PredMI->getOperand(0).getMBB() == Succ;
+  return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+                                              int delta) {
+  MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+  for(unsigned i = BB->getNumber()+1; i < BB->getParent()->getNumBlockIDs();
+      i++) {
+    BBOffsets[i] += delta;
+    // If some existing blocks have padding, adjust the padding as needed, a
+    // bit tricky.  delta can be negative so don't use % on that.
+    if (isThumb) {
+      MachineBasicBlock *MBB = MBBI;
+      if (!MBB->empty()) {
+        // Constant pool entries require padding.
+        if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+          unsigned oldOffset = BBOffsets[i] - delta;
+          if (oldOffset%4 == 0 && BBOffsets[i]%4 != 0) {
+            // add new padding
+            BBSizes[i] += 2;
+            delta += 2;
+          } else if (oldOffset%4 != 0 && BBOffsets[i]%4 == 0) {
+            // remove existing padding
+            BBSizes[i] -= 2;
+            delta -= 2;
+          }
+        }
+        // Thumb jump tables require padding.  They should be at the end;
+        // following unconditional branches are removed by AnalyzeBranch.
+        MachineInstr *ThumbJTMI = NULL;
+        if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+          ThumbJTMI = prior(MBB->end());
+        if (ThumbJTMI) {
+          unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
+          unsigned oldMIOffset = newMIOffset - delta;
+          if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
+            // remove existing padding
+            BBSizes[i] -= 2;
+            delta -= 2;
+          } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) {
+            // add new padding
+            BBSizes[i] += 2;
+            delta += 2;
+          }
+        }
+        if (delta == 0)
+          return;
+      }
+      MBBI = next(MBBI);
+    }
+  }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount.  If the refcount
+/// becomes 0 remove the entry and instruction.  Returns true if we removed
+/// the entry, false if we didn't.
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+  // Find the old entry.  Eliminate it if it is no longer used.
+  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+  assert(CPE && "Unexpected!");
+  if (--CPE->RefCount == 0) {
+    RemoveDeadCPEMI(CPEMI);
+    CPE->CPEMI = NULL;
+    NumCPEs--;
+    return true;
+  }
+  return false;
+}
+
+/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is available, and if so,
+/// change the data structures so the user references the clone.  Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) {
+  MachineInstr *UserMI = U.MI;
+  MachineInstr *CPEMI = U.CPEMI;
+
+  // Check to see if the CPE is already in-range.
+  if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) {
+    DOUT << "In range\n";
+    return 1;
+  }
+
+  // No.  Look for previously created clones of the CPE that are in range.
+  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  std::vector<CPEntry> &CPEs = CPEntries[CPI];
+  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+    // We already tried this one
+    if (CPEs[i].CPEMI == CPEMI)
+      continue;
+    // Removing CPEs can leave empty entries, skip
+    if (CPEs[i].CPEMI == NULL)
+      continue;
+    if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) {
+      DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n";
+      // Point the CPUser node to the replacement
+      U.CPEMI = CPEs[i].CPEMI;
+      // Change the CPI in the instruction operand to refer to the clone.
+      for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+        if (UserMI->getOperand(j).isCPI()) {
+          UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+          break;
+        }
+      // Adjust the refcount of the clone...
+      CPEs[i].RefCount++;
+      // ...and the original.  If we didn't remove the old entry, none of the
+      // addresses changed, so we don't need another pass.
+      return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+    }
+  }
+  return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+  return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4;
+}
+
+/// AcceptWater - Small amount of common code factored out of the following.
+MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
+                         std::vector<MachineBasicBlock*>::iterator IP) {
+  DOUT << "found water in range\n";
+  // Remove the original WaterList entry; we want subsequent
+  // insertions in this vicinity to go after the one we're
+  // about to insert.  This considerably reduces the number
+  // of times we have to move the same CPE more than once.
+  WaterList.erase(IP);
+  // CPE goes before following block (NewMBB).
+  return next(MachineFunction::iterator(WaterBB));
+}
+
+/// LookForWater - look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not.  If it returns true, *NewMBB
+/// is set to the WaterList entry.
+/// For ARM, we prefer the water that's farthest away.  For Thumb, prefer
+/// water that will not introduce padding to water that will; within each
+/// group, prefer the water that's farthest away.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+                                      MachineBasicBlock **NewMBB) {
+  std::vector<MachineBasicBlock*>::iterator IPThatWouldPad;
+  MachineBasicBlock* WaterBBThatWouldPad = NULL;
+  if (!WaterList.empty()) {
+    for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()),
+         B = WaterList.begin();; --IP) {
+      MachineBasicBlock* WaterBB = *IP;
+      if (WaterIsInRange(UserOffset, WaterBB, U)) {
+        if (isThumb &&
+            (BBOffsets[WaterBB->getNumber()] +
+             BBSizes[WaterBB->getNumber()])%4 != 0) {
+          // This is valid Water, but would introduce padding.  Remember
+          // it in case we don't find any Water that doesn't do this.
+          if (!WaterBBThatWouldPad) {
+            WaterBBThatWouldPad = WaterBB;
+            IPThatWouldPad = IP;
+          }
+        } else {
+          *NewMBB = AcceptWater(WaterBB, IP);
+          return true;
+        }
+      }
+      if (IP == B)
+        break;
+    }
+  }
+  if (isThumb && WaterBBThatWouldPad) {
+    *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
+    return true;
+  }
+  return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct.
+/// Otherwise the block is split to create a hole with an unconditional
+/// branch around it.  In either case *NewMBB is set to a block following
+/// which the new island can be inserted (the WaterList is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+                       unsigned UserOffset, MachineBasicBlock **NewMBB) {
+  CPUser &U = CPUsers[CPUserIndex];
+  MachineInstr *UserMI = U.MI;
+  MachineInstr *CPEMI = U.CPEMI;
+  MachineBasicBlock *UserMBB = UserMI->getParent();
+  unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+                               BBSizes[UserMBB->getNumber()];
+  assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);
+
+  // If the use is at the end of the block, or the end of the block
+  // is within range, make new water there.  (The addition below is
+  // for the unconditional branch we will be adding: 4 bytes on ARM,
+  // 2 on Thumb.  Possible Thumb alignment padding is allowed for
+  // inside OffsetIsInRange.
+  // If the block ends in an unconditional branch already, it is water,
+  // and is known to be out of range, so we'll always be adding a branch.)
+  if (&UserMBB->back() == UserMI ||
+      OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2 : 4),
+                      U.MaxDisp, !isThumb)) {
+    DOUT << "Split at end of block\n";
+    if (&UserMBB->back() == UserMI)
+      assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+    *NewMBB = next(MachineFunction::iterator(UserMBB));
+    // Add an unconditional branch from UserMBB to fallthrough block.
+    // Record it for branch lengthening; this new branch will not get out of
+    // range, but if the preceding conditional branch is out of range, the
+    // targets will be exchanged, and the altered branch may be out of
+    // range, so the machinery has to know about it.
+    int UncondBr = isThumb ? ARM::tB : ARM::B;
+    BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
+            TII->get(UncondBr)).addMBB(*NewMBB);
+    unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+    ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+                                    MaxDisp, false, UncondBr));
+    int delta = isThumb ? 2 : 4;
+    BBSizes[UserMBB->getNumber()] += delta;
+    AdjustBBOffsetsAfter(UserMBB, delta);
+  } else {
+    // What a big block.  Find a place within the block to split it.
+    // This is a little tricky on Thumb since instructions are 2 bytes
+    // and constant pool entries are 4 bytes: if instruction I references
+    // island CPE, and instruction I+1 references CPE', it will
+    // not work well to put CPE as far forward as possible, since then
+    // CPE' cannot immediately follow it (that location is 2 bytes
+    // farther away from I+1 than CPE was from I) and we'd need to create
+    // a new island.  So, we make a first guess, then walk through the
+    // instructions between the one currently being looked at and the
+    // possible insertion point, and make sure any other instructions
+    // that reference CPEs will be able to use the same island area;
+    // if not, we back up the insertion point.
+
+    // The 4 in the following is for the unconditional branch we'll be
+    // inserting (allows for long branch on Thumb).  Alignment of the
+    // island is handled inside OffsetIsInRange.
+    unsigned BaseInsertOffset = UserOffset + U.MaxDisp - 4;
+    // This could point off the end of the block if we've already got
+    // constant pool entries following this block; only the last one is
+    // in the water list.  Back past any possible branches (allow for a
+    // conditional and a maximally long unconditional).
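+    // (Editorial note, a sketch not in the original import: the back-off
+    // below allows for one conditional branch plus one maximally long
+    // unconditional branch at the end of the block -- 4 + 4 = 8 bytes on
+    // ARM, and 2 + 4 = 6 bytes on Thumb.)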
+    if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+      BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+                         (isThumb ? 6 : 8);
+    unsigned EndInsertOffset = BaseInsertOffset +
+                               CPEMI->getOperand(2).getImm();
+    MachineBasicBlock::iterator MI = UserMI;
+    ++MI;
+    unsigned CPUIndex = CPUserIndex+1;
+    for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+         Offset < BaseInsertOffset;
+         Offset += TII->GetInstSizeInBytes(MI),
+         MI = next(MI)) {
+      if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+        if (!OffsetIsInRange(Offset, EndInsertOffset,
+                             CPUsers[CPUIndex].MaxDisp, !isThumb)) {
+          BaseInsertOffset -= (isThumb ? 2 : 4);
+          EndInsertOffset  -= (isThumb ? 2 : 4);
+        }
+        // This is overly conservative, as we don't account for CPEMIs
+        // being reused within the block, but it doesn't matter much.
+        EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+        CPUIndex++;
+      }
+    }
+    DOUT << "Split in middle of big block\n";
+    *NewMBB = SplitBlockBeforeInstr(prior(MI));
+  }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range.  If so, pick up the constant pool value and move it some
+/// place in-range.  Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
+                                                unsigned CPUserIndex) {
+  CPUser &U = CPUsers[CPUserIndex];
+  MachineInstr *UserMI = U.MI;
+  MachineInstr *CPEMI = U.CPEMI;
+  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  unsigned Size = CPEMI->getOperand(2).getImm();
+  MachineBasicBlock *NewMBB;
+  // Compute this only once, it's expensive.  The 4 or 8 is the value the
+  // hardware keeps in the PC (2 insns ahead of the reference).
+  unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+  // Special case: tLEApcrel expands to two MIs; the actual user is the
+  // second instruction.
+  if (UserMI->getOpcode() == ARM::tLEApcrel)
+    UserOffset += 2;
+
+  // See if the current entry is within range, or there is a clone of it
+  // in range.
+  int result = LookForExistingCPEntry(U, UserOffset);
+  if (result == 1) return false;
+  else if (result == 2) return true;
+
+  // No existing clone of this CPE is within range.
+  // We will be generating a new clone.  Get a UID for it.
+  unsigned ID = AFI->createConstPoolEntryUId();
+
+  // Look for water where we can place this CPE.  We look for the farthest one
+  // away that will work.  Forward references only for now (although later
+  // we might find some that are backwards).
+  if (!LookForWater(U, UserOffset, &NewMBB)) {
+    // No water found.
+    DOUT << "No water found\n";
+    CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
+  }
+
+  // Okay, we know we can put an island before NewMBB now, do it!
+  MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock();
+  Fn.insert(NewMBB, NewIsland);
+
+  // Update internal data structures to account for the newly inserted MBB.
+  UpdateForInsertedWaterBlock(NewIsland);
+
+  // Decrement the old entry, and remove it if refcount becomes 0.
+  DecrementOldEntry(CPI, CPEMI);
+
+  // Now that we have an island to add the CPE to, clone the original CPE and
+  // add it to the island.
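+  // (Editorial note, not part of the original import: a CONSTPOOL_ENTRY's
+  // operands are, in order, its unique ID, the constant pool index it
+  // mirrors, and its size in bytes -- the same layout this pass reads back
+  // through getOperand(0), getOperand(1) and getOperand(2).)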
+  U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
+                    TII->get(ARM::CONSTPOOL_ENTRY))
+                .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+  NumCPEs++;
+
+  BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+  // Compensate for .align 2 in thumb mode.
+  if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+    Size += 2;
+  // Increase the size of the island block to account for the new entry.
+  BBSizes[NewIsland->getNumber()] += Size;
+  AdjustBBOffsetsAfter(NewIsland, Size);
+
+  // Finally, change the CPI in the instruction operand to be ID.
+  for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+    if (UserMI->getOperand(i).isCPI()) {
+      UserMI->getOperand(i).setIndex(ID);
+      break;
+    }
+
+  DOUT << "  Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI;
+
+  return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction.  Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+  MachineBasicBlock *CPEBB = CPEMI->getParent();
+  unsigned Size = CPEMI->getOperand(2).getImm();
+  CPEMI->eraseFromParent();
+  BBSizes[CPEBB->getNumber()] -= Size;
+  // All succeeding offsets have the current size value added in, fix this.
+  if (CPEBB->empty()) {
+    // In thumb mode, the size of the island may be padded by two to
+    // compensate for the alignment requirement.  It will then be 2 when the
+    // block is empty, so fix this.
+    if (BBSizes[CPEBB->getNumber()] != 0) {
+      Size += BBSizes[CPEBB->getNumber()];
+      BBSizes[CPEBB->getNumber()] = 0;
+    }
+  }
+  AdjustBBOffsetsAfter(CPEBB, -Size);
+  // An island has only one predecessor BB and one successor BB.  Check if
+  // this BB's predecessor jumps directly to this BB's successor.  This
+  // shouldn't happen currently.
+  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+  // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+  bool MadeChange = false;
+  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+    std::vector<CPEntry> &CPEs = CPEntries[i];
+    for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+      if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+        RemoveDeadCPEMI(CPEs[j].CPEMI);
+        CPEs[j].CPEMI = NULL;
+        MadeChange = true;
+      }
+    }
+  }
+  return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,
+                                     MachineBasicBlock *DestBB,
+                                     unsigned MaxDisp) {
+  unsigned PCAdj      = isThumb ? 4 : 8;
+  unsigned BrOffset   = GetOffsetOf(MI) + PCAdj;
+  unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+  DOUT << "Branch to destination BB#" << DestBB->getNumber()
+       << " from BB#" << MI->getParent()->getNumber()
+       << " max delta=" << MaxDisp
+       << " from " << GetOffsetOf(MI) << " to " << DestOffset
+       << " offset " << int(DestOffset-BrOffset) << "\t" << *MI;
+
+  if (BrOffset <= DestOffset) {
+    // Branch before the Dest.
+    if (DestOffset-BrOffset <= MaxDisp)
+      return true;
+  } else {
+    if (BrOffset-DestOffset <= MaxDisp)
+      return true;
+  }
+  return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
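+/// (Editorial example, not part of the original import: a Thumb tBcc carries
+/// an 8-bit immediate scaled by 2, so InitialFunctionScan recorded
+/// MaxDisp = ((1<<7)-1)*2 = 254 bytes; a conditional branch spanning more
+/// than that is rewritten by the code below.)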
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
+  MachineInstr *MI = Br.MI;
+  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+  // Check to see if the DestBB is already in-range.
+  if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+    return false;
+
+  if (!Br.isCond)
+    return FixUpUnconditionalBr(Fn, Br);
+  return FixUpConditionalBr(Fn, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field.  If the LR register has
+/// been spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, insert an intermediate branch instruction to reach the
+/// destination.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+  MachineInstr *MI = Br.MI;
+  MachineBasicBlock *MBB = MI->getParent();
+  assert(isThumb && "Expected a Thumb function!");
+
+  // Use BL to implement far jump.
+  Br.MaxDisp = (1 << 21) * 2;
+  MI->setDesc(TII->get(ARM::tBfar));
+  BBSizes[MBB->getNumber()] += 2;
+  AdjustBBOffsetsAfter(MBB, 2);
+  HasFarJump = true;
+  NumUBrFixed++;
+
+  DOUT << "  Changed B to long jump " << *MI;
+
+  return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field.  It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+  MachineInstr *MI = Br.MI;
+  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+  // Add an unconditional branch to the destination and invert the branch
+  // condition to jump over it:
+  // blt L1
+  // =>
+  // bge L2
+  // b   L1
+  // L2:
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+  CC = ARMCC::getOppositeCondition(CC);
+  unsigned CCReg = MI->getOperand(2).getReg();
+
+  // If the branch is at the end of its MBB and that has a fall-through block,
+  // direct the updated conditional branch to the fall-through block.
+  // Otherwise, split the MBB before the next instruction.
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *BMI = &MBB->back();
+  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+  NumCBrFixed++;
+  if (BMI != MI) {
+    if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+        BMI->getOpcode() == Br.UncondBr) {
+      // Last MI in the BB is an unconditional branch.  Can we simply invert
+      // the condition and swap destinations:
+      // beq L1
+      // b   L2
+      // =>
+      // bne L2
+      // b   L1
+      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+      if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+        DOUT << "  Invert Bcc condition and swap its destination with " << *BMI;
+        BMI->getOperand(0).setMBB(DestBB);
+        MI->getOperand(0).setMBB(NewDest);
+        MI->getOperand(1).setImm(CC);
+        return true;
+      }
+    }
+  }
+
+  if (NeedSplit) {
+    SplitBlockBeforeInstr(MI);
+    // No need for the branch to the next block.  We're adding an
+    // unconditional branch to the destination.
+    int delta = TII->GetInstSizeInBytes(&MBB->back());
+    BBSizes[MBB->getNumber()] -= delta;
+    MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+    AdjustBBOffsetsAfter(SplitBB, -delta);
+    MBB->back().eraseFromParent();
+    // BBOffsets[SplitBB] is wrong temporarily, fixed below
+  }
+  MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+
+  DOUT << "  Insert B to BB#" << DestBB->getNumber()
+       << " also invert condition and change dest. to BB#"
+       << NextBB->getNumber() << "\n";
+
+  // Insert a new conditional branch and a new unconditional branch.
+  // Also update the ImmBranch, as well as adding a new entry for the new
+  // branch.
+  BuildMI(MBB, DebugLoc::getUnknownLoc(),
+          TII->get(MI->getOpcode()))
+    .addMBB(NextBB).addImm(CC).addReg(CCReg);
+  Br.MI = &MBB->back();
+  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+  BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+  // Remove the old conditional branch.  It may or may not still be in MBB.
+  BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+  MI->eraseFromParent();
+
+  // The net size change is an addition of one unconditional branch.
+  int delta = TII->GetInstSizeInBytes(&MBB->back());
+  AdjustBBOffsetsAfter(MBB, delta);
+  return true;
+}
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only
+/// spill LR / restore LR to pc.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+  bool MadeChange = false;
+  for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+    MachineInstr *MI = PushPopMIs[i];
+    if (MI->getOpcode() == ARM::tPOP_RET &&
+        MI->getOperand(0).getReg() == ARM::PC &&
+        MI->getNumExplicitOperands() == 1) {
+      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+      MI->eraseFromParent();
+      MadeChange = true;
+    }
+  }
+  return MadeChange;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 000000000000..3a038c9a8c37
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,100 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)gv->getType()),
+ GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
+ ARMCP::ARMCPKind k,
+ const char *Modif)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0),
+ Modifier(Modif) {}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->GV == GV &&
+ CPV->S == S &&
+ CPV->LabelId == LabelId &&
+ CPV->Kind == Kind &&
+ CPV->PCAdjust == PCAdjust)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger((unsigned)Kind);
+ ID.AddInteger(PCAdjust);
+}
+
+void ARMConstantPoolValue::dump() const {
+ cerr << " " << *this;
+}
+
+void ARMConstantPoolValue::print(std::ostream &O) const {
+ raw_os_ostream RawOS(O);
+ print(RawOS);
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (GV)
+ O << GV->getName();
+ else
+ O << S;
+ if (isNonLazyPointer()) O << "$non_lazy_ptr";
+ else if (isStub()) O << "$stub";
+ if (Modifier) O << "(" << Modifier << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 000000000000..d2b9066dcc97
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,92 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
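+// As rendered by print() in the .cpp, an entry reads roughly like (example
+// only): "foo$non_lazy_ptr-(LPC3+8-.)", i.e. the symbol name, an optional
+// $non_lazy_ptr or $stub suffix, an optional (modifier), and a PC-relative
+// adjustment term when PCAdjust is nonzero.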
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include <iosfwd>
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPNonLazyPtr,
+ CPStub
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC relative displacement between the address of the load
+/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ GlobalValue *GV; // GlobalValue being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub?
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ // 8 for ARM, 4 for Thumb.
+ const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+public:
+ ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
+ const char *Modifier);
+
+
+ GlobalValue *getGV() const { return GV; }
+ const char *getSymbol() const { return S; }
+ const char *getModifier() const { return Modifier; }
+ bool hasModifier() const { return Modifier != NULL; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
+ bool isStub() const { return Kind == ARMCP::CPStub; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ void print(std::ostream *O) const { if (O) print(*O); }
+ void print(std::ostream &O) const;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
new file mode 100644
index 000000000000..405b8f2b9f28
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -0,0 +1,32 @@
+//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_FRAMEINFO_H +#define ARM_FRAMEINFO_H + +#include "ARM.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "ARMSubtarget.h" + +namespace llvm { + +class ARMFrameInfo : public TargetFrameInfo { +public: + explicit ARMFrameInfo(const ARMSubtarget &ST) + : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) { + } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp new file mode 100644 index 000000000000..ca3a9cb40329 --- /dev/null +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -0,0 +1,911 @@ +//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the ARM target. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// ARMDAGToDAGISel - ARM specific code to select ARM machine +/// instructions for SelectionDAG operations. +/// +namespace { +class ARMDAGToDAGISel : public SelectionDAGISel { + ARMTargetMachine &TM; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const ARMSubtarget *Subtarget; + +public: + explicit ARMDAGToDAGISel(ARMTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm), + Subtarget(&TM.getSubtarget()) { + } + + virtual const char *getPassName() const { + return "ARM Instruction Selection"; + } + + SDNode *Select(SDValue Op); + virtual void InstructionSelect(); + bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + + bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, + SDValue &Label); + + bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale, + SDValue &Base, SDValue &OffImm, + SDValue &Offset); + bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + SDValue &B, SDValue &C); + + // Include the pieces autogenerated from the target description. +#include "ARMGenDAGISel.inc" + +private: + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps); +}; +} + +void ARMDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL) { + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. + int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return true; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return true; + } + + // Match simple R +/- imm12 operands. + if (N.getOpcode() == ISD::ADD) + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 0x1000) || + (RHSC < 0 && RHSC > -0x1000)) { // 12 bits. 
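+ // Worked example (hypothetical operands): (add r1, 4092) folds to base
+ // r1 with an add-offset of 4092, and (add r1, -8) to base r1 with a
+ // sub-offset of 8. Offsets must lie strictly inside (-4096, 4096): a
+ // 12-bit magnitude plus an add/sub direction, per ARM addressing mode 2.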
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(1).getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getZExtValue();
+ if (Val >= 0 && Val < 0x1000) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalized to X + -C, no need to handle it here.
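+ // Reaching this point therefore means a register RHS: (sub x, y) is
+ // selected as base x with a subtracted register offset y, roughly
+ // [x, -y] in addressing mode 3 (illustrative).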
+ Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32); + return true; + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast(N)) { + int Val = (int)C->getZExtValue(); + if (Val >= 0 && Val < 256) { + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32); + return true; + } + } + + Offset = N; + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32); + return true; +} + + +bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset) { + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4. + RHSC >>= 2; + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. 
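+ // Example (illustrative): a byte offset of 1020 scales down to 255 and
+ // fits the 8-bit field, so it folds; 1024 scales to 256 and is rejected
+ // here. Mode 5 immediates are word-scaled, matching the FP (VFP-style)
+ // loads and stores that use this addressing mode.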
+ Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + MVT::i32); + return true; + } + } + } + + Base = N; + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Label) { + if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { + Offset = N.getOperand(0); + SDValue N1 = N.getOperand(1); + Label = CurDAG->getTargetConstant(cast(N1)->getZExtValue(), + MVT::i32); + return true; + } + return false; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset){ + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = Op.getDebugLoc(); + if (N.getOpcode() != ISD::ADD) { + Base = N; + // We must materialize a zero in a reg! Returning a constant here + // wouldn't work without additional code to position the node within + // ISel's topological ordering in a place where ISel will process it + // normally. Instead, just explicitly issue a tMOVri8 node! + Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)), 0); + return true; + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, + unsigned Scale, SDValue &Base, + SDValue &OffImm, SDValue &Offset) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (N.getOpcode() != ISD::ADD) { + Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // Thumb does not have [sp, r] address mode. + RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // If the RHS is + imm5 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied. 
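+ // Example (illustrative): with Scale == 4, a byte offset of 124 divides
+ // down to 31 and fits the 5-bit field; 128 would give 32 and fail the
+ // range check below.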
+ RHSC /= Scale; + if (RHSC >= 0 && RHSC < 32) { + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) + return false; + + RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); + if (N.getOperand(0).getOpcode() == ISD::FrameIndex || + (LHSR && LHSR->getReg() == ARM::SP)) { + // If the RHS is + imm8 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied. + RHSC >>= 2; + if (RHSC >= 0 && RHSC < 256) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +/// getAL - Returns a ARMCC::AL immediate node. +static inline SDValue getAL(SelectionDAG *CurDAG) { + return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); +} + + +SDNode *ARMDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + if (N->isMachineOpcode()) + return NULL; // Already selected. + + switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + unsigned Val = cast(N)->getZExtValue(); + bool UseCP = true; + if (Subtarget->isThumb()) + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. 
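+ // Illustration (hypothetical constants): 0x00FF0000 is an 8-bit value
+ // rotated by an even amount, so a single MOV suffices and UseCP stays
+ // false; 0x12345678 fails the MOV, MVN and two-part tests, so it gets
+ // loaded from a constant pool below.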
+ if (UseCP) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + TLI.getPointerTy()); + + SDNode *ResNode; + if (Subtarget->isThumb()) + ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, + CPIdx, CurDAG->getEntryNode()); + else { + SDValue Ops[] = { + CPIdx, + CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; + ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops, 6); + } + ReplaceUses(Op, SDValue(ResNode, 0)); + return NULL; + } + + // Other cases are autogenerated. + break; + } + case ISD::FrameIndex: { + // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + if (Subtarget->isThumb()) { + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } else { + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5); + } + } + case ISD::ADD: { + if (!Subtarget->isThumb()) + break; + // Select add sp, c to tADDhirr. + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + RegisterSDNode *LHSR = dyn_cast(Op.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast(Op.getOperand(1)); + if (LHSR && LHSR->getReg() == ARM::SP) { + std::swap(N0, N1); + std::swap(LHSR, RHSR); + } + if (RHSR && RHSR->getReg() == ARM::SP) { + SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl, + Op.getValueType(), N0, N0), 0); + return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); + } + break; + } + case ISD::MUL: + if (Subtarget->isThumb()) + break; + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + unsigned RHSV = C->getZExtValue(); + if (!RHSV) break; + if (isPowerOf2_32(RHSV-1)) { // 2^n+1? + SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + } + if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
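+ // Example (illustrative): x*7 becomes rsb rd, rn, rn, lsl #3, i.e.
+ // (x << 3) - x, mirroring the 2^n+1 case above where x*9 is selected
+ // as add rd, rn, rn, lsl #3.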
+ SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + } + } + break; + case ARMISD::FMRRD: + return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + Op.getOperand(0), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)); + case ISD::UMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::SMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::LOAD: { + LoadSDNode *LD = cast(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + MVT LoadedVT = LD->getMemoryVT(); + if (AM != ISD::UNINDEXED) { + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + } + // Other cases are autogenerated. + break; + } + case ARMISD::BRCOND: { + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? 
ARM::tBcc : ARM::Bcc;
+ SDValue Chain = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other,
+ MVT::Flag, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (Op.getNode()->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
+ }
+ ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV: {
+ bool isThumb = Subtarget->isThumb();
+ MVT VT = Op.getValueType();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (!isThumb && VT == MVT::i32 &&
+ SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (VT == MVT::i32 &&
+ N3.getOpcode() == ISD::Constant &&
+ Predicate_so_imm(N3.getNode())) {
+ SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getZExtValue()),
+ MVT::i32);
+ Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode());
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = isThumb ?
ARM::tMOVCCr : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::FCPYScc; + break; + case MVT::f64: + Opc = ARM::FCPYDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + case ARMISD::CNEG: { + MVT VT = Op.getValueType(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::f32: + Opc = ARM::FNEGScc; + break; + case MVT::f64: + Opc = ARM::FNEGDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + + case ISD::DECLARE: { + SDValue Chain = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + FrameIndexSDNode *FINode = dyn_cast(N1); + // FIXME: handle VLAs. + if (!FINode) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + if (N2.getOpcode() == ARMISD::PIC_ADD && isa(N2.getOperand(0))) + N2 = N2.getOperand(0); + LoadSDNode *Ld = dyn_cast(N2); + if (!Ld) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + SDValue BasePtr = Ld->getBasePtr(); + assert(BasePtr.getOpcode() == ARMISD::Wrapper && + isa(BasePtr.getOperand(0)) && + "llvm.dbg.variable should be a constantpool node"); + ConstantPoolSDNode *CP = cast(BasePtr.getOperand(0)); + GlobalValue *GV = 0; + if (CP->isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal(); + GV = ACPV->getGV(); + } else + GV = dyn_cast(CP->getConstVal()); + if (!GV) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + + SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), + TLI.getPointerTy()); + SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); + SDValue Ops[] = { Tmp1, Tmp2, Chain }; + return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, + MVT::Other, Ops, 3); + } + } + + return SelectCode(Op); +} + +bool ARMDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + + SDValue Base, Offset, Opc; + if (!SelectAddrMode2(Op, Op, Base, Offset, Opc)) + return true; + + OutOps.push_back(Base); + OutOps.push_back(Offset); + OutOps.push_back(Opc); + return false; +} + +/// createARMISelDag - This pass converts a legalized DAG into a +/// ARM-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) { + return new ARMDAGToDAGISel(TM); +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp new file mode 100644 index 000000000000..c0fd9dcd1072 --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -0,0 +1,2346 @@ +//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Intrinsics.h" +#include "llvm/GlobalValue.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + +ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) + : TargetLowering(TM), ARMPCLabelIndex(0) { + Subtarget = &TM.getSubtarget(); + + if (Subtarget->isTargetDarwin()) { + // Uses VFP for Thumb libfuncs if available. + if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + // Single-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); + setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); + setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); + setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); + + // Double-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); + setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); + setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); + setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); + + // Single-precision comparisons. + setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); + setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); + setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); + setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); + setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); + setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); + setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); + setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); + + // Double-precision comparisons. 
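+ // Note: setCmpLibcallCC records how each routine's integer result is to
+ // be interpreted; e.g. mapping OEQ to SETNE means "ordered-equal holds
+ // iff the call returns nonzero" (a sketch of the intended semantics).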
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); + setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); + setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); + setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); + setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); + setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); + setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); + setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); + + // Conversions between floating types. + setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); + + // Integer to floating-point conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: + // e.g., __floatunsidf vs. __floatunssidfvfp. + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); + } + } + + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, 0); + setLibcallName(RTLIB::SRL_I128, 0); + setLibcallName(RTLIB::SRA_I128, 0); + + if (Subtarget->isThumb()) + addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + else + addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + addRegisterClass(MVT::f32, ARM::SPRRegisterClass); + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + } + computeRegisterProperties(); + + // ARM does not have f32 extending load. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // ARM does not have i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // ARM supports all 4 flavors of integer indexed load / store. + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } + + // i64 operation support. 
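+ // "Expand" asks the legalizer to rewrite the node rather than match it
+ // directly; e.g. a 64-bit multiply is decomposed or turned into a
+ // runtime call, while 32x32->64 products survive as UMUL_LOHI/SMUL_LOHI
+ // on non-Thumb and are matched to UMULL/SMULL during instruction
+ // selection (illustrative summary).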
+ if (Subtarget->isThumb()) { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + } else { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + } + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL, MVT::i64, Custom); + setOperationAction(ISD::SRA, MVT::i64, Custom); + + // ARM does not have ROTL. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + if (!Subtarget->hasV5TOps() || Subtarget->isThumb()) + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + + // Only ARMv6 has BSWAP. + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // These are expanded into libcalls. + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // Support label based line numbers. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + setOperationAction(ISD::RET, MVT::Other, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + + // Use the default implementation. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + if (!Subtarget->hasV6Ops()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + } + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) + // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + + // We want to custom lower some of our intrinsics. 
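+ // ("Custom" routes a node through this target's LowerOperation hook
+ // instead of the generic legalizer.)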
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // We don't support sin/cos/fmod/copysign/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + } + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + + // int <-> fp are custom expanded into bit_convert + ARMISD ops. + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } + + // We have target-specific dag combine patterns for the following nodes: + // ARMISD::FMRRD - No need to call setTargetDAGCombine + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + + setStackPointerRegisterToSaveRestore(ARM::SP); + setSchedulingPreference(SchedulingForRegPressure); + setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); + setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type + // Do not enable CodePlacementOpt for now: it currently runs after the + // ARMConstantIslandPass and messes up branch relaxation and placement + // of constant islands. 
+ // benefitFromCodePlacementOpt = true; +} + +const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; + case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tCALL: return "ARMISD::tCALL"; + case ARMISD::BRCOND: return "ARMISD::BRCOND"; + case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CNEG: return "ARMISD::CNEG"; + + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; + case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; + case ARMISD::RRX: return "ARMISD::RRX"; + + case ARMISD::FMRRD: return "ARMISD::FMRRD"; + case ARMISD::FMDRR: return "ARMISD::FMDRR"; + + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + } +} + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC +static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown condition code!"); + case ISD::SETNE: return ARMCC::NE; + case ISD::SETEQ: return ARMCC::EQ; + case ISD::SETGT: return ARMCC::GT; + case ISD::SETGE: return ARMCC::GE; + case ISD::SETLT: return ARMCC::LT; + case ISD::SETLE: return ARMCC::LE; + case ISD::SETUGT: return ARMCC::HI; + case ISD::SETUGE: return ARMCC::HS; + case ISD::SETULT: return ARMCC::LO; + case ISD::SETULE: return ARMCC::LS; + } +} + +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It +/// returns true if the operands should be inverted to form the proper +/// comparison. 
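+// For example (illustrative): SETONE has no single ARM predicate, so it maps
+// to MI plus a second check of GT; SETUEQ maps to EQ or VS; and SETOLE is
+// realized as GT with the Invert flag set, i.e. by testing the swapped
+// operands.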
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + ARMCC::CondCodes &CondCode2) { + bool Invert = false; + CondCode2 = ARMCC::AL; + switch (CC) { + default: assert(0 && "Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = ARMCC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = ARMCC::GE; break; + case ISD::SETOLT: CondCode = ARMCC::MI; break; + case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETO: CondCode = ARMCC::VC; break; + case ISD::SETUO: CondCode = ARMCC::VS; break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUGT: CondCode = ARMCC::HI; break; + case ISD::SETUGE: CondCode = ARMCC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = ARMCC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = ARMCC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = ARMCC::NE; break; + } + return Invert; +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +// APCS f64 is in register pairs, possibly split to stack +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const unsigned LoRegList[] = { ARM::R1, + ARM::R2, + ARM::R3, + ARM::NoRegister }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 4; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + if (LoRegList[i] != ARM::NoRegister) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + MVT::i32, LocInfo)); + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const 
unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. The returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *ARMTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + + DebugLoc dl = TheCall->getDebugLoc(); + // Assign locations to each value returned by this call. + SmallVector RVLocs; + bool isVarArg = TheCall->isVarArg(); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + + SmallVector ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val; + if (VA.needsCustom()) { + // Handle f64 as custom. + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } else { + Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + break; + } + + ResultVals.push_back(Val); + } + + // Merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/false, NULL, 0, NULL, 0); +} + +/// LowerMemOpCallTo - Store the argument to the stack. 
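+/// For a non-byval argument this emits a plain store at SP plus the slot's
+/// LocMemOffset (e.g. an offset of 8 becomes a store to [sp, #8],
+/// illustrative); byval aggregates are instead copied with a memcpy of
+/// getByValSize() bytes.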
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+                                    const SDValue &StackPtr,
+                                    const CCValAssign &VA, SDValue Chain,
+                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
+  DebugLoc dl = TheCall->getDebugLoc();
+  unsigned LocMemOffset = VA.getLocMemOffset();
+  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+  if (Flags.isByVal()) {
+    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+  }
+  return DAG.getStore(Chain, dl, Arg, PtrOff,
+                      PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// LowerCALL - Lower an ISD::CALL node into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain.  Also add input and output parameter
+/// nodes.
+SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  MVT RetVT = TheCall->getRetValType(0);
+  SDValue Chain = TheCall->getChain();
+  unsigned CC = TheCall->getCallingConv();
+  assert((CC == CallingConv::C ||
+          CC == CallingConv::Fast) && "unknown calling convention");
+  bool isVarArg = TheCall->isVarArg();
+  SDValue Callee = TheCall->getCallee();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog pass.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // Walk the register/memloc assignments, inserting copies/loads.  In the
+  // case of tail call optimization, arguments are handled later.
+  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+       i != e;
+       ++i, ++realArgIdx) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = TheCall->getArg(realArgIdx);
+    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx);
+
+    // Promote the value if needed.
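+    // LocInfo records how the value must be widened to fill its 32-bit
+    // location: for example, an i8 argument marked signext is sign-extended
+    // to i32 here so the callee sees a full register-width value.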
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // f64 is passed in i32 pairs and must be combined + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1))); + else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, fmrrd.getValue(1), + Flags)); + } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + bool isDirect = false; + bool isARMFunc = false; + bool isLocalARMFunc = false; + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + GlobalValue *GV = G->getGlobal(); + isDirect = true; + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + bool isStub = (isExt && Subtarget->isTargetDarwin()) && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && !isExt; + // tBX takes a register source operand. 
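+    // ARMv4T Thumb has no BLX instruction, so an interworking call to an
+    // ARM-mode callee must go through a register (tBX).  The stub built
+    // below loads the callee's address from the constant pool and, under
+    // PIC, materializes it with an ARMISD::PIC_ADD on the PC label.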
+    if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+                                                           ARMCP::CPStub, 4);
+      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+      Callee = DAG.getLoad(getPointerTy(), dl,
+                           DAG.getEntryNode(), CPAddr, NULL, 0);
+      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+                           getPointerTy(), Callee, PICLabel);
+    } else
+      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    isDirect = true;
+    bool isStub = Subtarget->isTargetDarwin() &&
+                  getTargetMachine().getRelocationModel() != Reloc::Static;
+    isARMFunc = !Subtarget->isThumb() || isStub;
+    // tBX takes a register source operand.
+    const char *Sym = S->getSymbol();
+    if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
+                                                           ARMCP::CPStub, 4);
+      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+      Callee = DAG.getLoad(getPointerTy(), dl,
+                           DAG.getEntryNode(), CPAddr, NULL, 0);
+      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+                           getPointerTy(), Callee, PICLabel);
+    } else
+      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+  }
+
+  // FIXME: handle tail calls differently.
+  unsigned CallOpc;
+  if (Subtarget->isThumb()) {
+    if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+      CallOpc = ARMISD::CALL_NOLINK;
+    else
+      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+  } else {
+    CallOpc = (isDirect || Subtarget->hasV5TOps())
+      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+      : ARMISD::CALL_NOLINK;
+  }
+  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) {
+    // Implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK.
+    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),
+                             InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  std::vector<SDValue> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+  // Returns a chain and a flag for retval copy to use.
+  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+                      &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  if (RetVT != MVT::Other)
+    InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+                 Op.getResNo());
+}
+
+SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+  // The chain is always operand #0.
+  SDValue Chain = Op.getOperand(0);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // CCValAssign - represent the assignment of the return value to a location.
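+  // An f64 return value occupies two of these locations, both marked
+  // needsCustom(); the loop below splits the double into an i32 pair with
+  // ARMISD::FMRRD and copies the halves into consecutive registers.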
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+
+  // CCState - Info about the registers and stack slots.
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze return values of ISD::RET.
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0, realRVLocIdx = 0;
+       i != RVLocs.size();
+       ++i, ++realRVLocIdx) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (regnum1,val1), ...
+    // So i*2+1 indexes only the regnums.
+    SDValue Arg = Op.getOperand(realRVLocIdx*2+1);
+
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+      break;
+    }
+
+    // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
+    // available.
+    if (VA.needsCustom()) {
+      SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+      Flag = Chain.getValue(1);
+      VA = RVLocs[++i]; // skip ahead to next loc
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+                               Flag);
+    } else
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+    // Guarantee that all emitted copies are glued together by the flag
+    // operand, so they cannot be scheduled apart.
+    Flag = Chain.getValue(1);
+  }
+
+  SDValue result;
+  if (Flag.getNode())
+    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+  else // Return void.
+    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+  return result;
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node.  Suppose N is
+// one of the above mentioned nodes.  It has to be wrapped because otherwise
+// Select(N) returns N.  So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes.  These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+  MVT PtrVT = Op.getValueType();
+  // FIXME: there is no actual debug info here.
+  DebugLoc dl = Op.getDebugLoc();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  SDValue Res;
+  if (CP->isMachineConstantPoolEntry())
+    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                    CP->getAlignment());
+  else
+    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+                                    CP->getAlignment());
+  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model.
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+                                                 SelectionDAG &DAG) {
+  DebugLoc dl = GA->getDebugLoc();
+  MVT PtrVT = getPointerTy();
+  unsigned char PCAdj = Subtarget->isThumb() ?
4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "tlsgd", true); + SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); + SDValue Chain = Argument.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + + // call __tls_get_addr. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Argument; + Entry.Ty = (const Type *) Type::Int32Ty; + Args.push_back(Entry); + // FIXME: is there useful debug info available here? + std::pair CallResult = + LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, + CallingConv::C, false, + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + return CallResult.first; +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" or +// "local exec" model. +SDValue +ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + GlobalValue *GV = GA->getGlobal(); + DebugLoc dl = GA->getDebugLoc(); + SDValue Offset; + SDValue Chain = DAG.getEntryNode(); + MVT PtrVT = getPointerTy(); + // Get the Thread Pointer + SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + + if (GV->isDeclaration()){ + // initial exec model + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "gottpoff", true); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Chain = Offset.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } else { + // local exec model + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } + + // The address of the thread local variable is the add of the thread + // pointer with the offset of the variable. + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); +} + +SDValue +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + // TODO: implement the "local dynamic" model + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast(Op); + // If the relocation model is PIC, use the "General Dynamic" TLS Model, + // otherwise use the "Local Exec" TLS Model + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + return LowerToTLSGeneralDynamicModel(GA, DAG); + else + return LowerToTLSExecModels(GA, DAG); +} + +SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + if (RelocM == Reloc::PIC_) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? 
"GOTOFF":"GOT"); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); + if (!UseGOTOFF) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + return Result; + } else { + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + } +} + +/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol +/// even in non-static mode. +static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) + return false; + return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); +} + +SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); + SDValue CPAddr; + if (RelocM == Reloc::Static) + CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + else { + unsigned PCAdj = (RelocM != Reloc::PIC_) + ? 0 : (Subtarget->isThumb() ? 4 : 8); + ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr + : ARMCP::CPValue; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + Kind, PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + } + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + + if (RelocM == Reloc::PIC_) { + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } + if (IsIndirect) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + + return Result; +} + +SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, + SelectionDAG &DAG){ + assert(Subtarget->isTargetELF() && + "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, + ARMCP::CPValue, PCAdj); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); +} + +SDValue +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. 
+ case Intrinsic::arm_thread_pointer: + return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + case Intrinsic::eh_sjlj_setjmp: + SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, + Op.getOperand(1)); + return Res; + } +} + +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + unsigned VarArgsFrameIndex) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + DebugLoc dl = Op.getDebugLoc(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); +} + +SDValue +ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + SDValue Root = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + ARMFunctionInfo *AFI = MF.getInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + + SmallVector ArgValues; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. + if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + if (RegVT == MVT::f64) { + // f64 is passed in pairs of GPRs and must be combined. + RegVT = MVT::i32; + } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) + assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + + // f64 is passed in i32 pairs and must be combined. + if (VA.needsCustom()) { + SDValue ArgValue2; + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isMemLoc()) { + // must be APCS to split like this + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + } + + ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, + ArgValue, ArgValue2); + } + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. 
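+      // For example, a signext i16 argument arrives here as the i32 that
+      // was passed in the register; the AssertSext below records that its
+      // top 16 bits are copies of the sign bit before the value is
+      // truncated back to i16.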
+      switch (VA.getLocInfo()) {
+      default: assert(0 && "Unknown loc info!");
+      case CCValAssign::Full: break;
+      case CCValAssign::BCvt:
+        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        break;
+      case CCValAssign::SExt:
+        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+        break;
+      case CCValAssign::ZExt:
+        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+        break;
+      }
+
+      ArgValues.push_back(ArgValue);
+
+    } else { // !VA.isRegLoc()
+
+      // Sanity check.
+      assert(VA.isMemLoc());
+      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+
+      // Create load nodes to retrieve arguments from the stack.
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+    }
+  }
+
+  // varargs
+  if (isVarArg) {
+    static const unsigned GPRArgRegs[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3
+    };
+
+    unsigned NumGPRs = CCInfo.getFirstUnallocated
+      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+
+    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+    unsigned VARegSize = (4 - NumGPRs) * 4;
+    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+    unsigned ArgOffset = 0;
+    if (VARegSaveSize) {
+      // If this function is vararg, store any remaining integer argument
+      // regs to their spots on the stack so that they may be loaded by
+      // dereferencing the result of va_next.
+      AFI->setVarArgsRegSaveSize(VARegSaveSize);
+      ArgOffset = CCInfo.getNextStackOffset();
+      VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
+                                                 VARegSaveSize - VARegSize);
+      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+      SmallVector<SDValue, 4> MemOps;
+      for (; NumGPRs < 4; ++NumGPRs) {
+        TargetRegisterClass *RC;
+        if (AFI->isThumbFunction())
+          RC = ARM::tGPRRegisterClass;
+        else
+          RC = ARM::GPRRegisterClass;
+
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                          DAG.getConstant(4, getPointerTy()));
+      }
+      if (!MemOps.empty())
+        Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                           &MemOps[0], MemOps.size());
+    } else
+      // This will point to the next argument passed via stack.
+      VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+    return CFP->getValueAPF().isPosZero();
+  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+    // Maybe this has already been legalized into the constant pool?
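+    // The pattern matched here is (load (ARMISD::Wrapper (tconstpool +0.0))),
+    // which is what a +0.0 literal looks like once LowerConstantPool has
+    // placed it in the constant pool.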
+    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+        if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+          return CFP->getValueAPF().isPosZero();
+    }
+  }
+  return false;
+}
+
+static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
+  return ( isThumb && (C & ~255U) == 0) ||
+         (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
+
+/// Returns the appropriate ARM CMP (cmp) and corresponding condition code
+/// for the given operands.
+static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                         SDValue &ARMCC, SelectionDAG &DAG, bool isThumb,
+                         DebugLoc dl) {
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+    unsigned C = RHSC->getZExtValue();
+    if (!isLegalCmpImmediate(C, isThumb)) {
+      // Constant does not fit, try adjusting it by one?
+      switch (CC) {
+      default: break;
+      case ISD::SETLT:
+      case ISD::SETGE:
+        if (isLegalCmpImmediate(C-1, isThumb)) {
+          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+          RHS = DAG.getConstant(C-1, MVT::i32);
+        }
+        break;
+      case ISD::SETULT:
+      case ISD::SETUGE:
+        if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+          RHS = DAG.getConstant(C-1, MVT::i32);
+        }
+        break;
+      case ISD::SETLE:
+      case ISD::SETGT:
+        if (isLegalCmpImmediate(C+1, isThumb)) {
+          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+          RHS = DAG.getConstant(C+1, MVT::i32);
+        }
+        break;
+      case ISD::SETULE:
+      case ISD::SETUGT:
+        if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+          RHS = DAG.getConstant(C+1, MVT::i32);
+        }
+        break;
+      }
+    }
+  }
+
+  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+  ARMISD::NodeType CompareType;
+  switch (CondCode) {
+  default:
+    CompareType = ARMISD::CMP;
+    break;
+  case ARMCC::EQ:
+  case ARMCC::NE:
+  case ARMCC::MI:
+  case ARMCC::PL:
+    // Uses only N and Z flags.
+    CompareType = ARMISD::CMPNZ;
+    break;
+  }
+  ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
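+/// A compare against +0.0 uses the single-operand CMPFPw0 form, so no
+/// register has to be materialized for the zero; in both cases an FMSTAT
+/// is emitted to copy the FPSCR flags into CPSR for the conditional
+/// instruction that follows.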
+static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + SDValue Cmp; + if (!isFloatingPointZero(RHS)) + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); + else + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); +} + +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(4))->get(); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); + } + + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + std::swap(TrueVal, FalseVal); + + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, + ARMCC, CCR, Cmp); + if (CondCode2 != ARMCC::AL) { + SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + // FIXME: Needs another CMP because flag can have but one use. + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + Result = DAG.getNode(ARMISD::CMOV, dl, VT, + Result, TrueVal, ARMCC2, CCR, Cmp2); + } + return Result; +} + +static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMCC, CCR,Cmp); + } + + assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + // Swap the LHS/RHS of the comparison if needed. 
+ std::swap(LHS, RHS); + + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + if (CondCode2 != ARMCC::AL) { + ARMCC = DAG.getConstant(CondCode2, MVT::i32); + SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + } + return Res; +} + +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + MVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast(Table); + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo(); + SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, + Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + if (isPIC) + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); +} + +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI; + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +} + +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; + + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + +static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + // Implement fcopysign with a fabs and a conditional fneg. + SDValue Tmp0 = Op.getOperand(0); + SDValue Tmp1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + MVT VT = Op.getValueType(); + MVT SrcVT = Tmp1.getValueType(); + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); + SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); +} + +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + ? 
ARM::R7 : ARM::R11;
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+  while (Depth--)
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+  return FrameAddr;
+}
+
+SDValue
+ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                           SDValue Chain,
+                                           SDValue Dst, SDValue Src,
+                                           SDValue Size, unsigned Align,
+                                           bool AlwaysInline,
+                                           const Value *DstSV, uint64_t DstSVOff,
+                                           const Value *SrcSV, uint64_t SrcSVOff){
+  // Do repeated 4-byte loads and stores.  To be improved.
+  // This requires 4-byte alignment.
+  if ((Align & 3) != 0)
+    return SDValue();
+  // This requires the copy size to be a constant, preferably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+    return SDValue();
+
+  unsigned BytesLeft = SizeVal & 3;
+  unsigned NumMemOps = SizeVal >> 2;
+  unsigned EmittedNumMemOps = 0;
+  MVT VT = MVT::i32;
+  unsigned VTSize = 4;
+  unsigned i = 0;
+  const unsigned MAX_LOADS_IN_LDM = 6;
+  SDValue TFOps[MAX_LOADS_IN_LDM];
+  SDValue Loads[MAX_LOADS_IN_LDM];
+  uint64_t SrcOff = 0, DstOff = 0;
+
+  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+  // same number of stores.  The loads and stores will get combined into
+  // ldm/stm later on.
+  while (EmittedNumMemOps < NumMemOps) {
+    for (i = 0;
+         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+      Loads[i] = DAG.getLoad(VT, dl, Chain,
+                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                         DAG.getConstant(SrcOff, MVT::i32)),
+                             SrcSV, SrcSVOff + SrcOff);
+      TFOps[i] = Loads[i].getValue(1);
+      SrcOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+    for (i = 0;
+         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                                          DAG.getConstant(DstOff, MVT::i32)),
+                              DstSV, DstSVOff + DstOff);
+      DstOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+    EmittedNumMemOps += i;
+  }
+
+  if (BytesLeft == 0)
+    return Chain;
+
+  // Issue loads / stores for the trailing (1 to 3) bytes.
+  unsigned BytesLeftSave = BytesLeft;
+  i = 0;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    Loads[i] = DAG.getLoad(VT, dl, Chain,
+                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                       DAG.getConstant(SrcOff, MVT::i32)),
+                           SrcSV, SrcSVOff + SrcOff);
+    TFOps[i] = Loads[i].getValue(1);
+    ++i;
+    SrcOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+  i = 0;
+  BytesLeft = BytesLeftSave;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                                        DAG.getConstant(DstOff, MVT::i32)),
+                            DstSV, DstSVOff + DstOff);
+    ++i;
+    DstOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op = N->getOperand(0);
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0) == MVT::f64) {
+    // Turn i64->f64 into FMDRR.
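+    // i64<->f64 bitcasts never need to go through memory here: FMDRR and
+    // FMRRD move a GPR pair directly to and from a VFP double register.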
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } + + // Turn f64->i64 into FMRRD. + SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); +} + +static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { + assert(N->getValueType(0) == MVT::i64 && + (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + "Unknown shift to lower!"); + + // We only lower SRA, SRL of 1 here, all others use generic lowering. + if (!isa(N->getOperand(1)) || + cast(N->getOperand(1))->getZExtValue() != 1) + return SDValue(); + + // If we are in thumb mode, we don't have RRX. + if (ST->isThumb()) return SDValue(); + + // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. + DebugLoc dl = N->getDebugLoc(); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(1, MVT::i32)); + + // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and + // captures the result into a carry flag. + unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + + // The low part is an ARMISD::RRX operand, which shifts the carry in. + Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); +} + +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: assert(0 && "Don't know how to custom lower this!"); abort(); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: + return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : + LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::CALL: return LowerCALL(Op, DAG); + case ISD::RET: return LowerRET(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); + case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); + case ISD::RETURNADDR: break; + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::SRL: + case ISD::SRA: return ExpandSRx(Op.getNode(), DAG,Subtarget); + } + return SDValue(); +} + +/// ReplaceNodeResults - Replace the results of node with an illegal result +/// type with new values built out of custom code. 
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: + assert(0 && "Don't know how to custom expand this!"); + return; + case ISD::BIT_CONVERT: + Results.push_back(ExpandBIT_CONVERT(N, DAG)); + return; + case ISD::SRL: + case ISD::SRA: { + SDValue Res = ExpandSRx(N, DAG, Subtarget); + if (Res.getNode()) + Results.push_back(Res); + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Scheduler Hooks +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case ARM::tMOVCCr: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Optimization Hooks +//===----------------------------------------------------------------------===// + +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + + if (isSlctCC) { + CC = cast(Slct.getOperand(4))->get(); + } else { + SDValue CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + + if (LHS.getOpcode() == ISD::Constant && + cast(LHS)->isNullValue()) { + DoXform = true; + } else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast(RHS)->isNullValue()) { + std::swap(LHS, RHS); + SDValue Op0 = Slct.getOperand(0); + MVT OpVT = isSlctCC ? Op0.getValueType() : + Op0.getOperand(0).getValueType(); + bool isInt = OpVT.isInteger(); + CC = ISD::getSetCCInverse(CC, isInt); + + if (!TLI.isCondCodeLegal(CC, OpVT)) + return SDValue(); // Inverse operator isn't legal. + + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDValue CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), + CCOp.getOperand(0), CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, OtherOp, Result); + } + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +static SDValue PerformSUBCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + + +/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. +static SDValue PerformFMRRDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // fmrrd(fmdrr x, y) -> x,y + SDValue InDouble = N->getOperand(0); + if (InDouble.getOpcode() == ARMISD::FMDRR) + return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + return SDValue(); +} + +SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); + case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + } + + return SDValue(); +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. 
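+/// For example, ARM-mode word loads accept +/- 4095 (addrmode2), while
+/// halfword and sign-extending loads only accept +/- 255 (addrmode3);
+/// the checks below encode exactly those ranges.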
+static bool isLegalAddressImmediate(int64_t V, MVT VT, + const ARMSubtarget *Subtarget) { + if (V == 0) + return true; + + if (!VT.isSimple()) + return false; + + if (Subtarget->isThumb()) { + if (V < 0) + return false; + + unsigned Scale = 1; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + // Scale == 1; + break; + case MVT::i16: + // Scale == 2; + Scale = 2; + break; + case MVT::i32: + // Scale == 4; + Scale = 4; + break; + } + + if ((V & (Scale - 1)) != 0) + return false; + V /= Scale; + return V == (V & ((1LL << 5) - 1)); + } + + if (V < 0) + V = - V; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + // +- imm12 + return V == (V & ((1LL << 12) - 1)); + case MVT::i16: + // +- imm8 + return V == (V & ((1LL << 8) - 1)); + case MVT::f32: + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + MVT VT = getValueType(Ty, true); + if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) + return false; + + // Can never fold addr of global into load/store. + if (AM.BaseGV) + return false; + + switch (AM.Scale) { + case 0: // no scale reg, must be "r+i" or "r", or "i". + break; + case 1: + if (Subtarget->isThumb()) + return false; + // FALL THROUGH. + default: + // ARM doesn't support any R+R*scale+imm addr modes. + if (AM.BaseOffs) + return false; + + if (!VT.isSimple()) + return false; + + int Scale = AM.Scale; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + case MVT::i64: + // This assumes i64 is legalized to a pair of i32. If not (i.e. + // ldrd / strd are used, then its address mode is same as i16. + // r + r + if (Scale < 0) Scale = -Scale; + if (Scale == 1) + return true; + // r + r << imm + return isPowerOf2_32(Scale & ~1); + case MVT::i16: + // r + r + if (((unsigned)AM.HasBaseReg + Scale) <= 2) + return true; + return false; + + case MVT::isVoid: + // Note, we allow "void" uses (basically, uses that aren't loads or + // stores), because arm allows folding a scale into many arithmetic + // operations. This should be made more precise and revisited later. + + // Allow r << imm, but the imm has to be a multiple of two. 
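+      // e.g. a scale of 8 is encodable as "r, lsl #3" in a data-processing
+      // operand, but an odd scale has no shifted-register form.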
+ if (AM.Scale & 1) return false; + return isPowerOf2_32(AM.Scale); + } + break; + } + return true; +} + +static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { + // AddressingMode 3 + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -256) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } + } + isInc = (Ptr->getOpcode() == ISD::ADD); + Offset = Ptr->getOperand(1); + return true; + } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { + // AddressingMode 2 + if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x1000) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + Base = Ptr->getOperand(0); + return true; + } + } + + if (Ptr->getOpcode() == ISD::ADD) { + isInc = true; + ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + Base = Ptr->getOperand(1); + Offset = Ptr->getOperand(0); + } else { + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + } + return true; + } + + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + return true; + } + + // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + return false; +} + +/// getPreIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if the node's address +/// can be legally represented as pre-indexed load / store address. +bool +ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; + } + return false; +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. +bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; + return true; + } + return false; +} + +void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case ARMISD::CMOV: { + // Bits are known zero/one if known on the LHS and RHS. + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + if (KnownZero == 0 && KnownOne == 0) return; + + APInt KnownZeroRHS, KnownOneRHS; + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, + KnownZeroRHS, KnownOneRHS, Depth+1); + KnownZero &= KnownZeroRHS; + KnownOne &= KnownOneRHS; + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +ARMTargetLowering::ConstraintType +ARMTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'l': return C_RegisterClass; + case 'w': return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair +ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'l': + if (Subtarget->isThumb()) + return std::make_pair(0U, ARM::tGPRRegisterClass); + else + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'r': + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'w': + if (VT == MVT::f32) + return std::make_pair(0U, ARM::SPRRegisterClass); + if (VT == MVT::f64) + return std::make_pair(0U, ARM::DPRRegisterClass); + break; + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +std::vector ARMTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() != 1) + return std::vector(); + + switch (Constraint[0]) { // GCC ARM Constraint Letters + default: break; + case 'l': + return make_vector(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + 0); + case 'r': + return make_vector(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, ARM::R11, + ARM::R12, ARM::LR, 0); + case 'w': + if (VT == MVT::f32) + return make_vector(ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12,ARM::S13,ARM::S14,ARM::S15, + ARM::S16,ARM::S17,ARM::S18,ARM::S19, + ARM::S20,ARM::S21,ARM::S22,ARM::S23, + ARM::S24,ARM::S25,ARM::S26,ARM::S27, + ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); + if (VT == MVT::f64) + return make_vector(ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10,ARM::D11, + ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); + break; + } + + return std::vector(); +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. 
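+/// The constraint letters handled below mirror GCC's ARM immediate
+/// constraints: for instance, "I" accepts any value a data-processing
+/// instruction can take as an immediate (0-255 for Thumb ADD, a rotated
+/// 8-bit so_imm in ARM mode).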
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char Constraint, + bool hasMemory, + std::vector&Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + switch (Constraint) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': + ConstantSDNode *C = dyn_cast(Op); + if (!C) + return; + + int64_t CVal64 = C->getSExtValue(); + int CVal = (int) CVal64; + // None of these constraints allow values larger than 32 bits. Check + // that the value fits in an int. + if (CVal != CVal64) + return; + + switch (Constraint) { + case 'I': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 255, for ADD immediates. + if (CVal >= 0 && CVal <= 255) + break; + } else { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getSOImmVal(CVal) != -1) + break; + } + return; + + case 'J': + if (Subtarget->isThumb()) { + // This must be a constant between -255 and -1, for negated ADD + // immediates. This can be used in GCC with an "n" modifier that + // prints the negated value, for use with SUB instructions. It is + // not useful otherwise but is implemented for compatibility. + if (CVal >= -255 && CVal <= -1) + break; + } else { + // This must be a constant between -4095 and 4095. It is not clear + // what this constraint is intended for. Implemented for + // compatibility with GCC. + if (CVal >= -4095 && CVal <= 4095) + break; + } + return; + + case 'K': + if (Subtarget->isThumb()) { + // A 32-bit value where only one byte has a nonzero value. Exclude + // zero to match GCC. This constraint is used by GCC internally for + // constants that can be loaded with a move/shift combination. + // It is not useful otherwise but is implemented for compatibility. + if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) + break; + } else { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getSOImmVal(~CVal) != -1) + break; + } + return; + + case 'L': + if (Subtarget->isThumb()) { + // This must be a constant between -7 and 7, + // for 3-operand ADD/SUB immediate instructions. + if (CVal >= -7 && CVal < 7) + break; + } else { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getSOImmVal(-CVal) != -1) + break; + } + return; + + case 'M': + if (Subtarget->isThumb()) { + // This must be a multiple of 4 between 0 and 1020, for + // ADD sp + immediate. + if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) + break; + } else { + // A power of two or a constant between 0 and 32. This is used in + // GCC for the shift amount on shifted register operands, but it is + // useful in general for any shift amounts. + if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) + break; + } + return; + + case 'N': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 31, for shift amounts. 
+        if (CVal >= 0 && CVal <= 31)
+          break;
+      }
+      return;
+
+    case 'O':
+      if (Subtarget->isThumb()) {
+        // This must be a multiple of 4 between -508 and 508, for
+        // ADD/SUB sp = sp + immediate.
+        if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+          break;
+      }
+      return;
+    }
+    Result = DAG.getTargetConstant(CVal, Op.getValueType());
+    break;
+  }
+
+  if (Result.getNode()) {
+    Ops.push_back(Result);
+    return;
+  }
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+                                                      Ops, DAG);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 000000000000..2dab2dbba663
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,184 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+  class ARMConstantPoolValue;
+
+  namespace ARMISD {
+    // ARM Specific DAG Nodes
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
+                    // TargetExternalSymbol, and TargetGlobalAddress.
+      WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
+
+      CALL,         // Function call.
+      CALL_PRED,    // Function call that's predicable.
+      CALL_NOLINK,  // Function call with branch not branch-and-link.
+      tCALL,        // Thumb function call.
+      BRCOND,       // Conditional branch.
+      BR_JT,        // Jumptable branch.
+      RET_FLAG,     // Return with a flag operand.
+
+      PIC_ADD,      // Add with a PC operand and a PIC label.
+
+      CMP,          // ARM compare instructions.
+      CMPNZ,        // ARM compare that uses only N or Z flags.
+      CMPFP,        // ARM VFP compare instruction, sets FPSCR.
+      CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
+      FMSTAT,       // ARM fmstat instruction.
+      CMOV,         // ARM conditional move instructions.
+      CNEG,         // ARM conditional negate instructions.
+
+      FTOSI,        // FP to sint within a FP register.
+      FTOUI,        // FP to uint within a FP register.
+      SITOF,        // sint to FP within a FP register.
+      UITOF,        // uint to FP within a FP register.
+
+      SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+      SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+      RRX,          // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+      FMRRD,        // double to two gprs.
+      FMDRR,        // Two gprs to double.
+
+      EH_SJLJ_SETJMP,   // SjLj exception handling setjmp
+      EH_SJLJ_LONGJMP,  // SjLj exception handling longjmp
+
+      THREAD_POINTER
+    };
+  }
+
+  //===--------------------------------------------------------------------===//
+  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+  class ARMTargetLowering : public TargetLowering {
+    int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+  public:
+    explicit ARMTargetLowering(TargetMachine &TM);
+
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    /// ReplaceNodeResults - Replace the results of node with an illegal result
+    /// type with new values built out of custom code.
+    ///
+    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+                                    SelectionDAG &DAG);
+
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                  MachineBasicBlock *MBB) const;
+
+    /// isLegalAddressingMode - Return true if the addressing mode represented
+    /// by AM is legal for this target, for a load/store of the specified type.
+    virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+    /// getPreIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if the node's address
+    /// can be legally represented as pre-indexed load / store address.
+    virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                           SDValue &Offset,
+                                           ISD::MemIndexedMode &AM,
+                                           SelectionDAG &DAG) const;
+
+    /// getPostIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if this node can be
+    /// combined with a load / store to form a post-indexed load / store.
+    virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                            SDValue &Base, SDValue &Offset,
+                                            ISD::MemIndexedMode &AM,
+                                            SelectionDAG &DAG) const;
+
+    virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+                                                const APInt &Mask,
+                                                APInt &KnownZero,
+                                                APInt &KnownOne,
+                                                const SelectionDAG &DAG,
+                                                unsigned Depth) const;
+    ConstraintType getConstraintType(const std::string &Constraint) const;
+    std::pair<unsigned, const TargetRegisterClass*>
+      getRegForInlineAsmConstraint(const std::string &Constraint,
+                                   MVT VT) const;
+    std::vector<unsigned>
+    getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                      MVT VT) const;
+
+    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
+    /// true it means one of the asm constraint of the inline asm instruction
+    /// being processed is 'm'.
+    virtual void LowerAsmOperandForConstraint(SDValue Op,
+                                              char ConstraintLetter,
+                                              bool hasMemory,
+                                              std::vector<SDValue> &Ops,
+                                              SelectionDAG &DAG) const;
+
+    virtual const ARMSubtarget* getSubtarget() {
+      return Subtarget;
+    }
+
+  private:
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+    ///
+    unsigned ARMPCLabelIndex;
+
+    SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+                             const SDValue &StackPtr, const CCValAssign &VA,
+                             SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
+    SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+                            unsigned CallingConv, SelectionDAG &DAG);
+    SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+                                          SelectionDAG &DAG);
+    SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+                                 SelectionDAG &DAG);
+    SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+
+    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                    SDValue Chain,
+                                    SDValue Dst, SDValue Src,
+                                    SDValue Size, unsigned Align,
+                                    bool AlwaysInline,
+                                    const Value *DstSV, uint64_t DstSVOff,
+                                    const Value *SrcSV, uint64_t SrcSVOff);
+  };
+}
+
+#endif  // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 000000000000..9a1e1c2bb756
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,868 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction.  This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+  bits<5> Value = val;
+}
+
+def Pseudo        : Format<0>;
+def MulFrm        : Format<1>;
+def BrFrm         : Format<2>;
+def BrMiscFrm     : Format<3>;
+
+def DPFrm         : Format<4>;
+def DPSoRegFrm    : Format<5>;
+
+def LdFrm         : Format<6>;
+def StFrm         : Format<7>;
+def LdMiscFrm     : Format<8>;
+def StMiscFrm     : Format<9>;
+def LdStMulFrm    : Format<10>;
+
+def ArithMiscFrm  : Format<11>;
+def ExtFrm        : Format<12>;
+
+def VFPUnaryFrm   : Format<13>;
+def VFPBinaryFrm  : Format<14>;
+def VFPConv1Frm   : Format<15>;
+def VFPConv2Frm   : Format<16>;
+def VFPConv3Frm   : Format<17>;
+def VFPConv4Frm   : Format<18>;
+def VFPConv5Frm   : Format<19>;
+def VFPLdStFrm    : Format<20>;
+def VFPLdStMulFrm : Format<21>;
+def VFPMiscFrm    : Format<22>;
+
+def ThumbFrm      : Format<23>;
+
+// Misc flag for data processing instructions that indicates whether
+// the instruction has a Rn register operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+//===----------------------------------------------------------------------===//
+
+// ARM Instruction templates.
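+//
+// (Illustration only, not part of the original file: a concrete instruction
+//  is declared from the templates below roughly like
+//    def ADDri : AsI1<0b0100, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+//                     DPFrm, "add", " $dst, $a, $b",
+//                     [(set GPR:$dst, (add GPR:$a, so_imm:$b))]>;
+//  the template supplies predication and encoding bits, while the def
+//  supplies operands, asm string and selection pattern.)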
+// + +class InstARM + : Instruction { + field bits<32> Inst; + + let Namespace = "ARM"; + + // TSFlagsFields + AddrMode AM = am; + bits<4> AddrModeBits = AM.Value; + + SizeFlagVal SZ = sz; + bits<3> SizeFlag = SZ.Value; + + IndexMode IM = im; + bits<2> IndexModeBits = IM.Value; + + Format F = f; + bits<5> Form = F.Value; + + // + // Attributes specific to ARM instructions... + // + bit isUnaryDataProc = 0; + + let Constraints = cstr; +} + +class PseudoInst pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; +} + +// Almost all ARM instructions are predicable. +class I pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list Predicates = [IsARM]; +} + +// Same as I except it can optionally modify CPSR. Note it's modeled as +// an input operand since by default it's a zero register. It will +// become an implicit def once it's "flipped". +class sI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let Pattern = pattern; + list Predicates = [IsARM]; +} + +// Special cases +class XI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsARM]; +} + +class AI pattern> + : I; +class AsI pattern> + : sI; +class AXI pattern> + : XI; + +// Ctrl flow instructions +class ABI opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{27-24} = opcod; +} +class ABXI opcod, dag oops, dag iops, string asm, list pattern> + : XI { + let Inst{27-24} = opcod; +} +class ABXIx2 pattern> + : XI; + +// BR_JT instructions +class JTI pattern> + : XI; + +// addrmode1 instructions +class AI1 opcod, dag oops, dag iops, Format f, string opc, + string asm, list pattern> + : I { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AsI1 opcod, dag oops, dag iops, Format f, string opc, + string asm, list pattern> + : sI { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AXI1 opcod, dag oops, dag iops, Format f, string asm, + list pattern> + : XI { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AI1x2 pattern> + : I; + + +// addrmode2 loads and stores +class AI2 pattern> + : I { + let Inst{27-26} = {0,1}; +} + +// loads +class AI2ldw pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldw pattern> + : XI { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldb pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldb pattern> + : XI { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// stores +class AI2stw pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stw pattern> + : XI { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let 
Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stb pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stb pattern> + : XI { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Pre-indexed loads +class AI2ldwpr pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpr pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Pre-indexed stores +class AI2stwpr pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpr pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed loads +class AI2ldwpo pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpo pattern> + : I { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed stores +class AI2stwpo pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpo pattern> + : I { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// addrmode3 instructions +class AI3 pattern> + : I; +class AXI3 pattern> + : XI; + +// loads +class AI3ldh pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldh pattern> + : XI { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsh pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsh pattern> + : XI { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsb pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsb pattern> + : XI { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldd pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + 
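+// (Summary of the addrmode3 encodings above: the {S,H} bit pair selects the
+//  access kind -- S=0/H=1 halfword, S=1/H=0 signed byte, S=1/H=1 signed
+//  halfword. For those forms L=1 marks loads and L=0 stores; the doubleword
+//  AI3ldd/AI3std classes instead use L=0 with S/H distinguishing load from
+//  store. P=1 selects the offset and pre-indexed forms, P=0 the post-indexed
+//  ones, and W is the writeback bit.)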
+// stores +class AI3sth pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3sth pattern> + : XI { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3std pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed loads +class AI3ldhpr pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldshpr pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsbpr pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed stores +class AI3sthpr pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Post-indexed loads +class AI3ldhpo pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldshpo pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldsbpo pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + +// Post-indexed stores +class AI3sthpo pattern> + : I { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + + +// addrmode4 instructions +class AXI4ld pattern> + : XI { + let Inst{20} = 1; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} +class AXI4st pattern> + : XI { + let Inst{20} = 0; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} + +// Unsigned multiply, multiply-accumulate instructions. 
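+// (Encoding note, summarizing the classes below: the word multiplies
+//  (AMul1I/AsMul1I/AMul2I) all carry the bits{7-4} = 0b1001 multiply
+//  signature with opcod placed in bits{27-21}, while the halfword
+//  SMUL/SMLA forms (AMulxyI) instead set bit 7 and clear bit 4.)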
+class AMul1I opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{7-4} = 0b1001; + let Inst{20} = 0; // S bit + let Inst{27-21} = opcod; +} +class AsMul1I opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : sI { + let Inst{7-4} = 0b1001; + let Inst{27-21} = opcod; +} + +// Most significant word multiply +class AMul2I opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{7-4} = 0b1001; + let Inst{20} = 1; + let Inst{27-21} = opcod; +} + +// SMUL / SMULW / SMLA / SMLAW +class AMulxyI opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{4} = 0; + let Inst{7} = 1; + let Inst{20} = 0; + let Inst{27-21} = opcod; +} + +// Extend instructions. +class AExtI opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{7-4} = 0b0111; + let Inst{27-20} = opcod; +} + +// Misc Arithmetic instructions. +class AMiscA1I opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : I { + let Inst{27-20} = opcod; +} + +//===----------------------------------------------------------------------===// + +// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. +class ARMPat : Pat { + list Predicates = [IsARM]; +} +class ARMV5TEPat : Pat { + list Predicates = [IsARM, HasV5TE]; +} +class ARMV6Pat : Pat { + list Predicates = [IsARM, HasV6]; +} + +//===----------------------------------------------------------------------===// +// +// Thumb Instruction Format Definitions. +// + + +// TI - Thumb instruction. + +class ThumbI pattern> + : InstARM { + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsThumb]; +} + +class TI pattern> + : ThumbI; +class TI1 pattern> + : ThumbI; +class TI2 pattern> + : ThumbI; +class TI4 pattern> + : ThumbI; +class TIs pattern> + : ThumbI; + +// Two-address instructions +class TIt pattern> + : ThumbI; + +// BL, BLX(1) are translated by assembler into two instructions +class TIx2 pattern> + : ThumbI; + +// BR_JT instructions +class TJTI pattern> + : ThumbI; + + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM VFP Instruction templates. +// + +// ARM VFP addrmode5 loads and stores +class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list pattern> + : I { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1011; +} + +class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list pattern> + : I { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1010; +} + +// Load / store multiple +class AXSI5 pattern> + : XI { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; +} + +class AXDI5 pattern> + : XI { + // TODO: Mark the instructions with the appropriate subtarget info. 
+ let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1010; +} + + +// Double precision, unary +class ADuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list pattern> + : AI { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1011; + let Inst{7-4} = opcod3; +} + +// Double precision, binary +class ADbI opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : AI { + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1011; +} + +// Single precision, unary +class ASuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list pattern> + : AI { + // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1010; + let Inst{7-4} = opcod3; +} + +// Single precision, binary +class ASbI opcod, dag oops, dag iops, string opc, + string asm, list pattern> + : AI { + // Bit 22 (D bit) can be changed during instruction encoding. + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1010; +} + +// VFP conversion instructions +class AVConv1I opcod1, bits<4> opcod2, bits<4> opcod3, + dag oops, dag iops, string opc, string asm, list pattern> + : AI { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = opcod3; + let Inst{6} = 1; +} + +class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, + string opc, string asm, list pattern> + : AI { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{4} = 1; +} + +class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list pattern> + : AVConvXI; + +class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list pattern> + : AVConvXI; + +class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list pattern> + : AVConvXI; + +class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list pattern> + : AVConvXI; + +//===----------------------------------------------------------------------===// + + +// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. +class ThumbPat : Pat { + list Predicates = [IsThumb]; +} + +class ThumbV5Pat : Pat { + list Predicates = [IsThumb, HasV5T]; +} diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp new file mode 100644 index 000000000000..4b0dbb5dacdb --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -0,0 +1,1025 @@ +//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. 
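+//
+// Note on conventions in this file: predicable ARM MachineInstrs carry a
+// trailing predicate-immediate (ARMCC::*) plus predicate-register operand
+// pair, and optionally-CPSR-setting instructions add a cc_out register; the
+// AddDefaultPred / AddDefaultCC helpers below append the neutral forms
+// (AL predicate, reg0) of those operands.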
+// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt +EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, + cl::desc("Enable ARM 2-addr to 3-addr conv")); + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + RI(*this, STI) { +} + + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVr: + case ARM::tMOVr: + case ARM::tMOVhir2lor: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2hir: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid ARM MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tSpill: + if 
(MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { + DebugLoc dl = Orig->getDebugLoc(); + if (Orig->getOpcode() == ARM::MOVi2pieces) { + RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(), + Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg(), this, false, dl); + return; + } + + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); +} + +static unsigned getUnindexedOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case ARM::LDR_PRE: + case ARM::LDR_POST: + return ARM::LDR; + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + return ARM::LDRH; + case ARM::LDRB_PRE: + case ARM::LDRB_POST: + return ARM::LDRB; + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + return ARM::LDRSH; + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + return ARM::LDRSB; + case ARM::STR_PRE: + case ARM::STR_POST: + return ARM::STR; + case ARM::STRH_PRE: + case ARM::STRH_POST: + return ARM::STRH; + case ARM::STRB_PRE: + case ARM::STRB_POST: + return ARM::STRB; + } + return 0; +} + +MachineInstr * +ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + if (!EnableARM3Addr) + return NULL; + + MachineInstr *MI = MBBI; + MachineFunction &MF = *MI->getParent()->getParent(); + unsigned TSFlags = MI->getDesc().TSFlags; + bool isPre = false; + switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { + default: return NULL; + case ARMII::IndexModePre: + isPre = true; + break; + case ARMII::IndexModePost: + break; + } + + // Try splitting an indexed load/store to an un-indexed one plus an add/sub + // operation. + unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); + if (MemOpc == 0) + return NULL; + + MachineInstr *UpdateMI = NULL; + MachineInstr *MemMI = NULL; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + bool isLoad = !TID.mayStore(); + const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); + const MachineOperand &Base = MI->getOperand(2); + const MachineOperand &Offset = MI->getOperand(NumOps-3); + unsigned WBReg = WB.getReg(); + unsigned BaseReg = Base.getReg(); + unsigned OffReg = Offset.getReg(); + unsigned OffImm = MI->getOperand(NumOps-2).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); + switch (AddrMode) { + default: + assert(false && "Unknown indexed op!"); + return NULL; + case ARMII::AddrMode2: { + bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + if (OffReg == 0) { + int SOImmVal = ARM_AM::getSOImmVal(Amt); + if (SOImmVal == -1) + // Can't encode it in a so_imm operand. This transformation will + // add more than 1 instruction. Abandon! + return NULL; + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(SOImmVal) + .addImm(Pred).addReg(0).addReg(0); + } else if (Amt != 0) { + ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); + unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? 
ARM::SUBrs : ARM::ADDrs), WBReg) + .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) + .addImm(Pred).addReg(0).addReg(0); + } else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + case ARMII::AddrMode3 : { + bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM3Offset(OffImm); + if (OffReg == 0) + // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt) + .addImm(Pred).addReg(0).addReg(0); + else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + } + + std::vector NewMIs; + if (isPre) { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + NewMIs.push_back(MemMI); + NewMIs.push_back(UpdateMI); + } else { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + if (WB.isDead()) + UpdateMI->getOperand(0).setIsDead(); + NewMIs.push_back(UpdateMI); + NewMIs.push_back(MemMI); + } + + // Transfer LiveVariables states, kill / dead info. + if (LV) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + if (MO.isDef()) { + MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; + if (MO.isDead()) + LV->addVirtualRegisterDead(Reg, NewMI); + } + if (MO.isUse() && MO.isKill()) { + for (unsigned j = 0; j < 2; ++j) { + // Look at the two new MI's in reverse order. + MachineInstr *NewMI = NewMIs[j]; + if (!NewMI->readsRegister(Reg)) + continue; + LV->addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. 
+ TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it. + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // ...likewise if it ends with a branch table followed by an unconditional + // branch. The branch folder can create these, and we must get rid of them for + // correctness of Thumb constant islands. + if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || + SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. + return true; +} + + +unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != BccOpc) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "ARM branch conditions have two components!"); + + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch? + BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + else + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + return 1; + } + + // Two-way conditional branch. 
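+  // (Cond here uses the encoding produced by AnalyzeBranch above: Cond[0] is
+  //  the ARMCC::CondCodes immediate and Cond[1] the flag register, normally
+  //  CPSR; that pair is re-attached to the conditional branch built below.)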
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + return 2; +} + +bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (!AFI->isThumbFunction()) { + if (DestRC == ARM::GPRRegisterClass) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg))); + return true; + } + } else { + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } + } + } + if (DestRC != SrcRC) { + // Not yet supported! + return false; + } + + + if (DestRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + .addReg(SrcReg)); + else if (DestRC == ARM::DPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + .addReg(SrcReg)); + else + return false; + + return true; +} + +void ARMInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = MF.getInfo(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? 
ARM::tSpill : ARM::tSTR; + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::STR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FSTD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FSTS; + } + + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +void ARMInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo:: +loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = MF.getInfo(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::LDR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FLDD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FLDS; + } + + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +bool ARMInstrInfo:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. 
+ MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } + return true; +} + +bool ARMInstrInfo:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + MBB.insert(MI, PopMI); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + Reg = ARM::PC; + PopMI->setDesc(get(ARM::tPOP_RET)); + MBB.erase(MI); + } + PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + return true; +} + +MachineInstr *ARMInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::MOVr: { + if (MI->getOperand(4).getReg() == ARM::CPSR) + // If it is updating CPSR, then it cannot be folded. + break; + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + break; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. 
+ break; + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0); + } + break; + } + case ARM::FCPYS: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + .addReg(SrcReg).addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg) + .addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::FCPYD: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + } + + return NewMI; +} + +bool ARMInstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::MOVr: + // If it is updating CPSR, then it cannot be folded. + return MI->getOperand(4).getReg() != ARM::CPSR; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. + return false; + } + return true; + } + case ARM::FCPYS: + case ARM::FCPYD: + return true; + } + + return false; +} + +bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::BX_RET: // Return. + case ARM::LDM_RET: + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::B: + case ARM::tB: // Uncond branch. + case ARM::tBR_JTr: + case ARM::BR_JTr: // Jumptable branch. + case ARM::BR_JTm: // Jumptable branch through mem. + case ARM::BR_JTadd: // Jumptable branch add to pc. 
+    return true;
+  default: return false;
+  }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+  return false;
+}
+
+bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+  int PIdx = MI->findFirstPredOperandIdx();
+  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::B || Opc == ARM::tB) {
+    MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
+    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+    return true;
+  }
+
+  int PIdx = MI->findFirstPredOperandIdx();
+  if (PIdx != -1) {
+    MachineOperand &PMO = MI->getOperand(PIdx);
+    PMO.setImm(Pred[0].getImm());
+    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+    return true;
+  }
+  return false;
+}
+
+bool ARMInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                  const SmallVectorImpl<MachineOperand> &Pred2) const {
+  if (Pred1.size() > 2 || Pred2.size() > 2)
+    return false;
+
+  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+  if (CC1 == CC2)
+    return true;
+
+  switch (CC1) {
+  default:
+    return false;
+  case ARMCC::AL:
+    return true;
+  case ARMCC::HS:
+    return CC2 == ARMCC::HI;
+  case ARMCC::LS:
+    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+  case ARMCC::GE:
+    return CC2 == ARMCC::GT;
+  case ARMCC::LE:
+    return CC2 == ARMCC::LT;
+  }
+}
+
+bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                    std::vector<MachineOperand> &Pred) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+    return false;
+
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+      Pred.push_back(MO);
+      Found = true;
+    }
+  }
+
+  return Found;
+}
+
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) {
+  return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+  const MachineBasicBlock &MBB = *MI->getParent();
+  const MachineFunction *MF = MBB.getParent();
+  const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
+
+  // Basic size info comes from the TSFlags field.
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned TSFlags = TID.TSFlags;
+
+  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+  default: {
+    // If this machine instr is an inline asm, measure it.
+    if (MI->getOpcode() == ARM::INLINEASM)
+      return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+    if (MI->isLabel())
+      return 0;
+    switch (MI->getOpcode()) {
+    default:
+      assert(0 && "Unknown or unset size field for instr!");
+      break;
+    case TargetInstrInfo::IMPLICIT_DEF:
+    case TargetInstrInfo::DECLARE:
+    case TargetInstrInfo::DBG_LABEL:
+    case TargetInstrInfo::EH_LABEL:
+      return 0;
+    }
+    break;
+  }
+  case ARMII::Size8Bytes: return 8;          // Arm instruction x 2.
+  case ARMII::Size4Bytes: return 4;          // Arm instruction.
+  case ARMII::Size2Bytes: return 2;          // Thumb instruction.
+  case ARMII::SizeSpecial: {
+    switch (MI->getOpcode()) {
+    case ARM::CONSTPOOL_ENTRY:
+      // If this machine instr is a constant pool entry, its size is recorded
+      // as operand #2.
+      return MI->getOperand(2).getImm();
+    case ARM::Int_eh_sjlj_setjmp: return 12;
+    case ARM::BR_JTr:
+    case ARM::BR_JTm:
+    case ARM::BR_JTadd:
+    case ARM::tBR_JTr: {
+      // These are jumptable branches, i.e. a branch followed by an inlined
+      // jumptable. The size is 4 + 4 * number of entries.
+      unsigned NumOps = TID.getNumOperands();
+      MachineOperand JTOP =
+        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+      unsigned JTI = JTOP.getIndex();
+      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+      assert(JTI < JT.size());
+      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+      // aligned.  The assembler / linker may add 2 byte padding just before
+      // the JT entries.  The size does not include this padding; the
+      // constant islands pass does separate bookkeeping for it.
+      // FIXME: If we know the size of the function is less than (1 << 16) *2
+      // bytes, we can use 16-bit entries instead. Then there won't be an
+      // alignment issue.
+      return getNumJTEntries(JT, JTI) * 4 +
+             (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
+    }
+    default:
+      // Otherwise, pseudo-instruction sizes are zero.
+      return 0;
+    }
+  }
+  }
+  return 0; // Not reached
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 000000000000..13ff3fea84be
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,258 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+  enum {
+    //===------------------------------------------------------------------===//
+    // Instruction Flags.
+
+    //===------------------------------------------------------------------===//
+    // This four-bit field describes the addressing mode used.
+
+    AddrModeMask  = 0xf,
+    AddrModeNone  = 0,
+    AddrMode1     = 1,
+    AddrMode2     = 2,
+    AddrMode3     = 3,
+    AddrMode4     = 4,
+    AddrMode5     = 5,
+    AddrModeT1    = 6,
+    AddrModeT2    = 7,
+    AddrModeT4    = 8,
+    AddrModeTs    = 9,   // i8 * 4 for pc and sp relative data
+
+    // Size* - Flags to keep track of the size of an instruction.
+    SizeShift     = 4,
+    SizeMask      = 7 << SizeShift,
+    SizeSpecial   = 1,   // 0 byte pseudo or special case.
+    Size8Bytes    = 2,
+    Size4Bytes    = 3,
+    Size2Bytes    = 4,
+
+    // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for
+    // load and store ops
+    IndexModeShift = 7,
+    IndexModeMask  = 3 << IndexModeShift,
+    IndexModePre   = 1,
+    IndexModePost  = 2,
+
+    //===------------------------------------------------------------------===//
+    // Misc flags.
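+    // (These flag fields are decoded from TargetInstrDesc::TSFlags with the
+    //  usual mask/shift idiom, e.g.
+    //    unsigned Form = (TSFlags & ARMII::FormMask) >> ARMII::FormShift;
+    //  GetInstSizeInBytes in ARMInstrInfo.cpp above applies the same pattern
+    //  with SizeMask/SizeShift.)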
+ + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. + UnaryDP = 1 << 9, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 10, + FormMask = 0x1f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 11 << FormShift, + + // Extend instructions + ExtFrm = 12 << FormShift, + + // VFP formats + VFPUnaryFrm = 13 << FormShift, + VFPBinaryFrm = 14 << FormShift, + VFPConv1Frm = 15 << FormShift, + VFPConv2Frm = 16 << FormShift, + VFPConv3Frm = 17 << FormShift, + VFPConv4Frm = 18 << FormShift, + VFPConv5Frm = 19 << FormShift, + VFPLdStFrm = 20 << FormShift, + VFPLdStMulFrm = 21 << FormShift, + VFPMiscFrm = 22 << FormShift, + + // Thumb format + ThumbFrm = 23 << FormShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; +} + +class ARMInstrInfo : public TargetInstrInfoImpl { + const ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; + + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + // Branch analysis. 
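+  // A minimal sketch of how a client pass (e.g. branch folding) is expected
+  // to drive the hooks declared below; illustrative only, not part of the
+  // interface:
+  //
+  //   MachineBasicBlock *TBB = 0, *FBB = 0;
+  //   SmallVector<MachineOperand, 4> Cond;
+  //   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) &&
+  //       !Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
+  //     TII->RemoveBranch(MBB);
+  //     // Branch to the old fall-through block on the reversed condition.
+  //     TII->InsertBranch(MBB, FBB, TBB, Cond);
+  //   }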
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond) const;
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         const std::vector<CalleeSavedInfo> &CSI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                           const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+  virtual
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  // Predication support.
+  virtual bool isPredicated(const MachineInstr *MI) const;
+
+  ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+    int PIdx = MI->findFirstPredOperandIdx();
+    return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+                      : ARMCC::AL;
+  }
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  /// GetInstSize - Returns the size of the specified MachineInstr.
+  ///
+  virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 000000000000..680e77234db2
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,1390 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +// Node definitions. +def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; + +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInFlag]>; +def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, + [SDNPInFlag]>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; + +def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; + +//===----------------------------------------------------------------------===// +// ARM Flag Definitions. + +class RegConstraint { + string Constraints = C; +} + +//===----------------------------------------------------------------------===// +// ARM specific transformation functions and pattern fragments. 
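+// The so_imm transforms and predicates that follow all lean on
+// ARM_AM::getSOImmVal. A sketch of the contract assumed here (illustrative
+// C++; the real helper lives with the other ARM_AM routines): a value is a
+// valid shifter-operand immediate iff it is an 8-bit value rotated right by
+// an even amount, and the helper returns the 12-bit rot:imm8 encoding or -1.
+//
+//   int getSOImmVal(uint32_t V) {
+//     for (unsigned Rot = 0; Rot < 32; Rot += 2) {
+//       // Rotating V left by Rot undoes a rotate-right of the 8-bit value.
+//       uint32_t Imm = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
+//       if (Imm <= 0xFF) return ((Rot / 2) << 8) | Imm;
+//     }
+//     return -1;
+//   }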
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()),
+                                   MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described
+// for so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()),
+                                   MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described
+// for so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()),
+                                   MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+  int32_t v = (int32_t)N->getZExtValue();
+  return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+def so_imm_neg :
+  PatLeaf<(imm), [{
+    return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+  }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+  PatLeaf<(imm), [{
+    return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+  }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+  return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag<dag res>  : PatFrag<(ops node:$Src), res>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+def brtarget : Operand<OtherVT>;
+
+// A list of registers separated by comma. Used by load/store multiple.
+def reglist : Operand<i32> {
+  let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+  let PrintMethod = "printCPInstOperand";
+}
+
+def jtblock_operand : Operand<i32> {
+  let PrintMethod = "printJTBlockOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+  let PrintMethod = "printPCLabel";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>,    // reg reg imm
+             ComplexPattern<i32, 3, "SelectShifterOperandReg", []> {
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+def so_imm : Operand<i32>,
+             PatLeaf<(imm),
+                     [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }],
+                     so_imm_XFORM> {
+  let PrintMethod = "printSOImmOperand";
+}
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : Operand<i32>,
+                  PatLeaf<(imm), [{
+    return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+  }]> {
+  let PrintMethod = "printSOImm2PartOperand";
+}
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+// Define ARM specific addressing modes.
+
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
+def addrmode2 : Operand<i32>,
+                ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+  let PrintMethod = "printAddrMode2Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am2offset : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+  let PrintMethod = "printAddrMode2OffsetOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+                ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+  let PrintMethod = "printAddrMode3Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+  let PrintMethod = "printAddrMode3OffsetOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode4 := reg,
+//
+def addrmode4 : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode4", []> {
+  let PrintMethod = "printAddrMode4Operand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+  let PrintMethod = "printAddrMode5Operand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+                 ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+  let PrintMethod = "printAddrModePCOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+                            (ops (i32 14), (i32 zero_reg))> {
+  let PrintMethod = "printPredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+//
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+  let PrintMethod = "printSBitModifierOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+  bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1    : AddrMode<1>;
+def AddrMode2    : AddrMode<2>;
+def AddrMode3    : AddrMode<3>;
+def AddrMode4    : AddrMode<4>;
+def AddrMode5    : AddrMode<5>;
+def AddrModeT1   : AddrMode<6>;
+def AddrModeT2   : AddrMode<7>;
+def AddrModeT4   : AddrMode<8>;
+def AddrModeTs   : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+  bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes  : SizeFlagVal<2>;
+def Size4Bytes  : SizeFlagVal<3>;
+def Size2Bytes  : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+  bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre  : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+  def ri : AsI1;
+  def rr : AsI1;
+  def rs : AsI1;
+}
+
+/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
+  def ri : AI1;
+  def rr : AI1;
+  def rs : AI1;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+  def ri : AI1;
+  def rr : AI1;
+  def rs : AI1;
+}
+}
+
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def r     : AExtI,
+              Requires<[IsARM, HasV6]> {
+    let Inst{19-16} = 0b1111;
+  }
+  def r_rot : AExtI,
+              Requires<[IsARM, HasV6]> {
+    let Inst{19-16} = 0b1111;
+  }
+}
+
+/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def rr     : AExtI,
+               Requires<[IsARM, HasV6]>;
+  def rr_rot : AExtI,
+               Requires<[IsARM, HasV6]>;
+}
+
+/// AsXI1_bin_c_irs - Same as AsI1_bin_irs but without the predicate operand;
+/// it uses the carry bit, and can optionally set CPSR.
+let Uses = [CPSR] in {
+multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
+  def ri : AXI1;
+  def rr : AXI1;
+  def rs : AXI1;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+                    i32imm:$size),
+           "${instid:label} ${cpidx:cpentry}", []>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p),
+           "@ ADJCALLSTACKUP $amt1",
+           [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p),
+           "@ ADJCALLSTACKDOWN $amt",
+           [(ARMcallseq_start timm:$amt)]>;
+}
+
+def DWARF_LOC :
+PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+           ".loc $file, $line, $col",
+           [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+
+// Address computation and loads and stores in PIC mode.
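+// A C model of what the PICADD sequence below computes (illustrative; it
+// assumes the usual scheme where the constant-pool slot loaded into $a holds
+// sym - (label + 8), pc reading as "current instruction + 8" on ARM):
+//
+//   uint32_t pic_address(uint32_t label_addr, int32_t cp_entry) {
+//     return (label_addr + 8) + cp_entry;   // == address of sym
+//   }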
+let isNotDuplicable = 1 in { +def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), + Pseudo, "$cp:\n\tadd$p $dst, pc, $a", + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + +let AddedComplexity = 10 in { +let canFoldAsLoad = 1 in +def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr$p $dst, $addr", + [(set GPR:$dst, (load addrmodepc:$addr))]>; + +def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; + +def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; + +def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; + +def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; +} +let AddedComplexity = 10 in { +def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr$p $src, $addr", + [(store GPR:$src, addrmodepc:$addr)]>; + +def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr", + [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; + +def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr", + [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; +} +} // isNotDuplicable = 1 + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in + def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; +} + +// FIXME: remove when we have a way to marking a MI with these properties. +// FIXME: $dst1 should be a def. But the extra ops must be in the end of the +// operand list. +// FIXME: Should pc be an implicit operand like PICADD, etc? +let isReturn = 1, isTerminator = 1 in + def LDM_RET : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { + def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>; + + def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>; + + // ARMv5T and above + def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + "blx $func", + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0011; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + let Uses = [LR] in { + // ARMv4T + def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>; + } +} + +let isBranch = 1, isTerminator = 1 in { + // B is "predicable" since it can be xformed into a Bcc. 
+ let isBarrier = 1 in { + let isPredicable = 1 in + def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + let isNotDuplicable = 1, isIndirectBranch = 1 in { + def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), + "mov pc, $target \n$jt", + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{20} = 0; // S Bit + let Inst{24-21} = 0b1101; + let Inst{27-26} = {0,0}; + } + def BR_JTm : JTI<(outs), + (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; + } + def BR_JTadd : JTI<(outs), + (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $target, $idx \n$jt", + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 0; // S bit + let Inst{24-21} = 0b0100; + let Inst{27-26} = {0,0}; + } + } // isNotDuplicable = 1, isIndirectBranch = 1 + } // isBarrier = 1 + + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), + "b", " $target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; +} + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +// Load +let canFoldAsLoad = 1 in +def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", + [(set GPR:$dst, (load addrmode2:$addr))]>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", []>; + +// Loads with zero extension +def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; + +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; + +let mayLoad = 1 in { +// Load doubleword +def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed loads +def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, + "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>; + 
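+// A C model of the pre- vs post-indexed forms in this block (sketch): the
+// pre-indexed form writes the new base back and then accesses memory, while
+// the post-indexed form accesses through the old base first.
+//
+//   uint32_t ldr_pre(uint32_t *&base, int off) {   // ldr rD, [rN, #off]!
+//     base = (uint32_t*)((char*)base + off);
+//     return *base;
+//   }
+//   uint32_t ldr_post(uint32_t *&base, int off) {  // ldr rD, [rN], #off
+//     uint32_t v = *base;
+//     base = (uint32_t*)((char*)base + off);
+//     return v;
+//   }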
+def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, + "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; +} + +// Store +def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", " $src, $addr", + [(store GPR:$src, addrmode2:$addr)]>; + +// Stores with truncate +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "h $src, $addr", + [(truncstorei16 GPR:$src, addrmode3:$addr)]>; + +def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", "b $src, $addr", + [(truncstorei8 GPR:$src, addrmode2:$addr)]>; + +// Store doubleword +let mayStore = 1 in +def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "d $src, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed stores +def STR_PRE : AI2stwpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, + "str", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STR_POST : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; + +def STRH_POST: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti16 GPR:$src, + GPR:$base, am3offset:$offset))]>; + +def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +def STRB_POST: AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// FIXME: $dst1 should be a def. +let mayLoad = 1 in +def LDM : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let mayStore = 1 in +def STM : AXI4st<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), + LdStMulFrm, "stm${p}${addr:submode} $addr, $src1", + []>; + +//===----------------------------------------------------------------------===// +// Move Instructions. 
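+// The MOVrx and MOV*_flag forms below expose ARM's rrx operation: a one-bit
+// rotate right through the carry flag. As a C sketch (carry made explicit):
+//
+//   uint32_t rrx(uint32_t x, bool &carry) {
+//     uint32_t out = (x >> 1) | ((uint32_t)carry << 31);
+//     carry = x & 1;   // the old bit 0 becomes the new carry
+//     return out;
+//   }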
+// + +def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mov", " $dst, $src", []>, UnaryDP; +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP; + +def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", " $dst, $src, rrx", + [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; + +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. + +let Defs = [CPSR] in { +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, lsr #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; +def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, asr #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; +} + +//===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +defm SXTB : AI_unary_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_unary_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm SXTAB : AI_bin_rrot<0b01101010, + "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm SXTAH : AI_bin_rrot<0b01101011, + "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// TODO: SXT(A){B|H}16 + +// Zero extenders + +let AddedComplexity = 16 in { +defm UXTB : AI_unary_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_unary_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_unary_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 24)>; +def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 8)>; + +defm UXTAB : AI_bin_rrot<0b01101110, "uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; + +// TODO: UXT(A){B|H}16 + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +defm ADD : AsI1_bin_irs<0b0100, "add", + BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm SUB : AsI1_bin_irs<0b0010, "sub", + BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// ADD and SUB with 's' bit set. +defm ADDS : ASI1_bin_s_irs<0b0100, "add", + BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm SUBS : ASI1_bin_s_irs<0b0010, "sub", + BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +// FIXME: Do not allow ADC / SBC to be predicated for now. +defm ADC : AsXI1_bin_c_irs<0b0101, "adc", + BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm SBC : AsXI1_bin_c_irs<0b0110, "sbc", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; + +// These don't define reg/reg forms, because they are handled above. 
+def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>; + +def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>; + +// RSB with 's' bit set. +let Defs = [CPSR] in { +def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>; +def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>; +} + +// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR. +let Uses = [CPSR] in { +def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s), + DPFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>; +def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s), + DPSoRegFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>; +} + +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; + +//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), +// (SUBSri GPR:$src, so_imm_neg:$imm)>; +//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), +// (SBCri GPR:$src, so_imm_neg:$imm)>; + +// Note: These are implemented in C++ code, because they have to generate +// ADD/SUBrs instructions, which use a complex pattern that a xform function +// cannot produce. +// (mul X, 2^n+1) -> (add (X << n), X) +// (mul X, 2^n-1) -> (rsb X, (X << n)) + + +//===----------------------------------------------------------------------===// +// Bitwise Instructions. +// + +defm AND : AsI1_bin_irs<0b0000, "and", + BinOpFrag<(and node:$LHS, node:$RHS)>>; +defm ORR : AsI1_bin_irs<0b1100, "orr", + BinOpFrag<(or node:$LHS, node:$RHS)>>; +defm EOR : AsI1_bin_irs<0b0001, "eor", + BinOpFrag<(xor node:$LHS, node:$RHS)>>; +defm BIC : AsI1_bin_irs<0b1110, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + +def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not GPR:$src))]>, UnaryDP; +def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, + "mvn", " $dst, $imm", + [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP; + +def : ARMPat<(and GPR:$src, so_imm_not:$imm), + (BICri GPR:$src, so_imm_not:$imm)>; + +//===----------------------------------------------------------------------===// +// Multiply Instructions. 
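+// As noted earlier, multiplies by 2^n +/- 1 are strength-reduced in C++ code
+// rather than matched here. The identities relied on, as a C sketch:
+//
+//   uint32_t mul_pow2_plus_1(uint32_t x, unsigned n)  { return (x << n) + x; }
+//   // mul X, (2^n + 1)  ==>  add (X << n), X
+//   uint32_t mul_pow2_minus_1(uint32_t x, unsigned n) { return (x << n) - x; }
+//   // mul X, (2^n - 1)  ==>  rsb X, (X << n)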
+// + +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "mul", " $dst, $a, $b", + [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; + +def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; + +// Extra precision multiplies with low / high results +def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smull", " $ldst, $hdst, $a, $b", []>; + +def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umull", " $ldst, $hdst, $a, $b", []>; + +// Multiply + accumulate +def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smlal", " $ldst, $hdst, $a, $b", []>; + +def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umlal", " $ldst, $hdst, $a, $b", []>; + +def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umaal", " $ldst, $hdst, $a, $b", []>, + Requires<[IsARM, HasV6]>; + +// Most significant word multiply +def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "smmul", " $dst, $a, $b", + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; + let Inst{15-12} = 0b1111; +} + +def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; +} + + +def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1101; +} + +multiclass AI_smul { + def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wb"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wt"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } +} + + +multiclass AI_smla { + def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add 
GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } +} + +defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; + +// TODO: Halfword multiple accumulate long: SMLAL +// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD + +//===----------------------------------------------------------------------===// +// Misc. Arithmetic Instructions. 
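+// The byte-reversal patterns below (REV, REV16) are easier to read as plain
+// C; a sketch of what each dag pattern computes:
+//
+//   uint32_t rev(uint32_t x) {    // reverse all four bytes
+//     return (x >> 24) | ((x >> 8) & 0xFF00) |
+//            ((x << 8) & 0xFF0000) | (x << 24);
+//   }
+//   uint32_t rev16(uint32_t x) {  // swap the bytes of each halfword
+//     return ((x >> 8) & 0x00FF00FF) | ((x << 8) & 0xFF00FF00);
+//   }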
+// + +def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), + "clz", " $dst, $src", + [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0001; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev", " $dst, $src", + [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev16", " $dst, $src", + [(set GPR:$dst, + (or (and (srl GPR:$src, 8), 0xFF), + (or (and (shl GPR:$src, 8), 0xFF00), + (or (and (srl GPR:$src, 8), 0xFF0000), + (and (shl GPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), + "revsh", " $dst, $src", + [(set GPR:$dst, + (sext_inreg + (or (srl (and GPR:$src, 0xFF00), 8), + (shl GPR:$src, 8)), i16))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhbt", " $dst, $src1, $src2, LSL $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), + (and (shl GPR:$src2, (i32 imm:$shamt)), + 0xFFFF0000)))]>, + Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b001; +} + +// Alternate cases for PKHBT where identities eliminate some nodes. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), + (PKHBT GPR:$src1, GPR:$src2, 0)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), + (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + + +def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhtb", " $dst, $src1, $src2, ASR $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), + (and (sra GPR:$src2, imm16_31:$shamt), + 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b101; +} + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)), + (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), + (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +defm CMP : AI1_cmp_irs<0b1010, "cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm CMN : AI1_cmp_irs<0b1011, "cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; + +// Note that TST/TEQ don't set all the same flags that CMP does! 
+defm TST : AI1_cmp_irs<0b1000, "tst",
+                       BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+                       BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
+                         BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
+                         BinOpFrag<(ARMcmpNZ node:$LHS, (ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+             (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+             (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
+                 "mov", " $dst, $true",
+      [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+             RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCs : AI1<0b1101, (outs GPR:$dst),
+                 (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+                 "mov", " $dst, $true",
+   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+             RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCi : AI1<0b1101, (outs GPR:$dst),
+                 (ins GPR:$false, so_imm:$true), DPFrm,
+                 "mov", " $dst, $true",
+   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+             RegConstraint<"$false = $dst">, UnaryDP;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
+                    !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+                                          "${:private}PCRELL${:uid}+8))\n"),
+                               !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                          "add$p $dst, pc, #PCRELV${:uid}")),
+                    []>;
+
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
+                      (ins i32imm:$label, i32imm:$id, pred:$p), Pseudo,
+          !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+                                "${:private}PCRELL${:uid}+8))\n"),
+                     !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                "add$p $dst, pc, #PCRELV${:uid}")),
+                      []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+    Defs = [R0, R12, LR, CPSR] in {
+  def TPsoft : ABXI<0b1011, (outs), (ins),
+                    "bl __aeabi_read_tp",
+                    [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//   eh_sjlj_setjmp() is a three instruction sequence to store the return
+//   address and save #0 in R0 for the non-longjmp case.
+//   Since by its nature we may be coming from some other function to get
+//   here, and we're using the stack frame for the containing function to
+//   save/restore registers, we can't keep anything live in regs across
+//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+//   when we get here from a longjmp(). We force everything out of registers
+//   except for our own input by listing the relevant registers in Defs. By
+//   doing so, we also cause the prologue/epilogue code to actively preserve
+//   all of the callee-saved registers, which is exactly what we want.
+let Defs = + [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, + D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in { + def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, + "add r0, pc, #4\n\t" + "str r0, [$src, #+4]\n\t" + "mov r0, #0 @ eh_setjmp", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; + +// Large immediate handling. + +// Two piece so_imms. +let isReMaterializable = 1 in +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, + "mov", " $dst, $src", + [(set GPR:$dst, so_imm2part:$src)]>; + +def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), + (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), + (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; + +// TODO: add,sub,and, 3-instr forms? + + +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; + +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; + +// extload -> zextload +def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; + +def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; +def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; + +// smul* and smla* +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), + (sra (shl GPR:$b, 16), 16))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add 
GPR:$acc,
+                       (sra (mul GPR:$a, sext_16_node:$b), 16)),
+              (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 000000000000..ffb83a8b4d36
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,562 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+                      [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+  return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+  return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+  unsigned Val = -N->getZExtValue();
+  return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift.
+// This uses thumb_immshifted to match and thumb_immshifted_val and
+// thumb_immshifted_shamt to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+  return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// Define Thumb specific addressing modes.
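+// The scaled forms below encode a 5-bit offset in units of the access size,
+// so the reach depends on the width (a sketch, as C pointer arithmetic):
+//
+//   // t_addrmode_s4: word access, [base, #imm5 * 4], byte offsets 0..124
+//   uint32_t *t_addr_s4(uint32_t *base, unsigned imm5) { return base + imm5; }
+//   // t_addrmode_s2: halfword access, [base, #imm5 * 2], byte offsets 0..62
+//   uint16_t *t_addr_s2(uint16_t *base, unsigned imm5) { return base + imm5; }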
+ +// t_addrmode_rr := reg + reg +// +def t_addrmode_rr : Operand, + ComplexPattern { + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); +} + +// t_addrmode_s4 := reg + reg +// reg + imm5 * 4 +// +def t_addrmode_s4 : Operand, + ComplexPattern { + let PrintMethod = "printThumbAddrModeS4Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s2 := reg + reg +// reg + imm5 * 2 +// +def t_addrmode_s2 : Operand, + ComplexPattern { + let PrintMethod = "printThumbAddrModeS2Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s1 := reg + reg +// reg + imm5 +// +def t_addrmode_s1 : Operand, + ComplexPattern { + let PrintMethod = "printThumbAddrModeS1Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_sp := sp + imm8 * 4 +// +def t_addrmode_sp : Operand, + ComplexPattern { + let PrintMethod = "printThumbAddrModeSPOperand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +let Defs = [SP], Uses = [SP] in { +def tADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "@ tADJCALLSTACKUP $amt1", + [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>; + +def tADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt), + "@ tADJCALLSTACKDOWN $amt", + [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>; +} + +let isNotDuplicable = 1 in +def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp), + "$cp:\n\tadd $dst, pc", + [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in { + def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>; + // Alternative return instruction used by vararg functions. + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>; +} + +// FIXME: remove when we have a way to marking a MI with these properties. +let isReturn = 1, isTerminator = 1 in +def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, LR, + D0, D1, D2, D3, D4, D5, D6, D7] in { + def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMtcall tglobaladdr:$func)]>; + // ARMv5T and above + def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), + "blx ${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>; + def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops), + "blx $func", + [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>; + // ARMv4T + def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), + "cpy lr, pc\n\tbx $func", + [(ARMcall_nolink tGPR:$func)]>; +} + +let isBranch = 1, isTerminator = 1 in { + let isBarrier = 1 in { + let isPredicable = 1 in + def tB : TI<(outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + // Far jump + def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>; + + def tBR_JTr : TJTI<(outs), + (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), + "cpy pc, $target \n\t.align\t2\n$jt", + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + } +} + +// FIXME: should be able to write a pattern for ARMBrcond, but can't use +// a two-value operand where a dag node expects two operands. 
:( +let isBranch = 1, isTerminator = 1 in + def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + +//===----------------------------------------------------------------------===// +// Load Store Instructions. +// + +let canFoldAsLoad = 1 in +def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; + +def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), + "ldrb $dst, $addr", + [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; + +def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), + "ldrh $dst, $addr", + [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; + +def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsb $dst, $addr", + [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + +def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsh $dst, $addr", + [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + +let canFoldAsLoad = 1 in +def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; + +// Special instruction for restore. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +let canFoldAsLoad = 1, mayLoad = 1 in +def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", []>; + +// Load tconstpool +let canFoldAsLoad = 1 in +def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", []>; + +def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_s4:$addr)]>; + +def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), + "strb $src, $addr", + [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; + +def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), + "strh $src, $addr", + [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; + +def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_sp:$addr)]>; + +let mayStore = 1 in { +// Special instruction for spill. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", []>; +} + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// TODO: A7-44: LDMIA - load multiple + +let mayLoad = 1 in +def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let mayStore = 1 in +def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), + "push $src1", []>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. 
+// + +// Add with carry +def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "adc $dst, $rhs", + [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; + +def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>; + + +def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; + +def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; + +def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; + +def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), + "add $dst, $rhs @ addhirr", []>; + +def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs), + "add $dst, pc, $rhs * 4", []>; + +def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), + "add $dst, $sp, $rhs * 4 @ addrspi", []>; + +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $rhs * 4", []>; + +def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "and $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; + +def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "asr $dst, $lhs, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>; + +def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "asr $dst, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; + +def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "bic $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; + + +def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; + +def tTST : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "tst $lhs, $rhs", + [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; + +def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>; + +// TODO: A7-37: CMP(3) - cmp hi regs + +def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "eor $dst, $rhs", + [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; + +def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsl $dst, $lhs, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>; + +def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsl $dst, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; + +def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsr $dst, $lhs, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>; + +def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsr $dst, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; + +// FIXME: This is not rematerializable because mov changes the condition code. +def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src), + "mov $dst, $src", + [(set tGPR:$dst, imm0_255:$src)]>; + +// TODO: A7-73: MOV(2) - mov setting flag. 
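+
+// Illustrative note (added commentary, not part of the imported source):
+// constants outside tMOVi8's [0,255] range may still be matched by the
+// two-piece immediate patterns at the end of this file; e.g. 0x4400 is
+// 0x11 << 10, so it is expected to lower to "mov rD, #17" followed by
+// "lsl rD, rD, #10" via thumb_immshifted_val / thumb_immshifted_shamt.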
+ + +// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', +// which is MOV(3). This also supports high registers. +def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src), + "cpy $dst, $src", []>; +def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2lor", []>; +def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src), + "cpy $dst, $src\t@ lor2hir", []>; +def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2hir", []>; + +def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "mul $dst, $rhs", + [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; + +def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src), + "mvn $dst, $src", + [(set tGPR:$dst, (not tGPR:$src))]>; + +def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), + "neg $dst, $src", + [(set tGPR:$dst, (ineg tGPR:$src))]>; + +def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "orr $dst, $rhs", + [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; + + +def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev $dst, $src", + [(set tGPR:$dst, (bswap tGPR:$src))]>, + Requires<[IsThumb, HasV6]>; + +def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev16 $dst, $src", + [(set tGPR:$dst, + (or (and (srl tGPR:$src, 8), 0xFF), + (or (and (shl tGPR:$src, 8), 0xFF00), + (or (and (srl tGPR:$src, 8), 0xFF0000), + (and (shl tGPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsThumb, HasV6]>; + +def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "revsh $dst, $src", + [(set tGPR:$dst, + (sext_inreg + (or (srl (and tGPR:$src, 0xFFFF), 8), + (shl tGPR:$src, 8)), i16))]>, + Requires<[IsThumb, HasV6]>; + +def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "ror $dst, $rhs", + [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; + + +// Subtract with carry +def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sbc $dst, $rhs", + [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>; + + +// TODO: A7-96: STMIA - store multiple. + +def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; + +def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; + +def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs * 4", []>; + +def tSXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxtb $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, + Requires<[IsThumb, HasV6]>; +def tSXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxth $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, + Requires<[IsThumb, HasV6]>; + + +def tUXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxtb $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, + Requires<[IsThumb, HasV6]>; +def tUXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxth $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, + Requires<[IsThumb, HasV6]>; + + +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Expanded by the scheduler into a branch sequence. +let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. 
+ def tMOVCCr : + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), + "@ tMOVCCr $cc", + [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + +// tLEApcrel - Load a pc-relative address into a register without offending the +// assembler. +def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label), + !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id), + !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +//===----------------------------------------------------------------------===// +// TLS Instructions +// + +// __aeabi_read_tp preserves the registers r1-r3. +let isCall = 1, + Defs = [R0, LR] in { + def tTPsoft : TIx2<(outs), (ins), + "bl __aeabi_read_tp", + [(set R0, ARMthread_pointer)]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress +def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; + +// JumpTable +def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (tLEApcrelJT tjumptable:$dst, imm:$id)>; + +// Direct calls +def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>; +def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; + +// Indirect calls to ARM routines +def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; + +// zextload i1 -> zextload i8 +def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr), + (tLDRB t_addrmode_s1:$addr)>; + +// extload -> zextload +def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; + +// Large immediate handling. + +// Two piece imms. +def : ThumbPat<(i32 thumb_immshifted:$src), + (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)), + (thumb_immshifted_shamt imm:$src))>; + +def : ThumbPat<(i32 imm0_255_comp:$src), + (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td new file mode 100644 index 000000000000..168fb45f11ea --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -0,0 +1,12 @@ +//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb2 instruction set. +// +//===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td new file mode 100644 index 000000000000..0247dafd07df --- /dev/null +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -0,0 +1,398 @@ +//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM VFP instruction set. +// +//===----------------------------------------------------------------------===// + +def SDT_FTOI : +SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; +def SDT_ITOF : +SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDT_CMPFP0 : +SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_FMDRR : +SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; + +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; +def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; +def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +let canFoldAsLoad = 1 in { +def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + "fldd", " $dst, $addr", + [(set DPR:$dst, (load addrmode5:$addr))]>; + +def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + "flds", " $dst, $addr", + [(set SPR:$dst, (load addrmode5:$addr))]>; +} // canFoldAsLoad + +def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + "fstd", " $src, $addr", + [(store DPR:$src, addrmode5:$addr)]>; + +def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + "fsts", " $src, $addr", + [(store SPR:$src, addrmode5:$addr)]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let mayLoad = 1 in { +def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}d${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} + +def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}s${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} +} + +let mayStore = 1 in { +def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}d${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} + +def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}s${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} +} // mayStore + +// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores + +//===----------------------------------------------------------------------===// +// FP Binary Operations. +// + +def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "faddd", " $dst, $a, $b", + [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; + +def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fadds", " $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; + +// These are encoded as unary instructions. 
+def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + "fcmped", " $a, $b", + [(arm_cmpfp DPR:$a, DPR:$b)]>; + +def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + "fcmpes", " $a, $b", + [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fdivd", " $dst, $a, $b", + [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; + +def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fdivs", " $dst, $a, $b", + [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; + +def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fmuld", " $dst, $a, $b", + [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; + +def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fmuls", " $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; + +def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fnmuld", " $dst, $a, $b", + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { + let Inst{6} = 1; +} + +def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fnmuls", " $dst, $a, $b", + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { + let Inst{6} = 1; +} + +// Match reassociated forms only if not sign dependent rounding. +def : Pat<(fmul (fneg DPR:$a), DPR:$b), + (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; +def : Pat<(fmul (fneg SPR:$a), SPR:$b), + (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + + +def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fsubd", " $dst, $a, $b", + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { + let Inst{6} = 1; +} + +def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fsubs", " $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Unary Operations. +// + +def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + "fabsd", " $dst, $a", + [(set DPR:$dst, (fabs DPR:$a))]>; + +def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + "fabss", " $dst, $a", + [(set SPR:$dst, (fabs SPR:$a))]>; + +def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + "fcmpezd", " $a", + [(arm_cmpfp0 DPR:$a)]>; + +def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + "fcmpezs", " $a", + [(arm_cmpfp0 SPR:$a)]>; + +def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + "fcvtds", " $dst, $a", + [(set DPR:$dst, (fextend SPR:$a))]>; + +// Special case encoding: bits 11-8 is 0b1011. 
+def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+                "fcvtsd", " $dst, $a",
+                [(set SPR:$dst, (fround DPR:$a))]> {
+  let Inst{27-23} = 0b11101;
+  let Inst{21-16} = 0b110111;
+  let Inst{11-8}  = 0b1011;
+  let Inst{7-4}   = 0b1100;
+}
+
+def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+                 "fcpyd", " $dst, $a", []>;
+
+def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+                 "fcpys", " $dst, $a", []>;
+
+def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+                 "fnegd", " $dst, $a",
+                 [(set DPR:$dst, (fneg DPR:$a))]>;
+
+def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+                 "fnegs", " $dst, $a",
+                 [(set SPR:$dst, (fneg SPR:$a))]>;
+
+def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+                 "fsqrtd", " $dst, $a",
+                 [(set DPR:$dst, (fsqrt DPR:$a))]>;
+
+def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+                 "fsqrts", " $dst, $a",
+                 [(set SPR:$dst, (fsqrt SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+                 "fmrs", " $dst, $src",
+                 [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+                 "fmsr", " $dst, $src",
+                 [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+def FMRRD : AVConv3I<0b11000101, 0b1011,
+                     (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src),
+                 "fmrrd", " $dst1, $dst2, $src",
+                 [/* FIXME: Can't write pattern for multiple result instr*/]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : AVConv5I<0b11000100, 0b1011,
+                     (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
+                "fmdrr", " $dst, $src1, $src2",
+                [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+// FMXR: GPR -> VFP System reg
+
+
+// Int to FP:
+
+def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+                 "fsitod", " $dst, $a",
+                 [(set DPR:$dst, (arm_sitof SPR:$a))]> {
+  let Inst{7} = 1;
+}
+
+def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+                 "fsitos", " $dst, $a",
+                 [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+  let Inst{7} = 1;
+}
+
+def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+                 "fuitod", " $dst, $a",
+                 [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+                 "fuitos", " $dst, $a",
+                 [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
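+// (Added commentary, not part of the imported source: C semantics require
+// float-to-integer conversion to truncate toward zero regardless of the
+// current FPSCR rounding mode, which is why code generation always selects
+// these FTO{S|U}IZ{S|D} forms rather than the non-Z variants.)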
+ +def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftosizd", " $dst, $a", + [(set SPR:$dst, (arm_ftosi DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftosizs", " $dst, $a", + [(set SPR:$dst, (arm_ftosi SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftouizd", " $dst, $a", + [(set SPR:$dst, (arm_ftoui DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftouizs", " $dst, $a", + [(set SPR:$dst, (arm_ftoui SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +//===----------------------------------------------------------------------===// +// FP FMA Operations. +// + +def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Conditional moves. 
+//
+
+def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+                   (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+                   "fcpyd", " $dst, $true",
+               [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+               RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+                   (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+                   "fcpys", " $dst, $true",
+               [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+               RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+                   (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+                   "fnegd", " $dst, $true",
+               [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+               RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+                   (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+                   "fnegs", " $dst, $true",
+               [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+               RegConstraint<"$false = $dst">;
+
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+let Defs = [CPSR] in
+def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+  let Inst{27-20} = 0b11101111;
+  let Inst{19-16} = 0b0001;
+  let Inst{15-12} = 0b1111;
+  let Inst{11-8}  = 0b1010;
+  let Inst{7}     = 0;
+  let Inst{4}     = 1;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 000000000000..e551c41936f8
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,298 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+  abort();
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us (we need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+  void ARMCompilationCallback(void);
+  asm(
+    ".text\n"
+    ".align 2\n"
+    ".globl " ASMPREFIX "ARMCompilationCallback\n"
+    ASMPREFIX "ARMCompilationCallback:\n"
+    // Save caller saved registers since they may contain stuff
+    // for the real target function right now.
We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned, so we can't just preserve the callee saved regs. + "stmdb sp!, {r0, r1, r2, r3, lr}\n" +#ifndef __SOFTFP__ + "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // The LR contains the address of the stub function on entry. + // pass it as the argument to the C part of the callback + "mov r0, lr\n" + "sub sp, sp, #4\n" + // Call the C portion of the callback + "bl " ASMPREFIX "ARMCompilationCallbackC\n" + "add sp, sp, #4\n" + // Restoring the LR to the return address of the function that invoked + // the stub and de-allocating the stack space for it requires us to + // swap the two saved LR values on the stack, as they're backwards + // for what we need since the pop instruction has a pre-determined + // order for the registers. + // +--------+ + // 0 | LR | Original return address + // +--------+ + // 1 | LR | Stub address (start of stub) + // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) + // 6-20 | D0..D7 | Saved VFP registers + // +--------+ + // +#ifndef __SOFTFP__ + // Restore VFP caller-saved registers. + "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // + // We need to exchange the values in slots 0 and 1 so we can + // return to the address in slot 1 with the address in slot 0 + // restored to the LR. + "ldr r0, [sp,#20]\n" + "ldr r1, [sp,#16]\n" + "str r1, [sp,#20]\n" + "str r0, [sp,#16]\n" + // Return to the (newly modified) stub to invoke the real function. + // The above twiddling of the saved return addresses allows us to + // deallocate everything, including the LR the stub saved, all in one + // pop instruction. + "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n" + ); +#else // Not an ARM host + void ARMCompilationCallback() { + assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n"); + abort(); + } +#endif +} + +/// ARMCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. We're replacing the first two instructions of the + // stub with: + // ldr pc, [pc,#-4] + // + if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub writable\n"; + abort(); + } + *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] + *(intptr_t *)(StubAddr+4) = NewVal; + if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub executable\n"; + abort(); + } +} + +TargetJITInfo::LazyResolverFn +ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return ARMCompilationCallback; +} + +void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, + JITCodeEmitter &JCE) { + JCE.startGVStub(GV, 4, 4); + JCE.emitWordLE((intptr_t)Ptr); + void *PtrAddr = JCE.finishGVStub(GV); + addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); + return PtrAddr; +} + +void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. 
The code is the same except for one bit of the last instruction. + if (Fn != (void*)(intptr_t)ARMCompilationCallback) { + // Branch to the corresponding function addr. + if (IsPIC) { + // The stub is 8-byte size and 4-aligned. + intptr_t LazyPtr = getIndirectSymAddr(Fn); + if (!LazyPtr) { + // In PIC mode, the function stub is loading a lazy-ptr. + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); + if (F) + DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" + << F->getName() << "'\n"; + else + DOUT << "JIT: Stub emitted at [" << LazyPtr + << "] for external function at '" << Fn << "'\n"; + } + JCE.startGVStub(F, 16, 4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4] + JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip + JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] + JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8) + sys::Memory::InvalidateInstructionCache((void*)Addr, 16); + } else { + // The stub is 8-byte size and 4-aligned. + JCE.startGVStub(F, 8, 4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + JCE.emitWordLE((intptr_t)Fn); // addr of function + sys::Memory::InvalidateInstructionCache((void*)Addr, 8); + } + } else { + // The compilation callback will overwrite the first two words of this + // stub with indirect branch instructions targeting the compiled code. + // This stub sets the return address to restart the stub, so that + // the new branch will be invoked when we come back. + // + // Branch and link to the compilation callback. + // The stub is 16-byte size and 4-byte aligned. + JCE.startGVStub(F, 16, 4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + // Save LR so the callback can determine which stub called it. + // The compilation callback is responsible for popping this prior + // to returning. + JCE.emitWordLE(0xe92d4000); // push {lr} + // Set the return address to go back to the start of this stub. + JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 + // Invoke the compilation callback. + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + // The address of the compilation callback. + JCE.emitWordLE((intptr_t)ARMCompilationCallback); + sys::Memory::InvalidateInstructionCache((void*)Addr, 16); + } + + return JCE.finishGVStub(F); +} + +intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { + ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); + switch (RT) { + default: + return (intptr_t)(MR->getResultPointer()); + case ARM::reloc_arm_pic_jt: + // Destination address - jump table base. + return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); + case ARM::reloc_arm_jt_base: + // Jump table base address. + return getJumpTableBaseAddr(MR->getJumpTableIndex()); + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + // Constant pool entry address. 
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); + case ARM::reloc_arm_machine_cp_entry: { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); + assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && + "Can't handle this machine constant pool entry yet!"); + intptr_t Addr = (intptr_t)(MR->getResultPointer()); + Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); + return Addr; + } + } +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = resolveRelocDestAddr(MR); + switch ((ARM::RelocationType)MR->getRelocationType()) { + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_relative: { + // It is necessary to calculate the correct PC relative value. We + // subtract the base addr from the target addr to form a byte offset. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + // If the result is positive, set bit U(23) to 1. + if (ResultPtr >= 0) + *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; + else { + // Otherwise, obtain the absolute value and set bit U(23) to 0. + *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); + ResultPtr = - ResultPtr; + } + // Set the immed value calculated. + // VFP immediate offset is multiplied by 4. + if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) + ResultPtr = ResultPtr >> 2; + *((intptr_t*)RelocPos) |= ResultPtr; + // Set register Rn to PC. + *((intptr_t*)RelocPos) |= + ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + break; + } + case ARM::reloc_arm_pic_jt: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_absolute: { + // These addresses have already been resolved. + *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; + break; + } + case ARM::reloc_arm_branch: { + // It is necessary to calculate the correct value of signed_immed_24 + // field. We subtract the base addr from the target addr to form a + // byte offset, which must be inside the range -33554432 and +33554428. + // Then, we set the signed_immed_24 field of the instruction to bits + // [25:2] of the byte offset. More details ARM-ARM p. A4-11. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; + assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_jt_base: { + // JT base - (instruction addr + 8) + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + } + } +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h new file mode 100644 index 000000000000..7dfeed8b7bf3 --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.h @@ -0,0 +1,178 @@ +//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMJITInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+  class ARMTargetMachine;
+
+  class ARMJITInfo : public TargetJITInfo {
+    // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+    // CONSTPOOL_ENTRY addresses.
+    SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+    // JumpTableId2AddrMap - A map from inline jumptable ids to the
+    // corresponding inline jump table bases.
+    SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+    // PCLabelMap - A map from PC labels to addresses.
+    DenseMap<unsigned, intptr_t> PCLabelMap;
+
+    // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+    // addresses to their indirect symbol addresses.
+    DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+    // IsPIC - True if the relocation model is PIC. This is used to determine
+    // how to codegen function stubs.
+    bool IsPIC;
+
+  public:
+    explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+    /// replaceMachineCodeForFunction - Make it so that calling the function
+    /// whose machine code is at OLD turns into a call to NEW, perhaps by
+    /// overwriting OLD with a branch to NEW. This is used for self-modifying
+    /// code.
+    ///
+    virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+    /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+    /// to emit an indirect symbol which contains the address of the specified
+    /// ptr.
+    virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+                                             JITCodeEmitter &JCE);
+
+    /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+    /// small native function that simply calls the function at the specified
+    /// address.
+    virtual void *emitFunctionStub(const Function* F, void *Fn,
+                                   JITCodeEmitter &JCE);
+
+    /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+    virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+    /// relocate - Before the JIT can run a block of code that has been emitted,
+    /// it must rewrite the code to contain the actual addresses of any
+    /// referenced global symbols.
+    virtual void relocate(void *Function, MachineRelocation *MR,
+                          unsigned NumRelocs, unsigned char* GOTBase);
+
+    /// hasCustomConstantPool - Allows a target to specify that constant
+    /// pool address resolution is handled by the target.
+    virtual bool hasCustomConstantPool() const { return true; }
+
+    /// hasCustomJumpTables - Allows a target to specify that jumptables
+    /// are emitted by the target.
+    virtual bool hasCustomJumpTables() const { return true; }
+
+    /// allocateSeparateGVMemory - If true, globals should be placed in
+    /// separately allocated heap memory rather than in the same
+    /// code memory allocated by JITCodeEmitter.
+    virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+      return true;
+#else
+      return false;
+#endif
+    }
+
+    /// Initialize - Initialize internal state for the function being JITted.
+    /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
+    /// jump table ids to jump table bases map; remember if codegen relocation
+    /// model is PIC.
+    void Initialize(const MachineFunction &MF, bool isPIC) {
+      const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+      ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries());
+      JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+      IsPIC = isPIC;
+    }
+
+    /// getConstantPoolEntryAddr - The ARM target puts all constant
+    /// pool entries into constant islands. This returns the address of the
+    /// constant pool entry of the specified index.
+    intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+      assert(CPI < ConstPoolId2AddrMap.size());
+      return ConstPoolId2AddrMap[CPI];
+    }
+
+    /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+    /// where its associated value is stored. When relocations are processed,
+    /// this value will be used to resolve references to the constant.
+    void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+      assert(CPI < ConstPoolId2AddrMap.size());
+      ConstPoolId2AddrMap[CPI] = Addr;
+    }
+
+    /// getJumpTableBaseAddr - The ARM target inlines all jump tables within
+    /// the text section of the function. This returns the address of the base
+    /// of the jump table of the specified index.
+    intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+      assert(JTI < JumpTableId2AddrMap.size());
+      return JumpTableId2AddrMap[JTI];
+    }
+
+    /// addJumpTableBaseAddr - Map a jump table index to the address where
+    /// the corresponding inline jump table is emitted. When relocations are
+    /// processed, this value will be used to resolve references to the
+    /// jump table.
+    void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+      assert(JTI < JumpTableId2AddrMap.size());
+      JumpTableId2AddrMap[JTI] = Addr;
+    }
+
+    /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+    intptr_t getPCLabelAddr(unsigned Id) const {
+      DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+      assert(I != PCLabelMap.end());
+      return I->second;
+    }
+
+    /// addPCLabelAddr - Remember the address of the specified PC label.
+    void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+      PCLabelMap.insert(std::make_pair(Id, Addr));
+    }
+
+    /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
+    /// specified symbol located at address. Returns 0 if the indirect symbol
+    /// has not been emitted.
+    intptr_t getIndirectSymAddr(void *Addr) const {
+      DenseMap<void*, intptr_t>::const_iterator I = Sym2IndirectSymMap.find(Addr);
+      if (I != Sym2IndirectSymMap.end())
+        return I->second;
+      return 0;
+    }
+
+    /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+    /// its indirect symbol address.
+    void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+      Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+    }
+
+  private:
+    /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+    /// if it's not already solved. Constantpool entries must be resolved by
+    /// ARM target.
+    intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+  };
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..047552f627db
--- /dev/null
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,778 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+
+namespace {
+  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+    static char ID;
+    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    ARMFunctionInfo *AFI;
+    RegScavenger *RS;
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM load / store optimization pass";
+    }
+
+  private:
+    struct MemOpQueueEntry {
+      int Offset;
+      unsigned Position;
+      MachineBasicBlock::iterator MBBI;
+      bool Merged;
+      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
+        : Offset(o), Position(p), MBBI(i), Merged(false) {};
+    };
+    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
+    typedef MemOpQueue::iterator MemOpQueueIter;
+
+    SmallVector<MachineBasicBlock::iterator, 4>
+    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+                 int Opcode, unsigned Size,
+                 ARMCC::CondCodes Pred, unsigned PredReg,
+                 unsigned Scratch, MemOpQueue &MemOps);
+
+    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+  };
+  char ARMLoadStoreOpt::ID = 0;
+}
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+  return new ARMLoadStoreOpt();
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode) {
+  switch (Opcode) {
+  case ARM::LDR:
+    NumLDMGened++;
+    return ARM::LDM;
+  case ARM::STR:
+    NumSTMGened++;
+    return ARM::STM;
+  case ARM::FLDS:
+    NumFLDMGened++;
+    return ARM::FLDMS;
+  case ARM::FSTS:
+    NumFSTMGened++;
+    return ARM::FSTMS;
+  case ARM::FLDD:
+    NumFLDMGened++;
+    return ARM::FLDMD;
+  case ARM::FSTD:
+    NumFSTMGened++;
+    return ARM::FSTMD;
+  default: abort();
+  }
+  return 0;
+}
+
+/// mergeOps - Create and insert a LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
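+///
+/// Illustrative example (added commentary, not from the original source):
+/// four word loads off the same base with consecutive offsets and ascending
+/// destination registers,
+///   ldr r4, [r0]
+///   ldr r5, [r0, #4]
+///   ldr r6, [r0, #8]
+///   ldr r7, [r0, #12]
+/// can be replaced by the single instruction "ldmia r0, {r4, r5, r6, r7}".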
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                     int Offset, unsigned Base, bool BaseKill, int Opcode,
+                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
+                     const TargetInstrInfo *TII) {
+  // FIXME: would it be better to take a DL from one of the loads arbitrarily?
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Only a single register to load / store. Don't bother.
+  unsigned NumRegs = Regs.size();
+  if (NumRegs <= 1)
+    return false;
+
+  ARM_AM::AMSubMode Mode = ARM_AM::ia;
+  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+  if (isAM4 && Offset == 4)
+    Mode = ARM_AM::ib;
+  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+    Mode = ARM_AM::da;
+  else if (isAM4 && Offset == -4 * (int)NumRegs)
+    Mode = ARM_AM::db;
+  else if (Offset != 0) {
+    // If starting offset isn't zero, insert a MI to materialize a new base.
+    // But only do so if it is cost effective, i.e. merging more than two
+    // loads / stores.
+    if (NumRegs <= 2)
+      return false;
+
+    unsigned NewBase;
+    if (Opcode == ARM::LDR)
+      // If it is a load, then just use one of the destination registers
+      // as the new base.
+      NewBase = Regs[NumRegs-1].first;
+    else {
+      // Otherwise, use the scratch register as the new base.
+      NewBase = Scratch;
+      if (NewBase == 0)
+        return false;
+    }
+    int BaseOpc = ARM::ADDri;
+    if (Offset < 0) {
+      BaseOpc = ARM::SUBri;
+      Offset = - Offset;
+    }
+    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+    if (ImmedOffset == -1)
+      return false; // Probably not worth it then.
+
+    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+      .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
+      .addImm(Pred).addReg(PredReg).addReg(0);
+    Base = NewBase;
+    BaseKill = true; // New base is always killed right after its use.
+  }
+
+  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+  Opcode = getLoadStoreMultipleOpcode(Opcode);
+  MachineInstrBuilder MIB = (isAM4)
+    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+        .addReg(Base, getKillRegState(BaseKill))
+        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
+    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+        .addReg(Base, getKillRegState(BaseKill))
+        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+        .addImm(Pred).addReg(PredReg);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+                     | getKillRegState(Regs[i].second));
+
+  return true;
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+                              unsigned Base, int Opcode, unsigned Size,
+                              ARMCC::CondCodes Pred, unsigned PredReg,
+                              unsigned Scratch, MemOpQueue &MemOps) {
+  SmallVector<MachineBasicBlock::iterator, 4> Merges;
+  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+  int Offset = MemOps[SIndex].Offset;
+  int SOffset = Offset;
+  unsigned Pos = MemOps[SIndex].Position;
+  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+
+  SmallVector<std::pair<unsigned, bool>, 8> Regs;
+  Regs.push_back(std::make_pair(PReg, isKill));
+  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+    int NewOffset = MemOps[i].Offset;
+    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+    isKill = MemOps[i].MBBI->getOperand(0).isKill();
+    // AM4 - register numbers in ascending order.
+    // AM5 - consecutive register numbers in ascending order.
+    if (NewOffset == Offset + (int)Size &&
+        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+      Offset += Size;
+      Regs.push_back(std::make_pair(Reg, isKill));
+      PRegNum = RegNum;
+    } else {
+      // Can't merge this in. Try to merge the earlier ones first.
+      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+                   Scratch, Regs, TII)) {
+        Merges.push_back(prior(Loc));
+        for (unsigned j = SIndex; j < i; ++j) {
+          MBB.erase(MemOps[j].MBBI);
+          MemOps[j].Merged = true;
+        }
+      }
+      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps);
+      Merges.append(Merges2.begin(), Merges2.end());
+      return Merges;
+    }
+
+    if (MemOps[i].Position > Pos) {
+      Pos = MemOps[i].Position;
+      Loc = MemOps[i].MBBI;
+    }
+  }
+
+  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+               Scratch, Regs, TII)) {
+    Merges.push_back(prior(Loc));
+    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+      MBB.erase(MemOps[i].MBBI);
+      MemOps[i].Merged = true;
+    }
+  }
+
+  return Merges;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
+  int PIdx = MI->findFirstPredOperandIdx();
+  if (PIdx == -1) {
+    PredReg = 0;
+    return ARMCC::AL;
+  }
+
+  PredReg = MI->getOperand(PIdx+1).getReg();
+  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+                                       unsigned Bytes, ARMCC::CondCodes Pred,
+                                       unsigned PredReg) {
+  unsigned MyPredReg = 0;
+  return (MI && MI->getOpcode() == ARM::SUBri &&
+          MI->getOperand(0).getReg() == Base &&
+          MI->getOperand(1).getReg() == Base &&
+          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+          getInstrPredicate(MI, MyPredReg) == Pred &&
+          MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+                                       unsigned Bytes, ARMCC::CondCodes Pred,
+                                       unsigned PredReg) {
+  unsigned MyPredReg = 0;
+  return (MI && MI->getOpcode() == ARM::ADDri &&
+          MI->getOperand(0).getReg() == Base &&
+          MI->getOperand(1).getReg() == Base &&
+          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+          getInstrPredicate(MI, MyPredReg) == Pred &&
+          MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default: return 0;
+  case ARM::LDR:
+  case ARM::STR:
+  case ARM::FLDS:
+  case ARM::FSTS:
+    return 4;
+  case ARM::FLDD:
+  case ARM::FSTD:
+    return 8;
+  case ARM::LDM:
+  case ARM::STM:
+    return (MI->getNumOperands() - 4) * 4;
+  case ARM::FLDMS:
+  case ARM::FSTMS:
+  case ARM::FLDMD:
+  case ARM::FSTMD:
+    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+  }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      bool &Advance,
+                                      MachineBasicBlock::iterator &I) {
+  MachineInstr *MI = MBBI;
+  unsigned Base = MI->getOperand(0).getReg();
+  unsigned Bytes = getLSMultipleTransferSize(MI);
+  unsigned PredReg = 0;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+  int Opcode = MI->getOpcode();
+  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+  if (isAM4) {
+    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+      return false;
+
+    // Can't use the updating AM4 sub-mode if the base register is also a dest
+    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+      if (MI->getOperand(i).getReg() == Base)
+        return false;
+    }
+
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+    if (MBBI != MBB.begin()) {
+      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+      if (Mode == ARM_AM::ia &&
+          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+        MBB.erase(PrevMBBI);
+        return true;
+      } else if (Mode == ARM_AM::ib &&
+                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+        MBB.erase(PrevMBBI);
+        return true;
+      }
+    }
+
+    if (MBBI != MBB.end()) {
+      MachineBasicBlock::iterator NextMBBI = next(MBBI);
+      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+        if (NextMBBI == I) {
+          Advance = true;
+          ++I;
+        }
+        MBB.erase(NextMBBI);
+        return true;
+      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+        if (NextMBBI == I) {
+          Advance = true;
+          ++I;
+        }
+        MBB.erase(NextMBBI);
+        return true;
+      }
+    }
+  } else {
+    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+      return false;
+
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+    if (MBBI != MBB.begin()) {
+      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+      if (Mode == ARM_AM::ia &&
+          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+        MBB.erase(PrevMBBI);
+        return true;
+      }
+    }
+
+    if (MBBI != MBB.end()) {
+      MachineBasicBlock::iterator NextMBBI = next(MBBI);
+      if (Mode == ARM_AM::ia &&
+          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+        if (NextMBBI == I) {
+          Advance = true;
+          ++I;
+        }
+        MBB.erase(NextMBBI);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+  switch (Opc) {
+  case ARM::LDR: return ARM::LDR_PRE;
+  case ARM::STR: return ARM::STR_PRE;
+  case ARM::FLDS: return ARM::FLDMS;
+  case ARM::FLDD: return ARM::FLDMD;
+  case ARM::FSTS: return ARM::FSTMS;
+  case ARM::FSTD: return ARM::FSTMD;
+  default: abort();
+  }
+  return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+  switch (Opc) {
+  case ARM::LDR: return ARM::LDR_POST;
+  case ARM::STR: return ARM::STR_POST;
+  case ARM::FLDS: return ARM::FLDMS;
+  case ARM::FLDD: return ARM::FLDMD;
+  case ARM::FSTS: return ARM::FSTMS;
+  case ARM::FSTD: return ARM::FSTMD;
+  default: abort();
+  }
+  return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     const TargetInstrInfo *TII,
+                                     bool &Advance,
+                                     MachineBasicBlock::iterator &I) {
+  MachineInstr *MI = MBBI;
+  unsigned Base = MI->getOperand(1).getReg();
+  bool BaseKill = MI->getOperand(1).isKill();
+  unsigned Bytes = getLSMultipleTransferSize(MI);
+  int Opcode = MI->getOpcode();
+  DebugLoc dl = MI->getDebugLoc();
+  bool isAM2 = Opcode ==
ARM::LDR || Opcode == ARM::STR; + if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) || + (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)) + return false; + + bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + // Can't do the merge if the destination register is the same as the would-be + // writeback register. + if (isLd && MI->getOperand(0).getReg() == Base) + return false; + + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + bool DoMerge = false; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + unsigned NewOpc = 0; + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes, + Pred, PredReg)) { + DoMerge = true; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) + MBB.erase(PrevMBBI); + } + + if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) { + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + } + } + + if (!DoMerge) + return false; + + bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) + : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, + true, isDPR ? 2 : 1); + if (isLd) { + if (isAM2) + // LDR_PRE, LDR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FLDMS, FLDMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Offset).addImm(Pred).addReg(PredReg) + .addReg(MI->getOperand(0).getReg(), RegState::Define); + } else { + MachineOperand &MO = MI->getOperand(0); + if (isAM2) + // STR_PRE, STR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(BaseKill)) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FSTMS, FSTMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) + .addImm(Pred).addReg(PredReg) + .addReg(MO.getReg(), getKillRegState(MO.isKill())); + } + MBB.erase(MBBI); + + return true; +} + +/// isMemoryOp - Returns true if instruction is a memory operations (that this +/// pass is capable of operating on). +static bool isMemoryOp(MachineInstr *MI) { + int Opcode = MI->getOpcode(); + switch (Opcode) { + default: break; + case ARM::LDR: + case ARM::STR: + return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; + case ARM::FLDS: + case ARM::FSTS: + return MI->getOperand(1).isReg(); + case ARM::FLDD: + case ARM::FSTD: + return MI->getOperand(1).isReg(); + } + return false; +} + +/// AdvanceRS - Advance register scavenger to just before the earliest memory +/// op that is being merged. 
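+/// (Illustration with hypothetical positions: if MemOps holds entries at
+/// positions {12, 7, 9}, the scan below picks position 7 and the scavenger
+/// is stepped forward to the instruction just before it.)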
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { + MachineBasicBlock::iterator Loc = MemOps[0].MBBI; + unsigned Position = MemOps[0].Position; + for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { + if (MemOps[i].Position < Position) { + Position = MemOps[i].Position; + Loc = MemOps[i].MBBI; + } + } + + if (Loc != MBB.begin()) + RS->forward(prior(Loc)); +} + +/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR +/// ops of the same base and incrementing offset into LDM / STM ops. +bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { + unsigned NumMerges = 0; + unsigned NumMemOps = 0; + MemOpQueue MemOps; + unsigned CurrBase = 0; + int CurrOpc = -1; + unsigned CurrSize = 0; + ARMCC::CondCodes CurrPred = ARMCC::AL; + unsigned CurrPredReg = 0; + unsigned Position = 0; + + RS->enterBasicBlock(&MBB); + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + bool Advance = false; + bool TryMerge = false; + bool Clobber = false; + + bool isMemOp = isMemoryOp(MBBI); + if (isMemOp) { + int Opcode = MBBI->getOpcode(); + bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + unsigned Size = getLSMultipleTransferSize(MBBI); + unsigned Base = MBBI->getOperand(1).getReg(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); + unsigned NumOperands = MBBI->getDesc().getNumOperands(); + unsigned OffField = MBBI->getOperand(NumOperands-3).getImm(); + int Offset = isAM2 + ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + // Watch out for: + // r4 := ldr [r5] + // r5 := ldr [r5, #4] + // r6 := ldr [r5, #8] + // + // The second ldr has effectively broken the chain even though it + // looks like the later ldr(s) use the same base register. Try to + // merge the ldr's so far, including this one. But don't try to + // combine the following ldr(s). + Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg()); + if (CurrBase == 0 && !Clobber) { + // Start of a new chain. + CurrBase = Base; + CurrOpc = Opcode; + CurrSize = Size; + CurrPred = Pred; + CurrPredReg = PredReg; + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + if (Clobber) { + TryMerge = true; + Advance = true; + } + + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // No need to match PredReg. + // Continue adding to the queue. + if (Offset > MemOps.back().Offset) { + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + break; + } else if (Offset == I->Offset) { + // Collision! This can't be merged! + break; + } + } + } + } + } + } + + if (Advance) { + ++Position; + ++MBBI; + } else + TryMerge = true; + + if (TryMerge) { + if (NumMemOps > 1) { + // Try to find a free register to use as a new base in case it's needed. + // First advance to the instruction just before the start of the chain. + AdvanceRS(MBB, MemOps); + // Find a scratch register. Make sure it's a call clobbered register or + // a spilled callee-saved register. 
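+        // (A scratch register is needed when MergeLDR_STR must rewrite the
+        // base. Sketch of the merge it performs, with hypothetical
+        // registers and offsets 0/4/8 collected in MemOps:
+        //   ldr r0, [r4]
+        //   ldr r1, [r4, #4]
+        //   ldr r2, [r4, #8]
+        // =>
+        //   ldmia r4, {r0, r1, r2})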
+        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
+        if (!Scratch)
+          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
+                                      AFI->getSpilledCSRegisters());
+        // Process the load / store instructions.
+        RS->forward(prior(MBBI));
+
+        // Merge ops.
+        SmallVector<MachineBasicBlock::iterator, 4> MBBII =
+          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+                       CurrPred, CurrPredReg, Scratch, MemOps);
+
+        // Try folding preceding/trailing base inc/dec into the generated
+        // LDM/STM ops.
+        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+            NumMerges++;
+        NumMerges += MBBII.size();
+
+        // Try folding preceding/trailing base inc/dec into those load/store
+        // ops that were not merged to form LDM/STM ops.
+        for (unsigned i = 0; i != NumMemOps; ++i)
+          if (!MemOps[i].Merged)
+            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+              NumMerges++;
+
+        // RS may be pointing to an instruction that's deleted.
+        RS->skipTo(prior(MBBI));
+      }
+
+      CurrBase = 0;
+      CurrOpc = -1;
+      CurrSize = 0;
+      CurrPred = ARMCC::AL;
+      CurrPredReg = 0;
+      if (NumMemOps) {
+        MemOps.clear();
+        NumMemOps = 0;
+      }
+
+      // If iterator hasn't been advanced and this is not a memory op, skip it.
+      // It can't start a new chain anyway.
+      if (!Advance && !isMemOp && MBBI != E) {
+        ++Position;
+        ++MBBI;
+      }
+    }
+  }
+  return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so it directly restores the
+/// value of LR into pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+  if (MBB.empty()) return false;
+
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+    MachineInstr *PrevMI = prior(MBBI);
+    if (PrevMI->getOpcode() == ARM::LDM) {
+      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+      if (MO.getReg() == ARM::LR) {
+        PrevMI->setDesc(TII->get(ARM::LDM_RET));
+        MO.setReg(ARM::PC);
+        MBB.erase(MBBI);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  const TargetMachine &TM = Fn.getTarget();
+  AFI = Fn.getInfo<ARMFunctionInfo>();
+  TII = TM.getInstrInfo();
+  TRI = TM.getRegisterInfo();
+  RS = new RegScavenger();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    Modified |= LoadStoreMultipleOpti(MBB);
+    Modified |= MergeReturnIntoLDM(MBB);
+  }
+
+  delete RS;
+  return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 000000000000..6662be12a578
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,238 @@
+//====- ARMMachineFunctionInfo.h - ARM machine function info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+  /// isThumb - True if this function is compiled under Thumb mode.
+  /// Used to initialize Align, so it must precede it.
+  bool isThumb;
+
+  /// Align - required alignment. ARM functions and Thumb functions with
+  /// constant pools require 4-byte alignment; other Thumb functions
+  /// require only 2-byte alignment.
+  unsigned Align;
+
+  /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+  ///
+  unsigned VarArgsRegSaveSize;
+
+  /// HasStackFrame - True if this function has a stack frame. Set by
+  /// processFunctionBeforeCalleeSavedScan().
+  bool HasStackFrame;
+
+  /// LRSpilledForFarJump - True if the LR register has been spilled to
+  /// enable a far jump.
+  bool LRSpilledForFarJump;
+
+  /// R3IsLiveIn - True if R3 is live in to this function.
+  /// FIXME: Remove when register scavenger for Thumb is done.
+  bool R3IsLiveIn;
+
+  /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+  /// spill stack offset.
+  unsigned FramePtrSpillOffset;
+
+  /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offsets of the
+  /// callee-saved register spill areas. For Mac OS X:
+  ///
+  /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+  /// --------------------------------------------
+  /// GPR callee-saved (2) : r8, r10, r11
+  /// --------------------------------------------
+  /// DPR callee-saved : d8 - d15
+  unsigned GPRCS1Offset;
+  unsigned GPRCS2Offset;
+  unsigned DPRCSOffset;
+
+  /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of the callee-saved register
+  /// spill areas.
+  unsigned GPRCS1Size;
+  unsigned GPRCS2Size;
+  unsigned DPRCSSize;
+
+  /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+  /// which belong to these spill areas.
+  BitVector GPRCS1Frames;
+  BitVector GPRCS2Frames;
+  BitVector DPRCSFrames;
+
+  /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
+  ///
+  BitVector SpilledCSRegs;
+
+  /// JumpTableUId - Unique id for jumptables.
+  ///
+  unsigned JumpTableUId;
+
+  unsigned ConstPoolEntryUId;
+
+public:
+  ARMFunctionInfo() :
+    isThumb(false),
+    Align(2U),
+    VarArgsRegSaveSize(0), HasStackFrame(false),
+    LRSpilledForFarJump(false), R3IsLiveIn(false),
+    FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+    GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+    GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+    JumpTableUId(0), ConstPoolEntryUId(0) {}
+
+  ARMFunctionInfo(MachineFunction &MF) :
+    isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+    Align(isThumb ?
1U : 2U), + VarArgsRegSaveSize(0), HasStackFrame(false), + LRSpilledForFarJump(false), R3IsLiveIn(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), + SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), + JumpTableUId(0), ConstPoolEntryUId(0) {} + + bool isThumbFunction() const { return isThumb; } + + unsigned getAlign() const { return Align; } + void setAlign(unsigned a) { Align = a; } + + unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } + void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + + bool hasStackFrame() const { return HasStackFrame; } + void setHasStackFrame(bool s) { HasStackFrame = s; } + + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } + void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } + + // FIXME: Remove when register scavenger for Thumb is done. + bool isR3LiveIn() const { return R3IsLiveIn; } + void setR3IsLiveIn(bool l) { R3IsLiveIn = l; } + + unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } + void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } + unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } + unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } + + void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; } + void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; } + void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } + + unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } + unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + + void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } + void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + + bool isGPRCalleeSavedArea1Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS1Frames.size()) + return false; + return GPRCS1Frames[fi]; + } + bool isGPRCalleeSavedArea2Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS2Frames.size()) + return false; + return GPRCS2Frames[fi]; + } + bool isDPRCalleeSavedAreaFrame(int fi) const { + if (fi < 0 || fi >= (int)DPRCSFrames.size()) + return false; + return DPRCSFrames[fi]; + } + + void addGPRCalleeSavedArea1Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS1Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS1Frames.resize(Size); + } + GPRCS1Frames[fi] = true; + } + } + void addGPRCalleeSavedArea2Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS2Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS2Frames.resize(Size); + } + GPRCS2Frames[fi] = true; + } + } + void addDPRCalleeSavedAreaFrame(int fi) { + if (fi >= 0) { + int Size = DPRCSFrames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + DPRCSFrames.resize(Size); + } + DPRCSFrames[fi] = true; + } + } + + void setCSRegisterIsSpilled(unsigned Reg) { + SpilledCSRegs.set(Reg); + } + + bool isCSRegisterSpilled(unsigned Reg) const { + return SpilledCSRegs[Reg]; + } + + const BitVector &getSpilledCSRegisters() const { + return SpilledCSRegs; + } + + unsigned createJumpTableUId() { + return JumpTableUId++; + } + + unsigned getNumJumpTables() const { + return 
JumpTableUId; + } + + void initConstPoolEntryUId(unsigned UId) { + ConstPoolEntryUId = UId; + } + + unsigned getNumConstPoolEntries() const { + return ConstPoolEntryUId; + } + + unsigned createConstPoolEntryUId() { + return ConstPoolEntryUId++; + } +}; +} // End llvm namespace + +#endif // ARMMACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp new file mode 100644 index 000000000000..199858f525c8 --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -0,0 +1,1528 @@ +//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include +using namespace llvm; + +static cl::opt ThumbRegScavenging("enable-thumb-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb")); + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace ARM; + switch (RegEnum) { + case R0: case S0: case D0: return 0; + case R1: case S1: case D1: return 1; + case R2: case S2: case D2: return 2; + case R3: case S3: case D3: return 3; + case R4: case S4: case D4: return 4; + case R5: case S5: case D5: return 5; + case R6: case S6: case D6: return 6; + case R7: case S7: case D7: return 7; + case R8: case S8: case D8: return 8; + case R9: case S9: case D9: return 9; + case R10: case S10: case D10: return 10; + case R11: case S11: case D11: return 11; + case R12: case S12: case D12: return 12; + case SP: case S13: case D13: return 13; + case LR: case S14: case D14: return 14; + case PC: case S15: case D15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + default: + assert(0 && "Unknown ARM register!"); + abort(); + } +} + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, + bool &isSPVFP) { + isSPVFP = false; + + using namespace ARM; + switch (RegEnum) { + default: + assert(0 && "Unknown ARM register!"); + abort(); + case R0: case D0: return 0; + case R1: case D1: return 1; + case R2: case D2: return 2; + case R3: case D3: return 3; + case R4: case 
D4: return 4; + case R5: case D5: return 5; + case R6: case D6: return 6; + case R7: case D7: return 7; + case R8: case D8: return 8; + case R9: case D9: return 9; + case R10: case D10: return 10; + case R11: case D11: return 11; + case R12: case D12: return 12; + case SP: case D13: return 13; + case LR: case D14: return 14; + case PC: case D15: return 15; + + case S0: case S1: case S2: case S3: + case S4: case S5: case S6: case S7: + case S8: case S9: case S10: case S11: + case S12: case S13: case S14: case S15: + case S16: case S17: case S18: case S19: + case S20: case S21: case S22: case S23: + case S24: case S25: case S26: case S27: + case S28: case S29: case S30: case S31: { + isSPVFP = true; + switch (RegEnum) { + default: return 0; // Avoid compile time warning. + case S0: return 0; + case S1: return 1; + case S2: return 2; + case S3: return 3; + case S4: return 4; + case S5: return 5; + case S6: return 6; + case S7: return 7; + case S8: return 8; + case S9: return 9; + case S10: return 10; + case S11: return 11; + case S12: return 12; + case S13: return 13; + case S14: return 14; + case S15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + } + } + } +} + +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), + TII(tii), STI(sti), + FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { +} + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + unsigned Pred, unsigned PredReg, + const TargetInstrInfo *TII, + bool isThumb, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + if (isThumb) + BuildMI(MBB, MBBI, dl, + TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx); + else + BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); +} + +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// isLowRegister - Returns true if the register is low register r0-r7. 
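+/// (Background: most 16-bit Thumb instructions can only encode the low
+/// registers r0-r7, which is why the Thumb paths below repeatedly
+/// special-case high registers.)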
+/// +bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +const TargetRegisterClass* +ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (STI.isThumb()) { + if (isLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + } + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +const unsigned* +ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const unsigned CalleeSavedRegs[] = { + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + + static const unsigned DarwinCalleeSavedRegs[] = { + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R9, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +} + +const TargetRegisterClass* const * +ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses; +} + +BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + // FIXME: avoid re-calculating this everytime. + BitVector Reserved(getNumRegs()); + Reserved.set(ARM::SP); + Reserved.set(ARM::PC); + if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(FramePtr); + // Some targets reserve R9. + if (STI.isR9Reserved()) + Reserved.set(ARM::R9); + return Reserved; +} + +bool +ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { + switch (Reg) { + default: break; + case ARM::SP: + case ARM::PC: + return true; + case ARM::R7: + case ARM::R11: + if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + return true; + break; + case ARM::R9: + return STI.isR9Reserved(); + } + + return false; +} + +bool +ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo(); + return ThumbRegScavenging || !AFI->isThumbFunction(); +} + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. 
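+/// (For example, a function that calls alloca() must address its locals
+/// off the frame pointer, since SP moves at runtime.)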
+/// +bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + ARMFunctionInfo *AFI = MF.getInfo(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (AFI->isThumbFunction()) { + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + } else { + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + } + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitARMRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in ARM code. +static +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo &TII, + DebugLoc dl) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + // Get the properly encoded SOImmVal field. + int SOImmVal = ARM_AM::getSOImmVal(ThisVal); + assert(SOImmVal != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. 
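+/// (For the ARM helper emitARMRegPlusImmediate above, an offset such as
+/// 0x1004 is not a single rotated 8-bit immediate, so it is split into two
+/// so_imm chunks, e.g. with hypothetical registers:
+///   add rD, rB, #0x1000
+///   add rD, rD, #0x4)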
+static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !MRI.isLowRegister(DestReg) || + (BaseReg != 0 && !MRI.isLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, + true, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. 
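+    // Worked example (assumed values): NumBytes = 4000 needs one tADDrSPi
+    // (at most 255*4 = 1020) plus many tADDi8 steps (at most 255 each), so
+    // NumMIs far exceeds the threshold and the constant-pool path below is
+    // taken instead.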
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, + MRI, dl); + return; + } + + if (DstNotEqBase) { + if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill); + } + BaseReg = DestReg; + } + + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + ThisVal /= Scale; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(DestReg).addImm(ThisVal); + else { + bool isKill = BaseReg != ARM::SP; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Scale = 1; + Chunk = ((1 << NumBits) - 1) * Scale; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3); +} + +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, + bool isThumb, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + if (isThumb) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, + MRI, dl); + else + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII, dl); +} + +void ARMRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + ARMFunctionInfo *AFI = MF.getInfo(); + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + bool isThumb = AFI->isThumbFunction(); + ARMCC::CondCodes Pred = isThumb + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg(); + emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. + unsigned PredReg = isThumb ? 
0 : Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl); + } + } + } + MBB.erase(I); +} + +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + if (isSub) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) + .addReg(DestReg, RegState::Kill); +} + +/// findScratchRegister - Find a 'free' ARM register. If register scavenger +/// is not being used, R12 is available. Otherwise, try for a call-clobbered +/// register first and then a spilled callee-saved register if that fails. +static +unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, + ARMFunctionInfo *AFI) { + unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12; + assert (!AFI->isThumbFunction()); + if (Reg == 0) + // Try a already spilled CS register. + Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters()); + + return Reg; +} + +void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + bool isThumb = AFI->isThumbFunction(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (hasFP(MF)) { + assert(SPAdj == 0 && "Unexpected"); + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrMode2; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(i+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::MOVr)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::SUBri)); + } + + // Common case: small offset, fits into instruction. 
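+    // Illustration (hypothetical operands): if the resolved offset is 64,
+    // getSOImmVal(64) succeeds below and the frame-index ADDri is rewritten
+    // directly to "add rD, sp, #64" (SUBri when the offset is negative).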
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); + return; + } + + // Otherwise, we fallback to common code below to form the imm offset with + // a sequence of ADDri instructions. First though, pull as much of the imm + // into this ADDri as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); + } else if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + + // Can't use tADDrSPi if it's based off the frame pointer. + unsigned NumBits = 0; + unsigned Scale = 1; + if (FrameReg != ARM::SP) { + Opcode = ARM::tADDi3; + MI.setDesc(TII.get(ARM::tADDi3)); + NumBits = 3; + } else { + NumBits = 8; + Scale = 4; + assert((Offset & 3) == 0 && + "Thumb add/sub sp, #imm immediate must be multiple of 4!"); + } + + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. + unsigned Mask = (1 << NumBits) - 1; + if (((Offset / Scale) & ~Mask) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Bytes = (Offset > 0) ? Offset : -Offset; + unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); + // MI would expand into a large number of instructions. Don't try to + // simplify the immediate. 
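+    // Worked example (assumed values): Offset = 2000 would take
+    // "add rD, sp, #1020" plus four tADDi8 steps, so NumMIs exceeds 2 and
+    // the helper below materializes the sum directly instead.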
+ if (NumMIs > 2) { + emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, + *this, dl); + MBB.erase(II); + return; + } + + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + Offset = (Offset - Mask * Scale); + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, + *this, dl); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode2: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode5: { + ImmIdx = i+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + case ARMII::AddrModeTs: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = (FrameReg == ARM::SP) ? 8 : 5; + Scale = 4; + break; + } + default: + assert(0 && "Unsupported addressing mode!"); + abort(); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0 && !isThumb) { + Offset = -Offset; + isSub = true; + } + + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } + + bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; + if (AddrMode == ARMII::AddrModeTs) { + // Thumb tLDRspi, tSTRspi. These will change to instructions that use + // a different base register. + NumBits = 5; + Mask = (1 << NumBits) - 1; + } + // If this is a thumb spill / restore, we will be using a constpool load to + // materialize the offset. + if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore) + ImmOp.ChangeToImmediate(0); + else { + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. 
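+  // Summary of the immediate ranges handled above, per addressing mode:
+  // AddrMode2 encodes 12 bits (0-4095), AddrMode3 8 bits (0-255),
+  // AddrMode5 8 bits scaled by 4 (0-1020), AddrModeTs 5 or 8 bits scaled
+  // by 4.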
+ assert(Offset && "This code isn't needed if offset already handled!"); + + if (isThumb) { + if (Desc.mayLoad()) { + // Use the destination register to materialize sp + offset. + unsigned TmpReg = MI.getOperand(0).getReg(); + bool UseRR = false; + if (Opcode == ARM::tRestore) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, + true, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tLDR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) + // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tLDR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + } else if (Desc.mayStore()) { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also a ABI register so it's possible that is is the register that is + // being storing here. If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r3, r2 + // r2 = r12 + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + bool UseRR = false; + if (ValReg == ARM::R3) { + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R2, RegState::Kill); + TmpReg = ARM::R2; + } + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, + true, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + + MachineBasicBlock::iterator NII = next(II); + if (ValReg == ARM::R3) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) + .addReg(ARM::R12, RegState::Kill); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); + } else + assert(false && "Unexpected opcode!"); + } else { + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); + if (ScratchReg == 0) + // No register is "free". Scavenge a register. + ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); + int PIdx = MI.findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); + unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); + emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, + isSub ? 
-Offset : Offset, Pred, PredReg, TII, dl); + MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + } +} + +static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + } + return (unsigned)Offset; +} + +void +ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // This tells PEI to spill the FP as if it is any other callee-save register + // to take advantage the eliminateFrameIndex machinery. This also ensures it + // is spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool CanEliminateFrame = true; + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector UnspilledCS1GPRs; + SmallVector UnspilledCS2GPRs; + ARMFunctionInfo *AFI = MF.getInfo(); + + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = getCalleeSavedRegs(); + const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.getRegInfo().isPhysRegUsed(Reg)) { + AFI->setCSRegisterIsSpilled(Reg); + Spilled = true; + CanEliminateFrame = false; + } else { + // Check alias registers too. + for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { + Spilled = true; + CanEliminateFrame = false; + } + } + } + + if (CSRegClasses[i] == &ARM::GPRRegClass) { + if (Spilled) { + NumGPRSpills++; + + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) + LRSpilled = true; + CS1Spilled = true; + continue; + } + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + } + + bool ForceLRSpill = false; + if (!LRSpilled && AFI->isThumbFunction()) { + unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + // Force LR to be spilled if the Thumb function size is > 2048. This enables + // use of BL to implement far jump. If it turns out that it's not needed + // then the branch fix up path will undo it. + if (FnSize >= (1 << 11)) { + CanEliminateFrame = false; + ForceLRSpill = true; + } + } + + bool ExtraCSSpill = false; + if (!CanEliminateFrame || hasFP(MF)) { + AFI->setHasStackFrame(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
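+    // That is, spilling LR here later allows
+    //   ldmfd sp!, {r4, r7, pc}
+    // in place of
+    //   ldmfd sp!, {r4, r7, lr}
+    //   bx lr
+    // (see MergeReturnIntoLDM earlier in this patch; registers shown are
+    // hypothetical).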
+ if (!LRSpilled && CS1Spilled) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + AFI->setCSRegisterIsSpilled(ARM::LR); + NumGPRSpills++; + UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), + UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + ForceLRSpill = false; + ExtraCSSpill = true; + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MF.getRegInfo().setPhysRegUsed(FramePtr); + NumGPRSpills++; + } + + // If stack and double are 8-byte aligned and we are spilling an odd number + // of GPRs. Spill one extra callee save GPR so we won't have to pad between + // the integer and double callee save areas. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (CS1Spilled && !UnspilledCS1GPRs.empty()) { + for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { + unsigned Reg = UnspilledCS1GPRs[i]; + // Don't spiil high register if the function is thumb + if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) { + MF.getRegInfo().setPhysRegUsed(Reg); + AFI->setCSRegisterIsSpilled(Reg); + if (!isReservedReg(MF, Reg)) + ExtraCSSpill = true; + break; + } + } + } else if (!UnspilledCS2GPRs.empty() && + !AFI->isThumbFunction()) { + unsigned Reg = UnspilledCS2GPRs.front(); + MF.getRegInfo().setPhysRegUsed(Reg); + AFI->setCSRegisterIsSpilled(Reg); + if (!isReservedReg(MF, Reg)) + ExtraCSSpill = true; + } + } + + // Estimate if we might need to scavenge a register at some point in order + // to materialize a stack offset. If so, either spill one additiona + // callee-saved register or reserve a special spill slot to facilitate + // register scavenging. + if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = estimateStackSize(MF, MFI); + unsigned Limit = (1 << 12) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB) + for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i).isFI()) { + unsigned Opcode = I->getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + if (AddrMode == ARMII::AddrMode3) { + Limit = (1 << 8) - 1; + goto DoneEstimating; + } else if (AddrMode == ARMII::AddrMode5) { + unsigned ThisLimit = ((1 << 8) - 1) * 4; + if (ThisLimit < Limit) + Limit = ThisLimit; + } + } + } + DoneEstimating: + if (Size >= Limit) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + AFI->setCSRegisterIsSpilled(Extras[i]); + } + } else { + // Reserve a slot closest to SP or frame pointer. 
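+          // (The emergency spill slot gives the register scavenger
+          // somewhere to temporarily save a live register while a
+          // frame index is being rewritten.)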
+          const TargetRegisterClass *RC = &ARM::GPRRegClass;
+          RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                             RC->getAlignment()));
+        }
+      }
+    }
+  }
+
+  if (ForceLRSpill) {
+    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+    AFI->setCSRegisterIsSpilled(ARM::LR);
+    AFI->setLRIsSpilledForFarJump(true);
+  }
+}
+
+/// Move the iterator past the next bunch of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   int Opc, unsigned Area,
+                                   const ARMSubtarget &STI) {
+  while (MBBI != MBB.end() &&
+         MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+    if (Area != 0) {
+      bool Done = false;
+      unsigned Category = 0;
+      switch (MBBI->getOperand(0).getReg()) {
+      case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+      case ARM::LR:
+        Category = 1;
+        break;
+      case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+        Category = STI.isTargetDarwin() ? 2 : 1;
+        break;
+      case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+      case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+        Category = 3;
+        break;
+      default:
+        Done = true;
+        break;
+      }
+      if (Done || Category != Area)
+        break;
+    }
+
+    ++MBBI;
+  }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  if (isThumb) {
+    // Check if R3 is live in. It might have to be used as a scratch register.
+    for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
+         E = MF.getRegInfo().livein_end(); I != E; ++I) {
+      if (I->first == ARM::R3) {
+        AFI->setR3IsLiveIn(true);
+        break;
+      }
+    }
+
+    // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+    NumBytes = (NumBytes + 3) & ~3;
+    MFI->setStackSize(NumBytes);
+  }
+
+  // Determine the size of each callee-save spill area and record which frame
+  // index belongs to which callee-save spill area.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+                 *this, dl);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  if (!isThumb) {
+    // Build the new SUBri to adjust SP for integer callee-save spill area 1.
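+    // Resulting layout, per the offset math further below (Darwin case,
+    // from high to low addresses): [GPR area 1: r4-r7, lr] [GPR area 2:
+    // r8, r10, r11] [DPR area: d8-d15] [locals], each area carved out by
+    // its own SP adjustment.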
+    emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl);
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+  } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    ++MBBI;
+    if (MBBI != MBB.end())
+      dl = MBBI->getDebugLoc();
+  }
+
+  // Darwin ABI requires FP to point to the stack slot that contains the
+  // previous FP.
+  if (STI.isTargetDarwin() || hasFP(MF)) {
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri),
+              FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+    if (!isThumb) AddDefaultCC(AddDefaultPred(MIB));
+  }
+
+  if (!isThumb) {
+    // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+    emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl);
+
+    // Build the new SUBri to adjust SP for FP callee-save spill area.
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+    emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl);
+  }
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+  NumBytes = DPRCSOffset;
+  if (NumBytes) {
+    // Insert it after all the callee-save spills.
+    if (!isThumb)
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+    emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+  }
+
+  if (STI.isTargetELF() && hasFP(MF)) {
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+  }
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  return ((MI->getOpcode() == ARM::FLDD ||
+           MI->getOpcode() == ARM::LDR ||
+           MI->getOpcode() == ARM::tRestore) &&
+          MI->getOperand(1).isFI() &&
+          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert((MBBI->getOpcode() == ARM::BX_RET ||
+          MBBI->getOpcode() == ARM::tBX_RET ||
+          MBBI->getOpcode() == ARM::tPOP_RET) &&
+         "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+  } else {
+    // Unwind MBBI to point to first LDR / FLDD.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+      if (!isCSRestore(MBBI, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to start of FP callee save spill area.
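+    // NumBytes currently holds the whole frame size; stripping the three
+    // callee-save areas leaves only the local-variable space that sits
+    // below the saved registers and must be deallocated first.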
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+    if (isThumb) {
+      if (hasFP(MF)) {
+        NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+        // Reset SP based on the frame pointer only if the stack frame extends
+        // beyond the frame pointer stack slot, or the target is ELF and the
+        // function has an FP.
+        if (NumBytes)
+          emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+                                    TII, *this, dl);
+        else
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+            .addReg(FramePtr);
+      } else {
+        if (MBBI->getOpcode() == ARM::tBX_RET &&
+            &MBB.front() != MBBI &&
+            prior(MBBI)->getOpcode() == ARM::tPOP) {
+          MachineBasicBlock::iterator PMBBI = prior(MBBI);
+          emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+                       *this, dl);
+        } else
+          emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+                       *this, dl);
+      }
+    } else {
+      // Darwin ABI requires FP to point to the stack slot that contains the
+      // previous FP.
+      if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
+        NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+        // Reset SP based on the frame pointer only if the stack frame extends
+        // beyond the frame pointer stack slot, or the target is ELF and the
+        // function has an FP.
+        if (AFI->getGPRCalleeSavedArea2Size() ||
+            AFI->getDPRCalleeSavedAreaSize() ||
+            AFI->getDPRCalleeSavedAreaOffset() ||
+            hasFP(MF)) {
+          if (NumBytes)
+            BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+              .addImm(NumBytes)
+              .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+          else
+            BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
+              .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+        }
+      } else if (NumBytes) {
+        emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl);
+      }
+
+      // Move SP to start of integer callee save spill area 2.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
+                   false, TII, *this, dl);
+
+      // Move SP to start of integer callee save spill area 1.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
+                   false, TII, *this, dl);
+
+      // Move SP to SP upon entry to the function.
+      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
+                   false, TII, *this, dl);
+    }
+  }
+
+  if (VARegSaveSize) {
+    if (isThumb)
+      // Epilogue for vararg functions: pop LR to R3 and branch off it.
+      // FIXME: Verify this is still ok when R3 is no longer being reserved.
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+    emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+                 *this, dl);
+
+    if (isThumb) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+      MBB.erase(MBBI);
+    }
+  }
+}
+
+unsigned ARMRegisterInfo::getRARegister() const {
+  return ARM::LR;
+}
+
+unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+  if (STI.isTargetDarwin() || hasFP(MF))
+    return (STI.useThumbBacktraces() || STI.isThumb()) ?
+      ARM::R7 : ARM::R11;
+  else
+    return ARM::SP;
+}
+
+unsigned ARMRegisterInfo::getEHExceptionRegister() const {
+  assert(0 && "What is the exception register");
+  return 0;
+}
+
+unsigned ARMRegisterInfo::getEHHandlerRegister() const {
+  assert(0 && "What is the exception handler register");
+  return 0;
+}
+
+int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 000000000000..e1d9efbcabf7
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,102 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+  class ARMSubtarget;
+  class TargetInstrInfo;
+  class Type;
+
+struct ARMRegisterInfo : public ARMGenRegisterInfo {
+  const TargetInstrInfo &TII;
+  const ARMSubtarget &STI;
+private:
+  /// FramePtr - ARM physical register used as frame ptr.
+  unsigned FramePtr;
+
+public:
+  ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  void emitLoadConstPool(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         unsigned DestReg, int Val,
+                         unsigned Pred, unsigned PredReg,
+                         const TargetInstrInfo *TII, bool isThumb,
+                         DebugLoc dl) const;
+
+  /// getRegisterNumbering - Given the enum value for some register, e.g.
+  /// ARM::LR, return the number that it corresponds to (e.g. 14).
+  static unsigned getRegisterNumbering(unsigned RegEnum);
+
+  /// Same as previous getRegisterNumbering except it returns true in isSPVFP
+  /// if the register is a single precision VFP register.
+  static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
+
+  /// getPointerRegClass - Return the register class to use to hold pointers.
+  /// This is used for addressing modes.
+  const TargetRegisterClass *getPointerRegClass() const;
+
+  /// Code Generation virtual methods...
+  const TargetRegisterClass *
+    getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+  const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+  const TargetRegisterClass* const*
+  getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  // Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(MachineFunction &MF) const;
+
+  // Exception handling queries.
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+  bool isLowRegister(unsigned Reg) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 000000000000..e8daf7489ebe
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,221 @@
+//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
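+// For example, "r0" below gets ID 0 and "pc" ID 15; the 32 single-precision
+// VFP registers need a wider 5-bit ID, which is why ARMFReg is a separate
+// class from ARMReg.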
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+  field bits<4> Num;
+  let Namespace = "ARM";
+  let SubRegs = subregs;
+}
+
+class ARMFReg<bits<5> num, string n> : Register<n> {
+  field bits<5> Num;
+  let Namespace = "ARM";
+}
+
+// Integer registers
+def R0  : ARMReg< 0, "r0">,  DwarfRegNum<[0]>;
+def R1  : ARMReg< 1, "r1">,  DwarfRegNum<[1]>;
+def R2  : ARMReg< 2, "r2">,  DwarfRegNum<[2]>;
+def R3  : ARMReg< 3, "r3">,  DwarfRegNum<[3]>;
+def R4  : ARMReg< 4, "r4">,  DwarfRegNum<[4]>;
+def R5  : ARMReg< 5, "r5">,  DwarfRegNum<[5]>;
+def R6  : ARMReg< 6, "r6">,  DwarfRegNum<[6]>;
+def R7  : ARMReg< 7, "r7">,  DwarfRegNum<[7]>;
+def R8  : ARMReg< 8, "r8">,  DwarfRegNum<[8]>;
+def R9  : ARMReg< 9, "r9">,  DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP  : ARMReg<13, "sp">,  DwarfRegNum<[13]>;
+def LR  : ARMReg<14, "lr">,  DwarfRegNum<[14]>;
+def PC  : ARMReg<15, "pc">,  DwarfRegNum<[15]>;
+
+// Float registers
+def S0  : ARMFReg< 0, "s0">;  def S1  : ARMFReg< 1, "s1">;
+def S2  : ARMFReg< 2, "s2">;  def S3  : ARMFReg< 3, "s3">;
+def S4  : ARMFReg< 4, "s4">;  def S5  : ARMFReg< 5, "s5">;
+def S6  : ARMFReg< 6, "s6">;  def S7  : ARMFReg< 7, "s7">;
+def S8  : ARMFReg< 8, "s8">;  def S9  : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0  : ARMReg< 0, "d0",  [S0,  S1]>;
+def D1  : ARMReg< 1, "d1",  [S2,  S3]>;
+def D2  : ARMReg< 2, "d2",  [S4,  S5]>;
+def D3  : ARMReg< 3, "d3",  [S6,  S7]>;
+def D4  : ARMReg< 4, "d4",  [S8,  S9]>;
+def D5  : ARMReg< 5, "d5",  [S10, S11]>;
+def D6  : ARMReg< 6, "d6",  [S12, S13]>;
+def D7  : ARMReg< 7, "d7",  [S14, S15]>;
+def D8  : ARMReg< 8, "d8",  [S16, S17]>;
+def D9  : ARMReg< 9, "d9",  [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+
+// Register classes.
+//
+// pc  == Program Counter
+// lr  == Link Register
+// sp  == Stack Pointer
+// r12 == ip (scratch)
+// r7  == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+                                           R7, R8, R9, R10, R12, R11,
+                                           LR, SP, PC]> {
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  // FIXME: We are reserving r12 in case the PEI needs to use it to
+  // generate large stack offset. Make it available once we have register
+  // scavenging. Similarly r3 is reserved in Thumb mode for now.
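+  // The four allocation orders below differ only in which register acts as
+  // FP (R11 for ARM-style backtraces, R7 for Thumb-style) and in whether R9
+  // may be allocated; allocation_order_begin picks the matching array and
+  // allocation_order_end drops FP from the end when it must be preserved.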
+  let MethodBodies = [{
+    // FP is R11, R9 is available.
+    static const unsigned ARM_GPR_AO_1[] = {
+      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R12,ARM::LR,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R9, ARM::R10,
+      ARM::R11 };
+    // FP is R11, R9 is not available.
+    static const unsigned ARM_GPR_AO_2[] = {
+      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R12,ARM::LR,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R10,
+      ARM::R11 };
+    // FP is R7, R9 is available.
+    static const unsigned ARM_GPR_AO_3[] = {
+      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R12,ARM::LR,
+      ARM::R4, ARM::R5, ARM::R6,
+      ARM::R8, ARM::R9, ARM::R10,ARM::R11,
+      ARM::R7 };
+    // FP is R7, R9 is not available.
+    static const unsigned ARM_GPR_AO_4[] = {
+      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R12,ARM::LR,
+      ARM::R4, ARM::R5, ARM::R6,
+      ARM::R8, ARM::R10,ARM::R11,
+      ARM::R7 };
+
+    GPRClass::iterator
+    GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_4;
+        else
+          return ARM_GPR_AO_3;
+      } else {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_2;
+        else
+          return ARM_GPR_AO_1;
+      }
+    }
+
+    GPRClass::iterator
+    GPRClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      GPRClass::iterator I;
+
+      if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved()) {
+          I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+        } else {
+          I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+        }
+      } else {
+        if (Subtarget.isR9Reserved()) {
+          I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+        } else {
+          I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+        }
+      }
+
+      // Mac OS X requires FP not to be clobbered for backtracing purposes.
+      return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+    }
+  }];
+}
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it
+  // to generate large stack offset. Make it available once we have register
+  // scavenging.
+  let MethodBodies = [{
+    static const unsigned THUMB_tGPR_AO[] = {
+      ARM::R2, ARM::R1, ARM::R0,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+    // FP is R7, only low registers available.
+    tGPRClass::iterator
+    tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return THUMB_tGPR_AO;
+    }
+
+    tGPRClass::iterator
+    tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      tGPRClass::iterator I =
+        THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
+      // Mac OS X requires FP not to be clobbered for backtracing purposes.
+      return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ?
+        I-1 : I;
+    }
+  }];
+}
+
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+  S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
+  S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+
+// ARM requires only word alignment for double. It performs better if it
+// is double-word aligned, though.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+  D9, D10, D11, D12, D13, D14, D15]>;
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 000000000000..2cc295085b8b
--- /dev/null
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,56 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+  namespace ARM {
+    enum RelocationType {
+      // reloc_arm_absolute - Absolute relocation, just add the relocated value
+      // to the value already in memory.
+      reloc_arm_absolute,
+
+      // reloc_arm_relative - PC relative relocation, add the relocated value to
+      // the value already in memory, after we adjust it for where the PC is.
+      reloc_arm_relative,
+
+      // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+      // addresses are kept locally in a map.
+      reloc_arm_cp_entry,
+
+      // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+      // should be divided by 4.
+      reloc_arm_vfp_cp_entry,
+
+      // reloc_arm_machine_cp_entry - Relocation of an ARM machine constantpool
+      // entry.
+      reloc_arm_machine_cp_entry,
+
+      // reloc_arm_jt_base - PC relative relocation for jump tables whose
+      // addresses are kept locally in a map.
+      reloc_arm_jt_base,
+
+      // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+      reloc_arm_pic_jt,
+
+      // reloc_arm_branch - Branch address relocation.
+      reloc_arm_branch
+    };
+  }
+}
+
+#endif
+
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 000000000000..ef78cd52d85a
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,84 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+using namespace llvm;
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
+                           bool isThumb)
+  : ARMArchVersion(V4T)
+  , ARMFPUType(None)
+  , IsThumb(isThumb)
+  , ThumbMode(Thumb1)
+  , UseThumbBacktraces(false)
+  , IsR9Reserved(false)
+  , stackAlignment(4)
+  , CPUString("generic")
+  , TargetType(isELF) // Default to ELF unless otherwise specified.
+  , TargetABI(ARM_ABI_APCS) {
+  // Determine default and user specified characteristics
+
+  // Parse features string.
+  CPUString = ParseSubtargetFeatures(FS, CPUString);
+
+  // Set the boolean corresponding to the current target triple, or the default
+  // if one cannot be determined, to true.
+  const std::string& TT = M.getTargetTriple();
+  unsigned Len = TT.length();
+  unsigned Idx = 0;
+
+  if (Len >= 5 && TT.substr(0, 4) == "armv")
+    Idx = 4;
+  else if (Len >= 6 && TT.substr(0, 6) == "thumb") {
+    IsThumb = true;
+    if (Len >= 7 && TT[5] == 'v')
+      Idx = 6;
+  }
+  if (Idx) {
+    unsigned SubVer = TT[Idx];
+    if (SubVer > '4' && SubVer <= '9') {
+      if (SubVer >= '7')
+        ARMArchVersion = V7A;
+      else if (SubVer == '6')
+        ARMArchVersion = V6;
+      else if (SubVer == '5') {
+        ARMArchVersion = V5T;
+        if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+          ARMArchVersion = V5TE;
+      }
+    }
+  }
+
+  if (Len >= 10) {
+    if (TT.find("-darwin") != std::string::npos)
+      // arm-darwin
+      TargetType = isDarwin;
+  } else if (TT.empty()) {
+#if defined(__APPLE__)
+    TargetType = isDarwin;
+#endif
+  }
+
+  if (TT.find("eabi") != std::string::npos)
+    TargetABI = ARM_ABI_AAPCS;
+
+  if (isAAPCS_ABI())
+    stackAlignment = 8;
+
+  if (isTargetDarwin()) {
+    UseThumbBacktraces = true;
+    IsR9Reserved = true;
+  }
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 000000000000..8b469cff77d8
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,122 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+  enum ARMArchEnum {
+    V4T, V5T, V5TE, V6, V7A
+  };
+
+  enum ARMFPEnum {
+    None, VFPv2, VFPv3, NEON
+  };
+
+  enum ThumbTypeEnum {
+    Thumb1,
+    Thumb2
+  };
+
+  /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+  /// V6, V6T2, V7A.
+  ARMArchEnum ARMArchVersion;
+
+  /// ARMFPUType - Floating Point Unit type.
+  ARMFPEnum ARMFPUType;
+
+  /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+  bool IsThumb;
+
+  /// ThumbMode - Indicates supported Thumb version.
+  ThumbTypeEnum ThumbMode;
+
+  /// UseThumbBacktraces - True if we use thumb style backtraces.
+  bool UseThumbBacktraces;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+  bool IsR9Reserved;
+
+  /// stackAlignment - The minimum alignment known to hold of the stack frame on
+  /// entry to the function and which must be maintained by every function.
+  unsigned stackAlignment;
+
+  /// CPUString - String name of used CPU.
+  std::string CPUString;
+
+ public:
+  enum {
+    isELF, isDarwin
+  } TargetType;
+
+  enum {
+    ARM_ABI_APCS,
+    ARM_ABI_AAPCS // ARM EABI
+  } TargetABI;
+
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  ///
+  ARMSubtarget(const Module &M, const std::string &FS, bool isThumb);
+
+  /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+  /// that still makes it profitable to inline the call.
+  unsigned getMaxInlineSizeThreshold() const {
+    // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
+    // Change this once Thumb ldmia / stmia support is added.
+    return isThumb() ? 0 : 64;
+  }
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options. Definition of function is auto generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+
+  bool hasV4TOps()  const { return ARMArchVersion >= V4T; }
+  bool hasV5TOps()  const { return ARMArchVersion >= V5T; }
+  bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+  bool hasV6Ops()   const { return ARMArchVersion >= V6; }
+  bool hasV7Ops()   const { return ARMArchVersion >= V7A; }
+
+  bool hasVFP2() const { return ARMFPUType >= VFPv2; }
+  bool hasVFP3() const { return ARMFPUType >= VFPv3; }
+  bool hasNEON() const { return ARMFPUType >= NEON; }
+
+  bool isTargetDarwin() const { return TargetType == isDarwin; }
+  bool isTargetELF() const { return TargetType == isELF; }
+
+  bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+  bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+  bool isThumb() const { return IsThumb; }
+  bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+
+  bool useThumbBacktraces() const { return UseThumbBacktraces; }
+  bool isR9Reserved() const { return IsR9Reserved; }
+
+  const std::string & getCPUString() const { return CPUString; }
+
+  /// getStackAlignment - Returns the minimum alignment known to hold of the
+  /// stack frame on entry to the function and which must be maintained by every
+  /// function for this subtarget.
+  unsigned getStackAlignment() const { return stackAlignment; }
+};
+} // End llvm namespace
+
+#endif  // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
new file mode 100644
index 000000000000..4107dccd2a69
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -0,0 +1,291 @@
+//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetAsmInfo.h"
+#include "ARMTargetMachine.h"
+#include <cctype>
+#include <cstring>
+using namespace llvm;
+
+
+const char *const llvm::arm_asm_table[] = {
+  "{r0}", "r0",
+  "{r1}", "r1",
+  "{r2}", "r2",
+  "{r3}", "r3",
+  "{r4}", "r4",
+  "{r5}", "r5",
+  "{r6}", "r6",
+  "{r7}", "r7",
+  "{r8}", "r8",
+  "{r9}", "r9",
+  "{r10}", "r10",
+  "{r11}", "r11",
+  "{r12}", "r12",
+  "{r13}", "r13",
+  "{r14}", "r14",
+  "{lr}", "lr",
+  "{sp}", "sp",
+  "{ip}", "ip",
+  "{fp}", "fp",
+  "{sl}", "sl",
+  "{memory}", "memory",
+  "{cc}", "cc",
+  0,0};
+
+ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
+  ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+  GlobalPrefix = "_";
+  PrivateGlobalPrefix = "L";
+  LessPrivateGlobalPrefix = "l";
+  StringConstantPrefix = "\1LC";
+  BSSSection = 0;                       // no BSS section
+  ZeroDirective = "\t.space\t";
+  ZeroFillDirective = "\t.zerofill\t";  // Uses .zerofill
+  SetDirective = "\t.set\t";
+  WeakRefDirective = "\t.weak_reference\t";
+  WeakDefDirective = "\t.weak_definition ";
+  HiddenDirective = "\t.private_extern\t";
+  ProtectedDirective = NULL;
+  JumpTableDataSection = ".const";
+  CStringSection = "\t.cstring";
+  HasDotTypeDotSizeDirective = false;
+  HasSingleParameterDotFile = false;
+  NeedsIndirectEncoding = true;
+  if (TM.getRelocationModel() == Reloc::Static) {
+    StaticCtorsSection = ".constructor";
+    StaticDtorsSection = ".destructor";
+  } else {
+    StaticCtorsSection = ".mod_init_func";
+    StaticDtorsSection = ".mod_term_func";
+  }
+
+  // In non-PIC modes, emit a special label before jump tables so that the
+  // linker can perform more accurate dead code stripping.
+  if (TM.getRelocationModel() != Reloc::PIC_) {
+    // Emit a local label that is preserved until the linker runs.
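+    // On Darwin, "L"-prefixed temporary labels are discarded by the
+    // assembler, whereas "l"-prefixed ones survive into the object file
+    // for the linker to see and are only stripped from the final image.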
+    JumpTableSpecialLabelPrefix = "l";
+  }
+
+  NeedsSet = true;
+  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+  DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+}
+
+ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
+  ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+  NeedsSet = false;
+  HasLEB128 = true;
+  AbsoluteDebugSectionOffsets = true;
+  CStringSection = ".rodata.str";
+  PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  SetDirective = "\t.set\t";
+  DwarfRequiresFrameSection = false;
+  DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits";
+  DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits";
+  DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits";
+  DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits";
+  DwarfPubNamesSection = "\t.section\t.debug_pubnames,\"\",%progbits";
+  DwarfPubTypesSection = "\t.section\t.debug_pubtypes,\"\",%progbits";
+  DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits";
+  DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
+  DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
+  DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
+  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+
+  if (Subtarget->isAAPCS_ABI()) {
+    StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
+    StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array";
+  } else {
+    StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+    StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+  }
+}
+
+/// Count the number of comma-separated arguments.
+/// Do not try to detect errors.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countArguments(const char* p) const {
+  unsigned count = 0;
+  while (*p && isspace(*p) && *p != '\n')
+    p++;
+  count++;
+  while (*p && *p!='\n' &&
+         strncmp(p, BaseTAI::CommentString,
+                 strlen(BaseTAI::CommentString))!=0) {
+    if (*p==',')
+      count++;
+    p++;
+  }
+  return count;
+}
+
+/// Count the length of a string enclosed in quote characters.
+/// Do not try to detect errors.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countString(const char* p) const {
+  unsigned count = 0;
+  while (*p && isspace(*p) && *p!='\n')
+    p++;
+  if (!*p || *p != '\"')
+    return count;
+  while (*++p && *p != '\"')
+    count++;
+  return count;
+}
+
+/// ARM-specific version of TargetAsmInfo::getInlineAsmLength.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::getInlineAsmLength(const char *s) const {
+  // Make a lowercase-folded version of s for counting purposes.
+  char *q, *s_copy = (char *)malloc(strlen(s) + 1);
+  strcpy(s_copy, s);
+  for (q=s_copy; *q; q++)
+    *q = tolower(*q);
+  const char *Str = s_copy;
+
+  // Count the number of bytes in the asm.
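+  // This is a rough upper-bound scan rather than a real assembler pass:
+  // labels, whitespace and comments are skipped, data directives are
+  // charged by operand count, and each remaining instruction costs 4 bytes
+  // (ARM) or 2/4 bytes (Thumb, BL/BLX being the wide cases).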
+  bool atInsnStart = true;
+  bool inTextSection = true;
+  unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (atInsnStart) {
+      // Skip whitespace
+      while (*Str && isspace(*Str) && *Str != '\n')
+        Str++;
+      // Skip label
+      for (const char* p = Str; *p && !isspace(*p); p++)
+        if (*p == ':') {
+          Str = p+1;
+          while (*Str && isspace(*Str) && *Str != '\n')
+            Str++;
+          break;
+        }
+
+      if (*Str == 0) break;
+
+      // Ignore everything from comment char(s) to EOL
+      if (strncmp(Str, BaseTAI::CommentString,
+                  strlen(BaseTAI::CommentString)) == 0)
+        atInsnStart = false;
+      // FIXME do something like the following for non-Darwin
+      else if (*Str == '.' && Subtarget->isTargetDarwin()) {
+        // Directive.
+        atInsnStart = false;
+
+        // Some change the section, but don't generate code.
+        if (strncmp(Str, ".literal4", strlen(".literal4"))==0 ||
+            strncmp(Str, ".literal8", strlen(".literal8"))==0 ||
+            strncmp(Str, ".const", strlen(".const"))==0 ||
+            strncmp(Str, ".constructor", strlen(".constructor"))==0 ||
+            strncmp(Str, ".cstring", strlen(".cstring"))==0 ||
+            strncmp(Str, ".data", strlen(".data"))==0 ||
+            strncmp(Str, ".destructor", strlen(".destructor"))==0 ||
+            strncmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 ||
+            strncmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 ||
+            strncmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 ||
+            strncmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 ||
+            strncmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 ||
+            strncmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 ||
+            strncmp(Str, ".static_data", strlen(".static_data"))==0 ||
+            strncmp(Str, ".section", strlen(".section"))==0 ||
+            strncmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 ||
+            strncmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 ||
+            strncmp(Str, ".dyld", strlen(".dyld"))==0 ||
+            strncmp(Str, ".const_data", strlen(".const_data"))==0 ||
+            strncmp(Str, ".objc", strlen(".objc"))==0 ||       //// many directives
+            strncmp(Str, ".static_const", strlen(".static_const"))==0)
+          inTextSection=false;
+        else if (strncmp(Str, ".text", strlen(".text"))==0)
+          inTextSection = true;
+        // Some can't really be handled without implementing significant pieces
+        // of an assembler. Others require dynamic adjustment of block sizes in
+        // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every
+        // instruction in there, and none of these are currently used in the kernel.
+        else if (strncmp(Str, ".macro", strlen(".macro"))==0 ||
+                 strncmp(Str, ".if", strlen(".if"))==0 ||
+                 strncmp(Str, ".align", strlen(".align"))==0 ||
+                 strncmp(Str, ".fill", strlen(".fill"))==0 ||
+                 strncmp(Str, ".space", strlen(".space"))==0 ||
+                 strncmp(Str, ".zerofill", strlen(".zerofill"))==0 ||
+                 strncmp(Str, ".p2align", strlen(".p2align"))==0 ||
+                 strncmp(Str, ".p2alignw", strlen(".p2alignw"))==0 ||
+                 strncmp(Str, ".p2alignl", strlen(".p2alignl"))==0 ||
+                 strncmp(Str, ".align32", strlen(".align32"))==0 ||
+                 strncmp(Str, ".include", strlen(".include"))==0)
+          cerr << "Directive " << Str << " in asm may lead to invalid offsets for" <<
+                  " constant pools (the assembler will tell you if this happens).\n";
+        // Some generate code, but this is only interesting in the text section.
+        else if (inTextSection) {
+          if (strncmp(Str, ".long", strlen(".long"))==0)
+            Length += 4*countArguments(Str+strlen(".long"));
+          else if (strncmp(Str, ".short", strlen(".short"))==0)
+            Length += 2*countArguments(Str+strlen(".short"));
+          else if (strncmp(Str, ".byte", strlen(".byte"))==0)
+            Length += 1*countArguments(Str+strlen(".byte"));
+          else if (strncmp(Str, ".single", strlen(".single"))==0)
+            Length += 4*countArguments(Str+strlen(".single"));
+          else if (strncmp(Str, ".double", strlen(".double"))==0)
+            Length += 8*countArguments(Str+strlen(".double"));
+          else if (strncmp(Str, ".quad", strlen(".quad"))==0)
+            Length += 16*countArguments(Str+strlen(".quad"));
+          else if (strncmp(Str, ".ascii", strlen(".ascii"))==0)
+            Length += countString(Str+strlen(".ascii"));
+          else if (strncmp(Str, ".asciz", strlen(".asciz"))==0)
+            Length += countString(Str+strlen(".asciz"))+1;
+        }
+      } else if (inTextSection) {
+        // An instruction
+        atInsnStart = false;
+        if (Subtarget->isThumb()) {
+          // BL and BLX are 4 bytes, all others 2.
+          if (strncmp(Str, "blx", strlen("blx"))==0) {
+            const char* p = Str+3;
+            while (*p && isspace(*p))
+              p++;
+            if (*p == 'r' || *p=='R')
+              Length += 2;    // BLX reg
+            else
+              Length += 4;    // BLX non-reg
+          } else if (strncmp(Str, "bl", strlen("bl"))==0)
+            Length += 4;    // BL
+          else
+            Length += 2;    // Thumb anything else
+        }
+        else
+          Length += 4;    // ARM
+      }
+    }
+    if (*Str == '\n' || *Str == BaseTAI::SeparatorChar)
+      atInsnStart = true;
+  }
+  free(s_copy);
+  return Length;
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
new file mode 100644
index 000000000000..9e6f8568f76a
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -0,0 +1,64 @@
+//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETASMINFO_H
+#define ARMTARGETASMINFO_H
+
+#include "ARMTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+  extern const char *const arm_asm_table[];
+
+  template <class BaseTAI>
+  struct ARMTargetAsmInfo : public BaseTAI {
+    explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
+      BaseTAI(TM) {
+      BaseTAI::AsmTransCBE = arm_asm_table;
+
+      BaseTAI::AlignmentIsInBytes = false;
+      BaseTAI::Data64bitsDirective = 0;
+      BaseTAI::CommentString = "@";
+      BaseTAI::ConstantPoolSection = "\t.text\n";
+      BaseTAI::COMMDirectiveTakesAlignment = false;
+      BaseTAI::InlineAsmStart = "@ InlineAsm Start";
+      BaseTAI::InlineAsmEnd = "@ InlineAsm End";
+      BaseTAI::LCOMMDirective = "\t.lcomm\t";
+    }
+
+    const ARMSubtarget *Subtarget;
+
+    virtual unsigned getInlineAsmLength(const char *Str) const;
+    unsigned countArguments(const char *p) const;
+    unsigned countString(const char *p) const;
+  };
+
+  typedef ARMTargetAsmInfo<TargetAsmInfo> ARMGenericTargetAsmInfo;
+
+  EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
+
+  struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> {
+    explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM);
+  };
+
+  struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> {
+    explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM);
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 000000000000..1dc7d19aa105
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,242 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+                              cl::desc("Disable load store optimization pass"));
+static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
+                              cl::desc("Disable if-conversion pass"));
+
+/// ARMTargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int ARMTargetMachineModule;
+int ARMTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<ARMTargetMachine>   X("arm",   "ARM");
+static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
+
+// No assembler printer by default
+ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
+
+/// ThumbTargetMachine - Create a Thumb architecture model.
+///
+unsigned ThumbTargetMachine::getJITMatchQuality() {
+#if defined(__thumb__)
+  return 10;
+#endif
+  return 0;
+}
+
+unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
+  std::string TT = M.getTargetTriple();
+  // Match thumb-foo-bar, as well as things like thumbv5blah-*
+  if (TT.size() >= 6 &&
+      (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv"))
+    return 20;
+
+  // If the target triple is something non-thumb, we don't match.
+  if (!TT.empty()) return 0;
+
+  if (M.getEndianness()  == Module::LittleEndian &&
+      M.getPointerSize() == Module::Pointer32)
+    return 10;                                   // Weak match
+  else if (M.getEndianness() != Module::AnyEndianness ||
+           M.getPointerSize() != Module::AnyPointerSize)
+    return 0;                                    // Match for some other target
+
+  return getJITMatchQuality()/2;
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+  : ARMTargetMachine(M, FS, true) {
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
+                                   bool isThumb)
+  : Subtarget(M, FS, isThumb),
+    DataLayout(Subtarget.isAPCS_ABI() ?
+               // APCS ABI
+          (isThumb ?
+           std::string("e-p:32:32-f64:32:32-i64:32:32-"
+                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+           std::string("e-p:32:32-f64:32:32-i64:32:32")) :
+               // AAPCS ABI
+          (isThumb ?
+           std::string("e-p:32:32-f64:64:64-i64:64:64-"
+                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+           std::string("e-p:32:32-f64:64:64-i64:64:64"))),
+    InstrInfo(Subtarget),
+    FrameInfo(Subtarget),
+    JITInfo(),
+    TLInfo(*this) {
+  DefRelocModel = getRelocationModel();
+}
+
+unsigned ARMTargetMachine::getJITMatchQuality() {
+#if defined(__arm__)
+  return 10;
+#endif
+  return 0;
+}
+
+unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
+  std::string TT = M.getTargetTriple();
+  // Match arm-foo-bar, as well as things like armv5blah-*
+  if (TT.size() >= 4 &&
+      (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv"))
+    return 20;
+  // If the target triple is something non-arm, we don't match.
+  if (!TT.empty()) return 0;
+
+  if (M.getEndianness()  == Module::LittleEndian &&
+      M.getPointerSize() == Module::Pointer32)
+    return 10;                                   // Weak match
+  else if (M.getEndianness() != Module::AnyEndianness ||
+           M.getPointerSize() != Module::AnyPointerSize)
+    return 0;                                    // Match for some other target
+
+  return getJITMatchQuality()/2;
+}
+
+
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+  switch (Subtarget.TargetType) {
+  case ARMSubtarget::isDarwin:
+    return new ARMDarwinTargetAsmInfo(*this);
+  case ARMSubtarget::isELF:
+    return new ARMELFTargetAsmInfo(*this);
+  default:
+    return new ARMGenericTargetAsmInfo(*this);
+  }
+}
+
+
+// Pass Pipeline Configuration
+bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  PM.add(createARMISelDag(*this));
+  return false;
+}
+
+bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+  if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+    PM.add(createARMLoadStoreOptimizationPass());
+
+  if (OptLevel != CodeGenOpt::None &&
+      !DisableIfConversion && !Subtarget.isThumb())
+    PM.add(createIfConverterPass());
+
+  PM.add(createARMConstantIslandPass());
+  return true;
+}
+
+bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool Verbose,
+                                          raw_ostream &Out) {
+  // Output assembly language.
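+  // AsmPrinterCtor is only non-null if the ARM asm-printer library was
+  // linked in and registered itself via registerAsmPrinter(); asserting
+  // here makes a missing printer fail loudly rather than emit nothing.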
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+
+  return false;
+}
+
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel,
+                                      bool DumpAsm,
+                                      MachineCodeEmitter &MCE) {
+  // FIXME: Move this to TargetJITInfo!
+  if (DefRelocModel == Reloc::Default)
+    setRelocationModel(Reloc::Static);
+
+  // Machine code emitter pass for ARM.
+  PM.add(createARMCodeEmitterPass(*this, MCE));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
+
+  return false;
+}
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel,
+                                      bool DumpAsm,
+                                      JITCodeEmitter &JCE) {
+  // FIXME: Move this to TargetJITInfo!
+  if (DefRelocModel == Reloc::Default)
+    setRelocationModel(Reloc::Static);
+
+  // Machine code emitter pass for ARM.
+  PM.add(createARMJITCodeEmitterPass(*this, JCE));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
+
+  return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool DumpAsm,
+                                            MachineCodeEmitter &MCE) {
+  // Machine code emitter pass for ARM.
+  PM.add(createARMCodeEmitterPass(*this, MCE));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
+
+  return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool DumpAsm,
+                                            JITCodeEmitter &JCE) {
+  // Machine code emitter pass for ARM.
+  PM.add(createARMJITCodeEmitterPass(*this, JCE));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
+
+  return false;
+}
+
+
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 000000000000..916a8aa9e985
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,104 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMTargetMachine : public LLVMTargetMachine {
+  ARMSubtarget      Subtarget;
+  const TargetData  DataLayout;   // Calculates type size & alignment
+  ARMInstrInfo      InstrInfo;
+  ARMFrameInfo      FrameInfo;
+  ARMJITInfo        JITInfo;
+  ARMTargetLowering TLInfo;
+  Reloc::Model      DefRelocModel;  // Reloc model before it's overridden.
+
+protected:
+  // To avoid having the target depend on the asm-printer libraries, the
+  // asm printer sets this constructor function pointer at startup time
+  // if it is linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            ARMTargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+  ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+
+  virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual       ARMJITInfo   *getJITInfo()         { return &JITInfo; }
+  virtual const ARMRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const TargetData      *getTargetData() const { return &DataLayout; }
+  virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+  virtual ARMTargetLowering *getTargetLowering() const {
+    return const_cast<ARMTargetLowering*>(&TLInfo);
+  }
+
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
+
+  static unsigned getModuleMatchQuality(const Module &M);
+  static unsigned getJITMatchQuality();
+
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+  // Pass Pipeline Configuration
+  virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addAssemblyEmitter(PassManagerBase &PM,
+                                  CodeGenOpt::Level OptLevel,
+                                  bool Verbose, raw_ostream &Out);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              bool DumpAsm, MachineCodeEmitter &MCE);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              bool DumpAsm, JITCodeEmitter &MCE);
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool DumpAsm,
+                                    MachineCodeEmitter &MCE);
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool DumpAsm,
+                                    JITCodeEmitter &MCE);
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+///
+class ThumbTargetMachine : public ARMTargetMachine {
+public:
+  ThumbTargetMachine(const Module &M, const std::string &FS);
+
+  static unsigned getJITMatchQuality();
+  static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
new file mode 100644
index 000000000000..d908cf436313
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -0,0 +1,1117 @@
+//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMBuildAttrs.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+  class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
+    DwarfWriter *DW;
+    MachineModuleInfo *MMI;
+
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when printing asm code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    /// AFI - Keep a pointer to ARMFunctionInfo for the current
+    /// MachineFunction.
+    ARMFunctionInfo *AFI;
+
+    /// MCP - Keep a pointer to constantpool entries of the current
+    /// MachineFunction.
+    const MachineConstantPool *MCP;
+
+    /// We name each basic block in a Function with a unique number, so
+    /// that we can consistently refer to them later. This is cleared
+    /// at the beginning of each call to runOnMachineFunction().
+    ///
+    typedef std::map<const Value *, unsigned> ValueMapTy;
+    ValueMapTy NumberForBB;
+
+    /// GVNonLazyPtrs - Keeps the set of GlobalValues that require
+    /// non-lazy-pointers for indirect access.
+    StringSet<> GVNonLazyPtrs;
+
+    /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden
+    /// visibility that require non-lazy-pointers for indirect access.
+    StringSet<> HiddenGVNonLazyPtrs;
+
+    /// FnStubs - Keeps the set of external function GlobalAddresses that the
+    /// asm printer should generate stubs for.
+    StringSet<> FnStubs;
+
+    /// True if asm printer is printing a series of CONSTPOOL_ENTRY.
+    bool InCPMode;
+  public:
+    explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
+                           const TargetAsmInfo *T, CodeGenOpt::Level OL,
+                           bool V)
+      : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+        InCPMode(false) {
+      Subtarget = &TM.getSubtarget<ARMSubtarget>();
+    }
+
+    virtual const char *getPassName() const {
+      return "ARM Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int opNum,
+                      const char *Modifier = 0);
+    void printSOImmOperand(const MachineInstr *MI, int opNum);
+    void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+    void printSORegOperand(const MachineInstr *MI, int opNum);
+    void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+                                const char *Modifier = 0);
+    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+                                      unsigned Scale);
+    void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+    void printPredicateOperand(const MachineInstr *MI, int opNum);
+    void printSBitModifierOperand(const MachineInstr *MI, int opNum);
+    void printPCLabel(const MachineInstr *MI, int opNum);
+    void printRegisterList(const MachineInstr *MI, int opNum);
+    void printCPInstOperand(const MachineInstr *MI, int opNum,
+                            const char *Modifier);
+    void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode);
+    virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode);
+
+    void printModuleLevelGV(const GlobalVariable* GVar);
+    bool printInstruction(const MachineInstr *MI);  // autogenerated.
+    void printMachineInstruction(const MachineInstr *MI);
+    bool runOnMachineFunction(MachineFunction &F);
+    bool doInitialization(Module &M);
+    bool doFinalization(Module &M);
+
+    /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+    /// the .s file.
+    virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+      printDataDirective(MCPV->getType());
+
+      ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+      GlobalValue *GV = ACPV->getGV();
+      std::string Name = GV ?
Mang->getValueName(GV) : TAI->getGlobalPrefix(); + if (!GV) + Name += ACPV->getSymbol(); + if (ACPV->isNonLazyPointer()) { + if (GV->hasHiddenVisibility()) + HiddenGVNonLazyPtrs.insert(Name); + else + GVNonLazyPtrs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } else if (ACPV->isStub()) { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } else + O << Name; + if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; + if (ACPV->getPCAdjustment() != 0) { + O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" + << utostr(ACPV->getLabelId()) + << "+" << (unsigned)ACPV->getPCAdjustment(); + if (ACPV->mustAddCurrentAddress()) + O << "-."; + O << ")"; + } + O << "\n"; + + // If the constant pool value is an extern weak symbol, remember to emit + // the weak reference. + if (GV && GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<DwarfWriter>(); + } + }; +} // end of anonymous namespace + +#include "ARMGenAsmWriter.inc" + +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + AFI = MF.getInfo<ARMFunctionInfo>(); + MCP = MF.getConstantPool(); + + SetupMachineFunction(MF); + O << "\n"; + + // NOTE: we don't print out constant pools here, they are handled as + // instructions. + + O << "\n"; + // Print out labels for the function. + const Function *F = MF.getFunction(); + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: + SwitchToTextSection("\t.text", F); + break; + case Function::ExternalLinkage: + SwitchToTextSection("\t.text", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + if (Subtarget->isTargetDarwin()) { + SwitchToTextSection( + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + } else { + O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + } + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + if (AFI->isThumbFunction()) { + EmitAlignment(1, F, AFI->getAlign()); + O << "\t.code\t16\n"; + O << "\t.thumb_func"; + if (Subtarget->isTargetDarwin()) + O << "\t" << CurrentFnName; + O << "\n"; + InCPMode = false; + } else + EmitAlignment(2, F); + + O << CurrentFnName << ":\n"; + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + if (Subtarget->isTargetDarwin()) { + // If the function is empty, then we need to emit *something*. Otherwise, + // the function's label might be associated with something that it wasn't + // meant to be associated with. We emit a noop in this situation. + MachineFunction::iterator I = MF.begin(); + + if (++I == MF.end() && MF.front().empty()) + O << "\tnop\n"; + } + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction.
+ printMachineInstruction(II); + } + } + + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; + + // Emit post-function debug information. + DW->EndFunction(&MF); + + O.flush(); + + return false; +} + +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + else + assert(0 && "not implemented"); + break; + case MachineOperand::MO_Immediate: { + if (!Modifier || strcmp(Modifier, "no_hash") != 0) + O << "#"; + + O << MO.getImm(); + break; + } + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (isExt && isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + + printOffset(MO.getOffset()); + + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + break; + } + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + break; + } + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + O << "<unknown operand type>"; abort (); break; + } +} + +static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, + const TargetAsmInfo *TAI) { + assert(V < (1 << 12) && "Not a valid so_imm value!"); + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (VerboseAsm) + O << ' ' << TAI->getCommentString() + << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. +void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), VerboseAsm, TAI); +} + +/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' +/// followed by an 'orr' to materialize.
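+/// For example (an illustrative value, not taken from the source): 0x00AB00CD +/// has no single 8-bit-rotate encoding, so it can be built from two so_imm +/// pieces, each an 8-bit value rotated right by an even amount: +/// mov r0, #0xCD @ 0xCD, rotate 0 +/// orr r0, r0, #0xAB0000 @ 0xAB, rotated right by 16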
+void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm()); + printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI); + O << "\n\torr"; + printPredicateOperand(MI, 2); + O << " "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 0); + O << ", "; + printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) + << " "; + + if (MO2.getReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + +void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. 
+ O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ImmOffs; + return; + } + + O << (char)ARM_AM::getAM2Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (MO2.getReg()) { + O << ", " + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName + << "]"; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ImmOffs; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ImmOffs; +} + +void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Modifier && strcmp(Modifier, "submode") == 0) { + if (MO1.getReg() == ARM::SP) { + bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_RET); + O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + } else { + printOperand(MI, Op); + if (ARM_AM::getAM4WBFlag(MO2.getImm())) + O << "!"; + } +} + +void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + + if (Modifier && strcmp(Modifier, "submode") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); + if (MO1.getReg() == ARM::SP) { + bool isFLDM = (MI->getOpcode() == ARM::FLDMD || + MI->getOpcode() == ARM::FLDMS); + O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + return; + } else if (Modifier && strcmp(Modifier, "base") == 0) { + // Used for FSTM{D|S} and LSTM{D|S} operations. + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, + const char *Modifier) { + if (Modifier && strcmp(Modifier, "label") == 0) { + printPCLabel(MI, Op+1); + return; + } + + const MachineOperand &MO1 = MI->getOperand(Op); + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, + unsigned Scale) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (MO3.getReg()) + O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName; + else if (unsigned ImmOffs = MO2.getImm()) { + O << ", #" << ImmOffs; + if (Scale > 1) + O << " * " << Scale; + } + O << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 1); +} +void +ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 2); +} +void +ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs << " * 4"; + O << "]"; +} + +void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){ + unsigned Reg = MI->getOperand(opNum).getReg(); + if (Reg) { + assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); + O << 's'; + } +} + +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { + int Id = (int)MI->getOperand(opNum).getImm(); + O << TAI->getPrivateGlobalPrefix() << "PC" << Id; +} + +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { + O << "{"; + for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + printOperand(MI, i); + if (i != e-1) O << ", "; + } + O << "}"; +} + +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, + const char *Modifier) { + assert(Modifier && "This operand only works with a modifier!"); + // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the + // data itself. 
+ if (!strcmp(Modifier, "label")) { + unsigned ID = MI->getOperand(OpNo).getImm(); + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << ID << ":\n"; + } else { + assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); + unsigned CPI = MI->getOperand(OpNo).getIndex(); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + + if (MCPE.isMachineConstantPoolEntry()) { + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + } else { + EmitGlobalConstant(MCPE.Val.ConstVal); + // remember to emit the weak reference + if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + } +} + +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id + unsigned JTI = MO1.getIndex(); + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + const char *JTEntryDirective = TAI->getJumpTableDirective(); + if (!JTEntryDirective) + JTEntryDirective = TAI->getData32bitsDirective(); + + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + std::set<MachineBasicBlock*> JTSets; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (UseSet && JTSets.insert(MBB).second) + printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + + O << JTEntryDirective << ' '; + if (UseSet) + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() + << "_set_" << MBB->getNumber(); + else if (TM.getRelocationModel() == Reloc::PIC_) { + printBasicBlockLabel(MBB, false, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to JT + if (!TAI->getJumpTableDirective()) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); + } else + printBasicBlockLabel(MBB, false, false, false); + if (i != e-1) + O << '\n'; + } +} + + +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // Don't print "#" before a global var name or constant. + case 'c': // Don't print "$" before a global var name or constant. + printOperand(MI, OpNo, "no_hash"); + return false; + case 'P': // Print a VFP double precision register. + printOperand(MI, OpNo); + return false; + case 'Q': + if (TM.getTargetData()->isLittleEndian()) + break; + // Fallthrough + case 'R': + if (TM.getTargetData()->isBigEndian()) + break; + // Fallthrough + case 'H': // Write second word of DI / DF reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part.
+ } + } + + printOperand(MI, OpNo); + return false; +} + +bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printAddrMode2Operand(MI, OpNo); + return false; +} + +void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + int Opc = MI->getOpcode(); + switch (Opc) { + case ARM::CONSTPOOL_ENTRY: + if (!InCPMode && AFI->isThumbFunction()) { + EmitAlignment(2); + InCPMode = true; + } + break; + default: { + if (InCPMode && AFI->isThumbFunction()) + InCPMode = false; + }} + + // Call the autogenerated instruction printer routines. + printInstruction(MI); +} + +bool ARMAsmPrinter::doInitialization(Module &M) { + + bool Result = AsmPrinter::doInitialization(M); + + // Emit initial debug information. + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI); + DW = getAnalysisIfAvailable<DwarfWriter>(); + assert(DW && "Dwarf Writer is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // Darwin wants symbols to be quoted if they have complex names. + if (Subtarget->isTargetDarwin()) + Mang->setUseQuotes(true); + + // Emit ARM Build Attributes + if (Subtarget->isTargetELF()) { + // CPU Type + std::string CPUString = Subtarget->getCPUString(); + if (CPUString != "generic") + O << "\t.cpu " << CPUString << '\n'; + + // FIXME: Emit FPU type + if (Subtarget->hasVFP2()) + O << "\t.eabi_attribute " << ARMBuildAttrs::VFP_arch << ", 2\n"; + + // Signal various FP modes. + if (!UnsafeFPMath) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_denormal << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_exceptions << ", 1\n"; + + if (FiniteOnlyFPMath()) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 1\n"; + else + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 3\n"; + + // 8-byte alignment stuff. + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n"; + + // FIXME: Should we signal R9 usage? + } + + return Result; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. +static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; +} + +void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) // External globals require no code + return; + + // Check to see if this is a special global used by LLVM, if so, emit it.
+ + if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + bool isDarwin = Subtarget->isTargetDarwin(); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type " << name << ",%object\n"; + + if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() && + !(isDarwin && + TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) { + // FIXME: This seems to be pretty darwin-specific + + if (GVar->hasExternalLinkage()) { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (const char *Directive = TAI->getZeroFillDirective()) { + O << "\t.globl\t" << name << "\n"; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << "\n"; + return; + } + } + + if (GVar->hasLocalLinkage() || GVar->isWeakForLinker()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (isDarwin) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size + << ',' << Align; + } else if (GVar->hasCommonLinkage()) { + O << TAI->getCOMMDirective() << name << "," << Size + << ',' << Align; + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } + } else if (TAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size; + } else { + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + return; + } + } + + SwitchToSection(TAI->SectionForGlobal(GVar)); + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (isDarwin) { + O << "\t.globl " << name << "\n" + << "\t.weak_definition " << name << "\n"; + } else { + O << "\t.weak " << name << "\n"; + } + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + O << "\t.globl " << name << "\n"; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + break; + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << name << ", " << Size << "\n"; + + // If the initializer is an extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + + +bool ARMAsmPrinter::doFinalization(Module &M) { + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + if (Subtarget->isTargetDarwin()) { + SwitchToDataSection(""); + + // Output stubs for dynamically-linked functions + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + if (TM.getRelocationModel() == Reloc::PIC_) + SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs," + "none,16", 0); + else + SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs," + "none,12", 0); + + EmitAlignment(2); + O << "\t.code\t32\n"; + + const char *p = i->getKeyData(); + printSuffixedName(p, "$stub"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\tldr ip, "; + printSuffixedName(p, "$slp"); + O << "\n"; + if (TM.getRelocationModel() == Reloc::PIC_) { + printSuffixedName(p, "$scv"); + O << ":\n"; + O << "\tadd ip, pc, ip\n"; + } + O << "\tldr pc, [ip, #0]\n"; + printSuffixedName(p, "$slp"); + O << ":\n"; + O << "\t.long\t"; + printSuffixedName(p, "$lazy_ptr"); + if (TM.getRelocationModel() == Reloc::PIC_) { + O << "-("; + printSuffixedName(p, "$scv"); + O << "+8)\n"; + } else + O << "\n"; + SwitchToDataSection(".lazy_symbol_pointer", 0); + printSuffixedName(p, "$lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\tdyld_stub_binding_helper\n"; + } + O << "\n"; + + // Output non-lazy-pointers for external and common global variables. + if (!GVNonLazyPtrs.empty()) { + SwitchToDataSection("\t.non_lazy_symbol_pointer", 0); + for (StringSet<>::iterator i = GVNonLazyPtrs.begin(), + e = GVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\t0\n"; + } + } + + if (!HiddenGVNonLazyPtrs.empty()) { + SwitchToSection(TAI->getDataSection()); + for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(), + e = HiddenGVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + EmitAlignment(2); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.long " << p << "\n"; + } + } + + + // Emit final debug information. + DW->EndModule(); + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + } else { + // Emit final debug information for ELF.
+ DW->EndModule(); + } + + return AsmPrinter::doFinalization(M); +} + +/// createARMCodePrinterPass - Returns a pass that prints the ARM +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, + ARMTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +namespace { + static struct Register { + Register() { + ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass); + } + } Registrator; +} diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..524a748665be --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMARMAsmPrinter + ARMAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMARMCodeGen n) + +add_dependencies(LLVMARMAsmPrinter ${n}) diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile new file mode 100644 index 000000000000..ce36cec47b6e --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMAsmPrinter + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt new file mode 100644 index 000000000000..2ac40f535497 --- /dev/null +++ b/lib/Target/ARM/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS ARM.td) + +tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(ARMGenRegisterNames.inc -gen-register-enums) +tablegen(ARMGenRegisterInfo.inc -gen-register-desc) +tablegen(ARMGenInstrNames.inc -gen-instr-enums) +tablegen(ARMGenInstrInfo.inc -gen-instr-desc) +tablegen(ARMGenCodeEmitter.inc -gen-emitter) +tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenCallingConv.inc -gen-callingconv) +tablegen(ARMGenSubtarget.inc -gen-subtarget) + +add_llvm_target(ARMCodeGen + ARMCodeEmitter.cpp + ARMConstantIslandPass.cpp + ARMConstantPoolValue.cpp + ARMInstrInfo.cpp + ARMISelDAGToDAG.cpp + ARMISelLowering.cpp + ARMJITInfo.cpp + ARMLoadStoreOptimizer.cpp + ARMRegisterInfo.cpp + ARMSubtarget.cpp + ARMTargetAsmInfo.cpp + ARMTargetMachine.cpp + ) diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile new file mode 100644 index 000000000000..9a3b9be5b345 --- /dev/null +++ b/lib/Target/ARM/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMARMCodeGen +TARGET = ARM + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenDAGISel.inc ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc ARMGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt new file mode 100644 index 000000000000..4d3200b445c1 --- /dev/null +++ b/lib/Target/ARM/README-Thumb.txt @@ -0,0 +1,228 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb specific). +//===---------------------------------------------------------------------===// + +* Add support for compiling functions in both ARM and Thumb mode, then taking + the smallest. + +* Add support for compiling individual basic blocks in thumb mode, when in a + larger ARM function. This can be used for presumed cold code, like paths + to abort (failure path of asserts), EH handling code, etc. + +* Thumb doesn't have normal pre/post increment addressing modes, but you can + load/store 32-bit integers with pre/postinc by using load/store multiple + instrs with a single register. + +* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add + and cmp instructions can use high registers. Also, we can use them as + temporaries to spill values into. + +* In thumb mode, short, byte, and bool preferred alignments are currently set + to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple + of 4). + +//===---------------------------------------------------------------------===// + +Potential jumptable improvements: + +* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit + jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the + function is even smaller. This also applies to ARM. + +* Thumb jumptable codegen can improve given some help from the assembler. This + is what we generate right now: + + .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) +LPCRELL0: + mov r1, #PCRELV0 + add r1, pc + ldr r0, [r0, r1] + cpy pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + ... + +Note there is another pc relative add that we can take advantage of. 
+ add r1, pc, #imm_8 * 4 + +We should be able to generate: + +LPCRELL0: + add r1, LJTI1_0_0 + ldr r0, [r0, r1] + cpy pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + +if the assembler can translate the add to: + add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc) + +Note the assembler also does something similar for constpool loads: +LPCRELL0: + ldr r0, LCPI1_0 +=> + ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc) + + +//===---------------------------------------------------------------------===// + +We compile the following: + +define i16 @func_entry_2E_ce(i32 %i) { + switch i32 %i, label %bb12.exitStub [ + i32 0, label %bb4.exitStub + i32 1, label %bb9.exitStub + i32 2, label %bb4.exitStub + i32 3, label %bb4.exitStub + i32 7, label %bb9.exitStub + i32 8, label %bb.exitStub + i32 9, label %bb9.exitStub + ] + +bb12.exitStub: + ret i16 0 + +bb4.exitStub: + ret i16 1 + +bb9.exitStub: + ret i16 2 + +bb.exitStub: + ret i16 3 +} + +into: + +_func_entry_2E_ce: + mov r2, #1 + lsl r2, r0 + cmp r0, #9 + bhi LBB1_4 @bb12.exitStub +LBB1_1: @newFuncRoot + mov r1, #13 + tst r2, r1 + bne LBB1_5 @bb4.exitStub +LBB1_2: @newFuncRoot + ldr r1, LCPI1_0 + tst r2, r1 + bne LBB1_6 @bb9.exitStub +LBB1_3: @newFuncRoot + mov r1, #1 + lsl r1, r1, #8 + tst r2, r1 + bne LBB1_7 @bb.exitStub +LBB1_4: @bb12.exitStub + mov r0, #0 + bx lr +LBB1_5: @bb4.exitStub + mov r0, #1 + bx lr +LBB1_6: @bb9.exitStub + mov r0, #2 + bx lr +LBB1_7: @bb.exitStub + mov r0, #3 + bx lr +LBB1_8: + .align 2 +LCPI1_0: + .long 642 + + +gcc compiles to: + + cmp r0, #9 + @ lr needed for prologue + bhi L2 + ldr r3, L11 + mov r2, #1 + mov r1, r2, asl r0 + ands r0, r3, r2, asl r0 + movne r0, #2 + bxne lr + tst r1, #13 + beq L9 +L3: + mov r0, r2 + bx lr +L9: + tst r1, #256 + movne r0, #3 + bxne lr +L2: + mov r0, #0 + bx lr +L12: + .align 2 +L11: + .long 642 + + +GCC is doing a couple of clever things here: + 1. It is predicating one of the returns. This isn't a clear win though: in + cases where that return isn't taken, it is replacing one condbranch with + two 'ne' predicated instructions. + 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of + tst. This will probably require whole function isel. + 3. GCC emits: + tst r1, #256 + we emit: + mov r1, #1 + lsl r1, r1, #8 + tst r2, r1 + + +//===---------------------------------------------------------------------===// + +When spilling in thumb mode and the sp offset is too large to fit in the ldr / +str offset field, we load the offset from a constpool entry and add it to sp: + +ldr r2, LCPI +add r2, sp +ldr r2, [r2] + +These instructions preserve the condition code, which is important if the spill +is between a cmp and a bcc instruction. However, we can use the (potentially) +cheaper sequence if we know it's ok to clobber the condition register. + +add r2, sp, #255 * 4 +add r2, #132 +ldr r2, [r2, #7 * 4] + +This is especially bad when dynamic alloca is used. All the fixed-size stack +objects are referenced off the frame pointer with negative offsets. See +oggenc for an example. + +//===---------------------------------------------------------------------===// + +We are reserving R3 as a scratch register under thumb mode. So if it is live in +to the function, we save / restore R3 to / from R12. Until register scavenging +is done, we should save R3 to a high callee saved reg at emitPrologue time +(when hasFP is true or stack size is large) and restore R3 from that register +instead. This allows us to at least get rid of the save to r12 every time it is +used.
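+ +A sketch of that proposal (the choice of r8 as the high callee-saved register +is illustrative; r8 itself would be preserved as part of the normal +callee-saved handling): + +@ prologue: stash the reserved scratch register once +mov r8, r3 +... +@ body: r3 is free for scratch use, no save / restore to r12 at each use +... +@ epilogue: restore it once +mov r3, r8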
+ +//===---------------------------------------------------------------------===// + +Poor codegen in test/CodeGen/ARM/select.ll f7: + + ldr r5, LCPI1_0 +LPC0: + add r5, pc + ldr r6, LCPI1_1 + ldr r2, LCPI1_2 + cpy r3, r6 + cpy lr, pc + bx r5 + +//===---------------------------------------------------------------------===// + +Make register allocator / spiller smarter so we can re-materialize "mov r, imm", +etc. Almost all Thumb instructions clobber the condition code. + +//===---------------------------------------------------------------------===// + +Add ldmia, stmia support. diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt new file mode 100644 index 000000000000..068c441ed737 --- /dev/null +++ b/lib/Target/ARM/README.txt @@ -0,0 +1,554 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend. +//===---------------------------------------------------------------------===// + +Reimplement 'select' in terms of 'SEL'. + +* We would really like to support UXTAB16, but we need to prove that the + add doesn't need to overflow between the two 16-bit chunks. + +* Implement pre/post increment support. (e.g. PR935) +* Coalesce stack slots! +* Implement smarter constant generation for binops with large immediates. + +* Consider materializing FP constants like 0.0f and 1.0f using integer + immediate instructions then copy to FPU. Slower than load into FPU? + +//===---------------------------------------------------------------------===// + +Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the +time regalloc happens, these values are now in a 32-bit register, usually with +the top-bits known to be sign or zero extended. If spilled, we should be able +to spill these to an 8-bit or 16-bit stack slot, zero or sign extending as part +of the reload. + +Doing this reduces the size of the stack frame (important for thumb etc), and +also increases the likelihood that we will be able to reload multiple values +from the stack with a single load. + +//===---------------------------------------------------------------------===// + +The constant island pass is in good shape. Some cleanups might be desirable, +but there is unlikely to be much improvement in the generated code. + +1. There may be some advantage to trying to be smarter about the initial +placement, rather than putting everything at the end. + +2. There might be some compile-time efficiency to be had by representing +consecutive islands as a single block rather than multiple blocks. + +3. Use a priority queue to sort constant pool users in inverse order of + position so we always process the one closest to the end of the function + first. This may simplify CreateNewWater. + +//===---------------------------------------------------------------------===// + +Eliminate copysign custom expansion. We are still generating crappy code with +default expansion + if-conversion. + +//===---------------------------------------------------------------------===// + +Eliminate one instruction from: + +define i32 @_Z6slow4bii(i32 %x, i32 %y) { + %tmp = icmp sgt i32 %x, %y + %retval = select i1 %tmp, i32 %x, i32 %y + ret i32 %retval +} + +__Z6slow4bii: + cmp r0, r1 + movgt r1, r0 + mov r0, r1 + bx lr +=> + +__Z6slow4bii: + cmp r0, r1 + movle r0, r1 + bx lr + +//===---------------------------------------------------------------------===// + +Implement long long "X-3" with instructions that fold the immediate in.
These +were disabled due to badness with the ARM carry flag on subtracts. + +//===---------------------------------------------------------------------===// + +We currently compile abs: +int foo(int p) { return p < 0 ? -p : p; } + +into: + +_foo: + rsb r1, r0, #0 + cmn r0, #1 + movgt r1, r0 + mov r0, r1 + bx lr + +This is very, uh, literal. This could be a 3-operation sequence: + t = (p sra 31); + res = (p xor t)-t + +Which would be better. This occurs in png decode. + +//===---------------------------------------------------------------------===// + +More load / store optimizations: +1) Look past instructions without side-effects (not load, store, branch, etc.) + when forming the list of loads / stores to optimize. + +2) Smarter register allocation? +We are probably missing some opportunities to use ldm / stm. Consider: + +ldr r5, [r0] +ldr r4, [r0, #4] + +This cannot be merged into an ldm. Perhaps we will need to do the transformation +before register allocation. Then teach the register allocator to allocate a +chunk of consecutive registers. + +3) Better representation for block transfer? This is from Olden/power: + + fldd d0, [r4] + fstd d0, [r4, #+32] + fldd d0, [r4, #+8] + fstd d0, [r4, #+40] + fldd d0, [r4, #+16] + fstd d0, [r4, #+48] + fldd d0, [r4, #+24] + fstd d0, [r4, #+56] + +If we can spare the registers, it would be better to use fldm and fstm here. +Need major register allocator enhancement though. + +4) Can we recognize the relative position of constantpool entries? i.e. Treat + + ldr r0, LCPI17_3 + ldr r1, LCPI17_4 + ldr r2, LCPI17_5 + + as + ldr r0, LCPI17 + ldr r1, LCPI17+4 + ldr r2, LCPI17+8 + + Then the ldr's can be combined into a single ldm. See Olden/power. + +Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a +double 64-bit FP constant: + + adr r0, L6 + ldmia r0, {r0-r1} + + .align 2 +L6: + .long -858993459 + .long 1074318540 + +5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use +ldrd/strd instead if there are only two destination registers that form an +odd/even pair. However, we probably would pay a penalty if the address is not +aligned on an 8-byte boundary. This requires more information on load / store +nodes (and MI's?) than we currently carry. + +6) struct copies appear to be done field by field +instead of by words, at least sometimes: + +struct foo { int x; short s; char c1; char c2; }; +void cpy(struct foo*a, struct foo*b) { *a = *b; } + +llvm code (-O2) + ldrb r3, [r1, #+6] + ldr r2, [r1] + ldrb r12, [r1, #+7] + ldrh r1, [r1, #+4] + str r2, [r0] + strh r1, [r0, #+4] + strb r3, [r0, #+6] + strb r12, [r0, #+7] +gcc code (-O2) + ldmia r1, {r1-r2} + stmia r0, {r1-r2} + +In this benchmark, poor handling of aggregate copies has shown up as +having a large effect on size, and possibly speed as well (we don't have +a good way to measure on ARM). + +//===---------------------------------------------------------------------===// + +* Consider this silly example: + +double bar(double x) { + double r = foo(3.1); + return x+r; +} + +_bar: + stmfd sp!, {r4, r5, r7, lr} + add r7, sp, #8 + mov r4, r0 + mov r5, r1 + fldd d0, LCPI1_0 + fmrrd r0, r1, d0 + bl _foo + fmdrr d0, r4, r5 + fmsr s2, r0 + fsitod d1, s2 + faddd d0, d1, d0 + fmrrd r0, r1, d0 + ldmfd sp!, {r4, r5, r7, pc} + +Ignore the prologue and epilogue stuff for a second. Note + mov r4, r0 + mov r5, r1 +the copies to callee-save registers and the fact they are only being used by the +fmdrr instruction.
It would have been better had the fmdrr been scheduled +before the call and placed the result in a callee-save DPR register. The two +mov ops would not have been necessary. + +//===---------------------------------------------------------------------===// + +Calling convention related stuff: + +* gcc's parameter passing implementation is terrible and we suffer as a result: + +e.g. +struct s { + double d1; + int s1; +}; + +void foo(struct s S) { + printf("%g, %d\n", S.d1, S.s1); +} + +'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and +then reloads them to r1, r2, and r3 before issuing the call (r0 contains the +address of the format string): + + stmfd sp!, {r7, lr} + add r7, sp, #0 + sub sp, sp, #12 + stmia sp, {r0, r1, r2} + ldmia sp, {r1-r2} + ldr r0, L5 + ldr r3, [sp, #8] +L2: + add r0, pc, r0 + bl L_printf$stub + +Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves? + +* Returning an aggregate type is even worse: + +e.g. +struct s foo(void) { + struct s S = {1.1, 2}; + return S; +} + + mov ip, r0 + ldr r0, L5 + sub sp, sp, #12 +L2: + add r0, pc, r0 + @ lr needed for prologue + ldmia r0, {r0, r1, r2} + stmia sp, {r0, r1, r2} + stmia ip, {r0, r1, r2} + mov r0, ip + add sp, sp, #12 + bx lr + +r0 (and later ip) is the hidden parameter from the caller to store the value in. The +first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1, +r2 into the address passed in. However, there is one additional stmia that +stores r0, r1, and r2 to some stack location. The store is dead. + +The llvm-gcc generated code looks like this: + +csretcc void %foo(%struct.s* %agg.result) { +entry: + %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1] + %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1] + cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2] + call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 ) + cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2] + call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 ) + cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1] + call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 ) + ret void +} + +llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from +constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated +into a number of loads and stores, or 2) custom lower memcpy (of small size) to +be ldmia / stmia. I think option 2 is better but the current register +allocator cannot allocate a chunk of registers at a time. + +A feasible temporary solution is to use specific physical registers at +lowering time for small (<= 4 words?) transfer sizes. + +* ARM CSRet calling convention requires the hidden argument to be returned by +the callee. + +//===---------------------------------------------------------------------===// + +We can definitely do a better job on BB placements to eliminate some branches. +It's very common to see llvm-generated assembly code that looks like this: + +LBB3: + ... +LBB4: +... + beq LBB3 + b LBB2 + +If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can +then eliminate the beq and turn the unconditional branch to LBB2 into a bne. + +See McCat/18-imp/ComputeBoundingBoxes for an example. + +//===---------------------------------------------------------------------===// + +Register scavenging is now implemented. The example in the previous version +of this document produces optimal code at -O2.
+ +//===---------------------------------------------------------------------===// + +Pre-/post- indexed load / stores: + +1) We should not make the pre/post- indexed load/store transform if the base ptr +is guaranteed to be live beyond the load/store. This can happen if the base +ptr is live out of the block in which we are performing the optimization. e.g. + +mov r1, r2 +ldr r3, [r1], #4 +... + +vs. + +ldr r3, [r2] +add r1, r2, #4 +... + +In most cases, this is just a wasted optimization. However, sometimes it can +negatively impact the performance because two-address code is more restrictive +when it comes to scheduling. + +Unfortunately, live-out information is currently unavailable during DAG combine +time. + +2) Consider splitting an indexed load / store into a pair of add/sub + load/store + to solve #1 (in TwoAddressInstructionPass.cpp). + +3) Enhance LSR to generate more opportunities for indexed ops. + +4) Once we add support for multiple result patterns, write indexed load + patterns instead of C++ instruction selection code. + +5) Use FLDM / FSTM to emulate indexed FP load / store. + +//===---------------------------------------------------------------------===// + +We should add i64 support to take advantage of the 64-bit loads / stores. +We can add a pseudo i64 register class containing pseudo registers that are +register pairs. All other ops (e.g. add, sub) would be expanded as usual. + +We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers +from the i64 register. These are single moves which can be eliminated if the +destination register is a sub-register of the source. We should implement proper +subreg support in the register allocator to coalesce these away. + +There are other minor issues such as multiple instructions for a spill / restore +/ move. + +//===---------------------------------------------------------------------===// + +Implement support for some more tricky ways to materialize immediates. For +example, to get 0xffff8000, we can use: + +mov r9, #&3f8000 +sub r9, r9, #&400000 + +//===---------------------------------------------------------------------===// + +We sometimes generate multiple add / sub instructions to update sp in the +prologue and epilogue if the inc / dec value is too large to fit in a single +immediate operand. In some cases, perhaps it might be better to load the value +from a constantpool instead. + +//===---------------------------------------------------------------------===// + +GCC generates significantly better code for this function. + +int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) { + int i = 0; + + if (StackPtr != 0) { + while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768))) + Line[i++] = Stack[--StackPtr]; + if (LineLen > 32768) + { + while (StackPtr != 0 && i < LineLen) + { + i++; + --StackPtr; + } + } + } + return StackPtr; +} + +//===---------------------------------------------------------------------===// + +This should compile to the mlas instruction: +int mlas(int x, int y, int z) { return ((x * y + z) < 0) ?
7 : 13; } + +//===---------------------------------------------------------------------===// + +At some point, we should triage these to see if they still apply to us: + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663 + +http://www.inf.u-szeged.hu/gcc-arm/ +http://citeseer.ist.psu.edu/debus04linktime.html + +//===---------------------------------------------------------------------===// + +gcc generates smaller code for this function at -O2 or -Os: + +void foo(signed char* p) { + if (*p == 3) + bar(); + else if (*p == 4) + baz(); + else if (*p == 5) + quux(); +} + +llvm decides it's a good idea to turn the repeated if...else into a +binary tree, as if it were a switch; the resulting code requires -1 +compare-and-branches when *p<=2 or *p==5, the same number if *p==4 +or *p>6, and +1 if *p==3. So it should be a speed win +(on balance). However, the revised code is larger, with 4 conditional +branches instead of 3. + +More seriously, there is a byte->word extend before +each comparison, where there should be only one, and the condition codes +are not remembered when the same two values are compared twice. + +//===---------------------------------------------------------------------===// + +More register scavenging work: + +1. Use the register scavenger to track frame indices materialized into registers + (those that do not fit in addressing modes) to allow reuse in the same BB. +2. Finish scavenging for Thumb. +3. We know some spills and restores are unnecessary. The issue is that once live + intervals are merged, they are never split. So every def is spilled + and every use requires a restore if the register allocator decides the + resulting live interval is not assigned a physical register. It may be + possible (with the help of the scavenger) to turn some spill / restore + pairs into register copies. + +//===---------------------------------------------------------------------===// + +More LSR enhancements possible: + +1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be + merged into a load / store. +2. Allow iv reuse even when a type conversion is required. For example, i8 + and i32 load / store addressing modes are identical. + + +//===---------------------------------------------------------------------===// + +This: + +int foo(int a, int b, int c, int d) { + long long acc = (long long)a * (long long)b; + acc += (long long)c * (long long)d; + return (int)(acc >> 32); +} + +Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies +two signed 32-bit values to produce a 64-bit value, and accumulates this with +a 64-bit value.
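+ +A hand-written sketch of the desired sequence (not compiler output; the +register assignments are illustrative): + +_foo: + smull r12, r0, r1, r0 @ {r0:r12} = a * b + smlal r12, r0, r2, r3 @ {r0:r12} += c * d + bx lr @ return the high word, already in r0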
+ +We currently get this with both v4 and v6: + +_foo: + smull r1, r0, r1, r0 + smull r3, r2, r3, r2 + adds r3, r3, r1 + adc r0, r2, r0 + bx lr + +//===---------------------------------------------------------------------===// + +This: + #include <utility> + std::pair<unsigned, bool> full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + +_Z8full_addjj: + adds r2, r1, r2 + movcc r1, #0 + movcs r1, #1 + str r2, [r0, #0] + strb r1, [r0, #4] + mov pc, lr + +_Z11no_overflowjj: + cmn r0, r1 + movcs r0, #0 + movcc r0, #1 + mov pc, lr + +not: + +__Z8full_addjj: + add r3, r2, r1 + str r3, [r0] + mov r2, #1 + mov r12, #0 + cmp r3, r1 + movlo r12, r2 + str r12, [r0, #+4] + bx lr +__Z11no_overflowjj: + add r3, r1, r0 + mov r2, #1 + mov r1, #0 + cmp r3, r0 + movhs r1, r2 + mov r0, r1 + bx lr + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h new file mode 100644 index 000000000000..281517614935 --- /dev/null +++ b/lib/Target/Alpha/Alpha.h @@ -0,0 +1,51 @@ +//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Alpha back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_ALPHA_H +#define TARGET_ALPHA_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + class AlphaTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class raw_ostream; + + FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); + FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS, + TargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); + FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); + FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, + MachineCodeEmitter &MCE); + FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, + JITCodeEmitter &JCE); + FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); + FunctionPass *createAlphaBranchSelectionPass(); + +} // end namespace llvm; + +// Defines symbolic names for Alpha registers. This defines a mapping from +// register name to register number. +// +#include "AlphaGenRegisterNames.inc" + +// Defines symbolic names for the Alpha instructions. +// +#include "AlphaGenInstrNames.inc" + +#endif diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td new file mode 100644 index 000000000000..e3748c6a09f3 --- /dev/null +++ b/lib/Target/Alpha/Alpha.td @@ -0,0 +1,66 @@ +//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+                                  "Enable CIX extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo {
+  // Define how we want to layout our target-specific information field.
+  // let TSFlagsFields = [];
+  // let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6"    , Alpha21264Itineraries, []>;
+def : Processor<"ev67"   , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+  // Pull in Instruction Info:
+  let InstructionSet = AlphaInstrInfo;
+}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 000000000000..aca8ca734897
--- /dev/null
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,67 @@
+//===-- AlphaBranchSelector.cpp - Convert pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// Replace Pseudo COND_BRANCH_* with their appropriate real branch +// Simplified version of the PPC Branch Selector +// +//===----------------------------------------------------------------------===// + +#include "Alpha.h" +#include "AlphaInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetAsmInfo.h" +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN AlphaBSel : public MachineFunctionPass { + static char ID; + AlphaBSel() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "Alpha Branch Selection"; + } + }; + char AlphaBSel::ID = 0; +} + +/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection +/// Pass +/// +FunctionPass *llvm::createAlphaBranchSelectionPass() { + return new AlphaBSel(); +} + +bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) { + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = MFI; + + for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); + MBBI != EE; ++MBBI) { + if (MBBI->getOpcode() == Alpha::COND_BRANCH_I || + MBBI->getOpcode() == Alpha::COND_BRANCH_F) { + + // condbranch operands: + // 0. bc opcode + // 1. reg + // 2. target MBB + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm())); + } + } + } + + return true; +} + diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp new file mode 100644 index 000000000000..f50f007c2076 --- /dev/null +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -0,0 +1,242 @@ +//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Alpha machine instructions +// into relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "alpha-emitter" +#include "AlphaTargetMachine.h" +#include "AlphaRelocations.h" +#include "Alpha.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +namespace { + + class AlphaCodeEmitter { + MachineCodeEmitter &MCE; + public: + AlphaCodeEmitter(MachineCodeEmitter &mce) : MCE(mce) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. 
+
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+
+    /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+
+    unsigned getMachineOpValue(const MachineInstr &MI,
+                               const MachineOperand &MO);
+  };
+
+  template <class CodeEmitter>
+  class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+                                    public AlphaCodeEmitter
+  {
+    const AlphaInstrInfo *II;
+    TargetMachine &TM;
+    CodeEmitter &MCE;
+
+  public:
+    static char ID;
+    explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+      : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+        II(0), TM(tm), MCE(mce) {}
+    Emitter(TargetMachine &tm, CodeEmitter &mce, const AlphaInstrInfo& ii)
+      : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+        II(&ii), TM(tm), MCE(mce) {}
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const {
+      return "Alpha Machine Code Emitter";
+    }
+
+    void emitInstruction(const MachineInstr &MI);
+
+  private:
+    void emitBasicBlock(MachineBasicBlock &MBB);
+  };
+
+  template <class CodeEmitter>
+  char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha
+/// code to the specified MCE object.
+
+FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+                                               MachineCodeEmitter &MCE) {
+  return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+
+FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+                                                  JITCodeEmitter &JCE) {
+  return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template <class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+  II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
+
+  do {
+    MCE.startFunction(MF);
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      emitBasicBlock(*I);
+  } while (MCE.finishFunction(MF));
+
+  return false;
+}
+
+template <class CodeEmitter>
+void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
+  MCE.StartMachineBasicBlock(&MBB);
+  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+       I != E; ++I) {
+    const MachineInstr &MI = *I;
+    switch(MI.getOpcode()) {
+    default:
+      MCE.emitWordLE(getBinaryCodeForInstr(*I));
+      break;
+    case Alpha::ALTENT:
+    case Alpha::PCLABEL:
+    case Alpha::MEMLABEL:
+    case TargetInstrInfo::IMPLICIT_DEF:
+      break; //skip these
+    }
+  }
+}
+
+static unsigned getAlphaRegNumber(unsigned Reg) {
+  switch (Reg) {
+  case Alpha::R0  : case Alpha::F0  : return 0;
+  case Alpha::R1  : case Alpha::F1  : return 1;
+  case Alpha::R2  : case Alpha::F2  : return 2;
+  case Alpha::R3  : case Alpha::F3  : return 3;
+  case Alpha::R4  : case Alpha::F4  : return 4;
+  case Alpha::R5  : case Alpha::F5  : return 5;
+  case Alpha::R6  : case Alpha::F6  : return 6;
+  case Alpha::R7  : case Alpha::F7  : return 7;
+  case Alpha::R8  : case Alpha::F8  : return 8;
+  case Alpha::R9  : case Alpha::F9  : return 9;
+  case Alpha::R10 : case Alpha::F10 : return 10;
+  case Alpha::R11 : case Alpha::F11 : return 11;
+  case Alpha::R12 : case Alpha::F12 : return 12;
+  case Alpha::R13 : case Alpha::F13 : return 13;
+  case Alpha::R14 : case Alpha::F14 : return 14;
+  case Alpha::R15 : case Alpha::F15 : return 15;
+  case Alpha::R16 : case Alpha::F16 : return 16;
+  case Alpha::R17 : case Alpha::F17 : return 17;
+  case Alpha::R18 : case Alpha::F18 : return 18;
+  case Alpha::R19 : case Alpha::F19 : return 19;
+  case Alpha::R20 : case Alpha::F20 : return 20;
+  case Alpha::R21 : case Alpha::F21 : return 21;
+  case Alpha::R22 : case Alpha::F22 : return 22;
+  case Alpha::R23 : case Alpha::F23 : return 23;
+  case Alpha::R24 : case Alpha::F24 : return 24;
+  case Alpha::R25 : case Alpha::F25 : return 25;
+  case Alpha::R26 : case Alpha::F26 : return 26;
+  case Alpha::R27 : case Alpha::F27 : return 27;
+  case Alpha::R28 : case Alpha::F28 : return 28;
+  case Alpha::R29 : case Alpha::F29 : return 29;
+  case Alpha::R30 : case Alpha::F30 : return 30;
+  case Alpha::R31 : case Alpha::F31 : return 31;
+  default:
+    assert(0 && "Unhandled reg");
+    abort();
+  }
+}
+
+unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+                                             const MachineOperand &MO) {
+
+  unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
+                   // or things that get fixed up later by the JIT.
+
+  if (MO.isReg()) {
+    rv = getAlphaRegNumber(MO.getReg());
+  } else if (MO.isImm()) {
+    rv = MO.getImm();
+  } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+    DOUT << MO << " is a relocated op for " << MI << "\n";
+    unsigned Reloc = 0;
+    int Offset = 0;
+    bool useGOT = false;
+    switch (MI.getOpcode()) {
+    case Alpha::BSR:
+      Reloc = Alpha::reloc_bsr;
+      break;
+    case Alpha::LDLr:
+    case Alpha::LDQr:
+    case Alpha::LDBUr:
+    case Alpha::LDWUr:
+    case Alpha::LDSr:
+    case Alpha::LDTr:
+    case Alpha::LDAr:
+    case Alpha::STQr:
+    case Alpha::STLr:
+    case Alpha::STWr:
+    case Alpha::STBr:
+    case Alpha::STSr:
+    case Alpha::STTr:
+      Reloc = Alpha::reloc_gprellow;
+      break;
+    case Alpha::LDAHr:
+      Reloc = Alpha::reloc_gprelhigh;
+      break;
+    case Alpha::LDQl:
+      Reloc = Alpha::reloc_literal;
+      useGOT = true;
+      break;
+    case Alpha::LDAg:
+    case Alpha::LDAHg:
+      Reloc = Alpha::reloc_gpdist;
+      Offset = MI.getOperand(3).getImm();
+      break;
+    default:
+      assert(0 && "unknown relocatable instruction");
+      abort();
+    }
+    if (MO.isGlobal())
+      MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
+                                                 Reloc, MO.getGlobal(), Offset,
+                                                 isa<Function>(MO.getGlobal()),
+                                                 useGOT));
+    else if (MO.isSymbol())
+      MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+                                                     Reloc, MO.getSymbolName(),
+                                                     Offset, true));
+    else
+      MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+                                                        Reloc, MO.getIndex(),
+                                                        Offset));
+  } else if (MO.isMBB()) {
+    MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+                                               Alpha::reloc_bsr, MO.getMBB()));
+  } else {
+    cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+    abort();
+  }
+
+  return rv;
+}
+
+#include "AlphaGenCodeEmitter.inc"
+
+
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 000000000000..affcd3e7fec8
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,553 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+  //===--------------------------------------------------------------------===//
+  /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+  /// instructions for SelectionDAG operations.
+  class AlphaDAGToDAGISel : public SelectionDAGISel {
+    static const int64_t IMM_LOW  = -32768;
+    static const int64_t IMM_HIGH = 32767;
+    static const int64_t IMM_MULT = 65536;
+    static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+    static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+    static int64_t get_ldah16(int64_t x) {
+      int64_t y = x / IMM_MULT;
+      if (x % IMM_MULT > IMM_HIGH)
+        ++y;
+      return y;
+    }
+
+    static int64_t get_lda16(int64_t x) {
+      return x - get_ldah16(x) * IMM_MULT;
+    }
+
+    /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+    /// instruction (if not, return 0).  Note that this code accepts partial
+    /// zap masks.  For example (and LHS, 1) is a valid zap, as long as we
+    /// know that bits 1-7 of LHS are already zero.  If LHS is non-null, we
+    /// are in checking mode.  If LHS is null, we assume that the mask has
+    /// already been validated before.
+    uint64_t get_zapImm(SDValue LHS, uint64_t Constant) {
+      uint64_t BitsToCheck = 0;
+      unsigned Result = 0;
+      for (unsigned i = 0; i != 8; ++i) {
+        if (((Constant >> 8*i) & 0xFF) == 0) {
+          // nothing to do.
+        } else {
+          Result |= 1 << i;
+          if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+            // If the entire byte is set, zapnot the byte.
+          } else if (LHS.getNode() == 0) {
+            // Otherwise, if the mask was previously validated, we know it's
+            // okay to zapnot this entire byte even though all the bits aren't
+            // set.
+          } else {
+            // Otherwise we don't know that it's okay to zapnot this entire
+            // byte.  Only do this iff we can prove that the missing bits are
+            // already null, so the bytezap doesn't need to really null them.
+            BitsToCheck |= ~Constant & (0xFF << 8*i);
+          }
+        }
+      }
+
+      // If there are missing bits in a byte (for example, X & 0xEF00), check
+      // to see if the missing bits (0x1000) are already known zero; if not,
+      // the zap isn't okay to do, as it won't clear all the required bits.
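+      // Worked example (illustrative, not from the upstream comments): for
+      // (and LHS, 0xEF00), byte 1 of the mask is 0xEF, so Result gets bit 1
+      // set and BitsToCheck accumulates ~0xEF00 & 0xFF00 = 0x1000.  The
+      // zapnot is only safe if MaskedValueIsZero can prove that bit 12 of
+      // LHS is already zero, which is exactly what the check below does.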
+ if (BitsToCheck && + !CurDAG->MaskedValueIsZero(LHS, + APInt(LHS.getValueSizeInBits(), + BitsToCheck))) + return 0; + + return Result; + } + + static uint64_t get_zapImm(uint64_t x) { + unsigned build = 0; + for(int i = 0; i != 8; ++i) { + if ((x & 0x00FF) == 0x00FF) + build |= 1 << i; + else if ((x & 0x00FF) != 0) + return 0; + x >>= 8; + } + return build; + } + + + static uint64_t getNearPower2(uint64_t x) { + if (!x) return 0; + unsigned at = CountLeadingZeros_64(x); + uint64_t complow = 1 << (63 - at); + uint64_t comphigh = 1 << (64 - at); + //cerr << x << ":" << complow << ":" << comphigh << "\n"; + if (abs(complow - x) <= abs(comphigh - x)) + return complow; + else + return comphigh; + } + + static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) { + uint64_t y = getNearPower2(x); + if (swap) + return (y - x) == r; + else + return (x - y) == r; + } + + static bool isFPZ(SDValue N) { + ConstantFPSDNode *CN = dyn_cast(N); + return (CN && (CN->getValueAPF().isZero())); + } + static bool isFPZn(SDValue N) { + ConstantFPSDNode *CN = dyn_cast(N); + return (CN && CN->getValueAPF().isNegZero()); + } + static bool isFPZp(SDValue N) { + ConstantFPSDNode *CN = dyn_cast(N); + return (CN && CN->getValueAPF().isPosZero()); + } + + public: + explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM) + : SelectionDAGISel(TM) + {} + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(int64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + SDNode *Select(SDValue Op); + + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "Alpha DAG->DAG Pattern Instruction Selection"; + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) { + SDValue Op0; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + Op0 = Op; + break; + } + + OutOps.push_back(Op0); + return false; + } + +// Include the pieces autogenerated from the target description. +#include "AlphaGenDAGISel.inc" + +private: + SDValue getGlobalBaseReg(); + SDValue getGlobalRetAddr(); + void SelectCALL(SDValue Op); + + }; +} + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +/// +SDValue AlphaDAGToDAGISel::getGlobalBaseReg() { + unsigned GP = 0; + for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(), + ee = RegInfo->livein_end(); ii != ee; ++ii) + if (ii->first == Alpha::R29) { + GP = ii->second; + break; + } + assert(GP && "GOT PTR not in liveins"); + // FIXME is there anywhere sensible to get a DebugLoc here? + return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + DebugLoc::getUnknownLoc(), GP, MVT::i64); +} + +/// getRASaveReg - Grab the return address +/// +SDValue AlphaDAGToDAGISel::getGlobalRetAddr() { + unsigned RA = 0; + for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(), + ee = RegInfo->livein_end(); ii != ee; ++ii) + if (ii->first == Alpha::R26) { + RA = ii->second; + break; + } + assert(RA && "RA PTR not in liveins"); + // FIXME is there anywhere sensible to get a DebugLoc here? 
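+  // $26 is the Alpha return-address register.  It is added to the function's
+  // live-ins when the incoming arguments are lowered, so by this point there
+  // is a virtual register holding it; the CopyFromReg below reads that
+  // virtual register rather than the physical $26 directly.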
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + DebugLoc::getUnknownLoc(), RA, MVT::i64); +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void AlphaDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + // Select target instructions for the DAG. + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + if (N->isMachineOpcode()) { + return NULL; // Already selected. + } + DebugLoc dl = N->getDebugLoc(); + + switch (N->getOpcode()) { + default: break; + case AlphaISD::CALL: + SelectCALL(Op); + return NULL; + + case ISD::FrameIndex: { + int FI = cast(N)->getIndex(); + return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + getI64Imm(0)); + } + case ISD::GLOBAL_OFFSET_TABLE: { + SDValue Result = getGlobalBaseReg(); + ReplaceUses(Op, Result); + return NULL; + } + case AlphaISD::GlobalRetAddr: { + SDValue Result = getGlobalRetAddr(); + ReplaceUses(Op, Result); + return NULL; + } + + case AlphaISD::DivCall: { + SDValue Chain = CurDAG->getEntryNode(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1, + SDValue(0,0)); + Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2, + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0, + Chain.getValue(1)); + SDNode *CNode = + CurDAG->getTargetNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag, + Chain, Chain.getValue(1)); + Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64, + SDValue(CNode, 1)); + return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain); + } + + case ISD::READCYCLECOUNTER: { + SDValue Chain = N->getOperand(0); + return CurDAG->getTargetNode(Alpha::RPCC, dl, MVT::i64, MVT::Other, + Chain); + } + + case ISD::Constant: { + uint64_t uval = cast(N)->getZExtValue(); + + if (uval == 0) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + Alpha::R31, MVT::i64); + ReplaceUses(Op, Result); + return NULL; + } + + int64_t val = (int64_t)uval; + int32_t val32 = (int32_t)val; + if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT && + val >= IMM_LOW + IMM_LOW * IMM_MULT) + break; //(LDAH (LDA)) + if ((uval >> 32) == 0 && //empty upper bits + val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT) + // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true + break; //(zext (LDAH (LDA))) + //Else use the constant pool + ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval); + SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64); + SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, dl, MVT::i64, CPI, + getGlobalBaseReg()); + return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, + CPI, SDValue(Tmp, 0), CurDAG->getEntryNode()); + } + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ConstantFPSDNode *CN = cast(N); + bool isDouble = N->getValueType(0) == MVT::f64; + MVT T = isDouble ? MVT::f64 : MVT::f32; + if (CN->getValueAPF().isPosZero()) { + return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS, + T, CurDAG->getRegister(Alpha::F31, T), + CurDAG->getRegister(Alpha::F31, T)); + } else if (CN->getValueAPF().isNegZero()) { + return CurDAG->SelectNodeTo(N, isDouble ? 
Alpha::CPYSNT : Alpha::CPYSNS, + T, CurDAG->getRegister(Alpha::F31, T), + CurDAG->getRegister(Alpha::F31, T)); + } else { + abort(); + } + break; + } + + case ISD::SETCC: + if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) { + ISD::CondCode CC = cast(N->getOperand(2))->get(); + + unsigned Opc = Alpha::WTF; + bool rev = false; + bool inv = false; + switch(CC) { + default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!"); + case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ: + Opc = Alpha::CMPTEQ; break; + case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT: + Opc = Alpha::CMPTLT; break; + case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE: + Opc = Alpha::CMPTLE; break; + case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT: + Opc = Alpha::CMPTLT; rev = true; break; + case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE: + Opc = Alpha::CMPTLE; rev = true; break; + case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE: + Opc = Alpha::CMPTEQ; inv = true; break; + case ISD::SETO: + Opc = Alpha::CMPTUN; inv = true; break; + case ISD::SETUO: + Opc = Alpha::CMPTUN; break; + }; + SDValue tmp1 = N->getOperand(rev?1:0); + SDValue tmp2 = N->getOperand(rev?0:1); + SDNode *cmp = CurDAG->getTargetNode(Opc, dl, MVT::f64, tmp1, tmp2); + if (inv) + cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, dl, + MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Alpha::F31, MVT::f64)); + switch(CC) { + case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE: + case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: + { + SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, dl, MVT::f64, + tmp1, tmp2); + cmp = CurDAG->getTargetNode(Alpha::ADDT, dl, MVT::f64, + SDValue(cmp2, 0), SDValue(cmp, 0)); + break; + } + default: break; + } + + SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, dl, + MVT::i64, SDValue(cmp, 0)); + return CurDAG->getTargetNode(Alpha::CMPULT, dl, MVT::i64, + CurDAG->getRegister(Alpha::R31, MVT::i64), + SDValue(LD,0)); + } + break; + + case ISD::SELECT: + if (N->getValueType(0).isFloatingPoint() && + (N->getOperand(0).getOpcode() != ISD::SETCC || + !N->getOperand(0).getOperand(1).getValueType().isFloatingPoint())) { + //This should be the condition not covered by the Patterns + //FIXME: Don't have SelectCode die, but rather return something testable + // so that things like this can be caught in fall though code + //move int to fp + bool isDouble = N->getValueType(0) == MVT::f64; + SDValue cond = N->getOperand(0); + SDValue TV = N->getOperand(1); + SDValue FV = N->getOperand(2); + + SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, dl, MVT::f64, cond); + return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES, + dl, MVT::f64, FV, TV, SDValue(LD,0)); + } + break; + + case ISD::AND: { + ConstantSDNode* SC = NULL; + ConstantSDNode* MC = NULL; + if (N->getOperand(0).getOpcode() == ISD::SRL && + (MC = dyn_cast(N->getOperand(1))) && + (SC = dyn_cast(N->getOperand(0).getOperand(1)))) { + uint64_t sval = SC->getZExtValue(); + uint64_t mval = MC->getZExtValue(); + // If the result is a zap, let the autogened stuff handle it. 
+    if (get_zapImm(N->getOperand(0), mval))
+      break;
+    // given mask X, and shift S, we want to see if there is any zap in the
+    // mask if we play around with the bottom S bits
+    uint64_t dontcare = (~0ULL) >> (64 - sval);
+    uint64_t mask = mval << sval;
+
+    if (get_zapImm(mask | dontcare))
+      mask = mask | dontcare;
+
+    if (get_zapImm(mask)) {
+      SDValue Z =
+        SDValue(CurDAG->getTargetNode(Alpha::ZAPNOTi, dl, MVT::i64,
+                                      N->getOperand(0).getOperand(0),
+                                      getI64Imm(get_zapImm(mask))), 0);
+      return CurDAG->getTargetNode(Alpha::SRLr, dl, MVT::i64, Z,
+                                   getI64Imm(sval));
+    }
+  }
+  break;
+  }
+
+  }
+
+  return SelectCode(Op);
+}
+
+void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
+  //TODO: add flag stuff to prevent nondeterministic breakage!
+
+  SDNode *N = Op.getNode();
+  SDValue Chain = N->getOperand(0);
+  SDValue Addr = N->getOperand(1);
+  SDValue InFlag(0,0);  // Null incoming flag value.
+  DebugLoc dl = N->getDebugLoc();
+
+  std::vector<SDValue> CallOperands;
+  std::vector<MVT> TypeOperands;
+
+  //grab the arguments
+  for(int i = 2, e = N->getNumOperands(); i < e; ++i) {
+    TypeOperands.push_back(N->getOperand(i).getValueType());
+    CallOperands.push_back(N->getOperand(i));
+  }
+  int count = N->getNumOperands() - 2;
+
+  static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18,
+                                      Alpha::R19, Alpha::R20, Alpha::R21};
+  static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18,
+                                        Alpha::F19, Alpha::F20, Alpha::F21};
+
+  for (int i = 6; i < count; ++i) {
+    unsigned Opc = Alpha::WTF;
+    if (TypeOperands[i].isInteger()) {
+      Opc = Alpha::STQ;
+    } else if (TypeOperands[i] == MVT::f32) {
+      Opc = Alpha::STS;
+    } else if (TypeOperands[i] == MVT::f64) {
+      Opc = Alpha::STT;
+    } else
+      assert(0 && "Unknown operand");
+
+    SDValue Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8),
+                      CurDAG->getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64),
+                      Chain };
+    Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 4), 0);
+  }
+  for (int i = 0; i < std::min(6, count); ++i) {
+    if (TypeOperands[i].isInteger()) {
+      Chain = CurDAG->getCopyToReg(Chain, dl, args_int[i],
+                                   CallOperands[i], InFlag);
+      InFlag = Chain.getValue(1);
+    } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) {
+      Chain = CurDAG->getCopyToReg(Chain, dl, args_float[i],
+                                   CallOperands[i], InFlag);
+      InFlag = Chain.getValue(1);
+    } else
+      assert(0 && "Unknown operand");
+  }
+
+  // Finally, once everything is in registers to pass to the call, emit the
+  // call itself.
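+  // Two call flavours, per the OSF/1 Alpha calling standard: a callee known
+  // to be GP-relative can be reached directly with BSR after materializing
+  // the GOT pointer in $29; anything else is loaded into the procedure value
+  // register $27 and called with an indirect JSR, which also lets the callee
+  // recompute its own GP from $27.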
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) { + SDValue GOT = getGlobalBaseReg(); + Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag); + InFlag = Chain.getValue(1); + Chain = SDValue(CurDAG->getTargetNode(Alpha::BSR, dl, MVT::Other, + MVT::Flag, Addr.getOperand(0), + Chain, InFlag), 0); + } else { + Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag); + InFlag = Chain.getValue(1); + Chain = SDValue(CurDAG->getTargetNode(Alpha::JSR, dl, MVT::Other, + MVT::Flag, Chain, InFlag), 0); + } + InFlag = Chain.getValue(1); + + std::vector CallResults; + + switch (N->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unexpected ret value!"); + case MVT::Other: break; + case MVT::i64: + Chain = CurDAG->getCopyFromReg(Chain, dl, + Alpha::R0, MVT::i64, InFlag).getValue(1); + CallResults.push_back(Chain.getValue(0)); + break; + case MVT::f32: + Chain = CurDAG->getCopyFromReg(Chain, dl, + Alpha::F0, MVT::f32, InFlag).getValue(1); + CallResults.push_back(Chain.getValue(0)); + break; + case MVT::f64: + Chain = CurDAG->getCopyFromReg(Chain, dl, + Alpha::F0, MVT::f64, InFlag).getValue(1); + CallResults.push_back(Chain.getValue(0)); + break; + } + + CallResults.push_back(Chain); + for (unsigned i = 0, e = CallResults.size(); i != e; ++i) + ReplaceUses(Op.getValue(i), CallResults[i]); +} + + +/// createAlphaISelDag - This pass converts a legalized DAG into a +/// Alpha-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) { + return new AlphaDAGToDAGISel(TM); +} diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp new file mode 100644 index 000000000000..10011125cf85 --- /dev/null +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -0,0 +1,798 @@ +//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AlphaISelLowering class. +// +//===----------------------------------------------------------------------===// + +#include "AlphaISelLowering.h" +#include "AlphaTargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +/// AddLiveIn - This helper function adds the specified physical register to the +/// MachineFunction as a live in value. It also creates a corresponding virtual +/// register for it. +static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, + TargetRegisterClass *RC) { + assert(RC->contains(PReg) && "Not the correct regclass!"); + unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) { + // Set up the TargetLowering object. 
+ //I am having problems with shr n ubyte 1 + setShiftAmountType(MVT::i64); + setBooleanContents(ZeroOrOneBooleanContent); + + setUsesGlobalOffsetTable(true); + + addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); + addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); + addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass); + + // We want to custom lower some of our intrinsics. + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); + + // setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + + if (!TM.getSubtarget().hasCT()) { + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTLZ , MVT::i64 , Expand); + } + setOperationAction(ISD::BSWAP , MVT::i64, Expand); + setOperationAction(ISD::ROTL , MVT::i64, Expand); + setOperationAction(ISD::ROTR , MVT::i64, Expand); + + setOperationAction(ISD::SREM , MVT::i64, Custom); + setOperationAction(ISD::UREM , MVT::i64, Custom); + setOperationAction(ISD::SDIV , MVT::i64, Custom); + setOperationAction(ISD::UDIV , MVT::i64, Custom); + + setOperationAction(ISD::ADDC , MVT::i64, Expand); + setOperationAction(ISD::ADDE , MVT::i64, Expand); + setOperationAction(ISD::SUBC , MVT::i64, Expand); + setOperationAction(ISD::SUBE , MVT::i64, Expand); + + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + + + // We don't support sin/cos/sqrt/pow + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + + setOperationAction(ISD::FPOW , MVT::f32, Expand); + setOperationAction(ISD::FPOW , MVT::f64, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Promote); + + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote); + + // We don't have line number support yet. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // Not implemented yet. 
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + + // We want to legalize GlobalAddress and ConstantPool and + // ExternalSymbols nodes into the appropriate instructions to + // materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i32, Custom); + + setOperationAction(ISD::RET, MVT::Other, Custom); + + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + + setStackPointerRegisterToSaveRestore(Alpha::R30); + + addLegalFPImmediate(APFloat(+0.0)); //F31 + addLegalFPImmediate(APFloat(+0.0f)); //F31 + addLegalFPImmediate(APFloat(-0.0)); //-F31 + addLegalFPImmediate(APFloat(-0.0f)); //-F31 + + setJumpBufSize(272); + setJumpBufAlignment(16); + + computeRegisterProperties(); +} + +MVT AlphaTargetLowering::getSetCCResultType(MVT VT) const { + return MVT::i64; +} + +const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case AlphaISD::CVTQT_: return "Alpha::CVTQT_"; + case AlphaISD::CVTQS_: return "Alpha::CVTQS_"; + case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_"; + case AlphaISD::GPRelHi: return "Alpha::GPRelHi"; + case AlphaISD::GPRelLo: return "Alpha::GPRelLo"; + case AlphaISD::RelLit: return "Alpha::RelLit"; + case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr"; + case AlphaISD::CALL: return "Alpha::CALL"; + case AlphaISD::DivCall: return "Alpha::DivCall"; + case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG"; + case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I"; + case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F"; + } +} + +static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast(Op); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there isn't really any debug info here + DebugLoc dl = Op.getDebugLoc(); + + SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); + SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi); + return Lo; +} + +//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/ +//AA-PY8AC-TET1_html/callCH3.html#BLOCK21 + +//For now, just use variable size stack frame format + +//In a standard call, the first six items are passed in registers $16 +//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details +//of argument-to-register correspondence.) The remaining items are +//collected in a memory argument list that is a naturally aligned +//array of quadwords. In a standard call, this list, if present, must +//be passed at 0(SP). +//7 ... n 0(SP) ... 
(n-7)*8(SP)
+
+// //#define FP    $15
+// //#define RA    $26
+// //#define PV    $27
+// //#define GP    $29
+// //#define SP    $30
+
+static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
+                                     int &VarArgsBase,
+                                     int &VarArgsOffset) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  std::vector<SDValue> ArgValues;
+  SDValue Root = Op.getOperand(0);
+  DebugLoc dl = Op.getDebugLoc();
+
+  AddLiveIn(MF, Alpha::R29, &Alpha::GPRCRegClass); //GP
+  AddLiveIn(MF, Alpha::R26, &Alpha::GPRCRegClass); //RA
+
+  unsigned args_int[] = {
+    Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+  unsigned args_float[] = {
+    Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
+    SDValue argt;
+    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+    SDValue ArgVal;
+
+    if (ArgNo < 6) {
+      switch (ObjectVT.getSimpleVT()) {
+      default:
+        assert(false && "Invalid value type!");
+      case MVT::f64:
+        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+                                      &Alpha::F8RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::f32:
+        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+                                      &Alpha::F4RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::i64:
+        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+                                    &Alpha::GPRCRegClass);
+        ArgVal = DAG.getCopyFromReg(Root, dl, args_int[ArgNo], MVT::i64);
+        break;
+      }
+    } else { //more args
+      // Create the frame index object for this incoming parameter...
+      int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter
+      SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+    }
+    ArgValues.push_back(ArgVal);
+  }
+
+  // If the function takes a variable number of arguments, copy all regs to stack
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  if (isVarArg) {
+    VarArgsOffset = (Op.getNode()->getNumValues()-1) * 8;
+    std::vector<SDValue> LS;
+    for (int i = 0; i < 6; ++i) {
+      if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+        args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+      SDValue argt = DAG.getCopyFromReg(Root, dl, args_int[i], MVT::i64);
+      int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+      if (i == 0) VarArgsBase = FI;
+      SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+
+      if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+        args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+      argt = DAG.getCopyFromReg(Root, dl, args_float[i], MVT::f64);
+      FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+      SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+    }
+
+    //Set up a token factor with all the stack traffic
+    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
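+  // MERGE_VALUES packages the per-argument values plus the updated chain
+  // into a single node whose result numbers line up one-for-one with the
+  // results of the FORMAL_ARGUMENTS node this function is replacing.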
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()); +} + +static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue Copy = DAG.getCopyToReg(Op.getOperand(0), dl, Alpha::R26, + DAG.getNode(AlphaISD::GlobalRetAddr, + DebugLoc::getUnknownLoc(), + MVT::i64), + SDValue()); + switch (Op.getNumOperands()) { + default: + assert(0 && "Do not know how to return this many arguments!"); + abort(); + case 1: + break; + //return SDValue(); // ret void is legal + case 3: { + MVT ArgVT = Op.getOperand(1).getValueType(); + unsigned ArgReg; + if (ArgVT.isInteger()) + ArgReg = Alpha::R0; + else { + assert(ArgVT.isFloatingPoint()); + ArgReg = Alpha::F0; + } + Copy = DAG.getCopyToReg(Copy, dl, ArgReg, + Op.getOperand(1), Copy.getValue(1)); + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) + DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg); + break; + } + case 5: { + MVT ArgVT = Op.getOperand(1).getValueType(); + unsigned ArgReg1, ArgReg2; + if (ArgVT.isInteger()) { + ArgReg1 = Alpha::R0; + ArgReg2 = Alpha::R1; + } else { + assert(ArgVT.isFloatingPoint()); + ArgReg1 = Alpha::F0; + ArgReg2 = Alpha::F1; + } + Copy = DAG.getCopyToReg(Copy, dl, ArgReg1, + Op.getOperand(1), Copy.getValue(1)); + if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), + DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1) + == DAG.getMachineFunction().getRegInfo().liveout_end()) + DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1); + Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, + Op.getOperand(3), Copy.getValue(1)); + if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), + DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2) + == DAG.getMachineFunction().getRegInfo().liveout_end()) + DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2); + break; + } + } + return DAG.getNode(AlphaISD::RET_FLAG, dl, + MVT::Other, Copy, Copy.getValue(1)); +} + +std::pair +AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, unsigned CallingConv, + bool isTailCall, SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) { + int NumBytes = 0; + if (Args.size() > 6) + NumBytes = (Args.size() - 6) * 8; + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + std::vector args_to_use; + for (unsigned i = 0, e = Args.size(); i != e; ++i) + { + switch (getValueType(Args[i].Ty).getSimpleVT()) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + // Promote the integer to 64 bits. If the input type is signed use a + // sign extend, otherwise use a zero extend. 
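+      // For example, an i8 argument with bit pattern 0xFF is passed as
+      // 0xFFFFFFFFFFFFFFFF (-1) after sign extension, but as 0xFF (255)
+      // after zero extension; ANY_EXTEND leaves the upper bits unspecified.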
+ if (Args[i].isSExt) + Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, dl, + MVT::i64, Args[i].Node); + else if (Args[i].isZExt) + Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i64, Args[i].Node); + else + Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Args[i].Node); + break; + case MVT::i64: + case MVT::f64: + case MVT::f32: + break; + } + args_to_use.push_back(Args[i].Node); + } + + std::vector RetVals; + MVT RetTyVT = getValueType(RetTy); + MVT ActualRetTyVT = RetTyVT; + if (RetTyVT.getSimpleVT() >= MVT::i1 && RetTyVT.getSimpleVT() <= MVT::i32) + ActualRetTyVT = MVT::i64; + + if (RetTyVT != MVT::isVoid) + RetVals.push_back(ActualRetTyVT); + RetVals.push_back(MVT::Other); + + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end()); + SDValue TheCall = DAG.getNode(AlphaISD::CALL, dl, + RetVals, &Ops[0], Ops.size()); + Chain = TheCall.getValue(RetTyVT != MVT::isVoid); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), SDValue()); + SDValue RetVal = TheCall; + + if (RetTyVT != ActualRetTyVT) { + ISD::NodeType AssertKind = ISD::DELETED_NODE; + if (RetSExt) + AssertKind = ISD::AssertSext; + else if (RetZExt) + AssertKind = ISD::AssertZext; + + if (AssertKind != ISD::DELETED_NODE) + RetVal = DAG.getNode(AssertKind, dl, MVT::i64, RetVal, + DAG.getValueType(RetTyVT)); + + RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal); + } + + return std::make_pair(RetVal, Chain); +} + +void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, + SDValue &DataPtr, SelectionDAG &DAG) { + Chain = N->getOperand(0); + SDValue VAListP = N->getOperand(1); + const Value *VAListS = cast(N->getOperand(2))->getValue(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, MVT::i64)); + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + Tmp, NULL, 0, MVT::i32); + DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); + if (N->getValueType(0).isFloatingPoint()) + { + //if fp && Offset < 6*8, then subtract 6*8 from DataPtr + SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr, + DAG.getConstant(8*6, MVT::i64)); + SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset, + DAG.getConstant(8*6, MVT::i64), ISD::SETLT); + DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr); + } + + SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, + DAG.getConstant(8, MVT::i64)); + Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, + MVT::i32); +} + +/// LowerOperation - Provide custom lowering hooks for some operations. +/// +SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + switch (Op.getOpcode()) { + default: assert(0 && "Wasn't expecting to be able to lower this!"); + case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG, + VarArgsBase, + VarArgsOffset); + + case ISD::RET: return LowerRET(Op,DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: break; // Don't custom lower most intrinsics. 
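+    // alpha_umulh yields the high 64 bits of the full 128-bit unsigned
+    // product of its two operands, which is exactly what the generic
+    // ISD::MULHU node computes, so it can be lowered directly to that.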
+ case Intrinsic::alpha_umulh: + return DAG.getNode(ISD::MULHU, dl, MVT::i64, + Op.getOperand(1), Op.getOperand(2)); + } + } + + case ISD::SINT_TO_FP: { + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "Unhandled SINT_TO_FP type in custom expander!"); + SDValue LD; + bool isDouble = Op.getValueType() == MVT::f64; + LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0)); + SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl, + isDouble?MVT::f64:MVT::f32, LD); + return FP; + } + case ISD::FP_TO_SINT: { + bool isDouble = Op.getOperand(0).getValueType() == MVT::f64; + SDValue src = Op.getOperand(0); + + if (!isDouble) //Promote + src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src); + + src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src); + + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src); + } + case ISD::ConstantPool: { + ConstantPoolSDNode *CP = cast(Op); + Constant *C = CP->getConstVal(); + SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment()); + // FIXME there isn't really any debug info here + + SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); + SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi); + return Lo; + } + case ISD::GlobalTLSAddress: + assert(0 && "TLS not implemented for Alpha."); + case ISD::GlobalAddress: { + GlobalAddressSDNode *GSDN = cast(Op); + GlobalValue *GV = GSDN->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); + // FIXME there isn't really any debug info here + + // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) { + if (GV->hasLocalLinkage()) { + SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); + SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi); + return Lo; + } else + return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); + } + case ISD::ExternalSymbol: { + return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, + DAG.getTargetExternalSymbol(cast(Op) + ->getSymbol(), MVT::i64), + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); + } + + case ISD::UREM: + case ISD::SREM: + //Expand only on constant case + if (Op.getOperand(1).getOpcode() == ISD::Constant) { + MVT VT = Op.getNode()->getValueType(0); + SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ? + BuildUDIV(Op.getNode(), DAG, NULL) : + BuildSDIV(Op.getNode(), DAG, NULL); + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1); + return Tmp1; + } + //fall through + case ISD::SDIV: + case ISD::UDIV: + if (Op.getValueType().isInteger()) { + if (Op.getOperand(1).getOpcode() == ISD::Constant) + return Op.getOpcode() == ISD::SDIV ? 
BuildSDIV(Op.getNode(), DAG, NULL) + : BuildUDIV(Op.getNode(), DAG, NULL); + const char* opstr = 0; + switch (Op.getOpcode()) { + case ISD::UREM: opstr = "__remqu"; break; + case ISD::SREM: opstr = "__remq"; break; + case ISD::UDIV: opstr = "__divqu"; break; + case ISD::SDIV: opstr = "__divq"; break; + } + SDValue Tmp1 = Op.getOperand(0), + Tmp2 = Op.getOperand(1), + Addr = DAG.getExternalSymbol(opstr, MVT::i64); + return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2); + } + break; + + case ISD::VAARG: { + SDValue Chain, DataPtr; + LowerVAARG(Op.getNode(), Chain, DataPtr, DAG); + + SDValue Result; + if (Op.getValueType() == MVT::i32) + Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + NULL, 0, MVT::i32); + else + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + return Result; + } + case ISD::VACOPY: { + SDValue Chain = Op.getOperand(0); + SDValue DestP = Op.getOperand(1); + SDValue SrcP = Op.getOperand(2); + const Value *DestS = cast(Op.getOperand(3))->getValue(); + const Value *SrcS = cast(Op.getOperand(4))->getValue(); + + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, + DAG.getConstant(8, MVT::i64)); + Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, + NP, NULL,0, MVT::i32); + SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, + DAG.getConstant(8, MVT::i64)); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + } + case ISD::VASTART: { + SDValue Chain = Op.getOperand(0); + SDValue VAListP = Op.getOperand(1); + const Value *VAListS = cast(Op.getOperand(2))->getValue(); + + // vastart stores the address of the VarArgsBase and VarArgsOffset + SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, MVT::i64)); + return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), + SA2, NULL, 0, MVT::i32); + } + case ISD::RETURNADDR: + return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), + MVT::i64); + //FIXME: implement + case ISD::FRAMEADDR: break; + } + + return SDValue(); +} + +void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) { + DebugLoc dl = N->getDebugLoc(); + assert(N->getValueType(0) == MVT::i32 && + N->getOpcode() == ISD::VAARG && + "Unknown node to custom promote!"); + + SDValue Chain, DataPtr; + LowerVAARG(N, Chain, DataPtr, DAG); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + Results.push_back(Res); + Results.push_back(SDValue(Res.getNode(), 1)); +} + + +//Inline Asm + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'f':
+    case 'r':
+      return C_RegisterClass;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+std::vector<unsigned> AlphaTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;  // Unknown constraint letter
+    case 'f':
+      return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
+                                   Alpha::F3 , Alpha::F4 , Alpha::F5 ,
+                                   Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+                                   Alpha::F9 , Alpha::F10, Alpha::F11,
+                                   Alpha::F12, Alpha::F13, Alpha::F14,
+                                   Alpha::F15, Alpha::F16, Alpha::F17,
+                                   Alpha::F18, Alpha::F19, Alpha::F20,
+                                   Alpha::F21, Alpha::F22, Alpha::F23,
+                                   Alpha::F24, Alpha::F25, Alpha::F26,
+                                   Alpha::F27, Alpha::F28, Alpha::F29,
+                                   Alpha::F30, Alpha::F31, 0);
+    case 'r':
+      return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+                                   Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+                                   Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+                                   Alpha::R9 , Alpha::R10, Alpha::R11,
+                                   Alpha::R12, Alpha::R13, Alpha::R14,
+                                   Alpha::R15, Alpha::R16, Alpha::R17,
+                                   Alpha::R18, Alpha::R19, Alpha::R20,
+                                   Alpha::R21, Alpha::R22, Alpha::R23,
+                                   Alpha::R24, Alpha::R25, Alpha::R26,
+                                   Alpha::R27, Alpha::R28, Alpha::R29,
+                                   Alpha::R30, Alpha::R31, 0);
+    }
+  }
+
+  return std::vector<unsigned>();
+}
+//===----------------------------------------------------------------------===//
+//  Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                 MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  assert((MI->getOpcode() == Alpha::CAS32 ||
+          MI->getOpcode() == Alpha::CAS64 ||
+          MI->getOpcode() == Alpha::LAS32 ||
+          MI->getOpcode() == Alpha::LAS64 ||
+          MI->getOpcode() == Alpha::SWAP32 ||
+          MI->getOpcode() == Alpha::SWAP64) &&
+         "Unexpected instr type to insert");
+
+  bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+    MI->getOpcode() == Alpha::LAS32 ||
+    MI->getOpcode() == Alpha::SWAP32;
+
+  //Load-locked / store-conditional loops for the atomic ops all take the
+  //same form:
+  //start:
+  //ll
+  //do stuff (maybe branch to exit)
+  //sc
+  //test sc and maybe branch to start
+  //exit:
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  DebugLoc dl = MI->getDebugLoc();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+  sinkMBB->transferSuccessors(thisMBB);
+
+  F->insert(It, llscMBB);
+  F->insert(It, sinkMBB);
+
+  BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+  unsigned reg_res = MI->getOperand(0).getReg(),
+    reg_ptr = MI->getOperand(1).getReg(),
+    reg_v2 = MI->getOperand(2).getReg(),
+    reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+  BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+          reg_res).addImm(0).addReg(reg_ptr);
+  switch (MI->getOpcode()) {
+  case Alpha::CAS32:
+  case Alpha::CAS64: {
+    unsigned reg_cmp
+      = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+    BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+      .addReg(reg_v2).addReg(reg_res);
+    BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+      .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+    BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+      .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+    break;
+  }
+  case Alpha::LAS32:
+  case Alpha::LAS64: {
+    BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+      .addReg(reg_res).addReg(reg_v2);
+    break;
+  }
+  case Alpha::SWAP32:
+  case Alpha::SWAP64: {
+    BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+      .addReg(reg_v2).addReg(reg_v2);
+    break;
+  }
+  }
+  BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+    .addReg(reg_store).addImm(0).addReg(reg_ptr);
+  BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+    .addImm(0).addReg(reg_store).addMBB(llscMBB);
+  BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+  thisMBB->addSuccessor(llscMBB);
+  llscMBB->addSuccessor(llscMBB);
+  llscMBB->addSuccessor(sinkMBB);
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+
+  return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The Alpha target isn't yet aware of offsets.
+  return false;
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 000000000000..fdd817c76488
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,114 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+  namespace AlphaISD {
+    enum NodeType {
+      // Start the numbering where the built-in ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+      // These correspond to the identically-named Alpha instructions
+      CVTQT_, CVTQS_, CVTTQ_,
+
+      /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+      /// parts of a global address respectively.
+      GPRelHi, GPRelLo,
+
+      /// RelLit - Literal Relocation of a Global
+      RelLit,
+
+      /// GlobalRetAddr - used to restore the return address
+      GlobalRetAddr,
+
+      /// CALL - Normal call.
+      CALL,
+
+      /// DIVCALL - used for special library calls for div and rem
+      DivCall,
+
+      /// return flag operand
+      RET_FLAG,
+
+      /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+      /// corresponds to the COND_BRANCH pseudo instruction.
+      /// *PRC is the input register to compare to zero,
+      /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+      /// DESTBB is the destination block to branch to, and INFLAG is
+      /// an optional input flag argument.
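+      /// For example (illustrative only): a COND_BRANCH_I whose OPC is
+      /// Alpha::BNE branches to DESTBB when its GPRC operand is non-zero.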
+      COND_BRANCH_I, COND_BRANCH_F
+
+    };
+  }
+
+  class AlphaTargetLowering : public TargetLowering {
+    int VarArgsOffset;  // What is the offset to the first vaarg
+    int VarArgsBase;    // What is the base FrameIndex
+    bool useITOF;
+  public:
+    explicit AlphaTargetLowering(TargetMachine &TM);
+
+    /// getSetCCResultType - Get the SETCC result ValueType
+    virtual MVT getSetCCResultType(MVT VT) const;
+
+    /// LowerOperation - Provide custom lowering hooks for some operations.
+    ///
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    /// ReplaceNodeResults - Replace the results of node with an illegal result
+    /// type with new values built out of custom code.
+    ///
+    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG);
+
+    // Friendly names for dumps
+    const char *getTargetNodeName(unsigned Opcode) const;
+
+    /// LowerCallTo - This hook lowers an abstract call to a function into an
+    /// actual call.
+    virtual std::pair<SDValue, SDValue>
+    LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
+                bool isVarArg, bool isInreg, unsigned CC, bool isTailCall,
+                SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+                DebugLoc dl);
+
+    ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    std::vector<unsigned>
+    getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                      MVT VT) const;
+
+    bool hasITOF() { return useITOF; }
+
+    MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB) const;
+
+    virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+  private:
+    // Helpers for custom lowering.
+    void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+                    SelectionDAG &DAG);
+
+  };
+}
+
+#endif   // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 000000000000..6d82875fad2d
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//3.3: +//Memory +//Branch +//Operate +//Floating-point +//PALcode + +def u8imm : Operand; +def s14imm : Operand; +def s16imm : Operand; +def s21imm : Operand; +def s64imm : Operand; +def u64imm : Operand; + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +// Alpha instruction baseline +class InstAlpha op, string asmstr, InstrItinClass itin> : Instruction { + field bits<32> Inst; + let Namespace = "Alpha"; + let AsmString = asmstr; + let Inst{31-26} = op; + let Itinerary = itin; +} + + +//3.3.1 +class MForm opcode, bit load, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let canFoldAsLoad = load; + let Defs = [R28]; //We may use this for frame index calculations, so reserve it here + + bits<5> Ra; + bits<16> disp; + bits<5> Rb; + + let Inst{25-21} = Ra; + let Inst{20-16} = Rb; + let Inst{15-0} = disp; +} +class MfcForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> + : InstAlpha { + bits<5> Ra; + + let OutOperandList = (ops GPRC:$RA); + let InOperandList = (ops); + let Inst{25-21} = Ra; + let Inst{20-16} = 0; + let Inst{15-0} = fc; +} +class MfcPForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> + : InstAlpha { + let OutOperandList = (ops); + let InOperandList = (ops); + let Inst{25-21} = 0; + let Inst{20-16} = 0; + let Inst{15-0} = fc; +} + +class MbrForm opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin> + : InstAlpha { + bits<5> Ra; + bits<5> Rb; + bits<14> disp; + + let OutOperandList = (ops); + let InOperandList = OL; + + let Inst{25-21} = Ra; + let Inst{20-16} = Rb; + let Inst{15-14} = TB; + let Inst{13-0} = disp; +} +class MbrpForm opcode, bits<2> TB, dag OL, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern=pattern; + bits<5> Ra; + bits<5> Rb; + bits<14> disp; + + let OutOperandList = (ops); + let InOperandList = OL; + + let Inst{25-21} = Ra; + let Inst{20-16} = Rb; + let Inst{15-14} = TB; + let Inst{13-0} = disp; +} + +//3.3.2 +def target : Operand {} + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { +class BFormN opcode, dag OL, string asmstr, InstrItinClass itin> + : InstAlpha { + let OutOperandList = (ops); + let InOperandList = OL; + bits<64> Opc; //dummy + bits<5> Ra; + bits<21> disp; + + let Inst{25-21} = Ra; + let Inst{20-0} = disp; +} +} + +let isBranch = 1, isTerminator = 1 in +class BFormD opcode, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (ops); + let InOperandList = (ops target:$DISP); + bits<5> Ra; + bits<21> disp; + + let Inst{25-21} = Ra; + let Inst{20-0} = disp; +} + +//3.3.3 +class OForm opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (outs GPRC:$RC); + let InOperandList = (ins GPRC:$RA, GPRC:$RB); + + bits<5> Rc; + bits<5> Ra; + bits<5> Rb; + bits<7> Function = fun; + + let Inst{25-21} = Ra; + let Inst{20-16} = Rb; + let Inst{15-13} = 0; + let Inst{12} = 0; + let Inst{11-5} = Function; + let Inst{4-0} = Rc; +} + +class OForm2 opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (outs 
GPRC:$RC); + let InOperandList = (ins GPRC:$RB); + + bits<5> Rc; + bits<5> Rb; + bits<7> Function = fun; + + let Inst{25-21} = 31; + let Inst{20-16} = Rb; + let Inst{15-13} = 0; + let Inst{12} = 0; + let Inst{11-5} = Function; + let Inst{4-0} = Rc; +} + +class OForm4 opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (outs GPRC:$RDEST); + let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE); + let Constraints = "$RFALSE = $RDEST"; + let DisableEncoding = "$RFALSE"; + + bits<5> Rc; + bits<5> Ra; + bits<5> Rb; + bits<7> Function = fun; + +// let isTwoAddress = 1; + let Inst{25-21} = Ra; + let Inst{20-16} = Rb; + let Inst{15-13} = 0; + let Inst{12} = 0; + let Inst{11-5} = Function; + let Inst{4-0} = Rc; +} + + +class OFormL opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (outs GPRC:$RC); + let InOperandList = (ins GPRC:$RA, u8imm:$L); + + bits<5> Rc; + bits<5> Ra; + bits<8> LIT; + bits<7> Function = fun; + + let Inst{25-21} = Ra; + let Inst{20-13} = LIT; + let Inst{12} = 1; + let Inst{11-5} = Function; + let Inst{4-0} = Rc; +} + +class OForm4L opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + let OutOperandList = (outs GPRC:$RDEST); + let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE); + let Constraints = "$RFALSE = $RDEST"; + let DisableEncoding = "$RFALSE"; + + bits<5> Rc; + bits<5> Ra; + bits<8> LIT; + bits<7> Function = fun; + +// let isTwoAddress = 1; + let Inst{25-21} = Ra; + let Inst{20-13} = LIT; + let Inst{12} = 1; + let Inst{11-5} = Function; + let Inst{4-0} = Rc; +} + +//3.3.4 +class FPForm opcode, bits<11> fun, string asmstr, list pattern, InstrItinClass itin> + : InstAlpha { + let Pattern = pattern; + + bits<5> Fc; + bits<5> Fa; + bits<5> Fb; + bits<11> Function = fun; + + let Inst{25-21} = Fa; + let Inst{20-16} = Fb; + let Inst{15-5} = Function; + let Inst{4-0} = Fc; +} + +//3.3.5 +class PALForm opcode, dag OL, string asmstr, InstrItinClass itin> + : InstAlpha { + let OutOperandList = (ops); + let InOperandList = OL; + bits<26> Function; + + let Inst{25-0} = Function; +} + + +// Pseudo instructions. +class PseudoInstAlpha pattern, InstrItinClass itin> + : InstAlpha<0, nm, itin> { + let OutOperandList = OOL; + let InOperandList = IOL; + let Pattern = pattern; + +} diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp new file mode 100644 index 000000000000..a54d97d33c40 --- /dev/null +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -0,0 +1,450 @@ +//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Alpha implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "Alpha.h" +#include "AlphaInstrInfo.h" +#include "AlphaGenInstrInfo.inc" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +AlphaInstrInfo::AlphaInstrInfo() + : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)), + RI(*this) { } + + +bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned& sourceReg, unsigned& destReg, + unsigned& SrcSR, unsigned& DstSR) const { + unsigned oc = MI.getOpcode(); + if (oc == Alpha::BISr || + oc == Alpha::CPYSS || + oc == Alpha::CPYST || + oc == Alpha::CPYSSt || + oc == Alpha::CPYSTs) { + // or r1, r2, r2 + // cpys(s|t) r1 r2 r2 + assert(MI.getNumOperands() >= 3 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + MI.getOperand(2).isReg() && + "invalid Alpha BIS instruction!"); + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + SrcSR = DstSR = 0; + return true; + } + } + return false; +} + +unsigned +AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + case Alpha::LDL: + case Alpha::LDQ: + case Alpha::LDBU: + case Alpha::LDWU: + case Alpha::LDS: + case Alpha::LDT: + if (MI->getOperand(1).isFI()) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned +AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + case Alpha::STL: + case Alpha::STQ: + case Alpha::STB: + case Alpha::STW: + case Alpha::STS: + case Alpha::STT: + if (MI->getOperand(1).isFI()) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +static bool isAlphaIntCondCode(unsigned Opcode) { + switch (Opcode) { + case Alpha::BEQ: + case Alpha::BNE: + case Alpha::BGE: + case Alpha::BGT: + case Alpha::BLE: + case Alpha::BLT: + case Alpha::BLBC: + case Alpha::BLBS: + return true; + default: + return false; + } +} + +unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "Alpha branch conditions have two components!"); + + // One-way branch. + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch + BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB); + else // Conditional branch + if (isAlphaIntCondCode(Cond[0].getImm())) + BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + else + BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + return 1; + } + + // Two-way Conditional Branch. 
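+  // (Illustrative sketch, not taken verbatim from any output: with
+  //  Cond = {BEQ, $r} and both destinations set, the code below builds
+  //    COND_BRANCH_I BEQ, $r, TBB
+  //    BR FBB
+  //  i.e. a conditional branch to the true block followed by an
+  //  unconditional branch to the false block, and reports 2 instructions.)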
+ if (isAlphaIntCondCode(Cond[0].getImm())) + BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + else + BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB); + return 2; +} + +bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; + if (DestRC != SrcRC) { + // Not yet supported! + return false; + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + if (DestRC == Alpha::GPRCRegisterClass) { + BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) + .addReg(SrcReg) + .addReg(SrcReg); + } else if (DestRC == Alpha::F4RCRegisterClass) { + BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg) + .addReg(SrcReg) + .addReg(SrcReg); + } else if (DestRC == Alpha::F8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg) + .addReg(SrcReg) + .addReg(SrcReg); + } else { + // Attempt to copy register that is not GPR or FPR + return false; + } + + return true; +} + +void +AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC) const { + //cerr << "Trying to store " << getPrettyName(SrcReg) << " to " + // << FrameIdx << "\n"; + //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg); + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + if (RC == Alpha::F4RCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::STS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + else if (RC == Alpha::F8RCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::STT)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + else if (RC == Alpha::GPRCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::STQ)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + else + abort(); +} + +void AlphaInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + unsigned Opc = 0; + if (RC == Alpha::F4RCRegisterClass) + Opc = Alpha::STS; + else if (RC == Alpha::F8RCRegisterClass) + Opc = Alpha::STT; + else if (RC == Alpha::GPRCRegisterClass) + Opc = Alpha::STQ; + else + abort(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); +} + +void +AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC) const { + //cerr << "Trying to load " << getPrettyName(DestReg) << " to " + // << FrameIdx << "\n"; + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + if (RC == Alpha::F4RCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + else if (RC == Alpha::F8RCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + 
else if (RC == Alpha::GPRCRegisterClass) + BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg) + .addFrameIndex(FrameIdx).addReg(Alpha::F31); + else + abort(); +} + +void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + unsigned Opc = 0; + if (RC == Alpha::F4RCRegisterClass) + Opc = Alpha::LDS; + else if (RC == Alpha::F8RCRegisterClass) + Opc = Alpha::LDT; + else if (RC == Alpha::GPRCRegisterClass) + Opc = Alpha::LDQ; + else + abort(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); +} + +MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { + if (Ops.size() != 1) return NULL; + + // Make sure this is a reg-reg copy. + unsigned Opc = MI->getOpcode(); + + MachineInstr *NewMI = NULL; + switch(Opc) { + default: + break; + case Alpha::BISr: + case Alpha::CPYSS: + case Alpha::CPYST: + if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + if (Ops[0] == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + Opc = (Opc == Alpha::BISr) ? Alpha::STQ : + ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(InReg, getKillRegState(isKill)) + .addFrameIndex(FrameIndex) + .addReg(Alpha::F31); + } else { // load -> move + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : + ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(OutReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FrameIndex) + .addReg(Alpha::F31); + } + } + break; + } + return NewMI; +} + +static unsigned AlphaRevCondCode(unsigned Opcode) { + switch (Opcode) { + case Alpha::BEQ: return Alpha::BNE; + case Alpha::BNE: return Alpha::BEQ; + case Alpha::BGE: return Alpha::BLT; + case Alpha::BGT: return Alpha::BLE; + case Alpha::BLE: return Alpha::BGT; + case Alpha::BLT: return Alpha::BGE; + case Alpha::BLBC: return Alpha::BLBS; + case Alpha::BLBS: return Alpha::BLBC; + case Alpha::FBEQ: return Alpha::FBNE; + case Alpha::FBNE: return Alpha::FBEQ; + case Alpha::FBGE: return Alpha::FBLT; + case Alpha::FBGT: return Alpha::FBLE; + case Alpha::FBLE: return Alpha::FBGT; + case Alpha::FBLT: return Alpha::FBGE; + default: + assert(0 && "Unknown opcode"); + } + return 0; // Not reached +} + +// Branch analysis. +bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. 
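+  // (Sketch of the block shapes this analysis recognizes, mirroring the
+  //  cases handled below; COND_BR stands for COND_BRANCH_I/COND_BRANCH_F:
+  //    BR TBB                        -- unconditional branch
+  //    COND_BR opc, $r, TBB          -- conditional branch plus fall-through
+  //    COND_BR opc, $r, TBB; BR FBB  -- conditional plus unconditional pair
+  //    BR TBB; BR <dead>             -- second branch is unreachable
+  //  Anything else makes AnalyzeBranch return true, "cannot analyze".)
+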
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastInst->getOpcode() == Alpha::BR) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I || + LastInst->getOpcode() == Alpha::COND_BRANCH_F) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it. + if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I || + SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) && + LastInst->getOpcode() == Alpha::BR) { + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Alpha::BRs, handle it. The second one is not + // executed, so remove it. + if (SecondLastInst->getOpcode() == Alpha::BR && + LastInst->getOpcode() == Alpha::BR) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != Alpha::BR && + I->getOpcode() != Alpha::COND_BRANCH_I && + I->getOpcode() != Alpha::COND_BRANCH_F) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != Alpha::COND_BRANCH_I && + I->getOpcode() != Alpha::COND_BRANCH_F) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31) + .addReg(Alpha::R31); +} + +bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case Alpha::RETDAG: // Return. + case Alpha::RETDAGp: + case Alpha::BR: // Uncond branch. + case Alpha::JMP: // Indirect branch. + return true; + default: return false; + } +} +bool AlphaInstrInfo:: +ReverseBranchCondition(SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Invalid Alpha branch opcode!"); + Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm())); + return false; +} + diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h new file mode 100644 index 000000000000..182aa32f447a --- /dev/null +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -0,0 +1,97 @@ +//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Alpha implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHAINSTRUCTIONINFO_H +#define ALPHAINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "AlphaRegisterInfo.h" + +namespace llvm { + +class AlphaInstrInfo : public TargetInstrInfoImpl { + const AlphaRegisterInfo RI; +public: + AlphaInstrInfo(); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; +}; + +} + +#endif diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td new file mode 100644 index 000000000000..e73bdf9f6e91 --- /dev/null +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -0,0 +1,1137 @@ +//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+  SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt   : SDNode<"AlphaISD::CVTQT_",  SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs   : SDNode<"AlphaISD::CVTQS_",  SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq   : SDNode<"AlphaISD::CVTTQ_",  SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit  : SDNode<"AlphaISD::RelLit",  SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag       : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+                           [SDNPHasChain, SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+                                           SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+                           [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_AlphaCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ // invert the immediate
+  return getI64Imm(~(uint64_t)N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ // negate the immediate
+  return getI64Imm(~(uint64_t)N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ // sign-extend the low 32 bits
+  return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ // sign-extend the low 16 bits
+  return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ // lda part of the constant
+  return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ // ldah part of the constant
+  return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get the zap immediate
+  ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+  return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+  return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+  uint64_t x = abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+  return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+  return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+  return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+  return ((uint64_t)~N->getZExtValue() + 1) ==
+         (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+  return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+         (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+  return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+         ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS) return 0;
+  uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+  return build != 0;
+}]>;
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+  (void)N;  // silence warning.
+  return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+  return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+                       N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+  return isPowerOf2_64(N->getZExtValue() -
+                       getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+  int64_t d = abs64((int64_t)N->getZExtValue() -
+                    (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+  if (isPowerOf2_64(d)) return false;
+  switch (d) {
+    case 1: case 3: case 5: return false;
+    default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+  };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4  : PatFrag<(ops node:$op1, node:$op2),
+                    (add (shl node:$op1, 2), node:$op2)>;
+def sub4  : PatFrag<(ops node:$op1, node:$op2),
+                    (sub (shl node:$op1, 2), node:$op2)>;
+def add8  : PatFrag<(ops node:$op1, node:$op2),
+                    (add (shl node:$op1, 3), node:$op2)>;
+def sub8  : PatFrag<(ops node:$op1, node:$op2),
+                    (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+                "; ADJUP $amt",
+                [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+                "; ADJDOWN $amt1",
+                [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+         "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomDAGSchedInserter = 1 in {   // Expanded by the scheduler.
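+// A rough sketch of what the custom inserter (EmitInstrWithCustomInserter in
+// AlphaISelLowering.cpp) expands these pseudos into -- a load-locked /
+// store-conditional retry loop. Shown for CAS64 with the operand names from
+// the defs below ($ptr, $cmp, $swp, $dst) and a scratch register $t invented
+// for illustration; CAS32 uses ldl_l/stl_c instead:
+//   loop: ldq_l  $dst, 0($ptr)      ; load-locked the current value
+//         cmpeq  $cmp, $dst, $t     ; does it match the expected value?
+//         beq    $t, done           ; no -> exit without storing
+//         bis    $31, $swp, $t      ; yes -> stage the new value
+//         stq_c  $t, 0($ptr)        ; store-conditional
+//         beq    $t, loop           ; lost the lock -> retry from the load
+//   done: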
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; +def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; + +def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; + +def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +} + +//*********************** +//Real instructions +//*********************** + +//Operation Form: + +//conditional moves, int + +multiclass cmov_inst fun, string asmstr, PatFrag OpNode> { +def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"), + [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>; +def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"), + [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>; +} + +defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>; +defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>; +defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>; +defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>; +defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>; +defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>; +defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>; +defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>; + +//General pattern for cmov +def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2), + (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>; +def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2), + (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>; + +//Invert sense when we can for constants: +def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), + (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), + (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), + (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), + (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), + (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; + +multiclass all_inst opc, bits<7> funl, bits<7> funq, + string asmstr, PatFrag OpNode, InstrItinClass itin> { + def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"), + [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>; + def Li : OFormL; + def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"), + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>; + def Qi : OFormL; +} + +defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>; +defm ADD : 
all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>; +defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>; +defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>; +defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>; +defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>; +defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>; +//Const cases since legalize does sub x, int -> add x, inv(int) + 1 +def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>; + +multiclass log_inst opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> { +def r : OForm; +def i : OFormL; +} +multiclass inv_inst opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> { +def r : OForm; +def i : OFormL; +} + +defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>; +defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>; +defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>; +defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>; +defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>; +defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>; + +defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>; +defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>; +defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>; +defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>; + +def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC", + [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>; +def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC", + [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>; +def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC", + [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>; +def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC", + [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>; +def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC", + [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>; +def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC", + [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>; +def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC", + [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>; +def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC", + [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>; + +//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low +//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high +//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high +//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low +//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high +//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high +//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low +//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low +//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high +//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high +//def EXTWLi : 
OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low + +//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low +//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low +//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high +//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high +//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low +//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low +//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high +//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high +//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low +//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low +//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high +//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high +//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low +//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low + +//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low +//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low +//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high +//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high +//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low +//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low +//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high +//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high +//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low +//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low +//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high +//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high +//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low +//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low + +def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>; + +// Define the pattern that produces ZAPNOTi. 
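+// (Worked example, assuming the usual Alpha zapnot semantics where each bit
+//  of the 8-bit literal keeps the corresponding byte and clears the rest:
+//  (and GPRC:$RA, 0xFFFFFFFF) keeps bytes 0-3, so get_zapImm yields 0x0F and
+//  the pattern below selects it as
+//    zapnot $RA, 15, $RC
+//  i.e. a 32-to-64-bit zero extension.)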
+def : Pat<(zappat:$imm GPRC:$RA), + (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>; + + +//Comparison, int +//So this is a waste of what this instruction can do, but it still saves something +def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC", + [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>; +def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC", + [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>; +def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC", + [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>; +def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC", + [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>; +def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC", + [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>; +def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC", + [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>; +def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC", + [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>; +def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC", + [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>; +def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC", + [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>; +def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC", + [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>; +def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC", + [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>; +def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC", + [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>; + +//Patterns for unsupported int comparisons +def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>; +def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; + +def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>; +def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>; + +def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>; +def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>; + +def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>; +def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>; + +def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>; +def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>; + +def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>; + +def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>; + + +let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { + def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine + def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine +} + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in +def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0", + [(brind GPRC:$RS)], s_jsr>; //Jump + +let isCall = 1, Ra = 26, + Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, + F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in { + def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine +} +let isCall = 1, Ra = 26, Rb = 
27, disp = 0, + Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, + F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in { + def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine +} + +let isCall = 1, Ra = 23, Rb = 27, disp = 0, + Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in + def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem + + +def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return + + +let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)", + [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; +def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow", + [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; +def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)", + [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; +def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow", + [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; +def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)", + [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; +def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow", + [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; +def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)", + [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; +def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow", + [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; +} + + +let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { +def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)", + [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; +def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow", + [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; +def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)", + [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; +def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow", + [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; +def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)", + [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; +def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow", + [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; +def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)", + [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; +def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow", + [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; +} + +//Load address +let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)", + [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>; +def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow", + [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address +def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)", + [], s_lda>; //Load address high +def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh", + 
[(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high +} + +let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { +def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)", + [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; +def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow", + [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; +} +let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)", + [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; +def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow", + [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; +} +let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { +def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)", + [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; +def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow", + [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; +} +let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)", + [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; +def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow", + [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; +} + + +//constpool rels +def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDQr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDLr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDBUr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDWUr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)), + (LDAr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)), + (LDAHr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDSr tconstpool:$DISP, GPRC:$RB)>; +def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), + (LDTr tconstpool:$DISP, GPRC:$RB)>; + +//jumptable rels +def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)), + (LDAHr tjumptable:$DISP, GPRC:$RB)>; +def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)), + (LDAr tjumptable:$DISP, GPRC:$RB)>; + + +//misc ext patterns +def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))), + (LDBU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))), + (LDWU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))), + (LDL immSExt16:$DISP, GPRC:$RB)>; + +//0 disp patterns +def : Pat<(i64 (load GPRC:$addr)), + (LDQ 0, GPRC:$addr)>; +def : Pat<(f64 (load GPRC:$addr)), + (LDT 0, GPRC:$addr)>; +def : Pat<(f32 (load GPRC:$addr)), + (LDS 0, GPRC:$addr)>; +def : Pat<(i64 (sextloadi32 GPRC:$addr)), + (LDL 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi16 GPRC:$addr)), + (LDWU 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi16 GPRC:$addr)), + (LDWU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi32 GPRC:$addr)), + (LDL 0, GPRC:$addr)>; + +def : Pat<(store GPRC:$DATA, GPRC:$addr), + (STQ 
GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+          (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+          (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+          (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+          (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+          (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//load address, relocated gpdist form
+let OutOperandList = (ops GPRC:$RA),
+    InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+    mayLoad = 1 in {
+def LDAg  : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>;  //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+                 [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+          (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+    InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+    Constraints = "$RA = $RR",
+    DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (ops GPRC:$RA),
+    InOperandList = (ops s64imm:$DISP, GPRC:$RB),
+    mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB   : MfcPForm<0x18, 0x4000, "mb",  s_imisc>;    //memory barrier
+def WMB  : MfcPForm<0x18, 0x4400, "wmb", s_imisc>;    //write memory barrier
+
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
+          (WMB)>;
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+          (MB)>;
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+                   [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+                  [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+                  [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+                  [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+                  [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+                   [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+                    [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+                   [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+                  [(set
F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>; +def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC", + [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>; +def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC", + [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>; +def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC", + [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>; + +def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", + [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>; +def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent +def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", + [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>; + +def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>; +// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>; +def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>; +// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>; +def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>; +// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>; +def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>; +// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>; +} + +//More CPYS forms: +let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in { +def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", + [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>; +def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", + [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>; +} +let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in { +def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", + [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>; +def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent +def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", + [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>; +} + +//conditional moves, floats +let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), + isTwoAddress = 1 in { +def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero +def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero +def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero +def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero +def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero +def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero +} +//conditional moves, doubles +let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), + isTwoAddress = 1 in { +def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>; +} + +//misc FP selects +//Select double + +def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, 
F8RC:$RB))>; +def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; + +def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; + +def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; + +def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; + +def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; + +def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), + (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; + +//Select single +def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; + +def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; + +def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; + +def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), + (FCMOVNES 
F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+          (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC", [], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+                   [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC", [], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+                   [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+                   [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+                   [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+                   [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+                   [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+                   [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+  : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst),
+           !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+  : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst),
+           !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst),
+                           "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+                           s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst),
+                           "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+                           s_fbr>;
+//Branches, int
+def BEQ  : br_icc<0x39, "beq">;
+def BGE  : br_icc<0x3E,
"bge">; +def BGT : br_icc<0x3F, "bgt">; +def BLBC : br_icc<0x38, "blbc">; +def BLBS : br_icc<0x3C, "blbs">; +def BLE : br_icc<0x3B, "ble">; +def BLT : br_icc<0x3A, "blt">; +def BNE : br_icc<0x3D, "bne">; + +//Branches, float +def FBEQ : br_fcc<0x31, "fbeq">; +def FBGE : br_fcc<0x36, "fbge">; +def FBGT : br_fcc<0x37, "fbgt">; +def FBLE : br_fcc<0x33, "fble">; +def FBLT : br_fcc<0x32, "fblt">; +def FBNE : br_fcc<0x36, "fbne">; +} + +//An ugly trick to get the opcode as an imm I can use +def immBRCond : SDNodeXFormgetZExtValue()) { + default: assert(0 && "Unknown branch type"); + case 0: return getI64Imm(Alpha::BEQ); + case 1: return getI64Imm(Alpha::BNE); + case 2: return getI64Imm(Alpha::BGE); + case 3: return getI64Imm(Alpha::BGT); + case 4: return getI64Imm(Alpha::BLE); + case 5: return getI64Imm(Alpha::BLT); + case 6: return getI64Imm(Alpha::BLBS); + case 7: return getI64Imm(Alpha::BLBC); + case 20: return getI64Imm(Alpha::FBEQ); + case 21: return getI64Imm(Alpha::FBNE); + case 22: return getI64Imm(Alpha::FBGE); + case 23: return getI64Imm(Alpha::FBGT); + case 24: return getI64Imm(Alpha::FBLE); + case 25: return getI64Imm(Alpha::FBLT); + } +}]>; + +//Int cond patterns +def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP), + (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP), + (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>; + +def : Pat<(brcond GPRC:$RA, bb:$DISP), + (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>; +def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP), + (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>; +def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP), + (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>; + +//FP cond patterns +def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>; +def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>; +def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>; +def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>; +def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>; +def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>; + + +def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; + +def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setolt F8RC:$RA, 
F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; + +def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; + +def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; +def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; +def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; + +def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; +def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; +def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; + +def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; +def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; + + +def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>; + +def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>; + +def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>; + +def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>; + +def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>; + +def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>; +def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), + (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>; + +//End Branches + +//S_floating : IEEE Single +//T_floating : IEEE Double + +//Unused instructions +//Mnemonic Format Opcode Description +//CALL_PAL Pcd 00 Trap to PALcode +//ECB Mfc 18.E800 Evict cache block +//EXCB Mfc 18.0400 Exception barrier +//FETCH Mfc 18.8000 Prefetch data +//FETCH_M Mfc 18.A000 Prefetch data, modify intent +//LDQ_U Mem 0B Load unaligned quadword 
+//MB       Mfc      18.4000  Memory barrier
+//STQ_U    Mem      0F       Store unaligned quadword
+//TRAPB    Mfc      18.0000  Trap barrier
+//WH64     Mfc      18.F800  Write hint - 64 bytes
+//WMB      Mfc      18.4400  Write memory barrier
+//MF_FPCR  F-P      17.025   Move from FPCR
+//MT_FPCR  F-P      17.024   Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">;   //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">;  //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">;   //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">;  //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">;  //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">;  //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">;  //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">;  //Vector unsigned word minimum
+//def PERR   : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">;    //Pixel error
+//def PKLB   : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">;    //Pack longwords to bytes
+//def PKWB   : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">;     //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">;  //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">;  //Unpack bytes to words
+//CVTLQ    F-P      17.010   Convert longword to quadword
+//CVTQL    F-P      17.030   Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+  //true if imm fits in a LDAH LDA pair
+  int64_t val = (int64_t)N->getZExtValue();
+  return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+  //true if imm fits in a LDAH LDA pair with zeroext
+  uint64_t uval = N->getZExtValue();
+  int32_t val32 = (int32_t)uval;
+  return ((uval >> 32) == 0 && //empty upper bits
+          val32 <= IMM_FULLHIGH);
+//          val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+          (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+          (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+          (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+          (ZAPNOTi (LDA (LL16 (SExt32 immConst2PartInt:$imm)),
+                        (LDAH (LH16 (SExt32 immConst2PartInt:$imm)), R31)), 15)>;
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0), +// (R31)>; +//def : Pat<(f64 0.0), +// (F31)>; +//def : Pat<(f64 -0.0), +// (CPYSNT F31, F31)>; +//def : Pat<(f32 0.0), +// (F31)>; +//def : Pat<(f32 -0.0), +// (CPYSNS F31, F31)>; + +//Misc Patterns: + +def : Pat<(sext_inreg GPRC:$RB, i32), + (ADDLi GPRC:$RB, 0)>; + +def : Pat<(fabs F8RC:$RB), + (CPYST F31, F8RC:$RB)>; +def : Pat<(fabs F4RC:$RB), + (CPYSS F31, F4RC:$RB)>; +def : Pat<(fneg F8RC:$RB), + (CPYSNT F8RC:$RB, F8RC:$RB)>; +def : Pat<(fneg F4RC:$RB), + (CPYSNS F4RC:$RB, F4RC:$RB)>; + +def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)), + (CPYSNS F4RC:$B, F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)), + (CPYSNT F8RC:$B, F8RC:$A)>; +def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)), + (CPYSNSt F8RC:$B, F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)), + (CPYSNTs F4RC:$B, F8RC:$A)>; + +//Yes, signed multiply high is ugly +def : Pat<(mulhs GPRC:$RA, GPRC:$RB), + (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA), + (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>; + +//Stupid crazy arithmetic stuff: +let AddedComplexity = 1 in { +def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>; + +//slight tree expansion if we are multiplying near to a power of 2 +//n is above a power of 2 +def : Pat<(mul GPRC:$RA, immRem1:$imm), + (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, immRem2:$imm), + (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem3:$imm), + (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem4:$imm), + (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>; +def : Pat<(mul GPRC:$RA, immRem5:$imm), + (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRemP2:$imm), + (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>; + +//n is below a power of 2 +//FIXME: figure out why something is truncating the imm to 32bits +// this will fix 2007-11-27-mulneg3 +//def : Pat<(mul GPRC:$RA, immRem1n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>; +//def : Pat<(mul GPRC:$RA, immRem2n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>; +//def : Pat<(mul GPRC:$RA, immRem3n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>; +//def : Pat<(mul GPRC:$RA, immRem4n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>; +//def : Pat<(mul GPRC:$RA, immRem5n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>; +//def : Pat<(mul GPRC:$RA, immRemP2n:$imm), +// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>; +} //Added complexity diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp new file mode 100644 index 000000000000..3fecb19d73b7 --- /dev/null +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -0,0 +1,307 @@ +//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Alpha target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "AlphaJITInfo.h"
+#include "AlphaRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
+  ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
+#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
+  ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
+
+#define BUILD_LDA(RD, RS, IMM16) \
+  ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_LDAH(RD, RS, IMM16) \
+  ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+
+#define BUILD_LDQ(RD, RS, IMM16) \
+  ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
+
+#define BUILD_JMP(RD, RS, IMM16) \
+  ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
+#define BUILD_JSR(RD, RS, IMM16) \
+  ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
+
+#define BUILD_SLLi(RD, RS, IMM8) \
+  (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
+
+#define BUILD_ORi(RD, RS, IMM8) \
+  (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
+
+#define BUILD_OR(RD, RS, RT) \
+  (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
+
+
+
+static void EmitBranchToAt(void *At, void *To) {
+  unsigned long Fn = (unsigned long)To;
+
+  unsigned *AtI = (unsigned*)At;
+
+  AtI[0] = BUILD_OR(0, 27, 27);
+
+  DOUT << "Stub targeting " << To << "\n";
+
+  for (int x = 1; x <= 8; ++x) {
+    AtI[2*x - 1] = BUILD_SLLi(27,27,8);
+    unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
+    //DOUT << "outputting " << hex << d << dec << "\n";
+    AtI[2*x] = BUILD_ORi(27, 27, d);
+  }
+  AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
+  AtI[18] = 0x00FFFFFF; //mark this as a stub
+}
+
+void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+  //FIXME
+  assert(0);
+}
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+//static AlphaJITInfo* AlphaJTI;
+
+extern "C" {
+#ifdef __alpha
+
+  void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
+  {
+    void* Target = JITCompilerFunction(CameFromStub);
+
+    //rewrite the stub to an unconditional branch
+    if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
+      DOUT << "Came from a stub, rewriting\n";
+      EmitBranchToAt(CameFromStub, Target);
+    } else {
+      DOUT << "confused, didn't come from stub at " << CameFromStub
+           << " old jump vector " << oldpv
+           << " new jump vector " << Target << "\n";
+    }
+
+    //Change pv to new Target
+    *oldpv = (long)Target;
+  }
+
+  void AlphaCompilationCallback(void);
+
+  asm(
+      ".text\n"
+      ".globl AlphaCompilationCallbackC\n"
+      ".align 4\n"
+      ".globl AlphaCompilationCallback\n"
+      ".ent AlphaCompilationCallback\n"
+"AlphaCompilationCallback:\n"
+      //      //get JIT's GOT
+      "ldgp $29, 0($27)\n"
+      //Save args, callee saved, and perhaps others?
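+      //(the breakdown below totals 12 + 14 + 3 = 29 quadwords, i.e.
+      // 29 * 8 = 232 bytes, matching the "lda $30, -232($30)" that follows)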
+      //args: $16-$21 $f16-$f21 (12)
+      //callee: $9-$14 $f2-$f9 (14)
+      //others: fp:$15 ra:$26 pv:$27 (3)
+      "lda $30, -232($30)\n"
+      "stq $16, 0($30)\n"
+      "stq $17, 8($30)\n"
+      "stq $18, 16($30)\n"
+      "stq $19, 24($30)\n"
+      "stq $20, 32($30)\n"
+      "stq $21, 40($30)\n"
+      "stt $f16, 48($30)\n"
+      "stt $f17, 56($30)\n"
+      "stt $f18, 64($30)\n"
+      "stt $f19, 72($30)\n"
+      "stt $f20, 80($30)\n"
+      "stt $f21, 88($30)\n"
+      "stq $9, 96($30)\n"
+      "stq $10, 104($30)\n"
+      "stq $11, 112($30)\n"
+      "stq $12, 120($30)\n"
+      "stq $13, 128($30)\n"
+      "stq $14, 136($30)\n"
+      "stt $f2, 144($30)\n"
+      "stt $f3, 152($30)\n"
+      "stt $f4, 160($30)\n"
+      "stt $f5, 168($30)\n"
+      "stt $f6, 176($30)\n"
+      "stt $f7, 184($30)\n"
+      "stt $f8, 192($30)\n"
+      "stt $f9, 200($30)\n"
+      "stq $15, 208($30)\n"
+      "stq $26, 216($30)\n"
+      "stq $27, 224($30)\n"
+
+      "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
+      "bis $0, $0, $17\n" //pass the rough stub addr in the second arg
+      "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
+
+      "ldq $16, 0($30)\n"
+      "ldq $17, 8($30)\n"
+      "ldq $18, 16($30)\n"
+      "ldq $19, 24($30)\n"
+      "ldq $20, 32($30)\n"
+      "ldq $21, 40($30)\n"
+      "ldt $f16, 48($30)\n"
+      "ldt $f17, 56($30)\n"
+      "ldt $f18, 64($30)\n"
+      "ldt $f19, 72($30)\n"
+      "ldt $f20, 80($30)\n"
+      "ldt $f21, 88($30)\n"
+      "ldq $9, 96($30)\n"
+      "ldq $10, 104($30)\n"
+      "ldq $11, 112($30)\n"
+      "ldq $12, 120($30)\n"
+      "ldq $13, 128($30)\n"
+      "ldq $14, 136($30)\n"
+      "ldt $f2, 144($30)\n"
+      "ldt $f3, 152($30)\n"
+      "ldt $f4, 160($30)\n"
+      "ldt $f5, 168($30)\n"
+      "ldt $f6, 176($30)\n"
+      "ldt $f7, 184($30)\n"
+      "ldt $f8, 192($30)\n"
+      "ldt $f9, 200($30)\n"
+      "ldq $15, 208($30)\n"
+      "ldq $26, 216($30)\n"
+      "ldq $27, 224($30)\n" //this was updated in the callback with the target
+
+      "lda $30, 232($30)\n" //restore sp
+      "jmp $31, ($27)\n" //jump to the new function
+      ".end AlphaCompilationCallback\n"
+      );
+#else
+  void AlphaCompilationCallback() {
+    cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
+    abort();
+  }
+#endif
+}
+
+void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
+                                     JITCodeEmitter &JCE) {
+  //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
+  //Do things in a stupid slow way!
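+  //Reserve 19 words to match the stub laid out by EmitBranchToAt above:
+  //1 OR (pv copy) + 8 SLLi/ORi pairs building the 64-bit target address
+  //+ 1 JMP + 1 stub marker word = 19 instructions of 4 bytes each.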
+  JCE.startGVStub(F, 19*4);
+  void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
+  for (int x = 0; x < 19; ++x)
+    JCE.emitWordLE(0);
+  EmitBranchToAt(Addr, Fn);
+  DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+  return JCE.finishGVStub(F);
+}
+
+TargetJITInfo::LazyResolverFn
+AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+  JITCompilerFunction = F;
+  //  setZerothGOTEntry((void*)AlphaCompilationCallback);
+  return AlphaCompilationCallback;
+}
+
+//These describe LDAx
+static const int IMM_LOW  = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+  long y = l / IMM_MULT;
+  if (l % IMM_MULT > IMM_HIGH)
+    ++y;
+  if (l % IMM_MULT < IMM_LOW)
+    --y;
+  assert((short)y == y && "displacement out of range");
+  return y;
+}
+
+static long getLower16(long l)
+{
+  long h = getUpper16(l);
+  long y = l - h * IMM_MULT;
+  assert(y == (short)y && "Displacement out of range");
+  return y;
+}
+
+void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
+                            unsigned NumRelocs, unsigned char* GOTBase) {
+  //because gpdist are paired and relative to the pc of the first inst,
+  //we need to have some state
+
+  static std::map<std::pair<void*, int>, void*> gpdistmap;
+
+  for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+    unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+    long idx = 0;
+    bool doCommon = true;
+    switch ((Alpha::RelocationType)MR->getRelocationType()) {
+    default: assert(0 && "Unknown relocation type!");
+    case Alpha::reloc_literal:
+      //This is a LDQl
+      idx = MR->getGOTIndex();
+      DOUT << "Literal relocation to slot " << idx;
+      idx = (idx - GOToffset) * 8;
+      DOUT << " offset " << idx << "\n";
+      break;
+    case Alpha::reloc_gprellow:
+      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+      idx = getLower16(idx);
+      DOUT << "gprellow relocation offset " << idx << "\n";
+      DOUT << " Pointer is " << (void*)MR->getResultPointer()
+           << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+      break;
+    case Alpha::reloc_gprelhigh:
+      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+      idx = getUpper16(idx);
+      DOUT << "gprelhigh relocation offset " << idx << "\n";
+      DOUT << " Pointer is " << (void*)MR->getResultPointer()
+           << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+      break;
+    case Alpha::reloc_gpdist:
+      switch (*RelocPos >> 26) {
+      case 0x09: //LDAH
+        idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
+        idx = getUpper16(idx);
+        DOUT << "LDAH: " << idx << "\n";
+        //add the relocation to the map
+        gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
+        break;
+      case 0x08: //LDA
+        assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
+               "LDAg without seeing LDAHg");
+        idx = &GOTBase[GOToffset * 8] -
+              (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
+        idx = getLower16(idx);
+        DOUT << "LDA: " << idx << "\n";
+        break;
+      default:
+        assert(0 && "Cannot handle gpdist yet");
+      }
+      break;
+    case Alpha::reloc_bsr: {
+      idx = (((unsigned char*)MR->getResultPointer() -
+              (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
+      *RelocPos |= (idx & ((1 << 21)-1));
+      doCommon = false;
+      break;
+    }
+    }
+    if (doCommon) {
+      short x = (short)idx;
+      assert(x == idx);
+      *(short*)RelocPos = x;
+    }
+  }
+}
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
new file mode 100644
index 000000000000..edff990dbc24
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -0,0 +1,47 @@
+//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_JITINFO_H
+#define ALPHA_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+  class TargetMachine;
+
+  class AlphaJITInfo : public TargetJITInfo {
+  protected:
+    TargetMachine &TM;
+  public:
+    explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
+    { useGOT = true; }
+
+    virtual void *emitFunctionStub(const Function* F, void *Fn,
+                                   JITCodeEmitter &JCE);
+    virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+    virtual void relocate(void *Function, MachineRelocation *MR,
+                          unsigned NumRelocs, unsigned char* GOTBase);
+
+    /// replaceMachineCodeForFunction - Make it so that calling the function
+    /// whose machine code is at OLD turns into a call to NEW, perhaps by
+    /// overwriting OLD with a branch to NEW. This is used for self-modifying
+    /// code.
+    ///
+    virtual void replaceMachineCodeForFunction(void *Old, void *New);
+  private:
+    static const unsigned GOToffset = 4096;
+
+  };
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 000000000000..0c51bc554be9
--- /dev/null
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller
+// We also align some branch targets if we can do so for free.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+  cl::opt<bool>
+  AlignAll("alpha-align-all", cl::Hidden,
+           cl::desc("Align all blocks"));
+
+  struct AlphaLLRPPass : public MachineFunctionPass {
+    /// Target machine description which we query for reg. names, data
+    /// layout, etc.
+ /// + AlphaTargetMachine &TM; + + static char ID; + AlphaLLRPPass(AlphaTargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm) { } + + virtual const char *getPassName() const { + return "Alpha NOP inserter"; + } + + bool runOnMachineFunction(MachineFunction &F) { + const TargetInstrInfo *TII = F.getTarget().getInstrInfo(); + bool Changed = false; + MachineInstr* prev[3] = {0,0,0}; + DebugLoc dl = DebugLoc::getUnknownLoc(); + unsigned count = 0; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + MachineBasicBlock& MBB = *FI; + bool ub = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + if (count%4 == 0) + prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary + ++count; + MachineInstr *MI = I++; + switch (MI->getOpcode()) { + case Alpha::LDQ: case Alpha::LDL: + case Alpha::LDWU: case Alpha::LDBU: + case Alpha::LDT: case Alpha::LDS: + case Alpha::STQ: case Alpha::STL: + case Alpha::STW: case Alpha::STB: + case Alpha::STT: case Alpha::STS: + if (MI->getOperand(2).getReg() == Alpha::R30) { + if (prev[0] && + prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&& + prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){ + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31) + .addReg(Alpha::R31); + Changed = true; nopintro += 1; + count += 1; + } else if (prev[1] + && prev[1]->getOperand(2).getReg() == + MI->getOperand(2).getReg() + && prev[1]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[2]; + prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31) + .addReg(Alpha::R31); + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31) + .addReg(Alpha::R31); + Changed = true; nopintro += 2; + count += 2; + } else if (prev[2] + && prev[2]->getOperand(2).getReg() == + MI->getOperand(2).getReg() + && prev[2]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31).addReg(Alpha::R31); + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31).addReg(Alpha::R31); + BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31).addReg(Alpha::R31); + Changed = true; nopintro += 3; + count += 3; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = MI; + break; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + case Alpha::ALTENT: + case Alpha::MEMLABEL: + case Alpha::PCLABEL: + --count; + break; + case Alpha::BR: + case Alpha::JMP: + ub = true; + //fall through + default: + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + } + } + if (ub || AlignAll) { + //we can align stuff for free at this point + while (count % 4) { + BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31) + .addReg(Alpha::R31).addReg(Alpha::R31); + ++count; + ++nopalign; + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + } + } + } + return Changed; + } + }; + char AlphaLLRPPass::ID = 0; +} // end of anonymous namespace + +FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) { + return new AlphaLLRPPass(tm); +} diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp new file mode 100644 index 000000000000..feee6e467f47 --- /dev/null +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -0,0 +1,335 @@ +//===- 
AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+//These describe LDAx
+static const int IMM_LOW  = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+  long y = l / IMM_MULT;
+  if (l % IMM_MULT > IMM_HIGH)
+    ++y;
+  return y;
+}
+
+static long getLower16(long l)
+{
+  long h = getUpper16(l);
+  return l - h * IMM_MULT;
+}
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+  : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+    TII(tii)
+{
+}
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+                                                                         const {
+  static const unsigned CalleeSavedRegs[] = {
+    Alpha::R9,  Alpha::R10,
+    Alpha::R11, Alpha::R12,
+    Alpha::R13, Alpha::R14,
+    Alpha::F2,  Alpha::F3,
+    Alpha::F4,  Alpha::F5,
+    Alpha::F6,  Alpha::F7,
+    Alpha::F8,  Alpha::F9,  0
+  };
+  return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+    &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+    &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+    &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+    &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+    &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+    &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+    &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
+  };
+  return CalleeSavedRegClasses;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  Reserved.set(Alpha::R15);
+  Reserved.set(Alpha::R30);
+  Reserved.set(Alpha::R31);
+  return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
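+// (Note: the implementation below only checks for variable sized objects;
+// whether frame pointer elimination is disabled is not currently consulted.)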
+//
+bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->hasVarSizedObjects();
+}
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (hasFP(MF)) {
+    // If we have a frame pointer, turn the adjcallstackdown instruction into a
+    // 'sub SP, <amt>' and the adjcallstackup instruction into 'add SP,
+    // <amt>'
+    MachineInstr *Old = I;
+    uint64_t Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      MachineInstr *New;
+      if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+        New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+          .addImm(-Amount).addReg(Alpha::R30);
+      } else {
+        assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+        New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+          .addImm(Amount).addReg(Alpha::R30);
+      }
+
+      // Replace the pseudo instruction with a new instruction...
+      MBB.insert(I, New);
+    }
+  }
+
+  MBB.erase(I);
+}
+
+//Alpha has a slightly funny stack:
+//Args
+//<- incoming SP
+//fixed locals (and spills, callee saved, etc)
+//<- FP
+//variable locals
+//<- SP
+
+void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  bool FP = hasFP(MF);
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+
+  // Add the base register of R30 (SP) or R15 (FP).
+  MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+  // Now add the frame object offset to the offset from the virtual frame index.
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+  DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+
+  Offset += MF.getFrameInfo()->getStackSize();
+
+  DOUT << "Corrected Offset " << Offset
+       << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+
+  if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+    DOUT << "Unconditionally using R28 for evil purposes Offset: "
+         << Offset << "\n";
+    //so in this case, we need to use a temporary register, and move the
+    //original inst off the SP/FP
+    //fix up the old:
+    MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+    MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+    //insert the new
+    MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+                              TII.get(Alpha::LDAH), Alpha::R28)
+      .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+    MBB.insert(II, nMI);
+  } else {
+    MI.getOperand(i).ChangeToImmediate(Offset);
+  }
+}
+
+
+void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+  bool FP = hasFP(MF);
+
+  static int curgpdist = 0;
+
+  //handle GOT offset
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+    .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+    .addReg(Alpha::R27).addImm(++curgpdist);
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+    .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+    .addReg(Alpha::R29).addImm(curgpdist);
+
+  //evil const_cast until MO stuff is set up to handle const
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+    .addGlobalAddress(const_cast<Function*>(MF.getFunction()));
+
+  // Get the number of bytes to allocate from the FrameInfo
+  long NumBytes = MFI->getStackSize();
+
+  if (FP)
+    NumBytes += 8; //reserve space for the old FP
+
+  // Do we need to allocate space on the stack?
+  if (NumBytes == 0) return;
+
+  unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+  NumBytes = (NumBytes+Align-1)/Align*Align;
+
+  // Update frame info to pretend that this is part of the stack...
+  MFI->setStackSize(NumBytes);
+
+  // adjust stack pointer: r30 -= numbytes
+  NumBytes = -NumBytes;
+  if (NumBytes >= IMM_LOW) {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+      .addReg(Alpha::R30);
+  } else if (getUpper16(NumBytes) >= IMM_LOW) {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+      .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+      .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+  } else {
+    cerr << "Too big a stack frame at " << NumBytes << "\n";
+    abort();
+  }
+
+  //now if we need to, save the old FP and set the new
+  if (FP)
+  {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+      .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+    //this must be the last instr in the prolog
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+      .addReg(Alpha::R30).addReg(Alpha::R30);
+  }
+
+}
+
+void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
+                                     MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert((MBBI->getOpcode() == Alpha::RETDAG ||
+          MBBI->getOpcode() == Alpha::RETDAGp)
+         && "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  bool FP = hasFP(MF);
+
+  // Get the number of bytes allocated from the FrameInfo...
+  long NumBytes = MFI->getStackSize();
+
+  //now if we need to, restore the old FP
+  if (FP) {
+    //copy the FP into the SP (discards allocas)
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+      .addReg(Alpha::R15);
+    //restore the FP
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+      .addImm(0).addReg(Alpha::R15);
+  }
+
+  if (NumBytes != 0) {
+    if (NumBytes <= IMM_HIGH) {
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+        .addReg(Alpha::R30);
+    } else if (getUpper16(NumBytes) <= IMM_HIGH) {
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+        .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+        .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+    } else {
+      cerr << "Too big a stack frame at " << NumBytes << "\n";
+      abort();
+    }
+  }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+  assert(0 && "What is the return address register");
+  return 0;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+  return hasFP(MF) ?
Alpha::R15 : Alpha::R30; +} + +unsigned AlphaRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned AlphaRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + assert(0 && "What is the dwarf register number"); + return -1; +} + +#include "AlphaGenRegisterInfo.inc" + +std::string AlphaRegisterInfo::getPrettyName(unsigned reg) +{ + std::string s(RegisterDescriptors[reg].Name); + return s; +} diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h new file mode 100644 index 000000000000..c4f5f7b421dd --- /dev/null +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -0,0 +1,67 @@ +//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Alpha implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHAREGISTERINFO_H +#define ALPHAREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "AlphaGenRegisterInfo.h.inc" + +namespace llvm { + +class TargetInstrInfo; +class Type; + +struct AlphaRegisterInfo : public AlphaGenRegisterInfo { + const TargetInstrInfo &TII; + + AlphaRegisterInfo(const TargetInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + static std::string getPrettyName(unsigned reg); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td new file mode 100644 index 000000000000..35e6804ea6ac --- /dev/null +++ b/lib/Target/Alpha/AlphaRegisterInfo.td @@ -0,0 +1,171 @@ +//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Alpha register set. 
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+  field bits<5> Num;
+  let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+  let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+  let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0  : GPR< 0,  "$0">, DwarfRegNum<[0]>;
+def R1  : GPR< 1,  "$1">, DwarfRegNum<[1]>;
+def R2  : GPR< 2,  "$2">, DwarfRegNum<[2]>;
+def R3  : GPR< 3,  "$3">, DwarfRegNum<[3]>;
+def R4  : GPR< 4,  "$4">, DwarfRegNum<[4]>;
+def R5  : GPR< 5,  "$5">, DwarfRegNum<[5]>;
+def R6  : GPR< 6,  "$6">, DwarfRegNum<[6]>;
+def R7  : GPR< 7,  "$7">, DwarfRegNum<[7]>;
+def R8  : GPR< 8,  "$8">, DwarfRegNum<[8]>;
+def R9  : GPR< 9,  "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0  : FPR< 0,  "$f0">, DwarfRegNum<[33]>;
+def F1  : FPR< 1,  "$f1">, DwarfRegNum<[34]>;
+def F2  : FPR< 2,  "$f2">, DwarfRegNum<[35]>;
+def F3  : FPR< 3,  "$f3">, DwarfRegNum<[36]>;
+def F4  : FPR< 4,  "$f4">, DwarfRegNum<[37]>;
+def F5  : FPR< 5,  "$f5">, DwarfRegNum<[38]>;
+def F6  : FPR< 6,  "$f6">, DwarfRegNum<[39]>;
+def F7  : FPR< 7,  "$f7">, DwarfRegNum<[40]>;
+def F8  : FPR< 8,  "$f8">, DwarfRegNum<[41]>;
+def F9  : FPR< 9,  "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">,
DwarfRegNum<[64]>; + + // //#define FP $15 + // //#define RA $26 + // //#define PV $27 + // //#define GP $29 + // //#define SP $30 + // $28 is undefined after any and all calls + +/// Register classes +def GPRC : RegisterClass<"Alpha", [i64], 64, + // Volatile + [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, + R23, R24, R25, R28, + //Special meaning, but volatile + R27, //procedure address + R26, //return address + R29, //global offset table address + // Non-volatile + R9, R10, R11, R12, R13, R14, +// Don't allocate 15, 30, 31 + R15, R30, R31 ]> //zero +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GPRCClass::iterator + GPRCClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; + } + }]; +} + +def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, + // Saved: + F2, F3, F4, F5, F6, F7, F8, F9, + F31 ]> //zero +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + F4RCClass::iterator + F4RCClass::allocation_order_end(const MachineFunction &MF) const { + return end()-1; + } + }]; +} + +def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, + // Saved: + F2, F3, F4, F5, F6, F7, F8, F9, + F31 ]> //zero +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + F8RCClass::iterator + F8RCClass::allocation_order_end(const MachineFunction &MF) const { + return end()-1; + } + }]; +} diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h new file mode 100644 index 000000000000..4c92045d4696 --- /dev/null +++ b/lib/Target/Alpha/AlphaRelocations.h @@ -0,0 +1,31 @@ +//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Alpha target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHARELOCATIONS_H +#define ALPHARELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace Alpha { + enum RelocationType { + reloc_literal, + reloc_gprellow, + reloc_gprelhigh, + reloc_gpdist, + reloc_bsr + }; + } +} + +#endif diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td new file mode 100644 index 000000000000..b7b456084709 --- /dev/null +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -0,0 +1,84 @@ +//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+//This is table 2-2 from the 21264 compiler writer's guide
+//modified some
+
+//Pipelines
+
+def L0   : FuncUnit;
+def L1   : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0   : FuncUnit;
+def U1   : FuncUnit;
+def FA   : FuncUnit;
+def FM   : FuncUnit;
+
+def s_ild   : InstrItinClass;
+def s_fld   : InstrItinClass;
+def s_ist   : InstrItinClass;
+def s_fst   : InstrItinClass;
+def s_lda   : InstrItinClass;
+def s_rpcc  : InstrItinClass;
+def s_rx    : InstrItinClass;
+def s_mxpr  : InstrItinClass;
+def s_icbr  : InstrItinClass;
+def s_ubr   : InstrItinClass;
+def s_jsr   : InstrItinClass;
+def s_iadd  : InstrItinClass;
+def s_ilog  : InstrItinClass;
+def s_ishf  : InstrItinClass;
+def s_cmov  : InstrItinClass;
+def s_imul  : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr   : InstrItinClass;
+def s_fadd  : InstrItinClass;
+def s_fmul  : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi  : InstrItinClass;
+def s_itof  : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+//Table 2-4 Instruction Class Latency in Cycles
+//modified some
+
+def Alpha21264Itineraries : ProcessorItineraries<[
+  InstrItinData<s_ild   , [InstrStage<3, [L0, L1]>]>,
+  InstrItinData<s_fld   , [InstrStage<4, [L0, L1]>]>,
+  InstrItinData<s_ist   , [InstrStage<0, [L0, L1]>]>,
+  InstrItinData<s_fst   , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+  InstrItinData<s_lda   , [InstrStage<1, [L0, L1, U0, U1]>]>,
+  InstrItinData<s_rpcc  , [InstrStage<1, [L1]>]>,
+  InstrItinData<s_rx    , [InstrStage<1, [L1]>]>,
+  InstrItinData<s_mxpr  , [InstrStage<1, [L0, L1]>]>,
+  InstrItinData<s_icbr  , [InstrStage<0, [U0, U1]>]>,
+  InstrItinData<s_ubr   , [InstrStage<3, [U0, U1]>]>,
+  InstrItinData<s_jsr   , [InstrStage<3, [L0]>]>,
+  InstrItinData<s_iadd  , [InstrStage<1, [U0, L0, U1, L1]>]>,
+  InstrItinData<s_ilog  , [InstrStage<1, [U0, L0, U1, L1]>]>,
+  InstrItinData<s_ishf  , [InstrStage<1, [U0, U1]>]>,
+  InstrItinData<s_cmov  , [InstrStage<1, [U0, L0, U1, L1]>]>,
+  InstrItinData<s_imul  , [InstrStage<7, [U1]>]>,
+  InstrItinData<s_imisc , [InstrStage<3, [L0]>]>,
+  InstrItinData<s_fbr   , [InstrStage<0, [FA]>]>,
+  InstrItinData<s_fadd  , [InstrStage<6, [FA]>]>,
+  InstrItinData<s_fmul  , [InstrStage<6, [FM]>]>,
+  InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+  InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+  InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+  InstrItinData<s_fsqrts, [InstrStage<18, [FA]>]>,
+  InstrItinData<s_fsqrtt, [InstrStage<33, [FA]>]>,
+  InstrItinData<s_ftoi  , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+  InstrItinData<s_itof  , [InstrStage<3, [L0, L1]>]>
+]>;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 000000000000..d5a9365d75c1
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,25 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "AlphaGenSubtarget.inc"
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+  : HasCT(false) {
+  std::string CPU = "generic";
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 000000000000..0a944cb0a634
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,47 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtarget.
+// +//===----------------------------------------------------------------------===// + +#ifndef ALPHASUBTARGET_H +#define ALPHASUBTARGET_H + +#include "llvm/Target/TargetInstrItineraries.h" +#include "llvm/Target/TargetSubtarget.h" + +#include <string> + +namespace llvm { +class Module; + +class AlphaSubtarget : public TargetSubtarget { +protected: + + bool HasCT; + + InstrItineraryData InstrItins; + +public: + /// This constructor initializes the data members to match that + /// of the specified module. + /// + AlphaSubtarget(const Module &M, const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasCT() const { return HasCT; } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.cpp b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp new file mode 100644 index 000000000000..6092ab67b5fc --- /dev/null +++ b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp @@ -0,0 +1,31 @@ +//===-- AlphaTargetAsmInfo.cpp - Alpha asm properties -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the AlphaTargetAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "AlphaTargetMachine.h" +#include "AlphaTargetAsmInfo.h" + +using namespace llvm; + +AlphaTargetAsmInfo::AlphaTargetAsmInfo(const AlphaTargetMachine &TM) + : TargetAsmInfo(TM) { + AlignmentIsInBytes = false; + PrivateGlobalPrefix = "$"; + JumpTableDirective = ".gprel32"; + JumpTableDataSection = "\t.section .rodata\n"; + WeakRefDirective = "\t.weak\t"; +} + +unsigned AlphaTargetAsmInfo::RelocBehaviour() const { + return (TM.getRelocationModel() != Reloc::Static ? + Reloc::LocalOrGlobal : Reloc::Global); +} diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.h b/lib/Target/Alpha/AlphaTargetAsmInfo.h new file mode 100644 index 000000000000..7675b26f6e81 --- /dev/null +++ b/lib/Target/Alpha/AlphaTargetAsmInfo.h @@ -0,0 +1,32 @@ +//=====-- AlphaTargetAsmInfo.h - Alpha asm properties ---------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the AlphaTargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHATARGETASMINFO_H +#define ALPHATARGETASMINFO_H + +#include "llvm/Target/TargetAsmInfo.h" + +namespace llvm { + + // Forward declaration.
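One property set in the asm-info constructor above deserves a note: AlignmentIsInBytes = false means an alignment directive's operand is a log2 exponent, so the EmitAlignment(4, F) call seen later in the printer requests a 16-byte boundary, not a 4-byte one. A one-liner capturing the arithmetic:

    // With AlignmentIsInBytes == false, ".align N" means a 2^N-byte boundary.
    unsigned alignmentInBytes(unsigned Arg, bool AlignmentIsInBytes) {
      return AlignmentIsInBytes ? Arg : (1u << Arg);  // 4 -> 16 on Alpha
    }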
+ class AlphaTargetMachine; + + struct AlphaTargetAsmInfo : public TargetAsmInfo { + explicit AlphaTargetAsmInfo(const AlphaTargetMachine &TM); + + virtual unsigned RelocBehaviour() const; + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp new file mode 100644 index 000000000000..4c830541f16a --- /dev/null +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -0,0 +1,126 @@ +//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "Alpha.h" +#include "AlphaJITInfo.h" +#include "AlphaTargetAsmInfo.h" +#include "AlphaTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. +extern "C" int AlphaTargetMachineModule; +int AlphaTargetMachineModule = 0; + +// Register the targets +static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]"); + +const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const { + return new AlphaTargetAsmInfo(*this); +} + +unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "alpha*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' && + TT[3] == 'h' && TT[4] == 'a') + return 20; + // If the target triple is something non-alpha, we don't match.
+ if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer64) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +unsigned AlphaTargetMachine::getJITMatchQuality() { +#ifdef __alpha + return 10; +#else + return 0; +#endif +} + +AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS) + : DataLayout("e-f128:128:128"), + FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), + JITInfo(*this), + Subtarget(M, FS), + TLInfo(*this) { + setRelocationModel(Reloc::PIC_); +} + + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createAlphaISelDag(*this)); + return false; +} +bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Must run branch selection immediately preceding the asm printer + PM.add(createAlphaBranchSelectionPass()); + return false; +} +bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + PM.add(createAlphaLLRPPass(*this)); + PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} +bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE) { + PM.add(createAlphaCodeEmitterPass(*this, MCE)); + if (DumpAsm) + PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + return false; +} +bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &JCE) { + PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) + PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + return false; +} +bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + return addCodeEmitter(PM, OptLevel, DumpAsm, MCE); +} +bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + return addCodeEmitter(PM, OptLevel, DumpAsm, JCE); +} + diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h new file mode 100644 index 000000000000..51224e80de70 --- /dev/null +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -0,0 +1,82 @@ +//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Alpha-specific subclass of TargetMachine. 
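The quality numbers above form a simple ranking: 20 for an explicit "alpha..." triple, 0 for any other non-empty triple, 10 for a bare little-endian/64-bit-pointer module, and half the JIT quality as a last resort. A simplified sketch of how a registry-style chooser consumes such scores (the real TargetMachineRegistry iterates every registered target; the second candidate here is a stand-in for illustration):

    // Sketch only: the highest non-zero match quality wins.
    #include "AlphaTargetMachine.h"
    #include "llvm/Module.h"
    using namespace llvm;

    const char *chooseTarget(const Module &M, unsigned OtherQuality) {
      unsigned AlphaQ = AlphaTargetMachine::getModuleMatchQuality(M);
      if (AlphaQ == 0 && OtherQuality == 0)
        return 0;  // no backend claims this module
      return AlphaQ >= OtherQuality ? "alpha" : "other";
    }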
+// +//===----------------------------------------------------------------------===// + +#ifndef ALPHA_TARGETMACHINE_H +#define ALPHA_TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "AlphaInstrInfo.h" +#include "AlphaJITInfo.h" +#include "AlphaISelLowering.h" +#include "AlphaSubtarget.h" + +namespace llvm { + +class GlobalValue; + +class AlphaTargetMachine : public LLVMTargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + AlphaInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + AlphaJITInfo JITInfo; + AlphaSubtarget Subtarget; + AlphaTargetLowering TLInfo; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + AlphaTargetMachine(const Module &M, const std::string &FS); + + virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const AlphaSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const AlphaRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual AlphaTargetLowering* getTargetLowering() const { + return const_cast<AlphaTargetLowering*>(&TLInfo); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual AlphaJITInfo* getJITInfo() { + return &JITInfo; + } + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &JCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp new file mode 100644 index 000000000000..74b48ee66235 --- /dev/null +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -0,0 +1,305 @@ +//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format Alpha assembly language.
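The target machine constructor seen earlier fixes the data layout to "e-f128:128:128" and the frame to 16-byte alignment growing downward. A hedged sketch of reading those properties back through the accessors declared in the header (construction plumbing omitted):

    // Sketch: the answers follow directly from the constructor arguments.
    #include "AlphaTargetMachine.h"
    using namespace llvm;

    void inspect(const AlphaTargetMachine &TM) {
      const TargetData *TD = TM.getTargetData();
      bool Little = TD->isLittleEndian();              // the "e" prefix
      const TargetFrameInfo *TFI = TM.getFrameInfo();
      unsigned StackAlign = TFI->getStackAlignment();  // 16, per the ctor
      (void)Little; (void)StackAlign;
    }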
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Alpha.h" +#include "AlphaInstrInfo.h" +#include "AlphaTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + struct VISIBILITY_HIDDEN AlphaAsmPrinter : public AsmPrinter { + /// Unique incrementer for label values for referencing Global values. + /// + + explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(o, tm, T, OL, V) {} + + virtual const char *getPassName() const { + return "Alpha Assembly Printer"; + } + bool printInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, bool IsCallOp = false); + void printOperand(const MachineInstr *MI, int opNum); + void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true); + void printModuleLevelGV(const GlobalVariable* GVar); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode); + }; +} // end of anonymous namespace + +/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. 
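A minimal sketch of calling the factory documented above; in-tree users reach it through addAssemblyEmitter rather than by hand:

    // Sketch: hand-wiring the Alpha printer into a PassManager.
    #include "Alpha.h"
    #include "llvm/PassManager.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void addAlphaPrinter(PassManager &PM, TargetMachine &TM) {
      PM.add(createAlphaCodePrinterPass(outs(), TM, CodeGenOpt::Default,
                                        /*verbose=*/false));
    }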
+/// +FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o, + TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +#include "AlphaGenAsmWriter.inc" + +void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum) +{ + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Register) { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Not physreg??"); + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + } else if (MO.isImm()) { + O << MO.getImm(); + assert(MO.getImm() < (1 << 30)); + } else { + printOp(MO); + } +} + + +void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << RI.get(MO.getReg()).AsmName; + return; + + case MachineOperand::MO_Immediate: + cerr << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + return; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + return; + + case MachineOperand::MO_GlobalAddress: { + GlobalValue *GV = MO.getGlobal(); + O << Mang->getValueName(GV); + if (GV->isDeclaration() && GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out jump tables referenced by the function + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // Print out labels for the function. + const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + + EmitAlignment(4, F); + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. + case Function::PrivateLinkage: + break; + case Function::ExternalLinkage: + O << "\t.globl " << CurrentFnName << "\n"; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.ent " << CurrentFnName << "\n"; + + O << CurrentFnName << ":\n"; + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction.
+ ++EmittedInsts; + if (!printInstruction(II)) { + assert(0 && "Unhandled instruction in asm writer!"); + abort(); + } + } + } + + O << "\t.end " << CurrentFnName << "\n"; + + // We didn't modify anything. + return false; +} + +bool AlphaAsmPrinter::doInitialization(Module &M) +{ + if(TM.getSubtarget<AlphaSubtarget>().hasCT()) + O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here + else + O << "\t.arch ev6\n"; + O << "\t.set noat\n"; + return AsmPrinter::doInitialization(M); +} + +void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + // 0: Switch to section + SwitchToSection(TAI->SectionForGlobal(GVar)); + + // 1: Check visibility + printVisibility(name, GVar->getVisibility()); + + // 2: Kind + switch (GVar->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::CommonLinkage: + O << TAI->getWeakRefDirective() << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + case GlobalValue::ExternalLinkage: + O << TAI->getGlobalDirective() << name << "\n"; + break; + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + cerr << "Unknown linkage type!\n"; + abort(); + } + + // 3: Type, Size, Align + if (TAI->hasDotTypeDotSizeDirective()) { + O << "\t.type\t" << name << ", @object\n"; + O << "\t.size\t" << name << ", " << Size << "\n"; + } + + EmitAlignment(Align, GVar); + + O << name << ":\n"; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + +bool AlphaAsmPrinter::doFinalization(Module &M) { + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + return AsmPrinter::doFinalization(M); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + printOperand(MI, OpNo); + return false; +} + +bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + O << "0("; + printOperand(MI, OpNo); + O << ")"; + return false; +} diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..b62a7f683568 --- /dev/null +++ b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) + +add_partially_linked_object(LLVMAlphaAsmPrinter + AlphaAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMAlphaCodeGen n) + +add_dependencies(LLVMAlphaAsmPrinter ${n}) diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile new file mode 100644 index 000000000000..c5b3e946695b --- /dev/null +++ b/lib/Target/Alpha/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Alpha/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAlphaAsmPrinter + +# Hack: we need to include 'main' alpha target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt new file mode 100644 index 000000000000..1e535f7769a3 --- /dev/null +++ b/lib/Target/Alpha/CMakeLists.txt @@ -0,0 +1,25 @@ +set(LLVM_TARGET_DEFINITIONS Alpha.td) + +tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(AlphaGenRegisterNames.inc -gen-register-enums) +tablegen(AlphaGenRegisterInfo.inc -gen-register-desc) +tablegen(AlphaGenInstrNames.inc -gen-instr-enums) +tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) +tablegen(AlphaGenCodeEmitter.inc -gen-emitter) +tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) +tablegen(AlphaGenDAGISel.inc -gen-dag-isel) +tablegen(AlphaGenSubtarget.inc -gen-subtarget) + +add_llvm_target(AlphaCodeGen + AlphaBranchSelector.cpp + AlphaCodeEmitter.cpp + AlphaInstrInfo.cpp + AlphaISelDAGToDAG.cpp + AlphaISelLowering.cpp + AlphaJITInfo.cpp + AlphaLLRP.cpp + AlphaRegisterInfo.cpp + AlphaSubtarget.cpp + AlphaTargetAsmInfo.cpp + AlphaTargetMachine.cpp + ) diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile new file mode 100644 index 000000000000..d6c82c7d7435 --- /dev/null +++ b/lib/Target/Alpha/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMAlphaCodeGen +TARGET = Alpha + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ + AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ + AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \ + AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ + AlphaGenSubtarget.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt new file mode 100644 index 000000000000..9ae15174c582 --- /dev/null +++ b/lib/Target/Alpha/README.txt @@ -0,0 +1,42 @@ +*** + +add gcc builtins for alpha instructions + + +*** + +custom expand byteswap into nifty +extract/insert/mask byte/word/longword/quadword low/high +sequences + +*** + +see if any of the extract/insert/mask operations can be added + +*** + +match more interesting things for cmovlbc cmovlbs (move if low bit clear/set) + +*** + +lower srem and urem + +remq(i,j): i - (j * divq(i,j)) if j != 0 +remqu(i,j): i - (j * divqu(i,j)) if j != 0 +reml(i,j): i - (j * divl(i,j)) if j != 0 +remlu(i,j): i - (j * divlu(i,j)) if j != 0 + +*** + +add crazy vector instructions (MVI): + +(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word +PKWB, UNPKBW pack/unpack word to byte +PKLB UNPKBL pack/unpack long to byte +PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b)) + +cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions) + +this has some good examples for other operations that can be synthesised well +from these rather meager vector ops (such as saturating add). +http://www.alphalinux.org/docs/MVI-full.html diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp new file mode 100644 index 000000000000..4d7b54503e89 --- /dev/null +++ b/lib/Target/CBackend/CBackend.cpp @@ -0,0 +1,3601 @@ +//===-- CBackend.cpp - Library for converting LLVM code to C --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library converts LLVM code to C code, compilable by GCC and other C +// compilers.
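Before the implementation, a feel for the output this library produces. The snippet is illustrative only, not verbatim CBE output; real emissions carry generated names, casts, and supporting typedefs:

    /* For IR along the lines of
         define i32 @add(i32 %a, i32 %b) {
           %c = add i32 %a, %b
           ret i32 %c
         }
       the printed C is morally equivalent to: */
    unsigned int add(unsigned int llvm_cbe_a, unsigned int llvm_cbe_b) {
      return llvm_cbe_a + llvm_cbe_b;
    }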
+// +//===----------------------------------------------------------------------===// + +#include "CTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/InlineAsm.h" +#include "llvm/Analysis/ConstantsScanner.h" +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Config/config.h" +#include <algorithm> +#include <sstream> +using namespace llvm; + +/// CBackendTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int CBackendTargetMachineModule; +int CBackendTargetMachineModule = 0; + +// Register the target. +static RegisterTarget<CTargetMachine> X("c", "C backend"); + +namespace { + /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for + /// any unnamed structure types that are used by the program, and merges + /// external functions with the same name. + /// + class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass { + public: + static char ID; + CBackendNameAllUsedStructsAndMergeFunctions() + : ModulePass(&ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<FindUsedTypes>(); + } + + virtual const char *getPassName() const { + return "C backend type canonicalizer"; + } + + virtual bool runOnModule(Module &M); + }; + + char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0; + + /// CWriter - This class is the main chunk of code that converts an LLVM + /// module to a C translation unit. + class CWriter : public FunctionPass, public InstVisitor<CWriter> { + raw_ostream &Out; + IntrinsicLowering *IL; + Mangler *Mang; + LoopInfo *LI; + const Module *TheModule; + const TargetAsmInfo* TAsm; + const TargetData* TD; + std::map<const Type *, std::string> TypeNames; + std::map<const ConstantFP *, unsigned> FPConstantMap; + std::set<std::string> intrinsicPrototypesAlreadyGenerated; + std::set<const Argument*> ByValParams; + unsigned FPCounter; + + public: + static char ID; + explicit CWriter(raw_ostream &o) + : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), + TheModule(0), TAsm(0), TD(0) { + FPCounter = 0; + } + + virtual const char *getPassName() const { return "C backend"; } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.setPreservesAll(); + } + + virtual bool doInitialization(Module &M); + + bool runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + LI = &getAnalysis<LoopInfo>(); + + // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F); + + // Output all floating point constants that cannot be printed accurately. + printFloatingPointConstants(F); + + printFunction(F); + return false; + } + + virtual bool doFinalization(Module &M) { + // Free memory... + delete IL; + delete TD; + delete Mang; + FPConstantMap.clear(); + TypeNames.clear(); + ByValParams.clear(); + intrinsicPrototypesAlreadyGenerated.clear(); + return false; + } + + raw_ostream &printType(raw_ostream &Out, const Type *Ty, + bool isSigned = false, + const std::string &VariableName = "", + bool IgnoreName = false, + const AttrListPtr &PAL = AttrListPtr()); + std::ostream &printType(std::ostream &Out, const Type *Ty, + bool isSigned = false, + const std::string &VariableName = "", + bool IgnoreName = false, + const AttrListPtr &PAL = AttrListPtr()); + raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + bool isSigned, + const std::string &NameSoFar = ""); + std::ostream &printSimpleType(std::ostream &Out, const Type *Ty, + bool isSigned, + const std::string &NameSoFar = ""); + + void printStructReturnPointerFunctionType(raw_ostream &Out, + const AttrListPtr &PAL, + const PointerType *Ty); + + /// writeOperandDeref - Print the result of dereferencing the specified + /// operand with '*'. This is equivalent to printing '*' then using + /// writeOperand, but avoids excess syntax in some cases. + void writeOperandDeref(Value *Operand) { + if (isAddressExposed(Operand)) { + // Already something with an address exposed. + writeOperandInternal(Operand); + } else { + Out << "*("; + writeOperand(Operand); + Out << ")"; + } + } + + void writeOperand(Value *Operand, bool Static = false); + void writeInstComputationInline(Instruction &I); + void writeOperandInternal(Value *Operand, bool Static = false); + void writeOperandWithCast(Value* Operand, unsigned Opcode); + void writeOperandWithCast(Value* Operand, const ICmpInst &I); + bool writeInstructionCast(const Instruction &I); + + void writeMemoryAccess(Value *Operand, const Type *OperandType, + bool IsVolatile, unsigned Alignment); + + private : + std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c); + + void lowerIntrinsics(Function &F); + + void printModule(Module *M); + void printModuleTypes(const TypeSymbolTable &ST); + void printContainedStructs(const Type *Ty, std::set<const Type *> &); + void printFloatingPointConstants(Function &F); + void printFloatingPointConstants(const Constant *C); + void printFunctionSignature(const Function *F, bool Prototype); + + void printFunction(Function &); + void printBasicBlock(BasicBlock *BB); + void printLoop(Loop *L); + + void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy); + void printConstant(Constant *CPV, bool Static); + void printConstantWithCast(Constant *CPV, unsigned Opcode); + bool printConstExprCast(const ConstantExpr *CE, bool Static); + void printConstantArray(ConstantArray *CPA, bool Static); + void printConstantVector(ConstantVector *CV, bool Static); + + /// isAddressExposed - Return true if the specified value's name needs to + /// have its address taken in order to get a C value of the correct type. + /// This happens for global variables, byval parameters, and direct allocas. + bool isAddressExposed(const Value *V) const { + if (const Argument *A = dyn_cast<Argument>(V)) + return ByValParams.count(A); + return isa<GlobalVariable>(V) || isDirectAlloca(V); + } + + // isInlinableInst - Attempt to inline instructions into their uses to build + // trees as much as possible.
To do this, we have to consistently decide + // what is acceptable to inline, so that variable declarations don't get + // printed and an extra copy of the expr is not emitted. + // + static bool isInlinableInst(const Instruction &I) { + // Always inline cmp instructions, even if they are shared by multiple + // expressions. GCC generates horrible code if we don't. + if (isa<CmpInst>(I)) + return true; + + // Must be an expression, must be used exactly once. If it is dead, we + // emit it inline where it would go. + if (I.getType() == Type::VoidTy || !I.hasOneUse() || + isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) || + isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) || + isa<InsertValueInst>(I)) + // Don't inline a load across a store or other bad things! + return false; + + // Must not be used in inline asm, extractelement, or shufflevector. + if (I.hasOneUse()) { + const Instruction &User = cast<Instruction>(*I.use_back()); + if (isInlineAsm(User) || isa<ExtractElementInst>(User) || + isa<ShuffleVectorInst>(User)) + return false; + } + + // Only inline an instruction if its use is in the same BB as the inst. + return I.getParent() == cast<Instruction>(I.use_back())->getParent(); + } + + // isDirectAlloca - Define fixed sized allocas in the entry block as direct + // variables which are accessed with the & operator. This causes GCC to + // generate significantly better code than to emit alloca calls directly. + // + static const AllocaInst *isDirectAlloca(const Value *V) { + const AllocaInst *AI = dyn_cast<AllocaInst>(V); + if (!AI) return false; + if (AI->isArrayAllocation()) + return 0; // FIXME: we can also inline fixed size array allocas! + if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) + return 0; + return AI; + } + + // isInlineAsm - Check if the instruction is a call to an inline asm chunk + static bool isInlineAsm(const Instruction& I) { + if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0))) + return true; + return false; + } + + // Instruction visitation functions + friend class InstVisitor<CWriter>; + + void visitReturnInst(ReturnInst &I); + void visitBranchInst(BranchInst &I); + void visitSwitchInst(SwitchInst &I); + void visitInvokeInst(InvokeInst &I) { + assert(0 && "Lowerinvoke pass didn't work!"); + } + + void visitUnwindInst(UnwindInst &I) { + assert(0 && "Lowerinvoke pass didn't work!"); + } + void visitUnreachableInst(UnreachableInst &I); + + void visitPHINode(PHINode &I); + void visitBinaryOperator(Instruction &I); + void visitICmpInst(ICmpInst &I); + void visitFCmpInst(FCmpInst &I); + + void visitCastInst (CastInst &I); + void visitSelectInst(SelectInst &I); + void visitCallInst (CallInst &I); + void visitInlineAsm(CallInst &I); + bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee); + + void visitMallocInst(MallocInst &I); + void visitAllocaInst(AllocaInst &I); + void visitFreeInst (FreeInst &I); + void visitLoadInst (LoadInst &I); + void visitStoreInst (StoreInst &I); + void visitGetElementPtrInst(GetElementPtrInst &I); + void visitVAArgInst (VAArgInst &I); + + void visitInsertElementInst(InsertElementInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitShuffleVectorInst(ShuffleVectorInst &SVI); + + void visitInsertValueInst(InsertValueInst &I); + void visitExtractValueInst(ExtractValueInst &I); + + void visitInstruction(Instruction &I) { + cerr << "C Writer does not know about " << I; + abort(); + } + + void outputLValue(Instruction *I) { + Out << " " << GetValueName(I) << " = "; + } + + bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To); + void printPHICopiesForSuccessor(BasicBlock *CurBlock, + BasicBlock *Successor, unsigned Indent); + void
printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock, + unsigned Indent); + void printGEPExpression(Value *Ptr, gep_type_iterator I, + gep_type_iterator E, bool Static); + + std::string GetValueName(const Value *Operand); + }; +} + +char CWriter::ID = 0; + +/// This method inserts names for any unnamed structure types that are used by +/// the program, and removes names from structure types that are not used by the +/// program. +/// +bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { + // Get a set of types that are used by the program... + std::set<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes(); + + // Loop over the module symbol table, removing types from UT that are + // already named, and removing names for types that are not used. + // + TypeSymbolTable &TST = M.getTypeSymbolTable(); + for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end(); + TI != TE; ) { + TypeSymbolTable::iterator I = TI++; + + // If this isn't a struct or array type, remove it from our set of types + // to name. This simplifies emission later. + if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) && + !isa<ArrayType>(I->second)) { + TST.remove(I); + } else { + // If this is not used, remove it from the symbol table. + std::set<const Type *>::iterator UTI = UT.find(I->second); + if (UTI == UT.end()) + TST.remove(I); + else + UT.erase(UTI); // Only keep one name for this type. + } + } + + // UT now contains types that are not named. Loop over it, naming + // structure types. + // + bool Changed = false; + unsigned RenameCounter = 0; + for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end(); + I != E; ++I) + if (isa<StructType>(*I) || isa<ArrayType>(*I)) { + while (M.addTypeName("unnamed"+utostr(RenameCounter), *I)) + ++RenameCounter; + Changed = true; + } + + + // Loop over all external functions and globals. If we have two with + // identical names, merge them. + // FIXME: This code should disappear when we don't allow values with the same + // names when they have different types! + std::map<std::string, GlobalValue*> ExtSymbols; + for (Module::iterator I = M.begin(), E = M.end(); I != E;) { + Function *GV = I++; + if (GV->isDeclaration() && GV->hasName()) { + std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X + = ExtSymbols.insert(std::make_pair(GV->getName(), GV)); + if (!X.second) { + // Found a conflict, replace this global with the previous one. + GlobalValue *OldGV = X.first->second; + GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType())); + GV->eraseFromParent(); + Changed = true; + } + } + } + // Do the same for globals. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E;) { + GlobalVariable *GV = I++; + if (GV->isDeclaration() && GV->hasName()) { + std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X + = ExtSymbols.insert(std::make_pair(GV->getName(), GV)); + if (!X.second) { + // Found a conflict, replace this global with the previous one. + GlobalValue *OldGV = X.first->second; + GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType())); + GV->eraseFromParent(); + Changed = true; + } + } + } + + return Changed; +} + +/// printStructReturnPointerFunctionType - This is like printType for a struct +/// return type, except, instead of printing the type as void (*)(Struct*, ...) +/// print it as "Struct (*)(...)", for struct return functions.
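Concretely, a hedged illustration of the rewrite this helper performs (struct and typedef names invented for the example):

    /* An LLVM function taking an sret pointer,
         void @f(%struct.S* sret, double)
       has the literal C type  void (*)(struct S *, double);  the helper
       instead prints the struct-return view of the same function: */
    struct S { int a; int b; };
    typedef struct S (*f_ptr)(double);   /* the "Struct (*)(...)" form */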
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, + const AttrListPtr &PAL, + const PointerType *TheTy) { + const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); + std::stringstream FunctionInnards; + FunctionInnards << " (*) ("; + bool PrintedType = false; + + FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); + const Type *RetTy = cast<PointerType>(I->get())->getElementType(); + unsigned Idx = 1; + for (++I, ++Idx; I != E; ++I, ++Idx) { + if (PrintedType) + FunctionInnards << ", "; + const Type *ArgTy = *I; + if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { + assert(isa<PointerType>(ArgTy)); + ArgTy = cast<PointerType>(ArgTy)->getElementType(); + } + printType(FunctionInnards, ArgTy, + /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); + PrintedType = true; + } + if (FTy->isVarArg()) { + if (PrintedType) + FunctionInnards << ", ..."; + } else if (!PrintedType) { + FunctionInnards << "void"; + } + FunctionInnards << ')'; + std::string tstr = FunctionInnards.str(); + printType(Out, RetTy, + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); +} + +raw_ostream & +CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, + const std::string &NameSoFar) { + assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + "Invalid type for printSimpleType"); + switch (Ty->getTypeID()) { + case Type::VoidTyID: return Out << "void " << NameSoFar; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits == 1) + return Out << "bool " << NameSoFar; + else if (NumBits <= 8) + return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; + else if (NumBits <= 16) + return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; + else if (NumBits <= 32) + return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; + else if (NumBits <= 64) + return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; + else { + assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); + return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; + } + } + case Type::FloatTyID: return Out << "float " << NameSoFar; + case Type::DoubleTyID: return Out << "double " << NameSoFar; + // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is + // present matches host 'long double'.
+ case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: return Out << "long double " << NameSoFar; + + case Type::VectorTyID: { + const VectorType *VTy = cast<VectorType>(Ty); + return printSimpleType(Out, VTy->getElementType(), isSigned, + " __attribute__((vector_size(" + + utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); + } + + default: + cerr << "Unknown primitive type: " << *Ty << "\n"; + abort(); + } +} + +std::ostream & +CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, + const std::string &NameSoFar) { + assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + "Invalid type for printSimpleType"); + switch (Ty->getTypeID()) { + case Type::VoidTyID: return Out << "void " << NameSoFar; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits == 1) + return Out << "bool " << NameSoFar; + else if (NumBits <= 8) + return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; + else if (NumBits <= 16) + return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; + else if (NumBits <= 32) + return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; + else if (NumBits <= 64) + return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; + else { + assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); + return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; + } + } + case Type::FloatTyID: return Out << "float " << NameSoFar; + case Type::DoubleTyID: return Out << "double " << NameSoFar; + // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is + // present matches host 'long double'. + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: return Out << "long double " << NameSoFar; + + case Type::VectorTyID: { + const VectorType *VTy = cast<VectorType>(Ty); + return printSimpleType(Out, VTy->getElementType(), isSigned, + " __attribute__((vector_size(" + + utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); + } + + default: + cerr << "Unknown primitive type: " << *Ty << "\n"; + abort(); + } +} + +// Pass the Type* and the variable name and this prints out the variable +// declaration. +// +raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, + bool isSigned, const std::string &NameSoFar, + bool IgnoreName, const AttrListPtr &PAL) { + if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + printSimpleType(Out, Ty, isSigned, NameSoFar); + return Out; + } + + // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) { + std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; + } + + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType *FTy = cast<FunctionType>(Ty); + std::stringstream FunctionInnards; + FunctionInnards << " (" << NameSoFar << ") ("; + unsigned Idx = 1; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + const Type *ArgTy = *I; + if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { + assert(isa<PointerType>(ArgTy)); + ArgTy = cast<PointerType>(ArgTy)->getElementType(); + } + if (I != FTy->param_begin()) + FunctionInnards << ", "; + printType(FunctionInnards, ArgTy, + /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); + ++Idx; + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) + FunctionInnards << ", ..."; + } else if (!FTy->getNumParams()) { + FunctionInnards << "void"; + } + FunctionInnards << ')'; + std::string tstr = FunctionInnards.str(); + printType(Out, FTy->getReturnType(), + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + return Out; + } + case Type::StructTyID: { + const StructType *STy = cast<StructType>(Ty); + Out << NameSoFar + " {\n"; + unsigned Idx = 0; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + Out << " "; + printType(Out, *I, false, "field" + utostr(Idx++)); + Out << ";\n"; + } + Out << '}'; + if (STy->isPacked()) + Out << " __attribute__ ((packed))"; + return Out; + } + + case Type::PointerTyID: { + const PointerType *PTy = cast<PointerType>(Ty); + std::string ptrName = "*" + NameSoFar; + + if (isa<ArrayType>(PTy->getElementType()) || + isa<VectorType>(PTy->getElementType())) + ptrName = "(" + ptrName + ")"; + + if (!PAL.isEmpty()) + // Must be a function ptr cast! + return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); + return printType(Out, PTy->getElementType(), false, ptrName); + } + + case Type::ArrayTyID: { + const ArrayType *ATy = cast<ArrayType>(Ty); + unsigned NumElements = ATy->getNumElements(); + if (NumElements == 0) NumElements = 1; + // Arrays are wrapped in structs to allow them to have normal + // value semantics (avoiding the array "decay"). + Out << NameSoFar << " { "; + printType(Out, ATy->getElementType(), false, + "array[" + utostr(NumElements) + "]"); + return Out << "; }"; + } + + case Type::OpaqueTyID: { + static int Count = 0; + std::string TyName = "struct opaque_" + itostr(Count++); + assert(TypeNames.find(Ty) == TypeNames.end()); + TypeNames[Ty] = TyName; + return Out << TyName << ' ' << NameSoFar; + } + default: + assert(0 && "Unhandled case in getTypeProps!"); + abort(); + } + + return Out; +} + +// Pass the Type* and the variable name and this prints out the variable +// declaration. +// +std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, + bool isSigned, const std::string &NameSoFar, + bool IgnoreName, const AttrListPtr &PAL) { + if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + printSimpleType(Out, Ty, isSigned, NameSoFar); + return Out; + } + + // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) { + std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; + } + + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType *FTy = cast<FunctionType>(Ty); + std::stringstream FunctionInnards; + FunctionInnards << " (" << NameSoFar << ") ("; + unsigned Idx = 1; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + const Type *ArgTy = *I; + if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { + assert(isa<PointerType>(ArgTy)); + ArgTy = cast<PointerType>(ArgTy)->getElementType(); + } + if (I != FTy->param_begin()) + FunctionInnards << ", "; + printType(FunctionInnards, ArgTy, + /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); + ++Idx; + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) + FunctionInnards << ", ..."; + } else if (!FTy->getNumParams()) { + FunctionInnards << "void"; + } + FunctionInnards << ')'; + std::string tstr = FunctionInnards.str(); + printType(Out, FTy->getReturnType(), + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + return Out; + } + case Type::StructTyID: { + const StructType *STy = cast<StructType>(Ty); + Out << NameSoFar + " {\n"; + unsigned Idx = 0; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + Out << " "; + printType(Out, *I, false, "field" + utostr(Idx++)); + Out << ";\n"; + } + Out << '}'; + if (STy->isPacked()) + Out << " __attribute__ ((packed))"; + return Out; + } + + case Type::PointerTyID: { + const PointerType *PTy = cast<PointerType>(Ty); + std::string ptrName = "*" + NameSoFar; + + if (isa<ArrayType>(PTy->getElementType()) || + isa<VectorType>(PTy->getElementType())) + ptrName = "(" + ptrName + ")"; + + if (!PAL.isEmpty()) + // Must be a function ptr cast! + return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); + return printType(Out, PTy->getElementType(), false, ptrName); + } + + case Type::ArrayTyID: { + const ArrayType *ATy = cast<ArrayType>(Ty); + unsigned NumElements = ATy->getNumElements(); + if (NumElements == 0) NumElements = 1; + // Arrays are wrapped in structs to allow them to have normal + // value semantics (avoiding the array "decay"). + Out << NameSoFar << " { "; + printType(Out, ATy->getElementType(), false, + "array[" + utostr(NumElements) + "]"); + return Out << "; }"; + } + + case Type::OpaqueTyID: { + static int Count = 0; + std::string TyName = "struct opaque_" + itostr(Count++); + assert(TypeNames.find(Ty) == TypeNames.end()); + TypeNames[Ty] = TyName; + return Out << TyName << ' ' << NameSoFar; + } + default: + assert(0 && "Unhandled case in getTypeProps!"); + abort(); + } + + return Out; +} + +void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { + + // As a special case, print the array as a string if it is an array of + // ubytes or an array of sbytes with positive values.
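The string path of the routine below has one subtlety worth spelling out before the code: a C \x escape absorbs every following hex digit, so a printable hex digit emitted right after a hex escape would be swallowed into it. That is what the LastWasHex bookkeeping in the loop guards against. Illustration:

    /* "\x123" would parse as ONE escape with value 0x123 (out of range for
       char), not as 0x12 followed by '3'. Escaping the digit as well keeps
       the bytes intact: */
    /* const char Bad[] = "\x123"; */   /* single, overflowing escape */
    const char Good[] = "\x12\x33";     /* 0x12 then '3' (0x33) */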
+ // + const Type *ETy = CPA->getType()->getElementType(); + bool isString = (ETy == Type::Int8Ty || ETy == Type::Int8Ty); + + // Make sure the last character is a null char, as automatically added by C + if (isString && (CPA->getNumOperands() == 0 || + !cast<Constant>(*(CPA->op_end()-1))->isNullValue())) + isString = false; + + if (isString) { + Out << '\"'; + // Keep track of whether the last number was a hexadecimal escape + bool LastWasHex = false; + + // Do not include the last character, which we know is null + for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) { + unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue(); + + // Print it out literally if it is a printable character. The only thing + // to be careful about is when the last letter output was a hex escape + // code, in which case we have to be careful not to print out hex digits + // explicitly (the C compiler thinks it is a continuation of the previous + // character, sheesh...) + // + if (isprint(C) && (!LastWasHex || !isxdigit(C))) { + LastWasHex = false; + if (C == '"' || C == '\\') + Out << "\\" << (char)C; + else + Out << (char)C; + } else { + LastWasHex = false; + switch (C) { + case '\n': Out << "\\n"; break; + case '\t': Out << "\\t"; break; + case '\r': Out << "\\r"; break; + case '\v': Out << "\\v"; break; + case '\a': Out << "\\a"; break; + case '\"': Out << "\\\""; break; + case '\'': Out << "\\\'"; break; + default: + Out << "\\x"; + Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); + Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + LastWasHex = true; + break; + } + } + } + Out << '\"'; + } else { + Out << '{'; + if (CPA->getNumOperands()) { + Out << ' '; + printConstant(cast<Constant>(CPA->getOperand(0)), Static); + for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(cast<Constant>(CPA->getOperand(i)), Static); + } + } + Out << " }"; + } +} + +void CWriter::printConstantVector(ConstantVector *CP, bool Static) { + Out << '{'; + if (CP->getNumOperands()) { + Out << ' '; + printConstant(cast<Constant>(CP->getOperand(0)), Static); + for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(cast<Constant>(CP->getOperand(i)), Static); + } + } + Out << " }"; +} + +// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out +// textually as a double (rather than as a reference to a stack-allocated +// variable). We decide this by converting CFP to a string and back into a +// double, and then checking whether the conversion results in a bit-equal +// double to the original value of CFP. This depends on us and the target C +// compiler agreeing on the conversion process (which is pretty likely since we +// only deal in IEEE FP). +// +static bool isFPCSafeToPrint(const ConstantFP *CFP) { + bool ignored; + // Do long doubles in hex for now.
+ if (CFP->getType() != Type::FloatTy && CFP->getType() != Type::DoubleTy) + return false; + APFloat APF = APFloat(CFP->getValueAPF()); // copy + if (CFP->getType() == Type::FloatTy) + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); +#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A + char Buffer[100]; + sprintf(Buffer, "%a", APF.convertToDouble()); + if (!strncmp(Buffer, "0x", 2) || + !strncmp(Buffer, "-0x", 3) || + !strncmp(Buffer, "+0x", 3)) + return APF.bitwiseIsEqual(APFloat(atof(Buffer))); + return false; +#else + std::string StrVal = ftostr(APF); + + while (StrVal[0] == ' ') + StrVal.erase(StrVal.begin()); + + // Check to make sure that the stringized number is not some string like "Inf" + // or NaN. Check that the string matches the "[-+]?[0-9]" regex. + if ((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) + // Reparse stringized version! + return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str()))); + return false; +#endif +} + +/// Print out the casting for a cast operation. This does the double casting +/// necessary for conversion to the destination type, if necessary. +/// @brief Print a cast +void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { + // Print the destination type cast + switch (opc) { + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::IntToPtr: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::FPExt: + case Instruction::FPTrunc: // For these the DstTy sign doesn't matter + Out << '('; + printType(Out, DstTy); + Out << ')'; + break; + case Instruction::ZExt: + case Instruction::PtrToInt: + case Instruction::FPToUI: // For these, make sure we get an unsigned dest + Out << '('; + printSimpleType(Out, DstTy, false); + Out << ')'; + break; + case Instruction::SExt: + case Instruction::FPToSI: // For these, make sure we get a signed dest + Out << '('; + printSimpleType(Out, DstTy, true); + Out << ')'; + break; + default: + assert(0 && "Invalid cast opcode"); + } + + // Print the source type cast + switch (opc) { + case Instruction::UIToFP: + case Instruction::ZExt: + Out << '('; + printSimpleType(Out, SrcTy, false); + Out << ')'; + break; + case Instruction::SIToFP: + case Instruction::SExt: + Out << '('; + printSimpleType(Out, SrcTy, true); + Out << ')'; + break; + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // Avoid "cast to pointer from integer of different size" warnings + Out << "(unsigned long)"; + break; + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::FPExt: + case Instruction::FPTrunc: + case Instruction::FPToSI: + case Instruction::FPToUI: + break; // These don't need a source cast. + default: + assert(0 && "Invalid cast opcode"); + break; + } +} + +// printConstant - The LLVM Constant to C Constant converter. 
+void CWriter::printConstant(Constant *CPV, bool Static) { + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { + switch (CE->getOpcode()) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + Out << "("; + printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType()); + if (CE->getOpcode() == Instruction::SExt && + CE->getOperand(0)->getType() == Type::Int1Ty) { + // Make sure we really sext from bool here by subtracting from 0 + Out << "0-"; + } + printConstant(CE->getOperand(0), Static); + if (CE->getType() == Type::Int1Ty && + (CE->getOpcode() == Instruction::Trunc || + CE->getOpcode() == Instruction::FPToUI || + CE->getOpcode() == Instruction::FPToSI || + CE->getOpcode() == Instruction::PtrToInt)) { + // Make sure we really truncate to bool here by anding with 1 + Out << "&1u"; + } + Out << ')'; + return; + + case Instruction::GetElementPtr: + Out << "("; + printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), + gep_type_end(CPV), Static); + Out << ")"; + return; + case Instruction::Select: + Out << '('; + printConstant(CE->getOperand(0), Static); + Out << '?'; + printConstant(CE->getOperand(1), Static); + Out << ':'; + printConstant(CE->getOperand(2), Static); + Out << ')'; + return; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + { + Out << '('; + bool NeedsClosingParens = printConstExprCast(CE, Static); + printConstantWithCast(CE->getOperand(0), CE->getOpcode()); + switch (CE->getOpcode()) { + case Instruction::Add: Out << " + "; break; + case Instruction::Sub: Out << " - "; break; + case Instruction::Mul: Out << " * "; break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: Out << " % "; break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: Out << " / "; break; + case Instruction::And: Out << " & "; break; + case Instruction::Or: Out << " | "; break; + case Instruction::Xor: Out << " ^ "; break; + case Instruction::Shl: Out << " << "; break; + case Instruction::LShr: + case Instruction::AShr: Out << " >> "; break; + case Instruction::ICmp: + switch (CE->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << " == "; break; + case ICmpInst::ICMP_NE: Out << " != "; break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: Out << " < "; break; + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_ULE: Out << " <= "; break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: Out << " > "; break; + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGE: Out << " >= "; break; + default: assert(0 && "Illegal ICmp predicate"); + } + break; + default: assert(0 && "Illegal opcode here!"); + } + printConstantWithCast(CE->getOperand(1), CE->getOpcode()); + if (NeedsClosingParens) + Out << "))"; + Out << ')'; + return; + } + case Instruction::FCmp: { + Out << '('; + bool NeedsClosingParens = printConstExprCast(CE, Static); + if (CE->getPredicate() == FCmpInst::FCMP_FALSE) + Out << "0"; + else if
(CE->getPredicate() == FCmpInst::FCMP_TRUE) + Out << "1"; + else { + const char* op = 0; + switch (CE->getPredicate()) { + default: assert(0 && "Illegal FCmp predicate"); + case FCmpInst::FCMP_ORD: op = "ord"; break; + case FCmpInst::FCMP_UNO: op = "uno"; break; + case FCmpInst::FCMP_UEQ: op = "ueq"; break; + case FCmpInst::FCMP_UNE: op = "une"; break; + case FCmpInst::FCMP_ULT: op = "ult"; break; + case FCmpInst::FCMP_ULE: op = "ule"; break; + case FCmpInst::FCMP_UGT: op = "ugt"; break; + case FCmpInst::FCMP_UGE: op = "uge"; break; + case FCmpInst::FCMP_OEQ: op = "oeq"; break; + case FCmpInst::FCMP_ONE: op = "one"; break; + case FCmpInst::FCMP_OLT: op = "olt"; break; + case FCmpInst::FCMP_OLE: op = "ole"; break; + case FCmpInst::FCMP_OGT: op = "ogt"; break; + case FCmpInst::FCMP_OGE: op = "oge"; break; + } + Out << "llvm_fcmp_" << op << "("; + printConstantWithCast(CE->getOperand(0), CE->getOpcode()); + Out << ", "; + printConstantWithCast(CE->getOperand(1), CE->getOpcode()); + Out << ")"; + } + if (NeedsClosingParens) + Out << "))"; + Out << ')'; + return; + } + default: + cerr << "CWriter Error: Unhandled constant expression: " + << *CE << "\n"; + abort(); + } + } else if (isa(CPV) && CPV->getType()->isSingleValueType()) { + Out << "(("; + printType(Out, CPV->getType()); // sign doesn't matter + Out << ")/*UNDEF*/"; + if (!isa(CPV->getType())) { + Out << "0)"; + } else { + Out << "{})"; + } + return; + } + + if (ConstantInt *CI = dyn_cast(CPV)) { + const Type* Ty = CI->getType(); + if (Ty == Type::Int1Ty) + Out << (CI->getZExtValue() ? '1' : '0'); + else if (Ty == Type::Int32Ty) + Out << CI->getZExtValue() << 'u'; + else if (Ty->getPrimitiveSizeInBits() > 32) + Out << CI->getZExtValue() << "ull"; + else { + Out << "(("; + printSimpleType(Out, Ty, false) << ')'; + if (CI->isMinValue(true)) + Out << CI->getZExtValue() << 'u'; + else + Out << CI->getSExtValue(); + Out << ')'; + } + return; + } + + switch (CPV->getType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: { + ConstantFP *FPC = cast(CPV); + std::map::iterator I = FPConstantMap.find(FPC); + if (I != FPConstantMap.end()) { + // Because of FP precision problems we must load from a stack allocated + // value that holds the value in hex. + Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" : + FPC->getType() == Type::DoubleTy ? "double" : + "long double") + << "*)&FPConstant" << I->second << ')'; + } else { + double V; + if (FPC->getType() == Type::FloatTy) + V = FPC->getValueAPF().convertToFloat(); + else if (FPC->getType() == Type::DoubleTy) + V = FPC->getValueAPF().convertToDouble(); + else { + // Long double. Convert the number to double, discarding precision. + // This is not awesome, but it at least makes the CBE output somewhat + // useful. + APFloat Tmp = FPC->getValueAPF(); + bool LosesInfo; + Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo); + V = Tmp.convertToDouble(); + } + + if (IsNAN(V)) { + // The value is NaN + + // FIXME the actual NaN bits should be emitted. + // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN, + // it's 0x7ff4. 
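// Background for the hex prefixes used below (illustrative, not part of the
// patch): in IEEE-754 binary64 a NaN has an all-ones exponent and a nonzero
// mantissa, and bit 51 is the quiet bit, which is why the top 16 bits read
// 0x7ff8 for a quiet NaN. A minimal standalone check under those assumptions:
#if 0
#include <stdint.h>
#include <string.h>
static int is_quiet_nan(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof bits);                 /* portable bit inspection */
  int is_nan = ((bits >> 52) & 0x7ffu) == 0x7ffu  /* exponent all ones */
               && (bits & 0xfffffffffffffULL);    /* nonzero mantissa  */
  return is_nan && ((bits >> 51) & 1);            /* quiet bit set     */
}
#endif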
+ const unsigned long QuietNaN = 0x7ff8UL; + //const unsigned long SignalNaN = 0x7ff4UL; + + // We need to grab the first part of the FP # + char Buffer[100]; + + uint64_t ll = DoubleToBits(V); + sprintf(Buffer, "0x%llx", static_cast(ll)); + + std::string Num(&Buffer[0], &Buffer[6]); + unsigned long Val = strtoul(Num.c_str(), 0, 16); + + if (FPC->getType() == Type::FloatTy) + Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" + << Buffer << "\") /*nan*/ "; + else + Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" + << Buffer << "\") /*nan*/ "; + } else if (IsInf(V)) { + // The value is Inf + if (V < 0) Out << '-'; + Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "") + << " /*inf*/ "; + } else { + std::string Num; +#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A + // Print out the constant as a floating point number. + char Buffer[100]; + sprintf(Buffer, "%a", V); + Num = Buffer; +#else + Num = ftostr(FPC->getValueAPF()); +#endif + Out << Num; + } + } + break; + } + + case Type::ArrayTyID: + // Use C99 compound expression literal initializer syntax. + if (!Static) { + Out << "("; + printType(Out, CPV->getType()); + Out << ")"; + } + Out << "{ "; // Arrays are wrapped in struct types. + if (ConstantArray *CA = dyn_cast(CPV)) { + printConstantArray(CA, Static); + } else { + assert(isa(CPV) || isa(CPV)); + const ArrayType *AT = cast(CPV->getType()); + Out << '{'; + if (AT->getNumElements()) { + Out << ' '; + Constant *CZ = Constant::getNullValue(AT->getElementType()); + printConstant(CZ, Static); + for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(CZ, Static); + } + } + Out << " }"; + } + Out << " }"; // Arrays are wrapped in struct types. + break; + + case Type::VectorTyID: + // Use C99 compound expression literal initializer syntax. + if (!Static) { + Out << "("; + printType(Out, CPV->getType()); + Out << ")"; + } + if (ConstantVector *CV = dyn_cast(CPV)) { + printConstantVector(CV, Static); + } else { + assert(isa(CPV) || isa(CPV)); + const VectorType *VT = cast(CPV->getType()); + Out << "{ "; + Constant *CZ = Constant::getNullValue(VT->getElementType()); + printConstant(CZ, Static); + for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(CZ, Static); + } + Out << " }"; + } + break; + + case Type::StructTyID: + // Use C99 compound expression literal initializer syntax. 
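// Illustrative, with a hypothetical 'struct l_pair' type: the Static/
// non-Static distinction in the aggregate cases prints as
//   static context:      { 1u, 2u }
//   expression context:  (struct l_pair){ 1u, 2u }   /* C99 compound literal */
// and arrays gain one extra level of braces because the CBE wraps C arrays
// in a struct.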
+ if (!Static) { + Out << "("; + printType(Out, CPV->getType()); + Out << ")"; + } + if (isa(CPV) || isa(CPV)) { + const StructType *ST = cast(CPV->getType()); + Out << '{'; + if (ST->getNumElements()) { + Out << ' '; + printConstant(Constant::getNullValue(ST->getElementType(0)), Static); + for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(Constant::getNullValue(ST->getElementType(i)), Static); + } + } + Out << " }"; + } else { + Out << '{'; + if (CPV->getNumOperands()) { + Out << ' '; + printConstant(cast(CPV->getOperand(0)), Static); + for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(cast(CPV->getOperand(i)), Static); + } + } + Out << " }"; + } + break; + + case Type::PointerTyID: + if (isa(CPV)) { + Out << "(("; + printType(Out, CPV->getType()); // sign doesn't matter + Out << ")/*NULL*/0)"; + break; + } else if (GlobalValue *GV = dyn_cast(CPV)) { + writeOperand(GV, Static); + break; + } + // FALL THROUGH + default: + cerr << "Unknown constant type: " << *CPV << "\n"; + abort(); + } +} + +// Some constant expressions need to be casted back to the original types +// because their operands were casted to the expected type. This function takes +// care of detecting that case and printing the cast for the ConstantExpr. +bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { + bool NeedsExplicitCast = false; + const Type *Ty = CE->getOperand(0)->getType(); + bool TypeIsSigned = false; + switch (CE->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. + if (!Ty->isIntOrIntVector()) break; + // FALL THROUGH + case Instruction::LShr: + case Instruction::URem: + case Instruction::UDiv: NeedsExplicitCast = true; break; + case Instruction::AShr: + case Instruction::SRem: + case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break; + case Instruction::SExt: + Ty = CE->getType(); + NeedsExplicitCast = true; + TypeIsSigned = true; + break; + case Instruction::ZExt: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + Ty = CE->getType(); + NeedsExplicitCast = true; + break; + default: break; + } + if (NeedsExplicitCast) { + Out << "(("; + if (Ty->isInteger() && Ty != Type::Int1Ty) + printSimpleType(Out, Ty, TypeIsSigned); + else + printType(Out, Ty); // not integer, sign doesn't matter + Out << ")("; + } + return NeedsExplicitCast; +} + +// Print a constant assuming that it is the operand for a given Opcode. The +// opcodes that care about sign need to cast their operands to the expected +// type before the operation proceeds. This function does the casting. +void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { + + // Extract the operand's type, we'll need it. + const Type* OpTy = CPV->getType(); + + // Indicate whether to do the cast or not. + bool shouldCast = false; + bool typeIsSigned = false; + + // Based on the Opcode for which this Constant is being written, determine + // the new type to which the operand should be casted by setting the value + // of OpTy. If we change OpTy, also set shouldCast to true so it gets + // casted below. 
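// Worked example (illustrative, hypothetical operands a and b): for a
// constant 'add' of i32 operands, printConstExprCast and
// printConstantWithCast combine so the expression prints as
//   (((unsigned int)(((unsigned int)a) + ((unsigned int)b))))
// The inner casts force the arithmetic to be done as unsigned, where
// overflow wraps rather than being undefined behavior in C; signed-sensitive
// operations (SDiv/SRem/AShr) instead cast both operands and the result to
// the signed type.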
+ switch (Opcode) { + default: + // for most instructions, it doesn't matter + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. + if (!OpTy->isIntOrIntVector()) break; + // FALL THROUGH + case Instruction::LShr: + case Instruction::UDiv: + case Instruction::URem: + shouldCast = true; + break; + case Instruction::AShr: + case Instruction::SDiv: + case Instruction::SRem: + shouldCast = true; + typeIsSigned = true; + break; + } + + // Write out the casted constant if we should, otherwise just write the + // operand. + if (shouldCast) { + Out << "(("; + printSimpleType(Out, OpTy, typeIsSigned); + Out << ")"; + printConstant(CPV, false); + Out << ")"; + } else + printConstant(CPV, false); +} + +std::string CWriter::GetValueName(const Value *Operand) { + std::string Name; + + if (!isa(Operand) && Operand->getName() != "") { + std::string VarName; + + Name = Operand->getName(); + VarName.reserve(Name.capacity()); + + for (std::string::iterator I = Name.begin(), E = Name.end(); + I != E; ++I) { + char ch = *I; + + if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || ch == '_')) { + char buffer[5]; + sprintf(buffer, "_%x_", ch); + VarName += buffer; + } else + VarName += ch; + } + + Name = "llvm_cbe_" + VarName; + } else { + Name = Mang->getValueName(Operand); + } + + return Name; +} + +/// writeInstComputationInline - Emit the computation for the specified +/// instruction inline, with no destination provided. +void CWriter::writeInstComputationInline(Instruction &I) { + // If this is a non-trivial bool computation, make sure to truncate down to + // a 1 bit value. This is important because we want "add i1 x, y" to return + // "0" when x and y are true, not "2" for example. + bool NeedBoolTrunc = false; + if (I.getType() == Type::Int1Ty && !isa(I) && !isa(I)) + NeedBoolTrunc = true; + + if (NeedBoolTrunc) + Out << "(("; + + visit(I); + + if (NeedBoolTrunc) + Out << ")&1)"; +} + + +void CWriter::writeOperandInternal(Value *Operand, bool Static) { + if (Instruction *I = dyn_cast(Operand)) + // Should we inline this instruction to build a tree? + if (isInlinableInst(*I) && !isDirectAlloca(I)) { + Out << '('; + writeInstComputationInline(*I); + Out << ')'; + return; + } + + Constant* CPV = dyn_cast(Operand); + + if (CPV && !isa(CPV)) + printConstant(CPV, Static); + else + Out << GetValueName(Operand); +} + +void CWriter::writeOperand(Value *Operand, bool Static) { + bool isAddressImplicit = isAddressExposed(Operand); + if (isAddressImplicit) + Out << "(&"; // Global variables are referenced as their addresses by llvm + + writeOperandInternal(Operand, Static); + + if (isAddressImplicit) + Out << ')'; +} + +// Some instructions need to have their result value casted back to the +// original types because their operands were casted to the expected type. +// This function takes care of detecting that case and printing the cast +// for the Instruction. +bool CWriter::writeInstructionCast(const Instruction &I) { + const Type *Ty = I.getOperand(0)->getType(); + switch (I.getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. 
+ if (!Ty->isIntOrIntVector()) break; + // FALL THROUGH + case Instruction::LShr: + case Instruction::URem: + case Instruction::UDiv: + Out << "(("; + printSimpleType(Out, Ty, false); + Out << ")("; + return true; + case Instruction::AShr: + case Instruction::SRem: + case Instruction::SDiv: + Out << "(("; + printSimpleType(Out, Ty, true); + Out << ")("; + return true; + default: break; + } + return false; +} + +// Write the operand with a cast to another type based on the Opcode being used. +// This will be used in cases where an instruction has specific type +// requirements (usually signedness) for its operands. +void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { + + // Extract the operand's type, we'll need it. + const Type* OpTy = Operand->getType(); + + // Indicate whether to do the cast or not. + bool shouldCast = false; + + // Indicate whether the cast should be to a signed type or not. + bool castIsSigned = false; + + // Based on the Opcode for which this Operand is being written, determine + // the new type to which the operand should be casted by setting the value + // of OpTy. If we change OpTy, also set shouldCast to true. + switch (Opcode) { + default: + // for most instructions, it doesn't matter + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. + if (!OpTy->isIntOrIntVector()) break; + // FALL THROUGH + case Instruction::LShr: + case Instruction::UDiv: + case Instruction::URem: // Cast to unsigned first + shouldCast = true; + castIsSigned = false; + break; + case Instruction::GetElementPtr: + case Instruction::AShr: + case Instruction::SDiv: + case Instruction::SRem: // Cast to signed first + shouldCast = true; + castIsSigned = true; + break; + } + + // Write out the casted operand if we should, otherwise just write the + // operand. + if (shouldCast) { + Out << "(("; + printSimpleType(Out, OpTy, castIsSigned); + Out << ")"; + writeOperand(Operand); + Out << ")"; + } else + writeOperand(Operand); +} + +// Write the operand with a cast to another type based on the icmp predicate +// being used. +void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { + // This has to do a cast to ensure the operand has the right signedness. + // Also, if the operand is a pointer, we make sure to cast to an integer when + // doing the comparison both for signedness and so that the C compiler doesn't + // optimize things like "p < NULL" to false (p may contain an integer value + // f.e.). + bool shouldCast = Cmp.isRelational(); + + // Write out the casted operand if we should, otherwise just write the + // operand. + if (!shouldCast) { + writeOperand(Operand); + return; + } + + // Should this be a signed comparison? If so, convert to signed. + bool castIsSigned = Cmp.isSignedPredicate(); + + // If the operand was a pointer, convert to a large integer type. + const Type* OpTy = Operand->getType(); + if (isa(OpTy)) + OpTy = TD->getIntPtrType(); + + Out << "(("; + printSimpleType(Out, OpTy, castIsSigned); + Out << ")"; + writeOperand(Operand); + Out << ")"; +} + +// generateCompilerSpecificCode - This is where we add conditional compilation +// directives to cater to specific compilers as need be. +// +static void generateCompilerSpecificCode(raw_ostream& Out, + const TargetData *TD) { + // Alloca is hard to get, and we don't want to include stdlib.h here. 
+ Out << "/* get a declaration for alloca */\n" + << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" + << "#define alloca(x) __builtin_alloca((x))\n" + << "#define _alloca(x) __builtin_alloca((x))\n" + << "#elif defined(__APPLE__)\n" + << "extern void *__builtin_alloca(unsigned long);\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#define longjmp _longjmp\n" + << "#define setjmp _setjmp\n" + << "#elif defined(__sun__)\n" + << "#if defined(__sparcv9)\n" + << "extern void *__builtin_alloca(unsigned long);\n" + << "#else\n" + << "extern void *__builtin_alloca(unsigned int);\n" + << "#endif\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#elif defined(_MSC_VER)\n" + << "#define inline _inline\n" + << "#define alloca(x) _alloca(x)\n" + << "#else\n" + << "#include \n" + << "#endif\n\n"; + + // We output GCC specific attributes to preserve 'linkonce'ness on globals. + // If we aren't being compiled with GCC, just drop these attributes. + Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n" + << "#define __attribute__(X)\n" + << "#endif\n\n"; + + // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))". + Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" + << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n" + << "#elif defined(__GNUC__)\n" + << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n" + << "#else\n" + << "#define __EXTERNAL_WEAK__\n" + << "#endif\n\n"; + + // For now, turn off the weak linkage attribute on Mac OS X. (See above.) + Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" + << "#define __ATTRIBUTE_WEAK__\n" + << "#elif defined(__GNUC__)\n" + << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n" + << "#else\n" + << "#define __ATTRIBUTE_WEAK__\n" + << "#endif\n\n"; + + // Add hidden visibility support. FIXME: APPLE_CC? + Out << "#if defined(__GNUC__)\n" + << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" + << "#endif\n\n"; + + // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise + // From the GCC documentation: + // + // double __builtin_nan (const char *str) + // + // This is an implementation of the ISO C99 function nan. + // + // Since ISO C99 defines this function in terms of strtod, which we do + // not implement, a description of the parsing is in order. The string is + // parsed as by strtol; that is, the base is recognized by leading 0 or + // 0x prefixes. The number parsed is placed in the significand such that + // the least significant bit of the number is at the least significant + // bit of the significand. The number is truncated to fit the significand + // field provided. The significand is forced to be a quiet NaN. + // + // This function, if given a string literal, is evaluated early enough + // that it is considered a compile-time constant. + // + // float __builtin_nanf (const char *str) + // + // Similar to __builtin_nan, except the return type is float. + // + // double __builtin_inf (void) + // + // Similar to __builtin_huge_val, except a warning is generated if the + // target floating-point format does not support infinities. This + // function is suitable for implementing the ISO C99 macro INFINITY. + // + // float __builtin_inff (void) + // + // Similar to __builtin_inf, except the return type is float. 
+ Out << "#ifdef __GNUC__\n" + << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n" + << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n" + << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n" + << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n" + << "#define LLVM_INF __builtin_inf() /* Double */\n" + << "#define LLVM_INFF __builtin_inff() /* Float */\n" + << "#define LLVM_PREFETCH(addr,rw,locality) " + "__builtin_prefetch(addr,rw,locality)\n" + << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" + << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" + << "#define LLVM_ASM __asm__\n" + << "#else\n" + << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n" + << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n" + << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n" + << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n" + << "#define LLVM_INF ((double)0.0) /* Double */\n" + << "#define LLVM_INFF 0.0F /* Float */\n" + << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" + << "#define __ATTRIBUTE_CTOR__\n" + << "#define __ATTRIBUTE_DTOR__\n" + << "#define LLVM_ASM(X)\n" + << "#endif\n\n"; + + Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n" + << "#define __builtin_stack_save() 0 /* not implemented */\n" + << "#define __builtin_stack_restore(X) /* noop */\n" + << "#endif\n\n"; + + // Output typedefs for 128-bit integers. If these are needed with a + // 32-bit target or with a C compiler that doesn't support mode(TI), + // more drastic measures will be needed. + Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n" + << "typedef int __attribute__((mode(TI))) llvmInt128;\n" + << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n" + << "#endif\n\n"; + + // Output target-specific code that should be inserted into main. + Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n"; +} + +/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into +/// the StaticTors set. +static void FindStaticTors(GlobalVariable *GV, std::set &StaticTors){ + ConstantArray *InitList = dyn_cast(GV->getInitializer()); + if (!InitList) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + Constant *FP = CS->getOperand(1); + if (ConstantExpr *CE = dyn_cast(FP)) + if (CE->isCast()) + FP = CE->getOperand(0); + if (Function *F = dyn_cast(FP)) + StaticTors.insert(F); + } +} + +enum SpecialGlobalClass { + NotSpecial = 0, + GlobalCtors, GlobalDtors, + NotPrinted +}; + +/// getGlobalVariableClass - If this is a global that is specially recognized +/// by LLVM, return a code that indicates how we should handle it. +static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { + // If this is a global ctors/dtors list, handle it now. + if (GV->hasAppendingLinkage() && GV->use_empty()) { + if (GV->getName() == "llvm.global_ctors") + return GlobalCtors; + else if (GV->getName() == "llvm.global_dtors") + return GlobalDtors; + } + + // Otherwise, it it is other metadata, don't print it. This catches things + // like debug information. 
+  if (GV->getSection() == "llvm.metadata")
+    return NotPrinted;
+
+  return NotSpecial;
+}
+
+
+bool CWriter::doInitialization(Module &M) {
+  // Initialize
+  TheModule = &M;
+
+  TD = new TargetData(&M);
+  IL = new IntrinsicLowering(*TD);
+  IL->AddPrototypes(M);
+
+  // Ensure that all structure types have names...
+  Mang = new Mangler(M);
+  Mang->markCharUnacceptable('.');
+
+  // Keep track of which functions are static ctors/dtors so they can have
+  // an attribute added to their prototypes.
+  std::set<Function*> StaticCtors, StaticDtors;
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I) {
+    switch (getGlobalVariableClass(I)) {
+    default: break;
+    case GlobalCtors:
+      FindStaticTors(I, StaticCtors);
+      break;
+    case GlobalDtors:
+      FindStaticTors(I, StaticDtors);
+      break;
+    }
+  }
+
+  // get declaration for alloca
+  Out << "/* Provide Declarations */\n";
+  Out << "#include <stdarg.h>\n";      // Varargs support
+  Out << "#include <setjmp.h>\n";      // Unwind support
+  generateCompilerSpecificCode(Out, TD);
+
+  // Provide a definition for `bool' if not compiling with a C++ compiler.
+  Out << "\n"
+      << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+      << "\n\n/* Support for floating point constants */\n"
+      << "typedef unsigned long long ConstantDoubleTy;\n"
+      << "typedef unsigned int ConstantFloatTy;\n"
+      << "typedef struct { unsigned long long f1; unsigned short f2; "
+         "unsigned short pad[3]; } ConstantFP80Ty;\n"
+      // This is used for both kinds of 128-bit long double; meaning differs.
+      << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+         " ConstantFP128Ty;\n"
+      << "\n\n/* Global Declarations */\n";
+
+  // First output all the declarations for the program, because C requires
+  // Functions & globals to be declared before they are used.
+  //
+
+  // Loop over the symbol table, emitting all named constants...
+  printModuleTypes(M.getTypeSymbolTable());
+
+  // Global variable declarations...
+  if (!M.global_empty()) {
+    Out << "\n/* External Global Variable Declarations */\n";
+    for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+         I != E; ++I) {
+
+      if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+          I->hasCommonLinkage())
+        Out << "extern ";
+      else if (I->hasDLLImportLinkage())
+        Out << "__declspec(dllimport) ";
+      else
+        continue; // Internal Global
+
+      // Thread Local Storage
+      if (I->isThreadLocal())
+        Out << "__thread ";
+
+      printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+      if (I->hasExternalWeakLinkage())
+        Out << " __EXTERNAL_WEAK__";
+      Out << ";\n";
+    }
+  }
+
+  // Function declarations
+  Out << "\n/* Function Declarations */\n";
+  Out << "double fmod(double, double);\n";    // Support for FP rem
+  Out << "float fmodf(float, float);\n";
+  Out << "long double fmodl(long double, long double);\n";
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    // Don't print declarations for intrinsic functions.
+ if (!I->isIntrinsic() && I->getName() != "setjmp" && + I->getName() != "longjmp" && I->getName() != "_setjmp") { + if (I->hasExternalWeakLinkage()) + Out << "extern "; + printFunctionSignature(I, true); + if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + if (I->hasExternalWeakLinkage()) + Out << " __EXTERNAL_WEAK__"; + if (StaticCtors.count(I)) + Out << " __ATTRIBUTE_CTOR__"; + if (StaticDtors.count(I)) + Out << " __ATTRIBUTE_DTOR__"; + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + + if (I->hasName() && I->getName()[0] == 1) + Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")"; + + Out << ";\n"; + } + } + + // Output the global variable declarations + if (!M.global_empty()) { + Out << "\n\n/* Global Variable Declarations */\n"; + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration()) { + // Ignore special globals, such as debug info. + if (getGlobalVariableClass(I)) + continue; + + if (I->hasLocalLinkage()) + Out << "static "; + else + Out << "extern "; + + // Thread Local Storage + if (I->isThreadLocal()) + Out << "__thread "; + + printType(Out, I->getType()->getElementType(), false, + GetValueName(I)); + + if (I->hasLinkOnceLinkage()) + Out << " __attribute__((common))"; + else if (I->hasCommonLinkage()) // FIXME is this right? + Out << " __ATTRIBUTE_WEAK__"; + else if (I->hasWeakLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + else if (I->hasExternalWeakLinkage()) + Out << " __EXTERNAL_WEAK__"; + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + Out << ";\n"; + } + } + + // Output the global variable definitions and contents... + if (!M.global_empty()) { + Out << "\n\n/* Global Variable Definitions and Initialization */\n"; + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration()) { + // Ignore special globals, such as debug info. + if (getGlobalVariableClass(I)) + continue; + + if (I->hasLocalLinkage()) + Out << "static "; + else if (I->hasDLLImportLinkage()) + Out << "__declspec(dllimport) "; + else if (I->hasDLLExportLinkage()) + Out << "__declspec(dllexport) "; + + // Thread Local Storage + if (I->isThreadLocal()) + Out << "__thread "; + + printType(Out, I->getType()->getElementType(), false, + GetValueName(I)); + if (I->hasLinkOnceLinkage()) + Out << " __attribute__((common))"; + else if (I->hasWeakLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + else if (I->hasCommonLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + + // If the initializer is not null, emit the initializer. If it is null, + // we try to avoid emitting large amounts of zeros. The problem with + // this, however, occurs when the variable has weak linkage. In this + // case, the assembler will complain about the variable being both weak + // and common, so we disable this optimization. + // FIXME common linkage should avoid this problem. + if (!I->getInitializer()->isNullValue()) { + Out << " = " ; + writeOperand(I->getInitializer(), true); + } else if (I->hasWeakLinkage()) { + // We have to specify an initializer, but it doesn't have to be + // complete. If the value is an aggregate, print out { 0 }, and let + // the compiler figure out the rest of the zeros. 
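// Illustrative, with hypothetical type names: for a weak struct global the
// branch below therefore emits
//   struct l_pair p __ATTRIBUTE_WEAK__ = { 0 };
// and for an array (which the CBE wraps in a struct)
//   struct l_arr a __ATTRIBUTE_WEAK__ = { { 0 } };
// leaving the C compiler to zero-fill the remainder.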
+ Out << " = " ; + if (isa(I->getInitializer()->getType()) || + isa(I->getInitializer()->getType())) { + Out << "{ 0 }"; + } else if (isa(I->getInitializer()->getType())) { + // As with structs and vectors, but with an extra set of braces + // because arrays are wrapped in structs. + Out << "{ { 0 } }"; + } else { + // Just print it out normally. + writeOperand(I->getInitializer(), true); + } + } + Out << ";\n"; + } + } + + if (!M.empty()) + Out << "\n\n/* Function Bodies */\n"; + + // Emit some helper functions for dealing with FCMP instruction's + // predicates + Out << "static inline int llvm_fcmp_ord(double X, double Y) { "; + Out << "return X == X && Y == Y; }\n"; + Out << "static inline int llvm_fcmp_uno(double X, double Y) { "; + Out << "return X != X || Y != Y; }\n"; + Out << "static inline int llvm_fcmp_ueq(double X, double Y) { "; + Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n"; + Out << "static inline int llvm_fcmp_une(double X, double Y) { "; + Out << "return X != Y; }\n"; + Out << "static inline int llvm_fcmp_ult(double X, double Y) { "; + Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n"; + Out << "static inline int llvm_fcmp_ugt(double X, double Y) { "; + Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n"; + Out << "static inline int llvm_fcmp_ule(double X, double Y) { "; + Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n"; + Out << "static inline int llvm_fcmp_uge(double X, double Y) { "; + Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n"; + Out << "static inline int llvm_fcmp_oeq(double X, double Y) { "; + Out << "return X == Y ; }\n"; + Out << "static inline int llvm_fcmp_one(double X, double Y) { "; + Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n"; + Out << "static inline int llvm_fcmp_olt(double X, double Y) { "; + Out << "return X < Y ; }\n"; + Out << "static inline int llvm_fcmp_ogt(double X, double Y) { "; + Out << "return X > Y ; }\n"; + Out << "static inline int llvm_fcmp_ole(double X, double Y) { "; + Out << "return X <= Y ; }\n"; + Out << "static inline int llvm_fcmp_oge(double X, double Y) { "; + Out << "return X >= Y ; }\n"; + return false; +} + + +/// Output all floating point constants that cannot be printed accurately... +void CWriter::printFloatingPointConstants(Function &F) { + // Scan the module for floating point constants. If any FP constant is used + // in the function, we want to redirect it here so that we do not depend on + // the precision of the printed form, unless the printed form preserves + // precision. + // + for (constant_iterator I = constant_begin(&F), E = constant_end(&F); + I != E; ++I) + printFloatingPointConstants(*I); + + Out << '\n'; +} + +void CWriter::printFloatingPointConstants(const Constant *C) { + // If this is a constant expression, recursively check for constant fp values. + if (const ConstantExpr *CE = dyn_cast(C)) { + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) + printFloatingPointConstants(CE->getOperand(i)); + return; + } + + // Otherwise, check for a FP constant that we need to print. + const ConstantFP *FPC = dyn_cast(C); + if (FPC == 0 || + // Do not put in FPConstantMap if safe. + isFPCSafeToPrint(FPC) || + // Already printed this constant? 
+ FPConstantMap.count(FPC)) + return; + + FPConstantMap[FPC] = FPCounter; // Number the FP constants + + if (FPC->getType() == Type::DoubleTy) { + double Val = FPC->getValueAPF().convertToDouble(); + uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); + Out << "static const ConstantDoubleTy FPConstant" << FPCounter++ + << " = 0x" << utohexstr(i) + << "ULL; /* " << Val << " */\n"; + } else if (FPC->getType() == Type::FloatTy) { + float Val = FPC->getValueAPF().convertToFloat(); + uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt(). + getZExtValue(); + Out << "static const ConstantFloatTy FPConstant" << FPCounter++ + << " = 0x" << utohexstr(i) + << "U; /* " << Val << " */\n"; + } else if (FPC->getType() == Type::X86_FP80Ty) { + // api needed to prevent premature destruction + APInt api = FPC->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + Out << "static const ConstantFP80Ty FPConstant" << FPCounter++ + << " = { 0x" << utohexstr(p[0]) + << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" + << "}; /* Long double constant */\n"; + } else if (FPC->getType() == Type::PPC_FP128Ty) { + APInt api = FPC->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + Out << "static const ConstantFP128Ty FPConstant" << FPCounter++ + << " = { 0x" + << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) + << "}; /* Long double constant */\n"; + + } else { + assert(0 && "Unknown float type!"); + } +} + + + +/// printSymbolTable - Run through symbol table looking for type names. If a +/// type name is found, emit its declaration... +/// +void CWriter::printModuleTypes(const TypeSymbolTable &TST) { + Out << "/* Helper union for bitcasts */\n"; + Out << "typedef union {\n"; + Out << " unsigned int Int32;\n"; + Out << " unsigned long long Int64;\n"; + Out << " float Float;\n"; + Out << " double Double;\n"; + Out << "} llvmBitCastUnion;\n"; + + // We are only interested in the type plane of the symbol table. + TypeSymbolTable::const_iterator I = TST.begin(); + TypeSymbolTable::const_iterator End = TST.end(); + + // If there are no type names, exit early. + if (I == End) return; + + // Print out forward declarations for structure types before anything else! + Out << "/* Structure forward decls */\n"; + for (; I != End; ++I) { + std::string Name = "struct l_" + Mang->makeNameProper(I->first); + Out << Name << ";\n"; + TypeNames.insert(std::make_pair(I->second, Name)); + } + + Out << '\n'; + + // Now we can print out typedefs. Above, we guaranteed that this can only be + // for struct or opaque types. + Out << "/* Typedefs */\n"; + for (I = TST.begin(); I != End; ++I) { + std::string Name = "l_" + Mang->makeNameProper(I->first); + Out << "typedef "; + printType(Out, I->second, false, Name); + Out << ";\n"; + } + + Out << '\n'; + + // Keep track of which structures have been printed so far... + std::set StructPrinted; + + // Loop over all structures then push them into the stack so they are + // printed in the correct order. + // + Out << "/* Structure contents */\n"; + for (I = TST.begin(); I != End; ++I) + if (isa(I->second) || isa(I->second)) + // Only print out used types! + printContainedStructs(I->second, StructPrinted); +} + +// Push the struct onto the stack and recursively push all structs +// this one depends on. +// +// TODO: Make this work properly with vector types +// +void CWriter::printContainedStructs(const Type *Ty, + std::set &StructPrinted) { + // Don't walk through pointers. 
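// Sample of the output produced by printFloatingPointConstants above
// (illustrative; 0x3FB999999999999A is the IEEE-754 double encoding of 0.1,
// which has no exact decimal form):
//   static const ConstantDoubleTy FPConstant0 = 0x3FB999999999999AULL; /* 0.1 */
// which printConstant later reads back as (*(double*)&FPConstant0), keeping
// the emitted C bit-exact regardless of printed-decimal precision.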
+  if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return;
+
+  // Print all contained types first.
+  for (Type::subtype_iterator I = Ty->subtype_begin(),
+       E = Ty->subtype_end(); I != E; ++I)
+    printContainedStructs(*I, StructPrinted);
+
+  if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+    // Check to see if we have already printed this struct.
+    if (StructPrinted.insert(Ty).second) {
+      // Print structure type out.
+      std::string Name = TypeNames[Ty];
+      printType(Out, Ty, false, Name, true);
+      Out << ";\n\n";
+    }
+  }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+  /// isStructReturn - Should this function actually return a struct by-value?
+  bool isStructReturn = F->hasStructRetAttr();
+
+  if (F->hasLocalLinkage()) Out << "static ";
+  if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+  if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+  switch (F->getCallingConv()) {
+  case CallingConv::X86_StdCall:
+    Out << "__attribute__((stdcall)) ";
+    break;
+  case CallingConv::X86_FastCall:
+    Out << "__attribute__((fastcall)) ";
+    break;
+  }
+
+  // Loop over the arguments, printing them...
+  const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+  const AttrListPtr &PAL = F->getAttributes();
+
+  std::stringstream FunctionInnards;
+
+  // Print out the name...
+  FunctionInnards << GetValueName(F) << '(';
+
+  bool PrintedArg = false;
+  if (!F->isDeclaration()) {
+    if (!F->arg_empty()) {
+      Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+      unsigned Idx = 1;
+
+      // If this is a struct-return function, don't print the hidden
+      // struct-return argument.
+      if (isStructReturn) {
+        assert(I != E && "Invalid struct return function!");
+        ++I;
+        ++Idx;
+      }
+
+      std::string ArgName;
+      for (; I != E; ++I) {
+        if (PrintedArg) FunctionInnards << ", ";
+        if (I->hasName() || !Prototype)
+          ArgName = GetValueName(I);
+        else
+          ArgName = "";
+        const Type *ArgTy = I->getType();
+        if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+          ArgTy = cast<PointerType>(ArgTy)->getElementType();
+          ByValParams.insert(I);
+        }
+        printType(FunctionInnards, ArgTy,
+            /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+            ArgName);
+        PrintedArg = true;
+        ++Idx;
+      }
+    }
+  } else {
+    // Loop over the arguments, printing them.
+    FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+    unsigned Idx = 1;
+
+    // If this is a struct-return function, don't print the hidden
+    // struct-return argument.
+    if (isStructReturn) {
+      assert(I != E && "Invalid struct return function!");
+      ++I;
+      ++Idx;
+    }
+
+    for (; I != E; ++I) {
+      if (PrintedArg) FunctionInnards << ", ";
+      const Type *ArgTy = *I;
+      if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+        assert(isa<PointerType>(ArgTy));
+        ArgTy = cast<PointerType>(ArgTy)->getElementType();
+      }
+      printType(FunctionInnards, ArgTy,
+             /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+      PrintedArg = true;
+      ++Idx;
+    }
+  }
+
+  // Finish printing arguments... if this is a vararg function, print the ...,
+  // unless there are no known types, in which case, we just emit ().
+  //
+  if (FT->isVarArg() && PrintedArg) {
+    if (PrintedArg) FunctionInnards << ", ";
+    FunctionInnards << "...";  // Output varargs portion of signature!
+  } else if (!FT->isVarArg() && !PrintedArg) {
+    FunctionInnards << "void"; // ret() -> ret(void) in C.
+  }
+  FunctionInnards << ')';
+
+  // Get the return type for the function.
+  const Type *RetTy;
+  if (!isStructReturn)
+    RetTy = F->getReturnType();
+  else {
+    // If this is a struct-return function, print the struct-return type.
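// Illustrative, with hypothetical names: for 'void @f(%pair* sret %out,
// i32 %n)' the hidden first argument is dropped and its pointee becomes the
// C return type, so the printed signature is roughly
//   struct l_pair f(unsigned int llvm_cbe_n);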
+ RetTy = cast(FT->getParamType(0))->getElementType(); + } + + // Print out the return type and the signature built above. + printType(Out, RetTy, + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), + FunctionInnards.str()); +} + +static inline bool isFPIntBitCast(const Instruction &I) { + if (!isa(I)) + return false; + const Type *SrcTy = I.getOperand(0)->getType(); + const Type *DstTy = I.getType(); + return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || + (DstTy->isFloatingPoint() && SrcTy->isInteger()); +} + +void CWriter::printFunction(Function &F) { + /// isStructReturn - Should this function actually return a struct by-value? + bool isStructReturn = F.hasStructRetAttr(); + + printFunctionSignature(&F, false); + Out << " {\n"; + + // If this is a struct return function, handle the result with magic. + if (isStructReturn) { + const Type *StructTy = + cast(F.arg_begin()->getType())->getElementType(); + Out << " "; + printType(Out, StructTy, false, "StructReturn"); + Out << "; /* Struct return temporary */\n"; + + Out << " "; + printType(Out, F.arg_begin()->getType(), false, + GetValueName(F.arg_begin())); + Out << " = &StructReturn;\n"; + } + + bool PrintedVar = false; + + // print local variable information for the function + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + if (const AllocaInst *AI = isDirectAlloca(&*I)) { + Out << " "; + printType(Out, AI->getAllocatedType(), false, GetValueName(AI)); + Out << "; /* Address-exposed local */\n"; + PrintedVar = true; + } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) { + Out << " "; + printType(Out, I->getType(), false, GetValueName(&*I)); + Out << ";\n"; + + if (isa(*I)) { // Print out PHI node temporaries as well... + Out << " "; + printType(Out, I->getType(), false, + GetValueName(&*I)+"__PHI_TEMPORARY"); + Out << ";\n"; + } + PrintedVar = true; + } + // We need a temporary for the BitCast to use so it can pluck a value out + // of a union to do the BitCast. This is separate from the need for a + // variable to hold the result of the BitCast. + if (isFPIntBitCast(*I)) { + Out << " llvmBitCastUnion " << GetValueName(&*I) + << "__BITCAST_TEMPORARY;\n"; + PrintedVar = true; + } + } + + if (PrintedVar) + Out << '\n'; + + if (F.hasExternalLinkage() && F.getName() == "main") + Out << " CODE_FOR_MAIN();\n"; + + // print the basic blocks + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Loop *L = LI->getLoopFor(BB)) { + if (L->getHeader() == BB && L->getParentLoop() == 0) + printLoop(L); + } else { + printBasicBlock(BB); + } + } + + Out << "}\n\n"; +} + +void CWriter::printLoop(Loop *L) { + Out << " do { /* Syntactic loop '" << L->getHeader()->getName() + << "' to make GCC happy */\n"; + for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) { + BasicBlock *BB = L->getBlocks()[i]; + Loop *BBLoop = LI->getLoopFor(BB); + if (BBLoop == L) + printBasicBlock(BB); + else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) + printLoop(BBLoop); + } + Out << " } while (1); /* end of syntactic loop '" + << L->getHeader()->getName() << "' */\n"; +} + +void CWriter::printBasicBlock(BasicBlock *BB) { + + // Don't print the label for the basic block if there are no uses, or if + // the only terminator use is the predecessor basic block's terminator. + // We have to scan the use list because PHI nodes use basic blocks too but + // do not require a label to be generated. 
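// Shape of the output from printLoop above (illustrative block name):
//   do {     /* Syntactic loop 'bb' to make GCC happy */
//     ...the loop's blocks, with nested loops printed recursively...
//   } while (1); /* end of syntactic loop 'bb' */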
+ // + bool NeedsLabel = false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (isGotoCodeNecessary(*PI, BB)) { + NeedsLabel = true; + break; + } + + if (NeedsLabel) Out << GetValueName(BB) << ":\n"; + + // Output all of the instructions in the basic block... + for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; + ++II) { + if (!isInlinableInst(*II) && !isDirectAlloca(II)) { + if (II->getType() != Type::VoidTy && !isInlineAsm(*II)) + outputLValue(II); + else + Out << " "; + writeInstComputationInline(*II); + Out << ";\n"; + } + } + + // Don't emit prefix or suffix for the terminator. + visit(*BB->getTerminator()); +} + + +// Specific Instruction type classes... note that all of the casts are +// necessary because we use the instruction classes as opaque types... +// +void CWriter::visitReturnInst(ReturnInst &I) { + // If this is a struct return function, return the temporary struct. + bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr(); + + if (isStructReturn) { + Out << " return StructReturn;\n"; + return; + } + + // Don't output a void return if this is the last basic block in the function + if (I.getNumOperands() == 0 && + &*--I.getParent()->getParent()->end() == I.getParent() && + !I.getParent()->size() == 1) { + return; + } + + if (I.getNumOperands() > 1) { + Out << " {\n"; + Out << " "; + printType(Out, I.getParent()->getParent()->getReturnType()); + Out << " llvm_cbe_mrv_temp = {\n"; + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + Out << " "; + writeOperand(I.getOperand(i)); + if (i != e - 1) + Out << ","; + Out << "\n"; + } + Out << " };\n"; + Out << " return llvm_cbe_mrv_temp;\n"; + Out << " }\n"; + return; + } + + Out << " return"; + if (I.getNumOperands()) { + Out << ' '; + writeOperand(I.getOperand(0)); + } + Out << ";\n"; +} + +void CWriter::visitSwitchInst(SwitchInst &SI) { + + Out << " switch ("; + writeOperand(SI.getOperand(0)); + Out << ") {\n default:\n"; + printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); + printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); + Out << ";\n"; + for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) { + Out << " case "; + writeOperand(SI.getOperand(i)); + Out << ":\n"; + BasicBlock *Succ = cast(SI.getOperand(i+1)); + printPHICopiesForSuccessor (SI.getParent(), Succ, 2); + printBranchToBlock(SI.getParent(), Succ, 2); + if (Function::iterator(Succ) == next(Function::iterator(SI.getParent()))) + Out << " break;\n"; + } + Out << " }\n"; +} + +void CWriter::visitUnreachableInst(UnreachableInst &I) { + Out << " /*UNREACHABLE*/;\n"; +} + +bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { + /// FIXME: This should be reenabled, but loop reordering safe!! + return true; + + if (next(Function::iterator(From)) != Function::iterator(To)) + return true; // Not the direct successor, we need a goto. + + //isa(From->getTerminator()) + + if (LI->getLoopFor(From) != LI->getLoopFor(To)) + return true; + return false; +} + +void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock, + BasicBlock *Successor, + unsigned Indent) { + for (BasicBlock::iterator I = Successor->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + // Now we have to do the printing. 
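// The PHI handshake in the emitted C (illustrative names): each predecessor
// stores into the temporary before branching, and the PHI's own
// "computation" is just a read of it:
//   llvm_cbe_x__PHI_TEMPORARY = llvm_cbe_v;   /* for PHI node */
//   goto llvm_cbe_bb;
//   ...
//   llvm_cbe_x = llvm_cbe_x__PHI_TEMPORARY;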
+ Value *IV = PN->getIncomingValueForBlock(CurBlock); + if (!isa(IV)) { + Out << std::string(Indent, ' '); + Out << " " << GetValueName(I) << "__PHI_TEMPORARY = "; + writeOperand(IV); + Out << "; /* for PHI node */\n"; + } + } +} + +void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ, + unsigned Indent) { + if (isGotoCodeNecessary(CurBB, Succ)) { + Out << std::string(Indent, ' ') << " goto "; + writeOperand(Succ); + Out << ";\n"; + } +} + +// Branch instruction printing - Avoid printing out a branch to a basic block +// that immediately succeeds the current one. +// +void CWriter::visitBranchInst(BranchInst &I) { + + if (I.isConditional()) { + if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) { + Out << " if ("; + writeOperand(I.getCondition()); + Out << ") {\n"; + + printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2); + printBranchToBlock(I.getParent(), I.getSuccessor(0), 2); + + if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) { + Out << " } else {\n"; + printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); + printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); + } + } else { + // First goto not necessary, assume second one is... + Out << " if (!"; + writeOperand(I.getCondition()); + Out << ") {\n"; + + printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); + printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); + } + + Out << " }\n"; + } else { + printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0); + printBranchToBlock(I.getParent(), I.getSuccessor(0), 0); + } + Out << "\n"; +} + +// PHI nodes get copied into temporary values at the end of predecessor basic +// blocks. We now need to copy these temporary values into the REAL value for +// the PHI. +void CWriter::visitPHINode(PHINode &I) { + writeOperand(&I); + Out << "__PHI_TEMPORARY"; +} + + +void CWriter::visitBinaryOperator(Instruction &I) { + // binary instructions, shift instructions, setCond instructions. + assert(!isa(I.getType())); + + // We must cast the results of binary operations which might be promoted. + bool needsCast = false; + if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty) + || (I.getType() == Type::FloatTy)) { + needsCast = true; + Out << "(("; + printType(Out, I.getType(), false); + Out << ")("; + } + + // If this is a negation operation, print it out as such. For FP, we don't + // want to print "-0.0 - X". + if (BinaryOperator::isNeg(&I)) { + Out << "-("; + writeOperand(BinaryOperator::getNegArgument(cast(&I))); + Out << ")"; + } else if (I.getOpcode() == Instruction::FRem) { + // Output a call to fmod/fmodf instead of emitting a%b + if (I.getType() == Type::FloatTy) + Out << "fmodf("; + else if (I.getType() == Type::DoubleTy) + Out << "fmod("; + else // all 3 flavors of long double + Out << "fmodl("; + writeOperand(I.getOperand(0)); + Out << ", "; + writeOperand(I.getOperand(1)); + Out << ")"; + } else { + + // Write out the cast of the instruction's value back to the proper type + // if necessary. + bool NeedsClosingParens = writeInstructionCast(I); + + // Certain instructions require the operand to be forced to a specific type + // so we use writeOperandWithCast here instead of writeOperand. 
Similarly + // below for operand 1 + writeOperandWithCast(I.getOperand(0), I.getOpcode()); + + switch (I.getOpcode()) { + case Instruction::Add: Out << " + "; break; + case Instruction::Sub: Out << " - "; break; + case Instruction::Mul: Out << " * "; break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: Out << " % "; break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: Out << " / "; break; + case Instruction::And: Out << " & "; break; + case Instruction::Or: Out << " | "; break; + case Instruction::Xor: Out << " ^ "; break; + case Instruction::Shl : Out << " << "; break; + case Instruction::LShr: + case Instruction::AShr: Out << " >> "; break; + default: cerr << "Invalid operator type!" << I; abort(); + } + + writeOperandWithCast(I.getOperand(1), I.getOpcode()); + if (NeedsClosingParens) + Out << "))"; + } + + if (needsCast) { + Out << "))"; + } +} + +void CWriter::visitICmpInst(ICmpInst &I) { + // We must cast the results of icmp which might be promoted. + bool needsCast = false; + + // Write out the cast of the instruction's value back to the proper type + // if necessary. + bool NeedsClosingParens = writeInstructionCast(I); + + // Certain icmp predicate require the operand to be forced to a specific type + // so we use writeOperandWithCast here instead of writeOperand. Similarly + // below for operand 1 + writeOperandWithCast(I.getOperand(0), I); + + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: Out << " == "; break; + case ICmpInst::ICMP_NE: Out << " != "; break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: Out << " <= "; break; + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: Out << " >= "; break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: Out << " < "; break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: Out << " > "; break; + default: cerr << "Invalid icmp predicate!" 
<< I; abort(); + } + + writeOperandWithCast(I.getOperand(1), I); + if (NeedsClosingParens) + Out << "))"; + + if (needsCast) { + Out << "))"; + } +} + +void CWriter::visitFCmpInst(FCmpInst &I) { + if (I.getPredicate() == FCmpInst::FCMP_FALSE) { + Out << "0"; + return; + } + if (I.getPredicate() == FCmpInst::FCMP_TRUE) { + Out << "1"; + return; + } + + const char* op = 0; + switch (I.getPredicate()) { + default: assert(0 && "Illegal FCmp predicate"); + case FCmpInst::FCMP_ORD: op = "ord"; break; + case FCmpInst::FCMP_UNO: op = "uno"; break; + case FCmpInst::FCMP_UEQ: op = "ueq"; break; + case FCmpInst::FCMP_UNE: op = "une"; break; + case FCmpInst::FCMP_ULT: op = "ult"; break; + case FCmpInst::FCMP_ULE: op = "ule"; break; + case FCmpInst::FCMP_UGT: op = "ugt"; break; + case FCmpInst::FCMP_UGE: op = "uge"; break; + case FCmpInst::FCMP_OEQ: op = "oeq"; break; + case FCmpInst::FCMP_ONE: op = "one"; break; + case FCmpInst::FCMP_OLT: op = "olt"; break; + case FCmpInst::FCMP_OLE: op = "ole"; break; + case FCmpInst::FCMP_OGT: op = "ogt"; break; + case FCmpInst::FCMP_OGE: op = "oge"; break; + } + + Out << "llvm_fcmp_" << op << "("; + // Write the first operand + writeOperand(I.getOperand(0)); + Out << ", "; + // Write the second operand + writeOperand(I.getOperand(1)); + Out << ")"; +} + +static const char * getFloatBitCastField(const Type *Ty) { + switch (Ty->getTypeID()) { + default: assert(0 && "Invalid Type"); + case Type::FloatTyID: return "Float"; + case Type::DoubleTyID: return "Double"; + case Type::IntegerTyID: { + unsigned NumBits = cast(Ty)->getBitWidth(); + if (NumBits <= 32) + return "Int32"; + else + return "Int64"; + } + } +} + +void CWriter::visitCastInst(CastInst &I) { + const Type *DstTy = I.getType(); + const Type *SrcTy = I.getOperand(0)->getType(); + if (isFPIntBitCast(I)) { + Out << '('; + // These int<->float and long<->double casts need to be handled specially + Out << GetValueName(&I) << "__BITCAST_TEMPORARY." + << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; + writeOperand(I.getOperand(0)); + Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." + << getFloatBitCastField(I.getType()); + Out << ')'; + return; + } + + Out << '('; + printCast(I.getOpcode(), SrcTy, DstTy); + + // Make a sext from i1 work by subtracting the i1 from 0 (an int). + if (SrcTy == Type::Int1Ty && I.getOpcode() == Instruction::SExt) + Out << "0-"; + + writeOperand(I.getOperand(0)); + + if (DstTy == Type::Int1Ty && + (I.getOpcode() == Instruction::Trunc || + I.getOpcode() == Instruction::FPToUI || + I.getOpcode() == Instruction::FPToSI || + I.getOpcode() == Instruction::PtrToInt)) { + // Make sure we really get a trunc to bool by anding the operand with 1 + Out << "&1u"; + } + Out << ')'; +} + +void CWriter::visitSelectInst(SelectInst &I) { + Out << "(("; + writeOperand(I.getCondition()); + Out << ") ? ("; + writeOperand(I.getTrueValue()); + Out << ") : ("; + writeOperand(I.getFalseValue()); + Out << "))"; +} + + +void CWriter::lowerIntrinsics(Function &F) { + // This is used to keep track of intrinsics that get generated to a lowered + // function. We must generate the prototypes before the function body which + // will only be expanded on first use (by the loop below). + std::vector prototypesToGen; + + // Examine all the instructions in this function to find the intrinsics that + // need to be lowered. 
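// Illustrative effect of the lowering loop below: an intrinsic with no
// direct C spelling, e.g. llvm.memset, is rewritten by LowerIntrinsicCall
// into an ordinary libc call, which then prints like any other call:
//   memset(llvm_cbe_p, 0, llvm_cbe_n);   /* hypothetical operand names */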
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) + if (CallInst *CI = dyn_cast(I++)) + if (Function *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + case Intrinsic::memory_barrier: + case Intrinsic::vastart: + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::setjmp: + case Intrinsic::longjmp: + case Intrinsic::prefetch: + case Intrinsic::dbg_stoppoint: + case Intrinsic::powi: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse_cmp_ps: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse2_cmp_pd: + case Intrinsic::ppc_altivec_lvsl: + // We directly implement these intrinsics + break; + default: + // If this is an intrinsic that directly corresponds to a GCC + // builtin, we handle it. + const char *BuiltinName = ""; +#define GET_GCC_BUILTIN_NAME +#include "llvm/Intrinsics.gen" +#undef GET_GCC_BUILTIN_NAME + // If we handle it, don't lower it. + if (BuiltinName[0]) break; + + // All other intrinsic calls we must lower. + Instruction *Before = 0; + if (CI != &BB->front()) + Before = prior(BasicBlock::iterator(CI)); + + IL->LowerIntrinsicCall(CI); + if (Before) { // Move iterator to instruction after call + I = Before; ++I; + } else { + I = BB->begin(); + } + // If the intrinsic got lowered to another call, and that call has + // a definition then we need to make sure its prototype is emitted + // before any calls to it. + if (CallInst *Call = dyn_cast(I)) + if (Function *NewF = Call->getCalledFunction()) + if (!NewF->isDeclaration()) + prototypesToGen.push_back(NewF); + + break; + } + + // We may have collected some prototypes to emit in the loop above. + // Emit them now, before the function that uses them is emitted. But, + // be careful not to emit them twice. + std::vector::iterator I = prototypesToGen.begin(); + std::vector::iterator E = prototypesToGen.end(); + for ( ; I != E; ++I) { + if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) { + Out << '\n'; + printFunctionSignature(*I, true); + Out << ";\n"; + } + } +} + +void CWriter::visitCallInst(CallInst &I) { + if (isa(I.getOperand(0))) + return visitInlineAsm(I); + + bool WroteCallee = false; + + // Handle intrinsic function calls first... + if (Function *F = I.getCalledFunction()) + if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) + if (visitBuiltinCall(I, ID, WroteCallee)) + return; + + Value *Callee = I.getCalledValue(); + + const PointerType *PTy = cast(Callee->getType()); + const FunctionType *FTy = cast(PTy->getElementType()); + + // If this is a call to a struct-return function, assign to the first + // parameter instead of passing it to the call. + const AttrListPtr &PAL = I.getAttributes(); + bool hasByVal = I.hasByValArgument(); + bool isStructRet = I.hasStructRetAttr(); + if (isStructRet) { + writeOperandDeref(I.getOperand(1)); + Out << " = "; + } + + if (I.isTailCall()) Out << " /*tail*/ "; + + if (!WroteCallee) { + // If this is an indirect call to a struct return function, we need to cast + // the pointer. Ditto for indirect calls with byval arguments. + bool NeedsCast = (hasByVal || isStructRet) && !isa(Callee); + + // GCC is a real PITA. It does not permit codegening casts of functions to + // function pointers if they are in a call (it generates a trap instruction + // instead!). 
+    // We work around this by inserting a cast to void* in between the
+    // function and the function pointer cast. Unfortunately, we can't just
+    // form the constant expression here, because the folder will immediately
+    // nuke it.
+    //
+    // Note finally, that this is completely unsafe. ANSI C does not
+    // guarantee that void* and function pointers have the same size. :( To
+    // deal with this in the common case, we handle casts where the number of
+    // arguments passed match exactly.
+    //
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+      if (CE->isCast())
+        if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+          NeedsCast = true;
+          Callee = RF;
+        }
+
+    if (NeedsCast) {
+      // Ok, just cast the pointer type.
+      Out << "((";
+      if (isStructRet)
+        printStructReturnPointerFunctionType(Out, PAL,
+                             cast<PointerType>(I.getCalledValue()->getType()));
+      else if (hasByVal)
+        printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+      else
+        printType(Out, I.getCalledValue()->getType());
+      Out << ")(void*)";
+    }
+    writeOperand(Callee);
+    if (NeedsCast) Out << ')';
+  }
+
+  Out << '(';
+
+  unsigned NumDeclaredParams = FTy->getNumParams();
+
+  CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+  unsigned ArgNo = 0;
+  if (isStructRet) {   // Skip struct return argument.
+    ++AI;
+    ++ArgNo;
+  }
+
+  bool PrintedArg = false;
+  for (; AI != AE; ++AI, ++ArgNo) {
+    if (PrintedArg) Out << ", ";
+    if (ArgNo < NumDeclaredParams &&
+        (*AI)->getType() != FTy->getParamType(ArgNo)) {
+      Out << '(';
+      printType(Out, FTy->getParamType(ArgNo),
+                /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+      Out << ')';
+    }
+    // Check if the argument is expected to be passed by value.
+    if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+      writeOperandDeref(*AI);
+    else
+      writeOperand(*AI);
+    PrintedArg = true;
+  }
+  Out << ')';
+}
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled, returns false if it wasn't handled, and
+/// optionally sets 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+                               bool &WroteCallee) {
+  switch (ID) {
+  default: {
+    // If this is an intrinsic that directly corresponds to a GCC
+    // builtin, we emit it here.
+    const char *BuiltinName = "";
+    Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+    assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+    Out << BuiltinName;
+    WroteCallee = true;
+    return false;
+  }
+  case Intrinsic::memory_barrier:
+    Out << "__sync_synchronize()";
+    return true;
+  case Intrinsic::vastart:
+    Out << "0; ";
+
+    Out << "va_start(*(va_list*)";
+    writeOperand(I.getOperand(1));
+    Out << ", ";
+    // Output the last argument to the enclosing function.
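+    // Sketch (operand names illustrative): inside "void f(int n, ...)" a
+    // call to @llvm.va_start prints roughly
+    //   0; va_start(*(va_list*)&llvm_cbe_ap, llvm_cbe_n)
+    // with the enclosing function's last named argument written second.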
+    if (I.getParent()->getParent()->arg_empty()) {
+      cerr << "The C backend does not currently support zero "
+           << "argument varargs functions, such as '"
+           << I.getParent()->getParent()->getName() << "'!\n";
+      abort();
+    }
+    writeOperand(--I.getParent()->getParent()->arg_end());
+    Out << ')';
+    return true;
+  case Intrinsic::vaend:
+    if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+      Out << "0; va_end(*(va_list*)";
+      writeOperand(I.getOperand(1));
+      Out << ')';
+    } else {
+      Out << "va_end(*(va_list*)0)";
+    }
+    return true;
+  case Intrinsic::vacopy:
+    Out << "0; ";
+    Out << "va_copy(*(va_list*)";
+    writeOperand(I.getOperand(1));
+    Out << ", *(va_list*)";
+    writeOperand(I.getOperand(2));
+    Out << ')';
+    return true;
+  case Intrinsic::returnaddress:
+    Out << "__builtin_return_address(";
+    writeOperand(I.getOperand(1));
+    Out << ')';
+    return true;
+  case Intrinsic::frameaddress:
+    Out << "__builtin_frame_address(";
+    writeOperand(I.getOperand(1));
+    Out << ')';
+    return true;
+  case Intrinsic::powi:
+    Out << "__builtin_powi(";
+    writeOperand(I.getOperand(1));
+    Out << ", ";
+    writeOperand(I.getOperand(2));
+    Out << ')';
+    return true;
+  case Intrinsic::setjmp:
+    Out << "setjmp(*(jmp_buf*)";
+    writeOperand(I.getOperand(1));
+    Out << ')';
+    return true;
+  case Intrinsic::longjmp:
+    Out << "longjmp(*(jmp_buf*)";
+    writeOperand(I.getOperand(1));
+    Out << ", ";
+    writeOperand(I.getOperand(2));
+    Out << ')';
+    return true;
+  case Intrinsic::prefetch:
+    Out << "LLVM_PREFETCH((const void *)";
+    writeOperand(I.getOperand(1));
+    Out << ", ";
+    writeOperand(I.getOperand(2));
+    Out << ", ";
+    writeOperand(I.getOperand(3));
+    Out << ")";
+    return true;
+  case Intrinsic::stacksave:
+    // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+    // to work around GCC bugs (see PR1809).
+    Out << "0; *((void**)&" << GetValueName(&I)
+        << ") = __builtin_stack_save()";
+    return true;
+  case Intrinsic::dbg_stoppoint: {
+    // If we use writeOperand directly we get a "u" suffix which is rejected
+    // by gcc.
+    std::stringstream SPIStr;
+    DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+    SPI.getDirectory()->print(SPIStr);
+    Out << "\n#line "
+        << SPI.getLine()
+        << " \"";
+    Out << SPIStr.str();
+    SPIStr.clear();
+    SPI.getFileName()->print(SPIStr);
+    Out << SPIStr.str() << "\"\n";
+    return true;
+  }
+  case Intrinsic::x86_sse_cmp_ss:
+  case Intrinsic::x86_sse_cmp_ps:
+  case Intrinsic::x86_sse2_cmp_sd:
+  case Intrinsic::x86_sse2_cmp_pd:
+    Out << '(';
+    printType(Out, I.getType());
+    Out << ')';
+    // Multiple GCC builtins multiplex onto this intrinsic.
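+    // E.g. (illustrative) llvm.x86.sse.cmp.ps with predicate immediate 1
+    // comes out as a cast of __builtin_ia32_cmpltps(a, b).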
+    switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
+    default: assert(0 && "Invalid llvm.x86.sse.cmp!");
+    case 0: Out << "__builtin_ia32_cmpeq"; break;
+    case 1: Out << "__builtin_ia32_cmplt"; break;
+    case 2: Out << "__builtin_ia32_cmple"; break;
+    case 3: Out << "__builtin_ia32_cmpunord"; break;
+    case 4: Out << "__builtin_ia32_cmpneq"; break;
+    case 5: Out << "__builtin_ia32_cmpnlt"; break;
+    case 6: Out << "__builtin_ia32_cmpnle"; break;
+    case 7: Out << "__builtin_ia32_cmpord"; break;
+    }
+    if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+      Out << 'p';
+    else
+      Out << 's';
+    if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+      Out << 's';
+    else
+      Out << 'd';
+
+    Out << "(";
+    writeOperand(I.getOperand(1));
+    Out << ", ";
+    writeOperand(I.getOperand(2));
+    Out << ")";
+    return true;
+  case Intrinsic::ppc_altivec_lvsl:
+    Out << '(';
+    printType(Out, I.getType());
+    Out << ')';
+    Out << "__builtin_altivec_lvsl(0, (void*)";
+    writeOperand(I.getOperand(1));
+    Out << ")";
+    return true;
+  }
+}
+
+// This converts the LLVM constraint string to something gcc is expecting.
+// TODO: work out platform-independent constraints and factor those out
+//       of the per-target tables; handle multiple constraint codes.
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+
+  assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+  const char *const *table = 0;
+
+  // Grab the translation table from TargetAsmInfo if it exists.
+  if (!TAsm) {
+    std::string E;
+    const TargetMachineRegistry::entry* Match =
+      TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
+    if (Match) {
+      // Per-platform Target Machines don't exist, so create one;
+      // this must be done only once.
+      const TargetMachine* TM = Match->CtorFn(*TheModule, "");
+      TAsm = TM->getTargetAsmInfo();
+    }
+  }
+  if (TAsm)
+    table = TAsm->getAsmCBE();
+
+  // Search the translation table if it exists.
+  for (int i = 0; table && table[i]; i += 2)
+    if (c.Codes[0] == table[i])
+      return table[i+1];
+
+  // Default is identity.
+  return c.Codes[0];
+}
+
+// TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+  for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+    if (asmstr[i] == '\n')
+      asmstr.replace(i, 1, "\\n");
+    else if (asmstr[i] == '\t')
+      asmstr.replace(i, 1, "\\t");
+    else if (asmstr[i] == '$') {
+      if (asmstr[i + 1] == '{') {
+        std::string::size_type a = asmstr.find_first_of(':', i + 1);
+        std::string::size_type b = asmstr.find_first_of('}', i + 1);
+        std::string n = "%" +
+          asmstr.substr(a + 1, b - a - 1) +
+          asmstr.substr(i + 2, a - i - 2);
+        asmstr.replace(i, b - i + 1, n);
+        i += n.size() - 1;
+      } else
+        asmstr.replace(i, 1, "%");
+    }
+    else if (asmstr[i] == '%')  // grr
+      { asmstr.replace(i, 1, "%%"); ++i; }
+
+  return asmstr;
+}
+
+// TODO: the assumptions about what consumes arguments from the call are
+//       likely wrong; handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+  InlineAsm* as = cast<InlineAsm>(CI.getOperand(0));
+  std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
+
+  std::vector<std::pair<Value*, int> > ResultVals;
+  if (CI.getType() == Type::VoidTy)
+    ;
+  else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+      ResultVals.push_back(std::make_pair(&CI, (int)i));
+  } else {
+    ResultVals.push_back(std::make_pair(&CI, -1));
+  }
+
+  // Fix up the asm string for gcc and emit it.
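+  // gccifyAsm at work (sketch): "mov $1, $0" becomes "mov %1, %0",
+  // "${0:y}" becomes "%y0", and a literal '%' is doubled to "%%".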
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; + Out << " :"; + + unsigned ValueCount = 0; + bool IsFirst = true; + + // Convert over all the output constraints. + for (std::vector::iterator I = Constraints.begin(), + E = Constraints.end(); I != E; ++I) { + + if (I->Type != InlineAsm::isOutput) { + ++ValueCount; + continue; // Ignore non-output constraints. + } + + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) continue; + + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } + + // Unpack the dest. + Value *DestVal; + int DestValNo = -1; + + if (ValueCount < ResultVals.size()) { + DestVal = ResultVals[ValueCount].first; + DestValNo = ResultVals[ValueCount].second; + } else + DestVal = CI.getOperand(ValueCount-ResultVals.size()+1); + + if (I->isEarlyClobber) + C = "&"+C; + + Out << "\"=" << C << "\"(" << GetValueName(DestVal); + if (DestValNo != -1) + Out << ".field" << DestValNo; // Multiple retvals. + Out << ")"; + ++ValueCount; + } + + + // Convert over all the input constraints. + Out << "\n :"; + IsFirst = true; + ValueCount = 0; + for (std::vector::iterator I = Constraints.begin(), + E = Constraints.end(); I != E; ++I) { + if (I->Type != InlineAsm::isInput) { + ++ValueCount; + continue; // Ignore non-input constraints. + } + + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) continue; + + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } + + assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); + Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1); + + Out << "\"" << C << "\"("; + if (!I->isIndirect) + writeOperand(SrcVal); + else + writeOperandDeref(SrcVal); + Out << ")"; + } + + // Convert over the clobber constraints. + IsFirst = true; + ValueCount = 0; + for (std::vector::iterator I = Constraints.begin(), + E = Constraints.end(); I != E; ++I) { + if (I->Type != InlineAsm::isClobber) + continue; // Ignore non-input constraints. + + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) continue; + + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } + + Out << '\"' << C << '"'; + } + + Out << ")"; +} + +void CWriter::visitMallocInst(MallocInst &I) { + assert(0 && "lowerallocations pass didn't work!"); +} + +void CWriter::visitAllocaInst(AllocaInst &I) { + Out << '('; + printType(Out, I.getType()); + Out << ") alloca(sizeof("; + printType(Out, I.getType()->getElementType()); + Out << ')'; + if (I.isArrayAllocation()) { + Out << " * " ; + writeOperand(I.getOperand(0)); + } + Out << ')'; +} + +void CWriter::visitFreeInst(FreeInst &I) { + assert(0 && "lowerallocations pass didn't work!"); +} + +void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, + gep_type_iterator E, bool Static) { + + // If there are no indices, just print out the pointer. + if (I == E) { + writeOperand(Ptr); + return; + } + + // Find out if the last index is into a vector. If so, we have to print this + // specially. Since vectors can't have elements of indexable type, only the + // last index could possibly be of a vector element. 
+  const VectorType *LastIndexIsVector = 0;
+  {
+    for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+      LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+  }
+
+  Out << "(";
+
+  // If the last index is into a vector, we can't print it as &a[i][j] because
+  // we can't index into a vector with j in GCC. Instead, emit this as
+  // (((float*)&a[i])+j)
+  if (LastIndexIsVector) {
+    Out << "((";
+    printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+    Out << ")(";
+  }
+
+  Out << '&';
+
+  // If the first index is 0 (very typical) we can do a number of
+  // simplifications to clean up the code.
+  Value *FirstOp = I.getOperand();
+  if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+    // First index isn't simple, print it the hard way.
+    writeOperand(Ptr);
+  } else {
+    ++I;  // Skip the zero index.
+
+    // Okay, emit the first operand. If Ptr is something that is already
+    // address exposed, like a global, avoid emitting (&foo)[0], just emit
+    // foo instead.
+    if (isAddressExposed(Ptr)) {
+      writeOperandInternal(Ptr, Static);
+    } else if (I != E && isa<StructType>(*I)) {
+      // If we didn't already emit the first operand, see if we can print it as
+      // P->f instead of "P[0].f"
+      writeOperand(Ptr);
+      Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+      ++I;  // eat the struct index as well.
+    } else {
+      // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+      Out << "(*";
+      writeOperand(Ptr);
+      Out << ")";
+    }
+  }
+
+  for (; I != E; ++I) {
+    if (isa<StructType>(*I)) {
+      Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+    } else if (isa<ArrayType>(*I)) {
+      Out << ".array[";
+      writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+      Out << ']';
+    } else if (!isa<VectorType>(*I)) {
+      Out << '[';
+      writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+      Out << ']';
+    } else {
+      // If the last index is into a vector, then print it out as "+j)". This
+      // works with the 'LastIndexIsVector' code above.
+      if (isa<Constant>(I.getOperand()) &&
+          cast<Constant>(I.getOperand())->isNullValue()) {
+        Out << "))";  // avoid "+0".
+      } else {
+        Out << ")+(";
+        writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+        Out << "))";
+      }
+    }
+  }
+  Out << ")";
+}
+
+void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+                                bool IsVolatile, unsigned Alignment) {
+
+  bool IsUnaligned = Alignment &&
+    Alignment < TD->getABITypeAlignment(OperandType);
+
+  if (!IsUnaligned)
+    Out << '*';
+  if (IsVolatile || IsUnaligned) {
+    Out << "((";
+    if (IsUnaligned)
+      Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+    printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+    if (IsUnaligned) {
+      Out << "; } ";
+      if (IsVolatile) Out << "volatile ";
+      Out << "*";
+    }
+    Out << ")";
+  }
+
+  writeOperand(Operand);
+
+  if (IsVolatile || IsUnaligned) {
+    Out << ')';
+    if (IsUnaligned)
+      Out << "->data";
+  }
+}
+
+void CWriter::visitLoadInst(LoadInst &I) {
+  writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+                    I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+  writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+                    I.isVolatile(), I.getAlignment());
+  Out << " = ";
+  Value *Operand = I.getOperand(0);
+  Constant *BitMask = 0;
+  if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+    if (!ITy->isPowerOf2ByteWidth())
+      // We have a bit width that doesn't match an even power-of-2 byte
+      // size.
+      // Consequently we must & the value with the type's bit mask.
+      BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+  if (BitMask)
+    Out << "((";
+  writeOperand(Operand);
+  if (BitMask) {
+    Out << ") & ";
+    printConstant(BitMask, false);
+    Out << ")";
+  }
+}
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+  printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+                     gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+  Out << "va_arg(*(va_list*)";
+  writeOperand(I.getOperand(0));
+  Out << ", ";
+  printType(Out, I.getType());
+  Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+  const Type *EltTy = I.getType()->getElementType();
+  writeOperand(I.getOperand(0));
+  Out << ";\n  ";
+  Out << "((";
+  printType(Out, PointerType::getUnqual(EltTy));
+  Out << ")(&" << GetValueName(&I) << "))[";
+  writeOperand(I.getOperand(2));
+  Out << "] = (";
+  writeOperand(I.getOperand(1));
+  Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+  // We know that our operand is not inlined.
+  Out << "((";
+  const Type *EltTy =
+    cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+  printType(Out, PointerType::getUnqual(EltTy));
+  Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+  writeOperand(I.getOperand(1));
+  Out << "]";
+}
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+  Out << "(";
+  printType(Out, SVI.getType());
+  Out << "){ ";
+  const VectorType *VT = SVI.getType();
+  unsigned NumElts = VT->getNumElements();
+  const Type *EltTy = VT->getElementType();
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (i) Out << ", ";
+    int SrcVal = SVI.getMaskValue(i);
+    if ((unsigned)SrcVal >= NumElts*2) {
+      Out << " 0/*undef*/ ";
+    } else {
+      Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+      if (isa<Instruction>(Op)) {
+        // Do an extractelement of this value from the appropriate input.
+        Out << "((";
+        printType(Out, PointerType::getUnqual(EltTy));
+        Out << ")(&" << GetValueName(Op)
+            << "))[" << (SrcVal & (NumElts-1)) << "]";
+      } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+        Out << "0";
+      } else {
+        printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+                                                           (NumElts-1)),
+                      false);
+      }
+    }
+  }
+  Out << "}";
+}
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+  // Start by copying the entire aggregate value into the result variable.
+  writeOperand(IVI.getOperand(0));
+  Out << ";\n  ";
+
+  // Then do the insert to update the field.
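+  // E.g. (sketch) "%r = insertvalue {i32, float} %a, float %v, 1" emits
+  //   llvm_cbe_r = llvm_cbe_a;
+  //   llvm_cbe_r.field1 = llvm_cbe_v;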
+  Out << GetValueName(&IVI);
+  for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+       i != e; ++i) {
+    const Type *IndexedTy =
+      ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
+    if (isa<ArrayType>(IndexedTy))
+      Out << ".array[" << *i << "]";
+    else
+      Out << ".field" << *i;
+  }
+  Out << " = ";
+  writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+  Out << "(";
+  if (isa<UndefValue>(EVI.getOperand(0))) {
+    Out << "(";
+    printType(Out, EVI.getType());
+    Out << ") 0/*UNDEF*/";
+  } else {
+    Out << GetValueName(EVI.getOperand(0));
+    for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+         i != e; ++i) {
+      const Type *IndexedTy =
+        ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
+      if (isa<ArrayType>(IndexedTy))
+        Out << ".array[" << *i << "]";
+      else
+        Out << ".field" << *i;
+    }
+  }
+  Out << ")";
+}
+
+//===----------------------------------------------------------------------===//
+//                       External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+                                              raw_ostream &o,
+                                              CodeGenFileType FileType,
+                                              CodeGenOpt::Level OptLevel) {
+  if (FileType != TargetMachine::AssemblyFile) return true;
+
+  PM.add(createGCLoweringPass());
+  PM.add(createLowerAllocationsPass(true));
+  PM.add(createLowerInvokePass());
+  PM.add(createCFGSimplificationPass());   // clean up after lower invoke.
+  PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
+  PM.add(new CWriter(o));
+  PM.add(createGCInfoDeleter());
+  return false;
+}
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
new file mode 100644
index 000000000000..be243366d50e
--- /dev/null
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(CBackend
+  CBackend.cpp
+  )
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 000000000000..8b262455ad34
--- /dev/null
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+  const TargetData DataLayout;       // Calculates type size & alignment
+
+  CTargetMachine(const Module &M, const std::string &FS)
+    : DataLayout(&M) {}
+
+  virtual bool WantsWholeFile() const { return true; }
+  virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+                                        CodeGenFileType FileType,
+                                        CodeGenOpt::Level OptLevel);
+
+  // This class always works, but must be requested explicitly on
+  // llc command line.
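+  // (The backend is therefore selected by hand, e.g. "llc -march=c foo.bc";
+  // a match quality of 0 means it is never picked from the module's triple.)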
+ static unsigned getModuleMatchQuality(const Module &M) { return 0; } + + virtual const TargetData *getTargetData() const { return &DataLayout; } +}; + +} // End llvm namespace + + +#endif diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile new file mode 100644 index 000000000000..336de0c6f440 --- /dev/null +++ b/lib/Target/CBackend/Makefile @@ -0,0 +1,14 @@ +##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMCBackend +include $(LEVEL)/Makefile.common + +CompileCommonOpts += -Wno-format diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt new file mode 100644 index 000000000000..1cf0a91078fa --- /dev/null +++ b/lib/Target/CMakeLists.txt @@ -0,0 +1,17 @@ +add_llvm_library(LLVMTarget + DarwinTargetAsmInfo.cpp + ELFTargetAsmInfo.cpp + SubtargetFeature.cpp + Target.cpp + TargetAsmInfo.cpp + TargetData.cpp + TargetFrameInfo.cpp + TargetInstrInfo.cpp + TargetMachOWriterInfo.cpp + TargetMachine.cpp + TargetMachineRegistry.cpp + TargetRegisterInfo.cpp + TargetSubtarget.cpp + ) + +# TODO: Support other targets besides X86. See Makefile. \ No newline at end of file diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..4336b057a346 --- /dev/null +++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt @@ -0,0 +1,12 @@ +include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_partially_linked_object(LLVMCellSPUAsmPrinter + SPUAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMCellSPUCodeGen n) + +add_dependencies(LLVMCellSPUAsmPrinter ${n}) diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile new file mode 100644 index 000000000000..dd56df71a5de --- /dev/null +++ b/lib/Target/CellSPU/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMCellSPUAsmPrinter + +# Hack: we need to include 'main' CellSPU target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp new file mode 100644 index 000000000000..da1bf074de96 --- /dev/null +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -0,0 +1,623 @@ +//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Cell SPU assembly language. This printer +// is the output mechanism used by `llc'. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+  STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+  const std::string bss_section(".bss");
+
+  class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
+    std::set<std::string> FnStubs, GVStubs;
+  public:
+    explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM,
+                           const TargetAsmInfo *T, CodeGenOpt::Level OL,
+                           bool V) :
+      AsmPrinter(O, TM, T, OL, V) {}
+
+    virtual const char *getPassName() const {
+      return "STI CBEA SPU Assembly Printer";
+    }
+
+    SPUTargetMachine &getTM() {
+      return static_cast<SPUTargetMachine&>(TM);
+    }
+
+    /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description. This method returns true if the
+    /// machine instruction was sufficiently described to print it; otherwise
+    /// it returns false.
+    bool printInstruction(const MachineInstr *MI);
+
+    void printMachineInstruction(const MachineInstr *MI);
+    void printOp(const MachineOperand &MO);
+
+    /// printRegister - Print register according to target requirements.
+ /// + void printRegister(const MachineOperand &MO, bool R0AsZero) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && + "Not physreg??"); + O << TM.getRegisterInfo()->get(RegNo).AsmName; + } + + void printOperand(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??"); + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO); + } + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + + + void + printS7ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + int value = MI->getOperand(OpNo).getImm(); + value = (value << (32 - 7)) >> (32 - 7); + + assert((value >= -(1 << 8) && value <= (1 << 7) - 1) + && "Invalid s7 argument"); + O << value; + } + + void + printU7ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value < (1 << 8) && "Invalid u7 argument"); + O << value; + } + + void + printShufAddr(const MachineInstr *MI, unsigned OpNo) + { + char value = MI->getOperand(OpNo).getImm(); + O << (int) value; + O << "("; + printOperand(MI, OpNo+1); + O << ")"; + } + + void + printS16ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + O << (short) MI->getOperand(OpNo).getImm(); + } + + void + printU16ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + O << (unsigned short)MI->getOperand(OpNo).getImm(); + } + + void + printU32ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + O << (unsigned)MI->getOperand(OpNo).getImm(); + } + + void + printMemRegReg(const MachineInstr *MI, unsigned OpNo) { + // When used as the base register, r0 reads constant zero rather than + // the value contained in the register. For this reason, the darwin + // assembler requires that we print r0 as 0 (no r) when used as the base. + const MachineOperand &MO = MI->getOperand(OpNo); + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + O << ", "; + printOperand(MI, OpNo+1); + } + + void + printU18ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); + O << value; + } + + void + printS10ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value >= -(1 << 9) && value <= (1 << 9) - 1) + && "Invalid s10 argument"); + O << value; + } + + void + printU10ImmOperand(const MachineInstr *MI, unsigned OpNo) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); + O << value; + } + + void + printDFormAddr(const MachineInstr *MI, unsigned OpNo) + { + assert(MI->getOperand(OpNo).isImm() && + "printDFormAddr first operand is not immediate"); + int64_t value = int64_t(MI->getOperand(OpNo).getImm()); + int16_t value16 = int16_t(value); + assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) + && "Invalid dform s10 offset argument"); + O << (value16 & ~0xf) << "("; + printOperand(MI, OpNo+1); + O << ")"; + } + + void + printAddr256K(const MachineInstr *MI, unsigned OpNo) + { + /* Note: operand 1 is an offset or symbol name. 
     */
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo);
+      } else {
+        printOp(MI->getOperand(OpNo));
+        if (MI->getOperand(OpNo+1).isImm()) {
+          int displ = int(MI->getOperand(OpNo+1).getImm());
+          if (displ > 0)
+            O << "+" << displ;
+          else if (displ < 0)
+            O << displ;
+        }
+      }
+    }
+
+    void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+      printOp(MI->getOperand(OpNo));
+    }
+
+    void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) {
+      // Used to generate a ".-", but it turns out that the assembler
+      // really wants the target.
+      //
+      // N.B.: This operand is used for call targets. Branch hints are another
+      // animal entirely.
+      printOp(MI->getOperand(OpNo));
+    }
+
+    void printHBROperand(const MachineInstr *MI, unsigned OpNo) {
+      // HBR operands are generated in front of branches, hence, the
+      // program counter plus the target.
+      O << ".+";
+      printOp(MI->getOperand(OpNo));
+    }
+
+    void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo);
+      } else {
+        printOp(MI->getOperand(OpNo));
+        O << "@h";
+      }
+    }
+
+    void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo);
+      } else {
+        printOp(MI->getOperand(OpNo));
+        O << "@l";
+      }
+    }
+
+    /// Print local store address
+    void printSymbolLSA(const MachineInstr *MI, unsigned OpNo) {
+      printOp(MI->getOperand(OpNo));
+    }
+
+    void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+      if (MI->getOperand(OpNo).isImm()) {
+        int value = (int) MI->getOperand(OpNo).getImm();
+        assert((value >= 0 && value < 16)
+               && "Invalid negated immediate rotate 7-bit argument");
+        O << -value;
+      } else {
+        assert(0 && "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+      }
+    }
+
+    void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+      if (MI->getOperand(OpNo).isImm()) {
+        int value = (int) MI->getOperand(OpNo).getImm();
+        assert((value >= 0 && value <= 32)
+               && "Invalid negated immediate rotate 7-bit argument");
+        O << -value;
+      } else {
+        assert(0 && "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+      }
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+    //! Assembly printer cleanup after function has been emitted
+    virtual bool doFinalization(Module &M) = 0;
+  };
+
+  /// LinuxAsmPrinter - SPU assembly printer, customized for Linux
+  class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
+    DwarfWriter *DW;
+    MachineModuleInfo *MMI;
+  public:
+    explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
+                             const TargetAsmInfo *T, CodeGenOpt::Level F,
+                             bool V)
+      : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+
+    virtual const char *getPassName() const {
+      return "STI CBEA SPU Assembly Printer";
+    }
+
+    bool runOnMachineFunction(MachineFunction &F);
+    bool doInitialization(Module &M);
+    //! Dump globals, perform cleanup after function emission
+    bool doFinalization(Module &M);
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<MachineModuleInfo>();
+      AU.addRequired<DwarfWriter>();
+      SPUAsmPrinter::getAnalysisUsage(AU);
+    }
+
+    //!
Emit a global variable according to its section and type + void printModuleLevelGV(const GlobalVariable* GVar); + }; +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer +#include "SPUGenAsmWriter.inc" + +void SPUAsmPrinter::printOp(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + cerr << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ExternalSymbol: + // Computing the address of an external symbol, not calling it. + if (TM.getRelocationModel() != Reloc::Static) { + std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + GVStubs.insert(Name); + O << "L" << Name << "$non_lazy_ptr"; + return; + } + O << TAI->getGlobalPrefix() << MO.getSymbolName(); + return; + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + + // External or weakly linked global variables need non-lazily-resolved + // stubs + if (TM.getRelocationModel() != Reloc::Static) { + if (((GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + GVStubs.insert(Name); + O << "L" << Name << "$non_lazy_ptr"; + return; + } + } + O << Name; + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + + default: + O << ""; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + } + } + + printOperand(MI, OpNo); + return false; +} + +bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printMemRegReg(MI, OpNo); + return false; +} + +/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax +/// to the current output stream. +/// +void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + printInstruction(MI); +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool +LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) +{ + this->MF = &MF; + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out labels for the function. 
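+  // For an externally visible function "foo" the switch below emits roughly:
+  //   .global foo
+  //   .type   foo, @function
+  // followed by the "foo:" label (sketch; the directives vary by linkage).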
+  const Function *F = MF.getFunction();
+
+  SwitchToSection(TAI->SectionForGlobal(F));
+  EmitAlignment(3, F);
+
+  switch (F->getLinkage()) {
+  default: assert(0 && "Unknown linkage type!");
+  case Function::PrivateLinkage:
+  case Function::InternalLinkage:  // Symbols default to internal.
+    break;
+  case Function::ExternalLinkage:
+    O << "\t.global\t" << CurrentFnName << "\n"
+      << "\t.type\t" << CurrentFnName << ", @function\n";
+    break;
+  case Function::WeakAnyLinkage:
+  case Function::WeakODRLinkage:
+  case Function::LinkOnceAnyLinkage:
+  case Function::LinkOnceODRLinkage:
+    O << "\t.global\t" << CurrentFnName << "\n";
+    O << "\t.weak_definition\t" << CurrentFnName << "\n";
+    break;
+  }
+  O << CurrentFnName << ":\n";
+
+  // Emit pre-function debug information.
+  DW->BeginFunction(&MF);
+
+  // Print out code for the function.
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    // Print a label for the basic block.
+    if (I != MF.begin()) {
+      printBasicBlockLabel(I, true, true);
+      O << '\n';
+    }
+    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+         II != E; ++II) {
+      // Print the assembly for the instruction.
+      printMachineInstruction(II);
+    }
+  }
+
+  O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << "\n";
+
+  // Print out jump tables referenced by the function.
+  EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+  // Emit post-function debug information.
+  DW->EndFunction(&MF);
+
+  // We didn't modify anything.
+  return false;
+}
+
+
+bool LinuxAsmPrinter::doInitialization(Module &M) {
+  bool Result = AsmPrinter::doInitialization(M);
+  SwitchToTextSection("\t.text");
+  // Emit initial debug information.
+  DW = getAnalysisIfAvailable<DwarfWriter>();
+  assert(DW && "Dwarf Writer is not available");
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  DW->BeginModule(&M, MMI, O, this, TAI);
+  return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+  for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+       Name != E; ++Name)
+    if (isprint(*Name))
+      OS << *Name;
+}
+
+/*!
+  Emit a global variable according to its section, alignment, etc.
+
+  \note This code was shamelessly copied from the PowerPC's assembly printer,
+  which sort of screams for some kind of refactorization of common code.
+ */
+void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+  const TargetData *TD = TM.getTargetData();
+
+  if (!GVar->hasInitializer())
+    return;
+
+  // Check to see if this is a special global used by LLVM, if so, emit it.
+  if (EmitSpecialLLVMGlobal(GVar))
+    return;
+
+  std::string name = Mang->getValueName(GVar);
+
+  printVisibility(name, GVar->getVisibility());
+
+  Constant *C = GVar->getInitializer();
+  const Type *Type = C->getType();
+  unsigned Size = TD->getTypeAllocSize(Type);
+  unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+  SwitchToSection(TAI->SectionForGlobal(GVar));
+
+  if (C->isNullValue() && /* FIXME: Verify correct */
+      !GVar->hasSection() &&
+      (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+       GVar->isWeakForLinker())) {
+    if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
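+    // Sketch of the zero-initialized cases handled here: external globals
+    // get .global/.type/.zero, locals use the .lcomm directive, and the
+    // rest fall back to .comm.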
+
+    if (GVar->hasExternalLinkage()) {
+      O << "\t.global " << name << '\n';
+      O << "\t.type " << name << ", @object\n";
+      O << name << ":\n";
+      O << "\t.zero " << Size << '\n';
+    } else if (GVar->hasLocalLinkage()) {
+      O << TAI->getLCOMMDirective() << name << ',' << Size;
+    } else {
+      O << ".comm " << name << ',' << Size;
+    }
+    O << "\t\t" << TAI->getCommentString() << " '";
+    PrintUnmangledNameSafely(GVar, O);
+    O << "'\n";
+    return;
+  }
+
+  switch (GVar->getLinkage()) {
+  // Should never be seen for the CellSPU platform...
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::CommonLinkage:
+    O << "\t.global " << name << '\n'
+      << "\t.type " << name << ", @object\n"
+      << "\t.weak " << name << '\n';
+    break;
+  case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section of
+    // their name or something. For now, just emit them as external.
+  case GlobalValue::ExternalLinkage:
+    // If external or appending, declare as a global symbol
+    O << "\t.global " << name << '\n'
+      << "\t.type " << name << ", @object\n";
+    // FALL THROUGH
+  case GlobalValue::PrivateLinkage:
+  case GlobalValue::InternalLinkage:
+    break;
+  default:
+    cerr << "Unknown linkage type!";
+    abort();
+  }
+
+  EmitAlignment(Align, GVar);
+  O << name << ":\t\t\t\t" << TAI->getCommentString() << " '";
+  PrintUnmangledNameSafely(GVar, O);
+  O << "'\n";
+
+  // If the initializer is an extern weak symbol, remember to emit the weak
+  // reference!
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    if (GV->hasExternalWeakLinkage())
+      ExtWeakSymbols.insert(GV);
+
+  EmitGlobalConstant(C);
+  O << '\n';
+}
+
+bool LinuxAsmPrinter::doFinalization(Module &M) {
+  // Print out module-level global variables here.
+  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    printModuleLevelGV(I);
+
+  // Emit final debug information.
+  DW->EndModule();
+
+  return AsmPrinter::doFinalization(M);
+}
+
+/// createSPUAsmPrinterPass - Returns a pass that prints the Cell SPU
+/// assembly code for a MachineFunction to the given output stream, in a format
+/// that the Linux SPU assembler can deal with.
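+/// (Illustrative: llc normally reaches this through the target machine's
+/// assembly-emitter hook rather than by constructing the pass directly.)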
+/// +FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, + SPUTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt new file mode 100644 index 000000000000..e3e12acc505d --- /dev/null +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_TARGET_DEFINITIONS SPU.td) + +tablegen(SPUGenInstrNames.inc -gen-instr-enums) +tablegen(SPUGenRegisterNames.inc -gen-register-enums) +tablegen(SPUGenAsmWriter.inc -gen-asm-writer) +tablegen(SPUGenCodeEmitter.inc -gen-emitter) +tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(SPUGenRegisterInfo.inc -gen-register-desc) +tablegen(SPUGenInstrInfo.inc -gen-instr-desc) +tablegen(SPUGenDAGISel.inc -gen-dag-isel) +tablegen(SPUGenSubtarget.inc -gen-subtarget) +tablegen(SPUGenCallingConv.inc -gen-callingconv) + +add_llvm_target(CellSPUCodeGen + SPUFrameInfo.cpp + SPUHazardRecognizers.cpp + SPUInstrInfo.cpp + SPUISelDAGToDAG.cpp + SPUISelLowering.cpp + SPURegisterInfo.cpp + SPUSubtarget.cpp + SPUTargetAsmInfo.cpp + SPUTargetMachine.cpp + ) diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td new file mode 100644 index 000000000000..5d759a41c2c0 --- /dev/null +++ b/lib/Target/CellSPU/CellSDKIntrinsics.td @@ -0,0 +1,448 @@ +//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +///--==-- Arithmetic ops intrinsics --==-- +def CellSDKah: + RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>; +def CellSDKahi: + RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>; +def CellSDKa: + RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>; +def CellSDKai: + RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>; +def CellSDKsfh: + RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>; +def CellSDKsfhi: + RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>; +def CellSDKsf: + RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>; +def CellSDKsfi: + RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>; +def CellSDKaddx: + RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>; +def CellSDKcg: + RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>; +def CellSDKcgx: + RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>; +def CellSDKsfx: + RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>; +def CellSDKbg: + RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>; +def CellSDKbgx: + RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>; + +def CellSDKmpy: + RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpy $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyu: + RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyu $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))] >; + +def CellSDKmpyi: + RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "mpyi $rT, $rA, $val", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi 
(v8i16 VECREG:$rA), + i16ImmSExt10:$val))]>; + +def CellSDKmpyui: + RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "mpyui $rT, $rA, $val", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA), + i16ImmSExt10:$val))]>; + +def CellSDKmpya: + RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "mpya $rT, $rA, $rB, $rC", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB), + (v8i16 VECREG:$rC)))]>; + +def CellSDKmpyh: + RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyh $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpys: + RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpys $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhh: + RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhh $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhha: + RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhha $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave +// as an intrinsic for the time being +def CellSDKmpyhhu: + RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhhu $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhhau: + RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhhau $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKand: + RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "and\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKandc: + RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "andc\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKandbi: + RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "andbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKandhi: + RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "andhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKandi: + RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "andi\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "or\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKorc: + RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "addc\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + 
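+// Illustrative reading of the defs in this file: each one binds an SPU SDK
+// intrinsic to an instruction encoding, so IR such as a call to
+// llvm.spu.si.or selects the "or" RRForm above (the int_spu_si_* names
+// follow tablegen's usual llvm.* dotted-name convention).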
+def CellSDKorbi: + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "orbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKorhi: + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "orhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKori: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ori\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKxor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "xor\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKxorbi: + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "xorbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKxorhi: + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "xorhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKxori: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "xori\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKnor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "nor\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKnand: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "nand\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +//===----------------------------------------------------------------------===// +// Shift/rotate intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKshli: + Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val), + (SHLIv4i32 VECREG:$rA, uimm7:$val)>; + +def CellSDKshlqbi: + Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB), + (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>; + +def CellSDKshlqii: + Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val), + (SHLQBIIv16i8 VECREG:$rA, uimm7:$val)>; + +def CellSDKshlqby: + Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB), + (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>; + +def CellSDKshlqbyi: + Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val), + (SHLQBYIv16i8 VECREG:$rA, uimm7:$val)>; + +//===----------------------------------------------------------------------===// +// Branch/compare intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKceq: + RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceq\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKceqi: + RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ceqi\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKceqb: + RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceqb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), 
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKceqbi: + RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "ceqbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKceqh: + RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceqh\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKceqhi: + RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ceqhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; +def CellSDKcgth: + RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgth\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKcgthi: + RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "cgthi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKcgt: + RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgt\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKcgti: + RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "cgti\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKcgtb: + RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgtb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKcgtbi: + RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "cgtbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKclgth: + RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgth\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKclgthi: + RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "clgthi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKclgt: + RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgt\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKclgti: + RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "clgti\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKclgtb: + RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgtb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKclgtbi: + RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "clgtbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>; + +//===----------------------------------------------------------------------===// +// Floating-point intrinsics: 
+//===----------------------------------------------------------------------===// + +def CellSDKfa: + RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fa\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfs: + RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fs\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfm: + RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fm\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfceq: + RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fceq\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcgt: + RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcgt\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcmeq: + RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcmeq\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcmgt: + RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcmgt\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfma: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +def CellSDKfnms: + RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +def CellSDKfms: + RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +//===----------------------------------------------------------------------===// +// Double precision floating-point intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKdfa: + RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfa\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfs: + RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfs\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfm: + RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfm\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfma: + RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfma\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfnma: + RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfnma\t $rT, $rA, $rB", 
DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfnms: + RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfnms\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfms: + RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfms\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile new file mode 100644 index 000000000000..a460db3cfeda --- /dev/null +++ b/lib/Target/CellSPU/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMCellSPUCodeGen +TARGET = SPU + +BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ + SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ + SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ + SPUGenInstrInfo.inc SPUGenDAGISel.inc \ + SPUGenSubtarget.inc SPUGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt new file mode 100644 index 000000000000..4783dd5d24eb --- /dev/null +++ b/lib/Target/CellSPU/README.txt @@ -0,0 +1,90 @@ +//===- README.txt - Notes for improving CellSPU-specific code gen ---------===// + +This code was contributed by a team from the Computer Systems Research +Department in The Aerospace Corporation: + +- Scott Michel (head bottle washer and much of the non-floating point + instructions) +- Mark Thomas (floating point instructions) +- Michael AuYeung (intrinsics) +- Chandler Carruth (LLVM expertise) +- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR +OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES +OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING +OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT +LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR +REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, +OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR +SUCH DAMAGES ARE FORESEEABLE. + +--------------------------------------------------------------------------- +--WARNING--: +--WARNING--: The CellSPU work is work-in-progress and "alpha" quality code. +--WARNING--: + +If you are brave enough to try this code or help to hack on it, be sure +to add 'spu' to configure's --enable-targets option, e.g.: + + ./configure \ + --enable-targets=x86,x86_64,powerpc,spu + +--------------------------------------------------------------------------- + +TODO: +* Create a machine pass for performing dual-pipeline scheduling specifically + for CellSPU, and insert branch prediction instructions as needed. 
+
+* i32 instructions:
+
+  * i32 division (work-in-progress)
+
+* i64 support (see i64operations.c test harness):
+
+  * shifts and comparison operators: done
+  * sign and zero extension: done
+  * addition: done
+  * subtraction: needed
+  * multiplication: done
+
+* i128 support:
+
+  * zero extension, any extension: done
+  * sign extension: needed
+  * arithmetic operators (add, sub, mul, div): needed
+  * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
+
+  * or: done
+
+* f64 support
+
+  * Comparison operators:
+      SETOEQ unimplemented
+      SETOGT unimplemented
+      SETOGE unimplemented
+      SETOLT unimplemented
+      SETOLE unimplemented
+      SETONE unimplemented
+      SETO   done (lowered)
+      SETUO  done (lowered)
+      SETUEQ unimplemented
+      SETUGT unimplemented
+      SETUGE unimplemented
+      SETULT unimplemented
+      SETULE unimplemented
+      SETUNE unimplemented
+
+* LLVM vector support
+
+  * VSETCC needs to be implemented. It's pretty straightforward to code,
+    but has not been implemented yet.
+
+* Intrinsics
+
+  * spu.h intrinsics added but not tested. Need to have an operational
+    llvm-spu-gcc in order to write a unit test harness.
+
+===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
new file mode 100644
index 000000000000..77a062e0e2e3
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,102 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+  class SPUTargetMachine;
+  class FunctionPass;
+  class raw_ostream;
+
+  FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+  FunctionPass *createSPUAsmPrinterPass(raw_ostream &o,
+                                        SPUTargetMachine &tm,
+                                        CodeGenOpt::Level OptLevel,
+                                        bool verbose);
+
+  /*--== Utility functions/predicates/etc used all over the place: --==*/
+  //! Predicate test for a signed 10-bit value
+  /*!
+    \param Value The input value to be tested
+
+    This predicate tests whether the input value is a signed 10-bit value,
+    returning true if it is.
+  */
+  template<typename T>
+  inline bool isS10Constant(T Value);
+
+  template<>
+  inline bool isS10Constant<short>(short Value) {
+    int SExtValue = ((int) Value << (32 - 10)) >> (32 - 10);
+    return ((Value > 0 && Value <= (1 << 9) - 1)
+            || (Value < 0 && (short) SExtValue == Value));
+  }
+
+  template<>
+  inline bool isS10Constant<int>(int Value) {
+    return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+  }
+
+  template<>
+  inline bool isS10Constant<uint32_t>(uint32_t Value) {
+    return (Value <= ((1 << 9) - 1));
+  }
+
+  template<>
+  inline bool isS10Constant<int64_t>(int64_t Value) {
+    return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+  }
+
+  template<>
+  inline bool isS10Constant<uint64_t>(uint64_t Value) {
+    return (Value <= ((1 << 9) - 1));
+  }
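The S10 predicate reduces to a single range test: a value fits a signed 10-bit field exactly when sign-extending its low 10 bits reproduces the original value, which is the shift trick the short specialization uses. A minimal standalone sketch of that check (the fitsSigned10 name is hypothetical, not part of SPU.h):

    #include <cassert>
    #include <cstdint>

    // A value fits a signed 10-bit field iff sign-extending its low
    // 10 bits reproduces the original value (same trick as the
    // isS10Constant<short> specialization above).
    static bool fitsSigned10(int64_t Value) {
      int64_t SExt = (int64_t)((uint64_t)Value << (64 - 10)) >> (64 - 10);
      return SExt == Value;
    }

    int main() {
      assert(fitsSigned10(511));     // (1 << 9) - 1: largest S10 value
      assert(fitsSigned10(-512));    // -(1 << 9): smallest S10 value
      assert(!fitsSigned10(512));
      assert(!fitsSigned10(-513));
      return 0;
    }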
+  //! Predicate test for an unsigned 10-bit value
+  /*!
+    \param Value The input value to be tested
+
+    This predicate tests whether the input value is an unsigned 10-bit value,
+    returning true if it is.
+  */
+  inline bool isU10Constant(short Value) {
+    return (Value == (Value & 0x3ff));
+  }
+
+  inline bool isU10Constant(int Value) {
+    return (Value == (Value & 0x3ff));
+  }
+
+  inline bool isU10Constant(uint32_t Value) {
+    return (Value == (Value & 0x3ff));
+  }
+
+  inline bool isU10Constant(int64_t Value) {
+    return (Value == (Value & 0x3ff));
+  }
+
+  inline bool isU10Constant(uint64_t Value) {
+    return (Value == (Value & 0x3ff));
+  }
+}
+
+// Defines symbolic names for the SPU instructions.
+//
+#include "SPUGenInstrNames.inc"
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
new file mode 100644
index 000000000000..8327fe03d7f8
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+  dag Fragment = frag;
+}
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+  SubtargetFeature<"large_mem","UseLargeMem", "true",
+                   "Use large (>256) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+  let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+  let InstructionSet = SPUInstrInfo;
+}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 000000000000..3031fda54381
--- /dev/null
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+          (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// anyext 32->128: Any extend 32-bit to 128-bit (as a zero extension)
+def : Pat<(i128 (anyext R32C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Any extend 64-bit to 128-bit (as a zero extension)
+def : Pat<(i128 (anyext R64C:$rSrc)),
+          (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Any extend 16-bit to 128-bit (as a zero extension)
+def : Pat<(i128 (anyext R16C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Any extend 8-bit to 128-bit (as a zero extension)
+def : Pat<(i128 (anyext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+          (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 000000000000..06eb1496def7
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,394 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector versus scalar differ by a
+//    constant. In the scalar case, we're only interested in the
+//    top two 32-bit slots, whereas we're interested in an exact
+//    all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading a 64-bit constant may
+//    itself require a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+//    mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+//    [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+//    compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
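The scalar (r64) comparison sequences below compare 32-bit slots and recombine the per-slot results with gather-bits (GB). A plain-C++ model of the seteq case (illustrative only; it models GB as packing one bit per slot with slot 0 highest, so the fragments' `CGTIv4i32 ... 0xb` test passes exactly when both of the two high slots compared equal, i.e. the gathered nibble is >= 0xc):

    #include <cassert>
    #include <cstdint>

    // Scalar model of the vectorized i64 equality test: compare the
    // 32-bit halves independently, then combine the per-half results.
    static bool eq64_via_32(uint64_t a, uint64_t b) {
      bool hiEq = (uint32_t)(a >> 32) == (uint32_t)(b >> 32); // slot 0
      bool loEq = (uint32_t)a == (uint32_t)b;                 // slot 1
      // Gather one bit per slot, slot 0 in bit 3; "> 0xb" then holds
      // exactly when bits 3 and 2 are both set.
      unsigned gathered = (hiEq ? 8u : 0u) | (loEq ? 4u : 0u);
      return gathered > 0xb;
    }

    int main() {
      assert(eq64_via_32(0x123456789abcdef0ULL, 0x123456789abcdef0ULL));
      assert(!eq64_via_32(0x123456789abcdef0ULL, 0x123456789abcdef1ULL));
      assert(!eq64_via_32(0x0000000000000001ULL, 0x0000000100000001ULL));
      return 0;
    }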
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+   SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+            [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+  Pat<(cond R64C:$rA, R64C:$rB),
+      (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+    CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64eq:
+    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64compare:
+    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+                        (XSWDv2i64 CLGTr64ugt.Fragment),
+                        CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
+                        CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64LGTv2i64.Fragment>;
+
+// i64 setule:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
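I64SETCCNegCond realizes the complementary predicate by inverting the gathered i32 truth mask with `XORIr32 ..., -1`. A scalar model of how setule falls out of setugt (a sketch; the 0/-1 mask convention matches what the compare fragments gather):

    #include <cassert>
    #include <cstdint>

    // setule(a, b) is derived from setugt(a, b) by inverting the i32
    // truth mask, mirroring (XORIr32 compare.Fragment, -1).
    static uint32_t ugtMask(uint64_t a, uint64_t b) {
      return a > b ? 0xffffffffu : 0;
    }

    static uint32_t uleMask(uint64_t a, uint64_t b) {
      return ugtMask(a, b) ^ 0xffffffffu;  // XORI ..., -1
    }

    int main() {
      assert(uleMask(1, 2) == 0xffffffffu);
      assert(uleMask(2, 2) == 0xffffffffu);
      assert(uleMask(3, 2) == 0);
      return 0;
    }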
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+                                          CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+                                          CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64LGEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64eq:
+    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64compare:
+    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+                        (XSWDv2i64 CGTr64sgt.Fragment),
+                        CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+                        (XSWDv2i64 CGTv2i64sgt.Fragment),
+                        CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64GTv2i64.Fragment>;
+
+// i64 setle:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+                                          CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+                                          CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64GEv2i64.Fragment>;
+
+// i64 setlt:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+    CodeFrag<(CGv4i32 lhs, rhs)>;
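The add fragments in this section compute 64-bit sums in 32-bit slots: CG (above) produces per-slot carry bits, SHUFB rotates the low slot's carry up to the high slot, and ADDX (below) folds it in. A scalar model of the same decomposition (sketch; the function name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Scalar model of the CG/SHUFB/ADDX sequence: add the 32-bit
    // halves, then propagate the low half's carry into the high half.
    static uint64_t add64_via_32(uint64_t a, uint64_t b) {
      uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
      uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);
      uint32_t lo = aLo + bLo;
      uint32_t carry = lo < aLo ? 1 : 0;  // CG: carry out of the low slot
      uint32_t hi = aHi + bHi + carry;    // ADDX: add with rotated-in carry
      return ((uint64_t)hi << 32) | lo;
    }

    int main() {
      assert(add64_via_32(0x00000000ffffffffULL, 1) == 0x0000000100000000ULL);
      assert(add64_via_32(0x123456789abcdef0ULL, 0x0fedcba987654321ULL)
             == 0x123456789abcdef0ULL + 0x0fedcba987654321ULL);
      return 0;
    }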
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+           (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+                                  (ORv2i64_i64 R64C:$rB),
+                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+           v2i64_add<(v2i64 VECREG:$rA),
+                     (v2i64 VECREG:$rB),
+                     (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+           (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+                                  (ORv2i64_i64 R64C:$rB),
+                                  v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+                                               (ORv2i64_i64 R64C:$rB)>.Fragment,
+                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+           v2i64_sub<(v2i64 VECREG:$rA),
+                     (v2i64 VECREG:$rB),
+                     v2i64_sub_bg<(v2i64 VECREG:$rA),
+                                  (v2i64 VECREG:$rB)>.Fragment,
+                     (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+    CodeFrag<(Av4i32
+                (Av4i32
+                  (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
+                             v2i64_mul_ahi64<rA>.Fragment),
+                  (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
+                             v2i64_mul_bshlq4<rB>.Fragment)),
+                (Av4i32
+                  (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+                             v2i64_mul_ashlq4<rA>.Fragment),
+                  (Av4i32
+                    (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+                               v2i64_mul_bhi64<rB>.Fragment),
+                    (Av4i32
+                      (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+                                 v2i64_mul_bhi64<rB>.Fragment),
+                      (Av4i32
+                        (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+                                   v2i64_mul_bshlq2<rB>.Fragment),
+                        (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+                                   v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+                        v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+                        (ILv4i32 0),
+                        (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+                        (ILv4i32 0),
+                        (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+                         (ILv4i32 0),
+                         (FSMBIv4i32 0x0f0f)),
+              rCGmask>;
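The partial-product tree above builds the low 64 bits of a 64 x 64 multiply from narrower hardware multiplies (SPU's MPYU/MPYH/MPYHHU are 16 x 16 -> 32). The same schoolbook decomposition one level up, using 32 x 32 -> 64 products, shows which partial products survive into the low half (a sketch; only the tree's structure is modeled, not the slot masking):

    #include <cassert>
    #include <cstdint>

    // Schoolbook decomposition: only the partial products that land in
    // the low 64 bits are needed (the aHi*bHi term overflows entirely),
    // mirroring the v2i64_lowsum / v2i64_highprod split.
    static uint64_t mul64_lo(uint64_t a, uint64_t b) {
      uint64_t aLo = (uint32_t)a, aHi = a >> 32;
      uint64_t bLo = (uint32_t)b, bHi = b >> 32;
      uint64_t lowProd  = aLo * bLo;              // a3_b3 analogue
      uint64_t crossSum = aLo * bHi + aHi * bLo;  // a2_b3 + a3_b2 analogue
      return lowProd + (crossSum << 32);
    }

    int main() {
      assert(mul64_lo(0x100000001ULL, 3) == 0x300000003ULL);
      assert(mul64_lo(0xffffffffffffffffULL, 2) == 0xfffffffffffffffeULL);
      return 0;
    }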
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+                                 (ORv2i64_i64 R64C:$rB),
+                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for f64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+   SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+            [(set R64FP:$rT,
+                  (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 000000000000..10dc837d90b7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,115 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: Everything can be passed back via $3:
+def RetCC_SPU : CallingConv<[
+  CCIfType<[i8], CCAssignToReg<[R3]>>,
+  CCIfType<[i16], CCAssignToReg<[R3]>>,
+  CCIfType<[i32], CCAssignToReg<[R3]>>,
+  CCIfType<[i64], CCAssignToReg<[R3]>>,
+  CCIfType<[i128], CCAssignToReg<[R3]>>,
+  CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
+  CCIfType<[v2i32], CCAssignToReg<[R3]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+// (note: this isn't used yet, but presumably should be at some point, as it
+// is for other targets.)
+//===----------------------------------------------------------------------===// +/* +def CC_SPU : CallingConv<[ + CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. 
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToStack<16, 16>>
+]>;
+*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
new file mode 100644
index 000000000000..60d7ba736ac6
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.cpp
@@ -0,0 +1,29 @@
+//===-- SPUFrameInfo.cpp - Frame information for the Cell SPU -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame information implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameInfo:
+//===----------------------------------------------------------------------===//
+
+SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
+  TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+  TM(tm)
+{
+  LR[0].first = SPU::R0;
+  LR[0].second = 16;
+}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
new file mode 100644
index 000000000000..e8ca333f0b69
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -0,0 +1,79 @@
+//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(SPUFRAMEINFO_H)
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+  class SPUFrameInfo: public TargetFrameInfo {
+    const TargetMachine &TM;
+    std::pair<unsigned, int> LR[1];
+
+  public:
+    SPUFrameInfo(const TargetMachine &tm);
+
+    //! Return a function's saved spill slots
+    /*!
+      For CellSPU, a function's saved spill slots are just the link register.
+    */
+    const std::pair<unsigned, int> *
+    getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+    //! Stack slot size (16 bytes)
+    static int stackSlotSize() {
+      return 16;
+    }
+    //! Maximum frame offset representable by a signed 10-bit integer
+    /*!
+      This is the maximum frame offset that can be expressed as a 10-bit
+      integer, used in D-form addresses.
+    */
+    static int maxFrameOffset() {
+      return ((1 << 9) - 1) * stackSlotSize();
+    }
+    //! Minimum frame offset representable by a signed 10-bit integer
+    static int minFrameOffset() {
+      return -(1 << 9) * stackSlotSize();
+    }
+    //! Minimum frame size (enough to spill LR + SP)
+    static int minStackSize() {
+      return (2 * stackSlotSize());
+    }
+    //! Frame size required to spill all registers plus frame info
+    static int fullSpillSize() {
+      return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
+    }
+    //! Convert frame index to stack offset
+    static int FItoStackOffset(int frame_index) {
+      return frame_index * stackSlotSize();
+    }
+    //! Number of instructions required to overcome hint-for-branch latency
+    /*!
+ HBR (hint-for-branch) instructions can be inserted when, for example, + we know that a given function is going to be called, such as printf(), + in the control flow graph. HBRs are only inserted if a sufficient number + of instructions occurs between the HBR and the target. Currently, HBRs + take 6 cycles, ergo, the magic number 6. + */ + static int branchHintPenalty() { + return 6; + } + }; +} + +#define SPUFRAMEINFO_H 1 +#endif diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp new file mode 100644 index 000000000000..caaa71a422fb --- /dev/null +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -0,0 +1,138 @@ +//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on Cell SPU +// processors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched" + +#include "SPUHazardRecognizers.h" +#include "SPU.h" +#include "SPUInstrInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Cell SPU hazard recognizer +// +// This is the pipeline hazard recognizer for the Cell SPU processor. It does +// very little right now. +//===----------------------------------------------------------------------===// + +SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : + TII(tii), + EvenOdd(0) +{ +} + +/// Return the pipeline hazard type encountered or generated by this +/// instruction. Currently returns NoHazard. +/// +/// \return NoHazard +ScheduleHazardRecognizer::HazardType +SPUHazardRecognizer::getHazardType(SUnit *SU) +{ + // Initial thoughts on how to do this, but this code cannot work unless the + // function's prolog and epilog code are also being scheduled so that we can + // accurately determine which pipeline is being scheduled. 
+#if 0 + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); + ScheduleHazardRecognizer::HazardType retval = NoHazard; + bool mustBeOdd = false; + + switch (Node->getOpcode()) { + case SPU::LQDv16i8: + case SPU::LQDv8i16: + case SPU::LQDv4i32: + case SPU::LQDv4f32: + case SPU::LQDv2f64: + case SPU::LQDr128: + case SPU::LQDr64: + case SPU::LQDr32: + case SPU::LQDr16: + case SPU::LQAv16i8: + case SPU::LQAv8i16: + case SPU::LQAv4i32: + case SPU::LQAv4f32: + case SPU::LQAv2f64: + case SPU::LQAr128: + case SPU::LQAr64: + case SPU::LQAr32: + case SPU::LQXv4i32: + case SPU::LQXr128: + case SPU::LQXr64: + case SPU::LQXr32: + case SPU::LQXr16: + case SPU::STQDv16i8: + case SPU::STQDv8i16: + case SPU::STQDv4i32: + case SPU::STQDv4f32: + case SPU::STQDv2f64: + case SPU::STQDr128: + case SPU::STQDr64: + case SPU::STQDr32: + case SPU::STQDr16: + case SPU::STQDr8: + case SPU::STQAv16i8: + case SPU::STQAv8i16: + case SPU::STQAv4i32: + case SPU::STQAv4f32: + case SPU::STQAv2f64: + case SPU::STQAr128: + case SPU::STQAr64: + case SPU::STQAr32: + case SPU::STQAr16: + case SPU::STQAr8: + case SPU::STQXv16i8: + case SPU::STQXv8i16: + case SPU::STQXv4i32: + case SPU::STQXv4f32: + case SPU::STQXv2f64: + case SPU::STQXr128: + case SPU::STQXr64: + case SPU::STQXr32: + case SPU::STQXr16: + case SPU::STQXr8: + case SPU::RET: + mustBeOdd = true; + break; + default: + // Assume that this instruction can be on the even pipe + break; + } + + if (mustBeOdd && !EvenOdd) + retval = Hazard; + + DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " << retval << "\n"; + EvenOdd ^= 1; + return retval; +#else + return NoHazard; +#endif +} + +void SPUHazardRecognizer::EmitInstruction(SUnit *SU) +{ +} + +void SPUHazardRecognizer::AdvanceCycle() +{ + DOUT << "SPUHazardRecognizer::AdvanceCycle\n"; +} + +void SPUHazardRecognizer::EmitNoop() +{ + AdvanceCycle(); +} diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h new file mode 100644 index 000000000000..d0ae2d8e71c8 --- /dev/null +++ b/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -0,0 +1,41 @@ +//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling on the Cell SPU +// processor. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+  const TargetInstrInfo &TII;
+  int EvenOdd;
+
+public:
+  SPUHazardRecognizer(const TargetInstrInfo &TII);
+  virtual HazardType getHazardType(SUnit *SU);
+  virtual void EmitInstruction(SUnit *SU);
+  virtual void AdvanceCycle();
+  virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 000000000000..779d75d0218a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1244 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUISelLowering.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  //! ConstantSDNode predicate for i64 sign-extended, 10-bit immediates
+  bool
+  isI64IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant(CN->getSExtValue());
+  }
+
+  //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+  bool
+  isI32IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant(CN->getSExtValue());
+  }
+
+  //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+  bool
+  isI32IntU10Immediate(ConstantSDNode *CN)
+  {
+    return isU10Constant(CN->getSExtValue());
+  }
+
+  //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+  bool
+  isI16IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant(CN->getSExtValue());
+  }
+
+  //! SDNode predicate for i16 sign-extended, 10-bit immediate values
+  bool
+  isI16IntS10Immediate(SDNode *N)
+  {
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+    return (CN != 0 && isI16IntS10Immediate(CN));
+  }
+
+  //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+  bool
+  isI16IntU10Immediate(ConstantSDNode *CN)
+  {
+    return isU10Constant((short) CN->getZExtValue());
+  }
+
+  //! SDNode predicate for i16 unsigned 10-bit immediate values
+  bool
+  isI16IntU10Immediate(SDNode *N)
+  {
+    return (N->getOpcode() == ISD::Constant
+            && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
+  }
+
+  //! ConstantSDNode predicate for signed 16-bit values
+  /*!
+ \arg CN The constant SelectionDAG node holding the value + \arg Imm The returned 16-bit value, if returning true + + This predicate tests the value in \a CN to see whether it can be + represented as a 16-bit, sign-extended quantity. Returns true if + this is the case. + */ + bool + isIntS16Immediate(ConstantSDNode *CN, short &Imm) + { + MVT vt = CN->getValueType(0); + Imm = (short) CN->getZExtValue(); + if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) { + return true; + } else if (vt == MVT::i32) { + int32_t i_val = (int32_t) CN->getZExtValue(); + short s_val = (short) i_val; + return i_val == s_val; + } else { + int64_t i_val = (int64_t) CN->getZExtValue(); + short s_val = (short) i_val; + return i_val == s_val; + } + + return false; + } + + //! SDNode predicate for signed 16-bit values. + bool + isIntS16Immediate(SDNode *N, short &Imm) + { + return (N->getOpcode() == ISD::Constant + && isIntS16Immediate(cast(N), Imm)); + } + + //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. + static bool + isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) + { + MVT vt = FPN->getValueType(0); + if (vt == MVT::f32) { + int val = FloatToBits(FPN->getValueAPF().convertToFloat()); + int sval = (int) ((val << 16) >> 16); + Imm = (short) val; + return val == sval; + } + + return false; + } + + bool + isHighLow(const SDValue &Op) + { + return (Op.getOpcode() == SPUISD::IndirectAddr + && ((Op.getOperand(0).getOpcode() == SPUISD::Hi + && Op.getOperand(1).getOpcode() == SPUISD::Lo) + || (Op.getOperand(0).getOpcode() == SPUISD::Lo + && Op.getOperand(1).getOpcode() == SPUISD::Hi))); + } + + //===------------------------------------------------------------------===// + //! MVT to "useful stuff" mapping structure: + + struct valtype_map_s { + MVT VT; + unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) + bool ldresult_imm; /// LDRESULT instruction requires immediate? + unsigned lrinst; /// LR instruction + }; + + const valtype_map_s valtype_map[] = { + { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 }, + { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 }, + { MVT::i32, SPU::ORIr32, true, SPU::LRr32 }, + { MVT::i64, SPU::ORr64, false, SPU::LRr64 }, + { MVT::f32, SPU::ORf32, false, SPU::LRf32 }, + { MVT::f64, SPU::ORf64, false, SPU::LRf64 }, + // vector types... (sigh!) + { MVT::v16i8, 0, false, SPU::LRv16i8 }, + { MVT::v8i16, 0, false, SPU::LRv8i16 }, + { MVT::v4i32, 0, false, SPU::LRv4i32 }, + { MVT::v2i64, 0, false, SPU::LRv2i64 }, + { MVT::v4f32, 0, false, SPU::LRv4f32 }, + { MVT::v2f64, 0, false, SPU::LRv2f64 } + }; + + const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); + + const valtype_map_s *getValueTypeMapEntry(MVT VT) + { + const valtype_map_s *retval = 0; + for (size_t i = 0; i < n_valtype_map; ++i) { + if (valtype_map[i].VT == VT) { + retval = valtype_map + i; + break; + } + } + + +#ifndef NDEBUG + if (retval == 0) { + cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " + << VT.getMVTString() + << "\n"; + abort(); + } +#endif + + return retval; + } + + //! Generate the carry-generate shuffle mask. + SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. 
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); + } + + //! Generate the borrow-generate shuffle mask + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); + } + + //===------------------------------------------------------------------===// + /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine + /// instructions for SelectionDAG operations. + /// + class SPUDAGToDAGISel : + public SelectionDAGISel + { + SPUTargetMachine &TM; + SPUTargetLowering &SPUtli; + unsigned GlobalBaseReg; + + public: + explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), + SPUtli(*tm.getTargetLowering()) + { } + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnFunction(Fn); + return true; + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(uint32_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getSmallIPtrImm - Return a target constant of pointer type. 
+    inline SDValue getSmallIPtrImm(unsigned Imm) {
+      return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+    }
+
+    SDNode *emitBuildVector(SDValue build_vec) {
+      MVT vecVT = build_vec.getValueType();
+      MVT eltVT = vecVT.getVectorElementType();
+      SDNode *bvNode = build_vec.getNode();
+      DebugLoc dl = bvNode->getDebugLoc();
+
+      // Check to see if this vector can be represented as a CellSPU immediate
+      // constant by invoking all of the instruction selection predicates:
+      if (((vecVT == MVT::v8i16) &&
+           (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+          ((vecVT == MVT::v4i32) &&
+           ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+            (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+            (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+            (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+          ((vecVT == MVT::v2i64) &&
+           ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+            (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+            (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
+        return Select(build_vec);
+
+      // No, need to emit a constant pool spill:
+      std::vector<Constant*> CV;
+
+      for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+        ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
+        CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+      }
+
+      Constant *CP = ConstantVector::get(CV);
+      SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+      unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+      SDValue CGPoolOffset =
+              SPU::LowerConstantPool(CPIdx, *CurDAG,
+                                     SPUtli.getSPUTargetMachine());
+      return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
+                                        CurDAG->getEntryNode(), CGPoolOffset,
+                                        PseudoSourceValue::getConstantPool(), 0,
+                                        false, Alignment));
+    }
+
+    /// Select - Convert the specified operand from a target-independent to a
+    /// target-specific node if it hasn't already been changed.
+    SDNode *Select(SDValue Op);
+
+    //! Emit the instruction sequence for i64 shl
+    SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+    //! Emit the instruction sequence for i64 srl
+    SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+    //! Emit the instruction sequence for i64 sra
+    SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
+    //! Emit the necessary sequence for loading i64 constants:
+    SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
+
+    //! Alternate instruction emit sequence for loading i64 constants
+    SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
+
+    //! Returns true if the address N is an A-form (local store) address
+    bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+                         SDValue &Index);
+
+    //! D-form address predicate
+    bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+                         SDValue &Index);
+
+    /// Alternate D-form address using i7 offset predicate
+    bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+                          SDValue &Base);
+
+    /// D-form address selection workhorse
+    bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
+                               SDValue &Base, int minOffset, int maxOffset);
+
+    //! Address predicate if N can be expressed as an indexed [r+r] operation.
+    bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+                         SDValue &Index);
+
+    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+    /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) + SelectXFormAddr(Op, Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) { + Op0 = Op; + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable +#if 1 + assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); +#else + SelectAddrIdxOnly(Op, Op, Op0, Op1); +#endif + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } + + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "Cell SPU DAG->DAG Pattern Instruction Selection"; + } + + /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for + /// this target when scheduling the DAG. + virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { + const TargetInstrInfo *II = TM.getInstrInfo(); + assert(II && "No InstrInfo?"); + return new SPUHazardRecognizer(*II); + } + + // Include the pieces autogenerated from the target description. +#include "SPUGenDAGISel.inc" + }; +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void +SPUDAGToDAGISel::InstructionSelect() +{ + DEBUG(BB->dump()); + + // Select target instructions for the DAG. + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +/*! + \arg Op The ISD instruction operand + \arg N The address to be tested + \arg Base The base address + \arg Index The base address index + */ +bool +SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index) { + // These match the addr256k operand type: + MVT OffsVT = MVT::i16; + SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); + + switch (N.getOpcode()) { + case ISD::Constant: + case ISD::ConstantPool: + case ISD::GlobalAddress: + cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n"; + abort(); + /*NOTREACHED*/ + + case ISD::TargetConstant: + case ISD::TargetGlobalAddress: + case ISD::TargetJumpTable: + cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as " + << "A-form address.\n"; + abort(); + /*NOTREACHED*/ + + case SPUISD::AFormAddr: + // Just load from memory if there's only a single use of the location, + // otherwise, this will get handled below with D-form offset addresses + if (N.hasOneUse()) { + SDValue Op0 = N.getOperand(0); + switch (Op0.getOpcode()) { + case ISD::TargetConstantPool: + case ISD::TargetJumpTable: + Base = Op0; + Index = Zero; + return true; + + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GSDN = cast(Op0); + GlobalValue *GV = GSDN->getGlobal(); + if (GV->getAlignment() == 16) { + Base = Op0; + Index = Zero; + return true; + } + break; + } + } + } + break; + } + return false; +} + +bool +SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base) { + const int minDForm2Offset = -(1 << 7); + const int maxDForm2Offset = (1 << 7) - 1; + return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset, + maxDForm2Offset); +} + +/*! 
+ \arg Op The ISD instruction (ignored) + \arg N The address to be tested + \arg Base Base address register/pointer + \arg Index Base address index + + Examine the input address by a base register plus a signed 10-bit + displacement, [r+I10] (D-form address). + + \return true if \a N is a D-form address with \a Base and \a Index set + to non-empty SDValue instances. +*/ +bool +SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index) { + return DFormAddressPredicate(Op, N, Base, Index, + SPUFrameInfo::minFrameOffset(), + SPUFrameInfo::maxFrameOffset()); +} + +bool +SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index, int minOffset, + int maxOffset) { + unsigned Opc = N.getOpcode(); + MVT PtrTy = SPUtli.getPointerTy(); + + if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): + FrameIndexSDNode *FIN = dyn_cast(N); + int FI = int(FIN->getIndex()); + DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " + << FI << "\n"); + if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (Opc == ISD::ADD) { + // Generated by getelementptr + const SDValue Op0 = N.getOperand(0); + const SDValue Op1 = N.getOperand(1); + + if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) + || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast(Op1); + int32_t offset = int32_t(CN->getSExtValue()); + + if (Op0.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FIN = dyn_cast(Op0); + int FI = int(FIN->getIndex()); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI << "\n"); + + if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (offset > minOffset && offset < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast(Op0); + int32_t offset = int32_t(CN->getSExtValue()); + + if (Op1.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FIN = dyn_cast(Op1); + int FI = int(FIN->getIndex()); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI << "\n"); + + if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (offset > minOffset && offset < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op1; + return true; + } + } + } else if (Opc == SPUISD::IndirectAddr) { + // Indirect with constant offset -> D-Form address + const SDValue Op0 = N.getOperand(0); + const SDValue Op1 = N.getOperand(1); + + if (Op0.getOpcode() == SPUISD::Hi + && Op1.getOpcode() == SPUISD::Lo) { + // (SPUindirect (SPUhi , 0), (SPUlo , 0)) + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (isa(Op0) || isa(Op1)) { + int32_t offset = 0; + SDValue idxOp; + + if (isa(Op1)) { + ConstantSDNode *CN = cast(Op1); + offset = 
+        idxOp = Op0;
+      } else if (isa<ConstantSDNode>(Op0)) {
+        ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+        offset = int32_t(CN->getSExtValue());
+        idxOp = Op1;
+      }
+
+      if (offset >= minOffset && offset <= maxOffset) {
+        Base = CurDAG->getTargetConstant(offset, PtrTy);
+        Index = idxOp;
+        return true;
+      }
+    }
+  } else if (Opc == SPUISD::AFormAddr) {
+    Base = CurDAG->getTargetConstant(0, N.getValueType());
+    Index = N;
+    return true;
+  } else if (Opc == SPUISD::LDRESULT) {
+    Base = CurDAG->getTargetConstant(0, N.getValueType());
+    Index = N;
+    return true;
+  } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+    unsigned OpOpc = Op.getOpcode();
+
+    if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+      // Direct load/store without getelementptr
+      SDValue Addr, Offs;
+
+      // Get the register from CopyFromReg
+      if (Opc == ISD::CopyFromReg)
+        Addr = N.getOperand(1);
+      else
+        Addr = N;                       // Register
+
+      Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2));
+
+      if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+        if (Offs.getOpcode() == ISD::UNDEF)
+          Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+        Base = Offs;
+        Index = Addr;
+        return true;
+      }
+    } else {
+      /* If otherwise unadorned, default to D-form address with 0 offset: */
+      if (Opc == ISD::CopyFromReg) {
+        Index = N.getOperand(1);
+      } else {
+        Index = N;
+      }
+
+      Base = CurDAG->getTargetConstant(0, Index.getValueType());
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+                                 SDValue &Index) {
+  if (!SelectAFormAddr(Op, N, Base, Index)
+      && !SelectDFormAddr(Op, N, Base, Index)) {
+    // If the address is neither A-form nor D-form, punt and use an X-form
+    // address:
+    Base = N.getOperand(1);
+    Index = N.getOperand(0);
+    return true;
+  }
+
+  return false;
+}
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDValue Op) {
+  SDNode *N = Op.getNode();
+  unsigned Opc = N->getOpcode();
+  int n_ops = -1;
+  unsigned NewOpc;
+  MVT OpVT = Op.getValueType();
+  SDValue Ops[8];
+  DebugLoc dl = N->getDebugLoc();
+
+  if (N->isMachineOpcode()) {
+    return NULL;   // Already selected.
+  }
+
+  if (Opc == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+    SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
+
+    if (FI < 128) {
+      NewOpc = SPU::AIr32;
+      Ops[0] = TFI;
+      Ops[1] = Imm0;
+      n_ops = 2;
+    } else {
+      NewOpc = SPU::Ar32;
+      Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+      Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, dl, Op.getValueType(),
+                                             TFI, Imm0), 0);
+      n_ops = 2;
+    }
+  } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+    // Catch the i64 constants that end up here. Note: The backend doesn't
+    // attempt to legalize the constant (it's useless because DAGCombiner
+    // will insert 64-bit constants and we can't stop it).
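+    // (Editor's note, not in the original: SelectI64Constant below
+    // materializes the constant as a v2i64 splat via LowerV2I64Splat and
+    // then moves it into the i64 preferred slot with ORi64_v2i64.)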
+    return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
+  } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+             && OpVT == MVT::i64) {
+    SDValue Op0 = Op.getOperand(0);
+    MVT Op0VT = Op0.getValueType();
+    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+    MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+    SDValue shufMask;
+
+    switch (Op0VT.getSimpleVT()) {
+    default:
+      cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+      abort();
+      /*NOTREACHED*/
+      break;
+    case MVT::i32:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x00010203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x08090a0b, MVT::i32));
+      break;
+
+    case MVT::i16:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800a0b, MVT::i32));
+      break;
+
+    case MVT::i8:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80808003, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x8080800b, MVT::i32));
+      break;
+    }
+
+    SDNode *shufMaskLoad = emitBuildVector(shufMask);
+    SDNode *PromoteScalar =
+      SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0));
+
+    SDValue zextShuffle =
+      CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+                      SDValue(PromoteScalar, 0),
+                      SDValue(PromoteScalar, 0),
+                      SDValue(shufMaskLoad, 0));
+
+    // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+    // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+    // call SelectCode (it's already done for us.)
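+    // (Editor's note, not in the original: in a SHUFB control word, a 0x80
+    // byte selects the constant 0x00, so the 0x80808080 words above zero the
+    // high half of each doubleword while bytes such as 0x00010203 copy the
+    // source word into the low half -- zero extension in a single shuffle.)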
+    SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle));
+    return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+                                      zextShuffle));
+  } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+      emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+    return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+      emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
+
+    return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+      emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+    return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::TRUNCATE) {
+    SDValue Op0 = Op.getOperand(0);
+    if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+        && OpVT == MVT::i32
+        && Op0.getValueType() == MVT::i64) {
+      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+      //
+      // Take advantage of the fact that the upper 32 bits are in the
+      // i32 preferred slot and avoid shuffle gymnastics:
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+      if (CN != 0) {
+        unsigned shift_amt = unsigned(CN->getZExtValue());
+
+        if (shift_amt >= 32) {
+          SDNode *hi32 =
+            CurDAG->getTargetNode(SPU::ORr32_r64, dl, OpVT,
+                                  Op0.getOperand(0));
+
+          shift_amt -= 32;
+          if (shift_amt > 0) {
+            // Take care of the additional shift, if present:
+            SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+            unsigned Opc = SPU::ROTMAIr32_i32;
+
+            if (Op0.getOpcode() == ISD::SRL)
+              Opc = SPU::ROTMr32;
+
+            hi32 = CurDAG->getTargetNode(Opc, dl, OpVT, SDValue(hi32, 0),
+                                         shift);
+          }
+
+          return hi32;
+        }
+      }
+    }
+  } else if (Opc == ISD::SHL) {
+    if (OpVT == MVT::i64) {
+      return SelectSHLi64(Op, OpVT);
+    }
+  } else if (Opc == ISD::SRL) {
+    if (OpVT == MVT::i64) {
+      return SelectSRLi64(Op, OpVT);
+    }
+  } else if (Opc == ISD::SRA) {
+    if (OpVT == MVT::i64) {
+      return SelectSRAi64(Op, OpVT);
+    }
+  } else if (Opc == ISD::FNEG
+             && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+    DebugLoc dl = Op.getDebugLoc();
+    // Check if the pattern is a special form of DFNMS:
+    // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+    SDValue Op0 = Op.getOperand(0);
+    if (Op0.getOpcode() == ISD::FSUB) {
+      SDValue Op00 = Op0.getOperand(0);
+      if (Op00.getOpcode() == ISD::FMUL) {
+        unsigned Opc = SPU::DFNMSf64;
+        if (OpVT == MVT::v2f64)
+          Opc = SPU::DFNMSv2f64;
+
+        return CurDAG->getTargetNode(Opc, dl, OpVT,
+                                     Op00.getOperand(0),
+                                     Op00.getOperand(1),
+                                     Op0.getOperand(1));
+      }
+    }
+
+    SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+    SDNode *signMask = 0;
+    unsigned Opc = SPU::XORfneg64;
+
+    if (OpVT == MVT::f64) {
+      signMask = SelectI64Constant(negConst, MVT::i64, dl);
+    } else if (OpVT == MVT::v2f64) {
+      Opc = SPU::XORfnegvec;
+      signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+                                                 MVT::v2i64,
+                                                 negConst, negConst));
+    }
+
+    return CurDAG->getTargetNode(Opc, dl, OpVT,
+                                 Op.getOperand(0), SDValue(signMask, 0));
+  } else if (Opc == ISD::FABS) {
+    if (OpVT == MVT::f64) {
+      SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+      return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
+                                   Op.getOperand(0), SDValue(signMask, 0));
+    } else if (OpVT == MVT::v2f64) {
+      SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+      SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+                                       absConst, absConst);
+      SDNode *signMask = emitBuildVector(absVec);
+      return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
+                                   Op.getOperand(0), SDValue(signMask, 0));
+    }
+  } else if (Opc == SPUISD::LDRESULT) {
+    // Custom select instructions for LDRESULT
+    MVT VT = N->getValueType(0);
+    SDValue Arg = N->getOperand(0);
+    SDValue Chain = N->getOperand(1);
+    SDNode *Result;
+    const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+
+    if (vtm->ldresult_ins == 0) {
+      cerr << "LDRESULT for unsupported type: "
+           << VT.getMVTString()
+           << "\n";
+      abort();
+    }
+
+    Opc = vtm->ldresult_ins;
+    if (vtm->ldresult_imm) {
+      SDValue Zero = CurDAG->getTargetConstant(0, VT);
+
+      Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
+    } else {
+      Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
+    }
+
+    return Result;
+  } else if (Opc == SPUISD::IndirectAddr) {
+    // Look at the operands: SelectCode() will catch the cases that aren't
+    // specifically handled here.
+    //
+    // SPUInstrInfo catches the following patterns:
+    // (SPUindirect (SPUhi ...), (SPUlo ...))
+    // (SPUindirect $sp, imm)
+    MVT VT = Op.getValueType();
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    RegisterSDNode *RN;
+
+    if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+        || (Op0.getOpcode() == ISD::Register
+            && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+                && RN->getReg() != SPU::R1))) {
+      NewOpc = SPU::Ar32;
+      if (Op1.getOpcode() == ISD::Constant) {
+        ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+        Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+        NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
+      }
+      Ops[0] = Op0;
+      Ops[1] = Op1;
+      n_ops = 2;
+    }
+  }
+
+  if (n_ops > 0) {
+    if (N->hasOneUse())
+      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+    else
+      return CurDAG->getTargetNode(NewOpc, dl, OpVT, Ops, n_ops);
+  } else
+    return SelectCode(Op);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
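+ * \note (Editor's example, not in the original.) A constant shift amount is
+ * split into byte and bit parts; e.g. a shift by 35 becomes:
+ * \verbatim
+ *   bytes = 35 >> 3 = 4  --> SHLQBYIv2i64 (shift quadword left by bytes)
+ *   bits  = 35 & 7  = 3  --> SHLQBIIv2i64 (shift quadword left by bits)
+ * \endverbatim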
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
+  SDValue Op0 = Op.getOperand(0);
+  MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+  SDValue ShiftAmt = Op.getOperand(1);
+  MVT ShiftAmtVT = ShiftAmt.getValueType();
+  SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+  SDValue SelMaskVal;
+  DebugLoc dl = Op.getDebugLoc();
+
+  VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+  SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+  SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+  ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, dl, VecVT,
+                                   CurDAG->getTargetConstant(0, OpVT));
+  VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+                                 SDValue(ZeroFill, 0),
+                                 SDValue(VecOp0, 0),
+                                 SDValue(SelMask, 0));
+
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+    unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+    unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+    if (bytes > 0) {
+      Shift =
+        CurDAG->getTargetNode(SPU::SHLQBYIv2i64, dl, VecVT,
+                              SDValue(VecOp0, 0),
+                              CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+    }
+
+    if (bits > 0) {
+      Shift =
+        CurDAG->getTargetNode(SPU::SHLQBIIv2i64, dl, VecVT,
+                              SDValue((Shift != 0 ? Shift : VecOp0), 0),
+                              CurDAG->getTargetConstant(bits, ShiftAmtVT));
+    }
+  } else {
+    SDNode *Bytes =
+      CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+                            ShiftAmt,
+                            CurDAG->getTargetConstant(3, ShiftAmtVT));
+    SDNode *Bits =
+      CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+                            ShiftAmt,
+                            CurDAG->getTargetConstant(7, ShiftAmtVT));
+    Shift =
+      CurDAG->getTargetNode(SPU::SHLQBYv2i64, dl, VecVT,
+                            SDValue(VecOp0, 0), SDValue(Bytes, 0));
+    Shift =
+      CurDAG->getTargetNode(SPU::SHLQBIv2i64, dl, VecVT,
+                            SDValue(Shift, 0), SDValue(Bits, 0));
+  }
+
+  return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The srl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
+  SDValue Op0 = Op.getOperand(0);
+  MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+  SDValue ShiftAmt = Op.getOperand(1);
+  MVT ShiftAmtVT = ShiftAmt.getValueType();
+  SDNode *VecOp0, *Shift = 0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+    unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+    unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+    if (bytes > 0) {
+      Shift =
+        CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+                              SDValue(VecOp0, 0),
+                              CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+    }
+
+    if (bits > 0) {
+      Shift =
+        CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+                              SDValue((Shift != 0 ? Shift : VecOp0), 0),
+                              CurDAG->getTargetConstant(bits, ShiftAmtVT));
+    }
+  } else {
+    SDNode *Bytes =
+      CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+                            ShiftAmt,
+                            CurDAG->getTargetConstant(3, ShiftAmtVT));
+    SDNode *Bits =
+      CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+                            ShiftAmt,
+                            CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+    // Ensure that the shift amounts are negated!
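+    // (Editor's note, not in the original: SFI computes "immediate - rt",
+    // so SFI(x, 0) yields -x; the ROTQMBY/ROTQMBI forms below expect the
+    // negated, two's complement shift amount for a right shift.)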
+    Bytes = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+                                  SDValue(Bytes, 0),
+                                  CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+    Bits = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+                                 SDValue(Bits, 0),
+                                 CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+    Shift =
+      CurDAG->getTargetNode(SPU::ROTQMBYv2i64, dl, VecVT,
+                            SDValue(VecOp0, 0), SDValue(Bytes, 0));
+    Shift =
+      CurDAG->getTargetNode(SPU::ROTQMBIv2i64, dl, VecVT,
+                            SDValue(Shift, 0), SDValue(Bits, 0));
+  }
+
+  return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The sra operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
+  // Promote Op0 to vector
+  MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+  SDValue ShiftAmt = Op.getOperand(1);
+  MVT ShiftAmtVT = ShiftAmt.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDNode *VecOp0 =
+    CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
+
+  SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+  SDNode *SignRot =
+    CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+                          SDValue(VecOp0, 0), SignRotAmt);
+  SDNode *UpperHalfSign =
+    CurDAG->getTargetNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+
+  SDNode *UpperHalfSignMask =
+    CurDAG->getTargetNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+  SDNode *UpperLowerMask =
+    CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT,
+                          CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+  SDNode *UpperLowerSelect =
+    CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+                          SDValue(UpperHalfSignMask, 0),
+                          SDValue(VecOp0, 0),
+                          SDValue(UpperLowerMask, 0));
+
+  SDNode *Shift = 0;
+
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+    unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+    unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+    if (bytes > 0) {
+      bytes = 31 - bytes;
+      Shift =
+        CurDAG->getTargetNode(SPU::ROTQBYIv2i64, dl, VecVT,
+                              SDValue(UpperLowerSelect, 0),
+                              CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+    }
+
+    if (bits > 0) {
+      bits = 8 - bits;
+      Shift =
+        CurDAG->getTargetNode(SPU::ROTQBIIv2i64, dl, VecVT,
+                              SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+                              CurDAG->getTargetConstant(bits, ShiftAmtVT));
+    }
+  } else {
+    SDNode *NegShift =
+      CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+                            ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+    Shift =
+      CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+                            SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+    Shift =
+      CurDAG->getTargetNode(SPU::ROTQBIv2i64, dl, VecVT,
+                            SDValue(Shift, 0), SDValue(NegShift, 0));
+  }
+
+  return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
+                                           DebugLoc dl) {
+  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+  return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
+                                           DebugLoc dl) {
+  MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
+  SDValue i64vec =
+    SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+  // Here's where it gets interesting, because we have to parse out the
+  // subtree handed back in i64vec:
+
+  if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+    // The degenerate case where the upper and lower bits in the splat are
+    // identical:
+    SDValue Op0 = i64vec.getOperand(0);
+
+    ReplaceUses(i64vec, Op0);
+    return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+                                 SDValue(emitBuildVector(Op0), 0));
+  } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+    SDValue lhs = i64vec.getOperand(0);
+    SDValue rhs = i64vec.getOperand(1);
+    SDValue shufmask = i64vec.getOperand(2);
+
+    if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+      ReplaceUses(lhs, lhs.getOperand(0));
+      lhs = lhs.getOperand(0);
+    }
+
+    SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+                       ? lhs.getNode()
+                       : emitBuildVector(lhs));
+
+    if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+      ReplaceUses(rhs, rhs.getOperand(0));
+      rhs = rhs.getOperand(0);
+    }
+
+    SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+                       ? rhs.getNode()
+                       : emitBuildVector(rhs));
+
+    if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+      ReplaceUses(shufmask, shufmask.getOperand(0));
+      shufmask = shufmask.getOperand(0);
+    }
+
+    SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+                            ? shufmask.getNode()
+                            : emitBuildVector(shufmask));
+
+    SDNode *shufNode =
+      Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+                             SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+                             SDValue(shufMaskNode, 0)));
+
+    return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+                                 SDValue(shufNode, 0));
+  } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+    return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+                                 SDValue(emitBuildVector(i64vec), 0));
+  } else {
+    cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
+    abort();
+  }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+  return new SPUDAGToDAGISel(TM);
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 000000000000..864a914bba78
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,2980 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+  std::map<unsigned, const char *> node_names;
+
+  //! MVT mapping to useful data for Cell SPU
+  struct valtype_map_s {
+    const MVT valtype;
+    const int prefslot_byte;
+  };
+
+  const valtype_map_s valtype_map[] = {
+    { MVT::i1,   3 },
+    { MVT::i8,   3 },
+    { MVT::i16,  2 },
+    { MVT::i32,  0 },
+    { MVT::f32,  0 },
+    { MVT::i64,  0 },
+    { MVT::f64,  0 },
+    { MVT::i128, 0 }
+  };
+
+  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
+  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+    const valtype_map_s *retval = 0;
+
+    for (size_t i = 0; i < n_valtype_map; ++i) {
+      if (valtype_map[i].valtype == VT) {
+        retval = valtype_map + i;
+        break;
+      }
+    }
+
+#ifndef NDEBUG
+    if (retval == 0) {
+      cerr << "getValueTypeMapEntry returns NULL for "
+           << VT.getMVTString()
+           << "\n";
+      abort();
+    }
+#endif
+
+    return retval;
+  }
+
+  //! Expand a library call into an actual call DAG node
+  /*!
+   \note
+   This code is taken from SelectionDAGLegalize, since it is not exposed as
+   part of the LLVM SelectionDAG API.
+   */
+
+  SDValue
+  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+    // The input chain to this libcall is the entry node of the function.
+    // Legalizing the call will automatically add the previous call to the
+    // dependence.
+    SDValue InChain = DAG.getEntryNode();
+
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+      MVT ArgVT = Op.getOperand(i).getValueType();
+      const Type *ArgTy = ArgVT.getTypeForMVT();
+      Entry.Node = Op.getOperand(i);
+      Entry.Ty = ArgTy;
+      Entry.isSExt = isSigned;
+      Entry.isZExt = !isSigned;
+      Args.push_back(Entry);
+    }
+    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                           TLI.getPointerTy());
+
+    // Splice the libcall in wherever FindInputOutputChains tells us to.
+    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+    std::pair<SDValue, SDValue> CallInfo =
+      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                      CallingConv::C, false, Callee, Args, DAG,
+                      Op.getDebugLoc());
+
+    return CallInfo.first;
+  }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+  : TargetLowering(TM),
+    SPUTM(TM)
+{
+  // Fold away setcc operations if possible.
+  setPow2DivIsCheap();
+
+  // Use _setjmp/_longjmp instead of setjmp/longjmp.
+  setUseUnderscoreSetJmp(true);
+  setUseUnderscoreLongJmp(true);
+
+  // Set RTLIB libcall names as used by SPU:
+  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+  // Set up the SPU's register classes:
+  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
+  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
+  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
+  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
+  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
+  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
+  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+  // SPU has no sign or zero extended loads for i1, i8, i16:
+  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
+
+  // SPU constant load actions are custom lowered:
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+  // SPU's loads and stores have to be custom lowered:
+  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType)sctype;
+
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
+  }
+
+  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType) sctype;
+
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
+  }
+
+  // Expand the jumptable branches
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+  // Custom lower SELECT_CC for most cases, but expand by default
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);
+
+  // SPU has no intrinsics for these particular operations:
+  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+  // SPU has no SREM/UREM instructions
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  // We don't support sin/cos/sqrt/fmod
+  setOperationAction(ISD::FSIN , MVT::f64, Expand);
+  setOperationAction(ISD::FCOS , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FSIN , MVT::f32, Expand);
+  setOperationAction(ISD::FCOS , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+  // for f32!)
+  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+  // SPU can do rotate right and left, so legalize it... but customize for i8
+  // because instructions don't exist.
+
+  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+  //        .td files.
+  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);
+
+  setOperationAction(ISD::ROTL, MVT::i32, Legal);
+  setOperationAction(ISD::ROTL, MVT::i16, Legal);
+  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
+
+  // SPU has no native version of shift left/right for i8
+  setOperationAction(ISD::SHL, MVT::i8, Custom);
+  setOperationAction(ISD::SRL, MVT::i8, Custom);
+  setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+  // Make these operations legal and handle them during instruction selection:
+  setOperationAction(ISD::SHL, MVT::i64, Legal);
+  setOperationAction(ISD::SRL, MVT::i64, Legal);
+  setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+  // Custom lower i8, i32 and i64 multiplications
+  setOperationAction(ISD::MUL, MVT::i8,  Custom);
+  setOperationAction(ISD::MUL, MVT::i32, Legal);
+  setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+  // Need to custom handle (some) common i8, i64 math ops
+  setOperationAction(ISD::ADD, MVT::i8,  Custom);
+  setOperationAction(ISD::ADD, MVT::i64, Legal);
+  setOperationAction(ISD::SUB, MVT::i8,  Custom);
+  setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+  // SPU does not have BSWAP, but it does have CTLZ support for i32.
+  // CTPOP has to be custom lowered.
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
+  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+
+  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+
+  // SPU has a version of select that implements (a&~c)|(b&c), just like
+  // select ought to work:
+  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
+  setOperationAction(ISD::SELECT, MVT::i16, Legal);
+  setOperationAction(ISD::SELECT, MVT::i32, Legal);
+  setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
+  setOperationAction(ISD::SETCC, MVT::i16, Legal);
+  setOperationAction(ISD::SETCC, MVT::i32, Legal);
+  setOperationAction(ISD::SETCC, MVT::i64, Legal);
+  setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+  // Custom lower i128 -> i64 truncates
+  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+  // to expand to a libcall, hence the custom lowering:
+  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+  // FDIV on SPU requires custom lowering
+  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
+
+  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
+  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
+  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
+  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+
+  // We cannot sextinreg(i1). Expand to shifts.
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // Support label based line numbers.
+  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+  // We want to legalize GlobalAddress and ConstantPool nodes into the
+  // appropriate instructions to materialize the address.
+  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType)sctype;
+
+    setOperationAction(ISD::GlobalAddress, VT, Custom);
+    setOperationAction(ISD::ConstantPool,  VT, Custom);
+    setOperationAction(ISD::JumpTable,     VT, Custom);
+  }
+
+  // RET must be custom lowered, to meet ABI requirements
+  setOperationAction(ISD::RET, MVT::Other, Custom);
+
+  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+  // Use the default implementation.
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+  // Cell SPU has instructions for converting between i64 and fp.
+  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+  // First set operation action for all vector types to expand. Then we
+  // will selectively turn on ones that can be effectively codegen'd.
+  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+  // "Odd size" vector classes that we're willing to support:
+  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
+
+  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    MVT VT = (MVT::SimpleValueType)i;
+
+    // add/sub are legal for all supported vector VT's.
+    setOperationAction(ISD::ADD, VT, Legal);
+    setOperationAction(ISD::SUB, VT, Legal);
+    // mul has to be custom lowered.
+    setOperationAction(ISD::MUL, VT, Legal);
+
+    setOperationAction(ISD::AND,    VT, Legal);
+    setOperationAction(ISD::OR,     VT, Legal);
+    setOperationAction(ISD::XOR,    VT, Legal);
+    setOperationAction(ISD::LOAD,   VT, Legal);
+    setOperationAction(ISD::SELECT, VT, Legal);
+    setOperationAction(ISD::STORE,  VT, Legal);
+
+    // These operations need to be expanded:
+    setOperationAction(ISD::SDIV, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+
+    // Custom lower build_vector, constant pool spills, insert and
+    // extract vector elements:
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::ConstantPool, VT, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+  }
+
+  setOperationAction(ISD::AND, MVT::v16i8, Custom);
+  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
+  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+  setShiftAmountType(MVT::i32);
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+  setStackPointerRegisterToSaveRestore(SPU::R1);
+
+  // We have target-specific dag combine patterns for the following nodes:
+  setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
+
+  computeRegisterProperties();
+
+  // Set pre-RA register scheduler default to BURR, which produces slightly
+  // better code than the default (could also be TDRR, but TargetLowering.h
+  // needs a mod to support that model):
+  setSchedulingPreference(SchedulingForRegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+  if (node_names.empty()) {
+    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
+    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
+    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
+    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
+    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+      "SPUISD::ROTBYTES_LEFT_BITS";
+    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+  }
+
+  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+  return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
+  // i8, i16 and i32 are valid SETCC result types
+  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1  v16i8,ch = load
+%2  v16i8,ch = rotate %1
+%3  v4f32,ch = bitconvert %2
+%4  f32 = vec2prefslot %3
+%5  f64 = fp_extend %4
+\endverbatim
+*/
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  LoadSDNode *LN = cast<LoadSDNode>(Op);
+  SDValue the_chain = LN->getChain();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  MVT InVT = LN->getMemoryVT();
+  MVT OutVT = Op.getValueType();
+  ISD::LoadExtType ExtType = LN->getExtensionType();
+  unsigned alignment = LN->getAlignment();
+  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+  DebugLoc dl = Op.getDebugLoc();
+
+  switch (LN->getAddressingMode()) {
+  case ISD::UNINDEXED: {
+    SDValue result;
+    SDValue basePtr = LN->getBasePtr();
+    SDValue rotate;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and the rotation amount:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+        if (rotamt < 0)
+          rotamt += 16;
+
+        rotate = DAG.getConstant(rotamt, MVT::i16);
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
+                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+        // Plain aligned a-form address: rotate into preferred slot
+        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getConstant(rotamt, MVT::i16);
+      } else {
+        // Offset the rotate amount by the basePtr and the preferred slot
+        // byte offset
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+                             basePtr,
+                             DAG.getConstant(rotamt, PtrVT));
+      }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+        } else {
+          // Convert the (add <ptr>, <ptr>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }
+
+      // Offset the rotate amount by the basePtr and the preferred slot
+      // byte offset
+      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+                           basePtr,
+                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+    }
+
+    // Re-emit as a v16i8 vector load
+    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+                         LN->getSrcValue(), LN->getSrcValueOffset(),
+                         LN->isVolatile(), 16);
+
+    // Update the chain
+    the_chain = result.getValue(1);
+
+    // Rotate into the preferred slot:
+    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
+                         result.getValue(0), rotate);
+
+    // Convert the loaded v16i8 vector to the appropriate vector type
+    // specified by the operand:
+    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+
+    // Handle extending loads by extending the scalar result:
+    if (ExtType == ISD::SEXTLOAD) {
+      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+    } else if (ExtType == ISD::ZEXTLOAD) {
+      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+    } else if (ExtType == ISD::EXTLOAD) {
+      unsigned NewOpc = ISD::ANY_EXTEND;
+
+      if (OutVT.isFloatingPoint())
+        NewOpc = ISD::FP_EXTEND;
+
+      result = DAG.getNode(NewOpc, dl, OutVT, result);
+    }
+
+    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+    SDValue retops[2] = {
+      result,
+      the_chain
+    };
+
+    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+                         retops, sizeof(retops) / sizeof(retops[0]));
+    return result;
+  }
+  case ISD::PRE_INC:
+  case ISD::PRE_DEC:
+  case ISD::POST_INC:
+  case ISD::POST_DEC:
+  case ISD::LAST_INDEXED_MODE:
+    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+            "UNINDEXED\n";
+    cerr << (unsigned) LN->getAddressingMode() << "\n";
+    abort();
+    /*NOTREACHED*/
+  }
+
+  return SDValue();
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
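+
+ (Editor's sketch, not part of the original comment.) The emitted sequence
+ is roughly:
+
+\verbatim
+%1 v16i8,ch = load <16-byte block>
+%2 v16i8 = shuffle_mask <insertion offset>
+%3 v16i8 = shufb (scalar_to_vector %value), %1, %2
+%4 ch = store %3, <16-byte block>
+\endverbatim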
+ */
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  StoreSDNode *SN = cast<StoreSDNode>(Op);
+  SDValue Value = SN->getValue();
+  MVT VT = Value.getValueType();
+  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned alignment = SN->getAlignment();
+
+  switch (SN->getAddressingMode()) {
+  case ISD::UNINDEXED: {
+    // The vector type we really want to load from the 16-byte chunk.
+    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
+        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+
+    SDValue alignLoadVec;
+    SDValue basePtr = SN->getBasePtr();
+    SDValue the_chain = SN->getChain();
+    SDValue insertEltOffs;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and insertion byte:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant((offset & 0xf), PtrVT));
+
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else {
+        // Otherwise, assume it's at byte 0 of basePtr
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant(0, PtrVT));
+      }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+        } else {
+          // Convert the (add <ptr>, <ptr>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }
+
+      // Insertion point is solely determined by basePtr's contents
+      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant(0, PtrVT));
+    }
+
+    // Re-emit as a v16i8 vector load
+    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+                               SN->getSrcValue(), SN->getSrcValueOffset(),
+                               SN->isVolatile(), 16);
+
+    // Update the chain
+    the_chain = alignLoadVec.getValue(1);
+
+    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+    SDValue theValue = SN->getValue();
+    SDValue result;
+
+    if (StVT != VT
+        && (theValue.getOpcode() == ISD::AssertZext
+            || theValue.getOpcode() == ISD::AssertSext)) {
+      // Drill down and get the value for zero- and sign-extended
+      // quantities
+      theValue = theValue.getOperand(0);
+    }
+
+    // If the base pointer is already a D-form address, then just create
+    // a new D-form address with a slot offset and the original base pointer.
+    // Otherwise generate a D-form address with the slot offset relative
+    // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+      cerr << "CellSPU LowerSTORE: basePtr = ";
+      basePtr.getNode()->dump(&DAG);
+      cerr << "\n";
+    }
+#endif
+
+    SDValue insertEltOp =
+      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
+    SDValue vectorizeOp =
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+
+    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+                         vectorizeOp, alignLoadVec,
+                         DAG.getNode(ISD::BIT_CONVERT, dl,
+                                     MVT::v4i32, insertEltOp));
+
+    result = DAG.getStore(the_chain, dl, result, basePtr,
+                          LN->getSrcValue(), LN->getSrcValueOffset(),
+                          LN->isVolatile(), LN->getAlignment());
+
+#if 0 && !defined(NDEBUG)
+    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+      const SDValue &currentRoot = DAG.getRoot();
+
+      DAG.setRoot(result);
+      cerr << "------- CellSPU:LowerStore result:\n";
+      DAG.dump();
+      cerr << "-------\n";
+      DAG.setRoot(currentRoot);
+    }
+#endif
+
+    return result;
+    /*UNREACHED*/
+  }
+  case ISD::PRE_INC:
+  case ISD::PRE_DEC:
+  case ISD::POST_INC:
+  case ISD::POST_DEC:
+  case ISD::LAST_INDEXED_MODE:
+    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
+            "UNINDEXED\n";
+    cerr << (unsigned) SN->getAddressingMode() << "\n";
+    abort();
+    /*NOTREACHED*/
+  }
+
+  return SDValue();
+}
+
+//! Generate the address of a constant pool entry.
+SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  Constant *C = CP->getConstVal();
+  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  const TargetMachine &TM = DAG.getTarget();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    if (!ST->usingLargeMem()) {
+      // Just return the SDValue with the constant pool address in it.
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+    } else {
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+    }
+  }
+
+  assert(0 &&
+         "LowerConstantPool: Relocation model other than static"
+         " not supported.");
+  return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  const TargetMachine &TM = DAG.getTarget();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    if (!ST->usingLargeMem()) {
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+    } else {
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+    }
+  }
+
+  assert(0 &&
+         "LowerJumpTable: Relocation model other than static not supported.");
+  return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
+  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+  GlobalValue *GV = GSDN->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+  const TargetMachine &TM = DAG.getTarget();
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    if (!ST->usingLargeMem()) {
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+    } else {
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+    }
+  } else {
+    cerr << "LowerGlobalAddress: Relocation model other than static not "
+         << "supported.\n";
+    abort();
+    /*NOTREACHED*/
+  }
+
+  return SDValue();
+}
+
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (VT == MVT::f64) {
+    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+    assert((FP != 0) &&
+           "LowerConstantFP: Node is not ConstantFPSDNode");
+
+    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+    SDValue T = DAG.getConstant(dbits, MVT::i64);
+    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+  }
+
+  return SDValue();
+}
+
+static SDValue
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  SmallVector<SDValue, 16> ArgValues;
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+  unsigned ArgOffset = SPUFrameInfo::minStackSize();
+  unsigned ArgRegIdx = 0;
+  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  // Add DAG nodes to load the arguments or copy them out of registers.
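+  // (Editor's note, not in the original: the first NumArgRegs arguments
+  // arrive in the SPU argument registers and are copied out with
+  // CopyFromReg; any remaining arguments are loaded from fixed stack slots,
+  // one StackSlotSize (16-byte) slot apiece, starting at ArgOffset.)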
+  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+       ArgNo != e; ++ArgNo) {
+    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+    SDValue ArgVal;
+
+    if (ArgRegIdx < NumArgRegs) {
+      const TargetRegisterClass *ArgRegClass;
+
+      switch (ObjectVT.getSimpleVT()) {
+      default: {
+        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+             << ObjectVT.getMVTString()
+             << "\n";
+        abort();
+      }
+      case MVT::i8:
+        ArgRegClass = &SPU::R8CRegClass;
+        break;
+      case MVT::i16:
+        ArgRegClass = &SPU::R16CRegClass;
+        break;
+      case MVT::i32:
+        ArgRegClass = &SPU::R32CRegClass;
+        break;
+      case MVT::i64:
+        ArgRegClass = &SPU::R64CRegClass;
+        break;
+      case MVT::i128:
+        ArgRegClass = &SPU::GPRCRegClass;
+        break;
+      case MVT::f32:
+        ArgRegClass = &SPU::R32FPRegClass;
+        break;
+      case MVT::f64:
+        ArgRegClass = &SPU::R64FPRegClass;
+        break;
+      case MVT::v2f64:
+      case MVT::v4f32:
+      case MVT::v2i64:
+      case MVT::v4i32:
+      case MVT::v8i16:
+      case MVT::v16i8:
+        ArgRegClass = &SPU::VECREGRegClass;
+        break;
+      }
+
+      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+      ++ArgRegIdx;
+    } else {
+      // We need to load the argument to a virtual register if we determined
+      // above that we ran out of physical registers of the appropriate type
+      // or we're forced to do vararg
+      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+      ArgOffset += StackSlotSize;
+    }
+
+    ArgValues.push_back(ArgVal);
+    // Update the chain
+    Root = ArgVal.getOperand(0);
+  }
+
+  // vararg handling:
+  if (isVarArg) {
+    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+    // We will spill (79-3)+1 registers to the stack
+    SmallVector<SDValue, 8> MemOps;
+
+    // Create the frame slot
+
+    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
+      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
+      Root = Store.getOperand(0);
+      MemOps.push_back(Store);
+
+      // Increment address by stack slot size for the next stored argument
+      ArgOffset += StackSlotSize;
+    }
+    if (!MemOps.empty())
+      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                         &MemOps[0], MemOps.size());
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size());
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as a LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+  if (!C) return 0;
+
+  int Addr = C->getZExtValue();
+  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
+      (Addr << 14 >> 14) != Addr)
+    return 0;  // Top 14 bits have to be sext of immediate.
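+
+  // (Editor's note, not in the original: the checks above require a word
+  // aligned address that fits in an 18-bit sign-extended immediate; the
+  // value returned below is the word form, Addr >> 2.)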
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+  if (!C) return 0;
+
+  int Addr = C->getZExtValue();
+  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
+      (Addr << 14 >> 14) != Addr)
+    return 0;                   // Top 14 bits have to be sext of immediate.
+
+  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+static SDValue
+LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain = TheCall->getChain();
+  SDValue Callee = TheCall->getCallee();
+  unsigned NumOps = TheCall->getNumArgs();
+  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  // Handy pointer type
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  // Accumulate how many bytes are to be pushed on the stack, including the
+  // linkage area and the parameter passing area.  According to the SPU ABI,
+  // we minimally need space for [LR] and [SP].
+  unsigned NumStackBytes = SPUFrameInfo::minStackSize();
+
+  // Set up a copy of the stack pointer for use loading and storing any
+  // arguments that may not fit in the registers available for argument
+  // passing.
+  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+  // Figure out which arguments are going to go in registers, and which in
+  // memory.
+  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+  unsigned ArgRegIdx = 0;
+
+  // Keep track of registers passing arguments
+  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+  // And the arguments passed on the stack
+  SmallVector<SDValue, 8> MemOpChains;
+
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Arg = TheCall->getArg(i);
+
+    // PtrOff will be used to store the current argument to the stack if a
+    // register cannot be found for it.
+    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+    switch (Arg.getValueType().getSimpleVT()) {
+    default: assert(0 && "Unexpected ValueType for argument!");
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    case MVT::i128:
+      if (ArgRegIdx != NumArgRegs) {
+        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+      } else {
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+        ArgOffset += StackSlotSize;
+      }
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      if (ArgRegIdx != NumArgRegs) {
+        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+      } else {
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+        ArgOffset += StackSlotSize;
+      }
+      break;
+    case MVT::v2i64:
+    case MVT::v2f64:
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+      if (ArgRegIdx != NumArgRegs) {
+        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+      } else {
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+        ArgOffset += StackSlotSize;
+      }
+      break;
+    }
+  }
+
+  // Update the number of stack bytes actually used, insert a call sequence
+  // start:
+  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+                                                            true));
+
+  if (!MemOpChains.empty()) {
+    // Chain the stores of the outgoing stack arguments together.
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+  }
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  SmallVector<SDValue, 8> Ops;
+  unsigned CallOpc = SPUISD::CALL;
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    GlobalValue *GV = G->getGlobal();
+    MVT CalleeVT = Callee.getValueType();
+    SDValue Zero = DAG.getConstant(0, PtrVT);
+    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+
+    if (!ST->usingLargeMem()) {
+      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+      // style calls; otherwise, external symbols become BRASL calls.  This
+      // assumes that declared/defined symbols are in the same compilation
+      // unit and can be reached through PC-relative jumps.
+      //
+      // NOTE:
+      // This may be an unsafe assumption for JIT and really large compilation
+      // units.
+      if (GV->isDeclaration()) {
+        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+      } else {
+        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+      }
+    } else {
+      // "Large memory" mode: Turn all calls into indirect calls with an
+      // X-form address pair:
+      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+    }
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    MVT CalleeVT = Callee.getValueType();
+    SDValue Zero = DAG.getConstant(0, PtrVT);
+    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+                                                 Callee.getValueType());
+
+    if (!ST->usingLargeMem()) {
+      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+    } else {
+      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+    }
+  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+    // If this is an absolute destination address that appears to be a legal
+    // local store address, use the munged value.
+    Callee = SDValue(Dest, 0);
+  }
+
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are known
+  // live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+  // Returns a chain and a flag for retval copy to use.
+  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+                      &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  if (TheCall->getValueType(0) != MVT::Other)
+    InFlag = Chain.getValue(1);
+
+  SDValue ResultVals[3];
+  unsigned NumResults = 0;
+
+  // If the call has results, copy the values out of the return value
+  // registers.
+  switch (TheCall->getValueType(0).getSimpleVT()) {
+  default: assert(0 && "Unexpected ret value!");
+  case MVT::Other: break;
+  case MVT::i32:
+    if (TheCall->getValueType(1) == MVT::i32) {
+      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
+                                 MVT::i32, InFlag).getValue(1);
+      ResultVals[0] = Chain.getValue(0);
+      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+                                 Chain.getValue(2)).getValue(1);
+      ResultVals[1] = Chain.getValue(0);
+      NumResults = 2;
+    } else {
+      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+                                 InFlag).getValue(1);
+      ResultVals[0] = Chain.getValue(0);
+      NumResults = 1;
+    }
+    break;
+  case MVT::i64:
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
+                               InFlag).getValue(1);
+    ResultVals[0] = Chain.getValue(0);
+    NumResults = 1;
+    break;
+  case MVT::i128:
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
+                               InFlag).getValue(1);
+    ResultVals[0] = Chain.getValue(0);
+    NumResults = 1;
+    break;
+  case MVT::f32:
+  case MVT::f64:
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+                               InFlag).getValue(1);
+    ResultVals[0] = Chain.getValue(0);
+    NumResults = 1;
+    break;
+  case MVT::v2f64:
+  case MVT::v2i64:
+  case MVT::v4f32:
+  case MVT::v4i32:
+  case MVT::v8i16:
+  case MVT::v16i8:
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+                               InFlag).getValue(1);
+    ResultVals[0] = Chain.getValue(0);
+    NumResults = 1;
+    break;
+  }
+
+  // If the function returns void, just return the chain.
+  if (NumResults == 0)
+    return Chain;
+
+  // Otherwise, merge everything together with a MERGE_VALUES node.
+  ResultVals[NumResults++] = Chain;
+  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
+  return Res.getValue(Op.getResNo());
+}
+
+static SDValue
+LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+  CCState CCInfo(CC, isVarArg, TM, RVLocs);
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+
+  // If this is the first return lowered for this function, add the regs to
+  // the liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Op.getOperand(i*2+1), Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  if (Flag.getNode())
+    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+  else
+    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
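
The helper getVecImm, defined next, is the workhorse for all of the immediate helpers in this section: it accepts a BUILD_VECTOR only when every non-undef operand is one and the same constant. The same idea restated over plain integers, as an illustrative sketch (not from the imported sources; -1 stands in for an undef element, which a real SDNode-based check distinguishes properly):

    #include <vector>

    // Returns true and sets 'splat' when every element that is not "undef"
    // carries the same value; all-undef vectors yield no constant.
    static bool getSplatValue(const std::vector<long> &elts, long &splat) {
      bool seen = false;
      for (size_t i = 0; i < elts.size(); ++i) {
        if (elts[i] == -1) continue;                 // undef: unconstrained
        if (!seen) { splat = elts[i]; seen = true; }
        else if (splat != elts[i]) return false;     // two distinct values
      }
      return seen;
    }
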
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+  SDValue OpVal(0, 0);
+
+  // Check to see if this buildvec has a single non-undef value in its
+  // elements.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (OpVal.getNode() == 0)
+      OpVal = N->getOperand(i);
+    else if (OpVal != N->getOperand(i))
+      return 0;
+  }
+
+  if (OpVal.getNode() != 0) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+      return CN;
+    }
+  }
+
+  return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same
+/// value and the value fits into an unsigned 18-bit constant, and if so,
+/// return the constant.
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+                            MVT ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    uint64_t Value = CN->getZExtValue();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
+    if (Value <= 0x3ffff)
+      return DAG.getTargetConstant(Value, ValueType);
+  }
+
+  return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same
+/// value and the value fits into a signed 16-bit constant, and if so,
+/// return the constant.
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+                            MVT ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    int64_t Value = CN->getSExtValue();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
+    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+      return DAG.getTargetConstant(Value, ValueType);
+    }
+  }
+
+  return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same
+/// value and the value fits into a signed 10-bit constant, and if so,
+/// return the constant.
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+                            MVT ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    int64_t Value = CN->getSExtValue();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
+    if (isS10Constant(Value))
+      return DAG.getTargetConstant(Value, ValueType);
+  }
+
+  return SDValue();
+}
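
The three helpers above differ only in the immediate range they accept: 18-bit unsigned for the IL/ILA-class instructions, 16-bit signed, and 10-bit signed (the range isS10Constant tests in the source); for i64 splats they additionally require the two 32-bit halves of the value to match. The range checks as plain predicates, a sketch under those assumptions (names are illustrative):

    #include <cstdint>

    static bool fitsU18(uint64_t v) { return v <= 0x3ffff; }   // 18-bit unsigned
    static bool fitsS16(int64_t v) {                           // 16-bit signed
      return v >= -(1 << 15) && v < (1 << 15);
    }
    static bool fitsS10(int64_t v) {                           // 10-bit signed
      return v >= -(1 << 9) && v < (1 << 9);
    }
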
+/// get_vec_i8imm - Test if this vector is a vector filled with the same
+/// value and the value fits into a signed 8-bit constant, and if so, return
+/// the constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can
+/// load constant vectors.  Thus, we test to see if the upper and lower bytes
+/// are the same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+                           MVT ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    int Value = (int) CN->getZExtValue();
+    if (ValueType == MVT::i16
+        && Value <= 0xffff                 /* truncated from uint64_t */
+        && ((short) Value >> 8) == ((short) Value & 0xff))
+      return DAG.getTargetConstant(Value & 0xff, ValueType);
+    else if (ValueType == MVT::i8
+             && (Value & 0xff) == Value)
+      return DAG.getTargetConstant(Value, ValueType);
+  }
+
+  return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
+/// value whose lower 16 bits are zero (the ILHU pattern), and if so, return
+/// the upper 16 bits of the constant.
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+                             MVT ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    uint64_t Value = CN->getZExtValue();
+    if ((ValueType == MVT::i32
+         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+      return DAG.getTargetConstant(Value >> 16, ValueType);
+  }
+
+  return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+  }
+
+  return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+  }
+
+  return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
+SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+  unsigned minSplatBits = EltVT.getSizeInBits();
+
+  if (minSplatBits < 16)
+    minSplatBits = 16;
+
+  APInt APSplatBits, APSplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                            HasAnyUndefs, minSplatBits)
+      || minSplatBits < SplatBitSize)
+    return SDValue();   // Wasn't a constant vector or splat exceeded min
+
+  uint64_t SplatBits = APSplatBits.getZExtValue();
+
+  switch (VT.getSimpleVT()) {
+  default:
+    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+         << VT.getMVTString()
+         << "\n";
+    abort();
+    /*NOTREACHED*/
+  case MVT::v4f32: {
+    uint32_t Value32 = uint32_t(SplatBits);
+    assert(SplatBitSize == 32
+           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+    // NOTE: pretend the constant is an integer.  LLVM won't load FP constants
+    SDValue T = DAG.getConstant(Value32, MVT::i32);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   T, T, T, T));
+  }
+  case MVT::v2f64: {
+    uint64_t f64val = uint64_t(SplatBits);
+    assert(SplatBitSize == 64
+           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+    // NOTE: pretend the constant is an integer.  LLVM won't load FP constants
+    SDValue T = DAG.getConstant(f64val, MVT::i64);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+  }
+  case MVT::v16i8: {
+    // 8-bit constants have to be expanded to 16 bits
+    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+    SmallVector<SDValue, 8> Ops;
+
+    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
+                                   &Ops[0], Ops.size()));
+  }
+  case MVT::v8i16: {
+    unsigned short Value16 = SplatBits;
+    SDValue T = DAG.getConstant(Value16, EltVT);
+    SmallVector<SDValue, 8> Ops;
+
+    Ops.assign(8, T);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+  }
+  case MVT::v4i32: {
+    SDValue T = DAG.getConstant(unsigned(SplatBits),
+                                VT.getVectorElementType());
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+  }
+  case MVT::v2i32: {
+    SDValue T = DAG.getConstant(unsigned(SplatBits),
+                                VT.getVectorElementType());
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
+  }
+  case MVT::v2i64: {
+    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+  }
+  }
+
+  return SDValue();
+}
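
LowerV2I64Splat, next, leans on a property of shufb's control bytes: values 0x00-0x1f select a byte from the 32 concatenated source bytes, while the prefixes 10..., 110... and 111... synthesize the constant bytes 0x00, 0xff and 0x80 without touching either source. That is why the "special" halves 0, 0xffffffff and 0x80000000 need no source operand at all. A sketch of the selector semantics as I understand the public SPU ISA description (illustrative, not part of the imported sources):

    #include <cstdint>

    // Evaluate one shufb control byte against the 32-byte concatenation of
    // the two source registers.
    static uint8_t shufbByte(uint8_t ctl, const uint8_t src[32]) {
      if ((ctl & 0xe0) == 0xe0) return 0x80;   // 111xxxxx -> 0x80
      if ((ctl & 0xe0) == 0xc0) return 0xff;   // 110xxxxx -> 0xff
      if ((ctl & 0x80) == 0x80) return 0x00;   // 10xxxxxx -> 0x00
      return src[ctl & 0x1f];                  // plain byte select
    }
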
+//! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
+SDValue
+SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+                     DebugLoc dl) {
+  uint32_t upper = uint32_t(SplatVal >> 32);
+  uint32_t lower = uint32_t(SplatVal);
+
+  if (upper == lower) {
+    // Magic constant that can be matched by IL, ILA, et al.
+    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   Val, Val, Val, Val));
+  } else {
+    bool upper_special, lower_special;
+
+    // NOTE: This code creates common-case shuffle masks that can be easily
+    // detected as common expressions.  It is not attempting to create highly
+    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+    // Detect if the upper or lower half is a special shuffle mask pattern:
+    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+    // Both upper and lower are special: lower to a constant pool load.
+    if (lower_special && upper_special) {
+      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+                         SplatValCN, SplatValCN);
+    }
+
+    SDValue LO32;
+    SDValue HI32;
+    SmallVector<SDValue, 16> ShufBytes;
+
+    // Create lower vector if not a special pattern
+    if (!lower_special) {
+      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                     LO32C, LO32C, LO32C, LO32C));
+    }
+
+    // Create upper vector if not a special pattern
+    if (!upper_special) {
+      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                     HI32C, HI32C, HI32C, HI32C));
+    }
+
+    // If either upper or lower are special, then the two input operands are
+    // the same (basically, one of them is a "don't care")
+    if (lower_special)
+      LO32 = HI32;
+    if (upper_special)
+      HI32 = LO32;
+
+    for (int i = 0; i < 4; ++i) {
+      uint64_t val = 0;
+      for (int j = 0; j < 4; ++j) {
+        bool process_upper, process_lower;
+        val <<= 8;
+        process_upper = (upper_special && (i & 1) == 0);
+        process_lower = (lower_special && (i & 1) == 1);
+
+        if (process_upper || process_lower) {
+          if ((process_upper && upper == 0)
+              || (process_lower && lower == 0))
+            val |= 0x80;
+          else if ((process_upper && upper == 0xffffffff)
+                   || (process_lower && lower == 0xffffffff))
+            val |= 0xc0;
+          else if ((process_upper && upper == 0x80000000)
+                   || (process_lower && lower == 0x80000000))
+            val |= (j == 0 ? 0xe0 : 0x80);
+        } else
+          val |= i * 4 + j + ((i & 1) * 16);
+      }
+
+      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+    }
+
+    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   &ShufBytes[0], ShufBytes.size()));
+  }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate.  The code inspects V3 to ascertain whether
+/// the permutation vector, V3, is monotonically increasing with one
+/// "exception" element, e.g., (0, 1, _, 3).  If so, it generates a
+/// SHUFFLE_MASK synthetic instruction; otherwise, it spills V3 to the
+/// constant pool.  In either case, the net result eventually invokes SHUFB
+/// to permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions,
+/// generating the control word for byte/halfword/word insertion.  This takes
+/// care of a single element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as the Cell's shufb instruction.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+  // If we have a single element being moved from V1 to V2, this can be
+  // handled using the C*[DX] compute mask instructions, but the vector
+  // elements have to be monotonically increasing with one exception element.
+  MVT VecVT = V1.getValueType();
+  MVT EltVT = VecVT.getVectorElementType();
+  unsigned EltsFromV2 = 0;
+  unsigned V2Elt = 0;
+  unsigned V2EltIdx0 = 0;
+  unsigned CurrElt = 0;
+  unsigned MaxElts = VecVT.getVectorNumElements();
+  unsigned PrevElt = 0;
+  unsigned V0Elt = 0;
+  bool monotonic = true;
+  bool rotate = true;
+
+  if (EltVT == MVT::i8) {
+    V2EltIdx0 = 16;
+  } else if (EltVT == MVT::i16) {
+    V2EltIdx0 = 8;
+  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+    V2EltIdx0 = 4;
+  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+    V2EltIdx0 = 2;
+  } else
+    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+  for (unsigned i = 0; i != MaxElts; ++i) {
+    if (SVN->getMaskElt(i) < 0)
+      continue;
+
+    unsigned SrcElt = SVN->getMaskElt(i);
+
+    if (monotonic) {
+      if (SrcElt >= V2EltIdx0) {
+        if (++EltsFromV2 <= 1) {
+          V2Elt = (V2EltIdx0 - SrcElt) << 2;
+        }
+      } else if (CurrElt != SrcElt) {
+        monotonic = false;
+      }
+
+      ++CurrElt;
+    }
+
+    if (rotate) {
+      if (PrevElt > 0 && SrcElt < MaxElts) {
+        if ((PrevElt == SrcElt - 1)
+            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+          PrevElt = SrcElt;
+          if (SrcElt == 0)
+            V0Elt = i;
+        } else {
+          rotate = false;
+        }
+      } else if (PrevElt == 0) {
+        // First time through, need to keep track of previous element
+        PrevElt = SrcElt;
+      } else {
+        // This isn't a rotation; it takes elements from vector 2
+        rotate = false;
+      }
+    }
+  }
+
+  if (EltsFromV2 == 1 && monotonic) {
+    // Compute mask and shuffle
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    // Initialize temporary register to 0
+    SDValue InitTempReg =
+      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg,
+                       DAG.getConstant(0, PtrVT));
+    // Copy register's contents as index in SHUFFLE_MASK:
+    SDValue ShufMaskOp =
+      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
+                  DAG.getTargetConstant(V2Elt, MVT::i32),
+                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+    // Use shuffle mask in SHUFB synthetic instruction:
+    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+                       ShufMaskOp);
+  } else if (rotate) {
+    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+                       V1, DAG.getConstant(rotamt, MVT::i16));
+  } else {
+    // Convert the SHUFFLE_VECTOR mask's input element units to the
+    // actual bytes.
+    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+    SmallVector<SDValue, 16> ResultMask;
+    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+      for (unsigned j = 0; j < BytesPerElement; ++j)
+        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+                                             MVT::i8));
+    }
+
+    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+                                    &ResultMask[0], ResultMask.size());
+    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2,
+                       VPermMask);
+  }
+}
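
The two fast paths above can be summarized as a small classifier over the shuffle mask: "monotonic" keeps V1's identity order with at most one element pulled from V2 (lowered with a C*[DX] insertion mask), and "rotate" is a cyclic shift of V1 alone (lowered with ROTBYTES_LEFT). A simplified restatement as a sketch; this is not the exact control flow above, which also tracks byte offsets (-1 means undef):

    #include <vector>

    struct MaskShape { bool monotonic, rotate; };

    // Classify a shuffle mask of N-element vectors whose entries select
    // from V1 (0..N-1) and V2 (N..2N-1).
    static MaskShape classify(const std::vector<int> &mask, int N) {
      MaskShape s = { true, true };
      int fromV2 = 0, prev = -1;
      for (int i = 0, e = (int)mask.size(); i != e; ++i) {
        int elt = mask[i];
        if (elt < 0) continue;                    // undef: unconstrained
        if (elt >= N) {                           // element taken from V2
          if (++fromV2 > 1) s.monotonic = false;  // allow at most one
          s.rotate = false;                       // rotations use V1 only
        } else {
          if (elt != i) s.monotonic = false;      // identity elsewhere
          if (prev >= 0 && elt != (prev + 1) % N)
            s.rotate = false;                     // not a cyclic successor
          prev = elt;
        }
      }
      return s;
    }
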
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Op0.getNode()->getOpcode() == ISD::Constant) {
+    // For a constant, build the appropriate constant vector, which will
+    // eventually simplify to a vector register load.
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+    SmallVector<SDValue, 16> ConstVecValues;
+    MVT VT;
+    size_t n_copies;
+
+    // Create a constant vector:
+    switch (Op.getValueType().getSimpleVT()) {
+    default: assert(0 && "Unexpected constant value type in "
+                         "LowerSCALAR_TO_VECTOR");
+    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+    }
+
+    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+    for (size_t j = 0; j < n_copies; ++j)
+      ConstVecValues.push_back(CValue);
+
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+                       &ConstVecValues[0], ConstVecValues.size());
+  } else {
+    // Otherwise, copy the value from one register to another:
+    switch (Op0.getValueType().getSimpleVT()) {
+    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    case MVT::f32:
+    case MVT::f64:
+      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(),
+                         Op0, Op0);
+    }
+  }
+
+  return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  SDValue N = Op.getOperand(0);
+  SDValue Elt = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue retval;
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+    // Constant argument:
+    int EltNo = (int) C->getZExtValue();
+
+    // sanity checks:
+    if (VT == MVT::i8 && EltNo >= 16)
+      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+    else if (VT == MVT::i16 && EltNo >= 8)
+      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+    else if (VT == MVT::i32 && EltNo >= 4)
+      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
+    else if (VT == MVT::i64 && EltNo >= 2)
+      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
+
+    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+      // i32 and i64: Element 0 is the preferred slot
+      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+    }
+
+    // Need to generate shuffle mask and extract:
+    int prefslot_begin = -1, prefslot_end = -1;
+    int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+    switch (VT.getSimpleVT()) {
+    default:
+      assert(false && "Invalid value type!");
+    case MVT::i8: {
+      prefslot_begin = prefslot_end = 3;
+      break;
+    }
+    case MVT::i16: {
+      prefslot_begin = 2; prefslot_end = 3;
+      break;
+    }
+    case MVT::i32:
+    case MVT::f32: {
+      prefslot_begin = 0; prefslot_end = 3;
+      break;
+    }
+    case MVT::i64:
+    case MVT::f64: {
+      prefslot_begin = 0; prefslot_end = 7;
+      break;
+    }
+    }
+
+    assert(prefslot_begin != -1 && prefslot_end != -1 &&
+           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+    unsigned int ShufBytes[16];
+    for (int i = 0; i < 16; ++i) {
+      // zero fill upper part of preferred slot, don't care about the
+      // other slots:
+      unsigned int mask_val;
+      if (i <= prefslot_end) {
+        mask_val =
+          ((i < prefslot_begin)
+           ? 0x80
+           : elt_byte + (i - prefslot_begin));
+
+        ShufBytes[i] = mask_val;
+      } else
+        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+    }
+
+    SDValue ShufMask[4];
+    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+      unsigned bidx = i * 4;
+      unsigned int bits = ((ShufBytes[bidx] << 24) |
+                           (ShufBytes[bidx+1] << 16) |
+                           (ShufBytes[bidx+2] << 8) |
+                           ShufBytes[bidx+3]);
+      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+    }
+
+    SDValue ShufMaskVec =
+      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+                                     N, N, ShufMaskVec));
+  } else {
+    // Variable index: Rotate the requested element into slot 0, then
+    // replicate slot 0 across the vector
+    MVT VecVT = N.getValueType();
+    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector "
+              "type!\n";
+      abort();
+    }
+
+    // Make life easier by making sure the index is zero-extended to i32
+    if (Elt.getValueType() != MVT::i32)
+      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+    // Scale the index to a bit/byte shift quantity
+    APInt scaleFactor =
+      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()),
+            false);
+    unsigned scaleShift = scaleFactor.logBase2();
+    SDValue vecShift;
+
+    if (scaleShift > 0) {
+      // Scale the shift factor:
+      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+                        DAG.getConstant(scaleShift, MVT::i32));
+    }
+
+    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+
+    // Replicate the bytes starting at byte 0 across the entire vector (for
+    // consistency with the notion of a unified register set)
+    SDValue replicate;
+
+    switch (VT.getSimpleVT()) {
+    default:
+      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
+      abort();
+      /*NOTREACHED*/
+    case MVT::i8: {
+      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
+      break;
+    }
+    case MVT::i16: {
+      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
+      break;
+    }
+    case MVT::i32:
+    case MVT::f32: {
+      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
+      break;
+    }
+    case MVT::i64:
+    case MVT::f64: {
+      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              loFactor, hiFactor, loFactor, hiFactor);
+      break;
+    }
+    }
+
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                     vecShift, vecShift, replicate));
+  }
+
+  return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  SDValue VecOp = Op.getOperand(0);
+  SDValue ValOp = Op.getOperand(1);
+  SDValue IdxOp = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+  MVT VT = Op.getValueType();
+
+  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  // Use $sp ($1) because it's always 16-byte aligned and it's available:
+  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                DAG.getRegister(SPU::R1, PtrVT),
+                                DAG.getConstant(CN->getSExtValue(), PtrVT));
+  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+
+  SDValue result =
+    DAG.getNode(SPUISD::SHUFB, dl, VT,
+                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+                VecOp,
+                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+
+  return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+                           const TargetLowering &TLI)
+{
+  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
+  DebugLoc dl = Op.getDebugLoc();
+  MVT ShiftVT = TLI.getShiftAmountTy();
+
+  assert(Op.getValueType() == MVT::i8);
+  switch (Opc) {
+  default:
+    assert(0 && "Unhandled i8 math operator");
+    /*NOTREACHED*/
+    break;
+  case ISD::ADD: {
+    // 8-bit addition: Promote the arguments up to 16 bits and truncate
+    // the result:
+    SDValue N1 = Op.getOperand(1);
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+  }
+  case ISD::SUB: {
+    // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
+    // the result:
+    SDValue N1 = Op.getOperand(1);
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+  }
+  case ISD::ROTR:
+  case ISD::ROTL: {
+    SDValue N1 = Op.getOperand(1);
+    MVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+                       ? ISD::ZERO_EXTEND
+                       : ISD::TRUNCATE;
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    // Replicate lower 8 bits into upper 8:
+    SDValue ExpandArg =
+      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+                  DAG.getNode(ISD::SHL, dl, MVT::i16,
+                              N0, DAG.getConstant(8, MVT::i32)));
+
+    // Truncate back down to i8
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+  }
+  case ISD::SRL:
+  case ISD::SHL: {
+    SDValue N1 = Op.getOperand(1);
+    MVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = ISD::ZERO_EXTEND;
+
+      if (N1.getValueType().bitsGT(ShiftVT))
+        N1Opc = ISD::TRUNCATE;
+
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+  }
+  case ISD::SRA: {
+    SDValue N1 = Op.getOperand(1);
+    MVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = ISD::SIGN_EXTEND;
+
+      if (N1VT.bitsGT(ShiftVT))
+        N1Opc = ISD::TRUNCATE;
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+  }
+  case ISD::MUL: {
+    SDValue N1 = Op.getOperand(1);
+
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+  }
+  }
+
+  return SDValue();
+}
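
The ROTL/ROTR path above is worth a scalar restatement: because there is no 8-bit rotate, the byte is mirrored into both halves of an i16, the 16-bit rotate is performed, and the low byte of the result equals the 8-bit rotation. A self-checking sketch (illustrative, not from the imported sources):

    #include <cassert>
    #include <cstdint>

    // Rotate an 8-bit value left via a 16-bit rotate, mirroring LowerI8Math.
    static uint8_t rotl8_via_i16(uint8_t x, unsigned amt) {
      amt &= 7;
      if (amt == 0) return x;
      uint16_t dbl = (uint16_t)(x | (x << 8));  // replicate low byte into high
      uint16_t rot = (uint16_t)((dbl << amt) | (dbl >> (16 - amt)));
      return (uint8_t)rot;                      // truncate back to i8
    }

    int main() {
      assert(rotl8_via_i16(0x81, 1) == 0x03);
      assert(rotl8_via_i16(0x01, 7) == 0x80);
      return 0;
    }
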
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+  SDValue ConstVec;
+  SDValue Arg;
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  ConstVec = Op.getOperand(0);
+  Arg = Op.getOperand(1);
+  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+      ConstVec = ConstVec.getOperand(0);
+    } else {
+      ConstVec = Op.getOperand(1);
+      Arg = Op.getOperand(0);
+      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+        ConstVec = ConstVec.getOperand(0);
+      }
+    }
+  }
+
+  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+    APInt APSplatBits, APSplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                             HasAnyUndefs, minSplatBits)
+        && minSplatBits <= SplatBitSize) {
+      uint64_t SplatBits = APSplatBits.getZExtValue();
+      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+      SmallVector<SDValue, 16> tcVec;
+      tcVec.assign(16, tc);
+      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                                     &tcVec[0], tcVec.size()));
+    }
+  }
+
+  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
+  // lowered.  Return the operation, rather than a null SDValue.
+  return Op;
+}
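
The ANDBI/ORBI/XORBI forms this function targets take a single byte immediate that the hardware repeats across all 16 lanes, so the fold only fires when the constant operand is byte-splatted. The shape of that test over a raw 16-byte constant, as an illustrative sketch:

    #include <cstdint>

    // True when all 16 bytes carry the same value; 'imm' receives the byte
    // that would feed the *BI immediate field.
    static bool isByteSplat(const uint8_t bytes[16], uint8_t &imm) {
      for (int i = 1; i < 16; ++i)
        if (bytes[i] != bytes[0])
          return false;              // not the same byte everywhere
      imm = bytes[0];
      return true;
    }
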
+//! Custom lowering for CTPOP (count population)
+/*!
+  Custom lowering code that counts the number of ones in the input
+  operand.  SPU has such an instruction, but it counts the number of
+  ones per byte, which then have to be accumulated.
+*/
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
+
+  switch (VT.getSimpleVT()) {
+  default:
+    assert(false && "Invalid value type!");
+  case MVT::i8: {
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+  }
+
+  case MVT::i16: {
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+    // CNTB_result becomes the chain to which the virtual register
+    // CNTB_reg becomes associated:
+    SDValue CNTB_result =
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+    SDValue CNTB_rescopy =
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+    return DAG.getNode(ISD::AND, dl, MVT::i16,
+                       DAG.getNode(ISD::ADD, dl, MVT::i16,
+                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
+                                               Tmp1, Shift1),
+                                   Tmp1),
+                       Mask0);
+  }
+
+  case MVT::i32: {
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+    // CNTB_result becomes the chain to which all of the virtual registers
+    // CNTB_reg, SUM1_reg become associated:
+    SDValue CNTB_result =
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+    SDValue CNTB_rescopy =
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+    SDValue Comp1 =
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+                  Shift1);
+
+    SDValue Sum1 =
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+    SDValue Sum1_rescopy =
+      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+    SDValue Comp2 =
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+                  Shift2);
+    SDValue Sum2 =
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+  }
+
+  case MVT::i64:
+    break;
+  }
+
+  return SDValue();
+}
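
The i32 case above reads more clearly as scalar arithmetic: CNTB leaves one ones-count per byte, and two shift-and-add steps plus a final mask fold the four byte counts into one. A self-checking sketch, with cntb standing in for the hardware instruction (illustrative, not from the imported sources):

    #include <cassert>
    #include <cstdint>

    // Per-byte population counts packed into one 32-bit word, the shape of
    // the value CNTB leaves in the preferred slot.
    static uint32_t cntb(uint32_t x) {
      uint32_t r = 0;
      for (int byte = 0; byte < 4; ++byte) {
        uint32_t b = (x >> (8 * byte)) & 0xff, c = 0;
        for (; b; b >>= 1) c += b & 1;
        r |= c << (8 * byte);
      }
      return r;
    }

    // The shift/add/mask accumulation LowerCTPOP emits for MVT::i32.
    static uint32_t ctpop32(uint32_t x) {
      uint32_t t = cntb(x);
      t += t >> 16;     // fold the upper halfword onto the lower
      t += t >> 8;      // fold the second byte onto the first
      return t & 0xff;  // the count fits in one byte (max 32)
    }

    int main() {
      assert(ctpop32(0) == 0);
      assert(ctpop32(0xffffffffu) == 32);
      assert(ctpop32(0x80000001u) == 2);
      return 0;
    }
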
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+  All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  MVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+      || OpVT == MVT::i64) {
+    // Convert f32 / f64 to i32 / i64 via libcall.
+    RTLIB::Libcall LC =
+      (Op.getOpcode() == ISD::FP_TO_SINT)
+      ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+      : RTLIB::getFPTOUINT(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a
+  libcall.  All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  MVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+      || Op0VT == MVT::i64) {
+    // Convert i32, i64 to f64 via libcall:
+    RTLIB::Libcall LC =
+      (Op.getOpcode() == ISD::SINT_TO_FP)
+      ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+      : RTLIB::getUINTTOFP(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+  This handles MVT::f64 (double floating point) condition lowering.
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+  DebugLoc dl = Op.getDebugLoc();
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  MVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+  MVT IntVT(MVT::i64);
+
+  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+  // selected to a NOP:
+  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+  SDValue lhsHi32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                DAG.getNode(ISD::SRL, dl, IntVT,
+                            i64lhs, DAG.getConstant(32, MVT::i32)));
+  SDValue lhsHi32abs =
+    DAG.getNode(ISD::AND, dl, MVT::i32,
+                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+  SDValue lhsLo32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+  // SETO and SETUO only use the lhs operand:
+  if (CC->get() == ISD::SETO) {
+    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+    // SETUO
+    APInt ccResultAllOnes =
+      APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+    return DAG.getNode(ISD::XOR, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhs, DAG.getConstantFP(0.0, lhsVT),
+                                    ISD::SETUO),
+                       DAG.getConstant(ccResultAllOnes, ccResultVT));
+  } else if (CC->get() == ISD::SETUO) {
+    // Evaluates to true if Op0 is [SQ]NaN
+    return DAG.getNode(ISD::AND, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsHi32abs,
+                                    DAG.getConstant(0x7ff00000, MVT::i32),
+                                    ISD::SETGE),
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsLo32,
+                                    DAG.getConstant(0, MVT::i32),
+                                    ISD::SETGT));
+  }
+
+  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+  SDValue rhsHi32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                DAG.getNode(ISD::SRL, dl, IntVT,
+                            i64rhs, DAG.getConstant(32, MVT::i32)));
+
+  // If a value is negative, subtract from the sign magnitude constant:
+  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+  // Convert the sign-magnitude representation into 2's complement:
+  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      lhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+  SDValue lhsSelect =
+    DAG.getNode(ISD::SELECT, dl, IntVT,
+                lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      rhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+  SDValue rhsSelect =
+    DAG.getNode(ISD::SELECT, dl, IntVT,
+                rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+  unsigned compareOp;
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETUEQ:
+    compareOp = ISD::SETEQ; break;
+  case ISD::SETOGT:
+  case ISD::SETUGT:
+    compareOp = ISD::SETGT; break;
+  case ISD::SETOGE:
+  case ISD::SETUGE:
+    compareOp = ISD::SETGE; break;
+  case ISD::SETOLT:
+  case ISD::SETULT:
+    compareOp = ISD::SETLT; break;
+  case ISD::SETOLE:
+  case ISD::SETULE:
+    compareOp = ISD::SETLE; break;
+  case ISD::SETUNE:
+  case ISD::SETONE:
+    compareOp = ISD::SETNE; break;
+  default:
+    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+    abort();
+    break;
+  }
+
+  SDValue result =
+    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+                 (ISD::CondCode) compareOp);
+
+  if ((CC->get() & 0x8) == 0) {
+    // Ordered comparison:
+    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+  }
+
+  return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+  ISD::SELECT_CC can (generally) be implemented directly on the SPU using
+  the SELB instruction.
+
+  \note Need to revisit this in the future: if the code path through the
+  true and false value computations is longer than the latency of a branch
+  (6 cycles), then it would be more advantageous to branch and insert a new
+  basic block and branch on the condition.  However, this code does not make
+  that assumption, given the simplistic uses so far.
+ */
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  MVT VT = Op.getValueType();
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  SDValue trueval = Op.getOperand(2);
+  SDValue falseval = Op.getOperand(3);
+  SDValue condition = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // NOTE: SELB's arguments: $rA, $rB, $mask
+  //
+  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+  // where bits in $mask are 1.  CCond will be inverted, having 1s where the
+  // condition was true and 0s where the condition was false.  Hence, the
+  // arguments to SELB get reversed.
+
+  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+  // with another "cannot select select_cc" assert:
+  SDValue compare = DAG.getNode(ISD::SETCC, dl,
+                                TLI.getSetCCResultType(Op.getValueType()),
+                                lhs, rhs, condition);
+  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
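
The heart of the f64 SETCC lowering above is the sign-magnitude to two's-complement mapping: IEEE-754 doubles, reinterpreted as integers, already order correctly when non-negative, and subtracting negative bit patterns from 2^63 makes signed integer comparison agree with floating-point comparison (NaNs excluded, which is why SETO/SETUO are filtered first). A self-checking sketch of the mapping (illustrative, not from the imported sources):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Map a double's bit pattern onto a signed 64-bit key whose
    // two's-complement ordering matches the floating-point ordering.
    static int64_t orderKey(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      if (bits & 0x8000000000000000ULL)          // negative: subtract
        bits = 0x8000000000000000ULL - bits;     // from 2^63
      return (int64_t)bits;
    }

    int main() {
      assert(orderKey(-2.0) < orderKey(-1.0));
      assert(orderKey(-1.0) < orderKey(0.0));
      assert(orderKey(0.0) == orderKey(-0.0));   // signed zeros compare equal
      assert(orderKey(0.5) < orderKey(1.5));
      return 0;
    }
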
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+  // Type to truncate to
+  MVT VT = Op.getValueType();
+  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
+  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to truncate from
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+    // Create shuffle mask selecting the least significant doubleword of
+    // the quadword
+    unsigned maskHigh = 0x08090a0b;
+    unsigned maskLow = 0x0c0d0e0f;
+    // Use a shuffle to perform the truncation
+    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32),
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32));
+
+    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                       Op0, Op0, shufMask);
+
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+  }
+
+  return SDValue();                   // Leave the truncate unmolested
+}
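
The two control words above, 0x08090a0b and 0x0c0d0e0f, are simply the byte indices 8..15 packed four at a time; repeating the (high, low) pair makes every slot of the result carry the least-significant doubleword of the big-endian quadword. A sketch of the mask's expansion (illustrative):

    #include <cstdint>

    // Expand the truncation shuffle mask into its 16 control bytes:
    // 8,9,...,15 repeated twice.
    static void truncMask(uint8_t mask[16]) {
      for (int i = 0; i < 16; ++i)
        mask[i] = (uint8_t)(8 + (i & 7));
    }
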
+//! Custom (target-specific) lowering entry point
+/*!
+  This is where LLVM's DAG selection process calls to do target-specific
+  lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+  unsigned Opc = (unsigned) Op.getOpcode();
+  MVT VT = Op.getValueType();
+
+  switch (Opc) {
+  default: {
+    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+    cerr << "Op.getOpcode() = " << Opc << "\n";
+    cerr << "*Op.getNode():\n";
+    Op.getNode()->dump();
+    abort();
+  }
+  case ISD::LOAD:
+  case ISD::EXTLOAD:
+  case ISD::SEXTLOAD:
+  case ISD::ZEXTLOAD:
+    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::STORE:
+    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::ConstantPool:
+    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::ConstantFP:
+    return LowerConstantFP(Op, DAG);
+  case ISD::FORMAL_ARGUMENTS:
+    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
+  case ISD::CALL:
+    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
+  case ISD::RET:
+    return LowerRET(Op, DAG, getTargetMachine());
+
+  // i8, i64 math ops:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::ROTR:
+  case ISD::ROTL:
+  case ISD::SRL:
+  case ISD::SHL:
+  case ISD::SRA: {
+    if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc, *this);
+    break;
+  }
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return LowerFP_TO_INT(Op, DAG, *this);
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return LowerINT_TO_FP(Op, DAG, *this);
+
+  // Vector-related lowering.
+  case ISD::BUILD_VECTOR:
+    return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:
+    return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    return LowerByteImmed(Op, DAG);
+
+  // Vector and i8 multiply:
+  case ISD::MUL:
+    if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc, *this);
+
+  case ISD::CTPOP:
+    return LowerCTPOP(Op, DAG);
+
+  case ISD::SELECT_CC:
+    return LowerSELECT_CC(Op, DAG, *this);
+
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG, *this);
+
+  case ISD::TRUNCATE:
+    return LowerTRUNCATE(Op, DAG);
+  }
+
+  return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG)
+{
+#if 0
+  unsigned Opc = (unsigned) N->getOpcode();
+  MVT OpVT = N->getValueType(0);
+
+  switch (Opc) {
+  default: {
+    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+    cerr << "Op.getOpcode() = " << Opc << "\n";
+    cerr << "*Op.getNode():\n";
+    N->dump();
+    abort();
+    /*NOTREACHED*/
+  }
+  }
+#endif
+
+  /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+  TargetMachine &TM = getTargetMachine();
+#endif
+  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op0 = N->getOperand(0);   // everything has at least one operand
+  MVT NodeVT = N->getValueType(0);  // The node's value type
+  MVT Op0VT = Op0.getValueType();   // The first operand's result
+  SDValue Result;                   // Initially, empty result
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::ADD: {
+    SDValue Op1 = N->getOperand(1);
+
+    if (Op0.getOpcode() == SPUISD::IndirectAddr
+        || Op1.getOpcode() == SPUISD::IndirectAddr) {
+      // Normalize the operands to reduce repeated code
+      SDValue IndirectArg = Op0, AddArg = Op1;
+
+      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+        IndirectArg = Op1;
+        AddArg = Op0;
+      }
+
+      if (isa<ConstantSDNode>(AddArg)) {
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+        SDValue IndOp1 = IndirectArg.getOperand(1);
+
+        if (CN0->isNullValue()) {
+          // (add (SPUindirect <arg>, <arg>), 0) ->
+          //   (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+                 << "With:    (SPUindirect <arg>, <arg>)\n";
+          }
+#endif
+
+          return IndirectArg;
+        } else if (isa<ConstantSDNode>(IndOp1)) {
+          // (add (SPUindirect <arg>, <const>), <const>) ->
+          //   (SPUindirect <arg>, <const + const>)
+          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+                 << "), " << CN0->getSExtValue() << ")\n"
+                 << "With:    (SPUindirect <arg>, "
+                 << combinedConst << ")\n";
+          }
+#endif
+
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             IndirectArg, combinedValue);
+        }
+      }
+    }
+    break;
+  }
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND: {
+    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+      // (any_extend (SPUextract_elt0 <arg>)) ->
+      //   (SPUextract_elt0 <arg>)
+      // Types must match, however...
+#if !defined(NDEBUG)
+      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+        cerr << "\nReplace: ";
+        N->dump(&DAG);
+        cerr << "\nWith:    ";
+        Op0.getNode()->dump(&DAG);
+        cerr << "\n";
+      }
+#endif
+
+      return Op0;
+    }
+    break;
+  }
+  case SPUISD::IndirectAddr: {
+    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+      if (CN != 0 && CN->getZExtValue() == 0) {
+        // (SPUindirect (SPUaform <addr>, 0), 0) ->
+        //   (SPUaform <addr>, 0)
+
+        DEBUG(cerr << "Replace: ");
+        DEBUG(N->dump(&DAG));
+        DEBUG(cerr << "\nWith:    ");
+        DEBUG(Op0.getNode()->dump(&DAG));
+        DEBUG(cerr << "\n");
+
+        return Op0;
+      }
+    } else if (Op0.getOpcode() == ISD::ADD) {
+      SDValue Op1 = N->getOperand(1);
+      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+        // (SPUindirect (add <arg>, <arg>), 0) ->
+        //   (SPUindirect <arg>, <arg>)
+        if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+                 << "With:    (SPUindirect <arg>, <arg>)\n";
+          }
+#endif
+
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             Op0.getOperand(0), Op0.getOperand(1));
+        }
+      }
+    }
+    break;
+  }
+  case SPUISD::SHLQUAD_L_BITS:
+  case SPUISD::SHLQUAD_L_BYTES:
+  case SPUISD::VEC_SHL:
+  case SPUISD::VEC_SRL:
+  case SPUISD::VEC_SRA:
+  case SPUISD::ROTBYTES_LEFT: {
+    SDValue Op1 = N->getOperand(1);
+
+    // Kill degenerate vector shifts:
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+      if (CN->isNullValue()) {
+        Result = Op0;
+      }
+    }
+    break;
+  }
+  case SPUISD::PREFSLOT2VEC: {
+    switch (Op0.getOpcode()) {
+    default:
+      break;
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::SIGN_EXTEND: {
+      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+      //   <arg>
+      // but only if the SPUprefslot2vec and <arg> types match.
+      SDValue Op00 = Op0.getOperand(0);
+      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+        SDValue Op000 = Op00.getOperand(0);
+        if (Op000.getValueType() == NodeVT) {
+          Result = Op000;
+        }
+      }
+      break;
+    }
+    case SPUISD::VEC2PREFSLOT: {
+      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+      //   <arg>
+      Result = Op0.getOperand(0);
+      break;
+    }
+    }
+    break;
+  }
+  }
+
+  // Otherwise, return unchanged.
+#ifndef NDEBUG
+  if (Result.getNode()) {
+    DEBUG(cerr << "\nReplace.SPU: ");
+    DEBUG(N->dump(&DAG));
+    DEBUG(cerr << "\nWith:        ");
+    DEBUG(Result.getNode()->dump(&DAG));
+    DEBUG(cerr << "\n");
+  }
+#endif
+
+  return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+  if (ConstraintLetter.size() == 1) {
+    switch (ConstraintLetter[0]) {
+    default: break;
+    case 'b':
+    case 'r':
+    case 'f':
+    case 'v':
+    case 'y':
+      return C_RegisterClass;
+    }
+  }
+  return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                MVT VT) const
+{
+  if (Constraint.size() == 1) {
+    // GCC RS6000 Constraint Letters
+    switch (Constraint[0]) {
+    case 'b':   // R1-R31
+    case 'r':   // R0-R31
+      if (VT == MVT::i64)
+        return std::make_pair(0U, SPU::R64CRegisterClass);
+      return std::make_pair(0U, SPU::R32CRegisterClass);
+    case 'f':
+      if (VT == MVT::f32)
+        return std::make_pair(0U, SPU::R32FPRegisterClass);
+      else if (VT == MVT::f64)
+        return std::make_pair(0U, SPU::R64FPRegisterClass);
+      break;
+    case 'v':
+      return std::make_pair(0U, SPU::GPRCRegisterClass);
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+                                                  const APInt &Mask,
+                                                  APInt &KnownZero,
+                                                  APInt &KnownOne,
+                                                  const SelectionDAG &DAG,
+                                                  unsigned Depth) const {
+#if 0
+  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+  switch (Op.getOpcode()) {
+  default:
+    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+    break;
+  case CALL:
+  case SHUFB:
+  case SHUFFLE_MASK:
+  case CNTB:
+  case SPUISD::PREFSLOT2VEC:
+  case SPUISD::LDRESULT:
+  case SPUISD::VEC2PREFSLOT:
+  case SPUISD::SHLQUAD_L_BITS:
+  case SPUISD::SHLQUAD_L_BYTES:
+  case SPUISD::VEC_SHL:
+  case SPUISD::VEC_SRL:
+  case SPUISD::VEC_SRA:
+  case SPUISD::VEC_ROTL:
+  case SPUISD::VEC_ROTR:
+  case SPUISD::ROTBYTES_LEFT:
+  case SPUISD::SELECT_MASK:
+  case SPUISD::SELB:
+  }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                   unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  default:
+    return 1;
+
+  case ISD::SETCC: {
+    MVT VT = Op.getValueType();
+
+    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+      VT = MVT::i32;
+    }
+    return VT.getSizeInBits();
+  }
+  }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                char ConstraintLetter,
+                                                bool hasMemory,
+                                                std::vector<SDValue> &Ops,
+                                                SelectionDAG &DAG) const {
+  // Default, for the time being, to the base class handler
+  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
+                                               Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+                                                const Type *Ty) const {
+  // SPU's local store addresses are limited to 256K:
+  return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+  return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The SPU target isn't yet aware of offsets.
+  return false;
+}
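
For reference, isLegalAddressImmediate's bound above reflects the 256K local store: an offset is accepted when its magnitude stays below 2^18. The predicate in isolation, mirroring the source's open interval exactly, endpoints included (a sketch; the function name is illustrative):

    #include <cstdint>

    // Accept an addressing-mode offset within the SPU's 256K local store.
    static bool fitsLocalStoreOffset(int64_t v) {
      return v > -(1 << 18) && v < (1 << 18) - 1;
    }
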
+  return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 000000000000..866c632d527a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,154 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+  namespace SPUISD {
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      // Pseudo instructions:
+      RET_FLAG,            ///< Return with flag, matched by bi instruction
+
+      Hi,                  ///< High address component (upper 16)
+      Lo,                  ///< Low address component (lower 16)
+      PCRelAddr,           ///< Program counter relative address
+      AFormAddr,           ///< A-form address (local store)
+      IndirectAddr,        ///< D-Form "imm($r)" and X-form "$r($r)"
+
+      LDRESULT,            ///< Load result (value, chain)
+      CALL,                ///< CALL instruction
+      SHUFB,               ///< Vector shuffle (permute)
+      SHUFFLE_MASK,        ///< Shuffle mask
+      CNTB,                ///< Count ones in bytes (per-byte population count)
+      PREFSLOT2VEC,        ///< Promote scalar->vector
+      VEC2PREFSLOT,        ///< Extract element 0
+      SHLQUAD_L_BITS,      ///< Rotate quad left, by bits
+      SHLQUAD_L_BYTES,     ///< Rotate quad left, by bytes
+      VEC_SHL,             ///< Vector shift left
+      VEC_SRL,             ///< Vector shift right (logical)
+      VEC_SRA,             ///< Vector shift right (arithmetic)
+      VEC_ROTL,            ///< Vector rotate left
+      VEC_ROTR,            ///< Vector rotate right
+      ROTBYTES_LEFT,       ///< Rotate bytes (loads -> ROTQBYI)
+      ROTBYTES_LEFT_BITS,  ///< Rotate bytes left by bit shift count
+      SELECT_MASK,         ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+      SELB,                ///< Select bits -> (b & mask) | (a & ~mask)
+      // Markers: These aren't used to generate target-dependent nodes, but
+      // are used during instruction selection.
+      ADD64_MARKER,        ///< i64 addition marker
+      SUB64_MARKER,        ///< i64 subtraction marker
+      MUL64_MARKER,        ///< i64 multiply marker
+      LAST_SPUISD          ///< Last user-defined instruction
+    };
+  }
+
+  //! Utility functions specific to CellSPU:
+  namespace SPU {
+    SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+                           MVT ValueType);
+    SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+                           MVT ValueType);
+    SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+                           MVT ValueType);
+    SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+                          MVT ValueType);
+    SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+                            MVT ValueType);
+    SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+    SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+                              const SPUTargetMachine &TM);
+    //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
+    SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
+                            DebugLoc dl);
+  }
+
+  class SPUTargetMachine;            // forward dec'l.
+
+  class SPUTargetLowering :
+    public TargetLowering
+  {
+    int VarArgsFrameIndex;           // FrameIndex for start of varargs area.
+    int ReturnAddrIndex;             // FrameIndex for return slot.
+    SPUTargetMachine &SPUTM;
+
+  public:
+    //! The venerable constructor
+    /*!
+     This is where the CellSPU backend sets operation handling (i.e., legal,
+     custom, expand or promote.)
+     */
+    SPUTargetLowering(SPUTargetMachine &TM);
+
+    //! Get the target machine
+    SPUTargetMachine &getSPUTargetMachine() {
+      return SPUTM;
+    }
+
+    /// getTargetNodeName() - This method returns the name of a target specific
+    /// DAG node.
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    /// getSetCCResultType - Return the ValueType for ISD::SETCC
+    virtual MVT getSetCCResultType(MVT VT) const;
+
+    //! Custom lowering hooks
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    //! Custom lowering hook for nodes with illegal result types.
+    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG);
+
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+                                                const APInt &Mask,
+                                                APInt &KnownZero,
+                                                APInt &KnownOne,
+                                                const SelectionDAG &DAG,
+                                                unsigned Depth = 0) const;
+
+    virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                     unsigned Depth = 0) const;
+
+    ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+    std::pair<unsigned, const TargetRegisterClass*>
+    getRegForInlineAsmConstraint(const std::string &Constraint,
+                                 MVT VT) const;
+
+    void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
+                                      bool hasMemory,
+                                      std::vector<SDValue> &Ops,
+                                      SelectionDAG &DAG) const;
+
+    /// isLegalAddressImmediate - Return true if the integer value can be used
+    /// as the offset of the target addressing mode.
+    virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+    virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+    virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+  };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 000000000000..5e268f8767c2
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aids for building Cell SPU insts ------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+                  bool mem = true) {
+  if (mem)
+    return MIB.addImm(Offset).addFrameIndex(FI);
+  else
+    return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 000000000000..21bc275209c6
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,298 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+        : Instruction {
+  field bits<32> Inst;
+
+  let Namespace = "SPU";
+  let OutOperandList = OOL;
+  let InOperandList = IOL;
+  let AsmString = asmstr;
+  let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+             InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin> {
+  bits<7> RA;
+  bits<7> RB;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-10} = opcode;
+  let Inst{11-17} = RB;
+  let Inst{18-24} = RA;
+  let Inst{25-31} = RT;
+}
+
+let RB = 0 in {
+  // RR Format, where RB is zeroed (don't care):
+  class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin, list<dag> pattern>
+          : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+  { }
+
+  let RA = 0 in {
+    // RR Format, where RA and RB are zeroed (don't care):
+    // Used for reads from status control registers (see FPSCRRr32)
+    class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+                   InstrItinClass itin, list<dag> pattern>
+            : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+    { }
+  }
+}
+
+let RT = 0 in {
+  // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+  // says, "RT is a false target." Used in "Halt if" instructions.
+  class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin, list<dag> pattern>
+          : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+  { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<7> RA;
+  bits<7> RB;
+  bits<7> RC;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-3} = opcode;
+  let Inst{4-10} = RT;
+  let Inst{11-17} = RB;
+  let Inst{18-24} = RA;
+  let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<7> i7;
+  bits<7> RA;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-10} = opcode;
+  let Inst{11-17} = i7;
+  let Inst{18-24} = RA;
+  let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+                   InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<7> RA;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-9} = opcode;
+  let Inst{10-17} = 0;
+  let Inst{18-24} = RA;
+  let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+  class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+                   list<dag> pattern>
+         : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+  { }
+
+  let RT = 0 in {
+    // Branch instruction format (without D/E flag settings)
+    class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin, list<dag> pattern>
+          : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+    { }
+
+    class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+          : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+                   pattern>
+    { }
+
+    let RB = 0 in {
+      // Return instruction (bi, branch indirect), RA is zero (LR):
+      class RETForm<string asmstr, list<dag> pattern>
+           : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+                    pattern>
+      { }
+    }
+  }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+        : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+  bits<7> Rcalldest;
+
+  let Pattern = pattern;
+
+  let Inst{0-10} = 0b11010101100;
+  let Inst{11} = 0;
+  let Inst{12-13} = DE_flag;
+  let Inst{14-17} = 0b0000;
+  let Inst{18-24} = Rcalldest;
+  let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<10> i10;
+  bits<7> RA;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-7} = opcode;
+  let Inst{8-17} = i10;
+  let Inst{18-24} = RA;
+  let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+  class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+                   InstrItinClass itin, list<dag> pattern>
+          : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+  { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+  class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+                   InstrItinClass itin, list<dag> pattern>
+          : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+  { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<16> i16;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-8} = opcode;
+  let Inst{9-24} = i16;
+  let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+  class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+                     list<dag> pattern>
+          : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+  { }
+
+  class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+                      list<dag> pattern>
+          : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+  { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+        : SPUInstr<OOL, IOL, asmstr, itin>
+{
+  bits<18> i18;
+  bits<7> RT;
+
+  let Pattern = pattern;
+
+  let Inst{0-6} = opcode;
+  let Inst{7-24} = i18;
+  let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+                     Intrinsic IntID> :
+  RI10Form<opcode, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+           !strconcat(opc, "\t$rT, $rA, $val"), itin,
+           [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+                                            i16ImmSExt10:$val))]>;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+                     Intrinsic IntID> :
+  RI10Form<opcode, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+           !strconcat(opc, "\t$rT, $rA, $val"), itin,
+           [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+                                            i32ImmSExt10:$val))]>;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+                   Intrinsic IntID> :
+  RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+         !strconcat(opc, "\t$rT, $rA, $rB"), itin,
+         [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+                                          (v8i16 VECREG:$rB)))]>;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+                   Intrinsic IntID> :
+  RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+         !strconcat(opc, "\t$rT, $rA, $rB"), itin,
+         [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+                                          (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+    : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+  let OutOperandList = OOL;
+  let InOperandList = IOL;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+  let Inst{31-0} = 0;
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 000000000000..4af995a78139
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,693 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+  //! Predicate for an unconditional branch instruction
+  inline bool isUncondBranch(const MachineInstr *I) {
+    unsigned opc = I->getOpcode();
+
+    return (opc == SPU::BR
+            || opc == SPU::BRA
+            || opc == SPU::BI);
+  }
+
+  //! 
Predicate for a conditional branch instruction + inline bool isCondBranch(const MachineInstr *I) { + unsigned opc = I->getOpcode(); + + return (opc == SPU::BRNZr32 + || opc == SPU::BRNZv4i32 + || opc == SPU::BRZr32 + || opc == SPU::BRZv4i32 + || opc == SPU::BRHNZr16 + || opc == SPU::BRHNZv8i16 + || opc == SPU::BRHZr16 + || opc == SPU::BRHZv8i16); + } +} + +SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) + : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])), + TM(tm), + RI(*TM.getSubtargetImpl(), *this) +{ /* NOP */ } + +bool +SPUInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned& sourceReg, + unsigned& destReg, + unsigned& SrcSR, unsigned& DstSR) const { + SrcSR = DstSR = 0; // No sub-registers. + + switch (MI.getOpcode()) { + default: + break; + case SPU::ORIv4i32: + case SPU::ORIr32: + case SPU::ORHIv8i16: + case SPU::ORHIr16: + case SPU::ORHIi8i16: + case SPU::ORBIv16i8: + case SPU::ORBIr8: + case SPU::ORIi16i32: + case SPU::ORIi8i32: + case SPU::AHIvec: + case SPU::AHIr16: + case SPU::AIv4i32: + assert(MI.getNumOperands() == 3 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && + "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!"); + if (MI.getOperand(2).getImm() == 0) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + break; + case SPU::AIr32: + assert(MI.getNumOperands() == 3 && + "wrong number of operands to AIr32"); + if (MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + (MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0)) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + break; + case SPU::LRr8: + case SPU::LRr16: + case SPU::LRr32: + case SPU::LRf32: + case SPU::LRr64: + case SPU::LRf64: + case SPU::LRr128: + case SPU::LRv16i8: + case SPU::LRv8i16: + case SPU::LRv4i32: + case SPU::LRv4f32: + case SPU::LRv2i64: + case SPU::LRv2f64: + case SPU::ORv16i8_i8: + case SPU::ORv8i16_i16: + case SPU::ORv4i32_i32: + case SPU::ORv2i64_i64: + case SPU::ORv4f32_f32: + case SPU::ORv2f64_f64: + case SPU::ORi8_v16i8: + case SPU::ORi16_v8i16: + case SPU::ORi32_v4i32: + case SPU::ORi64_v2i64: + case SPU::ORf32_v4f32: + case SPU::ORf64_v2f64: +/* + case SPU::ORi128_r64: + case SPU::ORi128_f64: + case SPU::ORi128_r32: + case SPU::ORi128_f32: + case SPU::ORi128_r16: + case SPU::ORi128_r8: +*/ + case SPU::ORi128_vec: +/* + case SPU::ORr64_i128: + case SPU::ORf64_i128: + case SPU::ORr32_i128: + case SPU::ORf32_i128: + case SPU::ORr16_i128: + case SPU::ORr8_i128: +*/ + case SPU::ORvec_i128: +/* + case SPU::ORr16_r32: + case SPU::ORr8_r32: + case SPU::ORf32_r32: + case SPU::ORr32_f32: + case SPU::ORr32_r16: + case SPU::ORr32_r8: + case SPU::ORr16_r64: + case SPU::ORr8_r64: + case SPU::ORr64_r16: + case SPU::ORr64_r8: +*/ + case SPU::ORr64_r32: + case SPU::ORr32_r64: + case SPU::ORf32_r32: + case SPU::ORr32_f32: + case SPU::ORf64_r64: + case SPU::ORr64_f64: { + assert(MI.getNumOperands() == 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid SPU OR_ or LR instruction!"); + if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + break; + } + case SPU::ORv16i8: + case SPU::ORv8i16: + case SPU::ORv4i32: + case SPU::ORv2i64: + case SPU::ORr8: + case SPU::ORr16: + case SPU::ORr32: + case SPU::ORr64: + case SPU::ORr128: + case SPU::ORf32: + case SPU::ORf64: + assert(MI.getNumOperands() == 
3 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + MI.getOperand(2).isReg() && + "invalid SPU OR(vec|r32|r64|gprc) instruction!"); + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + break; + } + + return false; +} + +unsigned +SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SPU::LQDv16i8: + case SPU::LQDv8i16: + case SPU::LQDv4i32: + case SPU::LQDv4f32: + case SPU::LQDv2f64: + case SPU::LQDr128: + case SPU::LQDr64: + case SPU::LQDr32: + case SPU::LQDr16: { + const MachineOperand MOp1 = MI->getOperand(1); + const MachineOperand MOp2 = MI->getOperand(2); + if (MOp1.isImm() && MOp2.isFI()) { + FrameIndex = MOp2.getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + } + return 0; +} + +unsigned +SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SPU::STQDv16i8: + case SPU::STQDv8i16: + case SPU::STQDv4i32: + case SPU::STQDv4f32: + case SPU::STQDv2f64: + case SPU::STQDr128: + case SPU::STQDr64: + case SPU::STQDr32: + case SPU::STQDr16: + case SPU::STQDr8: { + const MachineOperand MOp1 = MI->getOperand(1); + const MachineOperand MOp2 = MI->getOperand(2); + if (MOp1.isImm() && MOp2.isFI()) { + FrameIndex = MOp2.getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + } + return 0; +} + +bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const +{ + // We support cross register class moves for our aliases, such as R3 in any + // reg class to any other reg class containing R3. This is required because + // we instruction select bitconvert i64 -> f64 as a noop for example, so our + // types have no specific meaning. + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + if (DestRC == SPU::R8CRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::R16CRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::R32CRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::R32FPRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::R64CRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::R64FPRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::GPRCRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg); + } else if (DestRC == SPU::VECREGRegisterClass) { + BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg); + } else { + // Attempt to copy unknown/unsupported register class! + return false; + } + + return true; +} + +void +SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC) const +{ + unsigned opc; + bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); + if (RC == SPU::GPRCRegisterClass) { + opc = (isValidFrameIdx ? 
SPU::STQDr128 : SPU::STQXr128); + } else if (RC == SPU::R64CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); + } else if (RC == SPU::R64FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); + } else if (RC == SPU::R32CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); + } else if (RC == SPU::R32FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); + } else if (RC == SPU::R16CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16); + } else if (RC == SPU::R8CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8; + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(opc)) + .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); +} + +void SPUInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + cerr << "storeRegToAddr() invoked!\n"; + abort(); + + if (Addr[0].isFI()) { + /* do what storeRegToStackSlot does here */ + } else { + unsigned Opc = 0; + if (RC == SPU::GPRCRegisterClass) { + /* Opc = PPC::STW; */ + } else if (RC == SPU::R16CRegisterClass) { + /* Opc = PPC::STD; */ + } else if (RC == SPU::R32CRegisterClass) { + /* Opc = PPC::STFD; */ + } else if (RC == SPU::R32FPRegisterClass) { + /* Opc = PPC::STFD; */ + } else if (RC == SPU::R64FPRegisterClass) { + /* Opc = PPC::STFS; */ + } else if (RC == SPU::VECREGRegisterClass) { + /* Opc = PPC::STVX; */ + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + } +} + +void +SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC) const +{ + unsigned opc; + bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); + if (RC == SPU::GPRCRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128); + } else if (RC == SPU::R64CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); + } else if (RC == SPU::R64FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); + } else if (RC == SPU::R32CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); + } else if (RC == SPU::R32FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); + } else if (RC == SPU::R16CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16); + } else if (RC == SPU::R8CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8; + } else { + assert(0 && "Unknown regclass in loadRegFromStackSlot!"); + abort(); + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); +} + +/*! + \note We are really pessimistic here about what kind of a load we're doing. 
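+  \note At this revision the routine below is effectively a stub: like
+  storeRegToAddr() above, it prints a diagnostic and calls abort() before the
+  commented-out opcode selection is ever reached.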
+ */ +void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) + const { + cerr << "loadRegToAddr() invoked!\n"; + abort(); + + if (Addr[0].isFI()) { + /* do what loadRegFromStackSlot does here... */ + } else { + unsigned Opc = 0; + if (RC == SPU::R8CRegisterClass) { + /* do brilliance here */ + } else if (RC == SPU::R16CRegisterClass) { + /* Opc = PPC::LWZ; */ + } else if (RC == SPU::R32CRegisterClass) { + /* Opc = PPC::LD; */ + } else if (RC == SPU::R32FPRegisterClass) { + /* Opc = PPC::LFD; */ + } else if (RC == SPU::R64FPRegisterClass) { + /* Opc = PPC::LFS; */ + } else if (RC == SPU::VECREGRegisterClass) { + /* Opc = PPC::LVX; */ + } else if (RC == SPU::GPRCRegisterClass) { + /* Opc = something else! */ + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + } +} + +//! Return true if the specified load or store can be folded +bool +SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + if (Ops.size() != 1) return false; + + // Make sure this is a reg-reg copy. + unsigned Opc = MI->getOpcode(); + + switch (Opc) { + case SPU::ORv16i8: + case SPU::ORv8i16: + case SPU::ORv4i32: + case SPU::ORv2i64: + case SPU::ORr8: + case SPU::ORr16: + case SPU::ORr32: + case SPU::ORr64: + case SPU::ORf32: + case SPU::ORf64: + if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) + return true; + break; + } + + return false; +} + +/// foldMemoryOperand - SPU, like PPC, can only fold spills into +/// copy instructions, turning them into load/store instructions. +MachineInstr * +SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const +{ + if (Ops.size() != 1) return 0; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = 0; + + switch (Opc) { + case SPU::ORv16i8: + case SPU::ORv8i16: + case SPU::ORv4i32: + case SPU::ORv2i64: + case SPU::ORr8: + case SPU::ORr16: + case SPU::ORr32: + case SPU::ORr64: + case SPU::ORf32: + case SPU::ORf64: + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), + get(SPU::STQDr32)); + + MIB.addReg(InReg, getKillRegState(isKill)); + NewMI = addFrameReference(MIB, FrameIndex); + } + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); + + MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead)); + Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr32 : SPU::STQXr32; + NewMI = addFrameReference(MIB, FrameIndex); + break; + } + } + + return NewMI; +} + +//! Branch analysis +/*! + \note This code was kiped from PPC. There may be more branch analysis for + CellSPU than what's currently done here. + */ +bool +SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. 
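+  //
+  // A minimal usage sketch (illustrative only; "TII" is assumed to be a
+  // pointer to this TargetInstrInfo, not something defined in this file):
+  //
+  //   MachineBasicBlock *TBB = 0, *FBB = 0;
+  //   SmallVector<MachineOperand, 4> Cond;
+  //   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ false)) {
+  //     // Success: TBB/FBB/Cond describe the terminators; for SPU, Cond[0]
+  //     // is the branch opcode and Cond[1] the register it tests.
+  //   }
+  //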
+ MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranch(LastInst)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondBranch(LastInst)) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + DEBUG(cerr << "Pushing LastInst: "); + DEBUG(LastInst->dump()); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a conditional and unconditional branch, handle it. + if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { + TBB = SecondLastInst->getOperand(1).getMBB(); + DEBUG(cerr << "Pushing SecondLastInst: "); + DEBUG(SecondLastInst->dump()); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned +SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return 0; + --I; + if (!isCondBranch(I) && !isUncondBranch(I)) + return 0; + + // Remove the first branch. + DEBUG(cerr << "Removing branch: "); + DEBUG(I->dump()); + I->eraseFromParent(); + I = MBB.end(); + if (I == MBB.begin()) + return 1; + + --I; + if (!(isCondBranch(I) || isUncondBranch(I))) + return 1; + + // Remove the second branch. + DEBUG(cerr << "Removing second branch: "); + DEBUG(I->dump()); + I->eraseFromParent(); + return 2; +} + +unsigned +SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "SPU branch conditions have two components!"); + + // One-way branch. 
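+  // (Illustrative note: per AnalyzeBranch above, Cond[0] carries the branch
+  // opcode as an immediate and Cond[1] the tested register, which is why the
+  // builders below can recreate a conditional branch directly from
+  // get(Cond[0].getImm()).)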
+ if (FBB == 0) { + if (Cond.empty()) { + // Unconditional branch + MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR)); + MIB.addMBB(TBB); + + DEBUG(cerr << "Inserted one-way uncond branch: "); + DEBUG((*MIB).dump()); + } else { + // Conditional branch + MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); + MIB.addReg(Cond[1].getReg()).addMBB(TBB); + + DEBUG(cerr << "Inserted one-way cond branch: "); + DEBUG((*MIB).dump()); + } + return 1; + } else { + MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); + MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR)); + + // Two-way Conditional Branch. + MIB.addReg(Cond[1].getReg()).addMBB(TBB); + MIB2.addMBB(FBB); + + DEBUG(cerr << "Inserted conditional branch: "); + DEBUG((*MIB).dump()); + DEBUG(cerr << "part 2: "); + DEBUG((*MIB2).dump()); + return 2; + } +} + +bool +SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + return (!MBB.empty() && isUncondBranch(&MBB.back())); +} +//! Reverses a branch's condition, returning false on success. +bool +SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl &Cond) + const { + // Pretty brainless way of inverting the condition, but it works, considering + // there are only two conditions... + static struct { + unsigned Opc; //! The incoming opcode + unsigned RevCondOpc; //! The reversed condition opcode + } revconds[] = { + { SPU::BRNZr32, SPU::BRZr32 }, + { SPU::BRNZv4i32, SPU::BRZv4i32 }, + { SPU::BRZr32, SPU::BRNZr32 }, + { SPU::BRZv4i32, SPU::BRNZv4i32 }, + { SPU::BRHNZr16, SPU::BRHZr16 }, + { SPU::BRHNZv8i16, SPU::BRHZv8i16 }, + { SPU::BRHZr16, SPU::BRHNZr16 }, + { SPU::BRHZv8i16, SPU::BRHNZv8i16 } + }; + + unsigned Opc = unsigned(Cond[0].getImm()); + // Pretty dull mapping between the two conditions that SPU can generate: + for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) { + if (revconds[i].Opc == Opc) { + Cond[0].setImm(revconds[i].RevCondOpc); + return false; + } + } + + return true; +} diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h new file mode 100644 index 000000000000..ffb40875ff10 --- /dev/null +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -0,0 +1,114 @@ +//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the CellSPU implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_INSTRUCTIONINFO_H +#define SPU_INSTRUCTIONINFO_H + +#include "SPU.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "SPURegisterInfo.h" + +namespace llvm { + //! Cell SPU instruction information class + class SPUInstrInfo : public TargetInstrInfoImpl { + SPUTargetMachine &TM; + const SPURegisterInfo RI; + protected: + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + public: + explicit SPUInstrInfo(SPUTargetMachine &tm); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As
+    /// such, whenever a client has an instance of instruction info, it should
+    /// always be able to get register info as well (through this method).
+    ///
+    virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+    /// Return true if the instruction is a register to register move and return
+    /// the source and dest operands and their sub-register indices by reference.
+    virtual bool isMoveInstr(const MachineInstr &MI,
+                             unsigned &SrcReg, unsigned &DstReg,
+                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+    unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                 int &FrameIndex) const;
+    unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                int &FrameIndex) const;
+
+    virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI,
+                              unsigned DestReg, unsigned SrcReg,
+                              const TargetRegisterClass *DestRC,
+                              const TargetRegisterClass *SrcRC) const;
+
+    //! Store a register to a stack slot, based on its register class.
+    virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned SrcReg, bool isKill, int FrameIndex,
+                                     const TargetRegisterClass *RC) const;
+
+    //! Store a register to an address, based on its register class
+    virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                                SmallVectorImpl<MachineOperand> &Addr,
+                                const TargetRegisterClass *RC,
+                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+    //! Load a register from a stack slot, based on its register class.
+    virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      unsigned DestReg, int FrameIndex,
+                                      const TargetRegisterClass *RC) const;
+
+    //! Load a register from an address, based on its register class
+    virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                 SmallVectorImpl<MachineOperand> &Addr,
+                                 const TargetRegisterClass *RC,
+                                 SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+    //! Return true if the specified load or store can be folded
+    virtual
+    bool canFoldMemoryOperand(const MachineInstr *MI,
+                              const SmallVectorImpl<unsigned> &Ops) const;
+
+    //! Return true if the specified block does not fall through
+    virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+    //! Reverses a branch's condition, returning false on success.
+    virtual
+    bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+    virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                               MachineBasicBlock *&FBB,
+                               SmallVectorImpl<MachineOperand> &Cond,
+                               bool AllowModify) const;
+
+    virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+    virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                  MachineBasicBlock *FBB,
+                                  const SmallVectorImpl<MachineOperand> &Cond) const;
+  };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 000000000000..63eb85a2921e
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4614 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// Cell SPU Instructions: +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TODO Items (not urgent today, but would be nice, low priority) +// +// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by +// concatenating the byte argument b as "bbbb". Could recognize this bit pattern +// in 16-bit and 32-bit constants and reduce instruction count. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pseudo instructions: +//===----------------------------------------------------------------------===// + +let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt), + "${:comment} ADJCALLSTACKDOWN", + [(callseq_start timm:$amt)]>; + def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), + "${:comment} ADJCALLSTACKUP", + [(callseq_end timm:$amt)]>; +} + +//===----------------------------------------------------------------------===// +// DWARF debugging Pseudo Instructions +//===----------------------------------------------------------------------===// + +def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), + ".loc $file, $line, $col", + [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; + +//===----------------------------------------------------------------------===// +// Loads: +// NB: The ordering is actually important, since the instruction selection +// will try each of the instructions in sequence, i.e., the D-form first with +// the 10-bit displacement, then the A-form with the 16 bit displacement, and +// finally the X-form with the register-register. 
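+// For illustration, the three forms written as assembly (operand scaling per
+// the SPU ISA; summarized here as an aid, not taken from this file):
+//
+//   lqd $rT, imm10($rA)    ; D-form: 10-bit signed displacement, x16 scaled
+//   lqa $rT, imm16         ; A-form: 16-bit word-scaled absolute (256K LS)
+//   lqx $rT, $rA, $rB      ; X-form: register + register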
+//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1 in { + class LoadDFormVec + : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src), + "lqd\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load dform_addr:$src))]> + { } + + class LoadDForm + : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src), + "lqd\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load dform_addr:$src))]> + { } + + multiclass LoadDForms + { + def v16i8: LoadDFormVec; + def v8i16: LoadDFormVec; + def v4i32: LoadDFormVec; + def v2i64: LoadDFormVec; + def v4f32: LoadDFormVec; + def v2f64: LoadDFormVec; + + def v2i32: LoadDFormVec; + + def r128: LoadDForm; + def r64: LoadDForm; + def r32: LoadDForm; + def f32: LoadDForm; + def f64: LoadDForm; + def r16: LoadDForm; + def r8: LoadDForm; + } + + class LoadAFormVec + : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load aform_addr:$src))]> + { } + + class LoadAForm + : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load aform_addr:$src))]> + { } + + multiclass LoadAForms + { + def v16i8: LoadAFormVec; + def v8i16: LoadAFormVec; + def v4i32: LoadAFormVec; + def v2i64: LoadAFormVec; + def v4f32: LoadAFormVec; + def v2f64: LoadAFormVec; + + def v2i32: LoadAFormVec; + + def r128: LoadAForm; + def r64: LoadAForm; + def r32: LoadAForm; + def f32: LoadAForm; + def f64: LoadAForm; + def r16: LoadAForm; + def r8: LoadAForm; + } + + class LoadXFormVec + : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load xform_addr:$src))]> + { } + + class LoadXForm + : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load xform_addr:$src))]> + { } + + multiclass LoadXForms + { + def v16i8: LoadXFormVec; + def v8i16: LoadXFormVec; + def v4i32: LoadXFormVec; + def v2i64: LoadXFormVec; + def v4f32: LoadXFormVec; + def v2f64: LoadXFormVec; + + def v2i32: LoadXFormVec; + + def r128: LoadXForm; + def r64: LoadXForm; + def r32: LoadXForm; + def f32: LoadXForm; + def f64: LoadXForm; + def r16: LoadXForm; + def r8: LoadXForm; + } + + defm LQA : LoadAForms; + defm LQD : LoadDForms; + defm LQX : LoadXForms; + +/* Load quadword, PC relative: Not much use at this point in time. + Might be of use later for relocatable code. It's effectively the + same as LQA, but uses PC-relative addressing. 
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), + "lqr\t$rT, $disp", LoadStore, + [(set VECREG:$rT, (load iaddr:$disp))]>; + */ +} + +//===----------------------------------------------------------------------===// +// Stores: +//===----------------------------------------------------------------------===// +class StoreDFormVec + : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src), + "stqd\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), dform_addr:$src)]> +{ } + +class StoreDForm + : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src), + "stqd\t$rT, $src", + LoadStore, + [(store rclass:$rT, dform_addr:$src)]> +{ } + +multiclass StoreDForms +{ + def v16i8: StoreDFormVec; + def v8i16: StoreDFormVec; + def v4i32: StoreDFormVec; + def v2i64: StoreDFormVec; + def v4f32: StoreDFormVec; + def v2f64: StoreDFormVec; + + def v2i32: StoreDFormVec; + + def r128: StoreDForm; + def r64: StoreDForm; + def r32: StoreDForm; + def f32: StoreDForm; + def f64: StoreDForm; + def r16: StoreDForm; + def r8: StoreDForm; +} + +class StoreAFormVec + : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), aform_addr:$src)]>; + +class StoreAForm + : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store rclass:$rT, aform_addr:$src)]>; + +multiclass StoreAForms +{ + def v16i8: StoreAFormVec; + def v8i16: StoreAFormVec; + def v4i32: StoreAFormVec; + def v2i64: StoreAFormVec; + def v4f32: StoreAFormVec; + def v2f64: StoreAFormVec; + + def v2i32: StoreAFormVec; + + def r128: StoreAForm; + def r64: StoreAForm; + def r32: StoreAForm; + def f32: StoreAForm; + def f64: StoreAForm; + def r16: StoreAForm; + def r8: StoreAForm; +} + +class StoreXFormVec + : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), xform_addr:$src)]> +{ } + +class StoreXForm + : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store rclass:$rT, xform_addr:$src)]> +{ } + +multiclass StoreXForms +{ + def v16i8: StoreXFormVec; + def v8i16: StoreXFormVec; + def v4i32: StoreXFormVec; + def v2i64: StoreXFormVec; + def v4f32: StoreXFormVec; + def v2f64: StoreXFormVec; + + def v2i32: StoreXFormVec; + + def r128: StoreXForm; + def r64: StoreXForm; + def r32: StoreXForm; + def f32: StoreXForm; + def f64: StoreXForm; + def r16: StoreXForm; + def r8: StoreXForm; +} + +defm STQD : StoreDForms; +defm STQA : StoreAForms; +defm STQX : StoreXForms; + +/* Store quadword, PC relative: Not much use at this point in time. Might + be useful for relocatable code. 
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), + "stqr\t$rT, $disp", LoadStore, + [(store VECREG:$rT, iaddr:$disp)]>; +*/ + +//===----------------------------------------------------------------------===// +// Generate Controls for Insertion: +//===----------------------------------------------------------------------===// + +def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cbd\t$rT, $src", ShuffleOp, + [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src), + "cbx\t$rT, $src", ShuffleOp, + [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "chd\t$rT, $src", ShuffleOp, + [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src), + "chx\t$rT, $src", ShuffleOp, + [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cwd\t$rT, $src", ShuffleOp, + [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), + "cwx\t$rT, $src", ShuffleOp, + [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cwd\t$rT, $src", ShuffleOp, + [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), + "cwx\t$rT, $src", ShuffleOp, + [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cdd\t$rT, $src", ShuffleOp, + [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), + "cdx\t$rT, $src", ShuffleOp, + [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cdd\t$rT, $src", ShuffleOp, + [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), + "cdx\t$rT, $src", ShuffleOp, + [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +//===----------------------------------------------------------------------===// +// Constant formation: +//===----------------------------------------------------------------------===// + +def ILHv8i16: + RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val), + "ilh\t$rT, $val", ImmLoad, + [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>; + +def ILHr16: + RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val), + "ilh\t$rT, $val", ImmLoad, + [(set R16C:$rT, immSExt16:$val)]>; + +// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with +// the right constant") +def ILHr8: + RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val), + "ilh\t$rT, $val", ImmLoad, + [(set R8C:$rT, immSExt8:$val)]>; + +// IL does sign extension! 
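+
+// A worked example of the large-constant idiom (assumes the standard SPU
+// sequence; the ILHU/IOHL defs involved appear below): 0x12345678 cannot be
+// reached by il's signed 16-bit immediate, so it takes two instructions:
+//
+//   ilhu $3, 0x1234     ; $3 = 0x12340000 (upper halfwords, lower zeroed)
+//   iohl $3, 0x5678     ; $3 |= 0x00005678 (OR into the lower halfword)
+//
+// Constants that fit sext(imm16) need only "il"; unsigned 18-bit constants
+// can use "ila" instead.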
+ +class ILInst pattern>: + RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val", + ImmLoad, pattern>; + +class ILVecInst: + ILInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILRegInst: + ILInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmediateLoad +{ + def v2i64: ILVecInst; + def v4i32: ILVecInst; + + // TODO: Need v2f64, v4f32 + + def r64: ILRegInst; + def r32: ILRegInst; + def f32: ILRegInst; + def f64: ILRegInst; +} + +defm IL : ImmediateLoad; + +class ILHUInst pattern>: + RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val", + ImmLoad, pattern>; + +class ILHUVecInst: + ILHUInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILHURegInst: + ILHUInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmLoadHalfwordUpper +{ + def v2i64: ILHUVecInst; + def v4i32: ILHUVecInst; + + def r64: ILHURegInst; + def r32: ILHURegInst; + + // Loads the high portion of an address + def hi: ILHURegInst; + + // Used in custom lowering constant SFP loads: + def f32: ILHURegInst; +} + +defm ILHU : ImmLoadHalfwordUpper; + +// Immediate load address (can also be used to load 18-bit unsigned constants, +// see the zext 16->32 pattern) + +class ILAInst pattern>: + RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val", + LoadNOP, pattern>; + +class ILAVecInst: + ILAInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILARegInst: + ILAInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmLoadAddress +{ + def v2i64: ILAVecInst; + def v4i32: ILAVecInst; + + def r64: ILARegInst; + def r32: ILARegInst; + def f32: ILARegInst; + def f64: ILARegInst; + + def hi: ILARegInst; + def lo: ILARegInst; + + def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), + [/* no pattern */]>; +} + +defm ILA : ImmLoadAddress; + +// Immediate OR, Halfword Lower: The "other" part of loading large constants +// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...> +// Note that these are really two operand instructions, but they're encoded +// as three operands with the first two arguments tied-to each other. + +class IOHLInst pattern>: + RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val", + ImmLoad, pattern>, + RegConstraint<"$rS = $rT">, + NoEncode<"$rS">; + +class IOHLVecInst: + IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val), + [/* no pattern */]>; + +class IOHLRegInst: + IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val), + [/* no pattern */]>; + +multiclass ImmOrHalfwordLower +{ + def v2i64: IOHLVecInst; + def v4i32: IOHLVecInst; + + def r32: IOHLRegInst; + def f32: IOHLRegInst; + + def lo: IOHLRegInst; +} + +defm IOHL: ImmOrHalfwordLower; + +// Form select mask for bytes using immediate, used in conjunction with the +// SELB instruction: + +class FSMBIVec: + RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), + "fsmbi\t$rT, $val", + SelectOp, + [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>; + +multiclass FormSelectMaskBytesImm +{ + def v16i8: FSMBIVec; + def v8i16: FSMBIVec; + def v4i32: FSMBIVec; + def v2i64: FSMBIVec; +} + +defm FSMBI : FormSelectMaskBytesImm; + +// fsmb: Form select mask for bytes. N.B. 
Input operand, $rA, is 16-bits +class FSMBInst pattern>: + RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp, + pattern>; + +class FSMBRegInst: + FSMBInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMBVecInst: + FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskBits { + def v16i8_r16: FSMBRegInst; + def v16i8: FSMBVecInst; +} + +defm FSMB: FormSelectMaskBits; + +// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is +// only 8-bits wide (even though it's input as 16-bits here) + +class FSMHInst pattern>: + RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp, + pattern>; + +class FSMHRegInst: + FSMHInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMHVecInst: + FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskHalfword { + def v8i16_r16: FSMHRegInst; + def v8i16: FSMHVecInst; +} + +defm FSMH: FormSelectMaskHalfword; + +// fsm: Form select mask for words. Like the other fsm* instructions, +// only the lower 4 bits of $rA are significant. + +class FSMInst pattern>: + RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp, + pattern>; + +class FSMRegInst: + FSMInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMVecInst: + FSMInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskWord { + def v4i32: FSMVecInst; + + def r32 : FSMRegInst; + def r16 : FSMRegInst; +} + +defm FSM : FormSelectMaskWord; + +// Special case when used for i64 math operations +multiclass FormSelectMaskWord64 { + def r32 : FSMRegInst; + def r16 : FSMRegInst; +} + +defm FSM64 : FormSelectMaskWord64; + +//===----------------------------------------------------------------------===// +// Integer and Logical Operations: +//===----------------------------------------------------------------------===// + +def AHv8i16: + RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ah\t$rT, $rA, $rB", IntegerOp, + [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>; + +def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (AHv8i16 VECREG:$rA, VECREG:$rB)>; + +def AHr16: + RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "ah\t$rT, $rA, $rB", IntegerOp, + [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>; + +def AHIvec: + RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ahi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA), + v8i16SExt10Imm:$val))]>; + +def AHIr16: + RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "ahi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; + +// v4i32, i32 add instruction: + +class AInst pattern>: + RRForm<0b00000011000, OOL, IOL, + "a\t$rT, $rA, $rB", IntegerOp, + pattern>; + +class AVecInst: + AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class ARegInst: + AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>; + +multiclass AddInstruction { + def v4i32: AVecInst; + def v16i8: AVecInst; + + def r32: ARegInst; +} + 
+defm A : AddInstruction; + +class AIInst pattern>: + RI10Form<0b00111000, OOL, IOL, + "ai\t$rT, $rA, $val", IntegerOp, + pattern>; + +class AIVecInst: + AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; + +class AIFPVecInst: + AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [/* no pattern */]>; + +class AIRegInst: + AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), + [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; + +// This is used to add epsilons to floating point numbers in the f32 fdiv code: +class AIFPInst: + AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), + [/* no pattern */]>; + +multiclass AddImmediate { + def v4i32: AIVecInst; + + def r32: AIRegInst; + + def v4f32: AIFPVecInst; + def f32: AIFPInst; +} + +defm AI : AddImmediate; + +def SFHvec: + RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def SFHr16: + RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; + +def SFHIvec: + RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "sfhi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val, + (v8i16 VECREG:$rA)))]>; + +def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "sfhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>; + +def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB), + "sf\t$rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + "sf\t$rT, $rA, $rB", IntegerOp, + [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>; + +def SFIvec: + RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "sfi\t$rT, $rA, $val", IntegerOp, + [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val, + (v4i32 VECREG:$rA)))]>; + +def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT), + (ins R32C:$rA, s10imm_i32:$val), + "sfi\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>; + +// ADDX: only available in vector form, doesn't match a pattern. 
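+// How CG and ADDX combine for a 64-bit add built from 32-bit words (an
+// illustrative C rendering, assuming the usual carry semantics):
+//
+//   uint32_t lo    = a_lo + b_lo;            /* "a"    : plain add       */
+//   uint32_t carry = (lo < a_lo) ? 1 : 0;    /* "cg"   : carry generate  */
+//   uint32_t hi    = a_hi + b_hi + carry;    /* "addx" : add w/ carry in */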
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b00000010110, OOL, IOL,
+      "addx\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+    ADDXInst<(outs VECREG:$rT),
+             (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+             [/* no pattern */]>,
+    RegConstraint<"$rCarry = $rT">,
+    NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+    ADDXInst<(outs rclass:$rT),
+             (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+             [/* no pattern */]>,
+    RegConstraint<"$rCarry = $rT">,
+    NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+  def v2i64 : ADDXVecInst<v2i64>;
+  def v4i32 : ADDXVecInst<v4i32>;
+  def r64 : ADDXRegInst<R64C>;
+  def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01000011000, OOL, IOL,
+      "cg\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+    CGInst<(outs VECREG:$rT),
+           (ins VECREG:$rA, VECREG:$rB),
+           [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+    CGInst<(outs rclass:$rT),
+           (ins rclass:$rA, rclass:$rB),
+           [/* no pattern */]>;
+
+multiclass CarryGenerate {
+  def v2i64 : CGVecInst<v2i64>;
+  def v4i32 : CGVecInst<v4i32>;
+  def r64 : CGRegInst<R64C>;
+  def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b10000010110, OOL, IOL,
+      "sfx\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+    SFXInst<(outs VECREG:$rT),
+            (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+            [/* no pattern */]>,
+    RegConstraint<"$rCarry = $rT">,
+    NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+    SFXInst<(outs rclass:$rT),
+            (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+            [/* no pattern */]>,
+    RegConstraint<"$rCarry = $rT">,
+    NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+  def v2i64 : SFXVecInst<v2i64>;
+  def v4i32 : SFXVecInst<v4i32>;
+  def r64 : SFXRegInst<R64C>;
+  def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: only available in vector form, doesn't match a pattern.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01000010000, OOL, IOL,
+      "bg\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+    BGInst<(outs VECREG:$rT),
+           (ins VECREG:$rA, VECREG:$rB),
+           [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+    BGInst<(outs rclass:$rT),
+           (ins rclass:$rA, rclass:$rB),
+           [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+  def v4i32 : BGVecInst<v4i32>;
+  def v2i64 : BGVecInst<v2i64>;
+  def r64 : BGRegInst<R64C>;
+  def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
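+//
+// (The subtract analogue of the cg/addx sketch above, same caveats and
+// assuming sf-style operand order, i.e. the difference taken is $rB - $rA:
+// bg computes the per-slot borrow, the borrow is shifted up a word, and sfx
+// consumes it through the LSB of its tied $rCarry operand.)
+//
+//   bg      $c, $a, $b      (per-word-slot borrow out of $b - $a)
+//   shlqbyi $c, $c, 4       (align each borrow with the next-higher word)
+//   sfx     $c, $a, $b      (subtract with borrow-in from LSB($c))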
+def BGXvec: + RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, + VECREG:$rCarry), + "bgx\t$rT, $rA, $rB", IntegerOp, + []>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +// Halfword multiply variants: +// N.B: These can be used to build up larger quantities (16x16 -> 32) + +def MPYv8i16: + RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpy\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYr16: + RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "mpy\t$rT, $rA, $rB", IntegerMulDiv, + [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>; + +// Unsigned 16-bit multiply: + +class MPYUInst pattern>: + RRForm<0b00110011110, OOL, IOL, + "mpyu\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + +def MPYUv4i32: + MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +def MPYUr16: + MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; + +def MPYUr32: + MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* no pattern */]>; + +// mpyi: multiply 16 x s10imm -> 32 result. + +class MPYIInst pattern>: + RI10Form<0b00101110, OOL, IOL, + "mpyi\t$rT, $rA, $val", IntegerMulDiv, + pattern>; + +def MPYIvec: + MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; + +def MPYIr16: + MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; + +// mpyui: same issues as other multiplies, plus, this doesn't match a +// pattern... but may be used during target DAG selection or lowering + +class MPYUIInst pattern>: + RI10Form<0b10101110, OOL, IOL, + "mpyui\t$rT, $rA, $val", IntegerMulDiv, + pattern>; + +def MPYUIvec: + MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + []>; + +def MPYUIr16: + MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + []>; + +// mpya: 16 x 16 + 16 -> 32 bit result +class MPYAInst pattern>: + RRRForm<0b0011, OOL, IOL, + "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, + pattern>; + +def MPYAv4i32: + MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (v4i32 VECREG:$rT), + (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))), + (v4i32 VECREG:$rC)))]>; + +def MPYAr32: + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), + R32C:$rC))]>; + +def MPYAr32_sext: + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)), + R32C:$rC))]>; + +def MPYAr32_sextinreg: + MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), + (sext_inreg R32C:$rB, i16)), + R32C:$rC))]>; + +// mpyh: multiply high, used to synthesize 32-bit multiplies +class MPYHInst pattern>: + RRForm<0b10100011110, OOL, IOL, + "mpyh\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + +def MPYHv4i32: + MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +def MPYHr32: + MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* no pattern */]>; + +// mpys: multiply high and shift right (returns the top half of +// a 16-bit multiply, sign extended to 32 bits.) 
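+//
+// i.e. per word slot (scalar form shown, values illustrative):
+//
+//   mpys $t, $a, $b         ($t = sext((lower16($a) * lower16($b)) >> 16))
+//
+// e.g. $a = 0x4000, $b = 0x4000: product = 0x10000000, so $t = 0x1000.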
+ +class MPYSInst: + RRForm<0b11100011110, OOL, IOL, + "mpys\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYSv4i32: + MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYSr16: + MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; + +// mpyhh: multiply high-high (returns the 32-bit result from multiplying +// the top 16 bits of the $rA, $rB) + +class MPYHHInst: + RRForm<0b01100011110, OOL, IOL, + "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHv8i16: + MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHr32: + MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhha: Multiply high-high, add to $rT: + +class MPYHHAInst: + RRForm<0b01100010110, OOL, IOL, + "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHAvec: + MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHAr32: + MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhhu: Multiply high-high, unsigned, e.g.: +// +// +-------+-------+ +-------+-------+ +---------+ +// | a0 . a1 | x | b0 . b1 | = | a0 x b0 | +// +-------+-------+ +-------+-------+ +---------+ +// +// where a0, b0 are the upper 16 bits of the 32-bit word + +class MPYHHUInst: + RRForm<0b01110011110, OOL, IOL, + "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHUv4i32: + MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHUr32: + MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhhau: Multiply high-high, unsigned + +class MPYHHAUInst: + RRForm<0b01110010110, OOL, IOL, + "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHAUvec: + MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHAUr32: + MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// clz: Count leading zeroes +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class CLZInst pattern>: + RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", + IntegerOp, pattern>; + +class CLZRegInst: + CLZInst<(outs rclass:$rT), (ins rclass:$rA), + [(set rclass:$rT, (ctlz rclass:$rA))]>; + +class CLZVecInst: + CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>; + +multiclass CountLeadingZeroes { + def v4i32 : CLZVecInst; + def r32 : CLZRegInst; +} + +defm CLZ : CountLeadingZeroes; + +// cntb: Count ones in bytes (aka "population count") +// +// NOTE: This instruction is really a vector instruction, but the custom +// lowering code uses it in unorthodox ways to support CTPOP for other +// data types! + +def CNTBv16i8: + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>; + +def CNTBv8i16 : + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>; + +def CNTBv4i32 : + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>; + +// gbb: Gather the low order bits from each byte in $rA into a single 16-bit +// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are +// slots 1-3. +// +// Note: This instruction "pairs" with the fsmb instruction for all of the +// various types defined here. 
+// +// Note 2: The "VecInst" and "RegInst" forms refer to the result being either +// a vector or register. + +class GBBInst pattern>: + RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>; + +class GBBRegInst: + GBBInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBBVecInst: + GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsFromBytes { + def v16i8_r32: GBBRegInst; + def v16i8_r16: GBBRegInst; + def v16i8: GBBVecInst; +} + +defm GBB: GatherBitsFromBytes; + +// gbh: Gather all low order bits from each halfword in $rA into a single +// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0 +// and slots 1-3 also set to 0. +// +// See notes for GBBInst, above. + +class GBHInst pattern>: + RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp, + pattern>; + +class GBHRegInst: + GBHInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBHVecInst: + GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsHalfword { + def v8i16_r32: GBHRegInst; + def v8i16_r16: GBHRegInst; + def v8i16: GBHVecInst; +} + +defm GBH: GatherBitsHalfword; + +// gb: Gather all low order bits from each word in $rA into a single +// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0, +// as well as slots 1-3. +// +// See notes for gbb, above. + +class GBInst pattern>: + RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp, + pattern>; + +class GBRegInst: + GBInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBVecInst: + GBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsWord { + def v4i32_r32: GBRegInst; + def v4i32_r16: GBRegInst; + def v4i32: GBVecInst; +} + +defm GB: GatherBitsWord; + +// avgb: average bytes +def AVGB: + RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "avgb\t$rT, $rA, $rB", ByteOp, + []>; + +// absdb: absolute difference of bytes +def ABSDB: + RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "absdb\t$rT, $rA, $rB", ByteOp, + []>; + +// sumb: sum bytes into halfwords +def SUMB: + RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "sumb\t$rT, $rA, $rB", ByteOp, + []>; + +// Sign extension operations: +class XSBHInst pattern>: + RRForm_1<0b01101101010, OOL, IOL, + "xsbh\t$rDst, $rSrc", + IntegerOp, pattern>; + +class XSBHVecInst: + XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>; + +class XSBHInRegInst pattern>: + XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), + pattern>; + +multiclass ExtendByteHalfword { + def v16i8: XSBHVecInst; + def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), + [(set R16C:$rDst, (sext R8C:$rSrc))]>; + def r16: XSBHInRegInst; + + // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit + // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 + // pattern below). Intentionally doesn't match a pattern because we want the + // sext 8->32 pattern to do the work for us, namely because we need the extra + // XSHWr32. 
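+  //
+  // i.e. the expected expansion for sext i8 -> i32 is the two-instruction
+  // chain (registers illustrative):
+  //
+  //   xsbh $t, $a            (sign extend byte -> halfword)
+  //   xshw $t, $t            (sign extend halfword -> word)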
+  def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+  // Same as the 32-bit version, but for i64
+  def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+             IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+    XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+             [(set (out_vectype VECREG:$rDest),
+                   (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+    XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+             pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+    XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+             [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+  def v4i32: XSHWVecInst<v8i16, v4i32>;
+
+  def r16: XSHWRegInst<R32C>;
+
+  def r32: XSHWInRegInst<R32C,
+                         [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+  def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+             IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+    XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+             [(set (out_vectype VECREG:$rDst),
+                   (sext (out_vectype VECREG:$rSrc)))]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+    XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+             [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+  def v2i64: XSWDVecInst<v4i32, v2i64>;
+  def r64: XSWDRegInst<R32C, R64C>;
+
+  def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+                          [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+    RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+           IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+    ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+            [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+                                             (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+    ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+            [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+  def v16i8: ANDVecInst<v16i8>;
+  def v8i16: ANDVecInst<v8i16>;
+  def v4i32: ANDVecInst<v4i32>;
+  def v2i64: ANDVecInst<v2i64>;
+
+  def r128: ANDRegInst<GPRC>;
+  def r64: ANDRegInst<R64C>;
+  def r32: ANDRegInst<R32C>;
+  def r16: ANDRegInst<R16C>;
+  def r8: ANDRegInst<R8C>;
+
+  //===---------------------------------------------
+  // Special instructions to perform the fabs instruction
+  def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+                      [/* Intentionally does not match a pattern */]>;
+
+  def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+                      [/* Intentionally does not match a pattern */]>;
+
+  def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+                       [/* Intentionally does not match a pattern */]>;
+
+  //===---------------------------------------------
+
+  // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+  // quantities -- see 16->32 zext pattern.
+  //
+  // This pattern is somewhat artificial, since it might match some
+  // compiler generated pattern but it is unlikely to do so.
+
+  def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+                      [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
+
+// N.B.: vnot_conv is one of those special target selection pattern fragments,
+// in which we expect there to be a bit_convert on the constant. Bear in mind
+// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
+// constant -1 vector.)
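+//
+// For example, the IR "%t = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>"
+// followed by "%r = and <4 x i32> %a, %t" is what the andc patterns below
+// are meant to catch, whether that all-ones constant appears directly or
+// hidden behind a bit_convert (hence both the vnot and vnot_conv forms).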
+ +class ANDCInst pattern>: + RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ANDCVecInst: + ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (and (vectype VECREG:$rA), + (vnot_frag (vectype VECREG:$rB))))]>; + +class ANDCRegInst: + ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>; + +multiclass AndComplement +{ + def v16i8: ANDCVecInst; + def v8i16: ANDCVecInst; + def v4i32: ANDCVecInst; + def v2i64: ANDCVecInst; + + def r128: ANDCRegInst; + def r64: ANDCRegInst; + def r32: ANDCRegInst; + def r16: ANDCRegInst; + def r8: ANDCRegInst; + + // Sometimes, the xor pattern has a bitcast constant: + def v16i8_conv: ANDCVecInst; +} + +defm ANDC : AndComplement; + +class ANDBIInst pattern>: + RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass AndByteImm +{ + def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), + (and (v16i8 VECREG:$rA), + (v16i8 v16i8U8Imm:$val)))]>; + + def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; +} + +defm ANDBI : AndByteImm; + +class ANDHIInst pattern> : + RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass AndHalfwordImm +{ + def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; + + def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), + [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; + + // Zero-extend i8 to i16: + def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val), + [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>; +} + +defm ANDHI : AndHalfwordImm; + +class ANDIInst pattern> : + RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val", + IntegerOp, pattern>; + +multiclass AndWordImm +{ + def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>; + + def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; + + // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32 + // pattern below. + def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + [(set R32C:$rT, + (and (zext R8C:$rA), i32ImmSExt10:$val))]>; + + // Hacked form of ANDI to zero-extend i16 quantities to i32. See the + // zext 16->32 pattern below. + // + // Note that this pattern is somewhat artificial, since it might match + // something the compiler generates but is unlikely to occur in practice. + def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), + [(set R32C:$rT, + (and (zext R16C:$rA), i32ImmSExt10:$val))]>; +} + +defm ANDI : AndWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Bitwise OR group: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// Bitwise "or" (N.B.: These are also register-register copy instructions...) 
+class ORInst pattern>: + RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ORVecInst: + ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class ORRegInst: + ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; + +// ORCvtForm: OR conversion form +// +// This is used to "convert" the preferred slot to its vector equivalent, as +// well as convert a vector back to its preferred slot. +// +// These are effectively no-ops, but need to exist for proper type conversion +// and type coercion. + +class ORCvtForm pattern = [/* no pattern */]> + : SPUInstr { + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-10} = 0b10000010000; + let Inst{11-17} = RA; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +class ORPromoteScalar: + ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>; + +class ORExtractElt: + ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>; + +/* class ORCvtRegGPRC: + ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */ + +/* class ORCvtGPRCReg: + ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */ + +class ORCvtFormR32Reg pattern = [ ]>: + ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>; + +class ORCvtFormRegR32 pattern = [ ]>: + ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>; + +class ORCvtFormR64Reg pattern = [ ]>: + ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>; + +class ORCvtFormRegR64 pattern = [ ]>: + ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>; + +class ORCvtGPRCVec: + ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; + +class ORCvtVecGPRC: + ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; + +multiclass BitwiseOr +{ + def v16i8: ORVecInst; + def v8i16: ORVecInst; + def v4i32: ORVecInst; + def v2i64: ORVecInst; + + def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4f32 VECREG:$rT), + (v4f32 (bitconvert (or (v4i32 VECREG:$rA), + (v4i32 VECREG:$rB)))))]>; + + def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v2f64 VECREG:$rT), + (v2f64 (bitconvert (or (v2i64 VECREG:$rA), + (v2i64 VECREG:$rB)))))]>; + + def r128: ORRegInst; + def r64: ORRegInst; + def r32: ORRegInst; + def r16: ORRegInst; + def r8: ORRegInst; + + // OR instructions used to copy f32 and f64 registers. 
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [/* no pattern */]>; + + def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + [/* no pattern */]>; + + // scalar->vector promotion, prefslot2vec: + def v16i8_i8: ORPromoteScalar; + def v8i16_i16: ORPromoteScalar; + def v4i32_i32: ORPromoteScalar; + def v2i64_i64: ORPromoteScalar; + def v4f32_f32: ORPromoteScalar; + def v2f64_f64: ORPromoteScalar; + + // vector->scalar demotion, vec2prefslot: + def i8_v16i8: ORExtractElt; + def i16_v8i16: ORExtractElt; + def i32_v4i32: ORExtractElt; + def i64_v2i64: ORExtractElt; + def f32_v4f32: ORExtractElt; + def f64_v2f64: ORExtractElt; + + // Conversion from vector to GPRC + def i128_vec: ORCvtVecGPRC; + + // Conversion from GPRC to vector + def vec_i128: ORCvtGPRCVec; + +/* + // Conversion from register to GPRC + def i128_r64: ORCvtRegGPRC; + def i128_f64: ORCvtRegGPRC; + def i128_r32: ORCvtRegGPRC; + def i128_f32: ORCvtRegGPRC; + def i128_r16: ORCvtRegGPRC; + def i128_r8: ORCvtRegGPRC; + + // Conversion from GPRC to register + def r64_i128: ORCvtGPRCReg; + def f64_i128: ORCvtGPRCReg; + def r32_i128: ORCvtGPRCReg; + def f32_i128: ORCvtGPRCReg; + def r16_i128: ORCvtGPRCReg; + def r8_i128: ORCvtGPRCReg; +*/ +/* + // Conversion from register to R32C: + def r32_r16: ORCvtFormRegR32; + def r32_r8: ORCvtFormRegR32; + + // Conversion from R32C to register + def r32_r16: ORCvtFormR32Reg; + def r32_r8: ORCvtFormR32Reg; +*/ + + // Conversion from R64C to register: + def r32_r64: ORCvtFormR64Reg; + // def r16_r64: ORCvtFormR64Reg; + // def r8_r64: ORCvtFormR64Reg; + + // Conversion to R64C from register: + def r64_r32: ORCvtFormRegR64; + // def r64_r16: ORCvtFormRegR64; + // def r64_r8: ORCvtFormRegR64; + + // bitconvert patterns: + def r32_f32: ORCvtFormR32Reg; + def f32_r32: ORCvtFormRegR32; + + def r64_f64: ORCvtFormR64Reg; + def f64_r64: ORCvtFormRegR64; +} + +defm OR : BitwiseOr; + +// scalar->vector promotion patterns (preferred slot to vector): +def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), + (ORv16i8_i8 R8C:$rA)>; + +def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), + (ORv8i16_i16 R16C:$rA)>; + +def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), + (ORv4i32_i32 R32C:$rA)>; + +def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), + (ORv2i64_i64 R64C:$rA)>; + +def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), + (ORv4f32_f32 R32FP:$rA)>; + +def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), + (ORv2f64_f64 R64FP:$rA)>; + +// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise +// known as converting the vector back to its preferred slot + +def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)), + (ORi8_v16i8 VECREG:$rA)>; + +def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)), + (ORi16_v8i16 VECREG:$rA)>; + +def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)), + (ORi32_v4i32 VECREG:$rA)>; + +def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)), + (ORi64_v2i64 VECREG:$rA)>; + +def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)), + (ORf32_v4f32 VECREG:$rA)>; + +def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)), + (ORf64_v2f64 VECREG:$rA)>; + +// Load Register: This is an assembler alias for a bitwise OR of a register +// against itself. It's here because it brings some clarity to assembly +// language output. 
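+//
+// i.e. "lr $rT, $rA" is encoded exactly as "or $rT, $rA, $rA" (note that RA
+// is emitted into both source register fields below), so for example:
+//
+//   lr $3, $4      <=>      or $3, $4, $4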
+ +let hasCtrlDep = 1 in { + class LRInst + : SPUInstr { + bits<7> RA; + bits<7> RT; + + let Pattern = [/*no pattern*/]; + + let Inst{0-10} = 0b10000010000; /* It's an OR operation */ + let Inst{11-17} = RA; + let Inst{18-24} = RA; + let Inst{25-31} = RT; + } + + class LRVecInst: + LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>; + + class LRRegInst: + LRInst<(outs rclass:$rT), (ins rclass:$rA)>; + + multiclass LoadRegister { + def v2i64: LRVecInst; + def v2f64: LRVecInst; + def v4i32: LRVecInst; + def v4f32: LRVecInst; + def v8i16: LRVecInst; + def v16i8: LRVecInst; + + def r128: LRRegInst; + def r64: LRRegInst; + def f64: LRRegInst; + def r32: LRRegInst; + def f32: LRRegInst; + def r16: LRRegInst; + def r8: LRRegInst; + } + + defm LR: LoadRegister; +} + +// ORC: Bitwise "or" with complement (c = a | ~b) + +class ORCInst pattern>: + RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ORCVecInst: + ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + (vnot (vectype VECREG:$rB))))]>; + +class ORCRegInst: + ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>; + +multiclass BitwiseOrComplement +{ + def v16i8: ORCVecInst; + def v8i16: ORCVecInst; + def v4i32: ORCVecInst; + def v2i64: ORCVecInst; + + def r128: ORCRegInst; + def r64: ORCRegInst; + def r32: ORCRegInst; + def r16: ORCRegInst; + def r8: ORCRegInst; +} + +defm ORC : BitwiseOrComplement; + +// OR byte immediate +class ORBIInst pattern>: + RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORBIVecInst: + ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA), + (vectype immpred:$val)))]>; + +multiclass BitwiseOrByteImm +{ + def v16i8: ORBIVecInst; + + def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; +} + +defm ORBI : BitwiseOrByteImm; + +// OR halfword immediate +class ORHIInst pattern>: + RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORHIVecInst: + ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + immpred:$val))]>; + +multiclass BitwiseOrHalfwordImm +{ + def v8i16: ORHIVecInst; + + def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), + [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; + + // Specialized ORHI form used to promote 8-bit registers to 16-bit + def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val), + [(set R16C:$rT, (or (anyext R8C:$rA), + i16ImmSExt10:$val))]>; +} + +defm ORHI : BitwiseOrHalfwordImm; + +class ORIInst pattern>: + RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORIVecInst: + ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + immpred:$val))]>; + +// Bitwise "or" with immediate +multiclass BitwiseOrImm +{ + def v4i32: ORIVecInst; + + def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val), + [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>; + + // i16i32: hacked version of the ori instruction to extend 16-bit quantities + // to 32-bit quantities. used exclusively to match "anyext" conversions (vide + // infra "anyext 16->32" pattern.) 
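+  //
+  // (Background: an i16 value that merely needs to be carried in a 32-bit
+  // register, upper bits undefined, is represented as (anyext R16C:$rA)
+  // after type promotion; the two forms below fold that anyext into the
+  // ori so no separate extend instruction is emitted.)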
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or (anyext R16C:$rA), + i32ImmSExt10:$val))]>; + + // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities + // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide + // infra "anyext 16->32" pattern.) + def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or (anyext R8C:$rA), + i32ImmSExt10:$val))]>; +} + +defm ORI : BitwiseOrImm; + +// ORX: "or" across the vector: or's $rA's word slots leaving the result in +// $rT[0], slots 1-3 are zeroed. +// +// FIXME: Needs to match an intrinsic pattern. +def ORXv4i32: + RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "orx\t$rT, $rA, $rB", IntegerOp, + []>; + +// XOR: + +class XORInst pattern> : + RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class XORVecInst: + XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class XORRegInst: + XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>; + +multiclass BitwiseExclusiveOr +{ + def v16i8: XORVecInst; + def v8i16: XORVecInst; + def v4i32: XORVecInst; + def v2i64: XORVecInst; + + def r128: XORRegInst; + def r64: XORRegInst; + def r32: XORRegInst; + def r16: XORRegInst; + def r8: XORRegInst; + + // XOR instructions used to negate f32 and f64 quantities. + + def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), + [/* no pattern */]>; + + def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), + [/* no pattern */]>; + + def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern, see fneg{32,64} */]>; +} + +defm XOR : BitwiseExclusiveOr; + +//==---------------------------------------------------------- + +class XORBIInst pattern>: + RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val", + IntegerOp, pattern>; + +multiclass XorByteImm +{ + def v16i8: + XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>; + + def r8: + XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>; +} + +defm XORBI : XorByteImm; + +def XORHIv8i16: + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + "xorhi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA), + v8i16SExt10Imm:$val))]>; + +def XORHIr16: + RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "xorhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>; + +def XORIv4i32: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val), + "xori\t$rT, $rA, $val", IntegerOp, + [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA), + v4i32SExt10Imm:$val))]>; + +def XORIr32: + RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + "xori\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>; + +// NAND: + +class NANDInst pattern>: + RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class NANDVecInst: + NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA), + (vectype VECREG:$rB))))]>; +class NANDRegInst: + NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (and 
rclass:$rA, rclass:$rB)))]>; + +multiclass BitwiseNand +{ + def v16i8: NANDVecInst; + def v8i16: NANDVecInst; + def v4i32: NANDVecInst; + def v2i64: NANDVecInst; + + def r128: NANDRegInst; + def r64: NANDRegInst; + def r32: NANDRegInst; + def r16: NANDRegInst; + def r8: NANDRegInst; +} + +defm NAND : BitwiseNand; + +// NOR: + +class NORInst pattern>: + RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class NORVecInst: + NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA), + (vectype VECREG:$rB))))]>; +class NORRegInst: + NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>; + +multiclass BitwiseNor +{ + def v16i8: NORVecInst; + def v8i16: NORVecInst; + def v4i32: NORVecInst; + def v2i64: NORVecInst; + + def r128: NORRegInst; + def r64: NORRegInst; + def r32: NORRegInst; + def r16: NORRegInst; + def r8: NORRegInst; +} + +defm NOR : BitwiseNor; + +// Select bits: +class SELBInst pattern>: + RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", + IntegerOp, pattern>; + +class SELBVecInst: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), + (and (vnot_frag (vectype VECREG:$rC)), + (vectype VECREG:$rA))))]>; + +class SELBVecVCondInst: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (vectype VECREG:$rT), + (select (vectype VECREG:$rC), + (vectype VECREG:$rB), + (vectype VECREG:$rA)))]>; + +class SELBVecCondInst: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), + [(set (vectype VECREG:$rT), + (select R32C:$rC, + (vectype VECREG:$rB), + (vectype VECREG:$rA)))]>; + +class SELBRegInst: + SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), + [(set rclass:$rT, + (or (and rclass:$rB, rclass:$rC), + (and rclass:$rA, (not rclass:$rC))))]>; + +class SELBRegCondInst: + SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), + [(set rclass:$rT, + (select rcond:$rC, rclass:$rB, rclass:$rA))]>; + +multiclass SelectBits +{ + def v16i8: SELBVecInst; + def v8i16: SELBVecInst; + def v4i32: SELBVecInst; + def v2i64: SELBVecInst; + + def r128: SELBRegInst; + def r64: SELBRegInst; + def r32: SELBRegInst; + def r16: SELBRegInst; + def r8: SELBRegInst; + + def v16i8_cond: SELBVecCondInst; + def v8i16_cond: SELBVecCondInst; + def v4i32_cond: SELBVecCondInst; + def v2i64_cond: SELBVecCondInst; + + def v16i8_vcond: SELBVecCondInst; + def v8i16_vcond: SELBVecCondInst; + def v4i32_vcond: SELBVecCondInst; + def v2i64_vcond: SELBVecCondInst; + + def v4f32_cond: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (v4f32 VECREG:$rT), + (select (v4i32 VECREG:$rC), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rA)))]>; + + // SELBr64_cond is defined in SPU64InstrInfo.td + def r32_cond: SELBRegCondInst; + def f32_cond: SELBRegCondInst; + def r16_cond: SELBRegCondInst; + def r8_cond: SELBRegCondInst; +} + +defm SELB : SelectBits; + +class SPUselbPatVec: + Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)), + (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>; + +def : SPUselbPatVec; +def : SPUselbPatVec; +def : SPUselbPatVec; +def : SPUselbPatVec; + +class SPUselbPatReg: + Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC), + (inst rclass:$rA, rclass:$rB, rclass:$rC)>; + +def : SPUselbPatReg; +def : SPUselbPatReg; +def : SPUselbPatReg; +def : 
SPUselbPatReg; + +// EQV: Equivalence (1 for each same bit, otherwise 0) +// +// Note: There are a lot of ways to match this bit operator and these patterns +// attempt to be as exhaustive as possible. + +class EQVInst pattern>: + RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class EQVVecInst: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), + (and (vnot (vectype VECREG:$rA)), + (vnot (vectype VECREG:$rB)))))]>; + +class EQVRegInst: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB), + (and (not rclass:$rA), (not rclass:$rB))))]>; + +class EQVVecPattern1: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>; + +class EQVRegPattern1: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>; + +class EQVVecPattern2: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), + (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>; + +class EQVRegPattern2: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, + (or (and rclass:$rA, rclass:$rB), + (not (or rclass:$rA, rclass:$rB))))]>; + +class EQVVecPattern3: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>; + +class EQVRegPattern3: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>; + +multiclass BitEquivalence +{ + def v16i8: EQVVecInst; + def v8i16: EQVVecInst; + def v4i32: EQVVecInst; + def v2i64: EQVVecInst; + + def v16i8_1: EQVVecPattern1; + def v8i16_1: EQVVecPattern1; + def v4i32_1: EQVVecPattern1; + def v2i64_1: EQVVecPattern1; + + def v16i8_2: EQVVecPattern2; + def v8i16_2: EQVVecPattern2; + def v4i32_2: EQVVecPattern2; + def v2i64_2: EQVVecPattern2; + + def v16i8_3: EQVVecPattern3; + def v8i16_3: EQVVecPattern3; + def v4i32_3: EQVVecPattern3; + def v2i64_3: EQVVecPattern3; + + def r128: EQVRegInst; + def r64: EQVRegInst; + def r32: EQVRegInst; + def r16: EQVRegInst; + def r8: EQVRegInst; + + def r128_1: EQVRegPattern1; + def r64_1: EQVRegPattern1; + def r32_1: EQVRegPattern1; + def r16_1: EQVRegPattern1; + def r8_1: EQVRegPattern1; + + def r128_2: EQVRegPattern2; + def r64_2: EQVRegPattern2; + def r32_2: EQVRegPattern2; + def r16_2: EQVRegPattern2; + def r8_2: EQVRegPattern2; + + def r128_3: EQVRegPattern3; + def r64_3: EQVRegPattern3; + def r32_3: EQVRegPattern3; + def r16_3: EQVRegPattern3; + def r8_3: EQVRegPattern3; +} + +defm EQV: BitEquivalence; + +//===----------------------------------------------------------------------===// +// Vector shuffle... +//===----------------------------------------------------------------------===// +// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB. +// See the SPUshuffle SDNode operand above, which sets up the DAG pattern +// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with +// the SPUISD::SHUFB opcode. 
+//===----------------------------------------------------------------------===// + +class SHUFBInst pattern>: + RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC", + IntegerOp, pattern>; + +class SHUFBVecInst: + SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (resultvec VECREG:$rT), + (SPUshuffle (resultvec VECREG:$rA), + (resultvec VECREG:$rB), + (maskvec VECREG:$rC)))]>; + +class SHUFBGPRCInst: + SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC), + [/* no pattern */]>; + +multiclass ShuffleBytes +{ + def v16i8 : SHUFBVecInst; + def v16i8_m32 : SHUFBVecInst; + def v8i16 : SHUFBVecInst; + def v8i16_m32 : SHUFBVecInst; + def v4i32 : SHUFBVecInst; + def v4i32_m32 : SHUFBVecInst; + def v2i64 : SHUFBVecInst; + def v2i64_m32 : SHUFBVecInst; + + def v4f32 : SHUFBVecInst; + def v4f32_m32 : SHUFBVecInst; + + def v2f64 : SHUFBVecInst; + def v2f64_m32 : SHUFBVecInst; + + def gprc : SHUFBGPRCInst; +} + +defm SHUFB : ShuffleBytes; + +//===----------------------------------------------------------------------===// +// Shift and rotate group: +//===----------------------------------------------------------------------===// + +class SHLHInst pattern>: + RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB", + RotateShift, pattern>; + +class SHLHVecInst: + SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>; + +multiclass ShiftLeftHalfword +{ + def v8i16: SHLHVecInst; + def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>; + def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>; +} + +defm SHLH : ShiftLeftHalfword; + +//===----------------------------------------------------------------------===// + +class SHLHIInst pattern>: + RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val", + RotateShift, pattern>; + +class SHLHIVecInst: + SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + [(set (vectype VECREG:$rT), + (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>; + +multiclass ShiftLeftHalfwordImm +{ + def v8i16: SHLHIVecInst; + def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), + [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>; +} + +defm SHLHI : ShiftLeftHalfwordImm; + +def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)), + (SHLHIv8i16 VECREG:$rA, uimm7:$val)>; + +def : Pat<(shl R16C:$rA, (i32 uimm7:$val)), + (SHLHIr16 R16C:$rA, uimm7:$val)>; + +//===----------------------------------------------------------------------===// + +class SHLInst pattern>: + RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB", + RotateShift, pattern>; + +multiclass ShiftLeftWord +{ + def v4i32: + SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + [(set (v4i32 VECREG:$rT), + (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>; + def r32: + SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; +} + +defm SHL: ShiftLeftWord; + +//===----------------------------------------------------------------------===// + +class SHLIInst pattern>: + RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val", + RotateShift, pattern>; + +multiclass ShiftLeftWordImm +{ + def v4i32: + SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), + [(set (v4i32 VECREG:$rT), + (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>; + + def r32: + SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val), + [(set R32C:$rT, (shl 
R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of at most 7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+           RotateShift, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+    SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+               [(set (vectype VECREG:$rT),
+                     (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+    SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+               [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+  def v16i8: SHLQBIVecInst<v16i8>;
+  def v8i16: SHLQBIVecInst<v8i16>;
+  def v4i32: SHLQBIVecInst<v4i32>;
+  def v4f32: SHLQBIVecInst<v4f32>;
+  def v2i64: SHLQBIVecInst<v2i64>;
+  def v2f64: SHLQBIVecInst<v2f64>;
+
+  def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+    RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+            RotateShift, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+    SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+                [(set (vectype VECREG:$rT),
+                      (SPUshlquad_l_bits (vectype VECREG:$rA),
+                                         (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+  def v16i8 : SHLQBIIVecInst<v16i8>;
+  def v8i16 : SHLQBIIVecInst<v8i16>;
+  def v4i32 : SHLQBIIVecInst<v4i32>;
+  def v4f32 : SHLQBIIVecInst<v4f32>;
+  def v2i64 : SHLQBIIVecInst<v2i64>;
+  def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
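+//
+// Thus a quadword left shift by a count that is not a multiple of 8 splits
+// into a byte part and a residual bit part; e.g. for a shift by 19
+// (registers illustrative):
+//
+//   shlqbyi $t, $a, 2       (19 / 8 = 2 bytes)
+//   shlqbii $t, $t, 3       (19 % 8 = 3 bits)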
+ +class SHLQBYInst pattern>: + RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB", + RotateShift, pattern>; + +class SHLQBYVecInst: + SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>; + +multiclass ShiftLeftQuadBytes +{ + def v16i8: SHLQBYVecInst; + def v8i16: SHLQBYVecInst; + def v4i32: SHLQBYVecInst; + def v4f32: SHLQBYVecInst; + def v2i64: SHLQBYVecInst; + def v2f64: SHLQBYVecInst; + def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), + [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>; +} + +defm SHLQBY: ShiftLeftQuadBytes; + +class SHLQBYIInst pattern>: + RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val", + RotateShift, pattern>; + +class SHLQBYIVecInst: + SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; + +multiclass ShiftLeftQuadBytesImm +{ + def v16i8: SHLQBYIVecInst; + def v8i16: SHLQBYIVecInst; + def v4i32: SHLQBYIVecInst; + def v4f32: SHLQBYIVecInst; + def v2i64: SHLQBYIVecInst; + def v2f64: SHLQBYIVecInst; + def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val), + [(set GPRC:$rT, + (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>; +} + +defm SHLQBYI : ShiftLeftQuadBytesImm; + +class SHLQBYBIInst pattern>: + RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", + RotateShift, pattern>; + +class SHLQBYBIVecInst: + SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern */]>; + +class SHLQBYBIRegInst: + SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass ShiftLeftQuadBytesBitCount +{ + def v16i8: SHLQBYBIVecInst; + def v8i16: SHLQBYBIVecInst; + def v4i32: SHLQBYBIVecInst; + def v4f32: SHLQBYBIVecInst; + def v2i64: SHLQBYBIVecInst; + def v2f64: SHLQBYBIVecInst; + + def r128: SHLQBYBIRegInst; +} + +defm SHLQBYBI : ShiftLeftQuadBytesBitCount; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate halfword: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class ROTHInst pattern>: + RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTHVecInst: + ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_rotl VECREG:$rA, VECREG:$rB))]>; + +class ROTHRegInst: + ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>; + +multiclass RotateLeftHalfword +{ + def v8i16: ROTHVecInst; + def r16: ROTHRegInst; +} + +defm ROTH: RotateLeftHalfword; + +def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate halfword, immediate: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class ROTHIInst pattern>: + RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTHIVecInst: + ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + [(set (vectype VECREG:$rT), + (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>; + +multiclass RotateLeftHalfwordImm +{ + def v8i16: ROTHIVecInst; + def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), + [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>; + def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val), + [(set 
R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>; +} + +defm ROTHI: RotateLeftHalfwordImm; + +def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)), + (ROTHIv8i16 VECREG:$rA, imm:$val)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate word: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTInst pattern>: + RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTVecInst: + ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>; + +class ROTRegInst: + ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [(set rclass:$rT, + (rotl rclass:$rA, R32C:$rB))]>; + +multiclass RotateLeftWord +{ + def v4i32: ROTVecInst; + def r32: ROTRegInst; +} + +defm ROT: RotateLeftWord; + +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or +// 32-bit register +def ROTr32_r16_anyext: + ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB), + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def ROTr32_r8_anyext: + ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB), + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate word, immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTIInst pattern>: + RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTIVecInst: + ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), + [(set (vectype VECREG:$rT), + (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>; + +class ROTIRegInst: + ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>; + +multiclass RotateLeftWordImm +{ + def v4i32: ROTIVecInst; + def v4i32_i16: ROTIVecInst; + def v4i32_i8: ROTIVecInst; + + def r32: ROTIRegInst; + def r32_i16: ROTIRegInst; + def r32_i8: ROTIRegInst; +} + +defm ROTI : RotateLeftWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad by byte (count) +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQBYInst pattern>: + RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTQBYVecInst: + ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>; + +multiclass RotateQuadLeftByBytes +{ + def v16i8: ROTQBYVecInst; + def v8i16: ROTQBYVecInst; + def v4i32: ROTQBYVecInst; + def v4f32: ROTQBYVecInst; + def v2i64: ROTQBYVecInst; + def v2f64: ROTQBYVecInst; +} + +defm ROTQBY: RotateQuadLeftByBytes; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad by byte (count), immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQBYIInst pattern>: + RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTQBYIVecInst: + ROTQBYIInst<(outs VECREG:$rT), 
(ins VECREG:$rA, u7imm:$val), + [(set (vectype VECREG:$rT), + (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>; + +multiclass RotateQuadByBytesImm +{ + def v16i8: ROTQBYIVecInst; + def v8i16: ROTQBYIVecInst; + def v4i32: ROTQBYIVecInst; + def v4f32: ROTQBYIVecInst; + def v2i64: ROTQBYIVecInst; + def vfi64: ROTQBYIVecInst; +} + +defm ROTQBYI: RotateQuadByBytesImm; + +// See ROTQBY note above. +class ROTQBYBIInst pattern>: + RI7Form<0b00110011100, OOL, IOL, + "rotqbybi\t$rT, $rA, $shift", + RotateShift, pattern>; + +class ROTQBYBIVecInst: + ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift), + [(set (vectype VECREG:$rT), + (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>; + +multiclass RotateQuadByBytesByBitshift { + def v16i8_r32: ROTQBYBIVecInst; + def v8i16_r32: ROTQBYBIVecInst; + def v4i32_r32: ROTQBYBIVecInst; + def v2i64_r32: ROTQBYBIVecInst; +} + +defm ROTQBYBI : RotateQuadByBytesByBitshift; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// See ROTQBY note above. +// +// Assume that the user of this instruction knows to shift the rotate count +// into bit 29 +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQBIInst pattern>: + RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTQBIVecInst: + ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern yet */]>; + +class ROTQBIRegInst: + ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern yet */]>; + +multiclass RotateQuadByBitCount +{ + def v16i8: ROTQBIVecInst; + def v8i16: ROTQBIVecInst; + def v4i32: ROTQBIVecInst; + def v2i64: ROTQBIVecInst; + + def r128: ROTQBIRegInst; + def r64: ROTQBIRegInst; +} + +defm ROTQBI: RotateQuadByBitCount; + +class ROTQBIIInst pattern>: + RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTQBIIVecInst: + ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), + [/* no pattern yet */]>; + +class ROTQBIIRegInst: + ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern yet */]>; + +multiclass RotateQuadByBitCountImm +{ + def v16i8: ROTQBIIVecInst; + def v8i16: ROTQBIIVecInst; + def v4i32: ROTQBIIVecInst; + def v2i64: ROTQBIIVecInst; + + def r128: ROTQBIIRegInst; + def r64: ROTQBIIRegInst; +} + +defm ROTQBII : RotateQuadByBitCountImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// ROTHM v8i16 form: +// NOTE(1): No vector rotate is generated by the C/C++ frontend (today), +// so this only matches a synthetically generated/lowered code +// fragment. +// NOTE(2): $rB must be negated before the right rotate! 
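+// NOTE(3): Concretely, the patterns below synthesize the negation with
+//          "sfi $n, $rB, 0" (i.e. $n = 0 - $rB, see SFIr32 above), then
+//          feed $n to the rotate; schematically:
+//
+//            sfi   $n, $rB, 0        ($n = -$rB)
+//            rothm $rT, $rA, $n      ($rT = $rA >> $rB, zero fill at left)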
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTHMInst pattern>: + RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB", + RotateShift, pattern>; + +def ROTHMv8i16: + ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB), + (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB), + (ROTHMv8i16 VECREG:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB), + (ROTHMv8i16 VECREG:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; + +// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left +// Note: This instruction doesn't match a pattern because rB must be negated +// for the instruction to work. Thus, the pattern below the instruction! + +def ROTHMr16: + ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated! */]>; + +def : Pat<(srl R16C:$rA, R32C:$rB), + (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(srl R16C:$rA, R16C:$rB), + (ROTHMr16 R16C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(srl R16C:$rA, R8C:$rB), + (ROTHMr16 R16C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; + +// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is +// that the immediate can be complemented, so that the user doesn't have to +// worry about it. + +class ROTHMIInst pattern>: + RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val", + RotateShift, pattern>; + +def ROTHMIv8i16: + ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), + [/* no pattern */]>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, imm:$val)>; + +def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, imm:$val)>; + +def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, imm:$val)>; + +def ROTHMIr16: + ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val), + [/* no pattern */]>; + +def: Pat<(srl R16C:$rA, (i32 uimm7:$val)), + (ROTHMIr16 R16C:$rA, uimm7:$val)>; + +def: Pat<(srl R16C:$rA, (i16 uimm7:$val)), + (ROTHMIr16 R16C:$rA, uimm7:$val)>; + +def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), + (ROTHMIr16 R16C:$rA, uimm7:$val)>; + +// ROTM v4i32 form: See the ROTHM v8i16 comments. +class ROTMInst pattern>: + RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB", + RotateShift, pattern>; + +def ROTMv4i32: + ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_srl VECREG:$rA, R32C:$rB), + (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(SPUvec_srl VECREG:$rA, R16C:$rB), + (ROTMv4i32 VECREG:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(SPUvec_srl VECREG:$rA, R8C:$rB), + (ROTMv4i32 VECREG:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +def ROTMr32: + ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(srl R32C:$rA, R32C:$rB), + (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(srl R32C:$rA, R16C:$rB), + (ROTMr32 R32C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(srl R32C:$rA, R8C:$rB), + (ROTMr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +// ROTMI v4i32 form: See the comment for ROTHM v8i16. 
+def ROTMIv4i32: + RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + "rotmi\t$rT, $rA, $val", RotateShift, + [(set (v4i32 VECREG:$rT), + (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>; + +def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)), + (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; + +def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)), + (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; + +// ROTMI r32 form: know how to complement the immediate value. +def ROTMIr32: + RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val), + "rotmi\t$rT, $rA, $val", RotateShift, + [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>; + +def : Pat<(srl R32C:$rA, (i16 imm:$val)), + (ROTMIr32 R32C:$rA, uimm7:$val)>; + +def : Pat<(srl R32C:$rA, (i8 imm:$val)), + (ROTMIr32 R32C:$rA, uimm7:$val)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// ROTQMBY: This is a vector form merely so that when used in an +// instruction pattern, type checking will succeed. This instruction assumes +// that the user knew to negate $rB. +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBYInst pattern>: + RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTQMBYVecInst: + ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern, $rB must be negated */]>; + +class ROTQMBYRegInst: + ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass RotateQuadBytes +{ + def v16i8: ROTQMBYVecInst; + def v8i16: ROTQMBYVecInst; + def v4i32: ROTQMBYVecInst; + def v2i64: ROTQMBYVecInst; + + def r128: ROTQMBYRegInst; + def r64: ROTQMBYRegInst; +} + +defm ROTQMBY : RotateQuadBytes; + +class ROTQMBYIInst pattern>: + RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTQMBYIVecInst: + ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +class ROTQMBYIRegInst: + ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern */]>; + +// 128-bit zero extension form: +class ROTQMBYIZExtInst: + ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern */]>; + +multiclass RotateQuadBytesImm +{ + def v16i8: ROTQMBYIVecInst; + def v8i16: ROTQMBYIVecInst; + def v4i32: ROTQMBYIVecInst; + def v2i64: ROTQMBYIVecInst; + + def r128: ROTQMBYIRegInst; + def r64: ROTQMBYIRegInst; + + def r128_zext_r8: ROTQMBYIZExtInst; + def r128_zext_r16: ROTQMBYIZExtInst; + def r128_zext_r32: ROTQMBYIZExtInst; + def r128_zext_r64: ROTQMBYIZExtInst; +} + +defm ROTQMBYI : RotateQuadBytesImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate right and mask by bit count +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBYBIInst pattern>: + RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTQMBYBIVecInst: + ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern, */]>; + +multiclass RotateMaskQuadByBitCount +{ + def v16i8: ROTQMBYBIVecInst; + def v8i16: ROTQMBYBIVecInst; + def v4i32: ROTQMBYBIVecInst; + def v2i64: ROTQMBYBIVecInst; +} + +defm ROTQMBYBI: RotateMaskQuadByBitCount; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad and mask by bits +// Note that the rotate amount has to be negated +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBIInst 
pattern>: + RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB", + RotateShift, pattern>; + +class ROTQMBIVecInst: + ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern */]>; + +class ROTQMBIRegInst: + ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass RotateMaskQuadByBits +{ + def v16i8: ROTQMBIVecInst; + def v8i16: ROTQMBIVecInst; + def v4i32: ROTQMBIVecInst; + def v2i64: ROTQMBIVecInst; + + def r128: ROTQMBIRegInst; + def r64: ROTQMBIRegInst; +} + +defm ROTQMBI: RotateMaskQuadByBits; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad and mask by bits, immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBIIInst pattern>: + RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTQMBIIVecInst: + ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +class ROTQMBIIRegInst: + ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +multiclass RotateMaskQuadByBitsImm +{ + def v16i8: ROTQMBIIVecInst; + def v8i16: ROTQMBIIVecInst; + def v4i32: ROTQMBIIVecInst; + def v2i64: ROTQMBIIVecInst; + + def r128: ROTQMBIIRegInst; + def r64: ROTQMBIIRegInst; +} + +defm ROTQMBII: RotateMaskQuadByBitsImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def ROTMAHv8i16: + RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + "rotmah\t$rT, $rA, $rB", RotateShift, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB), + (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB), + (ROTMAHv8i16 VECREG:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB), + (ROTMAHv8i16 VECREG:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +def ROTMAHr16: + RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + "rotmah\t$rT, $rA, $rB", RotateShift, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(sra R16C:$rA, R32C:$rB), + (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(sra R16C:$rA, R16C:$rB), + (ROTMAHr16 R16C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(sra R16C:$rA, R8C:$rB), + (ROTMAHr16 R16C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +def ROTMAHIv8i16: + RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), + "rotmahi\t$rT, $rA, $val", RotateShift, + [(set (v8i16 VECREG:$rT), + (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>; + +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)), + (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>; + +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)), + (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>; + +def ROTMAHIr16: + RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), + "rotmahi\t$rT, $rA, $val", RotateShift, + [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>; + +def : Pat<(sra R16C:$rA, (i32 imm:$val)), + (ROTMAHIr16 R16C:$rA, uimm7:$val)>; + +def : Pat<(sra R16C:$rA, (i8 imm:$val)), + (ROTMAHIr16 R16C:$rA, uimm7:$val)>; + +def ROTMAv4i32: + RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + "rotma\t$rT, $rA, $rB", RotateShift, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_sra VECREG:$rA, 
R32C:$rB), + (ROTMAv4i32 (v4i32 VECREG:$rA), (SFIr32 R32C:$rB, 0))>; + +def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB), + (ROTMAv4i32 (v4i32 VECREG:$rA), + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB), + (ROTMAv4i32 (v4i32 VECREG:$rA), + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +def ROTMAr32: + RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + "rotma\t$rT, $rA, $rB", RotateShift, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(sra R32C:$rA, R32C:$rB), + (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(sra R32C:$rA, R16C:$rB), + (ROTMAr32 R32C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(sra R32C:$rA, R8C:$rB), + (ROTMAr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +class ROTMAIInst pattern>: + RRForm<0b01011110000, OOL, IOL, + "rotmai\t$rT, $rA, $val", + RotateShift, pattern>; + +class ROTMAIVecInst: + ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val), + [(set (vectype VECREG:$rT), + (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>; + +class ROTMAIRegInst: + ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val), + [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>; + +multiclass RotateMaskAlgebraicImm { + def v2i64_i32 : ROTMAIVecInst; + def v4i32_i32 : ROTMAIVecInst; + def r64_i32 : ROTMAIRegInst; + def r32_i32 : ROTMAIRegInst; +} + +defm ROTMAI : RotateMaskAlgebraicImm; + +//===----------------------------------------------------------------------===// +// Branch and conditionals: +//===----------------------------------------------------------------------===// + +let isTerminator = 1, isBarrier = 1 in { + // Halt If Equal (r32 preferred slot only, no vector form) + def HEQr32: + RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB), + "heq\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HEQIr32 : + RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val), + "heqi\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; + + // HGT/HGTI: These instructions use signed arithmetic for the comparison, + // contrasting with HLGT/HLGTI, which use unsigned comparison: + def HGTr32: + RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB), + "hgt\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HGTIr32: + RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val), + "hgti\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; + + def HLGTr32: + RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB), + "hlgt\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HLGTIr32: + RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val), + "hlgti\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; +} + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Comparison operators for i8, i16 and i32: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class CEQBInst pattern> : + RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualByte +{ + def v16i8 : + CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>; +} + +class CEQBIInst pattern> : + RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualByteImm +{ + def v16i8 : + CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + 
[(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: + CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>; +} + +class CEQHInst pattern> : + RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualHalfword +{ + def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>; +} + +class CEQHIInst pattern> : + RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualHalfwordImm +{ + def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (seteq (v8i16 VECREG:$rA), + (v8i16 v8i16SExt10Imm:$val)))]>; + def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>; +} + +class CEQInst pattern> : + RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualWord +{ + def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; +} + +class CEQIInst pattern> : + RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualWordImm +{ + def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (seteq (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; +} + +class CGTBInst pattern> : + RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrByte +{ + def v16i8 : + CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>; +} + +class CGTBIInst pattern> : + RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrByteImm +{ + def v16i8 : + CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: + CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>; +} + +class CGTHInst pattern> : + RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrHalfword +{ + def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>; +} + +class CGTHIInst pattern> : + RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrHalfwordImm +{ + def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (setgt (v8i16 VECREG:$rA), + (v8i16 v8i16SExt10Imm:$val)))]>; + def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (setgt R16C:$rA, 
i16ImmSExt10:$val))]>; +} + +class CGTInst pattern> : + RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrWord +{ + def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>; +} + +class CGTIInst pattern> : + RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrWordImm +{ + def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; + + // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: + def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), + [/* no pattern */]>; +} + +class CLGTBInst pattern> : + RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrByte +{ + def v16i8 : + CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>; +} + +class CLGTBIInst pattern> : + RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrByteImm +{ + def v16i8 : + CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: + CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>; +} + +class CLGTHInst pattern> : + RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrHalfword +{ + def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>; +} + +class CLGTHIInst pattern> : + RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrHalfwordImm +{ + def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (setugt (v8i16 VECREG:$rA), + (v8i16 v8i16SExt10Imm:$val)))]>; + def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>; +} + +class CLGTInst pattern> : + RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrWord +{ + def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>; +} + +class CLGTIInst pattern> : + RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrWordImm +{ + def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set 
(v4i32 VECREG:$rT), + (setugt (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>; +} + +defm CEQB : CmpEqualByte; +defm CEQBI : CmpEqualByteImm; +defm CEQH : CmpEqualHalfword; +defm CEQHI : CmpEqualHalfwordImm; +defm CEQ : CmpEqualWord; +defm CEQI : CmpEqualWordImm; +defm CGTB : CmpGtrByte; +defm CGTBI : CmpGtrByteImm; +defm CGTH : CmpGtrHalfword; +defm CGTHI : CmpGtrHalfwordImm; +defm CGT : CmpGtrWord; +defm CGTI : CmpGtrWordImm; +defm CLGTB : CmpLGtrByte; +defm CLGTBI : CmpLGtrByteImm; +defm CLGTH : CmpLGtrHalfword; +defm CLGTHI : CmpLGtrHalfwordImm; +defm CLGT : CmpLGtrWord; +defm CLGTI : CmpLGtrWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// For SETCC primitives not supported above (setlt, setle, setge, etc.) +// define a pattern to generate the right code, as a binary operator +// (in a manner of speaking.) +// +// Notes: +// 1. This only matches the setcc set of conditionals. Special pattern +// matching is used for select conditionals. +// +// 2. The "DAG" versions of these classes is almost exclusively used for +// i64 comparisons. See the tblgen fundamentals documentation for what +// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern +// class for where ResultInstrs originates. +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class SETCCNegCondReg: + Pat<(cond rclass:$rA, rclass:$rB), + (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>; + +class SETCCNegCondImm: + Pat<(cond rclass:$rA, (inttype immpred:$imm)), + (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>; + +def : SETCCNegCondReg; +def : SETCCNegCondImm; + +def : SETCCNegCondReg; +def : SETCCNegCondImm; + +def : SETCCNegCondReg; +def : SETCCNegCondImm; + +class SETCCBinOpReg: + Pat<(cond rclass:$rA, rclass:$rB), + (binop (cmpOp1 rclass:$rA, rclass:$rB), + (cmpOp2 rclass:$rA, rclass:$rB))>; + +class SETCCBinOpImm: + Pat<(cond rclass:$rA, (immtype immpred:$imm)), + (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)), + (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setle R8C:$rA, R8C:$rB), + (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>; +def : Pat<(setle R8C:$rA, immU8:$imm), + (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setle R16C:$rA, R16C:$rB), + (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; +def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm), + (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setle R32C:$rA, R32C:$rB), + (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; +def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm), + (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setule R8C:$rA, R8C:$rB), + (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>; +def : Pat<(setule R8C:$rA, immU8:$imm), + (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setule R16C:$rA, R16C:$rB), + (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; +def : Pat<(setule R16C:$rA, 
i16ImmSExt10:$imm), + (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; + +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : SETCCBinOpReg; +def : SETCCBinOpImm; +def : Pat<(setule R32C:$rA, R32C:$rB), + (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; +def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm), + (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// select conditional patterns: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class SELECTNegCondReg: + Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rTrue, rclass:$rFalse, + (cmpare rclass:$rA, rclass:$rB))>; + +class SELECTNegCondImm: + Pat<(select (inttype (cond rclass:$rA, immpred:$imm)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rTrue, rclass:$rFalse, + (cmpare rclass:$rA, immpred:$imm))>; + +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; + +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; + +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; +def : SELECTNegCondReg; +def : SELECTNegCondImm; + +class SELECTBinOpReg: + Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rFalse, rclass:$rTrue, + (binop (cmpOp1 rclass:$rA, rclass:$rB), + (cmpOp2 rclass:$rA, rclass:$rB)))>; + +class SELECTBinOpImm: + Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rFalse, rclass:$rTrue, + (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)), + (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +def : SELECTBinOpReg; +def : SELECTBinOpImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +let isCall = 1, + // All calls clobber the non-callee-saved registers: + Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, + R10,R11,R12,R13,R14,R15,R16,R17,R18,R19, + R20,R21,R22,R23,R24,R25,R26,R27,R28,R29, + R30,R31,R32,R33,R34,R35,R36,R37,R38,R39, + R40,R41,R42,R43,R44,R45,R46,R47,R48,R49, + R50,R51,R52,R53,R54,R55,R56,R57,R58,R59, + R60,R61,R62,R63,R64,R65,R66,R67,R68,R69, + R70,R71,R72,R73,R74,R75,R76,R77,R78,R79], + // All of these instructions use $lr (aka $0) + Uses = [R0] in { + // Branch relative and set link: Used if we actually know that the target + // is within [-32768, 32767] bytes of the target + def BRSL: + BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops), + "brsl\t$$lr, $func", + [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>; + + // Branch absolute and set link: Used if we actually know that the target + // is an absolute address + def BRASL: + BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops), + "brasl\t$$lr, $func", + [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; + + // Branch indirect and set link if external data. 
These instructions are not + // actually generated, matched by an intrinsic: + def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>; + + // Branch indirect and set link. This is the "X-form" address version of a + // function call + def BISL: + BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>; +} + +// Support calls to external symbols: +def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)), + (BRSL texternalsym:$func)>; + +def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), + (BRASL texternalsym:$func)>; + +// Unconditional branches: +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { + def BR : + UncondBranch<0b001001100, (outs), (ins brtarget:$dest), + "br\t$dest", + [(br bb:$dest)]>; + + // Unconditional, absolute address branch + def BRA: + UncondBranch<0b001100000, (outs), (ins brtarget:$dest), + "bra\t$dest", + [/* no pattern */]>; + + // Indirect branch + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + + // Conditional branches: + class BRNZInst pattern>: + RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest", + BranchResolv, pattern>; + + class BRNZRegInst: + BRNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; + + class BRNZVecInst: + BRNZInst<(ins VECREG:$rCond, brtarget:$dest), + [(brcond (vectype VECREG:$rCond), bb:$dest)]>; + + multiclass BranchNotZero { + def v4i32 : BRNZVecInst; + def r32 : BRNZRegInst; + } + + defm BRNZ : BranchNotZero; + + class BRZInst pattern>: + RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest", + BranchResolv, pattern>; + + class BRZRegInst: + BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRZVecInst: + BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZero { + def v4i32: BRZVecInst; + def r32: BRZRegInst; + } + + defm BRZ: BranchZero; + + // Note: LLVM doesn't do branch conditional, indirect. 
Otherwise these would + // be useful: + /* + class BINZInst pattern>: + BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>; + + class BINZRegInst: + BINZInst<(ins rclass:$rA, brtarget:$dest), + [(brcond rclass:$rA, R32C:$dest)]>; + + class BINZVecInst: + BINZInst<(ins VECREG:$rA, R32C:$dest), + [(brcond (vectype VECREG:$rA), R32C:$dest)]>; + + multiclass BranchNotZeroIndirect { + def v4i32: BINZVecInst; + def r32: BINZRegInst; + } + + defm BINZ: BranchNotZeroIndirect; + + class BIZInst pattern>: + BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>; + + class BIZRegInst: + BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>; + + class BIZVecInst: + BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>; + + multiclass BranchZeroIndirect { + def v4i32: BIZVecInst; + def r32: BIZRegInst; + } + + defm BIZ: BranchZeroIndirect; + */ + + class BRHNZInst pattern>: + RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv, + pattern>; + + class BRHNZRegInst: + BRHNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; + + class BRHNZVecInst: + BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchNotZeroHalfword { + def v8i16: BRHNZVecInst; + def r16: BRHNZRegInst; + } + + defm BRHNZ: BranchNotZeroHalfword; + + class BRHZInst pattern>: + RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv, + pattern>; + + class BRHZRegInst: + BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRHZVecInst: + BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZeroHalfword { + def v8i16: BRHZVecInst; + def r16: BRHZRegInst; + } + + defm BRHZ: BranchZeroHalfword; +} + +//===----------------------------------------------------------------------===// +// setcc and brcond patterns: +//===----------------------------------------------------------------------===// + +def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), + (BRHZr16 R16C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), + (BRHNZr16 R16C:$rA, bb:$dest)>; + +def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), + (BRZr32 R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), + (BRNZr32 R32C:$rA, bb:$dest)>; + +multiclass BranchCondEQ +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDeq : BranchCondEQ; +defm BRCONDne : BranchCondEQ; + +multiclass BranchCondLGT +{ + def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDugt : BranchCondLGT; 
+defm BRCONDule : BranchCondLGT; + +multiclass BranchCondLGTEQ +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), + (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), + bb:$dest)>; + + def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB), + (CEQHr16 R16C:$rA, R16:$rB)), + bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), + (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), + bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB), + (CEQr32 R32C:$rA, R32C:$rB)), + bb:$dest)>; +} + +defm BRCONDuge : BranchCondLGTEQ; +defm BRCONDult : BranchCondLGTEQ; + +multiclass BranchCondGT +{ + def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDgt : BranchCondGT; +defm BRCONDle : BranchCondGT; + +multiclass BranchCondGTEQ +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), + (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), + bb:$dest)>; + + def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB), + (CEQHr16 R16C:$rA, R16:$rB)), + bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), + (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), + bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB), + (CEQr32 R32C:$rA, R32C:$rB)), + bb:$dest)>; +} + +defm BRCONDge : BranchCondGTEQ; +defm BRCONDlt : BranchCondGTEQ; + +let isTerminator = 1, isBarrier = 1 in { + let isReturn = 1 in { + def RET: + RETForm<"bi\t$$lr", [(retflag)]>; + } +} + +//===----------------------------------------------------------------------===// +// Single precision floating point instructions +//===----------------------------------------------------------------------===// + +class FAInst pattern>: + RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", + SPrecFP, pattern>; + +class FAVecInst: + FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + +multiclass SFPAdd +{ + def v4f32: FAVecInst; + def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; +} + +defm FA : SFPAdd; + +class FSInst pattern>: + RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", + SPrecFP, pattern>; + +class FSVecInst: + FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + +multiclass SFPSub +{ + def v4f32: FSVecInst; + def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fsub R32FP:$rA, 
R32FP:$rB))]>; +} + +defm FS : SFPSub; + +// Floating point reciprocal estimate + +class FRESTInst: + RRForm_1<0b00110111000, OOL, IOL, + "frest\t$rT, $rA", SPrecFP, + [/* no pattern */]>; + +def FRESTv4f32 : + FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; + +def FRESTf32 : + FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; + +// Floating point interpolate (used in conjunction with reciprocal estimate) +def FIv4f32 : + RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fi\t$rT, $rA, $rB", SPrecFP, + [/* no pattern */]>; + +def FIf32 : + RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + "fi\t$rT, $rA, $rB", SPrecFP, + [/* no pattern */]>; + +//-------------------------------------------------------------------------- +// Basic single precision floating point comparisons: +// +// Note: There is no support on SPU for single precision NaN. Consequently, +// ordered and unordered comparisons are the same. +//-------------------------------------------------------------------------- + +def FCEQf32 : + RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fceq\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setoeq R32FP:$rA, R32FP:$rB), + (FCEQf32 R32FP:$rA, R32FP:$rB)>; + +def FCMEQf32 : + RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcmeq\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)), + (FCMEQf32 R32FP:$rA, R32FP:$rB)>; + +def FCGTf32 : + RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcgt\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setugt R32FP:$rA, R32FP:$rB), + (FCGTf32 R32FP:$rA, R32FP:$rB)>; + +def FCMGTf32 : + RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcmgt\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)), + (FCMGTf32 R32FP:$rA, R32FP:$rB)>; + +//-------------------------------------------------------------------------- +// Single precision floating point comparisons and SETCC equivalents: +//-------------------------------------------------------------------------- + +def : SETCCNegCondReg; +def : SETCCNegCondReg; + +def : SETCCBinOpReg; +def : SETCCBinOpReg; + +def : SETCCBinOpReg; +def : SETCCBinOpReg; + +def : Pat<(setule R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; +def : Pat<(setole R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; + +// FP Status and Control Register Write +// Why isn't rT a don't care in the ISA? +// Should we create a special RRForm_3 for this guy and zero out the rT? +def FSCRWf32 : + RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA), + "fscrwr\t$rA", SPrecFP, + [/* This instruction requires an intrinsic. Note: rT is unused. */]>; + +// FP Status and Control Register Read +def FSCRRf32 : + RRForm_2<0b01011101110, (outs R32FP:$rT), (ins), + "fscrrd\t$rT", SPrecFP, + [/* This instruction requires an intrinsic */]>; + +// llvm instruction space +// How do these map onto cell instructions? +// fdiv rA rB +// frest rC rB # c = 1/b (both lines) +// fi rC rB rC +// fm rD rA rC # d = a * 1/b +// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world +// fma rB rB rC rD # b = b * c + d +// = -(d *b -a) * c + d +// = a * c - c ( a *b *c - a) + +// fcopysign (???) 
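+
+// As a C sketch, the fdiv sequence above is one Newton-Raphson step around
+// the frest/fi reciprocal estimate (frest_fi below is a hypothetical
+// stand-in for the frest + fi pair; fused-operation rounding is ignored):
+//
+//   float spu_fdiv(float a, float b) {
+//     float c = frest_fi(b);    /* frest + fi: c ~= 1/b            */
+//     float d = a * c;          /* fm:   first quotient estimate   */
+//     float e = -(d * b - a);   /* fnms: residual, ~0              */
+//     return e * c + d;         /* fma:  refined quotient          */
+//   }
+//
+// The selection patterns that actually emit this sequence live in
+// SPUMathInstr.td, included at the end of this file.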
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+// (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+//--------------------------------------
+// Floating point multiply instructions
+//--------------------------------------
+
+def FMv4f32:
+  RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+    "fm\t$rT, $rA, $rB", SPrecFP,
+    [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
+                                    (v4f32 VECREG:$rB)))]>;
+
+def FMf32 :
+  RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+    "fm\t$rT, $rA, $rB", SPrecFP,
+    [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+  RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+    "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set (v4f32 VECREG:$rT),
+          (fadd (v4f32 VECREG:$rC),
+                (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+  RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+    "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+  RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+    "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set (v4f32 VECREG:$rT),
+          (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+                (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+  RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+    "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set R32FP:$rT,
+          (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+//     = - (a * b - c)
+//     = c - a * b
+// NOTE: subtraction order
+//   fsub a b = a - b
+//   fs   a b = b - a?
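+// In C terms the fnms dataflow is (fused hardware rounding ignored):
+//
+//   float fnms(float a, float b, float c) { return c - a * b; }
+//
+// hence $rC appearing first in the fsub of the patterns below.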
+def FNMSf32 :
+  RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+    "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+  RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+    "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+    [(set (v4f32 VECREG:$rT),
+          (fsub (v4f32 VECREG:$rC),
+                (fmul (v4f32 VECREG:$rA),
+                      (v4f32 VECREG:$rB))))]>;
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+  CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+    "csflt\t$rT, $rA, 0", SPrecFP,
+    [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+  CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+    "csflt\t$rT, $rA, 0", SPrecFP,
+    [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned int to float
+def CUiFv4f32 :
+  CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+    "cuflt\t$rT, $rA, 0", SPrecFP,
+    [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+  CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+    "cuflt\t$rT, $rA, 0", SPrecFP,
+    [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+  CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+    "cfltu\t$rT, $rA, 0", SPrecFP,
+    [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+  CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+    "cfltu\t$rT, $rA, 0", SPrecFP,
+    [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+  CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+    "cflts\t$rT, $rA, 0", SPrecFP,
+    [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+  CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+    "cflts\t$rT, $rA, 0", SPrecFP,
+    [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+  RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+    "fesd\t$rT, $rA", SPrecFP,
+    [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>;
+
+def FESDf32 :
+  RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+    "fesd\t$rT, $rA", SPrecFP,
+    [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+//  RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+//    "frds\t$rT, $rA,", SPrecFP,
+//    [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+  RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+    "frds\t$rT, $rA", SPrecFP,
+    [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// ToDo: include anyextend?
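+
+// With the scale operand pinned at 0, as in all of the conversion patterns
+// above, these instructions behave like plain C casts -- a sketch that
+// ignores any hardware saturation on out-of-range inputs:
+//
+//   float    csflt0(int x)      { return (float)x;    }
+//   float    cuflt0(unsigned x) { return (float)x;    }
+//   int      cflts0(float x)    { return (int)x;      }
+//   unsigned cfltu0(float x)    { return (unsigned)x; }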
+ +//===----------------------------------------------------------------------==// +// Double precision floating point instructions +//===----------------------------------------------------------------------==// +def FAf64 : + RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfa\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>; + +def FAv2f64 : + RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfa\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FSf64 : + RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfs\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>; + +def FSv2f64 : + RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfs\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FMf64 : + RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfm\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>; + +def FMv2f64: + RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfm\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FMAf64: + RRForm<0b00111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfma\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMAv2f64: + RRForm<0b00111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfma\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fadd (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMSf64 : + RRForm<0b10111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfms\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMSv2f64 : + RRForm<0b10111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfms\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), + (v2f64 VECREG:$rC)))]>; + +// DFNMS: - (a * b - c) +// - (a * b) + c => c - (a * b) + +class DFNMSInst pattern>: + RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB", + DPrecFP, pattern>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +class DFNMSVecInst pattern>: + DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + pattern>; + +class DFNMSRegInst pattern>: + DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + pattern>; + +multiclass DFMultiplySubtract +{ + def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB))))]>; + + def f64 : DFNMSRegInst<[(set R64FP:$rT, + (fsub R64FP:$rC, + (fmul R64FP:$rA, R64FP:$rB)))]>; +} + +defm DFNMS : DFMultiplySubtract; + +// - (a * b + c) +// - (a * b) - c +def FNMAf64 : + RRForm<0b11111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfnma\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FNMAv2f64 : + RRForm<0b11111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfnma\t$rT, $rA, $rB", 
DPrecFP, + [(set (v2f64 VECREG:$rT), + (fneg (fadd (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +//===----------------------------------------------------------------------==// +// Floating point negation and absolute value +//===----------------------------------------------------------------------==// + +def : Pat<(fneg (v4f32 VECREG:$rA)), + (XORfnegvec (v4f32 VECREG:$rA), + (v4f32 (ILHUv4i32 0x8000)))>; + +def : Pat<(fneg R32FP:$rA), + (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; + +// Floating point absolute value +// Note: f64 fabs is custom-selected. + +def : Pat<(fabs R32FP:$rA), + (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>; + +def : Pat<(fabs (v4f32 VECREG:$rA)), + (ANDfabsvec (v4f32 VECREG:$rA), + (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>; + +//===----------------------------------------------------------------------===// +// Hint for branch instructions: +//===----------------------------------------------------------------------===// + +/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */ + +//===----------------------------------------------------------------------===// +// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong +// in the odd pipeline) +//===----------------------------------------------------------------------===// + +def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> { + let Pattern = []; + + let Inst{0-10} = 0b10000000010; + let Inst{11-17} = 0; + let Inst{18-24} = 0; + let Inst{25-31} = 0; +} + +def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> { + let Pattern = []; + + let Inst{0-10} = 0b10000000000; + let Inst{11-17} = 0; + let Inst{18-24} = 0; + let Inst{25-31} = 0; +} + +//===----------------------------------------------------------------------===// +// Bit conversions (type conversions between vector/packed types) +// NOTE: Promotions are handled using the XS* instructions. 
+//===----------------------------------------------------------------------===// +def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>; + +def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>; + +def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), + (ORi128_vec VECREG:$src)>; +def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), + (ORi128_vec VECREG:$src)>; +def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), + (ORi128_vec VECREG:$src)>; +def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), + (ORi128_vec VECREG:$src)>; +def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), + (ORi128_vec VECREG:$src)>; +def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), + (ORi128_vec VECREG:$src)>; + +def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), + (v16i8 (ORvec_i128 GPRC:$src))>; +def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), + (v8i16 (ORvec_i128 GPRC:$src))>; +def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), + (v4i32 (ORvec_i128 GPRC:$src))>; +def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), + (v2i64 (ORvec_i128 GPRC:$src))>; +def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), + (v4f32 (ORvec_i128 GPRC:$src))>; +def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), + (v2f64 (ORvec_i128 GPRC:$src))>; + +//===----------------------------------------------------------------------===// +// Instruction patterns: +//===----------------------------------------------------------------------===// + +// General 32-bit 
constants: +def : Pat<(i32 imm:$imm), + (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>; + +// Single precision float constants: +def : Pat<(f32 fpimm:$imm), + (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>; + +// General constant 32-bit vectors +def : Pat<(v4i32 v4i32Imm:$imm), + (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), + (LO16_vec v4i32Imm:$imm))>; + +// 8-bit constants +def : Pat<(i8 imm:$imm), + (ILHr8 imm:$imm)>; + +//===----------------------------------------------------------------------===// +// Call instruction patterns: +//===----------------------------------------------------------------------===// +// Return void +def : Pat<(ret), + (RET)>; + +//===----------------------------------------------------------------------===// +// Zero/Any/Sign extensions +//===----------------------------------------------------------------------===// + +// sext 8->32: Sign extend bytes to words +def : Pat<(sext_inreg R32C:$rSrc, i8), + (XSHWr32 (XSBHr32 R32C:$rSrc))>; + +def : Pat<(i32 (sext R8C:$rSrc)), + (XSHWr16 (XSBHr8 R8C:$rSrc))>; + +// sext 8->64: Sign extend bytes to double word +def : Pat<(sext_inreg R64C:$rSrc, i8), + (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>; + +def : Pat<(i64 (sext R8C:$rSrc)), + (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>; + +// zext 8->16: Zero extend bytes to halfwords +def : Pat<(i16 (zext R8C:$rSrc)), + (ANDHIi8i16 R8C:$rSrc, 0xff)>; + +// zext 8->32: Zero extend bytes to words +def : Pat<(i32 (zext R8C:$rSrc)), + (ANDIi8i32 R8C:$rSrc, 0xff)>; + +// zext 8->64: Zero extend bytes to double words +def : Pat<(i64 (zext R8C:$rSrc)), + (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32 + (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)), + 0x4), + (ILv4i32 0x0), + (FSMBIv4i32 0x0f0f)))>; + +// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits +def : Pat<(i16 (anyext R8C:$rSrc)), + (ORHIi8i16 R8C:$rSrc, 0)>; + +// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits +def : Pat<(i32 (anyext R8C:$rSrc)), + (ORIi8i32 R8C:$rSrc, 0)>; + +// sext 16->64: Sign extend halfword to double word +def : Pat<(sext_inreg R64C:$rSrc, i16), + (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>; + +def : Pat<(sext R16C:$rSrc), + (XSWDr64 (XSHWr16 R16C:$rSrc))>; + +// zext 16->32: Zero extend halfwords to words +def : Pat<(i32 (zext R16C:$rSrc)), + (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))), + (ANDIi16i32 R16C:$rSrc, 0xf)>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))), + (ANDIi16i32 R16C:$rSrc, 0xff)>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))), + (ANDIi16i32 R16C:$rSrc, 0xfff)>; + +// anyext 16->32: Extend 16->32 bits, irrespective of sign +def : Pat<(i32 (anyext R16C:$rSrc)), + (ORIi16i32 R16C:$rSrc, 0)>; + +//===----------------------------------------------------------------------===// +// Truncates: +// These truncates are for the SPU's supported types (i8, i16, i32). i64 and +// above are custom lowered. 
+//===----------------------------------------------------------------------===// + +def : Pat<(i8 (trunc GPRC:$src)), + (ORi8_v16i8 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>; + +def : Pat<(i8 (trunc R64C:$src)), + (ORi8_v16i8 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>; + +def : Pat<(i8 (trunc R32C:$src)), + (ORi8_v16i8 + (SHUFBv4i32_m32 + (ORv4i32_i32 R32C:$src), + (ORv4i32_i32 R32C:$src), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + +def : Pat<(i8 (trunc R16C:$src)), + (ORi8_v16i8 + (SHUFBv4i32_m32 + (ORv8i16_i16 R16C:$src), + (ORv8i16_i16 R16C:$src), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + +def : Pat<(i16 (trunc GPRC:$src)), + (ORi16_v8i16 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>; + +def : Pat<(i16 (trunc R64C:$src)), + (ORi16_v8i16 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>; + +def : Pat<(i16 (trunc R32C:$src)), + (ORi16_v8i16 + (SHUFBv4i32_m32 + (ORv4i32_i32 R32C:$src), + (ORv4i32_i32 R32C:$src), + (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>; + +def : Pat<(i32 (trunc GPRC:$src)), + (ORi32_v4i32 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>; + +def : Pat<(i32 (trunc R64C:$src)), + (ORi32_v4i32 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>; + +//===----------------------------------------------------------------------===// +// Address generation: SPU, like PPC, has to split addresses into high and +// low parts in order to load them into a register. +//===----------------------------------------------------------------------===// + +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; + +def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), + (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), + (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), + (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), + (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +// Intrinsics: +include "CellSDKIntrinsics.td" +// Various math operator instruction sequences +include "SPUMathInstr.td" +// 64-bit "instructions"/support +include "SPU64InstrInfo.td" +// 128-bit "instructions"/support +include "SPU128InstrInfo.td" diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h new file mode 100644 index 000000000000..6a66967bc050 --- /dev/null 
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,43 @@
+//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+  /// UsesLR - Indicates whether LR is used in the current function.
+  ///
+  bool UsesLR;
+
+public:
+  SPUFunctionInfo(MachineFunction& MF)
+  : UsesLR(false)
+  {}
+
+  void setUsesLR(bool U) { UsesLR = U; }
+  bool usesLR()          { return UsesLR; }
+
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 000000000000..80ebde3ef259
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, and i8 types and their corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
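+//
+// (The divide sequences in this file name their sub-expressions with CodeFrag
+// defs -- Interpf32, DivEstf32, NRaphf32 -- so the Newton-Raphson refinement
+// and the final SELB-based correction can reference the same fragments.
+// Roughly: x = fi(b, frest(b)) approximates 1/b, q = a*x is the division
+// estimate, and one refinement step computes q' = q + x*(a - q*b) before an
+// epsilon fix-up is selected in. A reading of the code below, not a change
+// to it.)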
+//===----------------------------------------------------------------------===// + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v16i8 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), + (ORv4i32 + (ANDv4i32 + (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), + (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), + (ROTMAHIv8i16 VECREG:$rB, 8)), 8), + (FSMBIv8i16 0x2222)), + (ILAv4i32 0x0000ffff)), + (SHLIv4i32 + (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), + (ROTMAIv4i32_i32 VECREG:$rB, 16)), + (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), + (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), + (FSMBIv8i16 0x2222)), 16))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v8i16 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), + (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), + (FSMBIv8i16 0xcccc))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v4i32, i32 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def MPYv4i32: + Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (Av4i32 + (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), + (MPYHv4i32 VECREG:$rB, VECREG:$rA)), + (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; + +def MPYi32: + Pat<(mul R32C:$rA, R32C:$rB), + (Ar32 + (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), + (MPYHr32 R32C:$rB, R32C:$rA)), + (MPYUr32 R32C:$rA, R32C:$rB))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// f32, v4f32 divide instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// Reciprocal estimate and interpolation +def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; +// Division estimate +def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; +// Newton-Raphson iteration +def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), + Interpf32.Fragment, + DivEstf32.Fragment)>; +// Epsilon addition +def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; + +def : Pat<(fdiv R32FP:$rA, R32FP:$rB), + (SELBf32_cond NRaphf32.Fragment, + Epsilonf32.Fragment, + (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; + +// Reciprocal estimate and interpolation +def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; +// Division estimate +def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; +// Newton-Raphson iteration +def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rA)), + Interpv4f32.Fragment, + DivEstv4f32.Fragment)>; +// Epsilon addition +def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; + +def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (SELBv4f32_cond NRaphv4f32.Fragment, + Epsilonv4f32.Fragment, + (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), + Epsilonv4f32.Fragment, + (v4f32 VECREG:$rA)), -1))>; diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td new file mode 100644 index 000000000000..87c4115d1b18 --- /dev/null +++ b/lib/Target/CellSPU/SPUNodes.td @@ -0,0 +1,156 @@ +//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Type profiles and SelectionDAG nodes used by CellSPU +// +//===----------------------------------------------------------------------===// + +// Type profile for a call sequence +def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; + +// SPU_GenControl: Type profile for generating control words for insertions +def SPU_GenControl : SDTypeProfile<1, 1, []>; +def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, + [SDNPHasChain, SDNPOutFlag]>; +//===----------------------------------------------------------------------===// +// Operand constraints: +//===----------------------------------------------------------------------===// + +def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; +def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +// Operand type constraints for vector shuffle/permute operations +def SDT_SPUshuffle : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; + +// Vector binary operator type constraints (needs a further constraint to +// ensure that operand 0 is a vector...): + +def SPUVecBinop: SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; + +// Trinary operators, e.g., addx, carry generate +def SPUIntTrinaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> +]>; + +// SELECT_MASK type constraints: There are several variations for the various +// vector types (this avoids having to bit_convert all over the place.) +def SPUselmask_type: SDTypeProfile<1, 1, [ + SDTCisInt<1> +]>; + +// SELB type constraints: +def SPUselb_type: SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>; + +// SPU Vector shift pseudo-instruction type constraints +def SPUvecshift_type: SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisInt<2>]>; + +// "marker" type for i64 operators that need a shuffle mask +// (i.e., uses cg or bg or another instruction that needs to +// use shufb to get things in the right place.) 
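+// (Illustrative reading, not the exact emitted sequence: a 64-bit add becomes
+// word-wise CG (carry generate), a SHUFB that rotates the generated carry
+// into the neighboring word slot using the carry-generate shuffle mask (Op3
+// below), then ADDX to fold the carry in.)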
+// Op0: The result +// Op1, 2: LHS, RHS +// Op3: Carry-generate shuffle mask + +def SPUmarker_type : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; + +//===----------------------------------------------------------------------===// +// Synthetic/pseudo-instructions +//===----------------------------------------------------------------------===// + +// SPU CNTB: +def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; + +// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see +// SPUISelLowering.h): +def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; + +// Shift left quadword by bits and bytes +def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; +def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; + +// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): +def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>; +def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>; +def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>; + +def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; +def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; + +// Vector rotate left, bits shifted out of the left are rotated in on the right +def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", + SPUvecshift_type, []>; + +// Vector rotate left by bytes, but the count is given in bits and the SPU +// internally converts it to bytes (saves an instruction to mask off lower +// three bits) +def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS", + SPUvecshift_type>; + +// SPU form select mask for bytes, immediate +def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; + +// SPU select bits instruction +def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; + +def SDTprefslot2vec: SDTypeProfile<1, 1, []>; +def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; + +def SPU_vec_demote : SDTypeProfile<1, 1, []>; +def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; + +// Address high and low components, used for [r+r] type addressing +def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; +def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; + +// PC-relative address +def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; + +// A-Form local store addresses +def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; + +// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses +def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; + +// i64 markers: supplies extra operands used to generate the i64 operator +// instruction sequences +def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>; +def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>; +def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>; + +//===----------------------------------------------------------------------===// +// Constraints: (taken from PPCInstrInfo.td) +//===----------------------------------------------------------------------===// + +class RegConstraint { + string Constraints = C; +} + +class NoEncode { + string DisableEncoding = E; +} + +//===----------------------------------------------------------------------===// +// Return (flag isn't quite what it means: the operations are flagged so that +// instruction scheduling doesn't disassociate them.) 
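+// (E.g., RET_FLAG takes a chain plus an optional incoming flag, so a copy of
+// a return value into $3 stays glued to the RET that consumes it.)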
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+                     [SDNPHasChain, SDNPOptInFlag]>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 000000000000..802628f89965
--- /dev/null
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,655 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+def LO16 : SDNodeXForm<imm, [{
+  unsigned val = N->getZExtValue();
+  // Transformation function: get the low 16 bits.
+  return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+  SDValue OpVal(0, 0);
+
+  // Transform the operand in case it's a build_vector
+  assert(N->getOpcode() == ISD::BUILD_VECTOR
+         && "LO16_vec got something other than a BUILD_VECTOR");
+
+  // Get first constant operand...
+  for (unsigned i = 0, e = N->getNumOperands();
+       OpVal.getNode() == 0 && i != e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (OpVal.getNode() == 0)
+      OpVal = N->getOperand(i);
+  }
+
+  assert(OpVal.getNode() != 0 && "LO16_vec did not locate a node");
+  ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+  return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+  return getI32Imm(N->getZExtValue() >> 16);
+}]>;
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+  SDValue OpVal(0, 0);
+
+  // Transform the operand in case it's a build_vector
+  assert(N->getOpcode() == ISD::BUILD_VECTOR
+         && "HI16_vec got something other than a BUILD_VECTOR");
+
+  // Get first constant operand...
+  for (unsigned i = 0, e = N->getNumOperands();
+       OpVal.getNode() == 0 && i != e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (OpVal.getNode() == 0)
+      OpVal = N->getOperand(i);
+  }
+
+  assert(OpVal.getNode() != 0 && "HI16_vec did not locate a node");
+  ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+  return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+  int sextVal = int(N->getSExtValue());
+  return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+  return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+  int Value = int(N->getSExtValue());
+  return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+  return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'lqd'.
+def i64ImmSExt10 : PatLeaf<(imm), [{
+  return isI64IntS10Immediate(N);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'lqd'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+  return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'lqd'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+  return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'lqd'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+  return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+  return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+  // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+  // field.
+  short Ignored;
+  return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+  // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+  // field.
+  return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+  // immU16 predicate - True if the immediate fits into a 16-bit unsigned field.
+  return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+  // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+  int Value = (int) N->getZExtValue();
+  return ((Value & ((1 << 19) - 1)) == Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+  // lo16 predicate - returns true if the immediate has all zeros in the
+  // low order bits and is a 32-bit constant:
+  if (N->getValueType(0) == MVT::i32) {
+    uint32_t val = N->getZExtValue();
+    return ((val & 0x0000ffff) == val);
+  }
+
+  return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+  // hi16 predicate - returns true if the immediate has all zeros in the
+  // low order bits and is a 32-bit constant:
+  if (N->getValueType(0) == MVT::i32) {
+    uint32_t val = uint32_t(N->getZExtValue());
+    return ((val & 0xffff0000) == val);
+  } else if (N->getValueType(0) == MVT::i64) {
+    uint64_t val = N->getZExtValue();
+    return ((val & 0xffff0000ULL) == val);
+  }
+
+  return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+  // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+  // (shift left quadword by bits immediate)
+  int64_t Val = N->getZExtValue();
+  return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+  float fval = N->getValueAPF().convertToFloat();
+  return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+  float fval = N->getValueAPF().convertToFloat();
+  return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+  float fval = N->getValueAPF().convertToFloat();
+  return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+  float fval = N->getValueAPF().convertToFloat();
+  return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+  short Ignored;
+  return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have upper 16 bits set?
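+// (For example, 0x45600000 qualifies and can be materialized with a single
+// ILHU; a constant such as 0x45601234 does not, and falls back to the
+// ILHU/IOHL pair.)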
+def hi16_f32 : PatLeaf<(fpimm), [{
+  if (N->getValueType(0) == MVT::f32) {
+    uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+    return ((val & 0xffff0000) == val);
+  }
+
+  return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+  if (N->getValueType(0) == MVT::f32) {
+    uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+    return ((Value & ((1 << 19) - 1)) == Value);
+  }
+
+  return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies
+// on the incoming constant being a 16-bit quantity, where the upper and lower
+// bytes are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies
+// on the incoming constant being a 16-bit quantity, where the upper and lower
+// bytes are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
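+// (All of these build_vector predicates share one shape: the PatLeaf asks an
+// SPU::get_vec_*imm helper whether the splatted element folds into a single
+// immediate-form instruction, and the paired SDNodeXForm extracts that
+// immediate. E.g., a v8i16 splat of 10 can become a single "ilh" rather than
+// a constant-pool load -- an illustrative case, assuming the splat is
+// recognized.)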
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+  return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+  return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+  return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+  return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+  return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+  return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant vector.
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+  return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+  return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+  return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i16> {
+  let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+  let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+  let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+  let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+  let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+  let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+  let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+  let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+  let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+  let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+  let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+  let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+  let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+  let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+  let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+  let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+  let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+  let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+  let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+  let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+  let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+  let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+  let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+  let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
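+// (These exist because the SPU implements right shifts and rotates with its
+// "rotate and mask" forms, which take a negated count; the printers emit the
+// negation so the assembly carries what the hardware expects. Rough example:
+// a logical shift right of a word by 2 prints as "rotmi $rT, $rA, -2".)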
+def rotNeg7imm : Operand<i32> {
+  let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+  let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+  let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+  let PrintMethod = "printCallOperand";
+  let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+  let PrintMethod = "printPCRelativeOperand";
+  let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+  let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+  let PrintMethod = "printCallOperand";
+  let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+  let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+  let PrintMethod = "printShufAddr";
+  let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+  let PrintMethod = "printDFormAddr";
+  let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects to have two operands, an offset
+// and a pointer. Of these, only the immediate is actually used.
+def addr256k : Operand<iPTR> {
+  let PrintMethod = "printAddr256K";
+  let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+  let PrintMethod = "printMemRegImmS18";
+  let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+  let PrintMethod = "printMemRegReg";
+  let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in four basic
+// flavors:
+//
+//   D-form   : [r+I10] (10-bit signed offset + reg)
+//   X-form   : [r+r]   (reg+reg)
+//   A-form   : abs     (256K LSA offset)
+//   D-form(2): [r+I7]  (7-bit signed offset + reg)
+
+def dform_addr  : ComplexPattern<iPTR, 2, "SelectDFormAddr",  [], []>;
+def xform_addr  : ComplexPattern<iPTR, 2, "SelectXFormAddr",  [], []>;
+def aform_addr  : ComplexPattern<iPTR, 2, "SelectAFormAddr",  [], []>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 000000000000..e031048e7ccb
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,614 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+  using namespace SPU;
+  switch (RegEnum) {
+  case SPU::R0: return 0;
+  case SPU::R1: return 1;
+  case SPU::R2: return 2;
+  case SPU::R3: return 3;
+  case SPU::R4: return 4;
+  case SPU::R5: return 5;
+  case SPU::R6: return 6;
+  case SPU::R7: return 7;
+  case SPU::R8: return 8;
+  case SPU::R9: return 9;
+  case SPU::R10: return 10;
+  case SPU::R11: return 11;
+  case SPU::R12: return 12;
+  case SPU::R13: return 13;
+  case SPU::R14: return 14;
+  case SPU::R15: return 15;
+  case SPU::R16: return 16;
+  case SPU::R17: return 17;
+  case SPU::R18: return 18;
+  case SPU::R19: return 19;
+  case SPU::R20: return 20;
+  case SPU::R21: return 21;
+  case SPU::R22: return 22;
+  case SPU::R23: return 23;
+  case SPU::R24: return 24;
+  case SPU::R25: return 25;
+  case SPU::R26: return 26;
+  case SPU::R27: return 27;
+  case SPU::R28: return 28;
+  case SPU::R29: return 29;
+  case SPU::R30: return 30;
+  case SPU::R31: return 31;
+  case SPU::R32: return 32;
+  case SPU::R33: return 33;
+  case SPU::R34: return 34;
+  case SPU::R35: return 35;
+  case SPU::R36: return 36;
+  case SPU::R37: return 37;
+  case SPU::R38: return 38;
+  case SPU::R39: return 39;
+  case SPU::R40: return 40;
+  case SPU::R41: return 41;
+  case SPU::R42: return 42;
+  case SPU::R43: return 43;
+  case SPU::R44: return 44;
+  case SPU::R45: return 45;
+  case SPU::R46: return 46;
+  case SPU::R47: return 47;
+  case SPU::R48: return 48;
+  case SPU::R49: return 49;
+  case SPU::R50: return 50;
+  case SPU::R51: return 51;
+  case SPU::R52: return 52;
+  case SPU::R53: return 53;
+  case SPU::R54: return 54;
+  case SPU::R55: return 55;
+  case SPU::R56: return 56;
+  case SPU::R57: return 57;
+  case SPU::R58: return 58;
+  case SPU::R59: return 59;
+  case SPU::R60: return 60;
+  case SPU::R61: return 61;
+  case SPU::R62: return 62;
+  case SPU::R63: return 63;
+  case SPU::R64: return 64;
+  case SPU::R65: return 65;
+  case SPU::R66: return 66;
+  case SPU::R67: return 67;
+  case SPU::R68: return 68;
+  case SPU::R69: return 69;
+  case SPU::R70: return 70;
+  case SPU::R71: return 71;
+  case SPU::R72: return 72;
+  case SPU::R73: return 73;
+  case SPU::R74: return 74;
+  case SPU::R75: return 75;
+  case SPU::R76: return 76;
+  case SPU::R77: return 77;
+  case SPU::R78: return 78;
+  case SPU::R79:
return 79; + case SPU::R80: return 80; + case SPU::R81: return 81; + case SPU::R82: return 82; + case SPU::R83: return 83; + case SPU::R84: return 84; + case SPU::R85: return 85; + case SPU::R86: return 86; + case SPU::R87: return 87; + case SPU::R88: return 88; + case SPU::R89: return 89; + case SPU::R90: return 90; + case SPU::R91: return 91; + case SPU::R92: return 92; + case SPU::R93: return 93; + case SPU::R94: return 94; + case SPU::R95: return 95; + case SPU::R96: return 96; + case SPU::R97: return 97; + case SPU::R98: return 98; + case SPU::R99: return 99; + case SPU::R100: return 100; + case SPU::R101: return 101; + case SPU::R102: return 102; + case SPU::R103: return 103; + case SPU::R104: return 104; + case SPU::R105: return 105; + case SPU::R106: return 106; + case SPU::R107: return 107; + case SPU::R108: return 108; + case SPU::R109: return 109; + case SPU::R110: return 110; + case SPU::R111: return 111; + case SPU::R112: return 112; + case SPU::R113: return 113; + case SPU::R114: return 114; + case SPU::R115: return 115; + case SPU::R116: return 116; + case SPU::R117: return 117; + case SPU::R118: return 118; + case SPU::R119: return 119; + case SPU::R120: return 120; + case SPU::R121: return 121; + case SPU::R122: return 122; + case SPU::R123: return 123; + case SPU::R124: return 124; + case SPU::R125: return 125; + case SPU::R126: return 126; + case SPU::R127: return 127; + default: + cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n"; + abort(); + } +} + +SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, + const TargetInstrInfo &tii) : + SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), + Subtarget(subtarget), + TII(tii) +{ +} + +// SPU's 128-bit registers used for argument passing: +static const unsigned SPU_ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 +}; + +const unsigned * +SPURegisterInfo::getArgRegs() +{ + return SPU_ArgRegs; +} + +unsigned +SPURegisterInfo::getNumArgRegs() +{ + return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]); +} + +/// getPointerRegClass - Return the register class to use to hold pointers. +/// This is used for addressing modes. 
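+/// (The local store is only 256K, so the 32-bit R32C class is wide enough to
+/// hold any SPU pointer.)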
+const TargetRegisterClass * SPURegisterInfo::getPointerRegClass() const +{ + return &SPU::R32CRegClass; +} + +const unsigned * +SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const +{ + // Cell ABI calling convention + static const unsigned SPU_CalleeSaveRegs[] = { + SPU::R80, SPU::R81, SPU::R82, SPU::R83, + SPU::R84, SPU::R85, SPU::R86, SPU::R87, + SPU::R88, SPU::R89, SPU::R90, SPU::R91, + SPU::R92, SPU::R93, SPU::R94, SPU::R95, + SPU::R96, SPU::R97, SPU::R98, SPU::R99, + SPU::R100, SPU::R101, SPU::R102, SPU::R103, + SPU::R104, SPU::R105, SPU::R106, SPU::R107, + SPU::R108, SPU::R109, SPU::R110, SPU::R111, + SPU::R112, SPU::R113, SPU::R114, SPU::R115, + SPU::R116, SPU::R117, SPU::R118, SPU::R119, + SPU::R120, SPU::R121, SPU::R122, SPU::R123, + SPU::R124, SPU::R125, SPU::R126, SPU::R127, + SPU::R2, /* environment pointer */ + SPU::R1, /* stack pointer */ + SPU::R0, /* link register */ + 0 /* end */ + }; + + return SPU_CalleeSaveRegs; +} + +const TargetRegisterClass* const* +SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const +{ + // Cell ABI Calling Convention + static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = { + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, /* environment pointer */ + &SPU::GPRCRegClass, /* stack pointer */ + &SPU::GPRCRegClass, /* link register */ + 0 /* end */ + }; + + return SPU_CalleeSaveRegClasses; +} + +/*! + R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is + generally unused) are the Cell's reserved registers + */ +BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(SPU::R0); // LR + Reserved.set(SPU::R1); // SP + Reserved.set(SPU::R2); // environment pointer + return Reserved; +} + +//===----------------------------------------------------------------------===// +// Stack Frame Processing methods +//===----------------------------------------------------------------------===// + +// needsFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +// +static bool needsFP(const MachineFunction &MF) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +//-------------------------------------------------------------------------- +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. 
This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool
+SPURegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->getStackSize() && needsFP(MF);
+}
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                               MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator I)
+  const
+{
+  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                                     RegScavenger *RS) const
+{
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  MachineOperand &SPOp = MI.getOperand(i);
+  int FrameIndex = SPOp.getIndex();
+
+  // Now add the frame object offset to the offset from r1.
+  int Offset = MFI->getObjectOffset(FrameIndex);
+
+  // Most instructions, except for generated FrameIndex additions using AIr32
+  // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+  // immediate in operand 2.
+  unsigned OpNo = 1;
+  if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+    OpNo = 2;
+
+  MachineOperand &MO = MI.getOperand(OpNo);
+
+  // Offset is biased by $lr's slot at the bottom.
+  Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+  assert((Offset & 0xf) == 0
+         && "16-byte alignment violated in eliminateFrameIndex");
+
+  // Replace the FrameIndex with the base register $sp (aka $r1)
+  SPOp.ChangeToRegister(SPU::R1, false);
+  if (Offset > SPUFrameInfo::maxFrameOffset()
+      || Offset < SPUFrameInfo::minFrameOffset()) {
+    cerr << "Large stack adjustment ("
+         << Offset
+         << ") in SPURegisterInfo::eliminateFrameIndex.";
+  } else {
+    MO.ChangeToImmediate(Offset);
+  }
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void
+SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
+{
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned FrameSize = MFI->getStackSize();
+
+  // Get the alignments provided by the target, and the maximum alignment
+  // (if any) of the fixed frame objects.
+  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+  assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+  unsigned AlignMask = Align - 1;
+
+  // Get the maximum call frame size of all the calls.
+  unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+  // that allocations will be aligned.
+  if (MFI->hasVarSizedObjects())
+    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+  // Update maximum call frame size.
+  MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+  // Include call frame size in total.
+  FrameSize += maxCallFrameSize;
+
+  // Make sure the frame is aligned.
+  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+  // Update frame info.
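+  // (Worked example of the rounding above, with assumed numbers: TargetAlign
+  // 16, 40 bytes of locals, and maxCallFrameSize 32 give FrameSize = 40 + 32
+  // = 72, and (72 + 15) & ~15 rounds that up to 80.)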
+  MFI->setStackSize(FrameSize);
+}
+
+void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                           RegScavenger *RS)
+  const {
+  // Mark LR and SP unused, since the prolog spills them to stack and
+  // we don't want anyone else to spill them for us.
+  //
+  // Also, unless R2 is really used someday, don't spill it automatically.
+  MF.getRegInfo().setPhysRegUnused(SPU::R0);
+  MF.getRegInfo().setPhysRegUnused(SPU::R1);
+  MF.getRegInfo().setPhysRegUnused(SPU::R2);
+}
+
+void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
+{
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Prepare for debug frame info.
+  bool hasDebugInfo = MMI && MMI->hasDebugInfo();
+  unsigned FrameLabelId = 0;
+
+  // Move MBBI back to the beginning of the function.
+  MBBI = MBB.begin();
+
+  // Work out frame sizes.
+  determineFrameLayout(MF);
+  int FrameSize = MFI->getStackSize();
+
+  assert((FrameSize & 0xf) == 0
+         && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+  if (FrameSize > 0 || MFI->hasCalls()) {
+    FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
+    if (hasDebugInfo) {
+      // Mark effective beginning of when frame pointer becomes valid.
+      FrameLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
+    }
+
+    // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+    // for the ABI
+    BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+      .addReg(SPU::R1);
+    if (isS10Constant(FrameSize)) {
+      // Spill $sp to adjusted $sp
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+        .addReg(SPU::R1);
+      // Adjust $sp by required amount
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+        .addImm(FrameSize);
+    } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+      // $r2 to adjust $sp:
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+        .addImm(-16)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+        .addImm(FrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+        .addReg(SPU::R1)
+        .addReg(SPU::R2);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+        .addReg(SPU::R2)
+        .addImm(16);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+    } else {
+      cerr << "Unhandled frame size: " << FrameSize << "\n";
+      abort();
+    }
+
+    if (hasDebugInfo) {
+      std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+      // Show update of SP.
+      MachineLocation SPDst(MachineLocation::VirtualFP);
+      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+      // Add callee saved registers to move list.
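+      // ($lr (R0) is skipped below: the prologue already stored it to the
+      // fixed 16($sp) slot, so its move is not re-described here.)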
+      const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+        int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+        unsigned Reg = CSI[I].getReg();
+        if (Reg == SPU::R0) continue;
+        MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+        MachineLocation CSSrc(Reg);
+        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+      }
+
+      // Mark effective beginning of when frame pointer is ready.
+      unsigned ReadyLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
+
+      MachineLocation FPDst(SPU::R1);
+      MachineLocation FPSrc(MachineLocation::VirtualFP);
+      Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+    }
+  } else {
+    // This is a leaf function -- insert a branch hint iff there is a
+    // sufficient number of instructions in the basic block. Note that
+    // this is just a best guess based on the basic block's size.
+    if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
+      MachineBasicBlock::iterator MBBI = prior(MBB.end());
+      dl = MBBI->getDebugLoc();
+
+      // Insert terminator label
+      unsigned BranchLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(BranchLabelId);
+    }
+  }
+}
+
+void
+SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int FrameSize = MFI->getStackSize();
+  int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  assert(MBBI->getOpcode() == SPU::RET &&
+         "Can only insert epilog into returning blocks");
+  assert((FrameSize & 0xf) == 0
+         && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
+  if (FrameSize > 0 || MFI->hasCalls()) {
+    FrameSize = FrameSize + SPUFrameInfo::minStackSize();
+    if (isS10Constant(FrameSize + LinkSlotOffset)) {
+      // Reload $lr, adjust $sp by required amount
+      // Note: We do this to slightly improve dual issue -- not by much, but it
+      // is an opportunity for dual issue.
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+        .addImm(FrameSize + LinkSlotOffset)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+        .addReg(SPU::R1)
+        .addImm(FrameSize);
+    } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+      // $r2 to adjust $sp:
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+        .addImm(16)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+        .addImm(FrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+        .addReg(SPU::R1)
+        .addReg(SPU::R2);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+        .addImm(16)
+        .addReg(SPU::R2);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+        .addReg(SPU::R2)
+        .addImm(16);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+    } else {
+      cerr << "Unhandled frame size: " << FrameSize << "\n";
+      abort();
+    }
+  }
+}
+
+unsigned
+SPURegisterInfo::getRARegister() const
+{
+  return SPU::R0;
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(MachineFunction &MF) const
+{
+  return SPU::R1;
+}
+
+void
+SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
+{
+  // Initial state of the frame pointer is R1.
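+  // (That is, on function entry the virtual frame pointer coincides with R1
+  // at offset 0, before the prologue adjusts anything.)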
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(SPU::R1, 0);
+  Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  // FIXME: Most probably dwarf numbers differ for Linux and Darwin
+  return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 000000000000..5b6e9ec68cdb
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,101 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+#include "SPUGenRegisterInfo.h.inc"
+
+namespace llvm {
+  class SPUSubtarget;
+  class TargetInstrInfo;
+  class Type;
+
+  class SPURegisterInfo : public SPUGenRegisterInfo {
+  private:
+    const SPUSubtarget &Subtarget;
+    const TargetInstrInfo &TII;
+
+    //! Predicate: Does the machine function use the link register?
+    bool usesLR(MachineFunction &MF) const;
+
+  public:
+    SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+    //! Translate a register's enum value to a register number
+    /*!
+      This method translates a register's enum value to its register number,
+      e.g. SPU::R14 -> 14.
+    */
+    static unsigned getRegisterNumbering(unsigned RegEnum);
+
+    /// getPointerRegClass - Return the register class to use to hold pointers.
+    /// This is used for addressing modes.
+    virtual const TargetRegisterClass *getPointerRegClass() const;
+
+    //! Return the array of callee-saved registers
+    virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+    //! Return the register class array of the callee-saved registers
+    virtual const TargetRegisterClass* const *
+      getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+    //! Return the reserved registers
+    BitVector getReservedRegs(const MachineFunction &MF) const;
+
+    //! Predicate: Target has a dedicated frame pointer
+    bool hasFP(const MachineFunction &MF) const;
+    //! Eliminate the call frame setup pseudo-instructions
+    void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                       MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I) const;
+    //! Convert frame indices into machine operands
+    void eliminateFrameIndex(MachineBasicBlock::iterator II, int,
+                             RegScavenger *RS) const;
+    //! Determine the frame's layout
+    void determineFrameLayout(MachineFunction &MF) const;
+
+    void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                              RegScavenger *RS = NULL) const;
+    //! Emit the function prologue
+    void emitPrologue(MachineFunction &MF) const;
+    //! Emit the function epilogue
+    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+    //! Get return address register (LR, aka R0)
+    unsigned getRARegister() const;
+    //! Get the stack frame register (SP, aka R1)
+    unsigned getFrameRegister(MachineFunction &MF) const;
+    //! Perform target-specific stack frame setup.
+    void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+    //------------------------------------------------------------------------
+    // New methods added:
+    //------------------------------------------------------------------------
+
+    //! Return the array of argument passing registers
+    /*!
+      \note The size of this array is returned by getArgRegsSize().
+    */
+    static const unsigned *getArgRegs();
+
+    //! Return the size of the argument passing register array
+    static unsigned getNumArgRegs();
+
+    //! Get DWARF debugging register number
+    int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+  };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..bb88f2bf9a29
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,429 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+  let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying them
+// relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+  field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 :
SPUVecReg<39, "$39">, DwarfRegNum<[39]>; +def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>; +def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>; +def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>; +def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>; +def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>; +def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>; +def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>; +def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>; +def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>; +def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>; +def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>; +def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>; +def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>; +def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>; +def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>; +def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>; +def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>; +def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>; +def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>; +def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>; +def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>; +def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>; +def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>; +def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>; +def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>; +def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>; +def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>; +def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>; +def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>; +def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>; +def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>; +def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>; +def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>; +def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>; +def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>; +def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>; +def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>; +def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>; +def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>; +def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>; +def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>; +def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>; +def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>; +def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>; +def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>; +def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>; +def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>; +def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>; +def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>; +def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>; +def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>; +def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>; +def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>; +def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>; +def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>; +def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>; +def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>; +def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>; +def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>; +def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>; +def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>; +def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>; +def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>; +def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>; +def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>; +def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>; +def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>; 
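+/* Illustrative note: every def in this file pairs a register's hardware
+ * number N with an identical DwarfRegNum<[N]>, so the DWARF mapping queried
+ * through SPURegisterInfo::getDwarfRegNum (which calls
+ * getDwarfRegNumFull(RegNum, 0)) is effectively the identity, e.g.
+ * SPU::R14 -> 14, matching getRegisterNumbering above. */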
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>; +def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>; +def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>; +def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>; +def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>; +def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>; +def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>; +def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>; +def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>; +def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>; +def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>; +def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>; +def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>; +def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>; +def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>; +def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>; +def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>; +def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>; +def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>; +def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>; +def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; + +/* Need floating point status register here: */ +/* def FPCSR : ... */ + +// The SPU's registers as 128-bit wide entities, and can function as general +// purpose registers, where the operands are in the "preferred slot": +def GPRC : RegisterClass<"SPU", [i128], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GPRCClass::iterator + GPRCClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + GPRCClass::iterator + GPRCClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 64-bit wide (double word integer) "preferred slot": +def R64C : RegisterClass<"SPU", [i64], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, 
R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R64CClass::iterator + R64CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R64CClass::iterator + R64CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 64-bit wide (double word) FP "preferred slot": +def R64FP : RegisterClass<"SPU", [f64], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R64FPClass::iterator + R64FPClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R64FPClass::iterator + R64FPClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 32-bit wide (word) "preferred slot": +def R32C : RegisterClass<"SPU", [i32], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R32CClass::iterator + R32CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R32CClass::iterator + R32CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as single precision floating point "preferred slot": +def R32FP : 
RegisterClass<"SPU", [f32], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R32FPClass::iterator + R32FPClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R32FPClass::iterator + R32FPClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 16-bit wide (halfword) "preferred slot": +def R16C : RegisterClass<"SPU", [i16], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R16CClass::iterator + R16CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R16CClass::iterator + R16CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 8-bit wide (byte) "preferred slot": +def R8C : RegisterClass<"SPU", [i8], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, 
R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R8CClass::iterator + R8CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R8CClass::iterator + R8CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as vector registers: +def VECREG : RegisterClass<"SPU", + [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64], + 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VECREGClass::iterator + VECREGClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + VECREGClass::iterator + VECREGClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h new file mode 100644 index 000000000000..6c3afdf41fdc --- /dev/null +++ b/lib/Target/CellSPU/SPURegisterNames.h @@ -0,0 +1,18 @@ +//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_REGISTER_NAMES_H +#define SPU_REGISTER_NAMES_H + +// Define symbolic names for Cell registers. This defines a mapping from +// register name to register number. +// +#include "SPUGenRegisterNames.inc" + +#endif diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td new file mode 100644 index 000000000000..785dc4660110 --- /dev/null +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -0,0 +1,57 @@ +//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
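+//
+// Scheduling information for the Cell SPU's two issue pipelines: loads,
+// stores, branches and shuffles issue to the odd pipeline, while arithmetic
+// and logical operations issue to the even pipeline (see the
+// EVEN_UNIT/ODD_UNIT functional units below).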
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Even pipeline:
+
+def EVEN_UNIT : FuncUnit;   // Even execution unit: (PC & 0x7 == 000)
+def ODD_UNIT  : FuncUnit;   // Odd execution unit:  (PC & 0x7 == 100)
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore    : InstrItinClass;              // ODD_UNIT
+def BranchHints  : InstrItinClass;              // ODD_UNIT
+def BranchResolv : InstrItinClass;              // ODD_UNIT
+def ChanOpSPR    : InstrItinClass;              // ODD_UNIT
+def ShuffleOp    : InstrItinClass;              // ODD_UNIT
+def SelectOp     : InstrItinClass;              // ODD_UNIT
+def GatherOp     : InstrItinClass;              // ODD_UNIT
+def LoadNOP      : InstrItinClass;              // ODD_UNIT
+def ExecNOP      : InstrItinClass;              // EVEN_UNIT
+def SPrecFP      : InstrItinClass;              // EVEN_UNIT
+def DPrecFP      : InstrItinClass;              // EVEN_UNIT
+def FPInt        : InstrItinClass;              // EVEN_UNIT (FP<->integer)
+def ByteOp       : InstrItinClass;              // EVEN_UNIT
+def IntegerOp    : InstrItinClass;              // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass;              // EVEN_UNIT
+def RotateShift  : InstrItinClass;              // EVEN_UNIT
+def ImmLoad      : InstrItinClass;              // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
+def SPUItineraries : ProcessorItineraries<[
+  InstrItinData<LoadStore   , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<BranchHints , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<BranchResolv, [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<ChanOpSPR   , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<ShuffleOp   , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<SelectOp    , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<GatherOp    , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<LoadNOP     , [InstrStage<1,  [ODD_UNIT]>]>,
+  InstrItinData<ExecNOP     , [InstrStage<1,  [EVEN_UNIT]>]>,
+  InstrItinData<SPrecFP     , [InstrStage<6,  [EVEN_UNIT]>]>,
+  InstrItinData<DPrecFP     , [InstrStage<13, [EVEN_UNIT]>]>,
+  InstrItinData<FPInt       , [InstrStage<2,  [EVEN_UNIT]>]>,
+  InstrItinData<ByteOp      , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<IntegerOp   , [InstrStage<2,  [EVEN_UNIT]>]>,
+  InstrItinData<IntegerMulDiv,[InstrStage<7,  [EVEN_UNIT]>]>,
+  InstrItinData<RotateShift , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<ImmLoad     , [InstrStage<2,  [EVEN_UNIT]>]>
+  ]>;
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..0a1c2f75cfe5
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,40 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPUGenSubtarget.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M,
+                           const std::string &FS) :
+  TM(tm),
+  StackAlignment(16),
+  ProcDirective(SPU::DEFAULT_PROC),
+  UseLargeMem(false)
+{
+  // Should be the target SPU processor type. For now, since there's only
+  // one, simply default to the current "v0" default:
+  std::string default_cpu("v0");
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..b6a34099b2f7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,95 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+  class Module;
+  class GlobalValue;
+  class TargetMachine;
+
+  namespace SPU {
+    enum {
+      PROC_NONE,
+      DEFAULT_PROC
+    };
+  }
+
+  class SPUSubtarget : public TargetSubtarget {
+  protected:
+    const TargetMachine &TM;
+
+    /// stackAlignment - The minimum alignment known to hold of the stack frame
+    /// on entry to the function and which must be maintained by every function.
+    unsigned StackAlignment;
+
+    /// Selected instruction itineraries (one entry per itinerary class).
+    InstrItineraryData InstrItins;
+
+    /// Which SPU processor (this isn't really used, but it's there to keep
+    /// the C compiler happy)
+    unsigned ProcDirective;
+
+    /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+    bool UseLargeMem;
+
+  public:
+    /// This constructor initializes the data members to match that
+    /// of the specified module.
+    ///
+    SPUSubtarget(const TargetMachine &TM, const Module &M,
+                 const std::string &FS);
+
+    /// ParseSubtargetFeatures - Parses features string setting specified
+    /// subtarget options. Definition of function is auto generated by tblgen.
+    std::string ParseSubtargetFeatures(const std::string &FS,
+                                       const std::string &CPU);
+
+    /// SetJITMode - This is called to inform the subtarget info that we are
+    /// producing code for the JIT.
+    void SetJITMode();
+
+    /// getStackAlignment - Returns the minimum alignment known to hold of the
+    /// stack frame on entry to the function and which must be maintained by
+    /// every function for this subtarget.
+    unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries based on
+    /// subtarget selection.
+    const InstrItineraryData &getInstrItineraryData() const {
+      return InstrItins;
+    }
+
+    /// Use large memory addressing predicate
+    bool usingLargeMem() const {
+      return UseLargeMem;
+    }
+
+    /// getTargetDataString - Return the pointer size and type alignment
+    /// properties of this subtarget.
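+    /// For example (an illustrative reading of the string returned below):
+    /// "E" selects big-endian layout, "p:32:32:128" declares 32-bit pointers
+    /// with 32-bit ABI alignment and 128-bit preferred alignment, and entries
+    /// such as "f64:64:128" give a type (here double) its ABI and preferred
+    /// alignments in bits.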
+ const char *getTargetDataString() const { + return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" + "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128" + "-s:128:128"; + } + }; +} // End llvm namespace + +#endif diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp new file mode 100644 index 000000000000..ff88ed810716 --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -0,0 +1,74 @@ +//===-- SPUTargetAsmInfo.cpp - Cell SPU asm properties ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SPUTargetAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "SPUTargetAsmInfo.h" +#include "SPUTargetMachine.h" +#include "llvm/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Dwarf.h" + +using namespace llvm; +using namespace llvm::dwarf; + +SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : + SPUTargetAsmInfo(TM) { + PCSymbol = "."; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + // This corresponds to what the gcc SPU compiler emits, for consistency. + CStringSection = ".rodata.str"; + + // Has leb128, .loc and .file + HasLEB128 = true; + HasDotLocAndDotFile = true; + + // BSS section needs to be emitted as ".section" + BSSSection = "\t.section\t.bss"; + BSSSection_ = getUnnamedSection("\t.section\t.bss", + SectionFlags::Writeable | SectionFlags::BSS, + true); + + SupportsDebugInformation = true; + NeedsSet = true; + SupportsMacInfoSection = false; + DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits"; + DwarfInfoSection = "\t.section .debug_info,\"\",@progbits"; + DwarfLineSection = "\t.section .debug_line,\"\",@progbits"; + DwarfFrameSection = "\t.section .debug_frame,\"\",@progbits"; + DwarfPubNamesSection = "\t.section .debug_pubnames,\"\",@progbits"; + DwarfPubTypesSection = "\t.section .debug_pubtypes,\"\",progbits"; + DwarfStrSection = "\t.section .debug_str,\"MS\",@progbits,1"; + DwarfLocSection = "\t.section .debug_loc,\"\",@progbits"; + DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits"; + DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits"; + DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",progbits"; + + // Exception handling is not supported on CellSPU (think about it: you only + // have 256K for code+data. Would you support exception handling?) + SupportsExceptionHandling = false; +} + +/// PreferredEHDataFormat - This hook allows the target to select data +/// format used for encoding pointers in exception handling data. Reason is +/// 0 for data, 1 for code labels, 2 for function pointers. Global is true +/// if the symbol can be relocated. +unsigned +SPULinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const { + // We really need to write something here. + return TargetAsmInfo::PreferredEHDataFormat(Reason, Global); +} + +// Instantiate default implementation. 
+TEMPLATE_INSTANTIATION(class SPUTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.h b/lib/Target/CellSPU/SPUTargetAsmInfo.h
new file mode 100644
index 000000000000..d10a56540592
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.h
@@ -0,0 +1,51 @@
+//===-- SPUTargetAsmInfo.h - Cell SPU asm properties -----------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "SPUSubtarget.h"
+
+namespace llvm {
+
+  // Forward declaration.
+  class SPUTargetMachine;
+
+  template <class BaseTAI>
+  struct SPUTargetAsmInfo : public BaseTAI {
+    explicit SPUTargetAsmInfo(const SPUTargetMachine &TM):
+      BaseTAI(TM) {
+      /* (unused today)
+       * const SPUSubtarget *Subtarget = &TM.getSubtarget<SPUSubtarget>(); */
+
+      BaseTAI::ZeroDirective = "\t.space\t";
+      BaseTAI::SetDirective = "\t.set";
+      BaseTAI::Data64bitsDirective = "\t.quad\t";
+      BaseTAI::AlignmentIsInBytes = false;
+      BaseTAI::LCOMMDirective = "\t.lcomm\t";
+      BaseTAI::InlineAsmStart = "# InlineAsm Start";
+      BaseTAI::InlineAsmEnd = "# InlineAsm End";
+    }
+  };
+
+  struct SPULinuxTargetAsmInfo : public SPUTargetAsmInfo<ELFTargetAsmInfo> {
+    explicit SPULinuxTargetAsmInfo(const SPUTargetMachine &TM);
+    virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                           bool Global) const;
+  };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..7fa902243c7b
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,98 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPURegisterNames.h"
+#include "SPUTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+
+using namespace llvm;
+
+/// CellSPUTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CellSPUTargetMachineModule; +int CellSPUTargetMachineModule = 0; + +namespace { + // Register the targets + RegisterTarget + CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]"); +} + +const std::pair * +SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const { + NumEntries = 1; + return &LR[0]; +} + +const TargetAsmInfo * +SPUTargetMachine::createTargetAsmInfo() const +{ + return new SPULinuxTargetAsmInfo(*this); +} + +unsigned +SPUTargetMachine::getModuleMatchQuality(const Module &M) +{ + // We strongly match "spu-*" or "cellspu-*". + std::string TT = M.getTargetTriple(); + if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu") + || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu") + || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-") + || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-")) + return 20; + + return 0; // No match at all... +} + +SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS) + : Subtarget(*this, M, FS), + DataLayout(Subtarget.getTargetDataString()), + InstrInfo(*this), + FrameInfo(*this), + TLInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) +{ + // For the time being, use static relocations, since there's really no + // support for PIC yet. + setRelocationModel(Reloc::Static); +} + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool +SPUTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) +{ + // Install an instruction selector. + PM.add(createSPUISelDag(*this)); + return false; +} + +bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h new file mode 100644 index 000000000000..cd3920333851 --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -0,0 +1,95 @@ +//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the CellSPU-specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPU_TARGETMACHINE_H +#define SPU_TARGETMACHINE_H + +#include "SPUSubtarget.h" +#include "SPUInstrInfo.h" +#include "SPUISelLowering.h" +#include "SPUFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { +class PassManager; +class GlobalValue; +class TargetFrameInfo; + +/// SPUTargetMachine +/// +class SPUTargetMachine : public LLVMTargetMachine { + SPUSubtarget Subtarget; + const TargetData DataLayout; + SPUInstrInfo InstrInfo; + SPUFrameInfo FrameInfo; + SPUTargetLowering TLInfo; + InstrItineraryData InstrItins; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + SPUTargetMachine(const Module &M, const std::string &FS); + + /// Return the subtarget implementation object + virtual const SPUSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + virtual const SPUInstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual const SPUFrameInfo *getFrameInfo() const { + return &FrameInfo; + } + /*! + \note Cell SPU does not support JIT today. It could support JIT at some + point. + */ + virtual TargetJITInfo *getJITInfo() { + return NULL; + } + + //! Module match function + /*! + Module matching function called by TargetMachineRegistry(). + */ + static unsigned getModuleMatchQuality(const Module &M); + + virtual SPUTargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + + virtual const SPURegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual const TargetData *getTargetData() const { + return &DataLayout; + } + + virtual const InstrItineraryData getInstrItineraryData() const { + return InstrItins; + } + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt new file mode 100644 index 000000000000..f8182b80c94c --- /dev/null +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_target(CppBackend + CPPBackend.cpp + ) diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp new file mode 100644 index 000000000000..4082989c4c48 --- /dev/null +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -0,0 +1,2007 @@ +//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the writing of the LLVM IR as a set of C++ calls to the +// LLVM IR interface. The input module is assumed to be verified. 
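+// For example (roughly -- the exact names are derived from the input), an IR
+// instruction such as
+//   %sum = add i32 %a, %b
+// becomes a generated call along the lines of
+//   BinaryOperator* int32_sum = BinaryOperator::Create(Instruction::Add,
+//     int32_a, int32_b, "sum", label_entry);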
+// +//===----------------------------------------------------------------------===// + +#include "CPPTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Config/config.h" +#include +#include + +using namespace llvm; + +static cl::opt +FuncName("cppfname", cl::desc("Specify the name of the generated function"), + cl::value_desc("function name")); + +enum WhatToGenerate { + GenProgram, + GenModule, + GenContents, + GenFunction, + GenFunctions, + GenInline, + GenVariable, + GenType +}; + +static cl::opt GenerationType("cppgen", cl::Optional, + cl::desc("Choose what kind of output to generate"), + cl::init(GenProgram), + cl::values( + clEnumValN(GenProgram, "program", "Generate a complete program"), + clEnumValN(GenModule, "module", "Generate a module definition"), + clEnumValN(GenContents, "contents", "Generate contents of a module"), + clEnumValN(GenFunction, "function", "Generate a function definition"), + clEnumValN(GenFunctions,"functions", "Generate all function definitions"), + clEnumValN(GenInline, "inline", "Generate an inline function"), + clEnumValN(GenVariable, "variable", "Generate a variable definition"), + clEnumValN(GenType, "type", "Generate a type definition"), + clEnumValEnd + ) +); + +static cl::opt NameToGenerate("cppfor", cl::Optional, + cl::desc("Specify the name of the thing to generate"), + cl::init("!bad!")); + +/// CppBackendTargetMachineModule - Note that this is used on hosts +/// that cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int CppBackendTargetMachineModule; +int CppBackendTargetMachineModule = 0; + +// Register the target. +static RegisterTarget X("cpp", "C++ backend"); + +namespace { + typedef std::vector TypeList; + typedef std::map TypeMap; + typedef std::map ValueMap; + typedef std::set NameSet; + typedef std::set TypeSet; + typedef std::set ValueSet; + typedef std::map ForwardRefMap; + + /// CppWriter - This class is the main chunk of code that converts an LLVM + /// module to a C++ translation unit. 
+ class CppWriter : public ModulePass { + raw_ostream &Out; + const Module *TheModule; + uint64_t uniqueNum; + TypeMap TypeNames; + ValueMap ValueNames; + TypeMap UnresolvedTypes; + TypeList TypeStack; + NameSet UsedNames; + TypeSet DefinedTypes; + ValueSet DefinedValues; + ForwardRefMap ForwardRefs; + bool is_inline; + + public: + static char ID; + explicit CppWriter(raw_ostream &o) : + ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {} + + virtual const char *getPassName() const { return "C++ backend"; } + + bool runOnModule(Module &M); + + void printProgram(const std::string& fname, const std::string& modName ); + void printModule(const std::string& fname, const std::string& modName ); + void printContents(const std::string& fname, const std::string& modName ); + void printFunction(const std::string& fname, const std::string& funcName ); + void printFunctions(); + void printInline(const std::string& fname, const std::string& funcName ); + void printVariable(const std::string& fname, const std::string& varName ); + void printType(const std::string& fname, const std::string& typeName ); + + void error(const std::string& msg); + + private: + void printLinkageType(GlobalValue::LinkageTypes LT); + void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); + void printCallingConv(unsigned cc); + void printEscapedString(const std::string& str); + void printCFP(const ConstantFP* CFP); + + std::string getCppName(const Type* val); + inline void printCppName(const Type* val); + + std::string getCppName(const Value* val); + inline void printCppName(const Value* val); + + void printAttributes(const AttrListPtr &PAL, const std::string &name); + bool printTypeInternal(const Type* Ty); + inline void printType(const Type* Ty); + void printTypes(const Module* M); + + void printConstant(const Constant *CPV); + void printConstants(const Module* M); + + void printVariableUses(const GlobalVariable *GV); + void printVariableHead(const GlobalVariable *GV); + void printVariableBody(const GlobalVariable *GV); + + void printFunctionUses(const Function *F); + void printFunctionHead(const Function *F); + void printFunctionBody(const Function *F); + void printInstruction(const Instruction *I, const std::string& bbname); + std::string getOpName(Value*); + + void printModuleBody(); + }; + + static unsigned indent_level = 0; + inline raw_ostream& nl(raw_ostream& Out, int delta = 0) { + Out << "\n"; + if (delta >= 0 || indent_level >= unsigned(-delta)) + indent_level += delta; + for (unsigned i = 0; i < indent_level; ++i) + Out << " "; + return Out; + } + + inline void in() { indent_level++; } + inline void out() { if (indent_level >0) indent_level--; } + + inline void + sanitize(std::string& str) { + for (size_t i = 0; i < str.length(); ++i) + if (!isalnum(str[i]) && str[i] != '_') + str[i] = '_'; + } + + inline std::string + getTypePrefix(const Type* Ty ) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "void_"; + case Type::IntegerTyID: + return std::string("int") + utostr(cast(Ty)->getBitWidth()) + + "_"; + case Type::FloatTyID: return "float_"; + case Type::DoubleTyID: return "double_"; + case Type::LabelTyID: return "label_"; + case Type::FunctionTyID: return "func_"; + case Type::StructTyID: return "struct_"; + case Type::ArrayTyID: return "array_"; + case Type::PointerTyID: return "ptr_"; + case Type::VectorTyID: return "packed_"; + case Type::OpaqueTyID: return "opaque_"; + default: return "other_"; + } + return "unknown_"; + } + + // Looks up the type in the symbol table and returns a 
pointer to its name or + // a null pointer if it wasn't found. Note that this isn't the same as the + // Mode::getTypeName function which will return an empty string, not a null + // pointer if the name is not found. + inline const std::string* + findTypeName(const TypeSymbolTable& ST, const Type* Ty) { + TypeSymbolTable::const_iterator TI = ST.begin(); + TypeSymbolTable::const_iterator TE = ST.end(); + for (;TI != TE; ++TI) + if (TI->second == Ty) + return &(TI->first); + return 0; + } + + void CppWriter::error(const std::string& msg) { + cerr << msg << "\n"; + exit(2); + } + + // printCFP - Print a floating point constant .. very carefully :) + // This makes sure that conversion to/from floating yields the same binary + // result so that we don't lose precision. + void CppWriter::printCFP(const ConstantFP *CFP) { + bool ignored; + APFloat APF = APFloat(CFP->getValueAPF()); // copy + if (CFP->getType() == Type::FloatTy) + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); + Out << "ConstantFP::get("; + Out << "APFloat("; +#if HAVE_PRINTF_A + char Buffer[100]; + sprintf(Buffer, "%A", APF.convertToDouble()); + if ((!strncmp(Buffer, "0x", 2) || + !strncmp(Buffer, "-0x", 3) || + !strncmp(Buffer, "+0x", 3)) && + APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { + if (CFP->getType() == Type::DoubleTy) + Out << "BitsToDouble(" << Buffer << ")"; + else + Out << "BitsToFloat((float)" << Buffer << ")"; + Out << ")"; + } else { +#endif + std::string StrVal = ftostr(CFP->getValueAPF()); + + while (StrVal[0] == ' ') + StrVal.erase(StrVal.begin()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. + if (((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + (CFP->isExactlyValue(atof(StrVal.c_str())))) { + if (CFP->getType() == Type::DoubleTy) + Out << StrVal; + else + Out << StrVal << "f"; + } else if (CFP->getType() == Type::DoubleTy) + Out << "BitsToDouble(0x" + << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) + << "ULL) /* " << StrVal << " */"; + else + Out << "BitsToFloat(0x" + << utohexstr((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue()) + << "U) /* " << StrVal << " */"; + Out << ")"; +#if HAVE_PRINTF_A + } +#endif + Out << ")"; + } + + void CppWriter::printCallingConv(unsigned cc){ + // Print the calling convention. 
+ switch (cc) { + case CallingConv::C: Out << "CallingConv::C"; break; + case CallingConv::Fast: Out << "CallingConv::Fast"; break; + case CallingConv::Cold: Out << "CallingConv::Cold"; break; + case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; + default: Out << cc; break; + } + } + + void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::InternalLinkage: + Out << "GlobalValue::InternalLinkage"; break; + case GlobalValue::PrivateLinkage: + Out << "GlobalValue::PrivateLinkage"; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "GlobalValue::AvailableExternallyLinkage "; break; + case GlobalValue::LinkOnceAnyLinkage: + Out << "GlobalValue::LinkOnceAnyLinkage "; break; + case GlobalValue::LinkOnceODRLinkage: + Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::WeakAnyLinkage: + Out << "GlobalValue::WeakAnyLinkage"; break; + case GlobalValue::WeakODRLinkage: + Out << "GlobalValue::WeakODRLinkage"; break; + case GlobalValue::AppendingLinkage: + Out << "GlobalValue::AppendingLinkage"; break; + case GlobalValue::ExternalLinkage: + Out << "GlobalValue::ExternalLinkage"; break; + case GlobalValue::DLLImportLinkage: + Out << "GlobalValue::DLLImportLinkage"; break; + case GlobalValue::DLLExportLinkage: + Out << "GlobalValue::DLLExportLinkage"; break; + case GlobalValue::ExternalWeakLinkage: + Out << "GlobalValue::ExternalWeakLinkage"; break; + case GlobalValue::GhostLinkage: + Out << "GlobalValue::GhostLinkage"; break; + case GlobalValue::CommonLinkage: + Out << "GlobalValue::CommonLinkage"; break; + } + } + + void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { + switch (VisType) { + default: assert(0 && "Unknown GVar visibility"); + case GlobalValue::DefaultVisibility: + Out << "GlobalValue::DefaultVisibility"; + break; + case GlobalValue::HiddenVisibility: + Out << "GlobalValue::HiddenVisibility"; + break; + case GlobalValue::ProtectedVisibility: + Out << "GlobalValue::ProtectedVisibility"; + break; + } + } + + // printEscapedString - Print each character of the specified string, escaping + // it if it is not printable or if it is an escape char. + void CppWriter::printEscapedString(const std::string &Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '"' && C != '\\') { + Out << C; + } else { + Out << "\\x" + << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) + << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + } + } + } + + std::string CppWriter::getCppName(const Type* Ty) { + // First, handle the primitive types .. easy + if (Ty->isPrimitiveType() || Ty->isInteger()) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "Type::VoidTy"; + case Type::IntegerTyID: { + unsigned BitWidth = cast(Ty)->getBitWidth(); + return "IntegerType::get(" + utostr(BitWidth) + ")"; + } + case Type::X86_FP80TyID: return "Type::X86_FP80Ty"; + case Type::FloatTyID: return "Type::FloatTy"; + case Type::DoubleTyID: return "Type::DoubleTy"; + case Type::LabelTyID: return "Type::LabelTy"; + default: + error("Invalid primitive type"); + break; + } + return "Type::VoidTy"; // shouldn't be returned, but make it sensible + } + + // Now, see if we've seen the type before and return that + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) + return I->second; + + // Okay, let's build a new name for this type. 
Start with a prefix + const char* prefix = 0; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: prefix = "FuncTy_"; break; + case Type::StructTyID: prefix = "StructTy_"; break; + case Type::ArrayTyID: prefix = "ArrayTy_"; break; + case Type::PointerTyID: prefix = "PointerTy_"; break; + case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; + case Type::VectorTyID: prefix = "VectorTy_"; break; + default: prefix = "OtherTy_"; break; // prevent breakage + } + + // See if the type has a name in the symboltable and build accordingly + const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); + std::string name; + if (tName) + name = std::string(prefix) + *tName; + else + name = std::string(prefix) + utostr(uniqueNum++); + sanitize(name); + + // Save the name + return TypeNames[Ty] = name; + } + + void CppWriter::printCppName(const Type* Ty) { + printEscapedString(getCppName(Ty)); + } + + std::string CppWriter::getCppName(const Value* val) { + std::string name; + ValueMap::iterator I = ValueNames.find(val); + if (I != ValueNames.end() && I->first == val) + return I->second; + + if (const GlobalVariable* GV = dyn_cast(val)) { + name = std::string("gvar_") + + getTypePrefix(GV->getType()->getElementType()); + } else if (isa(val)) { + name = std::string("func_"); + } else if (const Constant* C = dyn_cast(val)) { + name = std::string("const_") + getTypePrefix(C->getType()); + } else if (const Argument* Arg = dyn_cast(val)) { + if (is_inline) { + unsigned argNum = std::distance(Arg->getParent()->arg_begin(), + Function::const_arg_iterator(Arg)) + 1; + name = std::string("arg_") + utostr(argNum); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; + } else { + name = getTypePrefix(val->getType()); + } + } else { + name = getTypePrefix(val->getType()); + } + name += (val->hasName() ? 
val->getName() : utostr(uniqueNum++)); + sanitize(name); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; + } + + void CppWriter::printCppName(const Value* val) { + printEscapedString(getCppName(val)); + } + + void CppWriter::printAttributes(const AttrListPtr &PAL, + const std::string &name) { + Out << "AttrListPtr " << name << "_PAL;"; + nl(Out); + if (!PAL.isEmpty()) { + Out << '{'; in(); nl(Out); + Out << "SmallVector Attrs;"; nl(Out); + Out << "AttributeWithIndex PAWI;"; nl(Out); + for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { + unsigned index = PAL.getSlot(i).Index; + Attributes attrs = PAL.getSlot(i).Attrs; + Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; +#define HANDLE_ATTR(X) \ + if (attrs & Attribute::X) \ + Out << " | Attribute::" #X; \ + attrs &= ~Attribute::X; + + HANDLE_ATTR(SExt); + HANDLE_ATTR(ZExt); + HANDLE_ATTR(NoReturn); + HANDLE_ATTR(InReg); + HANDLE_ATTR(StructRet); + HANDLE_ATTR(NoUnwind); + HANDLE_ATTR(NoAlias); + HANDLE_ATTR(ByVal); + HANDLE_ATTR(Nest); + HANDLE_ATTR(ReadNone); + HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(NoInline); + HANDLE_ATTR(AlwaysInline); + HANDLE_ATTR(OptimizeForSize); + HANDLE_ATTR(StackProtect); + HANDLE_ATTR(StackProtectReq); + HANDLE_ATTR(NoCapture); +#undef HANDLE_ATTR + assert(attrs == 0 && "Unhandled attribute!"); + Out << ";"; + nl(Out); + Out << "Attrs.push_back(PAWI);"; + nl(Out); + } + Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + nl(Out); + out(); nl(Out); + Out << '}'; nl(Out); + } + } + + bool CppWriter::printTypeInternal(const Type* Ty) { + // We don't print definitions for primitive types + if (Ty->isPrimitiveType() || Ty->isInteger()) + return false; + + // If we already defined this type, we don't need to define it again. + if (DefinedTypes.find(Ty) != DefinedTypes.end()) + return false; + + // Everything below needs the name for the type so get it now. + std::string typeName(getCppName(Ty)); + + // Search the type stack for recursion. If we find it, then generate this + // as an OpaqueType, but make sure not to do this multiple times because + // the type could appear in multiple places on the stack. Once the opaque + // definition is issued, it must not be re-issued. Consequently we have to + // check the UnresolvedTypes list as well. + TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), + Ty); + if (TI != TypeStack.end()) { + TypeMap::const_iterator I = UnresolvedTypes.find(Ty); + if (I == UnresolvedTypes.end()) { + Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();"; + nl(Out); + UnresolvedTypes[Ty] = typeName; + } + return true; + } + + // We're going to print a derived type which, by definition, contains other + // types. So, push this one we're printing onto the type stack to assist with + // recursive definitions. 
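+    //
+    // For example, a self-referential type such as
+    //   %node = type { i32, %node* }
+    // is first emitted as a forward reference,
+    //   PATypeHolder node_fwd = OpaqueType::get();
+    // and the opaque type is later resolved with refineAbstractTypeTo() once
+    // the full definition has been printed (see below).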
+ TypeStack.push_back(Ty); + + // Print the type definition + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType* FT = cast(Ty); + Out << "std::vector" << typeName << "_args;"; + nl(Out); + FunctionType::param_iterator PI = FT->param_begin(); + FunctionType::param_iterator PE = FT->param_end(); + for (; PI != PE; ++PI) { + const Type* argTy = static_cast(*PI); + bool isForward = printTypeInternal(argTy); + std::string argName(getCppName(argTy)); + Out << typeName << "_args.push_back(" << argName; + if (isForward) + Out << "_fwd"; + Out << ");"; + nl(Out); + } + bool isForward = printTypeInternal(FT->getReturnType()); + std::string retTypeName(getCppName(FT->getReturnType())); + Out << "FunctionType* " << typeName << " = FunctionType::get("; + in(); nl(Out) << "/*Result=*/" << retTypeName; + if (isForward) + Out << "_fwd"; + Out << ","; + nl(Out) << "/*Params=*/" << typeName << "_args,"; + nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; + out(); + nl(Out); + break; + } + case Type::StructTyID: { + const StructType* ST = cast(Ty); + Out << "std::vector" << typeName << "_fields;"; + nl(Out); + StructType::element_iterator EI = ST->element_begin(); + StructType::element_iterator EE = ST->element_end(); + for (; EI != EE; ++EI) { + const Type* fieldTy = static_cast(*EI); + bool isForward = printTypeInternal(fieldTy); + std::string fieldName(getCppName(fieldTy)); + Out << typeName << "_fields.push_back(" << fieldName; + if (isForward) + Out << "_fwd"; + Out << ");"; + nl(Out); + } + Out << "StructType* " << typeName << " = StructType::get(" + << typeName << "_fields, /*isPacked=*/" + << (ST->isPacked() ? "true" : "false") << ");"; + nl(Out); + break; + } + case Type::ArrayTyID: { + const ArrayType* AT = cast(Ty); + const Type* ET = AT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "ArrayType* " << typeName << " = ArrayType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(AT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::PointerTyID: { + const PointerType* PT = cast(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "PointerType* " << typeName << " = PointerType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getAddressSpace()) << ");"; + nl(Out); + break; + } + case Type::VectorTyID: { + const VectorType* PT = cast(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "VectorType* " << typeName << " = VectorType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::OpaqueTyID: { + Out << "OpaqueType* " << typeName << " = OpaqueType::get();"; + nl(Out); + break; + } + default: + error("Invalid TypeID"); + } + + // If the type had a name, make sure we recreate it. + const std::string* progTypeName = + findTypeName(TheModule->getTypeSymbolTable(),Ty); + if (progTypeName) { + Out << "mod->addTypeName(\"" << *progTypeName << "\", " + << typeName << ");"; + nl(Out); + } + + // Pop us off the type stack + TypeStack.pop_back(); + + // Indicate that this type is now defined. + DefinedTypes.insert(Ty); + + // Early resolve as many unresolved types as possible. Search the unresolved + // types map for the type we just printed. 
Now that its definition is complete
+    // we can resolve any previous references to it. This prevents a cascade of
+    // unresolved types.
+    TypeMap::iterator I = UnresolvedTypes.find(Ty);
+    if (I != UnresolvedTypes.end()) {
+      Out << "cast<OpaqueType>(" << I->second
+          << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
+      nl(Out);
+      Out << I->second << " = cast<";
+      switch (Ty->getTypeID()) {
+      case Type::FunctionTyID: Out << "FunctionType"; break;
+      case Type::ArrayTyID: Out << "ArrayType"; break;
+      case Type::StructTyID: Out << "StructType"; break;
+      case Type::VectorTyID: Out << "VectorType"; break;
+      case Type::PointerTyID: Out << "PointerType"; break;
+      case Type::OpaqueTyID: Out << "OpaqueType"; break;
+      default: Out << "NoSuchDerivedType"; break;
+      }
+      Out << ">(" << I->second << "_fwd.get());";
+      nl(Out); nl(Out);
+      UnresolvedTypes.erase(I);
+    }
+
+    // Finally, separate the type definition from others with a newline.
+    nl(Out);
+
+    // We weren't a recursive type
+    return false;
+  }
+
+  // Prints a type definition via printTypeInternal, which returns true if it
+  // could not resolve all the types in the definition and had to fall back on
+  // a forward reference.
+  void CppWriter::printType(const Type* Ty) {
+    assert(TypeStack.empty());
+    TypeStack.clear();
+    printTypeInternal(Ty);
+    assert(TypeStack.empty());
+  }
+
+  void CppWriter::printTypes(const Module* M) {
+    // Walk the symbol table and print out all its types
+    const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+    for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+         TI != TE; ++TI) {
+
+      // For primitive types and types already defined, just add a name
+      TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+      if (TI->second->isInteger() || TI->second->isPrimitiveType() ||
+          TNI != TypeNames.end()) {
+        Out << "mod->addTypeName(\"";
+        printEscapedString(TI->first);
+        Out << "\", " << getCppName(TI->second) << ");";
+        nl(Out);
+        // For everything else, define the type
+      } else {
+        printType(TI->second);
+      }
+    }
+
+    // Add all of the global variables to the value table...
+    for (Module::const_global_iterator I = TheModule->global_begin(),
+           E = TheModule->global_end(); I != E; ++I) {
+      if (I->hasInitializer())
+        printType(I->getInitializer()->getType());
+      printType(I->getType());
+    }
+
+    // Add all the functions to the table
+    for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+         FI != FE; ++FI) {
+      printType(FI->getReturnType());
+      printType(FI->getFunctionType());
+      // Add all the function arguments
+      for (Function::const_arg_iterator AI = FI->arg_begin(),
+             AE = FI->arg_end(); AI != AE; ++AI) {
+        printType(AI->getType());
+      }
+
+      // Add all of the basic blocks and instructions
+      for (Function::const_iterator BB = FI->begin(),
+           E = FI->end(); BB != E; ++BB) {
+        printType(BB->getType());
+        for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+             ++I) {
+          printType(I->getType());
+          for (unsigned i = 0; i < I->getNumOperands(); ++i)
+            printType(I->getOperand(i)->getType());
+        }
+      }
+    }
+  }
+
+
+  // printConstant - Print out a constant pool entry...
+  void CppWriter::printConstant(const Constant *CV) {
+    // First, if the constant is actually a GlobalValue (variable or function)
+    // or it's already in the constant list then we've printed it already and we
+    // can just return.
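+    // (For example, emitting the constant [2 x i32] [i32 1, i32 2] prints a
+    // ConstantInt for each element first and only then the aggregate, because
+    // printConstant recurses into its operands before printing the parent;
+    // see the recursive calls below.)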
+ if (isa(CV) || ValueNames.find(CV) != ValueNames.end()) + return; + + std::string constName(getCppName(CV)); + std::string typeName(getCppName(CV->getType())); + + if (isa(CV)) { + // Skip variables and functions, we emit them elsewhere + return; + } + + if (const ConstantInt *CI = dyn_cast(CV)) { + std::string constValue = CI->getValue().toString(10, true); + Out << "ConstantInt* " << constName << " = ConstantInt::get(APInt(" + << cast(CI->getType())->getBitWidth() << ", \"" + << constValue << "\", " << constValue.length() << ", 10));"; + } else if (isa(CV)) { + Out << "ConstantAggregateZero* " << constName + << " = ConstantAggregateZero::get(" << typeName << ");"; + } else if (isa(CV)) { + Out << "ConstantPointerNull* " << constName + << " = ConstantPointerNull::get(" << typeName << ");"; + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + Out << "ConstantFP* " << constName << " = "; + printCFP(CFP); + Out << ";"; + } else if (const ConstantArray *CA = dyn_cast(CV)) { + if (CA->isString() && CA->getType()->getElementType() == Type::Int8Ty) { + Out << "Constant* " << constName << " = ConstantArray::get(\""; + std::string tmp = CA->getAsString(); + bool nullTerminate = false; + if (tmp[tmp.length()-1] == 0) { + tmp.erase(tmp.length()-1); + nullTerminate = true; + } + printEscapedString(tmp); + // Determine if we want null termination or not. + if (nullTerminate) + Out << "\", true"; // Indicate that the null terminator should be + // added. + else + Out << "\", false";// No null terminator + Out << ");"; + } else { + Out << "std::vector " << constName << "_elems;"; + nl(Out); + unsigned N = CA->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CA->getOperand(i)); // recurse to print operands + Out << constName << "_elems.push_back(" + << getCppName(CA->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantArray::get(" + << typeName << ", " << constName << "_elems);"; + } + } else if (const ConstantStruct *CS = dyn_cast(CV)) { + Out << "std::vector " << constName << "_fields;"; + nl(Out); + unsigned N = CS->getNumOperands(); + for (unsigned i = 0; i < N; i++) { + printConstant(CS->getOperand(i)); + Out << constName << "_fields.push_back(" + << getCppName(CS->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantStruct::get(" + << typeName << ", " << constName << "_fields);"; + } else if (const ConstantVector *CP = dyn_cast(CV)) { + Out << "std::vector " << constName << "_elems;"; + nl(Out); + unsigned N = CP->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CP->getOperand(i)); + Out << constName << "_elems.push_back(" + << getCppName(CP->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantVector::get(" + << typeName << ", " << constName << "_elems);"; + } else if (isa(CV)) { + Out << "UndefValue* " << constName << " = UndefValue::get(" + << typeName << ");"; + } else if (const ConstantExpr *CE = dyn_cast(CV)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Out << "std::vector " << constName << "_indices;"; + nl(Out); + printConstant(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { + printConstant(CE->getOperand(i)); + Out << constName << "_indices.push_back(" + << getCppName(CE->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName + << " = ConstantExpr::getGetElementPtr(" + << getCppName(CE->getOperand(0)) << ", " + << "&" << constName << "_indices[0], " + << constName << 
"_indices.size()" + << " );"; + } else if (CE->isCast()) { + printConstant(CE->getOperand(0)); + Out << "Constant* " << constName << " = ConstantExpr::getCast("; + switch (CE->getOpcode()) { + default: assert(0 && "Invalid cast opcode"); + case Instruction::Trunc: Out << "Instruction::Trunc"; break; + case Instruction::ZExt: Out << "Instruction::ZExt"; break; + case Instruction::SExt: Out << "Instruction::SExt"; break; + case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break; + case Instruction::FPExt: Out << "Instruction::FPExt"; break; + case Instruction::FPToUI: Out << "Instruction::FPToUI"; break; + case Instruction::FPToSI: Out << "Instruction::FPToSI"; break; + case Instruction::UIToFP: Out << "Instruction::UIToFP"; break; + case Instruction::SIToFP: Out << "Instruction::SIToFP"; break; + case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break; + case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break; + case Instruction::BitCast: Out << "Instruction::BitCast"; break; + } + Out << ", " << getCppName(CE->getOperand(0)) << ", " + << getCppName(CE->getType()) << ");"; + } else { + unsigned N = CE->getNumOperands(); + for (unsigned i = 0; i < N; ++i ) { + printConstant(CE->getOperand(i)); + } + Out << "Constant* " << constName << " = ConstantExpr::"; + switch (CE->getOpcode()) { + case Instruction::Add: Out << "getAdd("; break; + case Instruction::Sub: Out << "getSub("; break; + case Instruction::Mul: Out << "getMul("; break; + case Instruction::UDiv: Out << "getUDiv("; break; + case Instruction::SDiv: Out << "getSDiv("; break; + case Instruction::FDiv: Out << "getFDiv("; break; + case Instruction::URem: Out << "getURem("; break; + case Instruction::SRem: Out << "getSRem("; break; + case Instruction::FRem: Out << "getFRem("; break; + case Instruction::And: Out << "getAnd("; break; + case Instruction::Or: Out << "getOr("; break; + case Instruction::Xor: Out << "getXor("; break; + case Instruction::ICmp: + Out << "getICmp(ICmpInst::ICMP_"; + switch (CE->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "EQ"; break; + case ICmpInst::ICMP_NE: Out << "NE"; break; + case ICmpInst::ICMP_SLT: Out << "SLT"; break; + case ICmpInst::ICMP_ULT: Out << "ULT"; break; + case ICmpInst::ICMP_SGT: Out << "SGT"; break; + case ICmpInst::ICMP_UGT: Out << "UGT"; break; + case ICmpInst::ICMP_SLE: Out << "SLE"; break; + case ICmpInst::ICMP_ULE: Out << "ULE"; break; + case ICmpInst::ICMP_SGE: Out << "SGE"; break; + case ICmpInst::ICMP_UGE: Out << "UGE"; break; + default: error("Invalid ICmp Predicate"); + } + break; + case Instruction::FCmp: + Out << "getFCmp(FCmpInst::FCMP_"; + switch (CE->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FALSE"; break; + case FCmpInst::FCMP_ORD: Out << "ORD"; break; + case FCmpInst::FCMP_UNO: Out << "UNO"; break; + case FCmpInst::FCMP_OEQ: Out << "OEQ"; break; + case FCmpInst::FCMP_UEQ: Out << "UEQ"; break; + case FCmpInst::FCMP_ONE: Out << "ONE"; break; + case FCmpInst::FCMP_UNE: Out << "UNE"; break; + case FCmpInst::FCMP_OLT: Out << "OLT"; break; + case FCmpInst::FCMP_ULT: Out << "ULT"; break; + case FCmpInst::FCMP_OGT: Out << "OGT"; break; + case FCmpInst::FCMP_UGT: Out << "UGT"; break; + case FCmpInst::FCMP_OLE: Out << "OLE"; break; + case FCmpInst::FCMP_ULE: Out << "ULE"; break; + case FCmpInst::FCMP_OGE: Out << "OGE"; break; + case FCmpInst::FCMP_UGE: Out << "UGE"; break; + case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; + default: error("Invalid FCmp Predicate"); + } + break; + case Instruction::Shl: Out << "getShl("; break; + case 
Instruction::LShr: Out << "getLShr("; break; + case Instruction::AShr: Out << "getAShr("; break; + case Instruction::Select: Out << "getSelect("; break; + case Instruction::ExtractElement: Out << "getExtractElement("; break; + case Instruction::InsertElement: Out << "getInsertElement("; break; + case Instruction::ShuffleVector: Out << "getShuffleVector("; break; + default: + error("Invalid constant expression"); + break; + } + Out << getCppName(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i) + Out << ", " << getCppName(CE->getOperand(i)); + Out << ");"; + } + } else { + error("Bad Constant"); + Out << "Constant* " << constName << " = 0; "; + } + nl(Out); + } + + void CppWriter::printConstants(const Module* M) { + // Traverse all the global variables looking for constant initializers + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (I->hasInitializer()) + printConstant(I->getInitializer()); + + // Traverse the LLVM functions looking for constants + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + if (Constant* C = dyn_cast(I->getOperand(i))) { + printConstant(C); + } + } + } + } + } + } + + void CppWriter::printVariableUses(const GlobalVariable *GV) { + nl(Out) << "// Type Definitions"; + nl(Out); + printType(GV->getType()); + if (GV->hasInitializer()) { + Constant* Init = GV->getInitializer(); + printType(Init->getType()); + if (Function* F = dyn_cast(Init)) { + nl(Out)<< "/ Function Declarations"; nl(Out); + printFunctionHead(F); + } else if (GlobalVariable* gv = dyn_cast(Init)) { + nl(Out) << "// Global Variable Declarations"; nl(Out); + printVariableHead(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(gv); + } + if (GlobalVariable* gv = dyn_cast(Init)) { + nl(Out) << "// Global Variable Definitions"; nl(Out); + printVariableBody(gv); + } + } + } + + void CppWriter::printVariableHead(const GlobalVariable *GV) { + nl(Out) << "GlobalVariable* " << getCppName(GV); + if (is_inline) { + Out << " = mod->getGlobalVariable("; + printEscapedString(GV->getName()); + Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; + nl(Out) << "if (!" 
<< getCppName(GV) << ") {"; + in(); nl(Out) << getCppName(GV); + } + Out << " = new GlobalVariable("; + nl(Out) << "/*Type=*/"; + printCppName(GV->getType()->getElementType()); + Out << ","; + nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); + Out << ","; + nl(Out) << "/*Linkage=*/"; + printLinkageType(GV->getLinkage()); + Out << ","; + nl(Out) << "/*Initializer=*/0, "; + if (GV->hasInitializer()) { + Out << "// has initializer, specified below"; + } + nl(Out) << "/*Name=*/\""; + printEscapedString(GV->getName()); + Out << "\","; + nl(Out) << "mod);"; + nl(Out); + + if (GV->hasSection()) { + printCppName(GV); + Out << "->setSection(\""; + printEscapedString(GV->getSection()); + Out << "\");"; + nl(Out); + } + if (GV->getAlignment()) { + printCppName(GV); + Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; + nl(Out); + } + if (GV->getVisibility() != GlobalValue::DefaultVisibility) { + printCppName(GV); + Out << "->setVisibility("; + printVisibilityType(GV->getVisibility()); + Out << ");"; + nl(Out); + } + if (is_inline) { + out(); Out << "}"; nl(Out); + } + } + + void CppWriter::printVariableBody(const GlobalVariable *GV) { + if (GV->hasInitializer()) { + printCppName(GV); + Out << "->setInitializer("; + Out << getCppName(GV->getInitializer()) << ");"; + nl(Out); + } + } + + std::string CppWriter::getOpName(Value* V) { + if (!isa(V) || DefinedValues.find(V) != DefinedValues.end()) + return getCppName(V); + + // See if its alread in the map of forward references, if so just return the + // name we already set up for it + ForwardRefMap::const_iterator I = ForwardRefs.find(V); + if (I != ForwardRefs.end()) + return I->second; + + // This is a new forward reference. Generate a unique name for it + std::string result(std::string("fwdref_") + utostr(uniqueNum++)); + + // Yes, this is a hack. An Argument is the smallest instantiable value that + // we can make as a placeholder for the real value. We'll replace these + // Argument instances later. + Out << "Argument* " << result << " = new Argument(" + << getCppName(V->getType()) << ");"; + nl(Out); + ForwardRefs[V] = result; + return result; + } + + // printInstruction - This member is called for each Instruction in a function. + void CppWriter::printInstruction(const Instruction *I, + const std::string& bbname) { + std::string iName(getCppName(I)); + + // Before we emit this instruction, we need to take care of generating any + // forward references. So, we get the names of all the operands in advance + std::string* opNames = new std::string[I->getNumOperands()]; + for (unsigned i = 0; i < I->getNumOperands(); i++) { + opNames[i] = getOpName(I->getOperand(i)); + } + + switch (I->getOpcode()) { + default: + error("Invalid instruction"); + break; + + case Instruction::Ret: { + const ReturnInst* ret = cast(I); + Out << "ReturnInst::Create(" + << (ret->getReturnValue() ? 
opNames[0] + ", " : "") << bbname << ");"; + break; + } + case Instruction::Br: { + const BranchInst* br = cast(I); + Out << "BranchInst::Create(" ; + if (br->getNumOperands() == 3 ) { + Out << opNames[2] << ", " + << opNames[1] << ", " + << opNames[0] << ", "; + + } else if (br->getNumOperands() == 1) { + Out << opNames[0] << ", "; + } else { + error("Branch with 2 operands?"); + } + Out << bbname << ");"; + break; + } + case Instruction::Switch: { + const SwitchInst* sw = cast(I); + Out << "SwitchInst* " << iName << " = SwitchInst::Create(" + << opNames[0] << ", " + << opNames[1] << ", " + << sw->getNumCases() << ", " << bbname << ");"; + nl(Out); + for (unsigned i = 2; i < sw->getNumOperands(); i += 2 ) { + Out << iName << "->addCase(" + << opNames[i] << ", " + << opNames[i+1] << ");"; + nl(Out); + } + break; + } + case Instruction::Invoke: { + const InvokeInst* inv = cast(I); + Out << "std::vector " << iName << "_params;"; + nl(Out); + for (unsigned i = 3; i < inv->getNumOperands(); ++i) { + Out << iName << "_params.push_back(" + << opNames[i] << ");"; + nl(Out); + } + Out << "InvokeInst *" << iName << " = InvokeInst::Create(" + << opNames[0] << ", " + << opNames[1] << ", " + << opNames[2] << ", " + << iName << "_params.begin(), " << iName << "_params.end(), \""; + printEscapedString(inv->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(inv->getCallingConv()); + Out << ");"; + printAttributes(inv->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Unwind: { + Out << "new UnwindInst(" + << bbname << ");"; + break; + } + case Instruction::Unreachable:{ + Out << "new UnreachableInst(" + << bbname << ");"; + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr:{ + Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + switch (I->getOpcode()) { + case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::Mul: Out << "Instruction::Mul"; break; + case Instruction::UDiv:Out << "Instruction::UDiv"; break; + case Instruction::SDiv:Out << "Instruction::SDiv"; break; + case Instruction::FDiv:Out << "Instruction::FDiv"; break; + case Instruction::URem:Out << "Instruction::URem"; break; + case Instruction::SRem:Out << "Instruction::SRem"; break; + case Instruction::FRem:Out << "Instruction::FRem"; break; + case Instruction::And: Out << "Instruction::And"; break; + case Instruction::Or: Out << "Instruction::Or"; break; + case Instruction::Xor: Out << "Instruction::Xor"; break; + case Instruction::Shl: Out << "Instruction::Shl"; break; + case Instruction::LShr:Out << "Instruction::LShr"; break; + case Instruction::AShr:Out << "Instruction::AShr"; break; + default: Out << "Instruction::BadOpCode"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::FCmp: { + Out << "FCmpInst* " << iName << " = new FCmpInst("; + switch (cast(I)->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; + case 
FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; + case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; + case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; + case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; + case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; + case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; + case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; + case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; + case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; + case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; + case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; + case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; + case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; + case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; + case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; + default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ICmp: { + Out << "ICmpInst* " << iName << " = new ICmpInst("; + switch (cast(I)->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; + case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; + case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; + case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; + case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; + case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; + case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; + case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; + case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; + case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; + default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::Malloc: { + const MallocInst* mallocI = cast(I); + Out << "MallocInst* " << iName << " = new MallocInst(" + << getCppName(mallocI->getAllocatedType()) << ", "; + if (mallocI->isArrayAllocation()) + Out << opNames[0] << ", " ; + Out << "\""; + printEscapedString(mallocI->getName()); + Out << "\", " << bbname << ");"; + if (mallocI->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << mallocI->getAlignment() << ");"; + break; + } + case Instruction::Free: { + Out << "FreeInst* " << iName << " = new FreeInst(" + << getCppName(I->getOperand(0)) << ", " << bbname << ");"; + break; + } + case Instruction::Alloca: { + const AllocaInst* allocaI = cast(I); + Out << "AllocaInst* " << iName << " = new AllocaInst(" + << getCppName(allocaI->getAllocatedType()) << ", "; + if (allocaI->isArrayAllocation()) + Out << opNames[0] << ", "; + Out << "\""; + printEscapedString(allocaI->getName()); + Out << "\", " << bbname << ");"; + if (allocaI->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << allocaI->getAlignment() << ");"; + break; + } + case Instruction::Load:{ + const LoadInst* load = cast(I); + Out << "LoadInst* " << iName << " = new LoadInst(" + << opNames[0] << ", \""; + printEscapedString(load->getName()); + Out << "\", " << (load->isVolatile() ? 
"true" : "false" ) + << ", " << bbname << ");"; + break; + } + case Instruction::Store: { + const StoreInst* store = cast(I); + Out << " new StoreInst(" + << opNames[0] << ", " + << opNames[1] << ", " + << (store->isVolatile() ? "true" : "false") + << ", " << bbname << ");"; + break; + } + case Instruction::GetElementPtr: { + const GetElementPtrInst* gep = cast(I); + if (gep->getNumOperands() <= 2) { + Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" + << opNames[0]; + if (gep->getNumOperands() == 2) + Out << ", " << opNames[1]; + } else { + Out << "std::vector " << iName << "_indices;"; + nl(Out); + for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { + Out << iName << "_indices.push_back(" + << opNames[i] << ");"; + nl(Out); + } + Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" + << opNames[0] << ", " << iName << "_indices.begin(), " + << iName << "_indices.end()"; + } + Out << ", \""; + printEscapedString(gep->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::PHI: { + const PHINode* phi = cast(I); + + Out << "PHINode* " << iName << " = PHINode::Create(" + << getCppName(phi->getType()) << ", \""; + printEscapedString(phi->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->reserveOperandSpace(" + << phi->getNumIncomingValues() + << ");"; + nl(Out); + for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { + Out << iName << "->addIncoming(" + << opNames[i] << ", " << opNames[i+1] << ");"; + nl(Out); + } + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: { + const CastInst* cst = cast(I); + Out << "CastInst* " << iName << " = new "; + switch (I->getOpcode()) { + case Instruction::Trunc: Out << "TruncInst"; break; + case Instruction::ZExt: Out << "ZExtInst"; break; + case Instruction::SExt: Out << "SExtInst"; break; + case Instruction::FPTrunc: Out << "FPTruncInst"; break; + case Instruction::FPExt: Out << "FPExtInst"; break; + case Instruction::FPToUI: Out << "FPToUIInst"; break; + case Instruction::FPToSI: Out << "FPToSIInst"; break; + case Instruction::UIToFP: Out << "UIToFPInst"; break; + case Instruction::SIToFP: Out << "SIToFPInst"; break; + case Instruction::PtrToInt: Out << "PtrToIntInst"; break; + case Instruction::IntToPtr: Out << "IntToPtrInst"; break; + case Instruction::BitCast: Out << "BitCastInst"; break; + default: assert(!"Unreachable"); break; + } + Out << "(" << opNames[0] << ", " + << getCppName(cst->getType()) << ", \""; + printEscapedString(cst->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::Call:{ + const CallInst* call = cast(I); + if (const InlineAsm* ila = dyn_cast(call->getCalledValue())) { + Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" + << getCppName(ila->getFunctionType()) << ", \"" + << ila->getAsmString() << "\", \"" + << ila->getConstraintString() << "\"," + << (ila->hasSideEffects() ? 
"true" : "false") << ");"; + nl(Out); + } + if (call->getNumOperands() > 2) { + Out << "std::vector " << iName << "_params;"; + nl(Out); + for (unsigned i = 1; i < call->getNumOperands(); ++i) { + Out << iName << "_params.push_back(" << opNames[i] << ");"; + nl(Out); + } + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[0] << ", " << iName << "_params.begin(), " + << iName << "_params.end(), \""; + } else if (call->getNumOperands() == 2) { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[0] << ", " << opNames[1] << ", \""; + } else { + Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0] + << ", \""; + } + printEscapedString(call->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(call->getCallingConv()); + Out << ");"; + nl(Out) << iName << "->setTailCall(" + << (call->isTailCall() ? "true":"false"); + Out << ");"; + printAttributes(call->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Select: { + const SelectInst* sel = cast(I); + Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; + Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(sel->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::UserOp1: + /// FALL THROUGH + case Instruction::UserOp2: { + /// FIXME: What should be done here? + break; + } + case Instruction::VAArg: { + const VAArgInst* va = cast(I); + Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" + << opNames[0] << ", " << getCppName(va->getType()) << ", \""; + printEscapedString(va->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractElement: { + const ExtractElementInst* eei = cast(I); + Out << "ExtractElementInst* " << getCppName(eei) + << " = new ExtractElementInst(" << opNames[0] + << ", " << opNames[1] << ", \""; + printEscapedString(eei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertElement: { + const InsertElementInst* iei = cast(I); + Out << "InsertElementInst* " << getCppName(iei) + << " = InsertElementInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(iei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst* svi = cast(I); + Out << "ShuffleVectorInst* " << getCppName(svi) + << " = new ShuffleVectorInst(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(svi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractValue: { + const ExtractValueInst *evi = cast(I); + Out << "std::vector " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < evi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << evi->idx_begin()[i] << ");"; + nl(Out); + } + Out << "ExtractValueInst* " << getCppName(evi) + << " = ExtractValueInst::Create(" << opNames[0] + << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(evi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertValue: { + const InsertValueInst *ivi = cast(I); + Out << "std::vector " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < ivi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << ivi->idx_begin()[i] << ");"; + nl(Out); + } + Out 
<< "InsertValueInst* " << getCppName(ivi) + << " = InsertValueInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(ivi->getName()); + Out << "\", " << bbname << ");"; + break; + } + } + DefinedValues.insert(I); + nl(Out); + delete [] opNames; +} + + // Print out the types, constants and declarations needed by one function + void CppWriter::printFunctionUses(const Function* F) { + nl(Out) << "// Type Definitions"; nl(Out); + if (!is_inline) { + // Print the function's return type + printType(F->getReturnType()); + + // Print the function's function type + printType(F->getFunctionType()); + + // Print the types of each of the function's arguments + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + printType(AI->getType()); + } + } + + // Print type definitions for every type referenced by an instruction and + // make a note of any global values or constants that are referenced + SmallPtrSet gvs; + SmallPtrSet consts; + for (Function::const_iterator BB = F->begin(), BE = F->end(); + BB != BE; ++BB){ + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Print the type of the instruction itself + printType(I->getType()); + + // Print the type of each of the instruction's operands + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Value* operand = I->getOperand(i); + printType(operand->getType()); + + // If the operand references a GVal or Constant, make a note of it + if (GlobalValue* GV = dyn_cast(operand)) { + gvs.insert(GV); + if (GlobalVariable *GVar = dyn_cast(GV)) + if (GVar->hasInitializer()) + consts.insert(GVar->getInitializer()); + } else if (Constant* C = dyn_cast(operand)) + consts.insert(C); + } + } + } + + // Print the function declarations for any functions encountered + nl(Out) << "// Function Declarations"; nl(Out); + for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (Function* Fun = dyn_cast(*I)) { + if (!is_inline || Fun != F) + printFunctionHead(Fun); + } + } + + // Print the global variable declarations for any variables encountered + nl(Out) << "// Global Variable Declarations"; nl(Out); + for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (GlobalVariable* F = dyn_cast(*I)) + printVariableHead(F); + } + + // Print the constants found + nl(Out) << "// Constant Definitions"; nl(Out); + for (SmallPtrSet::iterator I = consts.begin(), + E = consts.end(); I != E; ++I) { + printConstant(*I); + } + + // Process the global variables definitions now that all the constants have + // been emitted. These definitions just couple the gvars with their constant + // initializers. + nl(Out) << "// Global Variable Definitions"; nl(Out); + for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (GlobalVariable* GV = dyn_cast(*I)) + printVariableBody(GV); + } + } + + void CppWriter::printFunctionHead(const Function* F) { + nl(Out) << "Function* " << getCppName(F); + if (is_inline) { + Out << " = mod->getFunction(\""; + printEscapedString(F->getName()); + Out << "\", " << getCppName(F->getFunctionType()) << ");"; + nl(Out) << "if (!" 
<< getCppName(F) << ") {"; + nl(Out) << getCppName(F); + } + Out<< " = Function::Create("; + nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ","; + nl(Out) << "/*Linkage=*/"; + printLinkageType(F->getLinkage()); + Out << ","; + nl(Out) << "/*Name=*/\""; + printEscapedString(F->getName()); + Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : ""); + nl(Out,-1); + printCppName(F); + Out << "->setCallingConv("; + printCallingConv(F->getCallingConv()); + Out << ");"; + nl(Out); + if (F->hasSection()) { + printCppName(F); + Out << "->setSection(\"" << F->getSection() << "\");"; + nl(Out); + } + if (F->getAlignment()) { + printCppName(F); + Out << "->setAlignment(" << F->getAlignment() << ");"; + nl(Out); + } + if (F->getVisibility() != GlobalValue::DefaultVisibility) { + printCppName(F); + Out << "->setVisibility("; + printVisibilityType(F->getVisibility()); + Out << ");"; + nl(Out); + } + if (F->hasGC()) { + printCppName(F); + Out << "->setGC(\"" << F->getGC() << "\");"; + nl(Out); + } + if (is_inline) { + Out << "}"; + nl(Out); + } + printAttributes(F->getAttributes(), getCppName(F)); + printCppName(F); + Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + nl(Out); + } + + void CppWriter::printFunctionBody(const Function *F) { + if (F->isDeclaration()) + return; // external functions have no bodies. + + // Clear the DefinedValues and ForwardRefs maps because we can't have + // cross-function forward refs + ForwardRefs.clear(); + DefinedValues.clear(); + + // Create all the argument values + if (!is_inline) { + if (!F->arg_empty()) { + Out << "Function::arg_iterator args = " << getCppName(F) + << "->arg_begin();"; + nl(Out); + } + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + Out << "Value* " << getCppName(AI) << " = args++;"; + nl(Out); + if (AI->hasName()) { + Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; + nl(Out); + } + } + } + + // Create all the basic blocks + nl(Out); + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + Out << "BasicBlock* " << bbname << " = BasicBlock::Create(\""; + if (BI->hasName()) + printEscapedString(BI->getName()); + Out << "\"," << getCppName(BI->getParent()) << ",0);"; + nl(Out); + } + + // Output all of its basic blocks... for the function + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; + nl(Out); + + // Output all of the instructions in the basic block... + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); + I != E; ++I) { + printInstruction(I,bbname); + } + } + + // Loop over the ForwardRefs and resolve them now that all instructions + // are generated. 
+    if (!ForwardRefs.empty()) {
+      nl(Out) << "// Resolve Forward References";
+      nl(Out);
+    }
+
+    while (!ForwardRefs.empty()) {
+      ForwardRefMap::iterator I = ForwardRefs.begin();
+      Out << I->second << "->replaceAllUsesWith("
+          << getCppName(I->first) << "); delete " << I->second << ";";
+      nl(Out);
+      ForwardRefs.erase(I);
+    }
+  }
+
+  void CppWriter::printInline(const std::string& fname,
+                              const std::string& func) {
+    const Function* F = TheModule->getFunction(func);
+    if (!F) {
+      error(std::string("Function '") + func + "' not found in input module");
+      return;
+    }
+    if (F->isDeclaration()) {
+      error(std::string("Function '") + func + "' is external!");
+      return;
+    }
+    nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+            << getCppName(F);
+    unsigned arg_count = 1;
+    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+         AI != AE; ++AI) {
+      // Emit a distinct parameter name per argument (arg_1, arg_2, ...).
+      Out << ", Value* arg_" << arg_count++;
+    }
+    Out << ") {";
+    nl(Out);
+    is_inline = true;
+    printFunctionUses(F);
+    printFunctionBody(F);
+    is_inline = false;
+    Out << "return " << getCppName(F->begin()) << ";";
+    nl(Out) << "}";
+    nl(Out);
+  }
+
+  void CppWriter::printModuleBody() {
+    // Print out all the type definitions
+    nl(Out) << "// Type Definitions"; nl(Out);
+    printTypes(TheModule);
+
+    // Functions can call each other and global variables can reference them,
+    // so define all the functions first before emitting their bodies.
+    nl(Out) << "// Function Declarations"; nl(Out);
+    for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+         I != E; ++I)
+      printFunctionHead(I);
+
+    // Process the global variable declarations. We can't initialize them
+    // until after the constants are printed, so just print a header for
+    // each global.
+    nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+    for (Module::const_global_iterator I = TheModule->global_begin(),
+           E = TheModule->global_end(); I != E; ++I) {
+      printVariableHead(I);
+    }
+
+    // Print out all the constant definitions. Constants don't recurse except
+    // through GlobalValues. All GlobalValues have been declared at this point
+    // so we can proceed to generate the constants.
+    nl(Out) << "// Constant Definitions"; nl(Out);
+    printConstants(TheModule);
+
+    // Process the global variable definitions now that all the constants have
+    // been emitted. These definitions just couple the gvars with their
+    // constant initializers.
+    nl(Out) << "// Global Variable Definitions"; nl(Out);
+    for (Module::const_global_iterator I = TheModule->global_begin(),
+           E = TheModule->global_end(); I != E; ++I) {
+      printVariableBody(I);
+    }
+
+    // Finally, we can safely put out all of the function bodies.
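+    // The emission order in this method is deliberate: types, then function
+    // and global declarations, then constants, then initializers, with bodies
+    // last, so the generated constructor always compiles in one pass. E.g.
+    // (hypothetical names) gvar_x->setInitializer(const_y) is only emitted
+    // after const_y has been created above it.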
+ nl(Out) << "// Function Definitions"; nl(Out); + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (!I->isDeclaration()) { + nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) + << ")"; + nl(Out) << "{"; + nl(Out,1); + printFunctionBody(I); + nl(Out,-1) << "}"; + nl(Out); + } + } + } + + void CppWriter::printProgram(const std::string& fname, + const std::string& mName) { + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "using namespace llvm;\n\n"; + Out << "Module* " << fname << "();\n\n"; + Out << "int main(int argc, char**argv) {\n"; + Out << " Module* Mod = " << fname << "();\n"; + Out << " verifyModule(*Mod, PrintMessageAction);\n"; + Out << " outs().flush();\n"; + Out << " PassManager PM;\n"; + Out << " PM.add(createPrintModulePass(&outs()));\n"; + Out << " PM.run(*Mod);\n"; + Out << " return 0;\n"; + Out << "}\n\n"; + printModule(fname,mName); + } + + void CppWriter::printModule(const std::string& fname, + const std::string& mName) { + nl(Out) << "Module* " << fname << "() {"; + nl(Out,1) << "// Module Construction"; + nl(Out) << "Module* mod = new Module(\"" << mName << "\");"; + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; + } + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() + << "\");"; + } + + if (!TheModule->getModuleInlineAsm().empty()) { + nl(Out) << "mod->setModuleInlineAsm(\""; + printEscapedString(TheModule->getModuleInlineAsm()); + Out << "\");"; + } + nl(Out); + + // Loop over the dependent libraries and emit them. 
+ Module::lib_iterator LI = TheModule->lib_begin(); + Module::lib_iterator LE = TheModule->lib_end(); + while (LI != LE) { + Out << "mod->addLibrary(\"" << *LI << "\");"; + nl(Out); + ++LI; + } + printModuleBody(); + nl(Out) << "return mod;"; + nl(Out,-1) << "}"; + nl(Out); + } + + void CppWriter::printContents(const std::string& fname, + const std::string& mName) { + Out << "\nModule* " << fname << "(Module *mod) {\n"; + Out << "\nmod->setModuleIdentifier(\"" << mName << "\");\n"; + printModuleBody(); + Out << "\nreturn mod;\n"; + Out << "\n}\n"; + } + + void CppWriter::printFunction(const std::string& fname, + const std::string& funcName) { + const Function* F = TheModule->getFunction(funcName); + if (!F) { + error(std::string("Function '") + funcName + "' not found in input module"); + return; + } + Out << "\nFunction* " << fname << "(Module *mod) {\n"; + printFunctionUses(F); + printFunctionHead(F); + printFunctionBody(F); + Out << "return " << getCppName(F) << ";\n"; + Out << "}\n"; + } + + void CppWriter::printFunctions() { + const Module::FunctionListType &funcs = TheModule->getFunctionList(); + Module::const_iterator I = funcs.begin(); + Module::const_iterator IE = funcs.end(); + + for (; I != IE; ++I) { + const Function &func = *I; + if (!func.isDeclaration()) { + std::string name("define_"); + name += func.getName(); + printFunction(name, func.getName()); + } + } + } + + void CppWriter::printVariable(const std::string& fname, + const std::string& varName) { + const GlobalVariable* GV = TheModule->getNamedGlobal(varName); + + if (!GV) { + error(std::string("Variable '") + varName + "' not found in input module"); + return; + } + Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n"; + printVariableUses(GV); + printVariableHead(GV); + printVariableBody(GV); + Out << "return " << getCppName(GV) << ";\n"; + Out << "}\n"; + } + + void CppWriter::printType(const std::string& fname, + const std::string& typeName) { + const Type* Ty = TheModule->getTypeByName(typeName); + if (!Ty) { + error(std::string("Type '") + typeName + "' not found in input module"); + return; + } + Out << "\nType* " << fname << "(Module *mod) {\n"; + printType(Ty); + Out << "return " << getCppName(Ty) << ";\n"; + Out << "}\n"; + } + + bool CppWriter::runOnModule(Module &M) { + TheModule = &M; + + // Emit a header + Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; + + // Get the name of the function we're supposed to generate + std::string fname = FuncName.getValue(); + + // Get the name of the thing we are to generate + std::string tgtname = NameToGenerate.getValue(); + if (GenerationType == GenModule || + GenerationType == GenContents || + GenerationType == GenProgram || + GenerationType == GenFunctions) { + if (tgtname == "!bad!") { + if (M.getModuleIdentifier() == "-") + tgtname = ""; + else + tgtname = M.getModuleIdentifier(); + } + } else if (tgtname == "!bad!") + error("You must use the -for option with -gen-{function,variable,type}"); + + switch (WhatToGenerate(GenerationType)) { + case GenProgram: + if (fname.empty()) + fname = "makeLLVMModule"; + printProgram(fname,tgtname); + break; + case GenModule: + if (fname.empty()) + fname = "makeLLVMModule"; + printModule(fname,tgtname); + break; + case GenContents: + if (fname.empty()) + fname = "makeLLVMModuleContents"; + printContents(fname,tgtname); + break; + case GenFunction: + if (fname.empty()) + fname = "makeLLVMFunction"; + printFunction(fname,tgtname); + break; + case GenFunctions: + printFunctions(); + break; + case GenInline: + if 
(fname.empty()) + fname = "makeLLVMInline"; + printInline(fname,tgtname); + break; + case GenVariable: + if (fname.empty()) + fname = "makeLLVMVariable"; + printVariable(fname,tgtname); + break; + case GenType: + if (fname.empty()) + fname = "makeLLVMType"; + printType(fname,tgtname); + break; + default: + error("Invalid generation option"); + } + + return false; + } +} + +char CppWriter::ID = 0; + +//===----------------------------------------------------------------------===// +// External Interface declaration +//===----------------------------------------------------------------------===// + +bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, + raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel) { + if (FileType != TargetMachine::AssemblyFile) return true; + PM.add(new CppWriter(o)); + return false; +} diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h new file mode 100644 index 000000000000..db4bc0e722c8 --- /dev/null +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -0,0 +1,44 @@ +//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the TargetMachine that is used by the C++ backend. +// +//===----------------------------------------------------------------------===// + +#ifndef CPPTARGETMACHINE_H +#define CPPTARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +class raw_ostream; + +struct CPPTargetMachine : public TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + + CPPTargetMachine(const Module &M, const std::string &FS) + : DataLayout(&M) {} + + virtual bool WantsWholeFile() const { return true; } + virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel); + + // This class always works, but shouldn't be the default in most cases. + static unsigned getModuleMatchQuality(const Module &M) { return 1; } + + virtual const TargetData *getTargetData() const { return &DataLayout; } +}; + +} // End llvm namespace + + +#endif diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile new file mode 100644 index 000000000000..ca7e1a82c808 --- /dev/null +++ b/lib/Target/CppBackend/Makefile @@ -0,0 +1,14 @@ +##===- lib/Target/CppBackend/Makefile --- ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMCppBackend +include $(LEVEL)/Makefile.common + +CompileCommonOpts += -Wno-format diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp new file mode 100644 index 000000000000..05d235177642 --- /dev/null +++ b/lib/Target/DarwinTargetAsmInfo.cpp @@ -0,0 +1,169 @@ +//===-- DarwinTargetAsmInfo.cpp - Darwin asm properties ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on Darwin-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Target/DarwinTargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM) + : TargetAsmInfo(TM) { + + CStringSection_ = getUnnamedSection("\t.cstring", + SectionFlags::Mergeable | SectionFlags::Strings); + FourByteConstantSection = getUnnamedSection("\t.literal4\n", + SectionFlags::Mergeable); + EightByteConstantSection = getUnnamedSection("\t.literal8\n", + SectionFlags::Mergeable); + + // Note: 16-byte constant section is subtarget specific and should be provided + // there, if needed. + SixteenByteConstantSection = 0; + + ReadOnlySection = getUnnamedSection("\t.const\n", SectionFlags::None); + + TextCoalSection = + getNamedSection("\t__TEXT,__textcoal_nt,coalesced,pure_instructions", + SectionFlags::Code); + ConstTextCoalSection = getNamedSection("\t__TEXT,__const_coal,coalesced", + SectionFlags::None); + ConstDataCoalSection = getNamedSection("\t__DATA,__const_coal,coalesced", + SectionFlags::None); + ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None); + DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced", + SectionFlags::Writeable); +} + +/// emitUsedDirectiveFor - On Darwin, internally linked data beginning with +/// the PrivateGlobalPrefix or the LessPrivateGlobalPrefix does not have the +/// directive emitted (this occurs in ObjC metadata). + +bool +DarwinTargetAsmInfo::emitUsedDirectiveFor(const GlobalValue* GV, + Mangler *Mang) const { + if (GV==0) + return false; + if (GV->hasLocalLinkage() && !isa(GV) && + ((strlen(getPrivateGlobalPrefix()) != 0 && + Mang->getValueName(GV).substr(0,strlen(getPrivateGlobalPrefix())) == + getPrivateGlobalPrefix()) || + (strlen(getLessPrivateGlobalPrefix()) != 0 && + Mang->getValueName(GV).substr(0,strlen(getLessPrivateGlobalPrefix())) == + getLessPrivateGlobalPrefix()))) + return false; + return true; +} + +const Section* +DarwinTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const { + SectionKind::Kind Kind = SectionKindForGlobal(GV); + bool isWeak = GV->isWeakForLinker(); + bool isNonStatic = TM.getRelocationModel() != Reloc::Static; + + switch (Kind) { + case SectionKind::Text: + if (isWeak) + return TextCoalSection; + else + return TextSection; + case SectionKind::Data: + case SectionKind::ThreadData: + case SectionKind::BSS: + case SectionKind::ThreadBSS: + if (cast(GV)->isConstant()) + return (isWeak ? ConstDataCoalSection : ConstDataSection); + else + return (isWeak ? DataCoalSection : DataSection); + case SectionKind::ROData: + return (isWeak ? ConstDataCoalSection : + (isNonStatic ? ConstDataSection : getReadOnlySection())); + case SectionKind::RODataMergeStr: + return (isWeak ? + ConstTextCoalSection : + MergeableStringSection(cast(GV))); + case SectionKind::RODataMergeConst: + return (isWeak ? 
+            ConstDataCoalSection :
+            MergeableConstSection(cast<GlobalVariable>(GV)));
+  default:
+    assert(0 && "Unsupported section kind for global");
+  }
+
+  // FIXME: Do we have any extra special weird cases?
+  return NULL;
+}
+
+const Section*
+DarwinTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const {
+  const TargetData *TD = TM.getTargetData();
+  Constant *C = cast<GlobalVariable>(GV)->getInitializer();
+  const Type *Ty = cast<ArrayType>(C->getType())->getElementType();
+
+  unsigned Size = TD->getTypeAllocSize(Ty);
+  if (Size) {
+    unsigned Align = TD->getPreferredAlignment(GV);
+    if (Align <= 32)
+      return getCStringSection_();
+  }
+
+  return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+  Constant *C = GV->getInitializer();
+
+  return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+  const TargetData *TD = TM.getTargetData();
+
+  unsigned Size = TD->getTypeAllocSize(Ty);
+  if (Size == 4)
+    return FourByteConstantSection;
+  else if (Size == 8)
+    return EightByteConstantSection;
+  else if (Size == 16 && SixteenByteConstantSection)
+    return SixteenByteConstantSection;
+
+  return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+  const Section* S = MergeableConstSection(Ty);
+
+  // Handle the weird special case when compiling PIC code.
+  if (S == getReadOnlySection() &&
+      TM.getRelocationModel() != Reloc::Static)
+    return ConstDataSection;
+
+  return S;
+}
+
+std::string
+DarwinTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+                                            SectionKind::Kind kind) const {
+  assert(0 && "Darwin does not use unique sections");
+  return "";
+}
diff --git a/lib/Target/ELFTargetAsmInfo.cpp b/lib/Target/ELFTargetAsmInfo.cpp
new file mode 100644
index 000000000000..8f6e96e2751d
--- /dev/null
+++ b/lib/Target/ELFTargetAsmInfo.cpp
@@ -0,0 +1,227 @@
+//===-- ELFTargetAsmInfo.cpp - ELF asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on ELF-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Target/ELFTargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +ELFTargetAsmInfo::ELFTargetAsmInfo(const TargetMachine &TM) + : TargetAsmInfo(TM) { + + BSSSection_ = getUnnamedSection("\t.bss", + SectionFlags::Writeable | SectionFlags::BSS); + ReadOnlySection = getNamedSection("\t.rodata", SectionFlags::None); + TLSDataSection = getNamedSection("\t.tdata", + SectionFlags::Writeable | SectionFlags::TLS); + TLSBSSSection = getNamedSection("\t.tbss", + SectionFlags::Writeable | SectionFlags::TLS | SectionFlags::BSS); + + DataRelSection = getNamedSection("\t.data.rel", SectionFlags::Writeable); + DataRelLocalSection = getNamedSection("\t.data.rel.local", + SectionFlags::Writeable); + DataRelROSection = getNamedSection("\t.data.rel.ro", + SectionFlags::Writeable); + DataRelROLocalSection = getNamedSection("\t.data.rel.ro.local", + SectionFlags::Writeable); +} + +SectionKind::Kind +ELFTargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const { + SectionKind::Kind Kind = TargetAsmInfo::SectionKindForGlobal(GV); + + if (Kind != SectionKind::Data) + return Kind; + + // Decide, whether we need data.rel stuff + const GlobalVariable* GVar = dyn_cast(GV); + if (GVar->hasInitializer()) { + Constant *C = GVar->getInitializer(); + bool isConstant = GVar->isConstant(); + unsigned Reloc = RelocBehaviour(); + if (Reloc != Reloc::None && C->ContainsRelocations(Reloc)) + return (C->ContainsRelocations(Reloc::Global) ? + (isConstant ? + SectionKind::DataRelRO : SectionKind::DataRel) : + (isConstant ? 
+ SectionKind::DataRelROLocal : SectionKind::DataRelLocal)); + } + + return Kind; +} + +const Section* +ELFTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const { + SectionKind::Kind Kind = SectionKindForGlobal(GV); + + if (const Function *F = dyn_cast(GV)) { + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: + case Function::DLLExportLinkage: + case Function::ExternalLinkage: + return TextSection; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + std::string Name = UniqueSectionForGlobal(GV, Kind); + unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str()); + return getNamedSection(Name.c_str(), Flags); + } + } else if (const GlobalVariable *GVar = dyn_cast(GV)) { + if (GVar->isWeakForLinker()) { + std::string Name = UniqueSectionForGlobal(GVar, Kind); + unsigned Flags = SectionFlagsForGlobal(GVar, Name.c_str()); + return getNamedSection(Name.c_str(), Flags); + } else { + switch (Kind) { + case SectionKind::Data: + case SectionKind::SmallData: + return DataSection; + case SectionKind::DataRel: + return DataRelSection; + case SectionKind::DataRelLocal: + return DataRelLocalSection; + case SectionKind::DataRelRO: + return DataRelROSection; + case SectionKind::DataRelROLocal: + return DataRelROLocalSection; + case SectionKind::BSS: + case SectionKind::SmallBSS: + // ELF targets usually have BSS sections + return getBSSSection_(); + case SectionKind::ROData: + case SectionKind::SmallROData: + return getReadOnlySection(); + case SectionKind::RODataMergeStr: + return MergeableStringSection(GVar); + case SectionKind::RODataMergeConst: + return MergeableConstSection(GVar); + case SectionKind::ThreadData: + // ELF targets usually support TLS stuff + return TLSDataSection; + case SectionKind::ThreadBSS: + return TLSBSSSection; + default: + assert(0 && "Unsuported section kind for global"); + } + } + } else + assert(0 && "Unsupported global"); + + return NULL; +} + +const Section* +ELFTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const { + // FIXME: Support data.rel stuff someday + return MergeableConstSection(Ty); +} + +const Section* +ELFTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const { + Constant *C = GV->getInitializer(); + return MergeableConstSection(C->getType()); +} + +inline const Section* +ELFTargetAsmInfo::MergeableConstSection(const Type *Ty) const { + const TargetData *TD = TM.getTargetData(); + + // FIXME: string here is temporary, until stuff will fully land in. + // We cannot use {Four,Eight,Sixteen}ByteConstantSection here, since it's + // currently directly used by asmprinter. 
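+  // The names built below are the standard ELF mergeable-constant sections;
+  // combined with printSectionFlags() further down, an 8-byte pool entry
+  // lands in something like:
+  //   .section .rodata.cst8,"aM",@progbits,8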
+ unsigned Size = TD->getTypeAllocSize(Ty); + if (Size == 4 || Size == 8 || Size == 16) { + std::string Name = ".rodata.cst" + utostr(Size); + + return getNamedSection(Name.c_str(), + SectionFlags::setEntitySize(SectionFlags::Mergeable, + Size)); + } + + return getReadOnlySection(); +} + +const Section* +ELFTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const { + const TargetData *TD = TM.getTargetData(); + Constant *C = cast(GV)->getInitializer(); + const Type *Ty = cast(C->getType())->getElementType(); + + unsigned Size = TD->getTypeAllocSize(Ty); + if (Size <= 16) { + assert(getCStringSection() && "Should have string section prefix"); + + // We also need alignment here + unsigned Align = TD->getPrefTypeAlignment(Ty); + if (Align < Size) + Align = Size; + + std::string Name = getCStringSection() + utostr(Size) + '.' + utostr(Align); + unsigned Flags = SectionFlags::setEntitySize(SectionFlags::Mergeable | + SectionFlags::Strings, + Size); + return getNamedSection(Name.c_str(), Flags); + } + + return getReadOnlySection(); +} + +std::string ELFTargetAsmInfo::printSectionFlags(unsigned flags) const { + std::string Flags = ",\""; + + if (!(flags & SectionFlags::Debug)) + Flags += 'a'; + if (flags & SectionFlags::Code) + Flags += 'x'; + if (flags & SectionFlags::Writeable) + Flags += 'w'; + if (flags & SectionFlags::Mergeable) + Flags += 'M'; + if (flags & SectionFlags::Strings) + Flags += 'S'; + if (flags & SectionFlags::TLS) + Flags += 'T'; + if (flags & SectionFlags::Small) + Flags += 's'; + + Flags += "\","; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (strcmp(CommentString, "@") == 0) + Flags += '%'; + else + Flags += '@'; + + // FIXME: There can be exceptions here + if (flags & SectionFlags::BSS) + Flags += "nobits"; + else + Flags += "progbits"; + + if (unsigned entitySize = SectionFlags::getEntitySize(flags)) + Flags += "," + utostr(entitySize); + + return Flags; +} diff --git a/lib/Target/IA64/AsmPrinter/CMakeLists.txt b/lib/Target/IA64/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..1d552bd5551c --- /dev/null +++ b/lib/Target/IA64/AsmPrinter/CMakeLists.txt @@ -0,0 +1,12 @@ +include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_partially_linked_object(LLVMIA64AsmPrinter + IA64AsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMIA64CodeGen n) + +add_dependencies(LLVMIA64AsmPrinter ${n}) diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp new file mode 100644 index 000000000000..fc54e23a44d7 --- /dev/null +++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp @@ -0,0 +1,376 @@ +//===-- IA64AsmPrinter.cpp - Print out IA64 LLVM as assembly --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas' +// assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this +// output, but if so that's a bug I'd like to hear about: please file a bug +// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with +// the Intel C/C++ compiler for Itanium Linux. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "IA64.h" +#include "IA64TargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class IA64AsmPrinter : public AsmPrinter { + std::set<std::string> ExternalFunctionNames, ExternalObjectNames; + public: + explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V) {} + + virtual const char *getPassName() const { + return "IA64 Assembly Printer"; + } + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + bool printInstruction(const MachineInstr *MI); + + // This method is used by the tablegen'erated instruction printer. + void printOperand(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.getType() == MachineOperand::MO_Register) { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Not physref??"); + //XXX Bug Workaround: See note in Printer::doInitialization about %. + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + } else { + printOp(MO); + } + } + + void printS8ImmOperand(const MachineInstr *MI, unsigned OpNo) { + int val=(unsigned int)MI->getOperand(OpNo).getImm(); + if(val>=128) val=val-256; // sign-extend from 8 bits + O << val; + } + void printS14ImmOperand(const MachineInstr *MI, unsigned OpNo) { + int val=(unsigned int)MI->getOperand(OpNo).getImm(); + if(val>=8192) val=val-16384; // sign-extend from 14 bits + O << val; + } + void printS22ImmOperand(const MachineInstr *MI, unsigned OpNo) { + int val=(unsigned int)MI->getOperand(OpNo).getImm(); + if(val>=2097152) val=val-4194304; // sign-extend from 22 bits + O << val; + } + void printU64ImmOperand(const MachineInstr *MI, unsigned OpNo) { + O << (uint64_t)MI->getOperand(OpNo).getImm(); + } + void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo) { +// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker +// errors - instead of add rX = @gprel(CPI<n>), r1;; we now +// emit movl rX = @gprel(CPI<n>);; add rX = rX, r1;; + if (MI->getOperand(OpNo).isImm()) { + O << (int64_t)MI->getOperand(OpNo).getImm(); + } else { // this is a constant pool reference: FIXME: assert this + printOp(MI->getOperand(OpNo)); + } + } + + void printGlobalOperand(const MachineInstr *MI, unsigned OpNo) { + printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo) { + printOp(MI->getOperand(OpNo), true); // this is a br.call instruction + } + + void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, bool isBRCALLinsn= false); + void printModuleLevelGV(const GlobalVariable* GVar); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + }; +} // end of anonymous namespace + + +// Include the auto-generated portion of the assembly writer.
+#include "IA64GenAsmWriter.inc" + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + + // Print out labels for the function. + EmitAlignment(5); + O << "\t.global\t" << CurrentFnName << '\n'; + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ", @function\n"; + O << CurrentFnName << ":\n"; + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block if there are any predecessors. + if (!I->pred_empty()) { + printBasicBlockLabel(I, true, true); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + // We didn't modify anything. + return false; +} + +void IA64AsmPrinter::printOp(const MachineOperand &MO, + bool isBRCALLinsn /* = false */) { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << RI.get(MO.getReg()).AsmName; + return; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_ConstantPoolIndex: { + O << "@gprel(" << TAI->getPrivateGlobalPrefix() + << "CPI" << getFunctionNumber() << "_" << MO.getIndex() << ")"; + return; + } + + case MachineOperand::MO_GlobalAddress: { + + // functions need @ltoff(@fptr(fn_name)) form + GlobalValue *GV = MO.getGlobal(); + Function *F = dyn_cast(GV); + + bool Needfptr=false; // if we're computing an address @ltoff(X), do + // we need to decorate it so it becomes + // @ltoff(@fptr(X)) ? + if (F && !isBRCALLinsn /*&& F->isDeclaration()*/) + Needfptr=true; + + // if this is the target of a call instruction, we should define + // the function somewhere (GNU gas has no problem without this, but + // Intel ias rightly complains of an 'undefined symbol') + + if (F /*&& isBRCALLinsn*/ && F->isDeclaration()) + ExternalFunctionNames.insert(Mang->getValueName(MO.getGlobal())); + else + if (GV->isDeclaration()) // e.g. stuff like 'stdin' + ExternalObjectNames.insert(Mang->getValueName(MO.getGlobal())); + + if (!isBRCALLinsn) + O << "@ltoff("; + if (Needfptr) + O << "@fptr("; + O << Mang->getValueName(MO.getGlobal()); + + if (Needfptr && !isBRCALLinsn) + O << "#))"; // close both fptr( and ltoff( + else { + if (Needfptr) + O << "#)"; // close only fptr( + if (!isBRCALLinsn) + O << "#)"; // close only ltoff( + } + + int Offset = MO.getOffset(); + if (Offset > 0) + O << " + " << Offset; + else if (Offset < 0) + O << " - " << -Offset; + return; + } + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + ExternalFunctionNames.insert(MO.getSymbolName()); + return; + default: + O << ""; return; + } +} + +/// printMachineInstruction -- Print out a single IA64 LLVM instruction +/// MI to the current output stream. +/// +void IA64AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + // Call the autogenerated instruction printer routines. 
+ printInstruction(MI); +} + +bool IA64AsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + + O << "\n.ident \"LLVM-ia64\"\n\n" + << "\t.psr lsb\n" // should be "msb" on HP-UX, for starters + << "\t.radix C\n" + << "\t.psr abi64\n"; // we only support 64 bits for now + return Result; +} + +void IA64AsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External globals require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + O << "\n\n"; + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && !GVar->hasSection()) { + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) { + O << "\t.lcomm " << name << "#," << Size + << ',' << (1 << Align); + O << '\n'; + } else { + O << "\t.common " << name << "#," << Size + << ',' << (1 << Align); + O << '\n'; + } + + return; + } + } + + switch (GVar->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::CommonLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + // Nonnull linkonce -> weak + O << "\t.weak " << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << TAI->getGlobalDirective() << name << '\n'; + // FALL THROUGH + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + case GlobalValue::GhostLinkage: + cerr << "GhostLinkage cannot appear in IA64AsmPrinter!\n"; + abort(); + case GlobalValue::DLLImportLinkage: + cerr << "DLLImport linkage is not supported by this target!\n"; + abort(); + case GlobalValue::DLLExportLinkage: + cerr << "DLLExport linkage is not supported by this target!\n"; + abort(); + default: + assert(0 && "Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + + if (TAI->hasDotTypeDotSizeDirective()) { + O << "\t.type " << name << ",@object\n"; + O << "\t.size " << name << ',' << Size << '\n'; + } + + O << name << ":\n"; + EmitGlobalConstant(C); +} + + +bool IA64AsmPrinter::doFinalization(Module &M) { + // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + // we print out ".global X \n .type X, @function" for each external function + O << "\n\n// br.call targets referenced (and not defined) above: \n"; + for (std::set<std::string>::iterator i = ExternalFunctionNames.begin(), + e = ExternalFunctionNames.end(); i!=e; ++i) { + O << "\t.global " << *i << "\n\t.type " << *i << ", @function\n"; + } + O << "\n\n"; + + // we print out ".global X \n .type X, @object" for each external object + O << "\n\n// (external) symbols referenced (and not defined) above: \n"; + for (std::set<std::string>::iterator i = ExternalObjectNames.begin(), + e = ExternalObjectNames.end(); i!=e; ++i) { + O << "\t.global " << *i << "\n\t.type " << *i << ", @object\n"; + } + O << "\n\n"; + + return AsmPrinter::doFinalization(M); +} + +/// createIA64CodePrinterPass - Returns a pass that prints the IA64 +/// assembly code for a MachineFunction to the given output stream, using +/// the given target machine description. +/// +FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o, + IA64TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} diff --git a/lib/Target/IA64/AsmPrinter/Makefile b/lib/Target/IA64/AsmPrinter/Makefile new file mode 100644 index 000000000000..12880f36f76d --- /dev/null +++ b/lib/Target/IA64/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/IA64/AsmPrinter/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMIA64AsmPrinter + +# Hack: we need to include 'main' IA64 target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt new file mode 100644 index 000000000000..26f86ca197fc --- /dev/null +++ b/lib/Target/IA64/CMakeLists.txt @@ -0,0 +1,20 @@ +set(LLVM_TARGET_DEFINITIONS IA64.td) + +tablegen(IA64GenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(IA64GenRegisterNames.inc -gen-register-enums) +tablegen(IA64GenRegisterInfo.inc -gen-register-desc) +tablegen(IA64GenInstrNames.inc -gen-instr-enums) +tablegen(IA64GenInstrInfo.inc -gen-instr-desc) +tablegen(IA64GenAsmWriter.inc -gen-asm-writer) +tablegen(IA64GenDAGISel.inc -gen-dag-isel) + +add_llvm_target(IA64CodeGen + IA64Bundling.cpp + IA64InstrInfo.cpp + IA64ISelDAGToDAG.cpp + IA64ISelLowering.cpp + IA64RegisterInfo.cpp + IA64Subtarget.cpp + IA64TargetAsmInfo.cpp + IA64TargetMachine.cpp + ) diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h new file mode 100644 index 000000000000..ec8e3d6d74da --- /dev/null +++ b/lib/Target/IA64/IA64.h @@ -0,0 +1,58 @@ +//===-- IA64.h - Top-level interface for IA64 representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the IA64 +// target library, as used by the LLVM JIT.
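The printS8/S14/S22ImmOperand helpers in IA64AsmPrinter.cpp above all recover a signed value from an N-bit immediate by subtracting 2^N whenever the value is at least 2^(N-1). A generic version of that logic, as an illustrative sketch (not part of the imported source, and assuming bits < 64):

#include <cassert>
#include <cstdint>

// Interpret the low 'bits' bits of 'val' as a two's-complement signed
// integer, generalizing the val>=128 -> val-256 pattern above.
int64_t signExtend(uint64_t val, unsigned bits) {
  uint64_t mask = (1ULL << bits) - 1;
  val &= mask;
  if (val >= (1ULL << (bits - 1)))          // sign bit set?
    return (int64_t)(val - (1ULL << bits)); // subtract 2^bits
  return (int64_t)val;
}

int main() {
  assert(signExtend(0xFF, 8) == -1);    // matches val >= 128 -> val - 256
  assert(signExtend(127, 8) == 127);    // positive values pass through
  assert(signExtend(16383, 14) == -1);  // matches val >= 8192 -> val - 16384
}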
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_IA64_H +#define TARGET_IA64_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class IA64TargetMachine; +class FunctionPass; +class raw_ostream; + +/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM +/// function into IA64 machine code in a sane, DAG->DAG transform. +/// +FunctionPass *createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM); + +/// createIA64BundlingPass - This pass adds stop bits and bundles +/// instructions. +/// +FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM); + +/// createIA64CodePrinterPass - Returns a pass that prints the IA64 +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *createIA64CodePrinterPass(raw_ostream &o, + IA64TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + +} // End llvm namespace + +// Defines symbolic names for IA64 registers. This defines a mapping from +// register name to register number. +// +#include "IA64GenRegisterNames.inc" + +// Defines symbolic names for the IA64 instructions. +// +#include "IA64GenInstrNames.inc" + +#endif + + diff --git a/lib/Target/IA64/IA64.td b/lib/Target/IA64/IA64.td new file mode 100644 index 000000000000..c469281ab16e --- /dev/null +++ b/lib/Target/IA64/IA64.td @@ -0,0 +1,39 @@ +//===-- IA64.td - Target definition file for Intel IA64 -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a target description file for the Intel IA64 architecture, +// also known variously as ia64, IA-64, IPF, "the Itanium architecture" etc. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing... +// +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "IA64RegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "IA64InstrInfo.td" + +def IA64InstrInfo : InstrInfo { } + +def IA64 : Target { + // Our instruction set + let InstructionSet = IA64InstrInfo; + +} + + diff --git a/lib/Target/IA64/IA64Bundling.cpp b/lib/Target/IA64/IA64Bundling.cpp new file mode 100644 index 000000000000..3a9ba6ca3f61 --- /dev/null +++ b/lib/Target/IA64/IA64Bundling.cpp @@ -0,0 +1,118 @@ +//===-- IA64Bundling.cpp - IA-64 instruction bundling pass. ------------ --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Add stops where required to prevent read-after-write and write-after-write +// dependencies, for both registers and memory addresses. 
There are exceptions: +// +// - Compare instructions (cmp*, tbit, tnat, fcmp, frcpa) are OK with +// WAW dependencies so long as they all target p0, or are of parallel +// type (.and*/.or*) +// +// FIXME: bundling, for now, is left to the assembler. +// FIXME: this might be an appropriate place to translate between different +// instructions that do the same thing, if this helps bundling. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ia64-codegen" +#include "IA64.h" +#include "IA64InstrInfo.h" +#include "IA64TargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include <set> +using namespace llvm; + +STATISTIC(StopBitsAdded, "Number of stop bits added"); + +namespace { + struct IA64BundlingPass : public MachineFunctionPass { + static char ID; + /// Target machine description which we query for reg. names, data + /// layout, etc. + /// + IA64TargetMachine &TM; + + IA64BundlingPass(IA64TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm) { } + + virtual const char *getPassName() const { + return "IA64 (Itanium) Bundling Pass"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) { + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; + } + + // XXX: ugly global, but pending writes can cross basic blocks. Note that + // taken branches end instruction groups. So we only need to worry about + // 'fallthrough' code + std::set<unsigned> PendingRegWrites; + }; + char IA64BundlingPass::ID = 0; +} // end of anonymous namespace + +/// createIA64BundlingPass - Returns a pass that adds STOP (;;) instructions +/// and arranges the result into bundles. +/// +FunctionPass *llvm::createIA64BundlingPass(IA64TargetMachine &tm) { + return new IA64BundlingPass(tm); +} + +/// runOnMachineBasicBlock - add stops and bundle this MBB. +/// +bool IA64BundlingPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + MachineInstr *CurrentInsn = I++; + std::set<unsigned> CurrentReads, CurrentWrites, OrigWrites; + + for(unsigned i=0; i < CurrentInsn->getNumOperands(); i++) { + MachineOperand &MO=CurrentInsn->getOperand(i); + if (MO.isReg()) { + if(MO.isUse()) { // TODO: exclude p0 + CurrentReads.insert(MO.getReg()); + } + if(MO.isDef()) { // TODO: exclude p0 + CurrentWrites.insert(MO.getReg()); + OrigWrites.insert(MO.getReg()); // FIXME: use a nondestructive + // set_intersect instead? + } + } + } + + // CurrentReads/CurrentWrites contain info for the current instruction. + // Does it read or write any registers that are pending a write? + // (i.e. not separated by a stop) + set_intersect(CurrentReads, PendingRegWrites); + set_intersect(CurrentWrites, PendingRegWrites); + + if(!
(CurrentReads.empty() && CurrentWrites.empty()) ) { + // there is a conflict, insert a stop and reset PendingRegWrites + CurrentInsn = BuildMI(MBB, CurrentInsn, CurrentInsn->getDebugLoc(), + TM.getInstrInfo()->get(IA64::STOP), 0); + PendingRegWrites=OrigWrites; // carry over current writes to next insn + Changed=true; StopBitsAdded++; // update stats + } else { // otherwise, track additional pending writes + set_union(PendingRegWrites, OrigWrites); + } + } // onto the next insn in the MBB + + return Changed; +} + diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp new file mode 100644 index 000000000000..9800c506ca9e --- /dev/null +++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp @@ -0,0 +1,575 @@ +//===---- IA64ISelDAGToDAG.cpp - IA64 pattern matching inst selector ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for IA64, +// converting a legalized dag to an IA64 dag. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ia64-codegen" +#include "IA64.h" +#include "IA64TargetMachine.h" +#include "IA64ISelLowering.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Constants.h" +#include "llvm/GlobalValue.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace { + //===--------------------------------------------------------------------===// + /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine + /// instructions for SelectionDAG operations. + /// + class IA64DAGToDAGISel : public SelectionDAGISel { + unsigned GlobalBaseReg; + public: + explicit IA64DAGToDAGISel(IA64TargetMachine &TM) + : SelectionDAGISel(TM) {} + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + return SelectionDAGISel::runOnFunction(Fn); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC + /// base register. Return the virtual register that holds this value. + // SDValue getGlobalBaseReg(); TODO: hmm + + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + SDNode *Select(SDValue N); + + SDNode *SelectIntImmediateExpr(SDValue LHS, SDValue RHS, + unsigned OCHi, unsigned OCLo, + bool IsArithmetic = false, + bool Negate = false); + SDNode *SelectBitfieldInsert(SDNode *N); + + /// SelectCC - Select a comparison of the specified values with the + /// specified condition code, returning the CR# of the expression. + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC); + + /// SelectAddr - Given the specified address, return the two operands for a + /// load/store instruction, and return true if it should be an indexed [r+r] + /// operation. 
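The stop-insertion logic of IA64BundlingPass above reduces to: track pending register writes across the block, and emit a stop (";;") whenever the current instruction reads or writes one of them. A toy standalone model of that decision, offered as an illustrative sketch (the Insn type and register numbering are made up for the example):

#include <iostream>
#include <set>
#include <vector>

// Toy instruction: sets of registers it reads and writes.
struct Insn { std::set<unsigned> Reads, Writes; };

// True if any register in A is also in B (a RAW or WAW hazard).
bool intersects(const std::set<unsigned> &A, const std::set<unsigned> &B) {
  for (unsigned R : A)
    if (B.count(R)) return true;
  return false;
}

int main() {
  // r1 = ...; then r2 = r1 + r3 -- the second insn reads r1, so it
  // must start a new instruction group.
  std::vector<Insn> Block = { { {}, {1} }, { {1, 3}, {2} } };
  std::set<unsigned> Pending;
  for (const Insn &I : Block) {
    if (intersects(I.Reads, Pending) || intersects(I.Writes, Pending)) {
      std::cout << ";;\n"; // stop bit: hazards cannot cross a stop
      Pending.clear();     // pending writes reset to the current insn's
    }
    Pending.insert(I.Writes.begin(), I.Writes.end());
    std::cout << "insn\n";
  }
}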
+ bool SelectAddr(SDValue Addr, SDValue &Op1, SDValue &Op2); + + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "IA64 (Itanium) DAG->DAG Instruction Selector"; + } + +// Include the pieces autogenerated from the target description. +#include "IA64GenDAGISel.inc" + +private: + SDNode *SelectDIV(SDValue Op); + }; +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void IA64DAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + // Select target instructions for the DAG. + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +SDNode *IA64DAGToDAGISel::SelectDIV(SDValue Op) { + SDNode *N = Op.getNode(); + SDValue Chain = N->getOperand(0); + SDValue Tmp1 = N->getOperand(0); + SDValue Tmp2 = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + bool isFP=false; + + if(Tmp1.getValueType().isFloatingPoint()) + isFP=true; + + bool isModulus=false; // is it a division or a modulus? + bool isSigned=false; + + switch(N->getOpcode()) { + case ISD::FDIV: + case ISD::SDIV: isModulus=false; isSigned=true; break; + case ISD::UDIV: isModulus=false; isSigned=false; break; + case ISD::FREM: + case ISD::SREM: isModulus=true; isSigned=true; break; + case ISD::UREM: isModulus=true; isSigned=false; break; + } + + // TODO: check for integer divides by powers of 2 (or other simple patterns?) + + SDValue TmpPR, TmpPR2; + SDValue TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8; + SDValue TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15; + SDNode *Result; + + // we'll need copies of F0 and F1 + SDValue F0 = CurDAG->getRegister(IA64::F0, MVT::f64); + SDValue F1 = CurDAG->getRegister(IA64::F1, MVT::f64); + + // OK, emit some code: + + if(!isFP) { + // first, load the inputs into FP regs. + TmpF1 = + SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp1), 0); + Chain = TmpF1.getValue(1); + TmpF2 = + SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp2), 0); + Chain = TmpF2.getValue(1); + + // next, convert the inputs to FP + if(isSigned) { + TmpF3 = + SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF1), 0); + Chain = TmpF3.getValue(1); + TmpF4 = + SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF2), 0); + Chain = TmpF4.getValue(1); + } else { // is unsigned + TmpF3 = + SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF1), + 0); + Chain = TmpF3.getValue(1); + TmpF4 = + SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF2), + 0); + Chain = TmpF4.getValue(1); + } + + } else { // this is an FP divide/remainder, so we 'leak' some temp + // regs and assign TmpF3=Tmp1, TmpF4=Tmp2 + TmpF3=Tmp1; + TmpF4=Tmp2; + } + + // we start by computing an approximate reciprocal (good to 9 bits?) 
+ // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate) + if(isFP) + TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS0, dl, MVT::f64, + MVT::i1, TmpF3, TmpF4), 0); + else + TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS1, dl, MVT::f64, + MVT::i1, TmpF3, TmpF4), 0); + + TmpPR = TmpF5.getValue(1); + Chain = TmpF5.getValue(2); + + SDValue minusB; + if(isModulus) { // for remainders, it'll be handy to have + // copies of -input_b + minusB = SDValue(CurDAG->getTargetNode(IA64::SUB, dl, MVT::i64, + CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0); + Chain = minusB.getValue(1); + } + + SDValue TmpE0, TmpY1, TmpE1, TmpY2; + + SDValue OpsE0[] = { TmpF4, TmpF5, F1, TmpPR }; + TmpE0 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64, + OpsE0, 4), 0); + Chain = TmpE0.getValue(1); + SDValue OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR }; + TmpY1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsY1, 4), 0); + Chain = TmpY1.getValue(1); + SDValue OpsE1[] = { TmpE0, TmpE0, F0, TmpPR }; + TmpE1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsE1, 4), 0); + Chain = TmpE1.getValue(1); + SDValue OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR }; + TmpY2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsY2, 4), 0); + Chain = TmpY2.getValue(1); + + if(isFP) { // if this is an FP divide, we finish up here and exit early + if(isModulus) + assert(0 && "Sorry, try another FORTRAN compiler."); + + SDValue TmpE2, TmpY3, TmpQ0, TmpR0; + + SDValue OpsE2[] = { TmpE1, TmpE1, F0, TmpPR }; + TmpE2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsE2, 4), 0); + Chain = TmpE2.getValue(1); + SDValue OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR }; + TmpY3 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsY3, 4), 0); + Chain = TmpY3.getValue(1); + SDValue OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR }; + TmpQ0 = + SDValue(CurDAG->getTargetNode(IA64::CFMADS1, dl, // double prec! + MVT::f64, OpsQ0, 4), 0); + Chain = TmpQ0.getValue(1); + SDValue OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR }; + TmpR0 = + SDValue(CurDAG->getTargetNode(IA64::CFNMADS1, dl, // double prec! + MVT::f64, OpsR0, 4), 0); + Chain = TmpR0.getValue(1); + +// we want Result to have the same target register as the frcpa, so +// we two-address hack it. See the comment "for this to work..." on +// page 48 of Intel application note #245415 + SDValue Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR }; + Result = CurDAG->getTargetNode(IA64::TCFMADS0, dl, // d.p. s0 rndg! + MVT::f64, Ops, 5); + Chain = SDValue(Result, 1); + return Result; // XXX: early exit! + } else { // this is *not* an FP divide, so there's a bit left to do: + + SDValue TmpQ2, TmpR2, TmpQ3, TmpQ; + + SDValue OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR }; + TmpQ2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64, + OpsQ2, 4), 0); + Chain = TmpQ2.getValue(1); + SDValue OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR }; + TmpR2 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64, + OpsR2, 4), 0); + Chain = TmpR2.getValue(1); + +// we want TmpQ3 to have the same target register as the frcpa? maybe we +// should two-address hack it. See the comment "for this to work..." 
on page +// 48 of Intel application note #245415 + SDValue OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR }; + TmpQ3 = SDValue(CurDAG->getTargetNode(IA64::TCFMAS1, dl, MVT::f64, + OpsQ3, 5), 0); + Chain = TmpQ3.getValue(1); + + // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0) + // the FPSWA won't be able to help out in the case of large/tiny + // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0. + + if(isSigned) + TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, dl, + MVT::f64, TmpQ3), 0); + else + TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, dl, + MVT::f64, TmpQ3), 0); + + Chain = TmpQ.getValue(1); + + if(isModulus) { + SDValue FPminusB = + SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, minusB), + 0); + Chain = FPminusB.getValue(1); + SDValue Remainder = + SDValue(CurDAG->getTargetNode(IA64::XMAL, dl, MVT::f64, + TmpQ, FPminusB, TmpF1), 0); + Chain = Remainder.getValue(1); + Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, Remainder); + Chain = SDValue(Result, 1); + } else { // just an integer divide + Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, TmpQ); + Chain = SDValue(Result, 1); + } + + return Result; + } // wasn't an FP divide +} + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +SDNode *IA64DAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + if (N->isMachineOpcode()) + return NULL; // Already selected. + DebugLoc dl = Op.getDebugLoc(); + + switch (N->getOpcode()) { + default: break; + + case IA64ISD::BRCALL: { // XXX: this is also a hack! + SDValue Chain = N->getOperand(0); + SDValue InFlag; // Null incoming flag value. + + if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag + InFlag = N->getOperand(2); + } + + unsigned CallOpcode; + SDValue CallOperand; + + // if we can call directly, do so + if (GlobalAddressSDNode *GASD = + dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) { + CallOpcode = IA64::BRCALL_IPREL_GA; + CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64); + } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) { + // FIXME: we currently NEED this case for correctness, to avoid + // "non-pic code with imm reloc.n against dynamic symbol" errors + CallOpcode = IA64::BRCALL_IPREL_ES; + CallOperand = N->getOperand(1); + } else { + // otherwise we need to load the function descriptor, + // load the branch target (function)'s entry point and GP, + // branch (call) then restore the GP + SDValue FnDescriptor = N->getOperand(1); + + // load the branch target's entry point [mem] and + // GP value [mem+8] + SDValue targetEntryPoint= + SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, + FnDescriptor, CurDAG->getEntryNode()), 0); + Chain = targetEntryPoint.getValue(1); + SDValue targetGPAddr= + SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64, + FnDescriptor, + CurDAG->getConstant(8, MVT::i64)), 0); + Chain = targetGPAddr.getValue(1); + SDValue targetGP = + SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other, + targetGPAddr, CurDAG->getEntryNode()), 0); + Chain = targetGP.getValue(1); + + Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag); + InFlag = Chain.getValue(1); + Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6, + targetEntryPoint, InFlag); // FLAG these?
+ InFlag = Chain.getValue(1); + + CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64); + CallOpcode = IA64::BRCALL_INDIRECT; + } + + // Finally, once everything is setup, emit the call itself + if (InFlag.getNode()) + Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other, + MVT::Flag, CallOperand, InFlag), 0); + else // there might be no arguments + Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other, + MVT::Flag, CallOperand, Chain), 0); + InFlag = Chain.getValue(1); + + std::vector<SDValue> CallResults; + + CallResults.push_back(Chain); + CallResults.push_back(InFlag); + + for (unsigned i = 0, e = CallResults.size(); i != e; ++i) + ReplaceUses(Op.getValue(i), CallResults[i]); + return NULL; + } + + case IA64ISD::GETFD: { + SDValue Input = N->getOperand(0); + return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input); + } + + case ISD::FDIV: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return SelectDIV(Op); + + case ISD::TargetConstantFP: { + SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so.. + + SDValue V; + ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N); + if (N2->getValueAPF().isPosZero()) { + V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64); + } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ? + APFloat(+1.0f) : APFloat(+1.0))) { + V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64); + } else + assert(0 && "Unexpected FP constant!"); + + ReplaceUses(SDValue(N, 0), V); + return 0; + } + + case ISD::FrameIndex: { // TODO: reduce creepyness + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i64)); + else + return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i64)); + } + + case ISD::ConstantPool: { // TODO: nuke the constant pool + // (ia64 doesn't need one) + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + Constant *C = CP->getConstVal(); + SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment()); + return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ? + CurDAG->getRegister(IA64::r1, MVT::i64), CPI); + } + + case ISD::GlobalAddress: { + GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal(); + SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64); + SDValue Tmp = + SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, + CurDAG->getRegister(IA64::r1, + MVT::i64), GA), 0); + return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp, + CurDAG->getEntryNode()); + } + +/* XXX + case ISD::ExternalSymbol: { + SDValue EA = CurDAG->getTargetExternalSymbol( + cast<ExternalSymbolSDNode>(N)->getSymbol(), + MVT::i64); + SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64, + CurDAG->getRegister(IA64::r1, + MVT::i64), + EA); + return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp); + } +*/ + + case ISD::LOAD: { // FIXME: load -1, not 1, for bools? + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Address = LD->getBasePtr(); + + MVT TypeBeingLoaded = LD->getMemoryVT(); + unsigned Opc; + switch (TypeBeingLoaded.getSimpleVT()) { + default: +#ifndef NDEBUG + N->dump(CurDAG); +#endif + assert(0 && "Cannot load this type!"); + case MVT::i1: { // this is a bool + Opc = IA64::LD1; // first we load a byte, then compare for != 0 + if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
+ return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, + SDValue(CurDAG->getTargetNode(Opc, dl, + MVT::i64, + Address), 0), + CurDAG->getRegister(IA64::r0, MVT::i64), + Chain); + } + /* otherwise, we want to load a bool into something bigger: LD1 + will do that for us, so we just fall through */ + } + case MVT::i8: Opc = IA64::LD1; break; + case MVT::i16: Opc = IA64::LD2; break; + case MVT::i32: Opc = IA64::LD4; break; + case MVT::i64: Opc = IA64::LD8; break; + + case MVT::f32: Opc = IA64::LDF4; break; + case MVT::f64: Opc = IA64::LDF8; break; + } + + // TODO: comment this + return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other, + Address, Chain); + } + + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Address = ST->getBasePtr(); + SDValue Chain = ST->getChain(); + + unsigned Opc; + if (ISD::isNON_TRUNCStore(N)) { + switch (N->getOperand(1).getValueType().getSimpleVT()) { + default: assert(0 && "unknown type in store"); + case MVT::i1: { // this is a bool + Opc = IA64::ST1; // we store either 0 or 1 as a byte + // first load zero! + SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64); + Chain = Initial.getValue(1); + // then load 1 into the same reg iff the predicate to store is 1 + SDValue Tmp = ST->getValue(); + Tmp = + SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial, + CurDAG->getTargetConstant(1, + MVT::i64), + Tmp), 0); + return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain); + } + case MVT::i64: Opc = IA64::ST8; break; + case MVT::f64: Opc = IA64::STF8; break; + } + } else { // Truncating store + switch(ST->getMemoryVT().getSimpleVT()) { + default: assert(0 && "unknown type in truncstore"); + case MVT::i8: Opc = IA64::ST1; break; + case MVT::i16: Opc = IA64::ST2; break; + case MVT::i32: Opc = IA64::ST4; break; + case MVT::f32: Opc = IA64::STF4; break; + } + } + + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain); + } + + case ISD::BRCOND: { + SDValue Chain = N->getOperand(0); + SDValue CC = N->getOperand(1); + MachineBasicBlock *Dest = + cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock(); + //FIXME - we do NOT need long branches all the time + return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, + CurDAG->getBasicBlock(Dest), Chain); + } + + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: { + int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ? + IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP; + SDValue N0 = N->getOperand(0); + return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0); + } + + case ISD::BR: + // FIXME: we don't need long branches all the time! + SDValue N0 = N->getOperand(0); + return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, + N->getOperand(1), N0); + } + + return SelectCode(Op); +} + + +/// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG +/// into an IA64-specific DAG, ready for instruction scheduling.
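SelectDIV above refines the crude frcpa reciprocal estimate with pairs of fused multiply-adds: e = 1 - b*y (the CFNMA nodes) followed by y = y + y*e (the CFMA nodes), each pair roughly doubling the number of correct bits. A scalar sketch of that Newton-Raphson iteration, for illustration only and assuming an initial estimate accurate to a few bits:

#include <cmath>
#include <cstdio>

// Scalar model of the refinement sequence SelectDIV emits: frcpa gives
// y0 ~= 1/b; each step computes e = 1 - b*y and then y = y + y*e.
double refineReciprocal(double b, double y0, int steps) {
  double y = y0;
  for (int i = 0; i < steps; ++i) {
    double e = std::fma(-b, y, 1.0); // e = 1 - b*y   (the CFNMA step)
    y = std::fma(y, e, y);           // y = y + y*e   (the CFMA step)
  }
  return y;
}

int main() {
  double b = 3.0;
  double y0 = 0.3;                    // crude initial estimate (~2 bits)
  double y = refineReciprocal(b, y0, 4);
  std::printf("1/3 ~= %.17g\n", y);   // converges quadratically to 0.333...
}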
+/// +FunctionPass +*llvm::createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM) { + return new IA64DAGToDAGISel(TM); +} + diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp new file mode 100644 index 000000000000..34a0686564c0 --- /dev/null +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -0,0 +1,622 @@ +//===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IA64ISelLowering class. +// +//===----------------------------------------------------------------------===// + +#include "IA64ISelLowering.h" +#include "IA64MachineFunctionInfo.h" +#include "IA64TargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +using namespace llvm; + +IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) + : TargetLowering(TM) { + + // register class for general registers + addRegisterClass(MVT::i64, IA64::GRRegisterClass); + + // register class for FP registers + addRegisterClass(MVT::f64, IA64::FPRegisterClass); + + // register class for predicate registers + addRegisterClass(MVT::i1, IA64::PRRegisterClass); + + setLoadExtAction(ISD::EXTLOAD , MVT::i1 , Promote); + + setLoadExtAction(ISD::ZEXTLOAD , MVT::i1 , Promote); + + setLoadExtAction(ISD::SEXTLOAD , MVT::i1 , Promote); + setLoadExtAction(ISD::SEXTLOAD , MVT::i8 , Expand); + setLoadExtAction(ISD::SEXTLOAD , MVT::i16 , Expand); + setLoadExtAction(ISD::SEXTLOAD , MVT::i32 , Expand); + + setOperationAction(ISD::BRIND , MVT::Other, Expand); + setOperationAction(ISD::BR_JT , MVT::Other, Expand); + setOperationAction(ISD::BR_CC , MVT::Other, Expand); + setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); + + // ia64 uses SELECT not SELECT_CC + setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); + + // We need to handle ISD::RET for void functions ourselves, + // so we get a chance to restore ar.pfs before adding a + // br.ret insn + setOperationAction(ISD::RET, MVT::Other, Custom); + + setShiftAmountType(MVT::i64); + + setOperationAction(ISD::FREM , MVT::f32 , Expand); + setOperationAction(ISD::FREM , MVT::f64 , Expand); + + setOperationAction(ISD::UREM , MVT::f32 , Expand); + setOperationAction(ISD::UREM , MVT::f64 , Expand); + + setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + + setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); + setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); + + // We don't support sin/cos/sqrt/pow + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + setOperationAction(ISD::FPOW , MVT::f32, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + + // FIXME: IA64 supports fcopysign natively! 
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + + // We don't have line number support yet. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // IA64 has ctlz in the form of the 'fnorm' instruction. The Legalizer + // expansion for ctlz/cttz in terms of ctpop is much larger, but lower + // latency. + // FIXME: Custom lower CTLZ when compiling for size? + setOperationAction(ISD::CTLZ , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::ROTL , MVT::i64 , Expand); + setOperationAction(ISD::ROTR , MVT::i64 , Expand); + + // FIXME: IA64 has this, but is not implemented. should be mux @rev + setOperationAction(ISD::BSWAP, MVT::i64 , Expand); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VAARG , MVT::Other, Custom); + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + + // Thread Local Storage + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + setStackPointerRegisterToSaveRestore(IA64::r12); + + setJumpBufSize(704); // on ia64-linux, jmp_bufs are 704 bytes.. + setJumpBufAlignment(16); // ...and must be 16-byte aligned + + computeRegisterProperties(); + + addLegalFPImmediate(APFloat(+0.0)); + addLegalFPImmediate(APFloat(-0.0)); + addLegalFPImmediate(APFloat(+1.0)); + addLegalFPImmediate(APFloat(-1.0)); +} + +const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case IA64ISD::GETFD: return "IA64ISD::GETFD"; + case IA64ISD::BRCALL: return "IA64ISD::BRCALL"; + case IA64ISD::RET_FLAG: return "IA64ISD::RET_FLAG"; + } +} + +MVT IA64TargetLowering::getSetCCResultType(MVT VT) const { + return MVT::i1; +} + +void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &ArgValues, + DebugLoc dl) { + // + // add beautiful description of IA64 stack frame format + // here (from intel 24535803.pdf most likely) + // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + GP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + SP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + RP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + + MachineBasicBlock& BB = MF.front(); + + unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35, + IA64::r36, IA64::r37, IA64::r38, IA64::r39}; + + unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12,IA64::F13,IA64::F14, IA64::F15}; + + unsigned argVreg[8]; + unsigned argPreg[8]; + unsigned argOpc[8]; + + unsigned used_FPArgs = 0; // how many FP args have been used so far? + + unsigned ArgOffset = 0; + int count = 0; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + { + SDValue newroot, argt; + if(count < 8) { // need to fix this logic? maybe.
+ + switch (getValueType(I->getType()).getSimpleVT()) { + default: + assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n"); + case MVT::f32: + // fixme? (well, will need to for weird FP structy stuff, + // see intel ABI docs) + case MVT::f64: +//XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]); + MF.getRegInfo().addLiveIn(args_FP[used_FPArgs]); + // mark this reg as liveIn + // floating point args go into f8..f15 as-needed, the increment + argVreg[count] = // is below..: + MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::f64)); + // FP args go into f8..f15 as needed: (hence the ++) + argPreg[count] = args_FP[used_FPArgs++]; + argOpc[count] = IA64::FMOV; + argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), dl, + argVreg[count], MVT::f64); + if (I->getType() == Type::FloatTy) + argt = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, argt, + DAG.getIntPtrConstant(0)); + break; + case MVT::i1: // NOTE: as far as C abi stuff goes, + // bools are just boring old ints + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: +//XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]); + MF.getRegInfo().addLiveIn(args_int[count]); + // mark this register as liveIn + argVreg[count] = + MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + argPreg[count] = args_int[count]; + argOpc[count] = IA64::MOV; + argt = newroot = + DAG.getCopyFromReg(DAG.getRoot(), dl, argVreg[count], MVT::i64); + if ( getValueType(I->getType()) != MVT::i64) + argt = DAG.getNode(ISD::TRUNCATE, dl, getValueType(I->getType()), + newroot); + break; + } + } else { // more than 8 args go into the frame + // Create the frame index object for this incoming parameter... + ArgOffset = 16 + 8 * (count - 8); + int FI = MFI->CreateFixedObject(8, ArgOffset); + + // Create the SelectionDAG nodes corresponding to a load + //from this parameter + SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); + argt = newroot = DAG.getLoad(getValueType(I->getType()), dl, + DAG.getEntryNode(), FIN, NULL, 0); + } + ++count; + DAG.setRoot(newroot.getValue(1)); + ArgValues.push_back(argt); + } + + + // Create a vreg to hold the output of (what will become) + // the "alloc" instruction + VirtGPR = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + BuildMI(&BB, dl, TII->get(IA64::PSEUDO_ALLOC), VirtGPR); + // we create a PSEUDO_ALLOC (pseudo)instruction for now +/* + BuildMI(&BB, IA64::IDEF, 0, IA64::r1); + + // hmm: + BuildMI(&BB, IA64::IDEF, 0, IA64::r12); + BuildMI(&BB, IA64::IDEF, 0, IA64::rp); + // ..hmm. + + BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1); + + // hmm: + BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12); + BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); + // ..hmm. +*/ + + unsigned tempOffset=0; + + // if this is a varargs function, we simply lower llvm.va_start by + // pointing to the first entry + if(F.isVarArg()) { + tempOffset=0; + VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset); + } + + // here we actually do the moving of args, and store them to the stack + // too if this is a varargs function: + for (int i = 0; i < count && i < 8; ++i) { + BuildMI(&BB, dl, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]); + if(F.isVarArg()) { + // if this is a varargs function, we copy the input registers to the stack + int FI = MFI->CreateFixedObject(8, tempOffset); + tempOffset+=8; //XXX: is it safe to use r22 like this? 
+ BuildMI(&BB, dl, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI); + // FIXME: we should use st8.spill here, one day + BuildMI(&BB, dl, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]); + } + } + + // Finally, inform the code generator which regs we return values in. + // (see the ISD::RET: case in the instruction selector) + switch (getValueType(F.getReturnType()).getSimpleVT()) { + default: assert(0 && "i have no idea where to return this type!"); + case MVT::isVoid: break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + MF.getRegInfo().addLiveOut(IA64::r8); + break; + case MVT::f32: + case MVT::f64: + MF.getRegInfo().addLiveOut(IA64::F8); + break; + } +} + +std::pair<SDValue, SDValue> +IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, unsigned CallingConv, + bool isTailCall, SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) { + + MachineFunction &MF = DAG.getMachineFunction(); + + unsigned NumBytes = 16; + unsigned outRegsUsed = 0; + + if (Args.size() > 8) { + NumBytes += (Args.size() - 8) * 8; + outRegsUsed = 8; + } else { + outRegsUsed = Args.size(); + } + + // FIXME? this WILL fail if we ever try to pass around an arg that + // consumes more than a single output slot (a 'real' double, an int128, + // some sort of aggregate etc.), as we'll underestimate how many 'outX' + // registers we use. Hopefully, the assembler will notice. + MF.getInfo<IA64FunctionInfo>()->outRegsUsed= + std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed); + + // keep stack frame 16-byte aligned + // assert(NumBytes==((NumBytes+15) & ~15) && + // "stack frame not 16-byte aligned!"); + NumBytes = (NumBytes+15) & ~15; + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr; + std::vector<SDValue> Stores; + std::vector<SDValue> Converts; + std::vector<SDValue> RegValuesToPass; + unsigned ArgOffset = 16; + + for (unsigned i = 0, e = Args.size(); i != e; ++i) + { + SDValue Val = Args[i].Node; + MVT ObjectVT = Val.getValueType(); + SDValue ValToStore(0, 0), ValToConvert(0, 0); + unsigned ObjSize=8; + switch (ObjectVT.getSimpleVT()) { + default: assert(0 && "unexpected argument type!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: { + //promote to 64-bits, sign/zero extending based on type + //of the argument + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + Val = DAG.getNode(ExtendKind, dl, MVT::i64, Val); + // XXX: fall through + } + case MVT::i64: + //ObjSize = 8; + if(RegValuesToPass.size() >= 8) { + ValToStore = Val; + } else { + RegValuesToPass.push_back(Val); + } + break; + case MVT::f32: + //promote to 64-bits + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); + // XXX: fall through + case MVT::f64: + if(RegValuesToPass.size() >= 8) { + ValToStore = Val; + } else { + RegValuesToPass.push_back(Val); + if(1 /* TODO: if(calling external or variadic function)*/ ) { + ValToConvert = Val; // additionally pass this FP value as an int + } + } + break; + } + + if(ValToStore.getNode()) { + if(!StackPtr.getNode()) { + StackPtr = DAG.getRegister(IA64::r12, MVT::i64); + } + SDValue PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, PtrOff); + Stores.push_back(DAG.getStore(Chain, dl, ValToStore, PtrOff, NULL, 0)); + ArgOffset += ObjSize; + } + + if(ValToConvert.getNode()) {
Converts.push_back(DAG.getNode(IA64ISD::GETFD, dl, + MVT::i64, ValToConvert)); + } + } + + // Emit all stores, make sure they occur before any copies into physregs. + if (!Stores.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &Stores[0],Stores.size()); + + static const unsigned IntArgRegs[] = { + IA64::out0, IA64::out1, IA64::out2, IA64::out3, + IA64::out4, IA64::out5, IA64::out6, IA64::out7 + }; + + static const unsigned FPArgRegs[] = { + IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12, IA64::F13, IA64::F14, IA64::F15 + }; + + SDValue InFlag; + + // save the current GP, SP and RP : FIXME: do we need to do all 3 always? + SDValue GPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r1, + MVT::i64, InFlag); + Chain = GPBeforeCall.getValue(1); + InFlag = Chain.getValue(2); + SDValue SPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r12, + MVT::i64, InFlag); + Chain = SPBeforeCall.getValue(1); + InFlag = Chain.getValue(2); + SDValue RPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::rp, + MVT::i64, InFlag); + Chain = RPBeforeCall.getValue(1); + InFlag = Chain.getValue(2); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing integer args into regs out[0-7] + // mapped 1:1 and the FP args into regs F8-F15 "lazily" + // TODO: for performance, we should only copy FP args into int regs when we + // know this is required (i.e. for variadic or external (unknown) functions) + + // first do the FP->(integer representation) conversions, these are + // flagged for now, but shouldn't have to be (TODO) + unsigned seenConverts = 0; + for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { + if(RegValuesToPass[i].getValueType().isFloatingPoint()) { + Chain = DAG.getCopyToReg(Chain, dl, IntArgRegs[i], + Converts[seenConverts++], InFlag); + InFlag = Chain.getValue(1); + } + } + + // next copy args into the usual places, these are flagged + unsigned usedFPArgs = 0; + for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, + RegValuesToPass[i].getValueType().isInteger() ? + IntArgRegs[i] : FPArgRegs[usedFPArgs++], RegValuesToPass[i], InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. +/* + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64); + } +*/ + + std::vector<MVT> NodeTys; + std::vector<SDValue> CallOperands; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ CallOperands.push_back(Chain); + CallOperands.push_back(Callee); + + // emit the call itself + if (InFlag.getNode()) + CallOperands.push_back(InFlag); + else + assert(0 && "this should never happen!\n"); + + // to make way for a hack: + Chain = DAG.getNode(IA64ISD::BRCALL, dl, NodeTys, + &CallOperands[0], CallOperands.size()); + InFlag = Chain.getValue(1); + + // restore the GP, SP and RP after the call + Chain = DAG.getCopyToReg(Chain, dl, IA64::r1, GPBeforeCall, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, IA64::r12, SPBeforeCall, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, IA64::rp, RPBeforeCall, InFlag); + InFlag = Chain.getValue(1); + + std::vector<MVT> RetVals; + RetVals.push_back(MVT::Other); + RetVals.push_back(MVT::Flag); + + MVT RetTyVT = getValueType(RetTy); + SDValue RetVal; + if (RetTyVT != MVT::isVoid) { + switch (RetTyVT.getSimpleVT()) { + default: assert(0 && "Unknown value type to return!"); + case MVT::i1: { // bools are just like other integers (returned in r8) + // we *could* fall through to the truncate below, but this saves a + // few redundant predicate ops + SDValue boolInR8 = DAG.getCopyFromReg(Chain, dl, IA64::r8, + MVT::i64,InFlag); + InFlag = boolInR8.getValue(2); + Chain = boolInR8.getValue(1); + SDValue zeroReg = DAG.getCopyFromReg(Chain, dl, IA64::r0, + MVT::i64, InFlag); + InFlag = zeroReg.getValue(2); + Chain = zeroReg.getValue(1); + + RetVal = DAG.getSetCC(dl, MVT::i1, boolInR8, zeroReg, ISD::SETNE); + break; + } + case MVT::i8: + case MVT::i16: + case MVT::i32: + RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag); + Chain = RetVal.getValue(1); + + // keep track of whether it is sign or zero extended (todo: bools?) +/* XXX + RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext :ISD::AssertZext, + dl, MVT::i64, RetVal, DAG.getValueType(RetTyVT)); +*/ + RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal); + break; + case MVT::i64: + RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag); + Chain = RetVal.getValue(1); + InFlag = RetVal.getValue(2); // XXX dead + break; + case MVT::f32: + RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag); + Chain = RetVal.getValue(1); + RetVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, RetVal, + DAG.getIntPtrConstant(0)); + break; + case MVT::f64: + RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag); + Chain = RetVal.getValue(1); + InFlag = RetVal.getValue(2); // XXX dead + break; + } + } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), SDValue()); + return std::make_pair(RetVal, Chain); +} + +SDValue IA64TargetLowering:: +LowerOperation(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + switch (Op.getOpcode()) { + default: assert(0 && "Should not custom lower this!"); + case ISD::GlobalTLSAddress: + assert(0 && "TLS not implemented for IA64."); + case ISD::RET: { + SDValue AR_PFSVal, Copy; + + switch(Op.getNumOperands()) { + default: + assert(0 && "Do not know how to return this many arguments!"); + abort(); + case 1: + AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64); + AR_PFSVal = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, IA64::AR_PFS, + AR_PFSVal); + return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other, AR_PFSVal); + case 3: { + // Copy the result into the output register & restore ar.pfs + MVT ArgVT = Op.getOperand(1).getValueType(); + unsigned ArgReg = ArgVT.isInteger() ?
+
+      AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64);
+      Copy = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, ArgReg,
+                              Op.getOperand(1), SDValue());
+      AR_PFSVal = DAG.getCopyToReg(Copy.getValue(0), dl,
+                                   IA64::AR_PFS, AR_PFSVal, Copy.getValue(1));
+      return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other,
+                         AR_PFSVal, AR_PFSVal.getValue(1));
+    }
+    }
+    return SDValue();
+  }
+  case ISD::VAARG: {
+    MVT VT = getPointerTy();
+    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+    SDValue VAList = DAG.getLoad(VT, dl, Op.getOperand(0), Op.getOperand(1),
+                                 SV, 0);
+    // Increment the pointer, VAList, to the next vaarg
+    SDValue VAIncr = DAG.getNode(ISD::ADD, dl, VT, VAList,
+                                 DAG.getConstant(VT.getSizeInBits()/8,
+                                                 VT));
+    // Store the incremented VAList to the legalized pointer
+    VAIncr = DAG.getStore(VAList.getValue(1), dl, VAIncr,
+                          Op.getOperand(1), SV, 0);
+    // Load the actual argument out of the pointer VAList
+    return DAG.getLoad(Op.getValueType(), dl, VAIncr, VAList, NULL, 0);
+  }
+  case ISD::VASTART: {
+    // vastart just stores the address of the VarArgsFrameIndex slot into the
+    // memory location argument.
+    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+  }
+  // Frame & Return address. Currently unimplemented
+  case ISD::RETURNADDR: break;
+  case ISD::FRAMEADDR: break;
+  }
+  return SDValue();
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
new file mode 100644
index 000000000000..edf7eb895ad2
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -0,0 +1,76 @@
+//===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that IA64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+#define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "IA64.h"
+
+namespace llvm {
+  namespace IA64ISD {
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      /// GETFD - the getf.d instruction takes a floating point operand and
+      /// returns its 64-bit memory representation as an i64
+      GETFD,
+
+      // TODO: explain this hack
+      BRCALL,
+
+      // RET_FLAG - Return with a flag operand
+      RET_FLAG
+    };
+  }
+
+  class IA64TargetLowering : public TargetLowering {
+    int VarArgsFrameIndex;   // FrameIndex for start of varargs area.
+    //int ReturnAddrIndex;   // FrameIndex for return slot.
+    unsigned GP, SP, RP;     // FIXME - clean this mess up
+  public:
+    explicit IA64TargetLowering(TargetMachine &TM);
+
+    unsigned VirtGPR; // this is public so it can be accessed in the selector
+                      // for ISD::RET. add an accessor instead? FIXME
+    const char *getTargetNodeName(unsigned Opcode) const;
+
+    /// getSetCCResultType: return ISD::SETCC's result type.
+    virtual MVT getSetCCResultType(MVT VT) const;
+
+    /// LowerArguments - This hook must be implemented to indicate how we should
+    /// lower the arguments for the specified function, into the specified DAG.
+    virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+                                SmallVectorImpl<SDValue> &ArgValues,
+                                DebugLoc dl);
+
+    /// LowerCallTo - This hook lowers an abstract call to a function into an
+    /// actual call.
+    virtual std::pair<SDValue, SDValue>
+      LowerCallTo(SDValue Chain, const Type *RetTy,
+                  bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg,
+                  unsigned CC, bool isTailCall,
+                  SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+                  DebugLoc dl);
+
+    /// LowerOperation - for custom lowering specific ops
+    /// (currently, only "ret void")
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+  };
+}
+
+#endif   // LLVM_TARGET_IA64_IA64ISELLOWERING_H
diff --git a/lib/Target/IA64/IA64InstrBuilder.h b/lib/Target/IA64/IA64InstrBuilder.h
new file mode 100644
index 000000000000..a5d4dca530fb
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrBuilder.h
@@ -0,0 +1,40 @@
+//===-- IA64PCInstrBuilder.h - Aids for building IA64 insts -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64_INSTRBUILDER_H
+#define IA64_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+                  bool mem = true) {
+  if (mem)
+    return MIB.addImm(Offset).addFrameIndex(FI);
+  else
+    return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
new file mode 100644
index 000000000000..c465880d3e1a
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -0,0 +1,80 @@
+//===- IA64InstrFormats.td - IA64 Instruction Formats --*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// - Warning: the stuff in here isn't really being used, so is mostly
+//   junk. It'll get fixed as the JIT gets built.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstIA64<bits<4> op, dag OOL, dag IOL, string asmstr> : Instruction {
+  // IA64 instruction baseline
+  field bits<41> Inst;
+  let Namespace = "IA64";
+  let OutOperandList = OOL;
+  let InOperandList = IOL;
+  let AsmString = asmstr;
+
+  let Inst{40-37} = op;
+}
+
+//"Each Itanium instruction is categorized into one of six types."
+//We should have:
+// A, I, M, F, B, L+X
+
+class AForm<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr> :
+      InstIA64<opcode, OOL, IOL, asmstr> {
+
+  let Inst{5-0} = qpReg;
+}
+
+class AForm_DAG<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr,
+                list<dag> pattern> :
+      InstIA64<opcode, OOL, IOL, asmstr> {
+
+  let Pattern = pattern;
+  let Inst{5-0} = qpReg;
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OOL, dag IOL,
+            string asmstr> :
+      InstIA64<opcode, OOL, IOL, asmstr> {
+
+  let Inst{32-27} = x6;
+  let Inst{8-6} = btype;
+}
+
+class MForm<bits<4> opcode, bits<6> x6, dag OOL, dag IOL, string asmstr> :
+      InstIA64<opcode, OOL, IOL, asmstr> {
+  bits<7> Ra;
+  bits<7> Rb;
+  bits<16> disp;
+
+  let Inst{35-30} = x6;
+//  let Inst{20-16} = Rb;
+  let Inst{15-0} = disp;
+}
+
+class RawForm<bits<4> opcode, bits<26> rest, dag OOL, dag IOL, string asmstr> :
+      InstIA64<opcode, OOL, IOL, asmstr> {
+  let Inst{25-0} = rest;
+}
+
+// Pseudo instructions.
+class PseudoInstIA64<dag OOL, dag IOL, string nm> : InstIA64<0, OOL, IOL, nm> {
+}
+
+class PseudoInstIA64_DAG<dag OOL, dag IOL, string nm, list<dag> pattern>
+  : InstIA64<0, OOL, IOL, nm> {
+  let Pattern = pattern;
+}
+
diff --git a/lib/Target/IA64/IA64InstrInfo.cpp b/lib/Target/IA64/IA64InstrInfo.cpp
new file mode 100644
index 000000000000..5f89d4f13994
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.cpp
@@ -0,0 +1,193 @@
+//===- IA64InstrInfo.cpp - IA64 Instruction Information -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64InstrInfo.h"
+#include "IA64.h"
+#include "IA64InstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "IA64GenInstrInfo.inc"
+using namespace llvm;
+
+IA64InstrInfo::IA64InstrInfo()
+  : TargetInstrInfoImpl(IA64Insts, sizeof(IA64Insts)/sizeof(IA64Insts[0])),
+    RI(*this) {
+}
+
+
+bool IA64InstrInfo::isMoveInstr(const MachineInstr& MI,
+                                unsigned& sourceReg,
+                                unsigned& destReg,
+                                unsigned& SrcSR, unsigned& DstSR) const {
+  SrcSR = DstSR = 0;  // No sub-registers.
+
+  unsigned oc = MI.getOpcode();
+  if (oc == IA64::MOV || oc == IA64::FMOV) {
+    // TODO: this doesn't detect predicate moves
+    assert(MI.getNumOperands() >= 2 &&
+           /* MI.getOperand(0).isReg() &&
+              MI.getOperand(1).isReg() && */
+           "invalid register-register move instruction");
+    if (MI.getOperand(0).isReg() &&
+        MI.getOperand(1).isReg()) {
+      // if both operands of the MOV/FMOV are registers, then
+      // yes, this is a move instruction
+      sourceReg = MI.getOperand(1).getReg();
+      destReg = MI.getOperand(0).getReg();
+      return true;
+    }
+  }
+  return false; // we don't consider e.g. %regN = MOV <FrameIndex #x> a
+                // move instruction
+}
+
+unsigned
+IA64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME this should probably have a DebugLoc argument
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Can only insert uncond branches so far.
+  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+  BuildMI(&MBB, dl, get(IA64::BRL_NOTCALL)).addMBB(TBB);
+  return 1;
+}
+
+bool IA64InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 unsigned DestReg, unsigned SrcReg,
+                                 const TargetRegisterClass *DestRC,
+                                 const TargetRegisterClass *SrcRC) const {
+  if (DestRC != SrcRC) {
+    // Not yet supported!
+    return false;
+  }
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (DestRC == IA64::PRRegisterClass) // if a bool, we use pseudocode
+    // (SrcReg) DestReg = cmp.eq.unc(r0, r0)
+    BuildMI(MBB, MI, DL, get(IA64::PCMPEQUNC), DestReg)
+      .addReg(IA64::r0).addReg(IA64::r0).addReg(SrcReg);
+  else // otherwise, MOV works (for both gen. regs and FP regs)
+    BuildMI(MBB, MI, DL, get(IA64::MOV), DestReg).addReg(SrcReg);
+
+  return true;
+}
+
+void IA64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        unsigned SrcReg, bool isKill,
+                                        int FrameIdx,
+                                        const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (RC == IA64::FPRegisterClass) {
+    BuildMI(MBB, MI, DL, get(IA64::STF_SPILL)).addFrameIndex(FrameIdx)
+      .addReg(SrcReg, getKillRegState(isKill));
+  } else if (RC == IA64::GRRegisterClass) {
+    BuildMI(MBB, MI, DL, get(IA64::ST8)).addFrameIndex(FrameIdx)
+      .addReg(SrcReg, getKillRegState(isKill));
+  } else if (RC == IA64::PRRegisterClass) {
+    /* we use IA64::r2 as a temporary register for doing this hackery. */
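+    // In effect, spilling a predicate register $p comes out roughly as:
+    //   mov r2 = r0            ;; r2 = 0
+    //   ($p) add r2 = 1, r2    ;; r2 = 1 iff $p is set
+    //   st8 [slot] = r2        ;; store 0 or 1 to the stack slot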
+    // first we load 0:
+    BuildMI(MBB, MI, DL, get(IA64::MOV), IA64::r2).addReg(IA64::r0);
+    // then conditionally add 1:
+    BuildMI(MBB, MI, DL, get(IA64::CADDIMM22), IA64::r2).addReg(IA64::r2)
+      .addImm(1).addReg(SrcReg, getKillRegState(isKill));
+    // and then store it to the stack
+    BuildMI(MBB, MI, DL, get(IA64::ST8))
+      .addFrameIndex(FrameIdx)
+      .addReg(IA64::r2);
+  } else
+    assert(0 &&
+           "sorry, I don't know how to store this sort of reg in the stack\n");
+}
+
+void IA64InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                   bool isKill,
+                                   SmallVectorImpl<MachineOperand> &Addr,
+                                   const TargetRegisterClass *RC,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  unsigned Opc = 0;
+  if (RC == IA64::FPRegisterClass) {
+    Opc = IA64::STF8;
+  } else if (RC == IA64::GRRegisterClass) {
+    Opc = IA64::ST8;
+  } else if (RC == IA64::PRRegisterClass) {
+    Opc = IA64::ST1;
+  } else {
+    assert(0 &&
+           "sorry, I don't know how to store this sort of reg\n");
+  }
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  MIB.addReg(SrcReg, getKillRegState(isKill));
+  NewMIs.push_back(MIB);
+  return;
+}
+
+void IA64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         unsigned DestReg, int FrameIdx,
+                                         const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (RC == IA64::FPRegisterClass) {
+    BuildMI(MBB, MI, DL, get(IA64::LDF_FILL), DestReg).addFrameIndex(FrameIdx);
+  } else if (RC == IA64::GRRegisterClass) {
+    BuildMI(MBB, MI, DL, get(IA64::LD8), DestReg).addFrameIndex(FrameIdx);
+  } else if (RC == IA64::PRRegisterClass) {
+    // first we load a byte from the stack into r2, our 'predicate hackery'
+    // scratch reg
+    BuildMI(MBB, MI, DL, get(IA64::LD8), IA64::r2).addFrameIndex(FrameIdx);
+    // then we compare it to zero. If it _is_ zero, compare-not-equal to
+    // r0 gives us 0, which is what we want, so that's nice.
+    BuildMI(MBB, MI, DL, get(IA64::CMPNE), DestReg)
+      .addReg(IA64::r2)
+      .addReg(IA64::r0);
+  } else {
+    assert(0 &&
+           "sorry, I don't know how to load this sort of reg from the stack\n");
+  }
+}
+
+void IA64InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                    SmallVectorImpl<MachineOperand> &Addr,
+                                    const TargetRegisterClass *RC,
+                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  unsigned Opc = 0;
+  if (RC == IA64::FPRegisterClass) {
+    Opc = IA64::LDF8;
+  } else if (RC == IA64::GRRegisterClass) {
+    Opc = IA64::LD8;
+  } else if (RC == IA64::PRRegisterClass) {
+    Opc = IA64::LD1;
+  } else {
+    assert(0 &&
+           "sorry, I don't know how to load this sort of reg\n");
+  }
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
diff --git a/lib/Target/IA64/IA64InstrInfo.h b/lib/Target/IA64/IA64InstrInfo.h
new file mode 100644
index 000000000000..79236c2c7c86
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.h
@@ -0,0 +1,70 @@
+//===- IA64InstrInfo.h - IA64 Instruction Information ----------*- C++ -*- ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64INSTRUCTIONINFO_H
+#define IA64INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "IA64RegisterInfo.h"
+
+namespace llvm {
+
+class IA64InstrInfo : public TargetInstrInfoImpl {
+  const IA64RegisterInfo RI;
+public:
+  IA64InstrInfo();
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const IA64RegisterInfo &getRegisterInfo() const { return RI; }
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond) const;
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
new file mode 100644
index 000000000000..2ab9897bddeb
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -0,0 +1,751 @@
+//===- IA64InstrInfo.td - Describe the IA64 Instruction Set -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// IA-64 specific DAG Nodes.
+//
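+// (The SDNode definitions below bind the IA64ISD opcodes declared in
+//  IA64ISelLowering.h to TableGen nodes so that instruction patterns can
+//  match them; retflag's SDNPOptInFlag is what lets RET consume the glued
+//  copy of the return value.)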
+
+def IA64getfd : SDNode<"IA64ISD::GETFD", SDTFPToIntOp, []>;
+
+def retflag : SDNode<"IA64ISD::RET_FLAG", SDTNone,
+                     [SDNPHasChain, SDNPOptInFlag]>;
+
+//===---------
+// Instruction types
+
+class isA  { bit A=1; }   // I or M unit
+class isM  { bit M=1; }   // M unit
+class isI  { bit I=1; }   // I unit
+class isB  { bit B=1; }   // B unit
+class isF  { bit F=1; }   // F unit
+class isLX { bit LX=1; }  // I/B
+
+//===---------
+
+def u2imm : Operand<i64>;
+def u6imm : Operand<i64>;
+def s8imm : Operand<i64> {
+  let PrintMethod = "printS8ImmOperand";
+}
+def s14imm : Operand<i64> {
+  let PrintMethod = "printS14ImmOperand";
+}
+def s22imm : Operand<i64> {
+  let PrintMethod = "printS22ImmOperand";
+}
+def u64imm : Operand<i64> {
+  let PrintMethod = "printU64ImmOperand";
+}
+def s64imm : Operand<i64> {
+  let PrintMethod = "printS64ImmOperand";
+}
+
+let PrintMethod = "printGlobalOperand" in
+  def globaladdress : Operand<i64>;
+
+// the asmprinter needs to know about calls
+let PrintMethod = "printCallOperand" in
+  def calltarget : Operand<i64>;
+
+/* new daggy action!!! */
+
+def is32ones : PatLeaf<(i64 imm), [{
+  // is32ones predicate - True if the immediate is 0x00000000FFFFFFFF
+  // Used to create ZXT4s appropriately
+  uint64_t v = (uint64_t)N->getZExtValue();
+  return (v == 0x00000000FFFFFFFFLL);
+}]>;
+
+// isMIXable predicates - True if the immediate is
+// 0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF
+// etc, through 0x00000000FFFFFFFF
+// Used to test for the suitability of mix*
+def isMIX1Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0xFF00FF00FF00FF00LL);
+}]>;
+def isMIX1Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0x00FF00FF00FF00FFLL);
+}]>;
+def isMIX2Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0xFFFF0000FFFF0000LL);
+}]>;
+def isMIX2Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0x0000FFFF0000FFFFLL);
+}]>;
+def isMIX4Lable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0xFFFFFFFF00000000LL);
+}]>;
+def isMIX4Rable: PatLeaf<(i64 imm), [{
+  return((uint64_t)N->getZExtValue()==0x00000000FFFFFFFFLL);
+}]>;
+
+def isSHLADDimm: PatLeaf<(i64 imm), [{
+  // isSHLADDimm predicate - True if the immediate is exactly 1, 2, 3 or 4
+  // - 0 is *not* okay.
+  // Used to create shladd instructions appropriately
+  int64_t v = (int64_t)N->getZExtValue();
+  return (v >= 1 && v <= 4);
+}]>;
+
+def immSExt14 : PatLeaf<(i64 imm), [{
+  // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+  // field. Used by instructions like 'adds'.
+  int64_t v = (int64_t)N->getZExtValue();
+  return (v <= 8191 && v >= -8192);
+}]>;
+
+// imm64 predicate - True if the immediate fits in a 64-bit
+// field - i.e., true. used to keep movl happy
+def imm64 : PatLeaf<(i64 imm)>;
+
+def ADD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                    "add $dst = $src1, $src2",
+                    [(set GR:$dst, (add GR:$src1, GR:$src2))]>, isA;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                     "add $dst = $src1, $src2, 1",
+                     [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>, isA;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+                     "adds $dst = $imm, $src1",
+                     [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>, isA;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+                     "movl $dst = $imm",
+                     [(set GR:$dst, imm64:$imm)]>, isLX;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (outs GR:$dst),
+                        (ins GR:$src1, globaladdress:$imm),
+                        "addl $dst = $imm, $src1",
+                        []>, isA;
+
+// hmm
+def ADDL_EA : AForm_DAG<0x03, 0x0b, (outs GR:$dst),
+                        (ins GR:$src1, calltarget:$imm),
+                        "addl $dst = $imm, $src1",
+                        []>, isA;
+
+def SUB : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                    "sub $dst = $src1, $src2",
+                    [(set GR:$dst, (sub GR:$src1, GR:$src2))]>, isA;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                     "sub $dst = $src1, $src2, 1",
+                     [(set GR:$dst, (add (sub GR:$src1, GR:$src2), -1))]>, isA;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+                        (outs GR:$dst), (ins GR:$src1, s22imm:$imm, PR:$qp),
+                        "($qp) add $dst = $imm, $dst">, isA;
+def TPCADDS : AForm_DAG<0x03, 0x0b,
+                        (outs GR:$dst), (ins GR:$src1, s14imm:$imm, PR:$qp),
+                        "($qp) adds $dst = $imm, $dst",
+                        []>, isA;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+                        (outs PR:$dst),
+                        (ins PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
+                        "($qp) cmp.ne $dst , p0 = $imm, $src2">, isA;
+}
+
+// zero extend a bool (predicate reg) into an integer reg
+def ZXTb : Pat<(zext PR:$src),
+               (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+def AXTb : Pat<(anyext PR:$src),
+               (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+
+// normal sign/zero-extends
+def SXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "sxt1 $dst = $src",
+                     [(set GR:$dst, (sext_inreg GR:$src, i8))]>, isI;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "zxt1 $dst = $src",
+                     [(set GR:$dst, (and GR:$src, 255))]>, isI;
+def SXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "sxt2 $dst = $src",
+                     [(set GR:$dst, (sext_inreg GR:$src, i16))]>, isI;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "zxt2 $dst = $src",
+                     [(set GR:$dst, (and GR:$src, 65535))]>, isI;
+def SXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "sxt4 $dst = $src",
+                     [(set GR:$dst, (sext_inreg GR:$src, i32))]>, isI;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                     "zxt4 $dst = $src",
+                     [(set GR:$dst, (and GR:$src, is32ones))]>, isI;
+
+// fixme: shrs vs shru?
+def MIX1L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix1.l $dst = $src1, $src2",
+             [(set GR:$dst, (or (and GR:$src1, isMIX1Lable),
+                                (and (srl GR:$src2, (i64 8)), isMIX1Lable)))]>, isI;
+
+def MIX2L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix2.l $dst = $src1, $src2",
+             [(set GR:$dst, (or (and GR:$src1, isMIX2Lable),
+                                (and (srl GR:$src2, (i64 16)), isMIX2Lable)))]>, isI;
+
+def MIX4L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix4.l $dst = $src1, $src2",
+             [(set GR:$dst, (or (and GR:$src1, isMIX4Lable),
+                                (and (srl GR:$src2, (i64 32)), isMIX4Lable)))]>, isI;
+
+def MIX1R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix1.r $dst = $src1, $src2",
+             [(set GR:$dst, (or (and (shl GR:$src1, (i64 8)), isMIX1Rable),
+                                (and GR:$src2, isMIX1Rable)))]>, isI;
+
+def MIX2R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix2.r $dst = $src1, $src2",
+             [(set GR:$dst, (or (and (shl GR:$src1, (i64 16)), isMIX2Rable),
+                                (and GR:$src2, isMIX2Rable)))]>, isI;
+
+def MIX4R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+             "mix4.r $dst = $src1, $src2",
+             [(set GR:$dst, (or (and (shl GR:$src1, (i64 32)), isMIX4Rable),
+                                (and GR:$src2, isMIX4Rable)))]>, isI;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+                         "getf.sig $dst = $src",
+                         []>, isM;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+                         "setf.sig $dst = $src",
+                         []>, isM;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                      (ins FP:$src1, FP:$src2, FP:$src3),
+                      "xma.l $dst = $src1, $src2, $src3",
+                      []>, isF;
+def XMAHD : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                      (ins FP:$src1, FP:$src2, FP:$src3),
+                      "xma.h $dst = $src1, $src2, $src3",
+                      []>, isF;
+def XMAHUD : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                       (ins FP:$src1, FP:$src2, FP:$src3),
+                       "xma.hu $dst = $src1, $src2, $src3",
+                       []>, isF;
+
+// pseudocode for integer multiplication
+def : Pat<(mul GR:$src1, GR:$src2),
+          (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhs GR:$src1, GR:$src2),
+          (GETFSIGD (XMAHD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhu GR:$src1, GR:$src2),
+          (GETFSIGD (XMAHUD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+//                  "adds $dst = $imm, $src1">;
+
+def AND : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                    "and $dst = $src1, $src2",
+                    [(set GR:$dst, (and GR:$src1, GR:$src2))]>, isA;
+def ANDCM : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                      "andcm $dst = $src1, $src2",
+                      [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>, isA;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                   "or $dst = $src1, $src2",
+                   [(set GR:$dst, (or GR:$src1, GR:$src2))]>, isA;
+
+def pOR : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+                "($qp) or $dst = $src1, $src2">, isA;
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare!
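+// (hardware compares actually write a pair of complementary predicate
+//  registers at once; the patterns below only model the first destination
+//  and direct the complement into p0, whose writes are discarded)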
+def CMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.eq $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>, isA;
+def CMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.gt $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>, isA;
+def CMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.ge $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setge GR:$src1, GR:$src2))]>, isA;
+def CMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.lt $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>, isA;
+def CMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.le $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setle GR:$src1, GR:$src2))]>, isA;
+def CMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.ne $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setne GR:$src1, GR:$src2))]>, isA;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.ltu $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setult GR:$src1, GR:$src2))]>, isA;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.gtu $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>, isA;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.leu $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setule GR:$src1, GR:$src2))]>, isA;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+                      "cmp.geu $dst, p0 = $src1, $src2",
+                      [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>, isA;
+
+// and we do the whole thing again for FP compares!
+def FCMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.eq $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (seteq FP:$src1, FP:$src2))]>, isF;
+def FCMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.gt $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setgt FP:$src1, FP:$src2))]>, isF;
+def FCMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.ge $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setge FP:$src1, FP:$src2))]>, isF;
+def FCMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.lt $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setlt FP:$src1, FP:$src2))]>, isF;
+def FCMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.le $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setle FP:$src1, FP:$src2))]>, isF;
+def FCMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.neq $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setne FP:$src1, FP:$src2))]>, isF;
+def FCMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.lt $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setult FP:$src1, FP:$src2))]>, isF;
+def FCMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.gt $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setugt FP:$src1, FP:$src2))]>, isF;
+def FCMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.le $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setule FP:$src1, FP:$src2))]>, isF;
+def FCMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+                       "fcmp.ge $dst, p0 = $src1, $src2",
+                       [(set PR:$dst, (setuge FP:$src1, FP:$src2))]>, isF;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$qp),
+                          "($qp) cmp.eq.unc $dst, p0 = r0, r0">, isA;
+
+def : Pat<(trunc GR:$src),           // truncate i64 to i1
+          (CMPNE GR:$src, r0)>;      // $src!=0? If so, PR:$dst=true
+
+let isTwoAddress=1 in {
+  def TPCMPEQR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+                          "($qp) cmp.eq $dst, p0 = r0, r0">, isA;
+  def TPCMPNER0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+                          "($qp) cmp.ne $dst, p0 = r0, r0">, isA;
+}
+
+/* our pseudocode for OR on predicates is:
+pC = pA OR pB
+-------------
+(pA) cmp.eq.unc pC,p0 = r0,r0   // pC = pA
+     ;;
+(pB) cmp.eq pC,p0 = r0,r0       // if (pB) pC = 1 */
+
+def bOR : Pat<(or PR:$src1, PR:$src2),
+              (TPCMPEQR0R0 (PCMPEQUNCR0R0 PR:$src1), PR:$src2)>;
+
+/* our pseudocode for AND on predicates is:
+ *
+(pA) cmp.eq.unc pC,p0 = r0,r0    // pC = pA
+     cmp.eq pTemp,p0 = r0,r0     // pTemp = NOT pB
+     ;;
+(pB) cmp.ne pTemp,p0 = r0,r0
+     ;;
+(pTemp)cmp.ne pC,p0 = r0,r0      // if (NOT pB) pC = 0 */
+
+def bAND : Pat<(and PR:$src1, PR:$src2),
+               (TPCMPNER0R0 (PCMPEQUNCR0R0 PR:$src1),
+                            (TPCMPNER0R0 (CMPEQ r0, r0), PR:$src2))>;
+
+/* one possible routine for XOR on predicates is:
+
+      // Compute px = py ^ pz
+        // using sum of products: px = (py & !pz) | (pz & !py)
+        // Uses 5 instructions in 3 cycles.
+        // cycle 1
+(pz)    cmp.eq.unc      px = r0, r0     // px = pz
+(py)    cmp.eq.unc      pt = r0, r0     // pt = py
+        ;;
+        // cycle 2
+(pt)    cmp.ne.and      px = r0, r0     // px = px & !pt (px = pz & !pt)
+(pz)    cmp.ne.and      pt = r0, r0     // pt = pt & !pz
+        ;;
+        } { .mmi
+        // cycle 3
+(pt)    cmp.eq.or       px = r0, r0     // px = px | pt
+
+*** Another, which we use here, requires one scratch GR. it is:
+
+        mov             rt = 0          // initialize rt off critical path
+        ;;
+
+        // cycle 1
+(pz)    cmp.eq.unc      px = r0, r0     // px = pz
+(pz)    mov             rt = 1          // rt = pz
+        ;;
+        // cycle 2
+(py)    cmp.ne          px = 1, rt      // if (py) px = !pz
+
+.. these routines kindly provided by Jim Hull
+*/
+
+def bXOR : Pat<(xor PR:$src1, PR:$src2),
+               (TPCMPIMM8NE (PCMPEQUNCR0R0 PR:$src2), 1,
+                            (TPCADDS (ADDS r0, 0), 1, PR:$src2),
+                            PR:$src1)>;
+
+def XOR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                    "xor $dst = $src1, $src2",
+                    [(set GR:$dst, (xor GR:$src1, GR:$src2))]>, isA;
+
+def SHLADD: AForm_DAG<0x03, 0x0b, (outs GR:$dst),
+              (ins GR:$src1, s64imm:$imm, GR:$src2),
+              "shladd $dst = $src1, $imm, $src2",
+              [(set GR:$dst, (add GR:$src2, (shl GR:$src1, isSHLADDimm:$imm)))]>, isA;
+
+def SHL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                    "shl $dst = $src1, $src2",
+                    [(set GR:$dst, (shl GR:$src1, GR:$src2))]>, isI;
+
+def SHRU : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                     "shr.u $dst = $src1, $src2",
+                     [(set GR:$dst, (srl GR:$src1, GR:$src2))]>, isI;
+
+def SHRS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+                     "shr $dst = $src1, $src2",
+                     [(set GR:$dst, (sra GR:$src1, GR:$src2))]>, isI;
+
+def MOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                "mov $dst = $src">, isA;
+def FMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                 "mov $dst = $src">, isF; // XXX: there _is_ no fmov
+def PMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src, PR:$qp),
+                 "($qp) mov $dst = $src">, isA;
+
+def SPILL_ALL_PREDICATES_TO_GR : AForm<0x03, 0x0b, (outs GR:$dst), (ins),
+                                       "mov $dst = pr">, isI;
+def FILL_ALL_PREDICATES_FROM_GR : AForm<0x03, 0x0b, (outs), (ins GR:$src),
+                                        "mov pr = $src">, isI;
+
+let isTwoAddress = 1 in {
+  def CMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src2, GR:$src, PR:$qp),
+                   "($qp) mov $dst = $src">, isA;
+}
+
+def PFMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src, PR:$qp),
+                  "($qp) mov $dst = $src">, isF;
+
+let isTwoAddress = 1 in {
+  def CFMOV : AForm<0x03, 0x0b,
+                    (outs FP:$dst), (ins FP:$src2, FP:$src, PR:$qp),
+                    "($qp) mov $dst = $src">, isF;
+}
+
+def SELECTINT : Pat<(select PR:$which, GR:$src1, GR:$src2),
+                    (CMOV (MOV GR:$src2), GR:$src1, PR:$which)>; // note order!
+def SELECTFP : Pat<(select PR:$which, FP:$src1, FP:$src2),
+                   (CFMOV (FMOV FP:$src2), FP:$src1, PR:$which)>; // note order!
+// TODO: can do this faster, w/o using any integer regs (see pattern isel)
+def SELECTBOOL : Pat<(select PR:$which, PR:$src1, PR:$src2), // note order!
+                     (CMPNE (CMOV
+                              (MOV (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src2)),
+                              (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src1),
+                              PR:$which), r0)>;
+
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i1 -1), (CMPEQ r0, r0)>; // TODO: this should just be a ref to p0
+def : Pat<(i1  0), (CMPNE r0, r0)>; // TODO: any instruction actually *using*
+                                    //       this predicate should be killed!
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
+def IUSE : PseudoInstIA64<(outs), (ins variable_ops), "// IUSE">;
+def ADJUSTCALLSTACKUP : PseudoInstIA64<(outs), (ins variable_ops),
+                                       "// ADJUSTCALLSTACKUP">;
+def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(outs), (ins variable_ops),
+                                         "// ADJUSTCALLSTACKDOWN">;
+def PSEUDO_ALLOC : PseudoInstIA64<(outs), (ins GR:$foo), "// PSEUDO_ALLOC">;
+
+def ALLOC : AForm<0x03, 0x0b,
+  (outs GR:$dst),
+  (ins i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
+  "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating">, isM;
+
+let isTwoAddress = 1 in {
+  def TCMPNE : AForm<0x03, 0x0b,
+                     (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4),
+                     "cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+  def TPCMPEQOR : AForm<0x03, 0x0b,
+                        (outs PR:$dst),
+                        (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+                        "($qp) cmp.eq.or $dst, p0 = $src3, $src4">, isA;
+
+  def TPCMPNE : AForm<0x03, 0x0b,
+                      (outs PR:$dst),
+                      (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+                      "($qp) cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+  def TPCMPEQ : AForm<0x03, 0x0b,
+                      (outs PR:$dst),
+                      (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+                      "($qp) cmp.eq $dst, p0 = $src3, $src4">, isA;
+}
+
+def MOVSIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s14imm:$imm),
+                      "mov $dst = $imm">, isA;
+def MOVSIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s22imm:$imm),
+                      "mov $dst = $imm">, isA;
+def MOVLIMM64 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+                      "movl $dst = $imm">, isLX;
+
+def SHLI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+                 "shl $dst = $src1, $imm">, isI;
+def SHRUI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+                  "shr.u $dst = $src1, $imm">, isI;
+def SHRSI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+                  "shr $dst = $src1, $imm">, isI;
+
+def EXTRU : AForm<0x03, 0x0b,
+                  (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+                  "extr.u $dst = $src1, $imm1, $imm2">, isI;
+
+def DEPZ : AForm<0x03, 0x0b,
+                 (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+                 "dep.z $dst = $src1, $imm1, $imm2">, isI;
+
+def PCMPEQOR : AForm<0x03, 0x0b, (outs PR:$dst),
+                     (ins GR:$src1, GR:$src2, PR:$qp),
+                     "($qp) cmp.eq.or $dst, p0 = $src1, $src2">, isA;
+def PCMPEQUNC : AForm<0x03, 0x0b, (outs PR:$dst),
+                      (ins GR:$src1, GR:$src2, PR:$qp),
+                      "($qp) cmp.eq.unc $dst, p0 = $src1, $src2">, isA;
+def PCMPNE : AForm<0x03, 0x0b, (outs PR:$dst),
+                   (ins GR:$src1, GR:$src2, PR:$qp),
+                   "($qp) cmp.ne $dst, p0 = $src1, $src2">, isA;
+
+// two destinations!
+def BCMPEQ : AForm<0x03, 0x0b, (outs PR:$dst1, PR:$dst2),
+                   (ins GR:$src1, GR:$src2),
+                   "cmp.eq $dst1, $dst2 = $src1, $src2">, isA;
+
+def ADDIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+                     "adds $dst = $imm, $src1">, isA;
+
+def ADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s22imm:$imm),
+                     "add $dst = $imm, $src1">, isA;
+def CADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst),
+                      (ins GR:$src1, s22imm:$imm, PR:$qp),
+                      "($qp) add $dst = $imm, $src1">, isA;
+
+def SUBIMM8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s8imm:$imm, GR:$src2),
+                    "sub $dst = $imm, $src2">, isA;
+
+let mayStore = 1 in {
+  def ST1 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+                  "st1 [$dstPtr] = $value">, isM;
+  def ST2 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+                  "st2 [$dstPtr] = $value">, isM;
+  def ST4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+                  "st4 [$dstPtr] = $value">, isM;
+  def ST8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+                  "st8 [$dstPtr] = $value">, isM;
+  def STF4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+                   "stfs [$dstPtr] = $value">, isM;
+  def STF8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+                   "stfd [$dstPtr] = $value">, isM;
+  def STF_SPILL : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+                        "stf.spill [$dstPtr] = $value">, isM;
+}
+
+let canFoldAsLoad = 1 in {
+  def LD1 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+                  "ld1 $dst = [$srcPtr]">, isM;
+  def LD2 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+                  "ld2 $dst = [$srcPtr]">, isM;
+  def LD4 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+                  "ld4 $dst = [$srcPtr]">, isM;
+  def LD8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+                  "ld8 $dst = [$srcPtr]">, isM;
+  def LDF4 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+                   "ldfs $dst = [$srcPtr]">, isM;
+  def LDF8 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+                   "ldfd $dst = [$srcPtr]">, isM;
+  def LDF_FILL : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+                       "ldf.fill $dst = [$srcPtr]">, isM;
+}
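+
+// (note that these loads and stores only take a plain [register] address;
+//  ia64 has no reg+immediate addressing mode, which is why frame references
+//  are first materialized into a scratch register by eliminateFrameIndex)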
+
+def POPCNT : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+                       "popcnt $dst = $src",
+                       [(set GR:$dst, (ctpop GR:$src))]>, isI;
+
+// some FP stuff:  // TODO: single-precision stuff?
+def FADD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+                     "fadd $dst = $src1, $src2",
+                     [(set FP:$dst, (fadd FP:$src1, FP:$src2))]>, isF;
+def FADDS: AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+                 "fadd.s $dst = $src1, $src2">, isF;
+def FSUB : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+                     "fsub $dst = $src1, $src2",
+                     [(set FP:$dst, (fsub FP:$src1, FP:$src2))]>, isF;
+def FMPY : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+                     "fmpy $dst = $src1, $src2",
+                     [(set FP:$dst, (fmul FP:$src1, FP:$src2))]>, isF;
+def FMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                    (ins FP:$src1, FP:$src2, FP:$src3),
+                    "fma $dst = $src1, $src2, $src3",
+                    [(set FP:$dst, (fadd (fmul FP:$src1, FP:$src2),
+                                         FP:$src3))]>, isF;
+def FMS : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                    (ins FP:$src1, FP:$src2, FP:$src3),
+                    "fms $dst = $src1, $src2, $src3",
+                    [(set FP:$dst, (fsub (fmul FP:$src1, FP:$src2),
+                                         FP:$src3))]>, isF;
+def FNMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst),
+                     (ins FP:$src1, FP:$src2, FP:$src3),
+                     "fnma $dst = $src1, $src2, $src3",
+                     [(set FP:$dst, (fneg (fadd (fmul FP:$src1, FP:$src2),
+                                                FP:$src3)))]>, isF;
+def FABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                     "fabs $dst = $src",
+                     [(set FP:$dst, (fabs FP:$src))]>, isF;
+def FNEG : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                     "fneg $dst = $src",
+                     [(set FP:$dst, (fneg FP:$src))]>, isF;
+def FNEGABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                        "fnegabs $dst = $src",
+                        [(set FP:$dst, (fneg (fabs FP:$src)))]>, isF;
+
+let isTwoAddress=1 in {
+def TCFMAS1 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def TCFMADS0 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+}
+
+def CFMAS1 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def CFNMAS1 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fnma.s1 $dst = $src1, $src2, $src3">, isF;
+
+def CFMADS1 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fma.d.s1 $dst = $src1, $src2, $src3">, isF;
+def CFMADS0 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+def CFNMADS1 : AForm<0x03, 0x0b,
+  (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+  "($qp) fnma.d.s1 $dst = $src1, $src2, $src3">, isF;
+
+def FRCPAS0 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR),
+                    (ins FP:$src1, FP:$src2),
+                    "frcpa.s0 $dstFR, $dstPR = $src1, $src2">, isF;
+def FRCPAS1 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR),
+                    (ins FP:$src1, FP:$src2),
+                    "frcpa.s1 $dstFR, $dstPR = $src1, $src2">, isF;
+
+def XMAL : AForm<0x03, 0x0b, (outs FP:$dst),
+                 (ins FP:$src1, FP:$src2, FP:$src3),
+                 "xma.l $dst = $src1, $src2, $src3">, isF;
+
+def FCVTXF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                   "fcvt.xf $dst = $src">, isF;
+def FCVTXUF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                    "fcvt.xuf $dst = $src">, isF;
+def FCVTXUFS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                      "fcvt.xuf.s1 $dst = $src">, isF;
+def FCVTFX : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                   "fcvt.fx $dst = $src">, isF;
+def FCVTFXU : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                    "fcvt.fxu $dst = $src">, isF;
+
+def FCVTFXTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                        "fcvt.fx.trunc $dst = $src">, isF;
+def FCVTFXUTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                         "fcvt.fxu.trunc $dst = $src">, isF;
+
+def FCVTFXTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                          "fcvt.fx.trunc.s1 $dst = $src">, isF;
+def FCVTFXUTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                           "fcvt.fxu.trunc.s1 $dst = $src">, isF;
+
+def FNORMD : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+                   "fnorm.d $dst = $src">, isF;
+
+def GETFD : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+                  "getf.d $dst = $src">, isM;
+def SETFD : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+                  "setf.d $dst = $src">, isM;
+
+def GETFSIG : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+                    "getf.sig $dst = $src">, isM;
+def SETFSIG : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+                    "setf.sig $dst = $src">, isM;
+
+// these four FP<->int conversion patterns need checking/cleaning
+def SINT_TO_FP : Pat<(sint_to_fp GR:$src),
+                     (FNORMD (FCVTXF (SETFSIG GR:$src)))>;
+def UINT_TO_FP : Pat<(uint_to_fp GR:$src),
+                     (FNORMD (FCVTXUF (SETFSIG GR:$src)))>;
+def FP_TO_SINT : Pat<(i64 (fp_to_sint FP:$src)),
+                     (GETFSIG (FCVTFXTRUNC FP:$src))>;
+def FP_TO_UINT : Pat<(i64 (fp_to_uint FP:$src)),
+                     (GETFSIG (FCVTFXUTRUNC FP:$src))>;
+
+def fpimm0 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
+def fpimm1 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+1.0);
+}]>;
+def fpimmn0 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(-0.0);
+}]>;
+def fpimmn1 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(-1.0);
+}]>;
+
+def : Pat<(f64 fpimm0), (FMOV F0)>;
+def : Pat<(f64 fpimm1), (FMOV F1)>;
+def : Pat<(f64 fpimmn0), (FNEG F0)>;
+def : Pat<(f64 fpimmn1), (FNEG F1)>;
+
+let isTerminator = 1, isBranch = 1 in {
+  def BRL_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins i64imm:$dst),
+                            "(p0) brl.cond.sptk $dst">, isB;
+  def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+                                "($qp) brl.cond.sptk $dst">, isB;
+  def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+                               "($qp) br.cond.sptk $dst">, isB;
+}
+
+let isCall = 1, /* isTerminator = 1, isBranch = 1, */
+  Uses = [out0,out1,out2,out3,out4,out5,out6,out7],
+// all calls clobber non-callee-saved registers, and for now, they are these:
+  Defs = [r2,r3,r8,r9,r10,r11,r14,r15,r16,r17,r18,r19,r20,r21,r22,r23,r24,
+          r25,r26,r27,r28,r29,r30,r31,
+          p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,
+          F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,
+          F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49,
+          F50,F51,F52,F53,F54,F55,F56,
+          F57,F58,F59,F60,F61,F62,F63,F64,F65,F66,F67,F68,F69,F70,F71,F72,F73,F74,
+          F75,F76,F77,F78,F79,F80,F81,
+          F82,F83,F84,F85,F86,F87,F88,F89,F90,F91,F92,F93,F94,F95,F96,F97,F98,F99,
+          F100,F101,F102,F103,F104,F105,
+          F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119,
+          F120,F121,F122,F123,F124,F125,F126,F127,
+          out0,out1,out2,out3,out4,out5,out6,out7] in {
+// old pattern call
+  def BRCALL: RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+                      "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// new daggy stuff!
+
+// calls a globaladdress
+  def BRCALL_IPREL_GA : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+                        "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls an externalsymbol
+  def BRCALL_IPREL_ES : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+                        "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
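+// (every register in the Defs list above is treated as clobbered by the
+//  call; values that are live across a call must therefore be kept in the
+//  few callee-saved registers or spilled to the stack)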
+// calls through a function descriptor
+  def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (outs), (ins GR:$branchreg),
+                        "br.call.sptk rp = $branchreg">, isB; // FIXME: teach llvm about branch regs?
+  def BRLCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+                             "($qp) brl.cond.call.sptk $dst">, isB;
+  def BRCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+                            "($qp) br.cond.call.sptk $dst">, isB;
+}
+
+// Return branch:
+let isTerminator = 1, isReturn = 1 in
+  def RET : AForm_DAG<0x03, 0x0b, (outs), (ins),
+                      "br.ret.sptk.many rp",
+                      [(retflag)]>, isB; // return
+def : Pat<(ret), (RET)>;
+
+// the evil stop bit of despair
+def STOP : PseudoInstIA64<(outs), (ins variable_ops), ";;">;
+
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
new file mode 100644
index 000000000000..fb930564a9d1
--- /dev/null
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===-- IA64MachineFunctionInfo.h - IA64-specific information ---*- C++ -*-===//
+//===-- for MachineFunction                                   ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares IA64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64MACHINEFUNCTIONINFO_H
+#define IA64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+//#include "IA64JITInfo.h"
+
+namespace llvm {
+
+class IA64FunctionInfo : public MachineFunctionInfo {
+
+public:
+  unsigned outRegsUsed; // how many 'out' registers are used
+  // by this machinefunction? (used to compute the appropriate
+  // entry in the 'alloc' instruction at the top of the
+  // machinefunction)
+  IA64FunctionInfo(MachineFunction& MF) { outRegsUsed = 0; }
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.cpp b/lib/Target/IA64/IA64RegisterInfo.cpp
new file mode 100644
index 000000000000..7ad6f51a9b8b
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.cpp
@@ -0,0 +1,319 @@
+//===- IA64RegisterInfo.cpp - IA64 Register Information ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on IA64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64.h"
+#include "IA64RegisterInfo.h"
+#include "IA64InstrBuilder.h"
+#include "IA64MachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+IA64RegisterInfo::IA64RegisterInfo(const TargetInstrInfo &tii)
+  : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP),
+    TII(tii) {}
+
+const unsigned* IA64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+                                                                        const {
+  static const unsigned CalleeSavedRegs[] = {
+    IA64::r5, 0
+  };
+  return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+IA64RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+    &IA64::GRRegClass, 0
+  };
+  return CalleeSavedRegClasses;
+}
+
+BitVector IA64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  Reserved.set(IA64::r0);
+  Reserved.set(IA64::r1);
+  Reserved.set(IA64::r2);
+  Reserved.set(IA64::r5);
+  Reserved.set(IA64::r12);
+  Reserved.set(IA64::r13);
+  Reserved.set(IA64::r22);
+  Reserved.set(IA64::rp);
+  return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool IA64RegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+void IA64RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (hasFP(MF)) {
+    // If we have a frame pointer, turn the adjcallstackup instruction into a
+    // 'sub SP, <amt>' and the adjcallstackdown instruction into
+    // 'add SP, <amt>'
+    MachineInstr *Old = I;
+    unsigned Amount = Old->getOperand(0).getImm();
+    DebugLoc dl = Old->getDebugLoc();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly.  To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
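+      // e.g. with Align = 16, an outgoing-argument area of 40 bytes is
+      // rounded up: (40 + 15) / 16 * 16 = 48.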
+
+      // Replace the pseudo instruction with a new instruction...
+      if (Old->getOpcode() == IA64::ADJUSTCALLSTACKDOWN) {
+        BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+          .addReg(IA64::r12).addImm(-Amount);
+      } else {
+        assert(Old->getOpcode() == IA64::ADJUSTCALLSTACKUP);
+        BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+          .addReg(IA64::r12).addImm(Amount);
+      }
+    }
+  }
+
+  MBB.erase(I);
+}
+
+void IA64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                           int SPAdj, RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  DebugLoc dl = MI.getDebugLoc();
+
+  bool FP = hasFP(MF);
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+
+  // choose a base register: ( hasFP? framepointer : stack pointer )
+  unsigned BaseRegister = FP ? IA64::r5 : IA64::r12;
+  // Add the base register
+  MI.getOperand(i).ChangeToRegister(BaseRegister, false);
+
+  // Now add the frame object offset to the offset from r1.
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+  // If we're not using a Frame Pointer that has been set to the value of the
+  // SP before having the stack size subtracted from it, then add the stack size
+  // to Offset to get the correct offset.
+  Offset += MF.getFrameInfo()->getStackSize();
+
+  // XXX: we use 'r22' as another hack+slash temporary register here :(
+  if (Offset <= 8191 && Offset >= -8192) { // smallish offset
+    // Fix up the old:
+    MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+    // insert the new
+    BuildMI(MBB, II, dl, TII.get(IA64::ADDIMM22), IA64::r22)
+      .addReg(BaseRegister).addImm(Offset);
+  } else { // it's big
+    // fix up the old:
+    MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+    BuildMI(MBB, II, dl, TII.get(IA64::MOVLIMM64), IA64::r22).addImm(Offset);
+    BuildMI(MBB, II, dl, TII.get(IA64::ADD), IA64::r22).addReg(BaseRegister)
+      .addReg(IA64::r22);
+  }
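+  // In effect, an access to a frame object at offset 32 from the base
+  // register (r12 here, or r5 when a frame pointer is in use) is rewritten
+  // roughly as:
+  //   adds r22 = 32, r12     ;; ... [r22] ...
+  // while an offset outside the signed 14-bit range is built with
+  //   movl r22 = <offset> ;; add r22 = r12, r22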
+
+}
+
+void IA64RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool FP = hasFP(MF);
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // first, we handle the 'alloc' instruction, that should be right up the
+  // top of any function
+  static const unsigned RegsInOrder[96] = { // there are 96 GPRs the
+                                            // RSE worries about
+    IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+    IA64::r36, IA64::r37, IA64::r38, IA64::r39, IA64::r40, IA64::r41,
+    IA64::r42, IA64::r43, IA64::r44, IA64::r45, IA64::r46, IA64::r47,
+    IA64::r48, IA64::r49, IA64::r50, IA64::r51, IA64::r52, IA64::r53,
+    IA64::r54, IA64::r55, IA64::r56, IA64::r57, IA64::r58, IA64::r59,
+    IA64::r60, IA64::r61, IA64::r62, IA64::r63, IA64::r64, IA64::r65,
+    IA64::r66, IA64::r67, IA64::r68, IA64::r69, IA64::r70, IA64::r71,
+    IA64::r72, IA64::r73, IA64::r74, IA64::r75, IA64::r76, IA64::r77,
+    IA64::r78, IA64::r79, IA64::r80, IA64::r81, IA64::r82, IA64::r83,
+    IA64::r84, IA64::r85, IA64::r86, IA64::r87, IA64::r88, IA64::r89,
+    IA64::r90, IA64::r91, IA64::r92, IA64::r93, IA64::r94, IA64::r95,
+    IA64::r96, IA64::r97, IA64::r98, IA64::r99, IA64::r100, IA64::r101,
+    IA64::r102, IA64::r103, IA64::r104, IA64::r105, IA64::r106, IA64::r107,
+    IA64::r108, IA64::r109, IA64::r110, IA64::r111, IA64::r112, IA64::r113,
+    IA64::r114, IA64::r115, IA64::r116, IA64::r117, IA64::r118, IA64::r119,
+    IA64::r120, IA64::r121, IA64::r122, IA64::r123, IA64::r124, IA64::r125,
+    IA64::r126, IA64::r127 };
+
+  unsigned numStackedGPRsUsed = 0;
+  for (int i = 0; i != 96; i++) {
+    if (MF.getRegInfo().isPhysRegUsed(RegsInOrder[i]))
+      numStackedGPRsUsed = i+1; // (i+1 and not ++ - consider fn(fp, fp, int))
+  }
+
+  unsigned numOutRegsUsed = MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+
+  // XXX FIXME : this code should be a bit more reliable (in case there _isn't_
+  // a pseudo_alloc in the MBB)
+  unsigned dstRegOfPseudoAlloc;
+  for (MBBI = MBB.begin(); /*MBBI->getOpcode() != IA64::PSEUDO_ALLOC*/; ++MBBI) {
+    assert(MBBI != MBB.end());
+    if (MBBI->getOpcode() == IA64::PSEUDO_ALLOC) {
+      dstRegOfPseudoAlloc = MBBI->getOperand(0).getReg();
+      break;
+    }
+  }
+
+  if (MBBI != MBB.end()) dl = MBBI->getDebugLoc();
+
+  BuildMI(MBB, MBBI, dl, TII.get(IA64::ALLOC)).
+    addReg(dstRegOfPseudoAlloc).addImm(0).
+    addImm(numStackedGPRsUsed).addImm(numOutRegsUsed).addImm(0);
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned NumBytes = MFI->getStackSize();
+
+  if (FP)
+    NumBytes += 8; // reserve space for the old FP
+
+  // Do we need to allocate space on the stack?
+  if (NumBytes == 0)
+    return;
+
+  // Add 16 bytes at the bottom of the stack (scratch area)
+  // and round the size to a multiple of the alignment.
+  unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned Size = 16 + (FP ? 8 : 0);
+  NumBytes = (NumBytes+Size+Align-1)/Align*Align;
+
+  // Update frame info to pretend that this is part of the stack...
+  MFI->setStackSize(NumBytes);
+
+  // adjust stack pointer: r12 -= numbytes
+  if (NumBytes <= 8191) {
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22), IA64::r12).addReg(IA64::r12).
+      addImm(-NumBytes);
+  } else { // we use r22 as a scratch register here
+    // first load the decrement into r22
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22).
+      addImm(-NumBytes);
+    // FIXME: MOVLSI32 expects a _u_32imm
+    // then add (subtract) it to r12 (stack ptr)
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12)
+      .addReg(IA64::r12).addReg(IA64::r22);
+
+  }
+
+  // now if we need to, save the old FP and set the new
+  if (FP) {
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::ST8)).addReg(IA64::r12).addReg(IA64::r5);
+    // this must be the last instr in the prolog ?  (XXX: why??)
(XXX: why??) + BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r5).addReg(IA64::r12); + } + +} + +void IA64RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert(MBBI->getOpcode() == IA64::RET && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + bool FP = hasFP(MF); + + // Get the number of bytes allocated from the FrameInfo... + unsigned NumBytes = MFI->getStackSize(); + + //now if we need to, restore the old FP + if (FP) { + //copy the FP into the SP (discards allocas) + BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r12).addReg(IA64::r5); + //restore the FP + BuildMI(MBB, MBBI, dl, TII.get(IA64::LD8), IA64::r5).addReg(IA64::r5); + } + + if (NumBytes != 0) { + if (NumBytes <= 8191) { + BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22),IA64::r12). + addReg(IA64::r12).addImm(NumBytes); + } else { + BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22). + addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12).addReg(IA64::r12). + addReg(IA64::r22); + } + } +} + +unsigned IA64RegisterInfo::getRARegister() const { + assert(0 && "What is the return address register"); + return 0; +} + +unsigned IA64RegisterInfo::getFrameRegister(MachineFunction &MF) const { + return hasFP(MF) ? IA64::r5 : IA64::r12; +} + +unsigned IA64RegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned IA64RegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int IA64RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + assert(0 && "What is the dwarf register number"); + return -1; +} + +#include "IA64GenRegisterInfo.inc" + diff --git a/lib/Target/IA64/IA64RegisterInfo.h b/lib/Target/IA64/IA64RegisterInfo.h new file mode 100644 index 000000000000..0c5083e75c25 --- /dev/null +++ b/lib/Target/IA64/IA64RegisterInfo.h @@ -0,0 +1,63 @@ +//===- IA64RegisterInfo.h - IA64 Register Information Impl ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the IA64 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef IA64REGISTERINFO_H +#define IA64REGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "IA64GenRegisterInfo.h.inc" + +namespace llvm { + +class TargetInstrInfo; + +struct IA64RegisterInfo : public IA64GenRegisterInfo { + const TargetInstrInfo &TII; + + IA64RegisterInfo(const TargetInstrInfo &tii); + + /// Code Generation virtual methods... 
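+
+  // Frame conventions the implementations above rely on: r12 is the stack
+  // pointer, r5 the optional frame pointer, and r22 a scratch register for
+  // address arithmetic. ADDIMM22 is only used for adjustments within the
+  // signed range [-8192, 8191]; larger constants are materialized into r22
+  // with MOVLIMM64 and folded in with ADD. Worked example (no FP, 64-byte
+  // frame): an object at frame offset 40 is addressed as
+  // r12 + (40 + 64) = r12 + 104, which fits the immediate form, so a
+  // single ADDIMM22 into r22 is emitted.
+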
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // End llvm namespace + +#endif + diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td new file mode 100644 index 000000000000..dd72dc3008a3 --- /dev/null +++ b/lib/Target/IA64/IA64RegisterInfo.td @@ -0,0 +1,509 @@ +//===- IA64RegisterInfo.td - Describe the IA64 Register File ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the IA64 register file, defining the registers +// themselves, aliases between the registers, and the register classes built +// out of the registers. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register definitions... 
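+
+// How these definitions are consumed: tblgen compiles this file into the
+// IA64GenRegisterInfo.h.inc / IA64GenRegisterInfo.inc files included by the
+// C++ sources above. A minimal sketch of the generated header -- the layout
+// is illustrative, only the enumerator names are real:
+//
+//   namespace IA64 {
+//     enum {
+//       NoRegister = 0,
+//       r0, r1, /* ... */ r127,   // general registers
+//       F0, /* ... */ F127,       // floating-point registers
+//       p0, /* ... */ p63,        // predicate registers
+//       out0, /* ... */ out7, AR_PFS, rp, B6
+//     };
+//   }
+//
+// which is why IA64RegisterInfo.cpp can name IA64::r12 (stack pointer) and
+// IA64::r5 (frame pointer) directly.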
+//
+
+class IA64Register<string n> : Register<n> {
+  let Namespace = "IA64";
+}
+
+// GR - One of 128 64-bit general registers
+class GR<bits<7> num, string n> : IA64Register<n> {
+  field bits<7> Num = num;
+}
+
+// FP - One of 128 82-bit floating-point registers
+class FP<bits<7> num, string n> : IA64Register<n> {
+  field bits<7> Num = num;
+}
+
+// PR - One of 64 1-bit predicate registers
+class PR<bits<6> num, string n> : IA64Register<n> {
+  field bits<6> Num = num;
+}
+
+/* general registers */
+def r0 : GR< 0, "r0">, DwarfRegNum<[0]>;
+def r1 : GR< 1, "r1">, DwarfRegNum<[1]>;
+def r2 : GR< 2, "r2">, DwarfRegNum<[2]>;
+def r3 : GR< 3, "r3">, DwarfRegNum<[3]>;
+def r4 : GR< 4, "r4">, DwarfRegNum<[4]>;
+def r5 : GR< 5, "r5">, DwarfRegNum<[5]>;
+def r6 : GR< 6, "r6">, DwarfRegNum<[6]>;
+def r7 : GR< 7, "r7">, DwarfRegNum<[7]>;
+def r8 : GR< 8, "r8">, DwarfRegNum<[8]>;
+def r9 : GR< 9, "r9">, DwarfRegNum<[9]>;
+def r10 : GR< 10, "r10">, DwarfRegNum<[10]>;
+def r11 : GR< 11, "r11">, DwarfRegNum<[11]>;
+def r12 : GR< 12, "r12">, DwarfRegNum<[12]>;
+def r13 : GR< 13, "r13">, DwarfRegNum<[13]>;
+def r14 : GR< 14, "r14">, DwarfRegNum<[14]>;
+def r15 : GR< 15, "r15">, DwarfRegNum<[15]>;
+def r16 : GR< 16, "r16">, DwarfRegNum<[16]>;
+def r17 : GR< 17, "r17">, DwarfRegNum<[17]>;
+def r18 : GR< 18, "r18">, DwarfRegNum<[18]>;
+def r19 : GR< 19, "r19">, DwarfRegNum<[19]>;
+def r20 : GR< 20, "r20">, DwarfRegNum<[20]>;
+def r21 : GR< 21, "r21">, DwarfRegNum<[21]>;
+def r22 : GR< 22, "r22">, DwarfRegNum<[22]>;
+def r23 : GR< 23, "r23">, DwarfRegNum<[23]>;
+def r24 : GR< 24, "r24">, DwarfRegNum<[24]>;
+def r25 : GR< 25, "r25">, DwarfRegNum<[25]>;
+def r26 : GR< 26, "r26">, DwarfRegNum<[26]>;
+def r27 : GR< 27, "r27">, DwarfRegNum<[27]>;
+def r28 : GR< 28, "r28">, DwarfRegNum<[28]>;
+def r29 : GR< 29, "r29">, DwarfRegNum<[29]>;
+def r30 : GR< 30, "r30">, DwarfRegNum<[30]>;
+def r31 : GR< 31, "r31">, DwarfRegNum<[31]>;
+def r32 : GR< 32, "r32">, DwarfRegNum<[32]>;
+def r33 : GR< 33, "r33">, DwarfRegNum<[33]>;
+def r34 : GR< 34, "r34">, DwarfRegNum<[34]>;
+def r35 : GR< 35, "r35">, DwarfRegNum<[35]>;
+def r36 : GR< 36, "r36">, DwarfRegNum<[36]>;
+def r37 : GR< 37, "r37">, DwarfRegNum<[37]>;
+def r38 : GR< 38, "r38">, DwarfRegNum<[38]>;
+def r39 : GR< 39, "r39">, DwarfRegNum<[39]>;
+def r40 : GR< 40, "r40">, DwarfRegNum<[40]>;
+def r41 : GR< 41, "r41">, DwarfRegNum<[41]>;
+def r42 : GR< 42, "r42">, DwarfRegNum<[42]>;
+def r43 : GR< 43, "r43">, DwarfRegNum<[43]>;
+def r44 : GR< 44, "r44">, DwarfRegNum<[44]>;
+def r45 : GR< 45, "r45">, DwarfRegNum<[45]>;
+def r46 : GR< 46, "r46">, DwarfRegNum<[46]>;
+def r47 : GR< 47, "r47">, DwarfRegNum<[47]>;
+def r48 : GR< 48, "r48">, DwarfRegNum<[48]>;
+def r49 : GR< 49, "r49">, DwarfRegNum<[49]>;
+def r50 : GR< 50, "r50">, DwarfRegNum<[50]>;
+def r51 : GR< 51, "r51">, DwarfRegNum<[51]>;
+def r52 : GR< 52, "r52">, DwarfRegNum<[52]>;
+def r53 : GR< 53, "r53">, DwarfRegNum<[53]>;
+def r54 : GR< 54, "r54">, DwarfRegNum<[54]>;
+def r55 : GR< 55, "r55">, DwarfRegNum<[55]>;
+def r56 : GR< 56, "r56">, DwarfRegNum<[56]>;
+def r57 : GR< 57, "r57">, DwarfRegNum<[57]>;
+def r58 : GR< 58, "r58">, DwarfRegNum<[58]>;
+def r59 : GR< 59, "r59">, DwarfRegNum<[59]>;
+def r60 : GR< 60, "r60">, DwarfRegNum<[60]>;
+def r61 : GR< 61, "r61">, DwarfRegNum<[61]>;
+def r62 : GR< 62, "r62">, DwarfRegNum<[62]>;
+def r63 : GR< 63, "r63">, DwarfRegNum<[63]>;
+def r64 : GR< 64, "r64">, DwarfRegNum<[64]>;
+def r65 : GR< 65, "r65">, DwarfRegNum<[65]>;
+def r66 : GR< 66, "r66">, DwarfRegNum<[66]>;
+def r67 : GR< 67, "r67">, DwarfRegNum<[67]>;
+def
r68 : GR< 68, "r68">, DwarfRegNum<[68]>; +def r69 : GR< 69, "r69">, DwarfRegNum<[69]>; +def r70 : GR< 70, "r70">, DwarfRegNum<[70]>; +def r71 : GR< 71, "r71">, DwarfRegNum<[71]>; +def r72 : GR< 72, "r72">, DwarfRegNum<[72]>; +def r73 : GR< 73, "r73">, DwarfRegNum<[73]>; +def r74 : GR< 74, "r74">, DwarfRegNum<[74]>; +def r75 : GR< 75, "r75">, DwarfRegNum<[75]>; +def r76 : GR< 76, "r76">, DwarfRegNum<[76]>; +def r77 : GR< 77, "r77">, DwarfRegNum<[77]>; +def r78 : GR< 78, "r78">, DwarfRegNum<[78]>; +def r79 : GR< 79, "r79">, DwarfRegNum<[79]>; +def r80 : GR< 80, "r80">, DwarfRegNum<[80]>; +def r81 : GR< 81, "r81">, DwarfRegNum<[81]>; +def r82 : GR< 82, "r82">, DwarfRegNum<[82]>; +def r83 : GR< 83, "r83">, DwarfRegNum<[83]>; +def r84 : GR< 84, "r84">, DwarfRegNum<[84]>; +def r85 : GR< 85, "r85">, DwarfRegNum<[85]>; +def r86 : GR< 86, "r86">, DwarfRegNum<[86]>; +def r87 : GR< 87, "r87">, DwarfRegNum<[87]>; +def r88 : GR< 88, "r88">, DwarfRegNum<[88]>; +def r89 : GR< 89, "r89">, DwarfRegNum<[89]>; +def r90 : GR< 90, "r90">, DwarfRegNum<[90]>; +def r91 : GR< 91, "r91">, DwarfRegNum<[91]>; +def r92 : GR< 92, "r92">, DwarfRegNum<[92]>; +def r93 : GR< 93, "r93">, DwarfRegNum<[93]>; +def r94 : GR< 94, "r94">, DwarfRegNum<[94]>; +def r95 : GR< 95, "r95">, DwarfRegNum<[95]>; +def r96 : GR< 96, "r96">, DwarfRegNum<[96]>; +def r97 : GR< 97, "r97">, DwarfRegNum<[97]>; +def r98 : GR< 98, "r98">, DwarfRegNum<[98]>; +def r99 : GR< 99, "r99">, DwarfRegNum<[99]>; +def r100 : GR< 100, "r100">, DwarfRegNum<[100]>; +def r101 : GR< 101, "r101">, DwarfRegNum<[101]>; +def r102 : GR< 102, "r102">, DwarfRegNum<[102]>; +def r103 : GR< 103, "r103">, DwarfRegNum<[103]>; +def r104 : GR< 104, "r104">, DwarfRegNum<[104]>; +def r105 : GR< 105, "r105">, DwarfRegNum<[105]>; +def r106 : GR< 106, "r106">, DwarfRegNum<[106]>; +def r107 : GR< 107, "r107">, DwarfRegNum<[107]>; +def r108 : GR< 108, "r108">, DwarfRegNum<[108]>; +def r109 : GR< 109, "r109">, DwarfRegNum<[109]>; +def r110 : GR< 110, "r110">, DwarfRegNum<[110]>; +def r111 : GR< 111, "r111">, DwarfRegNum<[111]>; +def r112 : GR< 112, "r112">, DwarfRegNum<[112]>; +def r113 : GR< 113, "r113">, DwarfRegNum<[113]>; +def r114 : GR< 114, "r114">, DwarfRegNum<[114]>; +def r115 : GR< 115, "r115">, DwarfRegNum<[115]>; +def r116 : GR< 116, "r116">, DwarfRegNum<[116]>; +def r117 : GR< 117, "r117">, DwarfRegNum<[117]>; +def r118 : GR< 118, "r118">, DwarfRegNum<[118]>; +def r119 : GR< 119, "r119">, DwarfRegNum<[119]>; +def r120 : GR< 120, "r120">, DwarfRegNum<[120]>; +def r121 : GR< 121, "r121">, DwarfRegNum<[121]>; +def r122 : GR< 122, "r122">, DwarfRegNum<[122]>; +def r123 : GR< 123, "r123">, DwarfRegNum<[123]>; +def r124 : GR< 124, "r124">, DwarfRegNum<[124]>; +def r125 : GR< 125, "r125">, DwarfRegNum<[125]>; +def r126 : GR< 126, "r126">, DwarfRegNum<[126]>; +def r127 : GR< 127, "r127">, DwarfRegNum<[127]>; + +/* floating-point registers */ +def F0 : FP< 0, "f0">, DwarfRegNum<[128]>; +def F1 : FP< 1, "f1">, DwarfRegNum<[129]>; +def F2 : FP< 2, "f2">, DwarfRegNum<[130]>; +def F3 : FP< 3, "f3">, DwarfRegNum<[131]>; +def F4 : FP< 4, "f4">, DwarfRegNum<[132]>; +def F5 : FP< 5, "f5">, DwarfRegNum<[133]>; +def F6 : FP< 6, "f6">, DwarfRegNum<[134]>; +def F7 : FP< 7, "f7">, DwarfRegNum<[135]>; +def F8 : FP< 8, "f8">, DwarfRegNum<[136]>; +def F9 : FP< 9, "f9">, DwarfRegNum<[137]>; +def F10 : FP< 10, "f10">, DwarfRegNum<[138]>; +def F11 : FP< 11, "f11">, DwarfRegNum<[139]>; +def F12 : FP< 12, "f12">, DwarfRegNum<[140]>; +def F13 : FP< 13, "f13">, DwarfRegNum<[141]>; +def F14 : FP< 14, 
"f14">, DwarfRegNum<[142]>; +def F15 : FP< 15, "f15">, DwarfRegNum<[143]>; +def F16 : FP< 16, "f16">, DwarfRegNum<[144]>; +def F17 : FP< 17, "f17">, DwarfRegNum<[145]>; +def F18 : FP< 18, "f18">, DwarfRegNum<[146]>; +def F19 : FP< 19, "f19">, DwarfRegNum<[147]>; +def F20 : FP< 20, "f20">, DwarfRegNum<[148]>; +def F21 : FP< 21, "f21">, DwarfRegNum<[149]>; +def F22 : FP< 22, "f22">, DwarfRegNum<[150]>; +def F23 : FP< 23, "f23">, DwarfRegNum<[151]>; +def F24 : FP< 24, "f24">, DwarfRegNum<[152]>; +def F25 : FP< 25, "f25">, DwarfRegNum<[153]>; +def F26 : FP< 26, "f26">, DwarfRegNum<[154]>; +def F27 : FP< 27, "f27">, DwarfRegNum<[155]>; +def F28 : FP< 28, "f28">, DwarfRegNum<[156]>; +def F29 : FP< 29, "f29">, DwarfRegNum<[157]>; +def F30 : FP< 30, "f30">, DwarfRegNum<[158]>; +def F31 : FP< 31, "f31">, DwarfRegNum<[159]>; +def F32 : FP< 32, "f32">, DwarfRegNum<[160]>; +def F33 : FP< 33, "f33">, DwarfRegNum<[161]>; +def F34 : FP< 34, "f34">, DwarfRegNum<[162]>; +def F35 : FP< 35, "f35">, DwarfRegNum<[163]>; +def F36 : FP< 36, "f36">, DwarfRegNum<[164]>; +def F37 : FP< 37, "f37">, DwarfRegNum<[165]>; +def F38 : FP< 38, "f38">, DwarfRegNum<[166]>; +def F39 : FP< 39, "f39">, DwarfRegNum<[167]>; +def F40 : FP< 40, "f40">, DwarfRegNum<[168]>; +def F41 : FP< 41, "f41">, DwarfRegNum<[169]>; +def F42 : FP< 42, "f42">, DwarfRegNum<[170]>; +def F43 : FP< 43, "f43">, DwarfRegNum<[171]>; +def F44 : FP< 44, "f44">, DwarfRegNum<[172]>; +def F45 : FP< 45, "f45">, DwarfRegNum<[173]>; +def F46 : FP< 46, "f46">, DwarfRegNum<[174]>; +def F47 : FP< 47, "f47">, DwarfRegNum<[175]>; +def F48 : FP< 48, "f48">, DwarfRegNum<[176]>; +def F49 : FP< 49, "f49">, DwarfRegNum<[177]>; +def F50 : FP< 50, "f50">, DwarfRegNum<[178]>; +def F51 : FP< 51, "f51">, DwarfRegNum<[179]>; +def F52 : FP< 52, "f52">, DwarfRegNum<[180]>; +def F53 : FP< 53, "f53">, DwarfRegNum<[181]>; +def F54 : FP< 54, "f54">, DwarfRegNum<[182]>; +def F55 : FP< 55, "f55">, DwarfRegNum<[183]>; +def F56 : FP< 56, "f56">, DwarfRegNum<[184]>; +def F57 : FP< 57, "f57">, DwarfRegNum<[185]>; +def F58 : FP< 58, "f58">, DwarfRegNum<[186]>; +def F59 : FP< 59, "f59">, DwarfRegNum<[187]>; +def F60 : FP< 60, "f60">, DwarfRegNum<[188]>; +def F61 : FP< 61, "f61">, DwarfRegNum<[189]>; +def F62 : FP< 62, "f62">, DwarfRegNum<[190]>; +def F63 : FP< 63, "f63">, DwarfRegNum<[191]>; +def F64 : FP< 64, "f64">, DwarfRegNum<[192]>; +def F65 : FP< 65, "f65">, DwarfRegNum<[193]>; +def F66 : FP< 66, "f66">, DwarfRegNum<[194]>; +def F67 : FP< 67, "f67">, DwarfRegNum<[195]>; +def F68 : FP< 68, "f68">, DwarfRegNum<[196]>; +def F69 : FP< 69, "f69">, DwarfRegNum<[197]>; +def F70 : FP< 70, "f70">, DwarfRegNum<[198]>; +def F71 : FP< 71, "f71">, DwarfRegNum<[199]>; +def F72 : FP< 72, "f72">, DwarfRegNum<[200]>; +def F73 : FP< 73, "f73">, DwarfRegNum<[201]>; +def F74 : FP< 74, "f74">, DwarfRegNum<[202]>; +def F75 : FP< 75, "f75">, DwarfRegNum<[203]>; +def F76 : FP< 76, "f76">, DwarfRegNum<[204]>; +def F77 : FP< 77, "f77">, DwarfRegNum<[205]>; +def F78 : FP< 78, "f78">, DwarfRegNum<[206]>; +def F79 : FP< 79, "f79">, DwarfRegNum<[207]>; +def F80 : FP< 80, "f80">, DwarfRegNum<[208]>; +def F81 : FP< 81, "f81">, DwarfRegNum<[209]>; +def F82 : FP< 82, "f82">, DwarfRegNum<[210]>; +def F83 : FP< 83, "f83">, DwarfRegNum<[211]>; +def F84 : FP< 84, "f84">, DwarfRegNum<[212]>; +def F85 : FP< 85, "f85">, DwarfRegNum<[213]>; +def F86 : FP< 86, "f86">, DwarfRegNum<[214]>; +def F87 : FP< 87, "f87">, DwarfRegNum<[215]>; +def F88 : FP< 88, "f88">, DwarfRegNum<[216]>; +def F89 : FP< 89, "f89">, DwarfRegNum<[217]>; 
+def F90 : FP< 90, "f90">, DwarfRegNum<[218]>; +def F91 : FP< 91, "f91">, DwarfRegNum<[219]>; +def F92 : FP< 92, "f92">, DwarfRegNum<[220]>; +def F93 : FP< 93, "f93">, DwarfRegNum<[221]>; +def F94 : FP< 94, "f94">, DwarfRegNum<[222]>; +def F95 : FP< 95, "f95">, DwarfRegNum<[223]>; +def F96 : FP< 96, "f96">, DwarfRegNum<[224]>; +def F97 : FP< 97, "f97">, DwarfRegNum<[225]>; +def F98 : FP< 98, "f98">, DwarfRegNum<[226]>; +def F99 : FP< 99, "f99">, DwarfRegNum<[227]>; +def F100 : FP< 100, "f100">, DwarfRegNum<[228]>; +def F101 : FP< 101, "f101">, DwarfRegNum<[229]>; +def F102 : FP< 102, "f102">, DwarfRegNum<[230]>; +def F103 : FP< 103, "f103">, DwarfRegNum<[231]>; +def F104 : FP< 104, "f104">, DwarfRegNum<[232]>; +def F105 : FP< 105, "f105">, DwarfRegNum<[233]>; +def F106 : FP< 106, "f106">, DwarfRegNum<[234]>; +def F107 : FP< 107, "f107">, DwarfRegNum<[235]>; +def F108 : FP< 108, "f108">, DwarfRegNum<[236]>; +def F109 : FP< 109, "f109">, DwarfRegNum<[237]>; +def F110 : FP< 110, "f110">, DwarfRegNum<[238]>; +def F111 : FP< 111, "f111">, DwarfRegNum<[239]>; +def F112 : FP< 112, "f112">, DwarfRegNum<[240]>; +def F113 : FP< 113, "f113">, DwarfRegNum<[241]>; +def F114 : FP< 114, "f114">, DwarfRegNum<[242]>; +def F115 : FP< 115, "f115">, DwarfRegNum<[243]>; +def F116 : FP< 116, "f116">, DwarfRegNum<[244]>; +def F117 : FP< 117, "f117">, DwarfRegNum<[245]>; +def F118 : FP< 118, "f118">, DwarfRegNum<[246]>; +def F119 : FP< 119, "f119">, DwarfRegNum<[247]>; +def F120 : FP< 120, "f120">, DwarfRegNum<[248]>; +def F121 : FP< 121, "f121">, DwarfRegNum<[249]>; +def F122 : FP< 122, "f122">, DwarfRegNum<[250]>; +def F123 : FP< 123, "f123">, DwarfRegNum<[251]>; +def F124 : FP< 124, "f124">, DwarfRegNum<[252]>; +def F125 : FP< 125, "f125">, DwarfRegNum<[253]>; +def F126 : FP< 126, "f126">, DwarfRegNum<[254]>; +def F127 : FP< 127, "f127">, DwarfRegNum<[255]>; + +/* predicate registers */ +def p0 : PR< 0, "p0">, DwarfRegNum<[256]>; +def p1 : PR< 1, "p1">, DwarfRegNum<[257]>; +def p2 : PR< 2, "p2">, DwarfRegNum<[258]>; +def p3 : PR< 3, "p3">, DwarfRegNum<[259]>; +def p4 : PR< 4, "p4">, DwarfRegNum<[260]>; +def p5 : PR< 5, "p5">, DwarfRegNum<[261]>; +def p6 : PR< 6, "p6">, DwarfRegNum<[262]>; +def p7 : PR< 7, "p7">, DwarfRegNum<[263]>; +def p8 : PR< 8, "p8">, DwarfRegNum<[264]>; +def p9 : PR< 9, "p9">, DwarfRegNum<[265]>; +def p10 : PR< 10, "p10">, DwarfRegNum<[266]>; +def p11 : PR< 11, "p11">, DwarfRegNum<[267]>; +def p12 : PR< 12, "p12">, DwarfRegNum<[268]>; +def p13 : PR< 13, "p13">, DwarfRegNum<[269]>; +def p14 : PR< 14, "p14">, DwarfRegNum<[270]>; +def p15 : PR< 15, "p15">, DwarfRegNum<[271]>; +def p16 : PR< 16, "p16">, DwarfRegNum<[272]>; +def p17 : PR< 17, "p17">, DwarfRegNum<[273]>; +def p18 : PR< 18, "p18">, DwarfRegNum<[274]>; +def p19 : PR< 19, "p19">, DwarfRegNum<[275]>; +def p20 : PR< 20, "p20">, DwarfRegNum<[276]>; +def p21 : PR< 21, "p21">, DwarfRegNum<[277]>; +def p22 : PR< 22, "p22">, DwarfRegNum<[278]>; +def p23 : PR< 23, "p23">, DwarfRegNum<[279]>; +def p24 : PR< 24, "p24">, DwarfRegNum<[280]>; +def p25 : PR< 25, "p25">, DwarfRegNum<[281]>; +def p26 : PR< 26, "p26">, DwarfRegNum<[282]>; +def p27 : PR< 27, "p27">, DwarfRegNum<[283]>; +def p28 : PR< 28, "p28">, DwarfRegNum<[284]>; +def p29 : PR< 29, "p29">, DwarfRegNum<[285]>; +def p30 : PR< 30, "p30">, DwarfRegNum<[286]>; +def p31 : PR< 31, "p31">, DwarfRegNum<[287]>; +def p32 : PR< 32, "p32">, DwarfRegNum<[288]>; +def p33 : PR< 33, "p33">, DwarfRegNum<[289]>; +def p34 : PR< 34, "p34">, DwarfRegNum<[290]>; +def p35 : PR< 35, "p35">, 
DwarfRegNum<[291]>; +def p36 : PR< 36, "p36">, DwarfRegNum<[292]>; +def p37 : PR< 37, "p37">, DwarfRegNum<[293]>; +def p38 : PR< 38, "p38">, DwarfRegNum<[294]>; +def p39 : PR< 39, "p39">, DwarfRegNum<[295]>; +def p40 : PR< 40, "p40">, DwarfRegNum<[296]>; +def p41 : PR< 41, "p41">, DwarfRegNum<[297]>; +def p42 : PR< 42, "p42">, DwarfRegNum<[298]>; +def p43 : PR< 43, "p43">, DwarfRegNum<[299]>; +def p44 : PR< 44, "p44">, DwarfRegNum<[300]>; +def p45 : PR< 45, "p45">, DwarfRegNum<[301]>; +def p46 : PR< 46, "p46">, DwarfRegNum<[302]>; +def p47 : PR< 47, "p47">, DwarfRegNum<[303]>; +def p48 : PR< 48, "p48">, DwarfRegNum<[304]>; +def p49 : PR< 49, "p49">, DwarfRegNum<[305]>; +def p50 : PR< 50, "p50">, DwarfRegNum<[306]>; +def p51 : PR< 51, "p51">, DwarfRegNum<[307]>; +def p52 : PR< 52, "p52">, DwarfRegNum<[308]>; +def p53 : PR< 53, "p53">, DwarfRegNum<[309]>; +def p54 : PR< 54, "p54">, DwarfRegNum<[310]>; +def p55 : PR< 55, "p55">, DwarfRegNum<[311]>; +def p56 : PR< 56, "p56">, DwarfRegNum<[312]>; +def p57 : PR< 57, "p57">, DwarfRegNum<[313]>; +def p58 : PR< 58, "p58">, DwarfRegNum<[314]>; +def p59 : PR< 59, "p59">, DwarfRegNum<[315]>; +def p60 : PR< 60, "p60">, DwarfRegNum<[316]>; +def p61 : PR< 61, "p61">, DwarfRegNum<[317]>; +def p62 : PR< 62, "p62">, DwarfRegNum<[318]>; +def p63 : PR< 63, "p63">, DwarfRegNum<[319]>; + +// XXX : this is temporary, we'll eventually have the output registers +// in the general purpose register class too? +def out0 : GR<0, "out0">, DwarfRegNum<[120]>; +def out1 : GR<1, "out1">, DwarfRegNum<[121]>; +def out2 : GR<2, "out2">, DwarfRegNum<[122]>; +def out3 : GR<3, "out3">, DwarfRegNum<[123]>; +def out4 : GR<4, "out4">, DwarfRegNum<[124]>; +def out5 : GR<5, "out5">, DwarfRegNum<[125]>; +def out6 : GR<6, "out6">, DwarfRegNum<[126]>; +def out7 : GR<7, "out7">, DwarfRegNum<[127]>; + +// application (special) registers: + +// "previous function state" application register +def AR_PFS : GR<0, "ar.pfs">, DwarfRegNum<[331]>; + +// "return pointer" (this is really branch register b0) +def rp : GR<0, "rp">, DwarfRegNum<[-1]>; + +// branch reg 6 +def B6 : GR<0, "b6">, DwarfRegNum<[326]>; + +//===----------------------------------------------------------------------===// +// Register Class Definitions... now that we have all of the pieces, define the +// top-level register classes. The order specified in the register list is +// implicitly defined to be the register allocation order. 
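+//
+// Beyond list order, a class can also narrow the allocatable window per
+// function by overriding allocation_order_begin/end, which the GR and FP
+// classes below use to hide reserved registers. The mechanism, as a sketch
+// (FrontHidden/BackHidden are illustrative placeholders, not real helpers):
+//
+//   iterator allocation_order_begin(const MachineFunction &MF) const {
+//     return begin() + FrontHidden(MF);  // skip entries at the front
+//   }
+//   iterator allocation_order_end(const MachineFunction &MF) const {
+//     return end() - BackHidden(MF);     // stop short of reserved entries
+//   }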
+// + +// these are the scratch (+stacked) general registers +// FIXME/XXX we also reserve a frame pointer (r5) +// FIXME/XXX we also reserve r2 for spilling/filling predicates +// in IA64RegisterInfo.cpp +// FIXME/XXX we also reserve r22 for calculating addresses +// in IA64RegisterInfo.cpp + +def GR : RegisterClass<"IA64", [i64], 64, + [ + +//FIXME!: for both readability and performance, we don't want the out +// registers to be the first ones allocated + + out7, out6, out5, out4, out3, out2, out1, out0, + r3, r8, r9, r10, r11, r14, r15, + r16, r17, r18, r19, r20, r21, r23, + r24, r25, r26, r27, r28, r29, r30, r31, + r32, r33, r34, r35, r36, r37, r38, r39, + r40, r41, r42, r43, r44, r45, r46, r47, + r48, r49, r50, r51, r52, r53, r54, r55, + r56, r57, r58, r59, r60, r61, r62, r63, + r64, r65, r66, r67, r68, r69, r70, r71, + r72, r73, r74, r75, r76, r77, r78, r79, + r80, r81, r82, r83, r84, r85, r86, r87, + r88, r89, r90, r91, r92, r93, r94, r95, + r96, r97, r98, r99, r100, r101, r102, r103, + r104, r105, r106, r107, r108, r109, r110, r111, + r112, r113, r114, r115, r116, r117, r118, r119, + // last 17 are special (look down) + r120, r121, r122, r123, r124, r125, r126, r127, + r0, r1, r2, r5, r12, r13, r22, rp, AR_PFS]> + { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GRClass::iterator + GRClass::allocation_order_begin(const MachineFunction &MF) const { + // hide the 8 out? registers appropriately: + return begin()+(8-(MF.getInfo()->outRegsUsed)); + } + + GRClass::iterator + GRClass::allocation_order_end(const MachineFunction &MF) const { + // the 9 special registers r0,r1,r2,r5,r12,r13 etc + int numReservedRegs=9; + + // we also can't allocate registers for use as locals if they're already + // required as 'out' registers + numReservedRegs+=MF.getInfo()->outRegsUsed; + return end()-numReservedRegs; // hide registers appropriately + } + }]; +} + + +// these are the scratch (+stacked) FP registers + +def FP : RegisterClass<"IA64", [f64], 64, + [F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, + F32, F33, F34, F35, F36, F37, F38, F39, + F40, F41, F42, F43, F44, F45, F46, F47, + F48, F49, F50, F51, F52, F53, F54, F55, + F56, F57, F58, F59, F60, F61, F62, F63, + F64, F65, F66, F67, F68, F69, F70, F71, + F72, F73, F74, F75, F76, F77, F78, F79, + F80, F81, F82, F83, F84, F85, F86, F87, + F88, F89, F90, F91, F92, F93, F94, F95, + F96, F97, F98, F99, F100, F101, F102, F103, + F104, F105, F106, F107, F108, F109, F110, F111, + F112, F113, F114, F115, F116, F117, F118, F119, + F120, F121, F122, F123, F124, F125, F126, F127, + F0, F1]> // these last two are hidden + { +// the 128s here are to make stf.spill/ldf.fill happy, +// when storing full (82-bit) FP regs to stack slots +// we need to 16-byte align + let Size=128; + let Alignment=128; + + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FPClass::iterator + FPClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); // we don't hide any FP regs from the start + } + + FPClass::iterator + FPClass::allocation_order_end(const MachineFunction &MF) const { + return end()-2; // we hide regs F0, F1 from the end + } + }]; +} + +// these are the predicate registers, p0 (1/TRUE) is not here +def PR : RegisterClass<"IA64", [i1], 64, + +// for now, let's be 
wimps and only have the scratch predicate regs + [p6, p7, p8, p9, p10, p11, p12, p13, p14, p15]> { + let Size = 64; + } + +/* + [p1, p2, p3, p4, p5, p6, p7, + p8, p9, p10, p11, p12, p13, p14, p15, + p16, p17, p18, p19, p20, p21, p22, p23, + p24, p25, p26, p27, p28, p29, p30, p31, + p32, p33, p34, p35, p36, p37, p38, p39, + p40, p41, p42, p43, p44, p45, p46, p47, + p48, p49, p50, p51, p52, p53, p54, p55, + p56, p57, p58, p59, p60, p61, p62, p63]>; + */ diff --git a/lib/Target/IA64/IA64Subtarget.cpp b/lib/Target/IA64/IA64Subtarget.cpp new file mode 100644 index 000000000000..4eca50bdd22b --- /dev/null +++ b/lib/Target/IA64/IA64Subtarget.cpp @@ -0,0 +1,18 @@ +//===-- IA64Subtarget.cpp - IA64 Subtarget Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IA64 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "subtarget" +#include "IA64Subtarget.h" +using namespace llvm; + +IA64Subtarget::IA64Subtarget() {} diff --git a/lib/Target/IA64/IA64Subtarget.h b/lib/Target/IA64/IA64Subtarget.h new file mode 100644 index 000000000000..0387af55119a --- /dev/null +++ b/lib/Target/IA64/IA64Subtarget.h @@ -0,0 +1,28 @@ +//====---- IA64Subtarget.h - Define Subtarget for the IA64 -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the IA64 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef IA64SUBTARGET_H +#define IA64SUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" + +namespace llvm { + +class IA64Subtarget : public TargetSubtarget { +public: + IA64Subtarget(); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/IA64/IA64TargetAsmInfo.cpp b/lib/Target/IA64/IA64TargetAsmInfo.cpp new file mode 100644 index 000000000000..2ae8beb9148e --- /dev/null +++ b/lib/Target/IA64/IA64TargetAsmInfo.cpp @@ -0,0 +1,44 @@ +//===-- IA64TargetAsmInfo.cpp - IA64 asm properties -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the IA64TargetAsmInfo properties. 
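+//
+// The interesting properties set below are the data directives: this
+// backend emits "data1" / "data2.ua" / "data4.ua" / "data8.ua", where the
+// ".ua" suffix marks a datum as unaligned so the assembler does not pad it
+// to its natural alignment. Schematically, the shared AsmPrinter picks the
+// directive by size when emitting initialized data, along these lines (a
+// sketch of the selection logic, not a quote of AsmPrinter.cpp):
+//
+//   if (Size == 8 && TAI->getData64bitsDirective())
+//     O << TAI->getData64bitsDirective() << Value;   // "\tdata8.ua\t" here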
+// +//===----------------------------------------------------------------------===// + +#include "IA64TargetAsmInfo.h" +#include "llvm/Constants.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +IA64TargetAsmInfo::IA64TargetAsmInfo(const TargetMachine &TM): + ELFTargetAsmInfo(TM) { + CommentString = "//"; + Data8bitsDirective = "\tdata1\t"; // FIXME: check that we are + Data16bitsDirective = "\tdata2.ua\t"; // disabling auto-alignment + Data32bitsDirective = "\tdata4.ua\t"; // properly + Data64bitsDirective = "\tdata8.ua\t"; + ZeroDirective = "\t.skip\t"; + AsciiDirective = "\tstring\t"; + + GlobalVarAddrPrefix=""; + GlobalVarAddrSuffix=""; + FunctionAddrPrefix="@fptr("; + FunctionAddrSuffix=")"; + + // FIXME: would be nice to have rodata (no 'w') when appropriate? + ConstantPoolSection = "\n\t.section .data, \"aw\", \"progbits\"\n"; +} + +unsigned IA64TargetAsmInfo::RelocBehaviour() const { + return (TM.getRelocationModel() != Reloc::Static ? + Reloc::LocalOrGlobal : Reloc::Global); +} + +// FIXME: Support small data/bss/rodata sections someday. diff --git a/lib/Target/IA64/IA64TargetAsmInfo.h b/lib/Target/IA64/IA64TargetAsmInfo.h new file mode 100644 index 000000000000..130822e887f4 --- /dev/null +++ b/lib/Target/IA64/IA64TargetAsmInfo.h @@ -0,0 +1,33 @@ +//=====-- IA64TargetAsmInfo.h - IA64 asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the IA64TargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef IA64TARGETASMINFO_H +#define IA64TARGETASMINFO_H + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" + +namespace llvm { + + // Forward declaration. + class TargetMachine; + + struct IA64TargetAsmInfo : public ELFTargetAsmInfo { + explicit IA64TargetAsmInfo(const TargetMachine &TM); + virtual unsigned RelocBehaviour() const; + }; + + +} // namespace llvm + +#endif diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp new file mode 100644 index 000000000000..878a00a44518 --- /dev/null +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -0,0 +1,94 @@ +//===-- IA64TargetMachine.cpp - Define TargetMachine for IA64 -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IA64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "IA64TargetAsmInfo.h" +#include "IA64TargetMachine.h" +#include "IA64.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachineRegistry.h" +using namespace llvm; + +/// IA64TargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. 
+extern "C" int IA64TargetMachineModule; +int IA64TargetMachineModule = 0; + +static RegisterTarget X("ia64", + "IA-64 (Itanium) [experimental]"); + +const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const { + return new IA64TargetAsmInfo(*this); +} + +unsigned IA64TargetMachine::getModuleMatchQuality(const Module &M) { + // we match [iI][aA]*64 + bool seenIA64=false; + std::string TT = M.getTargetTriple(); + + if (TT.size() >= 4) { + if( (TT[0]=='i' || TT[0]=='I') && + (TT[1]=='a' || TT[1]=='A') ) { + for(unsigned int i=2; i<(TT.size()-1); i++) + if(TT[i]=='6' && TT[i+1]=='4') + seenIA64=true; + } + + if (seenIA64) + return 20; // strong match + } + // If the target triple is something non-ia64, we don't match. + if (!TT.empty()) return 0; + +#if defined(__ia64__) || defined(__IA64__) + return 5; +#else + return 0; +#endif +} + +/// IA64TargetMachine ctor - Create an LP64 architecture model +/// +IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS) + : DataLayout("e-f80:128:128"), + FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), + TLInfo(*this) { // FIXME? check this stuff +} + + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool IA64TargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel){ + PM.add(createIA64DAGToDAGInstructionSelector(*this)); + return false; +} + +bool IA64TargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Make sure everything is bundled happily + PM.add(createIA64BundlingPass(*this)); + return true; +} +bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} + diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h new file mode 100644 index 000000000000..29d625ce673a --- /dev/null +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -0,0 +1,64 @@ +//===-- IA64TargetMachine.h - Define TargetMachine for IA64 ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the IA64 specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_IA64TARGETMACHINE_H +#define LLVM_TARGET_IA64TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "IA64InstrInfo.h" +#include "IA64ISelLowering.h" +#include "IA64Subtarget.h" + +namespace llvm { + +class IA64TargetMachine : public LLVMTargetMachine { + IA64Subtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + IA64InstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + //IA64JITInfo JITInfo; + IA64TargetLowering TLInfo; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + IA64TargetMachine(const Module &M, const std::string &FS); + + virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; } + virtual IA64TargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + virtual const IA64RegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); +}; +} // End llvm namespace + +#endif + + diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile new file mode 100644 index 000000000000..d38325422c60 --- /dev/null +++ b/lib/Target/IA64/Makefile @@ -0,0 +1,20 @@ +##===- lib/Target/IA64/Makefile -----------------------------*- Makefile -*-===## +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMIA64CodeGen +TARGET = IA64 +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \ + IA64GenRegisterInfo.inc IA64GenInstrNames.inc \ + IA64GenInstrInfo.inc IA64GenAsmWriter.inc \ + IA64GenDAGISel.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README new file mode 100644 index 000000000000..60761ac11d97 --- /dev/null +++ b/lib/Target/IA64/README @@ -0,0 +1,48 @@ +TODO: + - Un-bitrot ISel + - Hook up If-Conversion a la ARM target + - Hook up all branch analysis functions + - Instruction scheduling + - Bundling + - Dynamic Optimization + - Testing and bugfixing + - stop passing FP args in both FP *and* integer regs when not required + - allocate low (nonstacked) registers more aggressively + - clean up and thoroughly test the isel patterns. + - fix stacked register allocation order: (for readability) we don't want + the out? registers being the first ones used + - fix up floating point + (nb http://gcc.gnu.org/wiki?pagename=ia64%20floating%20point ) + - bundling! + (we will avoid the mess that is: + http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html ) + - instruction scheduling (hmmmm! 
;) + - counted loop support + - make integer + FP mul/div more clever (we have fixed pseudocode atm) + - track and use comparison complements + +INFO: + - we are strictly LP64 here, no support for ILP32 on HP-UX. Linux users + don't need to worry about this. + - i have instruction scheduling/bundling pseudocode, that really works + (has been tested, albeit at the perl-script level). + so, before you go write your own, send me an email! + +KNOWN DEFECTS AT THE CURRENT TIME: + - C++ vtables contain naked function pointers, not function descriptors, + which is bad. see http://llvm.cs.uiuc.edu/bugs/show_bug.cgi?id=406 + - varargs are broken + - alloca doesn't work (indeed, stack frame layout is bogus) + - no support for big-endian environments + - (not really the backend, but...) the CFE has some issues on IA64. + these will probably be fixed soon. + +ACKNOWLEDGEMENTS: + - Chris Lattner (x100) + - Other LLVM developers ("hey, that looks familiar") + +CONTACT: + - You can email me at duraid@octopus.com.au. If you find a small bug, + just email me. If you find a big bug, please file a bug report + in bugzilla! http://llvm.cs.uiuc.edu is your one stop shop for all + things LLVM. diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt new file mode 100644 index 000000000000..b1d47ef05ec5 --- /dev/null +++ b/lib/Target/MSIL/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_target(MSIL + MSILWriter.cpp + ) diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp new file mode 100644 index 000000000000..ada851d4f226 --- /dev/null +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -0,0 +1,1680 @@ +//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library converts LLVM code to MSIL code. +// +//===----------------------------------------------------------------------===// + +#include "MSILWriter.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Analysis/ConstantsScanner.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Passes.h" + +namespace { + // TargetMachine for the MSIL + struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + + MSILTarget(const Module &M, const std::string &FS) + : DataLayout(&M) {} + + virtual bool WantsWholeFile() const { return true; } + virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel); + + // This class always works, but shouldn't be the default in most cases. + static unsigned getModuleMatchQuality(const Module &M) { return 1; } + + virtual const TargetData *getTargetData() const { return &DataLayout; } + }; +} + +/// MSILTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. 
+extern "C" int MSILTargetMachineModule; +int MSILTargetMachineModule = 0; + +static RegisterTarget X("msil", "MSIL backend"); + +bool MSILModule::runOnModule(Module &M) { + ModulePtr = &M; + TD = &getAnalysis(); + bool Changed = false; + // Find named types. + TypeSymbolTable& Table = M.getTypeSymbolTable(); + std::set Types = getAnalysis().getTypes(); + for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { + if (!isa(I->second) && !isa(I->second)) + Table.remove(I++); + else { + std::set::iterator T = Types.find(I->second); + if (T==Types.end()) + Table.remove(I++); + else { + Types.erase(T); + ++I; + } + } + } + // Find unnamed types. + unsigned RenameCounter = 0; + for (std::set::const_iterator I = Types.begin(), + E = Types.end(); I!=E; ++I) + if (const StructType *STy = dyn_cast(*I)) { + while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy)) + ++RenameCounter; + Changed = true; + } + // Pointer for FunctionPass. + UsedTypes = &getAnalysis().getTypes(); + return Changed; +} + +char MSILModule::ID = 0; +char MSILWriter::ID = 0; + +bool MSILWriter::runOnFunction(Function &F) { + if (F.isDeclaration()) return false; + + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + LInfo = &getAnalysis(); + printFunction(F); + return false; +} + + +bool MSILWriter::doInitialization(Module &M) { + ModulePtr = &M; + Mang = new Mangler(M); + Out << ".assembly extern mscorlib {}\n"; + Out << ".assembly MSIL {}\n\n"; + Out << "// External\n"; + printExternals(); + Out << "// Declarations\n"; + printDeclarations(M.getTypeSymbolTable()); + Out << "// Definitions\n"; + printGlobalVariables(); + Out << "// Startup code\n"; + printModuleStartup(); + return false; +} + + +bool MSILWriter::doFinalization(Module &M) { + delete Mang; + return false; +} + + +void MSILWriter::printModuleStartup() { + Out << + ".method static public int32 $MSIL_Startup() {\n" + "\t.entrypoint\n" + "\t.locals (native int i)\n" + "\t.locals (native int argc)\n" + "\t.locals (native int ptr)\n" + "\t.locals (void* argv)\n" + "\t.locals (string[] args)\n" + "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n" + "\tdup\n" + "\tstloc\targs\n" + "\tldlen\n" + "\tconv.i4\n" + "\tdup\n" + "\tstloc\targc\n"; + printPtrLoad(TD->getPointerSize()); + Out << + "\tmul\n" + "\tlocalloc\n" + "\tstloc\targv\n" + "\tldc.i4.0\n" + "\tstloc\ti\n" + "L_01:\n" + "\tldloc\ti\n" + "\tldloc\targc\n" + "\tceq\n" + "\tbrtrue\tL_02\n" + "\tldloc\targs\n" + "\tldloc\ti\n" + "\tldelem.ref\n" + "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::" + "StringToHGlobalAnsi(string)\n" + "\tstloc\tptr\n" + "\tldloc\targv\n" + "\tldloc\ti\n"; + printPtrLoad(TD->getPointerSize()); + Out << + "\tmul\n" + "\tadd\n" + "\tldloc\tptr\n" + "\tstind.i\n" + "\tldloc\ti\n" + "\tldc.i4.1\n" + "\tadd\n" + "\tstloc\ti\n" + "\tbr\tL_01\n" + "L_02:\n" + "\tcall void $MSIL_Init()\n"; + + // Call user 'main' function. 
+ const Function* F = ModulePtr->getFunction("main"); + if (!F || F->isDeclaration()) { + Out << "\tldc.i4.0\n\tret\n}\n"; + return; + } + bool BadSig = true; + std::string Args(""); + Function::const_arg_iterator Arg1,Arg2; + + switch (F->arg_size()) { + case 0: + BadSig = false; + break; + case 1: + Arg1 = F->arg_begin(); + if (Arg1->getType()->isInteger()) { + Out << "\tldloc\targc\n"; + Args = getTypeName(Arg1->getType()); + BadSig = false; + } + break; + case 2: + Arg1 = Arg2 = F->arg_begin(); ++Arg2; + if (Arg1->getType()->isInteger() && + Arg2->getType()->getTypeID() == Type::PointerTyID) { + Out << "\tldloc\targc\n\tldloc\targv\n"; + Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); + BadSig = false; + } + break; + default: + BadSig = true; + } + + bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); + if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + Out << "\tldc.i4.0\n"; + } else { + Out << "\tcall\t" << getTypeName(F->getReturnType()) << + getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n"; + if (RetVoid) + Out << "\tldc.i4.0\n"; + else + Out << "\tconv.i4\n"; + } + Out << "\tret\n}\n"; +} + +bool MSILWriter::isZeroValue(const Value* V) { + if (const Constant *C = dyn_cast(V)) + return C->isNullValue(); + return false; +} + + +std::string MSILWriter::getValueName(const Value* V) { + // Name into the quotes allow control and space characters. + return "'"+Mang->getValueName(V)+"'"; +} + + +std::string MSILWriter::getLabelName(const std::string& Name) { + if (Name.find('.')!=std::string::npos) { + std::string Tmp(Name); + // Replace unaccepable characters in the label name. + for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I) + if (*I=='.') *I = '@'; + return Tmp; + } + return Name; +} + + +std::string MSILWriter::getLabelName(const Value* V) { + return getLabelName(Mang->getValueName(V)); +} + + +std::string MSILWriter::getConvModopt(unsigned CallingConvID) { + switch (CallingConvID) { + case CallingConv::C: + case CallingConv::Cold: + case CallingConv::Fast: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) "; + case CallingConv::X86_FastCall: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; + case CallingConv::X86_StdCall: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; + default: + cerr << "CallingConvID = " << CallingConvID << '\n'; + assert(0 && "Unsupported calling convention"); + } + return ""; // Not reached +} + + +std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) { + std::string Tmp = ""; + const Type* ElemTy = Ty; + assert(Ty->getTypeID()==TyID && "Invalid type passed"); + // Walk trought array element types. + for (;;) { + // Multidimensional array. + if (ElemTy->getTypeID()==TyID) { + if (const ArrayType* ATy = dyn_cast(ElemTy)) + Tmp += utostr(ATy->getNumElements()); + else if (const VectorType* VTy = dyn_cast(ElemTy)) + Tmp += utostr(VTy->getNumElements()); + ElemTy = cast(ElemTy)->getElementType(); + } + // Base element type found. 
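+  // Worked example of the walk above: for the LLVM type [2 x [3 x i32]]
+  // the loop collects "2" and "3" while descending and stops at the i32
+  // base element, so the function returns "int32 [2,3]" -- one MSIL
+  // multidimensional array name rather than a nested array-of-array name.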
+ if (ElemTy->getTypeID()!=TyID) break; + Tmp += ","; + } + return getTypeName(ElemTy, false, true)+"["+Tmp+"]"; +} + + +std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { + unsigned NumBits = 0; + switch (Ty->getTypeID()) { + case Type::VoidTyID: + return "void "; + case Type::IntegerTyID: + NumBits = getBitWidth(Ty); + if(NumBits==1) + return "bool "; + if (!isSigned) + return "unsigned int"+utostr(NumBits)+" "; + return "int"+utostr(NumBits)+" "; + case Type::FloatTyID: + return "float32 "; + case Type::DoubleTyID: + return "float64 "; + default: + cerr << "Type = " << *Ty << '\n'; + assert(0 && "Invalid primitive type"); + } + return ""; // Not reached +} + + +std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, + bool isNested) { + if (Ty->isPrimitiveType() || Ty->isInteger()) + return getPrimitiveTypeName(Ty,isSigned); + // FIXME: "OpaqueType" support + switch (Ty->getTypeID()) { + case Type::PointerTyID: + return "void* "; + case Type::StructTyID: + if (isNested) + return ModulePtr->getTypeName(Ty); + return "valuetype '"+ModulePtr->getTypeName(Ty)+"' "; + case Type::ArrayTyID: + if (isNested) + return getArrayTypeName(Ty->getTypeID(),Ty); + return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; + case Type::VectorTyID: + if (isNested) + return getArrayTypeName(Ty->getTypeID(),Ty); + return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; + default: + cerr << "Type = " << *Ty << '\n'; + assert(0 && "Invalid type in getTypeName()"); + } + return ""; // Not reached +} + + +MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) { + // Function argument + if (isa(V)) + return ArgumentVT; + // Function + else if (const Function* F = dyn_cast(V)) + return F->hasLocalLinkage() ? InternalVT : GlobalVT; + // Variable + else if (const GlobalVariable* G = dyn_cast(V)) + return G->hasLocalLinkage() ? InternalVT : GlobalVT; + // Constant + else if (isa(V)) + return isa(V) ? ConstExprVT : ConstVT; + // Local variable + return LocalVT; +} + + +std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand, + bool isSigned) { + unsigned NumBits = 0; + switch (Ty->getTypeID()) { + // Integer constant, expanding for stack operations. + case Type::IntegerTyID: + NumBits = getBitWidth(Ty); + // Expand integer value to "int32" or "int64". + if (Expand) return (NumBits<=32 ? "i4" : "i8"); + if (NumBits==1) return "i1"; + return (isSigned ? "i" : "u")+utostr(NumBits/8); + // Float constant. + case Type::FloatTyID: + return "r4"; + case Type::DoubleTyID: + return "r8"; + case Type::PointerTyID: + return "i"+utostr(TD->getTypeAllocSize(Ty)); + default: + cerr << "TypeID = " << Ty->getTypeID() << '\n'; + assert(0 && "Invalid type in TypeToPostfix()"); + } + return ""; // Not reached +} + + +void MSILWriter::printConvToPtr() { + switch (ModulePtr->getPointerSize()) { + case Module::Pointer32: + printSimpleInstruction("conv.u4"); + break; + case Module::Pointer64: + printSimpleInstruction("conv.u8"); + break; + default: + assert(0 && "Module use not supporting pointer size"); + } +} + + +void MSILWriter::printPtrLoad(uint64_t N) { + switch (ModulePtr->getPointerSize()) { + case Module::Pointer32: + printSimpleInstruction("ldc.i4",utostr(N).c_str()); + // FIXME: Need overflow test? 
+ if (!isUInt32(N)) { + cerr << "Value = " << utostr(N) << '\n'; + assert(0 && "32-bit pointer overflowed"); + } + break; + case Module::Pointer64: + printSimpleInstruction("ldc.i8",utostr(N).c_str()); + break; + default: + assert(0 && "Module use not supporting pointer size"); + } +} + + +void MSILWriter::printValuePtrLoad(const Value* V) { + printValueLoad(V); + printConvToPtr(); +} + + +void MSILWriter::printConstLoad(const Constant* C) { + if (const ConstantInt* CInt = dyn_cast(C)) { + // Integer constant + Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t'; + if (CInt->isMinValue(true)) + Out << CInt->getSExtValue(); + else + Out << CInt->getZExtValue(); + } else if (const ConstantFP* FP = dyn_cast(C)) { + // Float constant + uint64_t X; + unsigned Size; + if (FP->getType()->getTypeID()==Type::FloatTyID) { + X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue(); + Size = 4; + } else { + X = FP->getValueAPF().bitcastToAPInt().getZExtValue(); + Size = 8; + } + Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')'; + } else if (isa(C)) { + // Undefined constant value = NULL. + printPtrLoad(0); + } else { + cerr << "Constant = " << *C << '\n'; + assert(0 && "Invalid constant value"); + } + Out << '\n'; +} + + +void MSILWriter::printValueLoad(const Value* V) { + MSILWriter::ValueType Location = getValueLocation(V); + switch (Location) { + // Global variable or function address. + case GlobalVT: + case InternalVT: + if (const Function* F = dyn_cast(V)) { + std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); + printSimpleInstruction("ldftn", + getCallSignature(F->getFunctionType(),NULL,Name).c_str()); + } else { + std::string Tmp; + const Type* ElemTy = cast(V->getType())->getElementType(); + if (Location==GlobalVT && cast(V)->hasDLLImportLinkage()) { + Tmp = "void* "+getValueName(V); + printSimpleInstruction("ldsfld",Tmp.c_str()); + } else { + Tmp = getTypeName(ElemTy)+getValueName(V); + printSimpleInstruction("ldsflda",Tmp.c_str()); + } + } + break; + // Function argument. + case ArgumentVT: + printSimpleInstruction("ldarg",getValueName(V).c_str()); + break; + // Local function variable. + case LocalVT: + printSimpleInstruction("ldloc",getValueName(V).c_str()); + break; + // Constant value. + case ConstVT: + if (isa(V)) + printPtrLoad(0); + else + printConstLoad(cast(V)); + break; + // Constant expression. 
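+  // A ConstantExpr is an expression folded into an operand -- typically a
+  // getelementptr or bitcast of a global inside an initializer. It has no
+  // storage location of its own, so it is re-expanded onto the evaluation
+  // stack via printConstantExpr.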
+ case ConstExprVT: + printConstantExpr(cast(V)); + break; + default: + cerr << "Value = " << *V << '\n'; + assert(0 && "Invalid value location"); + } +} + + +void MSILWriter::printValueSave(const Value* V) { + switch (getValueLocation(V)) { + case ArgumentVT: + printSimpleInstruction("starg",getValueName(V).c_str()); + break; + case LocalVT: + printSimpleInstruction("stloc",getValueName(V).c_str()); + break; + default: + cerr << "Value = " << *V << '\n'; + assert(0 && "Invalid value location"); + } +} + + +void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left, + const Value* Right) { + printValueLoad(Left); + printValueLoad(Right); + Out << '\t' << Name << '\n'; +} + + +void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) { + if(Operand) + Out << '\t' << Inst << '\t' << Operand << '\n'; + else + Out << '\t' << Inst << '\n'; +} + + +void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) { + for (BasicBlock::const_iterator I = Dst->begin(), E = Dst->end(); + isa(I); ++I) { + const PHINode* Phi = cast(I); + const Value* Val = Phi->getIncomingValueForBlock(Src); + if (isa(Val)) continue; + printValueLoad(Val); + printValueSave(Phi); + } +} + + +void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB, + const BasicBlock* TrueBB, + const BasicBlock* FalseBB) { + if (TrueBB==FalseBB) { + // "TrueBB" and "FalseBB" destination equals + printPHICopy(CurrBB,TrueBB); + printSimpleInstruction("pop"); + printSimpleInstruction("br",getLabelName(TrueBB).c_str()); + } else if (FalseBB==NULL) { + // If "FalseBB" not used the jump have condition + printPHICopy(CurrBB,TrueBB); + printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); + } else if (TrueBB==NULL) { + // If "TrueBB" not used the jump is unconditional + printPHICopy(CurrBB,FalseBB); + printSimpleInstruction("br",getLabelName(FalseBB).c_str()); + } else { + // Copy PHI instructions for each block + std::string TmpLabel; + // Print PHI instructions for "TrueBB" + if (isa(TrueBB->begin())) { + TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID()); + printSimpleInstruction("brtrue",TmpLabel.c_str()); + } else { + printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); + } + // Print PHI instructions for "FalseBB" + if (isa(FalseBB->begin())) { + printPHICopy(CurrBB,FalseBB); + printSimpleInstruction("br",getLabelName(FalseBB).c_str()); + } else { + printSimpleInstruction("br",getLabelName(FalseBB).c_str()); + } + if (isa(TrueBB->begin())) { + // Handle "TrueBB" PHI Copy + Out << TmpLabel << ":\n"; + printPHICopy(CurrBB,TrueBB); + printSimpleInstruction("br",getLabelName(TrueBB).c_str()); + } + } +} + + +void MSILWriter::printBranchInstruction(const BranchInst* Inst) { + if (Inst->isUnconditional()) { + printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0)); + } else { + printValueLoad(Inst->getCondition()); + printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0), + Inst->getSuccessor(1)); + } +} + + +void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue, + const Value* VFalse) { + std::string TmpLabel = std::string("select$true_")+utostr(getUniqID()); + printValueLoad(VTrue); + printValueLoad(Cond); + printSimpleInstruction("brtrue",TmpLabel.c_str()); + printSimpleInstruction("pop"); + printValueLoad(VFalse); + Out << TmpLabel << ":\n"; +} + + +void MSILWriter::printIndirectLoad(const Value* V) { + const Type* Ty = V->getType(); + printValueLoad(V); + if (const PointerType* P = dyn_cast(Ty)) + Ty = P->getElementType(); 
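+  // Example, assuming getTypePostfix defaults to the unsigned flavor: a
+  // load through an i32* strips the pointer type here, the postfix comes
+  // out as "u4", and the emitted instruction is "ldind.u4" (an i64* gives
+  // "ldind.u8").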
+ std::string Tmp = "ldind."+getTypePostfix(Ty, false); + printSimpleInstruction(Tmp.c_str()); +} + + +void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) { + printValueLoad(Ptr); + printValueLoad(Val); + printIndirectSave(Val->getType()); +} + + +void MSILWriter::printIndirectSave(const Type* Ty) { + // Instruction need signed postfix for any type. + std::string postfix = getTypePostfix(Ty, false); + if (*postfix.begin()=='u') *postfix.begin() = 'i'; + postfix = "stind."+postfix; + printSimpleInstruction(postfix.c_str()); +} + + +void MSILWriter::printCastInstruction(unsigned int Op, const Value* V, + const Type* Ty) { + std::string Tmp(""); + printValueLoad(V); + switch (Op) { + // Signed + case Instruction::SExt: + case Instruction::SIToFP: + case Instruction::FPToSI: + Tmp = "conv."+getTypePostfix(Ty,false,true); + printSimpleInstruction(Tmp.c_str()); + break; + // Unsigned + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::FPToUI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + Tmp = "conv."+getTypePostfix(Ty,false); + printSimpleInstruction(Tmp.c_str()); + break; + // Do nothing + case Instruction::BitCast: + // FIXME: meaning that ld*/st* instruction do not change data format. + break; + default: + cerr << "Opcode = " << Op << '\n'; + assert(0 && "Invalid conversion instruction"); + } +} + + +void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I, + gep_type_iterator E) { + unsigned Size; + // Load address + printValuePtrLoad(V); + // Calculate element offset. + for (; I!=E; ++I){ + Size = 0; + const Value* IndexValue = I.getOperand(); + if (const StructType* StrucTy = dyn_cast(*I)) { + uint64_t FieldIndex = cast(IndexValue)->getZExtValue(); + // Offset is the sum of all previous structure fields. + for (uint64_t F = 0; FgetTypeAllocSize(StrucTy->getContainedType((unsigned)F)); + printPtrLoad(Size); + printSimpleInstruction("add"); + continue; + } else if (const SequentialType* SeqTy = dyn_cast(*I)) { + Size = TD->getTypeAllocSize(SeqTy->getElementType()); + } else { + Size = TD->getTypeAllocSize(*I); + } + // Add offset of current element to stack top. + if (!isZeroValue(IndexValue)) { + // Constant optimization. + if (const ConstantInt* C = dyn_cast(IndexValue)) { + if (C->getValue().isNegative()) { + printPtrLoad(C->getValue().abs().getZExtValue()*Size); + printSimpleInstruction("sub"); + continue; + } else + printPtrLoad(C->getZExtValue()*Size); + } else { + printPtrLoad(Size); + printValuePtrLoad(IndexValue); + printSimpleInstruction("mul"); + } + printSimpleInstruction("add"); + } + } +} + + +std::string MSILWriter::getCallSignature(const FunctionType* Ty, + const Instruction* Inst, + std::string Name) { + std::string Tmp(""); + if (Ty->isVarArg()) Tmp += "vararg "; + // Name and return type. + Tmp += getTypeName(Ty->getReturnType())+Name+"("; + // Function argument type list. + unsigned NumParams = Ty->getNumParams(); + for (unsigned I = 0; I!=NumParams; ++I) { + if (I!=0) Tmp += ","; + Tmp += getTypeName(Ty->getParamType(I)); + } + // CLR needs to know the exact amount of parameters received by vararg + // function, because caller cleans the stack. + if (Ty->isVarArg() && Inst) { + // Origin to function arguments in "CallInst" or "InvokeInst". + unsigned Org = isa(Inst) ? 3 : 1; + // Print variable argument types. 
+
+
+std::string MSILWriter::getCallSignature(const FunctionType* Ty,
+                                         const Instruction* Inst,
+                                         std::string Name) {
+  std::string Tmp("");
+  if (Ty->isVarArg()) Tmp += "vararg ";
+  // Name and return type.
+  Tmp += getTypeName(Ty->getReturnType())+Name+"(";
+  // Function argument type list.
+  unsigned NumParams = Ty->getNumParams();
+  for (unsigned I = 0; I!=NumParams; ++I) {
+    if (I!=0) Tmp += ",";
+    Tmp += getTypeName(Ty->getParamType(I));
+  }
+  // The CLR needs to know the exact number of parameters received by a vararg
+  // function, because the caller cleans the stack.
+  if (Ty->isVarArg() && Inst) {
+    // Origin to function arguments in "CallInst" or "InvokeInst".
+    unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
+    // Print variable argument types.
+    unsigned NumOperands = Inst->getNumOperands()-Org;
+    if (NumParams<NumOperands) {
+      for (unsigned J = NumParams; J!=NumOperands; ++J)
+        Tmp += ","+getTypeName(Inst->getOperand(J+Org)->getType());
+    }
+  }
+  return Tmp+")";
+}
+
+
+void MSILWriter::printFunctionCall(const Value* FnVal,
+                                   const Instruction* Inst) {
+  // Get function calling convention.
+  std::string Name = "";
+  if (const CallInst* Call = dyn_cast<CallInst>(Inst))
+    Name = getConvModopt(Call->getCallingConv());
+  else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
+    Name = getConvModopt(Invoke->getCallingConv());
+  else {
+    cerr << "Instruction = " << Inst->getName() << '\n';
+    assert(0 && "Need \"Invoke\" or \"Call\" instruction only");
+  }
+  if (const Function* F = dyn_cast<Function>(FnVal)) {
+    // Direct call.
+    Name += getValueName(F);
+    printSimpleInstruction("call",
+      getCallSignature(F->getFunctionType(),Inst,Name).c_str());
+  } else {
+    // Indirect function call.
+    const PointerType* PTy = cast<PointerType>(FnVal->getType());
+    const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
+    // Load function address.
+    printValueLoad(FnVal);
+    printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
+  }
+}
+
+
+void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
+  std::string Name;
+  switch (Inst->getIntrinsicID()) {
+  case Intrinsic::vastart:
+    Name = getValueName(Inst->getOperand(1));
+    Name.insert(Name.length()-1,"$valist");
+    // Obtain the argument handle.
+    printSimpleInstruction("ldloca",Name.c_str());
+    printSimpleInstruction("arglist");
+    printSimpleInstruction("call",
+      "instance void [mscorlib]System.ArgIterator::.ctor"
+      "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
+    // Save as pointer type "void*"
+    printValueLoad(Inst->getOperand(1));
+    printSimpleInstruction("ldloca",Name.c_str());
+    printIndirectSave(PointerType::getUnqual(IntegerType::get(8)));
+    break;
+  case Intrinsic::vaend:
+    // Close the argument list handle.
+    printIndirectLoad(Inst->getOperand(1));
+    printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
+    break;
+  case Intrinsic::vacopy:
+    // Copy "ArgIterator" valuetype.
+    printIndirectLoad(Inst->getOperand(1));
+    printIndirectLoad(Inst->getOperand(2));
+    printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
+    break;
+  default:
+    cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
+    assert(0 && "Invalid intrinsic function");
+  }
+}
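
getCallSignature above appends the concrete call-site types of the variable arguments, because the CLR caller cleans the stack and must know exactly what was pushed. A minimal sketch of that string assembly, assuming plain type-name strings rather than the writer's getTypeName; all names here are illustrative:

#include <iostream>
#include <string>
#include <vector>

static std::string callSignature(const std::string& Ret,
                                 const std::string& Name,
                                 const std::vector<std::string>& Fixed,
                                 const std::vector<std::string>& Extra) {
  std::string Sig = "vararg " + Ret + " " + Name + "(";
  for (size_t I = 0; I != Fixed.size(); ++I)
    Sig += (I ? "," : "") + Fixed[I];      // declared parameter types
  for (size_t J = 0; J != Extra.size(); ++J)
    Sig += "," + Extra[J];                 // call-site types of the variable part
  return Sig + ")";
}

int main() {
  std::vector<std::string> Fixed(1, "int8*");
  std::vector<std::string> Extra(1, "int32");
  // e.g. a call to printf with one extra int argument:
  std::cout << callSignature("int32", "printf", Fixed, Extra) << "\n";
  return 0;
}
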
+
+
+void MSILWriter::printCallInstruction(const Instruction* Inst) {
+  if (isa<IntrinsicInst>(Inst)) {
+    // Handle intrinsic function.
+    printIntrinsicCall(cast<IntrinsicInst>(Inst));
+  } else {
+    // Load the arguments onto the stack and call the function.
+    for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I)
+      printValueLoad(Inst->getOperand(I));
+    printFunctionCall(Inst->getOperand(0),Inst);
+  }
+}
+
+
+void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
+                                      const Value* Right) {
+  switch (Predicate) {
+  case ICmpInst::ICMP_EQ:
+    printBinaryInstruction("ceq",Left,Right);
+    break;
+  case ICmpInst::ICMP_NE:
+    // Emulate as: not neg (Op1 eq Op2)
+    printBinaryInstruction("ceq",Left,Right);
+    printSimpleInstruction("neg");
+    printSimpleInstruction("not");
+    break;
+  case ICmpInst::ICMP_ULE:
+  case ICmpInst::ICMP_SLE:
+    // Emulate as: (Op1 eq Op2) or (Op1 lt Op2)
+    printBinaryInstruction("ceq",Left,Right);
+    if (Predicate==ICmpInst::ICMP_ULE)
+      printBinaryInstruction("clt.un",Left,Right);
+    else
+      printBinaryInstruction("clt",Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case ICmpInst::ICMP_UGE:
+  case ICmpInst::ICMP_SGE:
+    // Emulate as: (Op1 eq Op2) or (Op1 gt Op2)
+    printBinaryInstruction("ceq",Left,Right);
+    if (Predicate==ICmpInst::ICMP_UGE)
+      printBinaryInstruction("cgt.un",Left,Right);
+    else
+      printBinaryInstruction("cgt",Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case ICmpInst::ICMP_ULT:
+    printBinaryInstruction("clt.un",Left,Right);
+    break;
+  case ICmpInst::ICMP_SLT:
+    printBinaryInstruction("clt",Left,Right);
+    break;
+  case ICmpInst::ICMP_UGT:
+    printBinaryInstruction("cgt.un",Left,Right);
+    break;
+  case ICmpInst::ICMP_SGT:
+    printBinaryInstruction("cgt",Left,Right);
+    break;
+  default:
+    cerr << "Predicate = " << Predicate << '\n';
+    assert(0 && "Invalid icmp predicate");
+  }
+}
+
+
+void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
+                                      const Value* Right) {
+  // FIXME: Correct comparison
+  std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
+  switch (Predicate) {
+  case FCmpInst::FCMP_UGT:
+    // X > Y || llvm_fcmp_uno(X, Y)
+    printBinaryInstruction("cgt",Left,Right);
+    printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_OGT:
+    // X > Y
+    printBinaryInstruction("cgt",Left,Right);
+    break;
+  case FCmpInst::FCMP_UGE:
+    // X >= Y || llvm_fcmp_uno(X, Y)
+    printBinaryInstruction("ceq",Left,Right);
+    printBinaryInstruction("cgt",Left,Right);
+    printSimpleInstruction("or");
+    printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_OGE:
+    // X >= Y
+    printBinaryInstruction("ceq",Left,Right);
+    printBinaryInstruction("cgt",Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_ULT:
+    // X < Y || llvm_fcmp_uno(X, Y)
+    printBinaryInstruction("clt",Left,Right);
+    printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_OLT:
+    // X < Y
+    printBinaryInstruction("clt",Left,Right);
+    break;
+  case FCmpInst::FCMP_ULE:
+    // X <= Y || llvm_fcmp_uno(X, Y)
+    printBinaryInstruction("ceq",Left,Right);
+    printBinaryInstruction("clt",Left,Right);
+    printSimpleInstruction("or");
+    printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_OLE:
+    // X <= Y
+    printBinaryInstruction("ceq",Left,Right);
+    printBinaryInstruction("clt",Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_UEQ:
+    // X == Y || llvm_fcmp_uno(X, Y)
+    printBinaryInstruction("ceq",Left,Right);
+    printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+    printSimpleInstruction("or");
+    break;
+  case FCmpInst::FCMP_OEQ:
+    // X == Y
printBinaryInstruction("ceq",Left,Right); + break; + case FCmpInst::FCMP_UNE: + // X != Y + printBinaryInstruction("ceq",Left,Right); + printSimpleInstruction("neg"); + printSimpleInstruction("not"); + break; + case FCmpInst::FCMP_ONE: + // X != Y && llvm_fcmp_ord(X, Y) + printBinaryInstruction("ceq",Left,Right); + printSimpleInstruction("not"); + break; + case FCmpInst::FCMP_ORD: + // return X == X && Y == Y + printBinaryInstruction("ceq",Left,Left); + printBinaryInstruction("ceq",Right,Right); + printSimpleInstruction("or"); + break; + case FCmpInst::FCMP_UNO: + // X != X || Y != Y + printBinaryInstruction("ceq",Left,Left); + printSimpleInstruction("not"); + printBinaryInstruction("ceq",Right,Right); + printSimpleInstruction("not"); + printSimpleInstruction("or"); + break; + default: + assert(0 && "Illegal FCmp predicate"); + } +} + + +void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) { + std::string Label = "leave$normal_"+utostr(getUniqID()); + Out << ".try {\n"; + // Load arguments + for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I) + printValueLoad(Inst->getOperand(I)); + // Print call instruction + printFunctionCall(Inst->getOperand(0),Inst); + // Save function result and leave "try" block + printValueSave(Inst); + printSimpleInstruction("leave",Label.c_str()); + Out << "}\n"; + Out << "catch [mscorlib]System.Exception {\n"; + // Redirect to unwind block + printSimpleInstruction("pop"); + printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest()); + Out << "}\n" << Label << ":\n"; + // Redirect to continue block + printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest()); +} + + +void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) { + // FIXME: Emulate with IL "switch" instruction + // Emulate = if () else if () else if () else ... + for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) { + printValueLoad(Inst->getCondition()); + printValueLoad(Inst->getCaseValue(I)); + printSimpleInstruction("ceq"); + // Condition jump to successor block + printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL); + } + // Jump to default block + printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest()); +} + + +void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) { + printIndirectLoad(Inst->getOperand(0)); + printSimpleInstruction("call", + "instance typedref [mscorlib]System.ArgIterator::GetNextArg()"); + printSimpleInstruction("refanyval","void*"); + std::string Name = + "ldind."+getTypePostfix(PointerType::getUnqual(IntegerType::get(8)),false); + printSimpleInstruction(Name.c_str()); +} + + +void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) { + uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType()); + // Constant optimization. + if (const ConstantInt* CInt = dyn_cast(Inst->getOperand(0))) { + printPtrLoad(CInt->getZExtValue()*Size); + } else { + printPtrLoad(Size); + printValueLoad(Inst->getOperand(0)); + printSimpleInstruction("mul"); + } + printSimpleInstruction("localloc"); +} + + +void MSILWriter::printInstruction(const Instruction* Inst) { + const Value *Left = 0, *Right = 0; + if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0); + if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1); + // Print instruction + // FIXME: "ShuffleVector","ExtractElement","InsertElement" support. 
+ switch (Inst->getOpcode()) { + // Terminator + case Instruction::Ret: + if (Inst->getNumOperands()) { + printValueLoad(Left); + printSimpleInstruction("ret"); + } else + printSimpleInstruction("ret"); + break; + case Instruction::Br: + printBranchInstruction(cast(Inst)); + break; + // Binary + case Instruction::Add: + printBinaryInstruction("add",Left,Right); + break; + case Instruction::Sub: + printBinaryInstruction("sub",Left,Right); + break; + case Instruction::Mul: + printBinaryInstruction("mul",Left,Right); + break; + case Instruction::UDiv: + printBinaryInstruction("div.un",Left,Right); + break; + case Instruction::SDiv: + case Instruction::FDiv: + printBinaryInstruction("div",Left,Right); + break; + case Instruction::URem: + printBinaryInstruction("rem.un",Left,Right); + break; + case Instruction::SRem: + case Instruction::FRem: + printBinaryInstruction("rem",Left,Right); + break; + // Binary Condition + case Instruction::ICmp: + printICmpInstruction(cast(Inst)->getPredicate(),Left,Right); + break; + case Instruction::FCmp: + printFCmpInstruction(cast(Inst)->getPredicate(),Left,Right); + break; + // Bitwise Binary + case Instruction::And: + printBinaryInstruction("and",Left,Right); + break; + case Instruction::Or: + printBinaryInstruction("or",Left,Right); + break; + case Instruction::Xor: + printBinaryInstruction("xor",Left,Right); + break; + case Instruction::Shl: + printValueLoad(Left); + printValueLoad(Right); + printSimpleInstruction("conv.i4"); + printSimpleInstruction("shl"); + break; + case Instruction::LShr: + printValueLoad(Left); + printValueLoad(Right); + printSimpleInstruction("conv.i4"); + printSimpleInstruction("shr.un"); + break; + case Instruction::AShr: + printValueLoad(Left); + printValueLoad(Right); + printSimpleInstruction("conv.i4"); + printSimpleInstruction("shr"); + break; + case Instruction::Select: + printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2)); + break; + case Instruction::Load: + printIndirectLoad(Inst->getOperand(0)); + break; + case Instruction::Store: + printIndirectSave(Inst->getOperand(1), Inst->getOperand(0)); + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + printCastInstruction(Inst->getOpcode(),Left, + cast(Inst)->getDestTy()); + break; + case Instruction::GetElementPtr: + printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst), + gep_type_end(Inst)); + break; + case Instruction::Call: + printCallInstruction(cast(Inst)); + break; + case Instruction::Invoke: + printInvokeInstruction(cast(Inst)); + break; + case Instruction::Unwind: + printSimpleInstruction("newobj", + "instance void [mscorlib]System.Exception::.ctor()"); + printSimpleInstruction("throw"); + break; + case Instruction::Switch: + printSwitchInstruction(cast(Inst)); + break; + case Instruction::Alloca: + printAllocaInstruction(cast(Inst)); + break; + case Instruction::Malloc: + assert(0 && "LowerAllocationsPass used"); + break; + case Instruction::Free: + assert(0 && "LowerAllocationsPass used"); + break; + case Instruction::Unreachable: + printSimpleInstruction("ldstr", "\"Unreachable instruction\""); + printSimpleInstruction("newobj", + "instance void [mscorlib]System.Exception::.ctor(string)"); + printSimpleInstruction("throw"); + break; + case 
Instruction::VAArg:
+    printVAArgInstruction(cast<VAArgInst>(Inst));
+    break;
+  default:
+    cerr << "Instruction = " << Inst->getName() << '\n';
+    assert(0 && "Unsupported instruction");
+  }
+}
+
+
+void MSILWriter::printLoop(const Loop* L) {
+  Out << getLabelName(L->getHeader()->getName()) << ":\n";
+  const std::vector<BasicBlock*>& blocks = L->getBlocks();
+  for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
+    BasicBlock* BB = blocks[I];
+    Loop* BBLoop = LInfo->getLoopFor(BB);
+    if (BBLoop == L)
+      printBasicBlock(BB);
+    else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
+      printLoop(BBLoop);
+  }
+  printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
+}
+
+
+void MSILWriter::printBasicBlock(const BasicBlock* BB) {
+  Out << getLabelName(BB) << ":\n";
+  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+    const Instruction* Inst = I;
+    // Comment llvm original instruction
+    // Out << "\n//" << *Inst << "\n";
+    // Do not handle PHI instructions in the current block.
+    if (Inst->getOpcode()==Instruction::PHI) continue;
+    // Print instruction
+    printInstruction(Inst);
+    // Save result
+    if (Inst->getType()!=Type::VoidTy) {
+      // Do not save the value after an invoke; that is done in the "try" block.
+      if (Inst->getOpcode()==Instruction::Invoke) continue;
+      printValueSave(Inst);
+    }
+  }
+}
+
+
+void MSILWriter::printLocalVariables(const Function& F) {
+  std::string Name;
+  const Type* Ty = NULL;
+  std::set<const Value*> Printed;
+  const Value* VaList = NULL;
+  unsigned StackDepth = 8;
+  // Find local variables
+  for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
+    if (I->getOpcode()==Instruction::Call ||
+        I->getOpcode()==Instruction::Invoke) {
+      // Test stack depth.
+      if (StackDepth<I->getNumOperands())
+        StackDepth = I->getNumOperands();
+    }
+    const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
+    if (AI && !isa<GlobalVariable>(AI)) {
+      // Local variable allocation.
+      Ty = PointerType::getUnqual(AI->getAllocatedType());
+      Name = getValueName(AI);
+      Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+    } else if (I->getType()!=Type::VoidTy) {
+      // Operation result.
+      Ty = I->getType();
+      Name = getValueName(&*I);
+      Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+    }
+    // Test on "va_list" variable
+    bool isVaList = false;
+    if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
+      // "va_list" as "va_arg" instruction operand.
+      isVaList = true;
+      VaList = VaInst->getOperand(0);
+    } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
+      // "va_list" as intrinsic function operand.
+      switch (Inst->getIntrinsicID()) {
+      case Intrinsic::vastart:
+      case Intrinsic::vaend:
+      case Intrinsic::vacopy:
+        isVaList = true;
+        VaList = Inst->getOperand(1);
+        break;
+      default:
+        isVaList = false;
+      }
+    }
+    // Print "va_list" variable.
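
One aside before the va_list printing continues below: printLocalVariables derives ".maxstack" from the largest call or invoke operand count (never below 8) and then doubles it as a margin. A hypothetical standalone version of that estimate, with a vector of operand counts standing in for the instruction walk:

#include <cassert>
#include <vector>

static unsigned maxStackEstimate(const std::vector<unsigned>& CallOperandCounts) {
  unsigned StackDepth = 8;                      // default lower bound
  for (size_t I = 0; I != CallOperandCounts.size(); ++I)
    if (StackDepth < CallOperandCounts[I])
      StackDepth = CallOperandCounts[I];
  return StackDepth * 2;                        // value printed after ".maxstack"
}

int main() {
  std::vector<unsigned> Calls;
  Calls.push_back(3);
  Calls.push_back(12);                          // one call with 12 operands
  assert(maxStackEstimate(Calls) == 24);
  return 0;
}
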
+ if (isVaList && Printed.insert(VaList).second) { + Name = getValueName(VaList); + Name.insert(Name.length()-1,"$valist"); + Out << "\t.locals (valuetype [mscorlib]System.ArgIterator " + << Name << ")\n"; + } + } + printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str()); +} + + +void MSILWriter::printFunctionBody(const Function& F) { + // Print body + for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) { + if (Loop *L = LInfo->getLoopFor(I)) { + if (L->getHeader()==I && L->getParentLoop()==0) + printLoop(L); + } else { + printBasicBlock(I); + } + } +} + + +void MSILWriter::printConstantExpr(const ConstantExpr* CE) { + const Value *left = 0, *right = 0; + if (CE->getNumOperands()>=1) left = CE->getOperand(0); + if (CE->getNumOperands()>=2) right = CE->getOperand(1); + // Print instruction + switch (CE->getOpcode()) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + printCastInstruction(CE->getOpcode(),left,CE->getType()); + break; + case Instruction::GetElementPtr: + printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE)); + break; + case Instruction::ICmp: + printICmpInstruction(CE->getPredicate(),left,right); + break; + case Instruction::FCmp: + printFCmpInstruction(CE->getPredicate(),left,right); + break; + case Instruction::Select: + printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2)); + break; + case Instruction::Add: + printBinaryInstruction("add",left,right); + break; + case Instruction::Sub: + printBinaryInstruction("sub",left,right); + break; + case Instruction::Mul: + printBinaryInstruction("mul",left,right); + break; + case Instruction::UDiv: + printBinaryInstruction("div.un",left,right); + break; + case Instruction::SDiv: + case Instruction::FDiv: + printBinaryInstruction("div",left,right); + break; + case Instruction::URem: + printBinaryInstruction("rem.un",left,right); + break; + case Instruction::SRem: + case Instruction::FRem: + printBinaryInstruction("rem",left,right); + break; + case Instruction::And: + printBinaryInstruction("and",left,right); + break; + case Instruction::Or: + printBinaryInstruction("or",left,right); + break; + case Instruction::Xor: + printBinaryInstruction("xor",left,right); + break; + case Instruction::Shl: + printBinaryInstruction("shl",left,right); + break; + case Instruction::LShr: + printBinaryInstruction("shr.un",left,right); + break; + case Instruction::AShr: + printBinaryInstruction("shr",left,right); + break; + default: + cerr << "Expression = " << *CE << "\n"; + assert(0 && "Invalid constant expression"); + } +} + + +void MSILWriter::printStaticInitializerList() { + // List of global variables with uninitialized fields. + for (std::map >::iterator + VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE; + ++VarI) { + const std::vector& InitList = VarI->second; + if (InitList.empty()) continue; + // For each uninitialized field. 
+ for (std::vector::const_iterator I = InitList.begin(), + E = InitList.end(); I!=E; ++I) { + if (const ConstantExpr *CE = dyn_cast(I->constant)) { + // Out << "\n// Init " << getValueName(VarI->first) << ", offset " << + // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n"; + // Load variable address + printValueLoad(VarI->first); + // Add offset + if (I->offset!=0) { + printPtrLoad(I->offset); + printSimpleInstruction("add"); + } + // Load value + printConstantExpr(CE); + // Save result at offset + std::string postfix = getTypePostfix(CE->getType(),true); + if (*postfix.begin()=='u') *postfix.begin() = 'i'; + postfix = "stind."+postfix; + printSimpleInstruction(postfix.c_str()); + } else { + cerr << "Constant = " << *I->constant << '\n'; + assert(0 && "Invalid static initializer"); + } + } + } +} + + +void MSILWriter::printFunction(const Function& F) { + bool isSigned = F.paramHasAttr(0, Attribute::SExt); + Out << "\n.method static "; + Out << (F.hasLocalLinkage() ? "private " : "public "); + if (F.isVarArg()) Out << "vararg "; + Out << getTypeName(F.getReturnType(),isSigned) << + getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n'; + // Arguments + Out << "\t("; + unsigned ArgIdx = 1; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E; + ++I, ++ArgIdx) { + isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt); + if (I!=F.arg_begin()) Out << ", "; + Out << getTypeName(I->getType(),isSigned) << getValueName(I); + } + Out << ") cil managed\n"; + // Body + Out << "{\n"; + printLocalVariables(F); + printFunctionBody(F); + Out << "}\n"; +} + + +void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { + std::string Name; + std::set Printed; + for (std::set::const_iterator + UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { + const Type* Ty = *UI; + if (isa(Ty) || isa(Ty) || isa(Ty)) + Name = getTypeName(Ty, false, true); + // Type with no need to declare. + else continue; + // Print not duplicated type + if (Printed.insert(Ty).second) { + Out << ".class value explicit ansi sealed '" << Name << "'"; + Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty); + Out << " }\n\n"; + } + } +} + + +unsigned int MSILWriter::getBitWidth(const Type* Ty) { + unsigned int N = Ty->getPrimitiveSizeInBits(); + assert(N!=0 && "Invalid type in getBitWidth()"); + switch (N) { + case 1: + case 8: + case 16: + case 32: + case 64: + return N; + default: + cerr << "Bits = " << N << '\n'; + assert(0 && "Unsupported integer width"); + } + return 0; // Not reached +} + + +void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) { + uint64_t TySize = 0; + const Type* Ty = C->getType(); + // Print zero initialized constant. 
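
A short note before printStaticConstant continues below: both printIndirectSave and printStaticInitializerList above rewrite a leading 'u' in the type postfix to 'i' before forming the "stind" mnemonic, since CIL has no unsigned store-indirect forms. The rewrite in isolation, as a standalone snippet:

#include <cassert>
#include <string>

static std::string stindFor(std::string postfix) {
  if (*postfix.begin() == 'u') *postfix.begin() = 'i';  // u4 -> i4, u8 -> i8
  return "stind." + postfix;
}

int main() {
  assert(stindFor("u4") == "stind.i4");
  assert(stindFor("i8") == "stind.i8");
  return 0;
}
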
+ if (isa(C) || C->isNullValue()) { + TySize = TD->getTypeAllocSize(C->getType()); + Offset += TySize; + Out << "int8 (0) [" << TySize << "]"; + return; + } + // Print constant initializer + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + TySize = TD->getTypeAllocSize(Ty); + const ConstantInt* Int = cast(C); + Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")"; + break; + } + case Type::FloatTyID: + case Type::DoubleTyID: { + TySize = TD->getTypeAllocSize(Ty); + const ConstantFP* FP = cast(C); + if (Ty->getTypeID() == Type::FloatTyID) + Out << "int32 (" << + (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; + else + Out << "int64 (" << + FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; + break; + } + case Type::ArrayTyID: + case Type::VectorTyID: + case Type::StructTyID: + for (unsigned I = 0, E = C->getNumOperands(); IgetOperand(I),Offset); + } + break; + case Type::PointerTyID: + TySize = TD->getTypeAllocSize(C->getType()); + // Initialize with global variable address + if (const GlobalVariable *G = dyn_cast(C)) { + std::string name = getValueName(G); + Out << "&(" << name.insert(name.length()-1,"$data") << ")"; + } else { + // Dynamic initialization + if (!isa(C) && !C->isNullValue()) + InitListPtr->push_back(StaticInitializer(C,Offset)); + // Null pointer initialization + if (TySize==4) Out << "int32 (0)"; + else if (TySize==8) Out << "int64 (0)"; + else assert(0 && "Invalid pointer size"); + } + break; + default: + cerr << "TypeID = " << Ty->getTypeID() << '\n'; + assert(0 && "Invalid type in printStaticConstant()"); + } + // Increase offset. + Offset += TySize; +} + + +void MSILWriter::printStaticInitializer(const Constant* C, + const std::string& Name) { + switch (C->getType()->getTypeID()) { + case Type::IntegerTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + Out << getPrimitiveTypeName(C->getType(), false); + break; + case Type::ArrayTyID: + case Type::VectorTyID: + case Type::StructTyID: + case Type::PointerTyID: + Out << getTypeName(C->getType()); + break; + default: + cerr << "Type = " << *C << "\n"; + assert(0 && "Invalid constant type"); + } + // Print initializer + std::string label = Name; + label.insert(label.length()-1,"$data"); + Out << Name << " at " << label << '\n'; + Out << ".data " << label << " = {\n"; + uint64_t offset = 0; + printStaticConstant(C,offset); + Out << "\n}\n\n"; +} + + +void MSILWriter::printVariableDefinition(const GlobalVariable* G) { + const Constant* C = G->getInitializer(); + if (C->isNullValue() || isa(C) || isa(C)) + InitListPtr = 0; + else + InitListPtr = &StaticInitList[G]; + printStaticInitializer(C,getValueName(G)); +} + + +void MSILWriter::printGlobalVariables() { + if (ModulePtr->global_empty()) return; + Module::global_iterator I,E; + for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) { + // Variable definition + Out << ".field static " << (I->isDeclaration() ? 
"public " : + "private "); + if (I->isDeclaration()) { + Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n"; + } else + printVariableDefinition(&*I); + } +} + + +const char* MSILWriter::getLibraryName(const Function* F) { + return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv()); +} + + +const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { + return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0); +} + + +const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction, + unsigned CallingConv) { + // TODO: Read *.def file with function and libraries definitions. + return "MSVCRT.DLL"; +} + + +void MSILWriter::printExternals() { + Module::const_iterator I,E; + // Functions. + for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) { + // Skip intrisics + if (I->isIntrinsic()) continue; + if (I->isDeclaration()) { + const Function* F = I; + std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); + std::string Sig = + getCallSignature(cast(F->getFunctionType()), NULL, Name); + Out << ".method static hidebysig pinvokeimpl(\"" + << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n"; + } + } + // External variables and static initialization. + Out << + ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" + " native int LoadLibrary(string) preservesig {}\n" + ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" + " native int GetProcAddress(native int, string) preservesig {}\n"; + Out << + ".method private static void* $MSIL_Import(string lib,string sym)\n" + " managed cil\n{\n" + "\tldarg\tlib\n" + "\tcall\tnative int LoadLibrary(string)\n" + "\tldarg\tsym\n" + "\tcall\tnative int GetProcAddress(native int,string)\n" + "\tdup\n" + "\tbrtrue\tL_01\n" + "\tldstr\t\"Can no import variable\"\n" + "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n" + "\tthrow\n" + "L_01:\n" + "\tret\n" + "}\n\n" + ".method static private void $MSIL_Init() managed cil\n{\n"; + printStaticInitializerList(); + // Foreach global variable. + for (Module::global_iterator I = ModulePtr->global_begin(), + E = ModulePtr->global_end(); I!=E; ++I) { + if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue; + // Use "LoadLibrary"/"GetProcAddress" to recive variable address. 
+ std::string Label = "not_null$_"+utostr(getUniqID()); + std::string Tmp = getTypeName(I->getType())+getValueName(&*I); + printSimpleInstruction("ldsflda",Tmp.c_str()); + Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n"; + Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n"; + printSimpleInstruction("call","void* $MSIL_Import(string,string)"); + printIndirectSave(I->getType()); + } + printSimpleInstruction("ret"); + Out << "}\n\n"; +} + + +//===----------------------------------------------------------------------===// +// External Interface declaration +//===----------------------------------------------------------------------===// + +bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel) +{ + if (FileType != TargetMachine::AssemblyFile) return true; + MSILWriter* Writer = new MSILWriter(o); + PM.add(createGCLoweringPass()); + PM.add(createLowerAllocationsPass(true)); + // FIXME: Handle switch trougth native IL instruction "switch" + PM.add(createLowerSwitchPass()); + PM.add(createCFGSimplificationPass()); + PM.add(new MSILModule(Writer->UsedTypes,Writer->TD)); + PM.add(Writer); + PM.add(createGCInfoDeleter()); + return false; +} diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h new file mode 100644 index 000000000000..45f5579bfb34 --- /dev/null +++ b/lib/Target/MSIL/MSILWriter.h @@ -0,0 +1,255 @@ +//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MSILWriter that is used by the MSIL. 
+//
+//===----------------------------------------------------------------------===//
+#ifndef MSILWRITER_H
+#define MSILWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/Mangler.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+  class MSILModule : public ModulePass {
+    Module *ModulePtr;
+    const std::set<const Type *>*& UsedTypes;
+    const TargetData*& TD;
+
+  public:
+    static char ID;
+    MSILModule(const std::set<const Type *>*& _UsedTypes,
+               const TargetData*& _TD)
+      : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<FindUsedTypes>();
+      AU.addRequired<TargetData>();
+    }
+
+    virtual const char *getPassName() const {
+      return "MSIL backend definitions";
+    }
+
+    virtual bool runOnModule(Module &M);
+
+  };
+
+  class MSILWriter : public FunctionPass {
+    struct StaticInitializer {
+      const Constant* constant;
+      uint64_t offset;
+
+      StaticInitializer()
+        : constant(0), offset(0) {}
+
+      StaticInitializer(const Constant* _constant, uint64_t _offset)
+        : constant(_constant), offset(_offset) {}
+    };
+
+    uint64_t UniqID;
+
+    uint64_t getUniqID() {
+      return ++UniqID;
+    }
+
+  public:
+    raw_ostream &Out;
+    Module* ModulePtr;
+    const TargetData* TD;
+    Mangler* Mang;
+    LoopInfo *LInfo;
+    std::vector<StaticInitializer>* InitListPtr;
+    std::map<const GlobalVariable*, std::vector<StaticInitializer> >
+      StaticInitList;
+    const std::set<const Type *>* UsedTypes;
+    static char ID;
+    MSILWriter(raw_ostream &o) : FunctionPass(&ID), Out(o) {
+      UniqID = 0;
+    }
+
+    enum ValueType {
+      UndefVT,
+      GlobalVT,
+      InternalVT,
+      ArgumentVT,
+      LocalVT,
+      ConstVT,
+      ConstExprVT
+    };
+
+    bool isVariable(ValueType V) {
+      return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
+    }
+
+    bool isConstValue(ValueType V) {
+      return V==ConstVT || V==ConstExprVT;
+    }
+
+    virtual const char *getPassName() const { return "MSIL backend"; }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LoopInfo>();
+      AU.setPreservesAll();
+    }
+
+    bool runOnFunction(Function &F);
+
+    virtual bool doInitialization(Module &M);
+
+    virtual bool doFinalization(Module &M);
+
+    void printModuleStartup();
+
+    bool isZeroValue(const Value* V);
+
+    std::string getValueName(const Value* V);
+
+    std::string getLabelName(const Value* V);
+
+    std::string getLabelName(const std::string& Name);
+
+    std::string getConvModopt(unsigned CallingConvID);
+
+    std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
+
+    std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
+
+    std::string getFunctionTypeName(const Type* Ty);
+
+    std::string getPointerTypeName(const Type* Ty);
+
+    std::string getTypeName(const Type* Ty, bool isSigned = false,
+                            bool isNested = false);
+
+    ValueType getValueLocation(const Value* V);
+
+    std::string getTypePostfix(const Type* Ty, bool Expand,
+                               bool isSigned = false);
+
+    void printConvToPtr();
+
+    void printPtrLoad(uint64_t N);
+
+    void printValuePtrLoad(const Value* V);
+
+    void printConstLoad(const Constant* C);
+
+    void printValueLoad(const Value* V);
+
+    void printValueSave(const Value* V);
+
+    void printBinaryInstruction(const char* Name, const Value* Left,
+                                const Value* Right);
+
+    void printSimpleInstruction(const
char* Inst, const char* Operand = NULL); + + void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst); + + void printBranchToBlock(const BasicBlock* CurrBB, + const BasicBlock* TrueBB, + const BasicBlock* FalseBB); + + void printBranchInstruction(const BranchInst* Inst); + + void printSelectInstruction(const Value* Cond, const Value* VTrue, + const Value* VFalse); + + void printIndirectLoad(const Value* V); + + void printIndirectSave(const Value* Ptr, const Value* Val); + + void printIndirectSave(const Type* Ty); + + void printCastInstruction(unsigned int Op, const Value* V, + const Type* Ty); + + void printGepInstruction(const Value* V, gep_type_iterator I, + gep_type_iterator E); + + std::string getCallSignature(const FunctionType* Ty, + const Instruction* Inst, + std::string Name); + + void printFunctionCall(const Value* FnVal, const Instruction* Inst); + + void printIntrinsicCall(const IntrinsicInst* Inst); + + void printCallInstruction(const Instruction* Inst); + + void printICmpInstruction(unsigned Predicate, const Value* Left, + const Value* Right); + + void printFCmpInstruction(unsigned Predicate, const Value* Left, + const Value* Right); + + void printInvokeInstruction(const InvokeInst* Inst); + + void printSwitchInstruction(const SwitchInst* Inst); + + void printVAArgInstruction(const VAArgInst* Inst); + + void printAllocaInstruction(const AllocaInst* Inst); + + void printInstruction(const Instruction* Inst); + + void printLoop(const Loop* L); + + void printBasicBlock(const BasicBlock* BB); + + void printLocalVariables(const Function& F); + + void printFunctionBody(const Function& F); + + void printConstantExpr(const ConstantExpr* CE); + + void printStaticInitializerList(); + + void printFunction(const Function& F); + + void printDeclarations(const TypeSymbolTable& ST); + + unsigned int getBitWidth(const Type* Ty); + + void printStaticConstant(const Constant* C, uint64_t& Offset); + + void printStaticInitializer(const Constant* C, const std::string& Name); + + void printVariableDefinition(const GlobalVariable* G); + + void printGlobalVariables(); + + const char* getLibraryName(const Function* F); + + const char* getLibraryName(const GlobalVariable* GV); + + const char* getLibraryForSymbol(const char* Name, bool isFunction, + unsigned CallingConv); + + void printExternals(); + }; +} + +#endif + diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile new file mode 100644 index 000000000000..94265edf98c5 --- /dev/null +++ b/lib/Target/MSIL/Makefile @@ -0,0 +1,14 @@ +##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMSIL +include $(LEVEL)/Makefile.common + +CompileCommonOpts := $(CompileCommonOpts) -Wno-format diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT new file mode 100644 index 000000000000..d797c71fd39f --- /dev/null +++ b/lib/Target/MSIL/README.TXT @@ -0,0 +1,26 @@ +//===---------------------------------------------------------------------===// + +Vector instructions support. + +ShuffleVector +ExtractElement +InsertElement + +//===---------------------------------------------------------------------===// + +Add "OpaqueType" type. 
+ +//===---------------------------------------------------------------------===// + +"switch" instruction emulation with CLI "switch" instruction. + +//===---------------------------------------------------------------------===// + +Write linker for external function, because function export need to know +dynamic library where function located. + +.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl) + void free(void*) preservesig {} + + + diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt new file mode 100644 index 000000000000..67017733cd9c --- /dev/null +++ b/lib/Target/MSP430/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS MSP430.td) + +tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(MSP430GenRegisterNames.inc -gen-register-enums) +tablegen(MSP430GenRegisterInfo.inc -gen-register-desc) +tablegen(MSP430GenInstrNames.inc -gen-instr-enums) +tablegen(MSP430GenInstrInfo.inc -gen-instr-desc) +tablegen(MSP430GenAsmWriter.inc -gen-asm-writer) +tablegen(MSP430GenDAGISel.inc -gen-dag-isel) +tablegen(MSP430GenCallingConv.inc -gen-callingconv) +tablegen(MSP430GenSubtarget.inc -gen-subtarget) + +add_llvm_target(MSP430 + MSP430AsmPrinter.cpp + MSP430FrameInfo.cpp + MSP430InstrInfo.cpp + MSP430ISelDAGToDAG.cpp + MSP430ISelLowering.cpp + MSP430RegisterInfo.cpp + MSP430Subtarget.cpp + MSP430TargetAsmInfo.cpp + MSP430TargetMachine.cpp + ) diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h new file mode 100644 index 000000000000..ed0cd0496aaa --- /dev/null +++ b/lib/Target/MSP430/MSP430.h @@ -0,0 +1,40 @@ +//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM MSP430 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MSP430_H +#define LLVM_TARGET_MSP430_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class MSP430TargetMachine; + class FunctionPass; + class raw_ostream; + + FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM, + CodeGenOpt::Level OptLevel); + FunctionPass *createMSP430CodePrinterPass(raw_ostream &o, + MSP430TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); +} // end namespace llvm; + +// Defines symbolic names for MSP430 registers. +// This defines a mapping from register name to register number. +#include "MSP430GenRegisterNames.inc" + +// Defines symbolic names for the MSP430 instructions. +#include "MSP430GenInstrNames.inc" + +#endif diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td new file mode 100644 index 000000000000..89313ab59c1b --- /dev/null +++ b/lib/Target/MSP430/MSP430.td @@ -0,0 +1,60 @@ +//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the MSP430 target. 
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+                    "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+  let InstructionSet = MSP430InstrInfo;
+}
+
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..71b785bb4fe5
--- /dev/null
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,267 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "MSP430.h" +#include "MSP430InstrInfo.h" +#include "MSP430TargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter { + public: + MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM, + const TargetAsmInfo *TAI, + CodeGenOpt::Level OL, bool V) + : AsmPrinter(O, TM, TAI, OL, V) {} + + virtual const char *getPassName() const { + return "MSP430 Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printSrcMemOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printCCOperand(const MachineInstr *MI, int OpNum); + bool printInstruction(const MachineInstr *MI); // autogenerated. + void printMachineInstruction(const MachineInstr * MI); + + void emitFunctionHeader(const MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + } + }; +} // end of anonymous namespace + +#include "MSP430GenAsmWriter.inc" + +/// createMSP430CodePrinterPass - Returns a pass that prints the MSP430 +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o, + MSP430TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +bool MSP430AsmPrinter::doInitialization(Module &M) { + Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix()); + return false; // success +} + + +bool MSP430AsmPrinter::doFinalization(Module &M) { + return AsmPrinter::doFinalization(M); +} + +void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + const Function *F = MF.getFunction(); + + SwitchToSection(TAI->SectionForGlobal(F)); + + unsigned FnAlign = 4; + if (F->hasFnAttr(Attribute::OptimizeForSize)) + FnAlign = 1; + + EmitAlignment(FnAlign, F); + + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
+ case Function::PrivateLinkage: + break; + case Function::ExternalLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + O << "\t.weak\t" << CurrentFnName << '\n'; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ",@function\n" + << CurrentFnName << ":\n"; +} + +bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + O << "\n\n"; + + // Print the 'header' of function + emitFunctionHeader(MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { + // This is an entry block or a block that's only reachable via a + // fallthrough edge. In non-VerboseAsm mode, don't print the label. + } else { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) + // Print the assembly for the instruction. + printMachineInstruction(II); + } + + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; + + O.flush(); + + // We didn't modify anything + return false; +} + +void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + // Call the autogenerated instruction printer routines. + if (printInstruction(MI)) + return; + + assert(0 && "Should not happen"); +} + +void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should be already mapped!"); + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + return; + case MachineOperand::MO_Immediate: + if (!Modifier || strcmp(Modifier, "nohash")) + O << '#'; + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_GlobalAddress: { + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name = Mang->getValueName(MO.getGlobal()); + assert(MO.getOffset() == 0 && "No offsets allowed!"); + + if (isCallOp) + O << '#'; + else if (isMemOp) + O << '&'; + + O << Name; + + return; + } + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (isCallOp) + O << '#'; + O << Name; + return; + } + default: + assert(0 && "Not implemented yet!"); + } +} + +void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Disp = MI->getOperand(OpNum+1); + + if (Base.isGlobal()) + printOperand(MI, OpNum, "mem"); + else if (Disp.isImm() && !Base.getReg()) + printOperand(MI, OpNum); + else if (Base.getReg()) { + if (Disp.getImm()) { + printOperand(MI, OpNum + 1, "nohash"); + O << '('; + printOperand(MI, OpNum); + O << ')'; + } else { + O << '@'; + printOperand(MI, OpNum); + } + } else + assert(0 && "Unsupported memory operand"); 
+}
+
+void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
+  unsigned CC = MI->getOperand(OpNum).getImm();
+
+  switch (CC) {
+  default:
+    assert(0 && "Unsupported CC code");
+    break;
+  case MSP430::COND_E:
+    O << "eq";
+    break;
+  case MSP430::COND_NE:
+    O << "ne";
+    break;
+  case MSP430::COND_HS:
+    O << "hs";
+    break;
+  case MSP430::COND_LO:
+    O << "lo";
+    break;
+  case MSP430::COND_GE:
+    O << "ge";
+    break;
+  case MSP430::COND_L:
+    O << 'l';
+    break;
+  }
+}
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 000000000000..ad27cc9122a8
--- /dev/null
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+  // i8 values are returned in registers R15B, R14B, R13B, R12B
+  CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+  // i16 values are returned in registers R15, R14, R13, R12
+  CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+  // Promote i8 arguments to i16.
+  CCIfType<[i8], CCPromoteToType<i16>>,
+
+  // The first 4 integer arguments of non-varargs functions are passed in
+  // integer registers.
+  CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+  // Integer values get stored in stack slots that are 2 bytes in
+  // size and 2-byte aligned.
+  CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..bf49ec0bff46
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,194 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
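
CC_MSP430 above promotes i8 to i16, places the first four arguments of a non-vararg call in R15W through R12W, and assigns the rest to 2-byte aligned stack slots. A hypothetical standalone walk-through of that assignment for six i16 arguments (the loop is illustrative; the real assignment is driven by the generated calling-convention code):

#include <iostream>

int main() {
  const char* Regs[] = { "R15W", "R14W", "R13W", "R12W" };
  unsigned NumArgs = 6, StackOffset = 0;
  for (unsigned I = 0; I != NumArgs; ++I) {
    if (I < 4) {
      std::cout << "arg" << I << " -> " << Regs[I] << '\n';  // CCAssignToReg
    } else {
      std::cout << "arg" << I << " -> stack+" << StackOffset << '\n';
      StackOffset += 2;                                      // CCAssignToStack<2, 2>
    }
  }
  return 0;
}
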
+// +//===----------------------------------------------------------------------===// + +#include "MSP430.h" +#include "MSP430ISelLowering.h" +#include "MSP430TargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine +/// instructions for SelectionDAG operations. +/// +namespace { + class MSP430DAGToDAGISel : public SelectionDAGISel { + MSP430TargetLowering &Lowering; + const MSP430Subtarget &Subtarget; + + public: + MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel), + Lowering(*TM.getTargetLowering()), + Subtarget(*TM.getSubtargetImpl()) { } + + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "MSP430 DAG->DAG Pattern Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. + #include "MSP430GenDAGISel.inc" + + private: + SDNode *Select(SDValue Op); + bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); + + #ifndef NDEBUG + unsigned Indent; + #endif + }; +} // end anonymous namespace + +/// createMSP430ISelDag - This pass converts a legalized DAG into a +/// MSP430-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new MSP430DAGToDAGISel(TM, OptLevel); +} + +// FIXME: This is pretty dummy routine and needs to be rewritten in the future. +bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp) { + // Try to match frame address first. + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); + Disp = CurDAG->getTargetConstant(0, MVT::i16); + return true; + } + + switch (Addr.getOpcode()) { + case ISD::ADD: + // Operand is a result from ADD with constant operand which fits into i16. + if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { + uint64_t CVal = CN->getZExtValue(); + // Offset should fit into 16 bits. + if (((CVal << 48) >> 48) == CVal) { + SDValue N0 = Addr.getOperand(0); + if (FrameIndexSDNode *FIN = dyn_cast(N0)) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); + else + Base = N0; + + Disp = CurDAG->getTargetConstant(CVal, MVT::i16); + return true; + } + } + break; + case MSP430ISD::Wrapper: + SDValue N0 = Addr.getOperand(0); + if (GlobalAddressSDNode *G = dyn_cast(N0)) { + Base = CurDAG->getTargetGlobalAddress(G->getGlobal(), + MVT::i16, G->getOffset()); + Disp = CurDAG->getTargetConstant(0, MVT::i16); + return true; + } else if (ExternalSymbolSDNode *E = dyn_cast(N0)) { + Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16); + Disp = CurDAG->getTargetConstant(0, MVT::i16); + } + break; + }; + + Base = Addr; + Disp = CurDAG->getTargetConstant(0, MVT::i16); + + return true; +} + + + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
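
The constant-offset test in SelectAddr above, ((CVal << 48) >> 48) == CVal, uses logical shifts on an unsigned 64-bit value, so it accepts exactly the offsets that fit in 16 bits. A standalone check (illustration only):

#include <cassert>
#include <cstdint>

static bool fitsIn16(uint64_t CVal) {
  return ((CVal << 48) >> 48) == CVal;  // keeps only the low 16 bits
}

int main() {
  assert(fitsIn16(0xFFFF));      // largest 16-bit offset
  assert(!fitsIn16(0x10000));    // one past the range
  return 0;
}
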
+void MSP430DAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + // Codegen the basic block. +#ifndef NDEBUG + DOUT << "===== Instruction selection begins:\n"; + Indent = 0; +#endif + SelectRoot(*CurDAG); +#ifndef NDEBUG + DOUT << "===== Instruction selection ends:\n"; +#endif + + CurDAG->RemoveDeadNodes(); +} + +SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { + SDNode *Node = Op.getNode(); + DebugLoc dl = Op.getDebugLoc(); + + // Dump information about the Node being selected + #ifndef NDEBUG + DOUT << std::string(Indent, ' ') << "Selecting: "; + DEBUG(Node->dump(CurDAG)); + DOUT << "\n"; + Indent += 2; + #endif + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "== "; + DEBUG(Node->dump(CurDAG)); + DOUT << "\n"; + Indent -= 2; + #endif + return NULL; + } + + // Few custom selection stuff. + switch (Node->getOpcode()) { + default: break; + case ISD::FrameIndex: { + assert(Op.getValueType() == MVT::i16); + int FI = cast(Node)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); + if (Node->hasOneUse()) + return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, + TFI, CurDAG->getTargetConstant(0, MVT::i16)); + return CurDAG->getTargetNode(MSP430::ADD16ri, dl, MVT::i16, + TFI, CurDAG->getTargetConstant(0, MVT::i16)); + } + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Op); + + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == Op.getNode()) + DEBUG(Op.getNode()->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DOUT << "\n"; + Indent -= 2; + #endif + + return ResNode; +} diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp new file mode 100644 index 000000000000..14db20e5fcd6 --- /dev/null +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -0,0 +1,670 @@ +//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MSP430TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msp430-lower" + +#include "MSP430ISelLowering.h" +#include "MSP430.h" +#include "MSP430TargetMachine.h" +#include "MSP430Subtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/VectorExtras.h" +using namespace llvm; + +MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : + TargetLowering(tm), Subtarget(*tm.getSubtargetImpl()), TM(tm) { + + // Set up the register classes. 
+  addRegisterClass(MVT::i8,  MSP430::GR8RegisterClass);
+  addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+  // Compute derived properties from the register classes.
+  computeRegisterProperties();
+
+  // Provide all sorts of operation actions.
+
+  // Division is expensive.
+  setIntDivIsCheap(false);
+
+  // Even if we have only 1-bit shifts here, we can perform
+  // shifts of the whole bitwidth 1 bit per step.
+  setShiftAmountType(MVT::i8);
+
+  setStackPointerRegisterToSaveRestore(MSP430::SPW);
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setSchedulingPreference(SchedulingForLatency);
+
+  setLoadExtAction(ISD::EXTLOAD,  MVT::i1,  Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8,  Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+  // We don't have any truncstores.
+  setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+  setOperationAction(ISD::SRA,            MVT::i8,    Custom);
+  setOperationAction(ISD::SHL,            MVT::i8,    Custom);
+  setOperationAction(ISD::SRL,            MVT::i8,    Custom);
+  setOperationAction(ISD::SRA,            MVT::i16,   Custom);
+  setOperationAction(ISD::SHL,            MVT::i16,   Custom);
+  setOperationAction(ISD::SRL,            MVT::i16,   Custom);
+  setOperationAction(ISD::ROTL,           MVT::i8,    Expand);
+  setOperationAction(ISD::ROTR,           MVT::i8,    Expand);
+  setOperationAction(ISD::ROTL,           MVT::i16,   Expand);
+  setOperationAction(ISD::ROTR,           MVT::i16,   Expand);
+  setOperationAction(ISD::RET,            MVT::Other, Custom);
+  setOperationAction(ISD::GlobalAddress,  MVT::i16,   Custom);
+  setOperationAction(ISD::ExternalSymbol, MVT::i16,   Custom);
+  setOperationAction(ISD::BR_JT,          MVT::Other, Expand);
+  setOperationAction(ISD::BRIND,          MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC,          MVT::i8,    Custom);
+  setOperationAction(ISD::BR_CC,          MVT::i16,   Custom);
+  setOperationAction(ISD::BRCOND,         MVT::Other, Expand);
+  setOperationAction(ISD::SETCC,          MVT::i8,    Expand);
+  setOperationAction(ISD::SETCC,          MVT::i16,   Expand);
+  setOperationAction(ISD::SELECT,         MVT::i8,    Expand);
+  setOperationAction(ISD::SELECT,         MVT::i16,   Expand);
+  setOperationAction(ISD::SELECT_CC,      MVT::i8,    Custom);
+  setOperationAction(ISD::SELECT_CC,      MVT::i16,   Custom);
+  setOperationAction(ISD::SIGN_EXTEND,    MVT::i16,   Custom);
+
+  // FIXME: Implement multiplication by a constant efficiently.
+  setOperationAction(ISD::MUL,            MVT::i16,   Expand);
+  setOperationAction(ISD::MULHS,          MVT::i16,   Expand);
+  setOperationAction(ISD::MULHU,          MVT::i16,   Expand);
+  setOperationAction(ISD::SMUL_LOHI,      MVT::i16,   Expand);
+  setOperationAction(ISD::UMUL_LOHI,      MVT::i16,   Expand);
+
+  setOperationAction(ISD::UDIV,           MVT::i16,   Expand);
+  setOperationAction(ISD::UDIVREM,        MVT::i16,   Expand);
+  setOperationAction(ISD::UREM,           MVT::i16,   Expand);
+  setOperationAction(ISD::SDIV,           MVT::i16,   Expand);
+  setOperationAction(ISD::SDIVREM,        MVT::i16,   Expand);
+  setOperationAction(ISD::SREM,           MVT::i16,   Expand);
+}
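These action settings drive the port's whole control-flow story. Roughly, one IR select travels as follows (the SELECT to SELECT_CC expansion is generic legalizer behavior and is sketched from general LLVM knowledge rather than from this patch; everything after that is visible in the files below):

    //   select i1 %c, i16 %a, i16 %b
    //     ISD::SELECT is Expand    -> becomes ISD::SELECT_CC %c, 0, %a, %b, setne
    //     ISD::SELECT_CC is Custom -> LowerSELECT_CC emits MSP430ISD::SELECT_CC
    //     matched by the Select16 pseudo in MSP430InstrInfo.td
    //     EmitInstrWithCustomInserter expands the pseudo into a
    //     compare-and-branch diamond ending in a PHI.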
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getOpcode()) {
+  case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+  case ISD::SHL: // FALLTHROUGH
+  case ISD::SRL:
+  case ISD::SRA:              return LowerShifts(Op, DAG);
+  case ISD::RET:              return LowerRET(Op, DAG);
+  case ISD::CALL:             return LowerCALL(Op, DAG);
+  case ISD::GlobalAddress:    return LowerGlobalAddress(Op, DAG);
+  case ISD::ExternalSymbol:   return LowerExternalSymbol(Op, DAG);
+  case ISD::BR_CC:            return LowerBR_CC(Op, DAG);
+  case ISD::SELECT_CC:        return LowerSELECT_CC(Op, DAG);
+  case ISD::SIGN_EXTEND:      return LowerSIGN_EXTEND(Op, DAG);
+  default:
+    assert(0 && "unimplemented operand");
+    return SDValue();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                      Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue MSP430TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+                                                    SelectionDAG &DAG) {
+  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  switch (CC) {
+  default:
+    assert(0 && "Unsupported calling convention");
+  case CallingConv::C:
+  case CallingConv::Fast:
+    return LowerCCCArguments(Op, DAG);
+  }
+}
+
+SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  unsigned CallingConv = TheCall->getCallingConv();
+  switch (CallingConv) {
+  default:
+    assert(0 && "Unsupported calling convention");
+  case CallingConv::Fast:
+  case CallingConv::C:
+    return LowerCCCCallTo(Op, DAG, CallingConv);
+  }
+}
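Both dispatchers above poke at the positional node layout of this LLVM revision; spelled out, as this file consumes it (a reading of the code, not a normative definition):

    // ISD::FORMAL_ARGUMENTS operands: 0 = chain, 1 = calling convention
    // (a ConstantSDNode), 2 = non-zero for varargs. Its results are one
    // value per formal argument plus the output chain, which is why
    // LowerCCCArguments below ends in a MERGE_VALUES node.
    // ISD::CALL is wrapped by CallSDNode, so LowerCALL can use accessors
    // (getCallingConv, getChain, getCallee, getArg) instead of raw operands.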
+
+/// LowerCCCArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
+                                                SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  unsigned CC = MF.getFunction()->getCallingConv();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MSP430);
+
+  assert(!isVarArg && "Varargs not supported yet");
+
+  SmallVector<SDValue, 16> ArgValues;
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      // Arguments passed in registers.
+      MVT RegVT = VA.getLocVT();
+      switch (RegVT.getSimpleVT()) {
+      default:
+        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+             << RegVT.getSimpleVT()
+             << "\n";
+        abort();
+      case MVT::i16:
+        unsigned VReg =
+          RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        SDValue ArgValue = DAG.getCopyFromReg(Root, dl, VReg, RegVT);
+
+        // If this is an 8-bit value, it is really passed promoted to 16
+        // bits. Insert an assert[sz]ext to capture this, then truncate to the
+        // right size.
+        if (VA.getLocInfo() == CCValAssign::SExt)
+          ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+                                 DAG.getValueType(VA.getValVT()));
+        else if (VA.getLocInfo() == CCValAssign::ZExt)
+          ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+                                 DAG.getValueType(VA.getValVT()));
+
+        if (VA.getLocInfo() != CCValAssign::Full)
+          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+        ArgValues.push_back(ArgValue);
+      }
+    } else {
+      // Sanity check.
+      assert(VA.isMemLoc());
+      // Load the argument into a virtual register.
+      unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+      if (ObjSize > 2) {
+        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+             << VA.getLocVT().getSimpleVT()
+             << "\n";
+      }
+      // Create the frame index object for this incoming parameter...
+      int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+      SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+      ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN,
+                                      PseudoSourceValue::getFixedStack(FI), 0));
+    }
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+  // CCValAssign - represent the assignment of the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // CCState - Info about the registers and stack slots.
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze the return values of ISD::RET.
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_MSP430);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  // The chain is always operand #0.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (regnum1,val1), ...
+    // So i*2+1 indexes only the regnums.
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are stuck together,
+    // avoiding something bad.
+    Flag = Chain.getValue(1);
+  }
+
+  if (Flag.getNode())
+    return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+  // Return void.
+  return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+}
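A sketch of the chain LowerRET builds for a value-returning function; the physical location loc0 is whatever RetCC_MSP430 (in the generated calling-convention include, not shown in this patch) assigns:

    //   ret i16 %a  lowers to:
    //     ch = CopyToReg ch, loc0, %a   ; glue value started here
    //     RET_FLAG ch, glue             ; custom MSP430 return node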
+
+/// LowerCCCCallTo - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+                                             unsigned CC) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain = TheCall->getChain();
+  SDValue Callee = TheCall->getCallee();
+  bool isVarArg = TheCall->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  CCInfo.AnalyzeCallOperands(TheCall, CC_MSP430);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+                                                      getPointerTy(), true));
+
+  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+  SmallVector<SDValue, 12> MemOpChains;
+  SDValue StackPtr;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    // Arguments start after the 5 first operands of ISD::CALL.
+    SDValue Arg = TheCall->getArg(i);
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+
+      if (StackPtr.getNode() == 0)
+        StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+      SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                   StackPtr,
+                                   DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         PseudoSourceValue::getStack(),
+                                         VA.getLocMemOffset()));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store nodes are
+  // independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers. The InFlag
+  // is necessary since all emitted instructions must be stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getConstant(NumBytes, getPointerTy(), true),
+                             DAG.getConstant(0, getPointerTy(), true),
+                             InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+                 Op.getResNo());
+}
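The resulting call sequence, for a direct call with one register argument and one stack argument (register and offset names are placeholders; CC_MSP430 in the generated include decides the real assignments):

    //   ch       = CALLSEQ_START ch, #NumBytes
    //   st       = store %stackarg -> (SPW + offset)  ; TokenFactor'd into ch
    //   ch, glue = CopyToReg ch, argreg, %regarg
    //   ch, glue = MSP430ISD::CALL ch, TargetGlobalAddress @f, argreg, glue
    //   ch, glue = CALLSEQ_END ch, #NumBytes, #0, glue
    //   ...LowerCallResult then copies the returned values out of physregs.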
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes
+/// that Chain/InFlag are the input chain/flag to use, and that TheCall is
+/// the call being lowered. Returns a SDNode with the same number of values
+/// as the ISD::CALL.
+SDNode*
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                      CallSDNode *TheCall,
+                                      unsigned CallingConv,
+                                      SelectionDAG &DAG) {
+  bool isVarArg = TheCall->isVarArg();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+  CCInfo.AnalyzeCallResult(TheCall, RetCC_MSP430);
+  SmallVector<SDValue, 8> ResultVals;
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+                               RVLocs[i].getValVT(), InFlag).getValue(1);
+    InFlag = Chain.getValue(2);
+    ResultVals.push_back(Chain.getValue(0));
+  }
+
+  ResultVals.push_back(Chain);
+
+  // Merge everything together with a MERGE_VALUES node.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+                     &ResultVals[0], ResultVals.size()).getNode();
+}
+
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+                                          SelectionDAG &DAG) {
+  unsigned Opc = Op.getOpcode();
+  SDNode* N = Op.getNode();
+  MVT VT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // We currently only lower shifts by a constant amount.
+  if (!isa<ConstantSDNode>(N->getOperand(1)))
+    return SDValue();
+
+  uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+  // Expand the shift into a sequence of one-bit shifts.
+  // FIXME: for some shift amounts this might be done better!
+  // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+  SDValue Victim = N->getOperand(0);
+
+  if (Opc == ISD::SRL && ShiftAmount) {
+    // Emit a special goodness here:
+    // srl A, 1 => clrc; rrc A
+    Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+    ShiftAmount -= 1;
+  }
+
+  while (ShiftAmount--)
+    Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+                         dl, VT, Victim);
+
+  return Victim;
+}
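Concretely, a logical right shift by three expands as below; the initial RRC clears the carry and hence bit 15, so the later arithmetic RRA steps keep shifting zeros in (instruction spellings taken from the SAR16r1c/SAR16r1 defs later in this patch):

    //   srl i16 %x, 3   ==>   clrc
    //                         rrc.w rN   ; bit15 <- C = 0
    //                         rra.w rN   ; bit15 replicated (still 0)
    //                         rra.w rN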
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+                                                 SelectionDAG &DAG) {
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+  // Create the TargetGlobalAddress node, folding in the constant offset.
+  SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+  return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+                     getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+                                                  SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC,
+                       ISD::CondCode CC,
+                       DebugLoc dl, SelectionDAG &DAG) {
+  // FIXME: Handle bittests someday
+  assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+  // FIXME: Handle jump negative someday
+  TargetCC = MSP430::COND_INVALID;
+  switch (CC) {
+  default: assert(0 && "Invalid integer condition!");
+  case ISD::SETEQ:
+    TargetCC = MSP430::COND_E;  // aka COND_Z
+    break;
+  case ISD::SETNE:
+    TargetCC = MSP430::COND_NE; // aka COND_NZ
+    break;
+  case ISD::SETULE:
+    std::swap(LHS, RHS);        // FALLTHROUGH
+  case ISD::SETUGE:
+    TargetCC = MSP430::COND_HS; // aka COND_C
+    break;
+  case ISD::SETUGT:
+    std::swap(LHS, RHS);        // FALLTHROUGH
+  case ISD::SETULT:
+    TargetCC = MSP430::COND_LO; // aka COND_NC
+    break;
+  case ISD::SETLE:
+    std::swap(LHS, RHS);        // FALLTHROUGH
+  case ISD::SETGE:
+    TargetCC = MSP430::COND_GE;
+    break;
+  case ISD::SETGT:
+    std::swap(LHS, RHS);        // FALLTHROUGH
+  case ISD::SETLT:
+    TargetCC = MSP430::COND_L;
+    break;
+  }
+
+  return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+}
+
+
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS  = Op.getOperand(2);
+  SDValue RHS  = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl  = Op.getDebugLoc();
+
+  unsigned TargetCC = MSP430::COND_INVALID;
+  SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+                     Chain,
+                     Dest, DAG.getConstant(TargetCC, MVT::i8),
+                     Flag);
+}
+
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS    = Op.getOperand(0);
+  SDValue RHS    = Op.getOperand(1);
+  SDValue TrueV  = Op.getOperand(2);
+  SDValue FalseV = Op.getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  DebugLoc dl    = Op.getDebugLoc();
+
+  unsigned TargetCC = MSP430::COND_INVALID;
+  SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SmallVector<SDValue, 4> Ops;
+  Ops.push_back(TrueV);
+  Ops.push_back(FalseV);
+  Ops.push_back(DAG.getConstant(TargetCC, MVT::i8));
+  Ops.push_back(Flag);
+
+  return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+                                               SelectionDAG &DAG) {
+  SDValue Val = Op.getOperand(0);
+  MVT VT      = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  assert(VT == MVT::i16 && "Only support i16 for now!");
+
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+                     DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+                     DAG.getValueType(Val.getValueType()));
+}
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (Opcode) {
+  default: return NULL;
+  case MSP430ISD::RET_FLAG:           return "MSP430ISD::RET_FLAG";
+  case MSP430ISD::RRA:                return "MSP430ISD::RRA";
+  case MSP430ISD::RLA:                return "MSP430ISD::RLA";
+  case MSP430ISD::RRC:                return "MSP430ISD::RRC";
+  case MSP430ISD::CALL:               return
"MSP430ISD::CALL"; + case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper"; + case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC"; + case MSP430ISD::CMP: return "MSP430ISD::CMP"; + case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC"; + } +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock* +MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + assert((MI->getOpcode() == MSP430::Select16 || + MI->getOpcode() == MSP430::Select8) && + "Unexpected instr type to insert"); + + // To "insert" a SELECT instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator I = BB; + ++I; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // jCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII.get(MSP430::JCC)) + .addMBB(copy1MBB) + .addImm(MI->getOperand(3).getImm()); + F->insert(I, copy0MBB); + F->insert(I, copy1MBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + copy1MBB->transferSuccessors(BB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(copy1MBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to copy1MBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(copy1MBB); + + // copy1MBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = copy1MBB; + BuildMI(BB, dl, TII.get(MSP430::PHI), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h new file mode 100644 index 000000000000..404534dde89e --- /dev/null +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -0,0 +1,103 @@ +//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MSP430 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H +#define LLVM_TARGET_MSP430_ISELLOWERING_H + +#include "MSP430.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + namespace MSP430ISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// Return with a flag operand. 
Operand 0 is the chain operand.
+      RET_FLAG,
+
+      /// Y = R{R,L}A X, rotate right (left) arithmetically
+      RRA, RLA,
+
+      /// Y = RRC X, rotate right via carry
+      RRC,
+
+      /// CALL/TAILCALL - These operations represent an abstract call
+      /// instruction, which includes a bunch of information.
+      CALL,
+
+      /// Wrapper - A wrapper node for TargetConstantPool,
+      /// TargetExternalSymbol, and TargetGlobalAddress.
+      Wrapper,
+
+      /// CMP - Compare instruction.
+      CMP,
+
+      /// SetCC. Operand 0 is the condition code, and operand 1 is the flag
+      /// operand produced by a CMP instruction.
+      SETCC,
+
+      /// MSP430 conditional branches. Operand 0 is the chain operand,
+      /// operand 1 is the block to branch to if the condition is true,
+      /// operand 2 is the condition code, and operand 3 is the flag operand
+      /// produced by a CMP instruction.
+      BR_CC,
+
+      /// SELECT_CC. Operands 0 and 1 are the selection values, operand 2 is
+      /// the condition code and operand 3 is the flag operand.
+      SELECT_CC
+    };
+  }
+
+  class MSP430Subtarget;
+  class MSP430TargetMachine;
+
+  class MSP430TargetLowering : public TargetLowering {
+  public:
+    explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+    /// LowerOperation - Provide custom lowering hooks for some operations.
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    /// getTargetNodeName - This method returns the name of a target specific
+    /// DAG node.
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerShifts(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+
+    SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+                           unsigned CC);
+    SDNode* LowerCallResult(SDValue Chain, SDValue InFlag,
+                            CallSDNode *TheCall,
+                            unsigned CallingConv, SelectionDAG &DAG);
+
+    MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB) const;
+
+  private:
+    const MSP430Subtarget &Subtarget;
+    const MSP430TargetMachine &TM;
+  };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 000000000000..61b339901648
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,67 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats ----*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MSP430 instruction formats here.
+//
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, string asmstr> : Instruction {
+  field bits<16> Inst;
+
+  let Namespace = "MSP430";
+
+  dag OutOperandList = outs;
+  dag InOperandList  = ins;
+
+  let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
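The three format classes that follow populate these fields of the 16-bit Inst record; bits not listed (register numbers, jump offsets) are left to the individual instruction defs, and nothing fills them in yet since everything below is still emitted as pseudos:

    // Format I  (two operands) : Inst{15-12} opcode, Inst{7} Ad, Inst{6} B/W, Inst{5-4} As
    // Format II (one operand)  : Inst{15-7}  opcode, Inst{6} B/W, Inst{5-4} Ad
    // Jumps                    : Inst{15-13} opcode, Inst{12-10} cond, Inst{9} S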
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, bit ad, bit bw, bits<2> as,
+            dag outs, dag ins, string asmstr, list<dag> pattern>
+  : MSP430Inst<outs, ins, asmstr> {
+  let Pattern = pattern;
+
+  let Inst{12-15} = opcode;
+  let Inst{7}     = ad;
+  let Inst{6}     = bw;
+  let Inst{4-5}   = as;
+}
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, bits<2> ad,
+             dag outs, dag ins, string asmstr, list<dag> pattern>
+  : MSP430Inst<outs, ins, asmstr> {
+  let Pattern = pattern;
+
+  let Inst{7-15} = opcode;
+  let Inst{6}    = bw;
+  let Inst{4-5}  = ad;
+}
+
+// MSP430 Conditional Jump Instructions
+class CJForm<bits<3> opcode, bits<3> cond, bit s,
+             dag outs, dag ins, string asmstr, list<dag> pattern>
+  : MSP430Inst<outs, ins, asmstr> {
+  let Pattern = pattern;
+
+  let Inst{13-15} = opcode;
+  let Inst{10-12} = cond;
+  let Inst{9}     = s;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : MSP430Inst<outs, ins, asmstr> {
+  let Pattern = pattern;
+  let Inst{15-0} = 0;
+}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 000000000000..91112c3d732f
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,177 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+  : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)),
+    RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MI,
+                                          unsigned SrcReg, bool isKill,
+                                          int FrameIdx,
+                                          const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (RC == &MSP430::GR16RegClass)
+    BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+      .addFrameIndex(FrameIdx).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill));
+  else if (RC == &MSP430::GR8RegClass)
+    BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+      .addFrameIndex(FrameIdx).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill));
+  else
+    assert(0 && "Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                           unsigned DestReg, int FrameIdx,
+                                           const TargetRegisterClass *RC) const{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (RC == &MSP430::GR16RegClass)
+    BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+      .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+  else if (RC == &MSP430::GR8RegClass)
+    BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+      .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+  else
+    assert(0 && "Cannot load this register from stack slot!");
+}
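These two hooks supply the register allocator's spill and reload code; for a GR16 register they boil down to frame-index-addressed moves, roughly as below (register and slot numbers are illustrative; the frame index is later rewritten to an SP-relative offset during frame index elimination):

    //   spill:   mov.w r10, <fi#2 + 0>   ; MOV16mr
    //   reload:  mov.w <fi#2 + 0>, r10   ; MOV16rm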
+
+bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   const TargetRegisterClass *DestRC,
+                                   const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (DestRC == SrcRC) {
+    unsigned Opc;
+    if (DestRC == &MSP430::GR16RegClass) {
+      Opc = MSP430::MOV16rr;
+    } else if (DestRC == &MSP430::GR8RegClass) {
+      Opc = MSP430::MOV8rr;
+    } else {
+      return false;
+    }
+
+    BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
+    return true;
+  }
+
+  return false;
+}
+
+bool
+MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
+                             unsigned &SrcReg, unsigned &DstReg,
+                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
+
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case MSP430::MOV8rr:
+  case MSP430::MOV16rr:
+    assert(MI.getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "invalid register-register move instruction");
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+}
+
+bool
+MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+  MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
+      .addReg(Reg, RegState::Kill);
+  }
+  return true;
+}
+
+bool
+MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
+
+  return true;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                              MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME: this should probably have a DebugLoc operand.
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "MSP430 branch conditions have one component!");
+
+  if (Cond.empty()) {
+    // Unconditional branch.
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch.
+  unsigned Count = 0;
+  assert(0 && "Implement conditional branches!");
+
+  return Count;
+}
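The conditional case above is still an assert; a minimal sketch of what it could look like, reusing the JCC pseudo from MSP430InstrInfo.td and assuming the convention that Cond[0] carries an MSP430::CondCode immediate, the same convention EmitInstrWithCustomInserter uses when it reads operand 3 of the Select pseudos (this completion is hypothetical, not part of the imported revision):

      // Hypothetical: emit a conditional jump to TBB, plus an explicit jump
      // to the false block when this is a two-way branch.
      BuildMI(&MBB, dl, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());
      ++Count;
      if (FBB) {
        BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(FBB);
        ++Count;
      }
      return Count;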
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 000000000000..e07aacad9dc2
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,84 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+namespace MSP430 {
+  // MSP430 specific condition codes.
+  enum CondCode {
+    COND_E  = 0,  // aka COND_Z
+    COND_NE = 1,  // aka COND_NZ
+    COND_HS = 2,  // aka COND_C
+    COND_LO = 3,  // aka COND_NC
+    COND_GE = 4,
+    COND_L  = 5,
+
+    COND_INVALID
+  };
+}
+
+class MSP430InstrInfo : public TargetInstrInfoImpl {
+  const MSP430RegisterInfo RI;
+  MSP430TargetMachine &TM;
+public:
+  explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+  bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned DestReg, unsigned SrcReg,
+                    const TargetRegisterClass *DestRC,
+                    const TargetRegisterClass *SrcRC) const;
+
+  bool isMoveInstr(const MachineInstr& MI,
+                   unsigned &SrcReg, unsigned &DstReg,
+                   unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill,
+                                   int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIdx,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 000000000000..39c08e40be46
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,901 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum>  : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call         : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper      : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                                  SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp          : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC         : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+                                                  SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+                                                  SDTCisSameAs<1, 2>,
+                                                  SDTCisVT<3, i8>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+                           [SDNPHasChain, SDNPOptInFlag]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+                        [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+def MSP430callseq_start :
+                 SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+                        [SDNPHasChain, SDNPOutFlag]>;
+def MSP430callseq_end :
+                 SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+                        [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc : SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+                            [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+  let PrintMethod = "printSrcMemOperand";
+  let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+  let PrintMethod = "printSrcMemOperand";
+  let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+  let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def  extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in { +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt), + "#ADJCALLSTACKDOWN", + [(MSP430callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), + "#ADJCALLSTACKUP", + [(MSP430callseq_end timm:$amt1, timm:$amt2)]>; +} + +let usesCustomDAGSchedInserter = 1 in { + def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc), + "# Select8 PSEUDO", + [(set GR8:$dst, + (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>; + def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc), + "# Select16 PSEUDO", + [(set GR16:$dst, + (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>; +} + +let neverHasSideEffects = 1 in +def NOP : Pseudo<(outs), (ins), "nop", []>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions... +// + +// FIXME: Provide proper encoding! +let isReturn = 1, isTerminator = 1 in { + def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; +} + +let isBranch = 1, isTerminator = 1 in { + +// Direct branch +let isBarrier = 1 in + def JMP : Pseudo<(outs), (ins brtarget:$dst), + "jmp\t$dst", + [(br bb:$dst)]>; + +// Conditional branches +let Uses = [SRW] in + def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc), + "j$cc $dst", + [(MSP430brcc bb:$dst, imm:$cc)]>; +} // isBranch, isTerminator + +//===----------------------------------------------------------------------===// +// Call Instructions... +// +let isCall = 1 in + // All calls clobber the non-callee saved registers. SPW is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [R12W, R13W, R14W, R15W, SRW], + Uses = [SPW] in { + def CALLi : Pseudo<(outs), (ins i16imm:$dst, variable_ops), + "call\t${dst:call}", [(MSP430call imm:$dst)]>; + def CALLr : Pseudo<(outs), (ins GR16:$dst, variable_ops), + "call\t$dst", [(MSP430call GR16:$dst)]>; + def CALLm : Pseudo<(outs), (ins memsrc:$dst, variable_ops), + "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>; + } + + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions... +// +let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in { +let mayLoad = 1 in +def POP16r : Pseudo<(outs GR16:$reg), (ins), "pop.w\t$reg", []>; + +let mayStore = 1 in +def PUSH16r : Pseudo<(outs), (ins GR16:$reg), "push.w\t$reg",[]>; +} + +//===----------------------------------------------------------------------===// +// Move Instructions + +// FIXME: Provide proper encoding! +let neverHasSideEffects = 1 in { +def MOV8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src), + "mov.b\t{$src, $dst}", + []>; +def MOV16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "mov.w\t{$src, $dst}", + []>; +} + +// FIXME: Provide proper encoding! 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MOV8ri : Pseudo<(outs GR8:$dst), (ins i8imm:$src), + "mov.b\t{$src, $dst}", + [(set GR8:$dst, imm:$src)]>; +def MOV16ri : Pseudo<(outs GR16:$dst), (ins i16imm:$src), + "mov.w\t{$src, $dst}", + [(set GR16:$dst, imm:$src)]>; +} + +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +def MOV8rm : Pseudo<(outs GR8:$dst), (ins memsrc:$src), + "mov.b\t{$src, $dst}", + [(set GR8:$dst, (load addr:$src))]>; +def MOV16rm : Pseudo<(outs GR16:$dst), (ins memsrc:$src), + "mov.w\t{$src, $dst}", + [(set GR16:$dst, (load addr:$src))]>; +} + +def MOVZX16rr8 : Pseudo<(outs GR16:$dst), (ins GR8:$src), + "mov.b\t{$src, $dst}", + [(set GR16:$dst, (zext GR8:$src))]>; +def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src), + "mov.b\t{$src, $dst}", + [(set GR16:$dst, (zextloadi16i8 addr:$src))]>; + +// Any instruction that defines a 8-bit result leaves the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may +// be copying from a truncate, but any other 8-bit operation will zero-extend +// up to 16 bits. +def def8 : PatLeaf<(i8 GR8:$src), [{ + return N->getOpcode() != ISD::TRUNCATE && + N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != ISD::CopyFromReg; +}]>; + +// In the case of a 8-bit def that is known to implicitly zero-extend, +// we can use a SUBREG_TO_REG. +def : Pat<(i16 (zext def8:$src)), + (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>; + + +def MOV8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "mov.b\t{$src, $dst}", + [(store (i8 imm:$src), addr:$dst)]>; +def MOV16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "mov.w\t{$src, $dst}", + [(store (i16 imm:$src), addr:$dst)]>; + +def MOV8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "mov.b\t{$src, $dst}", + [(store GR8:$src, addr:$dst)]>; +def MOV16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "mov.w\t{$src, $dst}", + [(store GR16:$src, addr:$dst)]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions + +let isTwoAddress = 1 in { + +let Defs = [SRW] in { + +let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y +// FIXME: Provide proper encoding! 
+def ADD8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "add.b\t{$src2, $dst}", + [(set GR8:$dst, (add GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def ADD16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "add.w\t{$src2, $dst}", + [(set GR16:$dst, (add GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} + +def ADD8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "add.b\t{$src2, $dst}", + [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "add.w\t{$src2, $dst}", + [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "add.b\t{$src2, $dst}", + [(set GR8:$dst, (add GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def ADD16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "add.w\t{$src2, $dst}", + [(set GR16:$dst, (add GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def ADD8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "add.b\t{$src, $dst}", + [(store (add (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def ADD16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "add.w\t{$src, $dst}", + [(store (add (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def ADD8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "add.b\t{$src, $dst}", + [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def ADD16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "add.w\t{$src, $dst}", + [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def ADD8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "add.b\t{$src, $dst}", + [(store (add (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def ADD16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "add.w\t{$src, $dst}", + [(store (add (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + +let Uses = [SRW] in { + +let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y +def ADC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "addc.b\t{$src2, $dst}", + [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def ADC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "addc.w\t{$src2, $dst}", + [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} // isCommutable + +def ADC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "addc.b\t{$src2, $dst}", + [(set GR8:$dst, (adde GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def ADC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "addc.w\t{$src2, $dst}", + [(set GR16:$dst, (adde GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def ADC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "addc.b\t{$src2, $dst}", + [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def ADC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "addc.w\t{$src2, $dst}", + [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def ADC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "addc.b\t{$src, $dst}", + [(store (adde (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def ADC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "addc.w\t{$src, $dst}", + [(store (adde (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def ADC8mi : Pseudo<(outs), 
(ins memdst:$dst, i8imm:$src), + "addc.b\t{$src, $dst}", + [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def ADC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "addc.w\t{$src, $dst}", + [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def ADC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "addc.b\t{$src, $dst}", + [(store (adde (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def ADC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "addc.w\t{$src, $dst}", + [(store (adde (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + +} // Uses = [SRW] + +let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y +def AND8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "and.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def AND16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "and.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} + +def AND8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "and.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def AND16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "and.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def AND8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "and.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "and.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "and.b\t{$src, $dst}", + [(store (and (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def AND16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "and.w\t{$src, $dst}", + [(store (and (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def AND8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "and.b\t{$src, $dst}", + [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def AND16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "and.w\t{$src, $dst}", + [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def AND8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "and.b\t{$src, $dst}", + [(store (and (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "and.w\t{$src, $dst}", + [(store (and (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + + +let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y +def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "xor.b\t{$src2, $dst}", + [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def XOR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "xor.w\t{$src2, $dst}", + [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} + +def XOR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "xor.b\t{$src2, $dst}", + [(set GR8:$dst, (xor GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def XOR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "xor.w\t{$src2, $dst}", + [(set GR16:$dst, (xor GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def XOR8rm : Pseudo<(outs 
GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "xor.b\t{$src2, $dst}", + [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "xor.w\t{$src2, $dst}", + [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "xor.b\t{$src, $dst}", + [(store (xor (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def XOR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "xor.w\t{$src, $dst}", + [(store (xor (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def XOR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "xor.b\t{$src, $dst}", + [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def XOR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "xor.w\t{$src, $dst}", + [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def XOR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "xor.b\t{$src, $dst}", + [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def XOR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "xor.w\t{$src, $dst}", + [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + + +def SUB8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "sub.b\t{$src2, $dst}", + [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def SUB16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "sub.w\t{$src2, $dst}", + [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), + (implicit SRW)]>; + +def SUB8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "sub.b\t{$src2, $dst}", + [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def SUB16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "sub.w\t{$src2, $dst}", + [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def SUB8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "sub.b\t{$src2, $dst}", + [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "sub.w\t{$src2, $dst}", + [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "sub.b\t{$src, $dst}", + [(store (sub (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def SUB16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "sub.w\t{$src, $dst}", + [(store (sub (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def SUB8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "sub.b\t{$src, $dst}", + [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def SUB16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "sub.w\t{$src, $dst}", + [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def SUB8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "sub.b\t{$src, $dst}", + [(store (sub (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def SUB16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "sub.w\t{$src, $dst}", + [(store (sub (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + +let Uses = [SRW] in { +def SBC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "subc.b\t{$src2, $dst}", + [(set GR8:$dst, (sube 
GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def SBC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "subc.w\t{$src2, $dst}", + [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)), + (implicit SRW)]>; + +def SBC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "subc.b\t{$src2, $dst}", + [(set GR8:$dst, (sube GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def SBC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "subc.w\t{$src2, $dst}", + [(set GR16:$dst, (sube GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def SBC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "subc.b\t{$src2, $dst}", + [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def SBC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "subc.w\t{$src2, $dst}", + [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +let isTwoAddress = 0 in { +def SBC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "subc.b\t{$src, $dst}", + [(store (sube (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def SBC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "subc.w\t{$src, $dst}", + [(store (sube (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def SBC8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "subc.b\t{$src, $dst}", + [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def SBC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "subc.w\t{$src, $dst}", + [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def SBC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "subc.b\t{$src, $dst}", + [(store (sube (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def SBC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "subc.w\t{$src, $dst}", + [(store (sube (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + +} // Uses = [SRW] + +// FIXME: Provide proper encoding! 
+def SAR8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src), + "rra.b\t$dst", + [(set GR8:$dst, (MSP430rra GR8:$src)), + (implicit SRW)]>; +def SAR16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "rra.w\t$dst", + [(set GR16:$dst, (MSP430rra GR16:$src)), + (implicit SRW)]>; + +def SHL8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src), + "rla.b\t$dst", + [(set GR8:$dst, (MSP430rla GR8:$src)), + (implicit SRW)]>; +def SHL16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "rla.w\t$dst", + [(set GR16:$dst, (MSP430rla GR16:$src)), + (implicit SRW)]>; + +def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src), + "clrc\n\t" + "rrc.b\t$dst", + [(set GR8:$dst, (MSP430rrc GR8:$src)), + (implicit SRW)]>; +def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "clrc\n\t" + "rrc.w\t$dst", + [(set GR16:$dst, (MSP430rrc GR16:$src)), + (implicit SRW)]>; + +def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "sxt\t$dst", + [(set GR16:$dst, (sext_inreg GR16:$src, i8)), + (implicit SRW)]>; + +} // Defs = [SRW] + +def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "swpb\t$dst", + [(set GR16:$dst, (bswap GR16:$src))]>; + +let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y +def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; +def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; +} + +def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; +def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; + +def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; +def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; + +let isTwoAddress = 0 in { +def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), GR8:$src), addr:$dst), + (implicit SRW)]>; +def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), GR16:$src), addr:$dst), + (implicit SRW)]>; + +def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst), + (implicit SRW)]>; +def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst), + (implicit SRW)]>; + +def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (implicit SRW)]>; +} + +} // isTwoAddress = 1 + +// Integer comparisons +let Defs = [SRW] in { +def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2), + "cmp.b\t{$src1, $src2}", + [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>; +def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2), + "cmp.w\t{$src1, $src2}", + [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>; + +def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2), + "cmp.b\t{$src1, $src2}", + [(MSP430cmp 
imm:$src1, GR8:$src2), (implicit SRW)]>; +def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2), + "cmp.w\t{$src1, $src2}", + [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>; + +def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2), + "cmp.b\t{$src1, $src2}", + [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>; +def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2), + "cmp.w\t{$src1, $src2}", + [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>; + +// FIXME: imm is allowed only on src operand, not on dst. + +//def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), +// "cmp.b\t{$src1, $src2}", +// [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; +//def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), +// "cmp.w\t{$src1, $src2}", +// [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; + +//def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), +// "cmp.b\t{$src1, $src2}", +// [(MSP430cmp (load addr:$src1), (i8 imm:$src2)), (implicit SRW)]>; +//def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), +// "cmp.w\t{$src1, $src2}", +// [(MSP430cmp (load addr:$src1), (i16 imm:$src2)), (implicit SRW)]>; + + +// Imm 0, +1, +2, +4, +8 are encoded via constant generator registers. +// That's why we can use them as dest operands. +// We don't define new class for them, since they would need special encoding +// in the future. + +def CMP8ri0 : Pseudo<(outs), (ins GR8:$src1), + "cmp.b\t{$src1, #0}", + [(MSP430cmp GR8:$src1, 0), (implicit SRW)]>; +def CMP16ri0: Pseudo<(outs), (ins GR16:$src1), + "cmp.w\t{$src1, #0}", + [(MSP430cmp GR16:$src1, 0), (implicit SRW)]>; +def CMP8ri1 : Pseudo<(outs), (ins GR8:$src1), + "cmp.b\t{$src1, #1}", + [(MSP430cmp GR8:$src1, 1), (implicit SRW)]>; +def CMP16ri1: Pseudo<(outs), (ins GR16:$src1), + "cmp.w\t{$src1, #1}", + [(MSP430cmp GR16:$src1, 1), (implicit SRW)]>; +def CMP8ri2 : Pseudo<(outs), (ins GR8:$src1), + "cmp.b\t{$src1, #2}", + [(MSP430cmp GR8:$src1, 2), (implicit SRW)]>; +def CMP16ri2: Pseudo<(outs), (ins GR16:$src1), + "cmp.w\t{$src1, #2}", + [(MSP430cmp GR16:$src1, 2), (implicit SRW)]>; +def CMP8ri4 : Pseudo<(outs), (ins GR8:$src1), + "cmp.b\t{$src1, #4}", + [(MSP430cmp GR8:$src1, 4), (implicit SRW)]>; +def CMP16ri4: Pseudo<(outs), (ins GR16:$src1), + "cmp.w\t{$src1, #4}", + [(MSP430cmp GR16:$src1, 4), (implicit SRW)]>; +def CMP8ri8 : Pseudo<(outs), (ins GR8:$src1), + "cmp.b\t{$src1, #8}", + [(MSP430cmp GR8:$src1, 8), (implicit SRW)]>; +def CMP16ri8: Pseudo<(outs), (ins GR16:$src1), + "cmp.w\t{$src1, #8}", + [(MSP430cmp GR16:$src1, 8), (implicit SRW)]>; + +def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2), + "cmp.b\t{$src1, $src2}", + [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>; +def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2), + "cmp.w\t{$src1, $src2}", + [(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>; + +def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2), + "cmp.b\t{$src1, $src2}", + [(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>; +def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), + "cmp.w\t{$src1, $src2}", + [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; + +def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1), + "cmp.b\t{$src1, #0}", + [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>; +def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1), + "cmp.w\t{$src1, #0}", + [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>; +def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1), + 
"cmp.b\t{$src1, #1}", + [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>; +def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1), + "cmp.w\t{$src1, #1}", + [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>; +def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1), + "cmp.b\t{$src1, #2}", + [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>; +def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1), + "cmp.w\t{$src1, #2}", + [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>; +def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1), + "cmp.b\t{$src1, #4}", + [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>; +def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1), + "cmp.w\t{$src1, #4}", + [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>; +def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1), + "cmp.b\t{$src1, #8}", + [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>; +def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1), + "cmp.w\t{$src1, #8}", + [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>; + +} // Defs = [SRW] + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns + +// extload +def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; + +// anyext +def : Pat<(anyext addr:$src), (MOVZX16rr8 GR8:$src)>; + +// truncs +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG GR16:$src, subreg_8bit)>; + +// GlobalAddress, ExternalSymbol +def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; +def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; + +def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), + (ADD16ri GR16:$src1, tglobaladdr:$src2)>; +def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), + (ADD16ri GR16:$src1, texternalsym:$src2)>; + +def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), + (MOV16mi addr:$dst, tglobaladdr:$src)>; +def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst), + (MOV16mi addr:$dst, texternalsym:$src)>; + +// calls +def : Pat<(MSP430call (i16 tglobaladdr:$dst)), + (CALLi tglobaladdr:$dst)>; +def : Pat<(MSP430call (i16 texternalsym:$dst)), + (CALLi texternalsym:$dst)>; + +// add and sub always produce carry +def : Pat<(addc GR16:$src1, GR16:$src2), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(addc GR16:$src1, (load addr:$src2)), + (ADD16rm GR16:$src1, addr:$src2)>; +def : Pat<(addc GR16:$src1, imm:$src2), + (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst), + (ADD16mr addr:$dst, GR16:$src)>; +def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst), + (ADD16mm addr:$dst, addr:$src)>; + +def : Pat<(addc GR8:$src1, GR8:$src2), + (ADD8rr GR8:$src1, GR8:$src2)>; +def : Pat<(addc GR8:$src1, (load addr:$src2)), + (ADD8rm GR8:$src1, addr:$src2)>; +def : Pat<(addc GR8:$src1, imm:$src2), + (ADD8ri GR8:$src1, imm:$src2)>; +def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst), + (ADD8mr addr:$dst, GR8:$src)>; +def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (ADD8mm addr:$dst, addr:$src)>; + +def : Pat<(subc GR16:$src1, GR16:$src2), + (SUB16rr GR16:$src1, GR16:$src2)>; +def : Pat<(subc GR16:$src1, (load addr:$src2)), + (SUB16rm GR16:$src1, addr:$src2)>; +def : Pat<(subc GR16:$src1, imm:$src2), + (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst), + (SUB16mr addr:$dst, GR16:$src)>; +def : Pat<(store (subc (load addr:$dst), (i16 (load 
addr:$src))), addr:$dst), + (SUB16mm addr:$dst, addr:$src)>; + +def : Pat<(subc GR8:$src1, GR8:$src2), + (SUB8rr GR8:$src1, GR8:$src2)>; +def : Pat<(subc GR8:$src1, (load addr:$src2)), + (SUB8rm GR8:$src1, addr:$src2)>; +def : Pat<(subc GR8:$src1, imm:$src2), + (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst), + (SUB8mr addr:$dst, GR8:$src)>; +def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), + (SUB8mm addr:$dst, addr:$src)>; diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h new file mode 100644 index 000000000000..b94d7e44cace --- /dev/null +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -0,0 +1,39 @@ +//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares MSP430-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430MACHINEFUNCTIONINFO_H +#define MSP430MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// MSP430MachineFunctionInfo - This class is derived from MachineFunction and +/// contains private MSP430 target-specific information for each MachineFunction. +class MSP430MachineFunctionInfo : public MachineFunctionInfo { + /// CalleeSavedFrameSize - Size of the callee-saved register portion of the + /// stack frame in bytes. + unsigned CalleeSavedFrameSize; + +public: + MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {} + + MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {} + + unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } + void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp new file mode 100644 index 000000000000..ef6f99756c2e --- /dev/null +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -0,0 +1,355 @@ +//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MSP430 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msp430-reg-info" + +#include "MSP430.h" +#include "MSP430MachineFunctionInfo.h" +#include "MSP430RegisterInfo.h" +#include "MSP430TargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" + +using namespace llvm; + +// FIXME: Provide proper call frame setup / destroy opcodes. 
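+// The ADJCALLSTACKDOWN / ADJCALLSTACKUP opcodes handed to the generated
+// superclass constructor below are the pseudo instructions that bracket
+// each call's argument area; they carry the byte amounts as immediate
+// operands and are rewritten into real SPW adjustments (or deleted) by
+// eliminateCallFramePseudoInstr() further down in this file.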
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+                                       const TargetInstrInfo &tii)
+  : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+    TM(tm), TII(tii) {
+  StackAlign = TM.getFrameInfo()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  static const unsigned CalleeSavedRegs[] = {
+    MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+    MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+    0
+  };
+
+  return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    0
+  };
+
+  return CalleeSavedRegClasses;
+}
+
+BitVector
+MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+
+  // Mark 4 special registers as reserved.
+  Reserved.set(MSP430::PCW);
+  Reserved.set(MSP430::SPW);
+  Reserved.set(MSP430::SRW);
+  Reserved.set(MSP430::CGW);
+
+  // Mark frame pointer as reserved if needed.
+  if (hasFP(MF))
+    Reserved.set(MSP430::FPW);
+
+  return Reserved;
+}
+
+const TargetRegisterClass* MSP430RegisterInfo::getPointerRegClass() const {
+  return &MSP430::GR16RegClass;
+}
+
+
+bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
+  return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after prologue, turn the
+    // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+    // adjcallstackup instruction into 'add SPW, <amt>'
+    // TODO: consider using push / pop instead of sub + store / add
+    MachineInstr *Old = I;
+    uint64_t Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+      MachineInstr *New = 0;
+      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+        New = BuildMI(MF, Old->getDebugLoc(),
+                      TII.get(MSP430::SUB16ri), MSP430::SPW)
+          .addReg(MSP430::SPW).addImm(Amount);
+      } else {
+        assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+        // factor out the amount the callee already popped.
+        uint64_t CalleeAmt = Old->getOperand(1).getImm();
+        Amount -= CalleeAmt;
+        if (Amount)
+          New = BuildMI(MF, Old->getDebugLoc(),
+                        TII.get(MSP430::ADD16ri), MSP430::SPW)
+            .addReg(MSP430::SPW).addImm(Amount);
+      }
+
+      if (New) {
+        // The SRW implicit def is dead.
+        New->getOperand(3).setIsDead();
+
+        // Replace the pseudo instruction with a new instruction...
+        MBB.insert(I, New);
+      }
+    }
+  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+    // If we are performing frame pointer elimination and if the callee pops
+    // something off the stack pointer, add it back.
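+    // For example, a callee that popped 4 bytes makes this emit the
+    // equivalent of "sub.w #4, r1" via the SUB16ri built below.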
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { + MachineInstr *Old = I; + MachineInstr *New = + BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), + MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt); + // The SRW implicit def is dead. + New->getOperand(3).setIsDead(); + + MBB.insert(I, New); + } + } + + MBB.erase(I); +} + +void +MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + + // Skip the saved PC + Offset += 2; + + if (!hasFP(MF)) + Offset += MF.getFrameInfo()->getStackSize(); + else + Offset += 2; // Skip the saved FPW + + // Fold imm into offset + Offset += MI.getOperand(i+1).getImm(); + + if (MI.getOpcode() == MSP430::ADD16ri) { + // This is actually "load effective address" of the stack slot + // instruction. We have only two-address instructions, thus we need to + // expand it into mov + add + + MI.setDesc(TII.get(MSP430::MOV16rr)); + MI.getOperand(i).ChangeToRegister(BasePtr, false); + + if (Offset == 0) + return; + + // We need to materialize the offset via add instruction. + unsigned DstReg = MI.getOperand(0).getReg(); + if (Offset < 0) + BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg) + .addReg(DstReg).addImm(-Offset); + else + BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) + .addReg(DstReg).addImm(Offset); + + return; + } + + MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); +} + +void +MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + // Create a frame entry for the FPW register that must be saved. + if (hasFP(MF)) { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4); + assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + "Slot for FPW register must be last in order to be found!"); + FrameIdx = 0; + } +} + + +void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineFrameInfo *MFI = MF.getFrameInfo(); + MSP430MachineFunctionInfo *MSP430FI = MF.getInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : + DebugLoc::getUnknownLoc()); + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t StackSize = MFI->getStackSize(); + + uint64_t NumBytes = 0; + if (hasFP(MF)) { + // Calculate required stack adjustment + uint64_t FrameSize = StackSize - 2; + NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize(); + + // Get the offset of the stack slot for the EBP register... which is + // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. + // Update the frame offset adjustment. + MFI->setOffsetAdjustment(-NumBytes); + + // Save FPW into the appropriate stack slot... + BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r)) + .addReg(MSP430::FPW, RegState::Kill); + + // Update FPW with the new base value... 
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW) + .addReg(MSP430::SPW); + + // Mark the FramePtr as live-in in every block except the entry. + for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + I != E; ++I) + I->addLiveIn(MSP430::FPW); + + } else + NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize(); + + // Skip the callee-saved push instructions. + while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r)) + ++MBBI; + + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + if (NumBytes) { // adjust stack pointer: SPW -= numbytes + // If there is an SUB16ri of SPW immediately before this instruction, merge + // the two. + //NumBytes -= mergeSPUpdates(MBB, MBBI, true); + // If there is an ADD16ri or SUB16ri of SPW immediately after this + // instruction, merge the two instructions. + // mergeSPUpdatesDown(MBB, MBBI, &NumBytes); + + if (NumBytes) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(NumBytes); + // The SRW implicit def is dead. + MI->getOperand(3).setIsDead(); + } + } +} + +void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MSP430MachineFunctionInfo *MSP430FI = MF.getInfo(); + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc DL = MBBI->getDebugLoc(); + + switch (RetOpcode) { + case MSP430::RET: break; // These are ok + default: + assert(0 && "Can only insert epilog into returning blocks"); + } + + // Get the number of bytes to allocate from the FrameInfo + uint64_t StackSize = MFI->getStackSize(); + unsigned CSSize = MSP430FI->getCalleeSavedFrameSize(); + uint64_t NumBytes = 0; + + if (hasFP(MF)) { + // Calculate required stack adjustment + uint64_t FrameSize = StackSize - 2; + NumBytes = FrameSize - CSSize; + + // pop FPW. + BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW); + } else + NumBytes = StackSize - CSSize; + + // Skip the callee-saved pop instructions. + MachineBasicBlock::iterator LastCSPop = MBBI; + while (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PI = prior(MBBI); + unsigned Opc = PI->getOpcode(); + if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator()) + break; + --MBBI; + } + + DL = MBBI->getDebugLoc(); + + // If there is an ADD16ri or SUB16ri of SPW immediately before this + // instruction, merge the two instructions. + //if (NumBytes || MFI->hasVarSizedObjects()) + // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + + if (MFI->hasVarSizedObjects()) { + assert(0 && "Not implemented yet!"); + } else { + // adjust stack pointer back: SPW += numbytes + if (NumBytes) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(NumBytes); + // The SRW implicit def is dead. + MI->getOperand(3).setIsDead(); + } + } +} + +unsigned MSP430RegisterInfo::getRARegister() const { + return MSP430::PCW; +} + +unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { + return hasFP(MF) ? 
MSP430::FPW : MSP430::SPW; +} + +int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + assert(0 && "Not implemented yet!"); +} + +#include "MSP430GenRegisterInfo.inc" diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h new file mode 100644 index 000000000000..a210e36e001d --- /dev/null +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -0,0 +1,70 @@ +//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MSP430 implementation of the MRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MSP430REGISTERINFO_H +#define LLVM_TARGET_MSP430REGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "MSP430GenRegisterInfo.h.inc" + +namespace llvm { + +class TargetInstrInfo; +class MSP430TargetMachine; + +struct MSP430RegisterInfo : public MSP430GenRegisterInfo { +private: + MSP430TargetMachine &TM; + const TargetInstrInfo &TII; + + /// StackAlign - Default stack alignment. + /// + unsigned StackAlign; +public: + MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + const TargetRegisterClass* getPointerRegClass() const; + + bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + //! Get DWARF debugging register number + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif // LLVM_TARGET_MSP430REGISTERINFO_H diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td new file mode 100644 index 000000000000..4078626ea2dd --- /dev/null +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -0,0 +1,122 @@ +//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+  field bits<4> Num = num;
+  let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+  : RegisterWithSubRegs<n, subregs> {
+  field bits<4> Num = num;
+  let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+
+def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
+                    R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+                   [PCB, SPB, SRB, CGB, FPB,
+                    R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def subreg_8bit : PatLeaf<(i32 1)>;
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+   // Volatile registers
+  [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+   // Frame pointer, sometimes allocable
+   FPB,
+   // Volatile, but not allocable
+   PCB, SPB, SRB, CGB]>
+{
+  let MethodProtos = [{
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    GR8Class::iterator
+    GR8Class::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      // Depending on whether the function uses frame pointer or not, last 5 or 4
+      // registers on the list above are reserved
+      if (RI->hasFP(MF))
+        return end()-5;
+      else
+        return end()-4;
+    }
+  }];
+}
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+   // Volatile registers
+  [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+   // Frame pointer, sometimes allocable
+   FPW,
+   // Volatile, but not allocable
+   PCW, SPW, SRW, CGW]>
+{
+  let SubRegClassList = [GR8];
+  let MethodProtos = [{
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    GR16Class::iterator
+    GR16Class::allocation_order_end(const MachineFunction &MF) const {
+      const
TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Depending on whether the function uses frame pointer or not, last 5 or 4 + // registers on the list above are reserved + if (RI->hasFP(MF)) + return end()-5; + else + return end()-4; + } + }]; +} + diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp new file mode 100644 index 000000000000..ef9e10339bc3 --- /dev/null +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -0,0 +1,27 @@ +//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MSP430 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "MSP430Subtarget.h" +#include "MSP430.h" +#include "MSP430GenSubtarget.inc" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +MSP430Subtarget::MSP430Subtarget(const TargetMachine &TM, const Module &M, + const std::string &FS) { + std::string CPU = "generic"; + + // Parse features string. + ParseSubtargetFeatures(FS, CPU); +} diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h new file mode 100644 index 000000000000..96c8108b71bc --- /dev/null +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -0,0 +1,41 @@ +//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MSP430 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MSP430_SUBTARGET_H +#define LLVM_TARGET_MSP430_SUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" + +#include + +namespace llvm { +class Module; +class TargetMachine; + +class MSP430Subtarget : public TargetSubtarget { + bool ExtendedInsts; +public: + /// This constructor initializes the data members to match that + /// of the specified module. + /// + MSP430Subtarget(const TargetMachine &TM, const Module &M, + const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); +}; +} // End llvm namespace + +#endif // LLVM_TARGET_MSP430_SUBTARGET_H diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.cpp b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp new file mode 100644 index 000000000000..ab181de13f94 --- /dev/null +++ b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp @@ -0,0 +1,22 @@ +//===-- MSP430TargetAsmInfo.cpp - MSP430 asm properties -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MSP430TargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "MSP430TargetAsmInfo.h" +#include "MSP430TargetMachine.h" + +using namespace llvm; + +MSP430TargetAsmInfo::MSP430TargetAsmInfo(const MSP430TargetMachine &TM) + : ELFTargetAsmInfo(TM) { + AlignmentIsInBytes = false; +} diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.h b/lib/Target/MSP430/MSP430TargetAsmInfo.h new file mode 100644 index 000000000000..b58d5c9c764c --- /dev/null +++ b/lib/Target/MSP430/MSP430TargetAsmInfo.h @@ -0,0 +1,31 @@ +//=====-- MSP430TargetAsmInfo.h - MSP430 asm properties -------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MSP430TargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430TARGETASMINFO_H +#define MSP430TARGETASMINFO_H + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" + +namespace llvm { + + // Forward declaration. + class MSP430TargetMachine; + + struct MSP430TargetAsmInfo : public ELFTargetAsmInfo { + explicit MSP430TargetAsmInfo(const MSP430TargetMachine &TM); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp new file mode 100644 index 000000000000..78869463f3a2 --- /dev/null +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -0,0 +1,76 @@ +//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the MSP430 target. +// +//===----------------------------------------------------------------------===// + +#include "MSP430.h" +#include "MSP430TargetAsmInfo.h" +#include "MSP430TargetMachine.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetMachineRegistry.h" + +using namespace llvm; + +/// MSP430TargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int MSP430TargetMachineModule; +int MSP430TargetMachineModule = 0; + + +// Register the targets +static RegisterTarget +X("msp430", "MSP430 [experimental]"); + +MSP430TargetMachine::MSP430TargetMachine(const Module &M, + const std::string &FS) : + Subtarget(*this, M, FS), + // FIXME: Check TargetData string. + DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), + InstrInfo(*this), TLInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } + +const TargetAsmInfo *MSP430TargetMachine::createTargetAsmInfo() const { + return new MSP430TargetAsmInfo(*this); +} + +bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. 
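+  // (createMSP430ISelDag creates the DAG-to-DAG pattern matcher; its
+  // matching tables come from the tblgen-generated MSP430GenDAGISel.inc.)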
+ PM.add(createMSP430ISelDag(*this, OptLevel)); + return false; +} + +bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + // Output assembly language. + PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} + +unsigned MSP430TargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + + // We strongly match msp430 + if (TT.size() >= 6 && TT[0] == 'm' && TT[1] == 's' && TT[2] == 'p' && + TT[3] == '4' && TT[4] == '3' && TT[5] == '0') + return 20; + + return 0; +} + diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h new file mode 100644 index 000000000000..d9ffa2b5ac8f --- /dev/null +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -0,0 +1,68 @@ +//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MSP430 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H +#define LLVM_TARGET_MSP430_TARGETMACHINE_H + +#include "MSP430InstrInfo.h" +#include "MSP430ISelLowering.h" +#include "MSP430RegisterInfo.h" +#include "MSP430Subtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +/// MSP430TargetMachine +/// +class MSP430TargetMachine : public LLVMTargetMachine { + MSP430Subtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MSP430InstrInfo InstrInfo; + MSP430TargetLowering TLInfo; + + // MSP430 does not have any call stack frame, therefore not having + // any MSP430 specific FrameInfo class. + TargetFrameInfo FrameInfo; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + MSP430TargetMachine(const Module &M, const std::string &FS); + + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; } + + virtual const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual MSP430TargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, bool Verbose, + raw_ostream &Out); + static unsigned getModuleMatchQuality(const Module &M); +}; // MSP430TargetMachine. + +} // end namespace llvm + +#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile new file mode 100644 index 000000000000..45cb3aa45b85 --- /dev/null +++ b/lib/Target/MSP430/Makefile @@ -0,0 +1,21 @@ +##===- lib/Target/MSP430/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSP430
+TARGET = MSP430
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
+                MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \
+                MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \
+                MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
+                MSP430GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt
new file mode 100644
index 000000000000..b14e93d84d6a
--- /dev/null
+++ b/lib/Target/MSP430/README.txt
@@ -0,0 +1,42 @@
+//===---------------------------------------------------------------------===//
+// MSP430 backend.
+//===---------------------------------------------------------------------===//
+
+DISCLAIMER: This backend should be considered highly experimental. I have
+never seen nor worked with this MCU; all information was gathered from the
+datasheet only. The original intention of making this backend was to write
+documentation of the form "How to write a backend for dummies" :) These notes
+will hopefully be available pretty soon.
+
+Some things are incomplete / not implemented yet (this list surely is not
+complete as well):
+
+0. Implement asmprinting for variables :)
+
+1. Verify how implicit zext with 8 bit operands is handled (this might
+currently be modelled in an improper way - do we need to mark the superreg as
+def for every 8 bit instruction?).
+
+2. Libcalls: multiplication, division, remainder. Note that the calling
+convention for libcalls is incompatible with the calling convention of the
+msp430-gcc libcalls (those cannot be used though due to license restrictions).
+
+3. Implement multiplication / division by constant (dag combiner hook?).
+
+4. Implement non-constant shifts.
+
+5. Implement varargs stuff.
+
+6. Verify and fix (if needed) how things play with i32 / i64.
+
+7. Implement floating point stuff (softfp?).
+
+8. Implement instruction encoding for (possible) direct code emission in the
+future.
+
+9. Since almost all instructions set flags - implement brcond / select in a
+better way (currently they emit an explicit comparison).
+
+10. Handle imm in comparisons in a better way (see comment in MSP430InstrInfo.td).
+
+11. Implement hooks for better memory op folding, etc.
diff --git a/lib/Target/Makefile b/lib/Target/Makefile
new file mode 100644
index 000000000000..50a360f1f868
--- /dev/null
+++ b/lib/Target/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMTarget
+BUILD_ARCHIVE = 1
+
+# We include this early so we can access the value of TARGETS_TO_BUILD as the
+# value for PARALLEL_DIRS which must be set before Makefile.rules is included
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := $(TARGETS_TO_BUILD)
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..6a868c2fc78c
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+  ${CMAKE_CURRENT_BINARY_DIR}/..
+  ${CMAKE_CURRENT_SOURCE_DIR}/..
+ ) + +add_partially_linked_object(LLVMMipsAsmPrinter + MipsAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMMipsCodeGen n) + +add_dependencies(LLVMMipsAsmPrinter ${n}) diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile new file mode 100644 index 000000000000..a2fecf44e8e1 --- /dev/null +++ b/lib/Target/Mips/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsAsmPrinter + +# Hack: we need to include 'main' Mips target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp new file mode 100644 index 000000000000..dfb62382e75d --- /dev/null +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -0,0 +1,580 @@ +//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format MIPS assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-asm-printer" + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter { + const MipsSubtarget *Subtarget; + public: + explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V) { + Subtarget = &TM.getSubtarget(); + } + + virtual const char *getPassName() const { + return "Mips Assembly Printer"; + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void printOperand(const MachineInstr *MI, int opNum); + void printUnsignedImm(const MachineInstr *MI, int opNum); + void printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printFCCOperand(const 
MachineInstr *MI, int opNum,
+                         const char *Modifier = 0);
+    void printModuleLevelGV(const GlobalVariable* GVar);
+    void printSavedRegsBitmask(MachineFunction &MF);
+    void printHex32(unsigned int Value);
+
+    const char *emitCurrentABIString(void);
+    void emitFunctionStart(MachineFunction &MF);
+    void emitFunctionEnd(MachineFunction &MF);
+    void emitFrameDirective(MachineFunction &MF);
+
+    bool printInstruction(const MachineInstr *MI);  // autogenerated.
+    bool runOnMachineFunction(MachineFunction &F);
+    bool doInitialization(Module &M);
+    bool doFinalization(Module &M);
+  };
+} // end of anonymous namespace
+
+#include "MipsGenAsmWriter.inc"
+
+/// createMipsCodePrinterPass - Returns a pass that prints the MIPS
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o,
+                                              MipsTargetMachine &tm,
+                                              CodeGenOpt::Level OptLevel,
+                                              bool verbose) {
+  return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+//===----------------------------------------------------------------------===//
+//
+//  Mips Asm Directives
+//
+//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+//  Describe the stack frame.
+//
+//  -- Mask directives "(f)mask  bitmask, offset"
+//  Tells the assembler which registers are saved and where.
+//  bitmask - contains a little endian bitset indicating which registers are
+//            saved on function prologue (e.g. with a 0x80000000 mask, the
+//            assembler knows the register 31 (RA) is saved at prologue).
+//  offset  - the position before stack pointer subtraction indicating where
+//            the first saved register on prologue is located. (e.g. with a
+//
+//  Consider the following function prologue:
+//
+//    .frame  $fp,48,$ra
+//    .mask   0xc0000000,-8
+//       addiu $sp, $sp, -48
+//       sw $ra, 40($sp)
+//       sw $fp, 36($sp)
+//
+//  With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
+//  30 (FP) are saved at prologue. As the save order on prologue is from
+//  left to right, RA is saved first. A -8 offset means that after the
+//  stack pointer subtraction, the first register in the mask (RA) will be
+//  saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::
+printSavedRegsBitmask(MachineFunction &MF)
+{
+  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+  // CPU and FPU Saved Registers Bitmasks
+  unsigned int CPUBitmask = 0;
+  unsigned int FPUBitmask = 0;
+
+  // Set the CPU and FPU Bitmasks
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
+    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+      CPUBitmask |= (1 << RegNum);
+    else
+      FPUBitmask |= (1 << RegNum);
+  }
+
+  // Return Address and Frame registers must also be set in CPUBitmask.
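+  // (For instance, a frame that saves both RA and FP yields the
+  //  0xc0000000 mask shown in the directive example above.)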
+ if (RI.hasFP(MF)) + CPUBitmask |= (1 << MipsRegisterInfo:: + getRegisterNumbering(RI.getFrameRegister(MF))); + + if (MF.getFrameInfo()->hasCalls()) + CPUBitmask |= (1 << MipsRegisterInfo:: + getRegisterNumbering(RI.getRARegister())); + + // Print CPUBitmask + O << "\t.mask \t"; printHex32(CPUBitmask); O << ',' + << MipsFI->getCPUTopSavedRegOff() << '\n'; + + // Print FPUBitmask + O << "\t.fmask\t"; printHex32(FPUBitmask); O << "," + << MipsFI->getFPUTopSavedRegOff() << '\n'; +} + +// Print a 32 bit hex number with all numbers. +void MipsAsmPrinter:: +printHex32(unsigned int Value) +{ + O << "0x"; + for (int i = 7; i >= 0; i--) + O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) ); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// + +/// Frame Directive +void MipsAsmPrinter:: +emitFrameDirective(MachineFunction &MF) +{ + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + unsigned stackReg = RI.getFrameRegister(MF); + unsigned returnReg = RI.getRARegister(); + unsigned stackSize = MF.getFrameInfo()->getStackSize(); + + + O << "\t.frame\t" << '$' << LowercaseString(RI.get(stackReg).AsmName) + << ',' << stackSize << ',' + << '$' << LowercaseString(RI.get(returnReg).AsmName) + << '\n'; +} + +/// Emit Set directives. +const char * MipsAsmPrinter:: +emitCurrentABIString(void) +{ + switch(Subtarget->getTargetABI()) { + case MipsSubtarget::O32: return "abi32"; + case MipsSubtarget::O64: return "abiO64"; + case MipsSubtarget::N32: return "abiN32"; + case MipsSubtarget::N64: return "abi64"; + case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 + default: break; + } + + assert(0 && "Unknown Mips ABI"); + return NULL; +} + +/// Emit the directives used by GAS on the start of functions +void MipsAsmPrinter:: +emitFunctionStart(MachineFunction &MF) +{ + // Print out the label for the function. + const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + + // 2 bits aligned + EmitAlignment(2, F); + + O << "\t.globl\t" << CurrentFnName << '\n'; + O << "\t.ent\t" << CurrentFnName << '\n'; + + printVisibility(CurrentFnName, F->getVisibility()); + + if ((TAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux()) + O << "\t.type\t" << CurrentFnName << ", @function\n"; + + O << CurrentFnName << ":\n"; + + emitFrameDirective(MF); + printSavedRegsBitmask(MF); + + O << '\n'; +} + +/// Emit the directives used by GAS on the end of functions +void MipsAsmPrinter:: +emitFunctionEnd(MachineFunction &MF) +{ + // There are instruction for this macros, but they must + // always be at the function end, and we can't emit and + // break with BB logic. + O << "\t.set\tmacro\n"; + O << "\t.set\treorder\n"; + + O << "\t.end\t" << CurrentFnName << '\n'; + if (TAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. 
+bool MipsAsmPrinter:: +runOnMachineFunction(MachineFunction &MF) +{ + this->MF = &MF; + + SetupMachineFunction(MF); + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out jump tables referenced by the function + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + O << "\n\n"; + + // Emit the function start directives + emitFunctionStart(MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true); + O << '\n'; + } + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printInstruction(II); + ++EmittedInsts; + } + + // Each Basic Block is separated by a newline + O << '\n'; + } + + // Emit function end directives + emitFunctionEnd(MF); + + // We didn't modify anything. + return false; +} + +// Print out an operand for an inline asm expression. +bool MipsAsmPrinter:: +PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode) +{ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo); + return false; +} + +void MipsAsmPrinter:: +printOperand(const MachineInstr *MI, int opNum) +{ + const MachineOperand &MO = MI->getOperand(opNum); + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + bool closeP = false; + bool isPIC = (TM.getRelocationModel() == Reloc::PIC_); + bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large); + + // %hi and %lo used on mips gas to load global addresses on + // static code. %got is used to load global addresses when + // using PIC_. %call16 is used to load direct call targets + // on PIC_ and small code size. %call_lo and %call_hi load + // direct call targets on PIC_ and large code size. 
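+  // A typical static-code global access therefore prints as
+  //     lui   $2, %hi(sym)
+  //     addiu $2, $2, %lo(sym)
+  // while small-code PIC loads a call target with "lw $25, %call16(sym)($gp)".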
+ if (MI->getOpcode() == Mips::LUi && !MO.isReg() && !MO.isImm()) { + if ((isPIC) && (isCodeLarge)) + O << "%call_hi("; + else + O << "%hi("; + closeP = true; + } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isReg() && !MO.isImm()) { + const MachineOperand &firstMO = MI->getOperand(opNum-1); + if (firstMO.getReg() == Mips::GP) + O << "%gp_rel("; + else + O << "%lo("; + closeP = true; + } else if ((isPIC) && (MI->getOpcode() == Mips::LW) && + (!MO.isReg()) && (!MO.isImm())) { + const MachineOperand &firstMO = MI->getOperand(opNum-1); + const MachineOperand &lastMO = MI->getOperand(opNum+1); + if ((firstMO.isReg()) && (lastMO.isReg())) { + if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() == Mips::GP) + && (!isCodeLarge)) + O << "%call16("; + else if ((firstMO.getReg() != Mips::T9) && (lastMO.getReg() == Mips::GP)) + O << "%got("; + else if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() != Mips::GP) + && (isCodeLarge)) + O << "%call_lo("; + closeP = true; + } + } + + switch (MO.getType()) + { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << '$' << LowercaseString (RI.get(MO.getReg()).AsmName); + else + O << '$' << MO.getReg(); + break; + + case MachineOperand::MO_Immediate: + O << (short int)MO.getImm(); + break; + + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + + case MachineOperand::MO_GlobalAddress: + { + const GlobalValue *GV = MO.getGlobal(); + O << Mang->getValueName(GV); + } + break; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + break; + + default: + O << ""; abort (); break; + } + + if (closeP) O << ")"; +} + +void MipsAsmPrinter:: +printUnsignedImm(const MachineInstr *MI, int opNum) +{ + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << (unsigned short int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MipsAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) +{ + // when using stack locations for not load/store instructions + // print the same way as all normal 3 operand instructions. + if (Modifier && !strcmp(Modifier, "stackloc")) { + printOperand(MI, opNum+1); + O << ", "; + printOperand(MI, opNum); + return; + } + + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the + // pattern lw $25,%call16($28) + printOperand(MI, opNum); + O << "("; + printOperand(MI, opNum+1); + O << ")"; +} + +void MipsAsmPrinter:: +printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) +{ + const MachineOperand& MO = MI->getOperand(opNum); + O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); +} + +bool MipsAsmPrinter:: +doInitialization(Module &M) +{ + Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix()); + + // Tell the assembler which ABI we are using + O << "\t.section .mdebug." << emitCurrentABIString() << '\n'; + + // TODO: handle O64 ABI + if (Subtarget->isABI_EABI()) + O << "\t.section .gcc_compiled_long" << + (Subtarget->isGP32bit() ? 
"32" : "64") << '\n'; + + // return to previous section + O << "\t.previous" << '\n'; + + return false; // success +} + +void MipsAsmPrinter:: +printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + O << "\n\n"; + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + const Type *CTy = C->getType(); + unsigned Size = TD->getTypeAllocSize(CTy); + const ConstantArray *CVA = dyn_cast(C); + bool printSizeAndType = true; + + // A data structure or array is aligned in memory to the largest + // alignment boundary required by any data type inside it (this matches + // the Preferred Type Alignment). For integral types, the alignment is + // the type size. + unsigned Align; + if (CTy->getTypeID() == Type::IntegerTyID || + CTy->getTypeID() == Type::VoidTyID) { + assert(!(Size & (Size-1)) && "Alignment is not a power of two!"); + Align = Log2_32(Size); + } else + Align = TD->getPreferredTypeAlignmentShift(CTy); + + printVisibility(name, GVar->getVisibility()); + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && !GVar->hasSection()) { + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + + O << TAI->getCOMMDirective() << name << ',' << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (1 << Align); + + O << '\n'; + return; + } + } + switch (GVar->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::CommonLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + // FIXME: Verify correct for weak. + // Nonnull linkonce -> weak + O << "\t.weak " << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of their name + // or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << TAI->getGlobalDirective() << name << '\n'; + // Fall Through + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + if (CVA && CVA->isCString()) + printSizeAndType = false; + break; + case GlobalValue::GhostLinkage: + cerr << "Should not have any unmaterialized functions!\n"; + abort(); + case GlobalValue::DLLImportLinkage: + cerr << "DLLImport linkage is not supported by this target!\n"; + abort(); + case GlobalValue::DLLExportLinkage: + cerr << "DLLExport linkage is not supported by this target!\n"; + abort(); + default: + assert(0 && "Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + + if (TAI->hasDotTypeDotSizeDirective() && printSizeAndType) { + O << "\t.type " << name << ",@object\n"; + O << "\t.size " << name << ',' << Size << '\n'; + } + + O << name << ":\n"; + EmitGlobalConstant(C); +} + +bool MipsAsmPrinter:: +doFinalization(Module &M) +{ + // Print out module-level global variables here. 
+ for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + printModuleLevelGV(I); + + O << '\n'; + + return AsmPrinter::doFinalization(M); +} diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt new file mode 100644 index 000000000000..70c7a51c2850 --- /dev/null +++ b/lib/Target/Mips/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LLVM_TARGET_DEFINITIONS Mips.td) + +tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(MipsGenRegisterNames.inc -gen-register-enums) +tablegen(MipsGenRegisterInfo.inc -gen-register-desc) +tablegen(MipsGenInstrNames.inc -gen-instr-enums) +tablegen(MipsGenInstrInfo.inc -gen-instr-desc) +tablegen(MipsGenAsmWriter.inc -gen-asm-writer) +tablegen(MipsGenDAGISel.inc -gen-dag-isel) +tablegen(MipsGenCallingConv.inc -gen-callingconv) +tablegen(MipsGenSubtarget.inc -gen-subtarget) + +add_llvm_target(MipsCodeGen + MipsDelaySlotFiller.cpp + MipsInstrInfo.cpp + MipsISelDAGToDAG.cpp + MipsISelLowering.cpp + MipsRegisterInfo.cpp + MipsSubtarget.cpp + MipsTargetAsmInfo.cpp + MipsTargetMachine.cpp + ) diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile new file mode 100644 index 000000000000..48ab5f994704 --- /dev/null +++ b/lib/Target/Mips/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/Mips/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMMipsCodeGen +TARGET = Mips + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ + MipsGenRegisterInfo.inc MipsGenInstrNames.inc \ + MipsGenInstrInfo.inc MipsGenAsmWriter.inc \ + MipsGenDAGISel.inc MipsGenCallingConv.inc \ + MipsGenSubtarget.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h new file mode 100644 index 000000000000..0accb4e347ee --- /dev/null +++ b/lib/Target/Mips/Mips.h @@ -0,0 +1,41 @@ +//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM Mips back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MIPS_H +#define TARGET_MIPS_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class MipsTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class raw_ostream; + + FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsCodePrinterPass(raw_ostream &OS, + MipsTargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); +} // end namespace llvm; + +// Defines symbolic names for Mips registers. This defines a mapping from +// register name to register number. +#include "MipsGenRegisterNames.inc" + +// Defines symbolic names for the Mips instructions. 
+#include "MipsGenInstrNames.inc" + +#endif diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td new file mode 100644 index 000000000000..79a78d86aef7 --- /dev/null +++ b/lib/Target/Mips/Mips.td @@ -0,0 +1,88 @@ +//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the Mips target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "MipsRegisterInfo.td" +include "MipsSchedule.td" +include "MipsInstrInfo.td" +include "MipsCallingConv.td" + +def MipsInstrInfo : InstrInfo { + let TSFlagsFields = []; + let TSFlagsShifts = []; +} + +//===----------------------------------------------------------------------===// +// Mips Subtarget features // +//===----------------------------------------------------------------------===// + +def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true", + "General Purpose Registers are 64-bit wide.">; +def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", + "Support 64-bit FP registers.">; +def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", + "true", "Only supports single precision float">; +def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", + "Mips1 ISA Support">; +def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", + "Mips2 ISA Support">; +def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32", + "Enable o32 ABI">; +def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", + "Enable eabi ABI">; +def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", + "true", "Enable vector FPU instructions.">; +def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", + "Enable 'signext in register' instructions.">; +def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true", + "Enable 'conditional move' instructions.">; +def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true", + "Enable 'multiply add/sub' instructions.">; +def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true", + "Enable 'min/max' instructions.">; +def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true", + "Enable 'byte/half swap' instructions.">; +def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true", + "Enable 'count leading bits' instructions.">; + +//===----------------------------------------------------------------------===// +// Mips processors supported. 
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"r2000", [FeatureMips1]>;
+def : Proc<"r3000", [FeatureMips1]>;
+
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"r6000", [FeatureMips2]>;
+
+// Allegrex is a 32-bit subset of the r4000, both for integer and fp
+// registers, but much more similar to Mips2 than Mips3. It also contains
+// some of the Mips32/Mips32r2 instructions and a custom vector FPU.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+      FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
+      FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+
+def Mips : Target {
+  let InstructionSet = MipsInstrInfo;
+}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 000000000000..01fe92e6b73c
--- /dev/null
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,86 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+  CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+  // i32 are returned in registers V0, V1
+  CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+  // f32 are returned in registers F0, F1
+  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+  // f64 are returned in register D0
+  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Integer arguments are passed in integer registers.
+  CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+  // Single fp arguments are passed in pairs within 32-bit mode
+  CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+                  CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+  CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+                  CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+  // The first 4 double fp arguments are passed in single fp registers.
+  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+                  CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+  // Integer values get stored in stack slots that are 4 bytes in
+  // size and 4-byte aligned.
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // f64 values get stored in stack slots that are 8 bytes in
+  // size and 8-byte aligned.
+  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
+
+def RetCC_MipsEABI : CallingConv<[
+  // i32 are returned in registers V0, V1
+  CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+  // f32 are returned in registers F0, F1
+  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+  // f64 are returned in register D0
+  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+  CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+  CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+  CCDelegateTo<RetCC_MipsO32>
+]>;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 000000000000..a2b615d8add2
--- /dev/null
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,77 @@
+//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+  struct Filler : public MachineFunctionPass {
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+
+    static char ID;
+    Filler(TargetMachine &tm)
+      : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+    virtual const char *getPassName() const {
+      return "Mips Delay Slot Filler";
+    }
+
+    bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+    bool runOnMachineFunction(MachineFunction &F) {
+      bool Changed = false;
+      for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+           FI != FE; ++FI)
+        Changed |= runOnMachineBasicBlock(*FI);
+      return Changed;
+    }
+
+  };
+  char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
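+///
+/// For example (a sketch of the intended output, not taken from a real
+/// compile), a call such as
+///     jal   foo
+/// leaves the pass with a nop in its delay slot:
+///     jal   foo
+///     nop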
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  bool Changed = false;
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+    if (I->getDesc().hasDelaySlot()) {
+      MachineBasicBlock::iterator J = I;
+      ++J;
+      BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+      ++FilledSlots;
+      Changed = true;
+    }
+  return Changed;
+}
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+  return new Filler(tm);
+}
+
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 000000000000..f05ac702ccdd
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,392 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class VISIBILITY_HIDDEN MipsDAGToDAGISel : public SelectionDAGISel {
+
+  /// TM - Keep a reference to MipsTargetMachine.
+  MipsTargetMachine &TM;
+
+  /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const MipsSubtarget &Subtarget;
+
+public:
+  explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+        SelectionDAGISel(tm),
+        TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+  virtual void InstructionSelect();
+
+  // Pass Name
+  virtual const char *getPassName() const {
+    return "MIPS DAG->DAG Pattern Instruction Selection";
+  }
+
+
+private:
+  // Include the pieces autogenerated from the target description.
+  #include "MipsGenDAGISel.inc"
+
+  SDValue getGlobalBaseReg();
+  SDNode *Select(SDValue N);
+
+  // Complex Pattern. The out-parameters follow the operand order of the
+  // "mem" operand (offset first, then base), matching the definition.
+  bool SelectAddr(SDValue Op, SDValue N,
+                  SDValue &Offset, SDValue &Base);
+
+
+  // getI32Imm - Return a target constant with the specified
+  // value, of type i32.
+  inline SDValue getI32Imm(unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm, MVT::i32);
+  }
+
+
+  #ifndef NDEBUG
+  unsigned Indent;
+  #endif
+};
+
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MipsDAGToDAGISel::
+InstructionSelect()
+{
+  DEBUG(BB->dump());
+  // Codegen the basic block.
+  #ifndef NDEBUG
+  DOUT << "===== Instruction selection begins:\n";
+  Indent = 0;
+  #endif
+
+  // Select target instructions for the DAG.
+  SelectRoot(*CurDAG);
+
+  #ifndef NDEBUG
+  DOUT << "===== Instruction selection ends:\n";
+  #endif
+
+  CurDAG->RemoveDeadNodes();
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDValue MipsDAGToDAGISel::getGlobalBaseReg() {
+  MachineFunction* MF = BB->getParent();
+  unsigned GP = 0;
+  for(MachineRegisterInfo::livein_iterator ii = MF->getRegInfo().livein_begin(),
+        ee = MF->getRegInfo().livein_end(); ii != ee; ++ii)
+    if (ii->first == Mips::GP) {
+      GP = ii->second;
+      break;
+    }
+  assert(GP && "GOT PTR not in liveins");
+  // FIXME is there a sensible place to get debug info for this?
+  return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                DebugLoc::getUnknownLoc(), GP, MVT::i32);
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
+{
+  // If the address is a FrameIndex, get the TargetFrameIndex.
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  // On PIC code, load the GA through the GP register.
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+        (Addr.getOpcode() == ISD::TargetJumpTable)){
+      Base   = CurDAG->getRegister(Mips::GP, MVT::i32);
+      Offset = Addr;
+      return true;
+    }
+  } else {
+    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+         Addr.getOpcode() == ISD::TargetGlobalAddress))
+      return false;
+  }
+
+  // Operand is a result from an ADD.
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+      if (Predicate_immSExt16(CN)) {
+
+        // If the first operand is a FI, get the TargetFI Node
+        if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+                                        (Addr.getOperand(0))) {
+          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+        } else {
+          Base = Addr.getOperand(0);
+        }
+
+        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+        return true;
+      }
+    }
+  }
+
+  Base   = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MipsDAGToDAGISel::
+Select(SDValue N)
+{
+  SDNode *Node = N.getNode();
+  unsigned Opcode = Node->getOpcode();
+  DebugLoc dl = Node->getDebugLoc();
+
+  // Dump information about the Node being selected
+  #ifndef NDEBUG
+  DOUT << std::string(Indent, ' ') << "Selecting: ";
+  DEBUG(Node->dump(CurDAG));
+  DOUT << "\n";
+  Indent += 2;
+  #endif
+
+  // If we have a custom node, we already have selected!
+  if (Node->isMachineOpcode()) {
+    #ifndef NDEBUG
+    DOUT << std::string(Indent-2, ' ') << "== ";
+    DEBUG(Node->dump(CurDAG));
+    DOUT << "\n";
+    Indent -= 2;
+    #endif
+    return NULL;
+  }
+
+  // Instruction Selection not handled by the auto-generated
+  // tablegen selection should be handled here.
+  ///
+  switch(Opcode) {
+
+    default: break;
+
+    case ISD::SUBE:
+    case ISD::ADDE: {
+      SDValue InFlag = Node->getOperand(2), CmpLHS;
+      unsigned Opc = InFlag.getOpcode(); Opc=Opc;
+      assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+              (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+             "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+      unsigned MOp;
+      if (Opcode == ISD::ADDE) {
+        CmpLHS = InFlag.getValue(0);
+        MOp = Mips::ADDu;
+      } else {
+        CmpLHS = InFlag.getOperand(0);
+        MOp = Mips::SUBu;
+      }
+
+      SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+      SDValue LHS = Node->getOperand(0);
+      SDValue RHS = Node->getOperand(1);
+
+      MVT VT = LHS.getValueType();
+      SDNode *Carry = CurDAG->getTargetNode(Mips::SLTu, dl, VT, Ops, 2);
+      SDNode *AddCarry = CurDAG->getTargetNode(Mips::ADDu, dl, VT,
+                                               SDValue(Carry,0), RHS);
+
+      return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag,
+                                  LHS, SDValue(AddCarry,0));
+    }
+
+    /// Mul/Div with two results
+    case ISD::SDIVREM:
+    case ISD::UDIVREM:
+    case ISD::SMUL_LOHI:
+    case ISD::UMUL_LOHI: {
+      SDValue Op1 = Node->getOperand(0);
+      SDValue Op2 = Node->getOperand(1);
+
+      unsigned Op;
+      if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI)
+        Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+      else
+        Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
+
+      SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+      SDValue InFlag = SDValue(Node, 0);
+      SDNode *Lo = CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32,
+                                         MVT::Flag, InFlag);
+      InFlag = SDValue(Lo,1);
+      SDNode *Hi = CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+      if (!N.getValue(0).use_empty())
+        ReplaceUses(N.getValue(0), SDValue(Lo,0));
+
+      if (!N.getValue(1).use_empty())
+        ReplaceUses(N.getValue(1), SDValue(Hi,0));
+
+      return NULL;
+    }
+
+    /// Special Muls
+    case ISD::MUL:
+    case ISD::MULHS:
+    case ISD::MULHU: {
+      SDValue MulOp1 = Node->getOperand(0);
+      SDValue MulOp2 = Node->getOperand(1);
+
+      unsigned MulOp  = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+      SDNode *MulNode = CurDAG->getTargetNode(MulOp, dl,
+                                              MVT::Flag, MulOp1, MulOp2);
+
+      SDValue InFlag = SDValue(MulNode, 0);
+
+      // Compare the ISD opcode (not the target opcode in MulOp) to decide
+      // whether the low (MFLO) or high (MFHI) part is wanted.
+      if (Opcode == ISD::MUL)
+        return CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, InFlag);
+      else
+        return CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+    }
+
+    /// Div/Rem operations
+    case ISD::SREM:
+    case ISD::UREM:
+    case ISD::SDIV:
+    case ISD::UDIV: {
+      SDValue Op1 = Node->getOperand(0);
+      SDValue Op2 = Node->getOperand(1);
+
+      unsigned Op, MOp;
+      if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) {
+        Op  = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
+        MOp = Mips::MFLO;
+      } else {
+        Op  = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
+        MOp = Mips::MFHI;
+      }
+      SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+      SDValue InFlag = SDValue(Node, 0);
+      return CurDAG->getTargetNode(MOp, dl, MVT::i32, InFlag);
+    }
+
+    // Get target GOT address.
+    case ISD::GLOBAL_OFFSET_TABLE: {
+      SDValue Result = getGlobalBaseReg();
+      ReplaceUses(N, Result);
+      return NULL;
+    }
+
+    /// Handle direct and indirect calls when using PIC. On PIC, when
+    /// GOT is smaller than about 64k (small code) the GA target is
+    /// loaded with only one instruction. Otherwise GA's target must
+    /// be loaded with 3 instructions.
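+    ///
+    /// Roughly, the two shapes are (a sketch, not literal output):
+    ///   small code:  lw    $25, %call16(callee)($gp)
+    ///                jalr  $25
+    ///   large code:  %call_hi/%call_lo build the callee address in
+    ///                three instructions before the jalr.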
+    case MipsISD::JmpLink: {
+      if (TM.getRelocationModel() == Reloc::PIC_) {
+        //bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
+        SDValue Chain  = Node->getOperand(0);
+        SDValue Callee = Node->getOperand(1);
+        SDValue T9Reg = CurDAG->getRegister(Mips::T9, MVT::i32);
+        SDValue InFlag(0, 0);
+
+        if ( (isa<GlobalAddressSDNode>(Callee)) ||
+             (isa<ExternalSymbolSDNode>(Callee)) )
+        {
+          /// Direct call for global addresses and external symbols
+          SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32);
+
+          // Use load to get GOT target
+          SDValue Ops[] = { Callee, GPReg, Chain };
+          SDValue Load = SDValue(CurDAG->getTargetNode(Mips::LW, dl, MVT::i32,
+                                     MVT::Other, Ops, 3), 0);
+          Chain = Load.getValue(1);
+
+          // Call target must be on T9
+          Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Load, InFlag);
+        } else
+          /// Indirect call
+          Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Callee, InFlag);
+
+        // Emit Jump and Link Register
+        SDNode *ResNode = CurDAG->getTargetNode(Mips::JALR, dl, MVT::Other,
+                                                MVT::Flag, T9Reg, Chain);
+        Chain  = SDValue(ResNode, 0);
+        InFlag = SDValue(ResNode, 1);
+        ReplaceUses(SDValue(Node, 0), Chain);
+        ReplaceUses(SDValue(Node, 1), InFlag);
+        return ResNode;
+      }
+    }
+  }
+
+  // Select the default instruction
+  SDNode *ResNode = SelectCode(N);
+
+  #ifndef NDEBUG
+  DOUT << std::string(Indent-2, ' ') << "=> ";
+  if (ResNode == NULL || ResNode == N.getNode())
+    DEBUG(N.getNode()->dump(CurDAG));
+  else
+    DEBUG(ResNode->dump(CurDAG));
+  DOUT << "\n";
+  Indent -= 2;
+  #endif
+
+  return ResNode;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+  return new MipsDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 000000000000..9281940019a9
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,1254 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+const char *MipsTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+  switch (Opcode)
+  {
+    case MipsISD::JmpLink    : return "MipsISD::JmpLink";
+    case MipsISD::Hi         : return "MipsISD::Hi";
+    case MipsISD::Lo         : return "MipsISD::Lo";
+    case MipsISD::GPRel      : return "MipsISD::GPRel";
+    case MipsISD::Ret        : return "MipsISD::Ret";
+    case MipsISD::CMov       : return "MipsISD::CMov";
+    case MipsISD::SelectCC   : return "MipsISD::SelectCC";
+    case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
+    case MipsISD::FPBrcond   : return "MipsISD::FPBrcond";
+    case MipsISD::FPCmp      : return "MipsISD::FPCmp";
+    case MipsISD::FPRound    : return "MipsISD::FPRound";
+    default                  : return NULL;
+  }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
+{
+  Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+  // Mips does not have i1 type, so use i32 for
+  // setcc operations results (slt, sgt, ...).
+  setBooleanContents(ZeroOrOneBooleanContent);
+
+  // JumpTable targets must use GOT when using PIC_
+  setUsesGlobalOffsetTable(true);
+
+  // Set up the register classes
+  addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+  addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+  // When dealing with single precision only, use libcalls
+  if (!Subtarget->isSingleFloat())
+    if (!Subtarget->isFP64bit())
+      addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+
+  // Legal fp constants
+  addLegalFPImmediate(APFloat(+0.0f));
+
+  // Load extended operations for i1 types must be promoted
+  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with an AND/OR with the result;
+  // we don't want that, since the fpcmp result goes to a flag register,
+  // which is used implicitly by brcond and select operations.
+  AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+  // Mips Custom Operations
+  setOperationAction(ISD::GlobalAddress,      MVT::i32,   Custom);
+  setOperationAction(ISD::GlobalTLSAddress,   MVT::i32,   Custom);
+  setOperationAction(ISD::RET,                MVT::Other, Custom);
+  setOperationAction(ISD::JumpTable,          MVT::i32,   Custom);
+  setOperationAction(ISD::ConstantPool,       MVT::i32,   Custom);
+  setOperationAction(ISD::SELECT,             MVT::f32,   Custom);
+  setOperationAction(ISD::SELECT,             MVT::i32,   Custom);
+  setOperationAction(ISD::SETCC,              MVT::f32,   Custom);
+  setOperationAction(ISD::SETCC,              MVT::f64,   Custom);
+  setOperationAction(ISD::BRCOND,             MVT::Other, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Custom);
+  setOperationAction(ISD::FP_TO_SINT,         MVT::i32,   Custom);
+
+  // We custom lower AND/OR to handle the case where the DAG contains
+  // 'ands/ors' whose operands come from setcc fp comparisons. This is
+  // necessary since the results of these setcc nodes are in a flag
+  // register (FCR31).
+  setOperationAction(ISD::AND, MVT::i32, Custom);
+  setOperationAction(ISD::OR,  MVT::i32, Custom);
+
+  // Operations not directly supported by Mips.
+  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC,             MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
+  setOperationAction(ISD::UINT_TO_FP,        MVT::i32,   Expand);
+  setOperationAction(ISD::FP_TO_UINT,        MVT::i32,   Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1,    Expand);
+  setOperationAction(ISD::CTPOP,             MVT::i32,   Expand);
+  setOperationAction(ISD::CTTZ,              MVT::i32,   Expand);
+  setOperationAction(ISD::ROTL,              MVT::i32,   Expand);
+  setOperationAction(ISD::SHL_PARTS,         MVT::i32,   Expand);
+  setOperationAction(ISD::SRA_PARTS,         MVT::i32,   Expand);
+  setOperationAction(ISD::SRL_PARTS,         MVT::i32,   Expand);
+  setOperationAction(ISD::FCOPYSIGN,         MVT::f32,   Expand);
+
+  // We don't have line number support yet.
+  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);
+  setOperationAction(ISD::DBG_LABEL,     MVT::Other, Expand);
+  setOperationAction(ISD::EH_LABEL,      MVT::Other, Expand);
+
+  // Use the default for now
+  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::MEMBARRIER,   MVT::Other, Expand);
+
+  if (Subtarget->isSingleFloat())
+    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+  if (!Subtarget->hasSEInReg()) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+  }
+
+  if (!Subtarget->hasBitCount())
+    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+  if (!Subtarget->hasSwap())
+    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+  setStackPointerRegisterToSaveRestore(Mips::SP);
+  computeRegisterProperties();
+}
+
+
+MVT MipsTargetLowering::getSetCCResultType(MVT VT) const {
+  return MVT::i32;
+}
+
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+  switch (Op.getOpcode())
+  {
+    case ISD::AND:                return LowerANDOR(Op, DAG);
+    case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
+    case ISD::CALL:               return LowerCALL(Op, DAG);
+    case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
+    case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+    case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
+    case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
+    case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
+    case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
+    case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
+    case ISD::OR:                 return LowerANDOR(Op, DAG);
+    case ISD::RET:                return LowerRET(Op, DAG);
+    case ISD::SELECT:             return LowerSELECT(Op, DAG);
+    case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  }
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+  assert(RC->contains(PReg) && "Not the correct regclass!");
+  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+  MF.getRegInfo().addLiveIn(PReg, VReg);
+  return VReg;
+}
+
+// An address must be loaded from the small section if its size is less than
+// the small section size threshold. Data in this section must be addressed
+// using the gp_rel operator.
+bool MipsTargetLowering::IsInSmallSection(unsigned Size) {
+  return (Size > 0 && (Size <= Subtarget->getSSectionThreshold()));
+}
+
+// Discover if this global address can be placed into small data/bss section.
+bool MipsTargetLowering::IsGlobalInSmallSection(GlobalValue *GV)
+{
+  const TargetData *TD = getTargetData();
+  const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+
+  if (!GVA)
+    return false;
+
+  const Type *Ty = GV->getType()->getElementType();
+  unsigned Size = TD->getTypeAllocSize(Ty);
+
+  // If this is an internal constant string, there is a special
+  // section for it, but not in small data/bss.
+  if (GVA->hasInitializer() && GV->hasLocalLinkage()) {
+    Constant *C = GVA->getInitializer();
+    const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+    if (CVA && CVA->isCString())
+      return false;
+  }
+
+  return IsInSmallSection(Size);
+}
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+  if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+    return Mips::BRANCH_T;
+
+  if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+    return Mips::BRANCH_F;
+
+  return Mips::BRANCH_INVALID;
+}
+
+static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
+  switch(BC) {
+    default:
+      assert(0 && "Unknown branch code");
+    case Mips::BRANCH_T  : return Mips::BC1T;
+    case Mips::BRANCH_F  : return Mips::BC1F;
+    case Mips::BRANCH_TL : return Mips::BC1TL;
+    case Mips::BRANCH_FL : return Mips::BC1FL;
+  }
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+  switch (CC) {
+    default: assert(0 && "Unknown fp condition code!");
+    case ISD::SETEQ:
+    case ISD::SETOEQ: return Mips::FCOND_EQ;
+    case ISD::SETUNE: return Mips::FCOND_OGL;
+    case ISD::SETLT:
+    case ISD::SETOLT: return Mips::FCOND_OLT;
+    case ISD::SETGT:
+    case ISD::SETOGT: return Mips::FCOND_OGT;
+    case ISD::SETLE:
+    case ISD::SETOLE: return Mips::FCOND_OLE;
+    case ISD::SETGE:
+    case ISD::SETOGE: return Mips::FCOND_OGE;
+    case ISD::SETULT: return Mips::FCOND_ULT;
+    case ISD::SETULE: return Mips::FCOND_ULE;
+    case ISD::SETUGT: return Mips::FCOND_UGT;
+    case ISD::SETUGE: return Mips::FCOND_UGE;
+    case ISD::SETUO:  return Mips::FCOND_UN;
+    case ISD::SETO:   return Mips::FCOND_OR;
+    case ISD::SETNE:
+    case ISD::SETONE: return Mips::FCOND_NEQ;
+    case ISD::SETUEQ: return Mips::FCOND_UEQ;
+  }
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  bool isFPCmp = false;
+  DebugLoc dl = MI->getDebugLoc();
+
+  switch (MI->getOpcode()) {
+  default: assert(false && "Unexpected instr type to insert");
+  case Mips::Select_FCC:
+  case Mips::Select_FCC_S32:
+  case Mips::Select_FCC_D32:
+    isFPCmp = true; // FALL THROUGH
+  case Mips::Select_CC:
+  case Mips::Select_CC_S32:
+  case Mips::Select_CC_D32: {
+    // To "insert" a SELECT_CC instruction, we actually have to insert the
+    // diamond control-flow pattern. The incoming instruction knows the
+    // destination vreg to set, the condition code register to branch on, the
+    // true/false values to select between, and a branch opcode to use.
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    MachineFunction::iterator It = BB;
+    ++It;
+
+    //  thisMBB:
+    //  ...
+    //   TrueVal = ...
+    //   setcc r1, r2, r3
+    //   bNE   r1, r0, copy1MBB
+    //   fallthrough --> copy0MBB
+    MachineBasicBlock *thisMBB  = BB;
+    MachineFunction *F = BB->getParent();
+    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
+
+    // Emit the right instruction according to the type of the operands compared
+    if (isFPCmp) {
+      // Find the condition code present in the setcc operation.
+      Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm();
+      // Get the branch opcode from the branch code.
+      unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC));
+      BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+    } else
+      BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
+        .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+    F->insert(It, copy0MBB);
+    F->insert(It, sinkMBB);
+    // Update machine-CFG edges by first adding all successors of the current
+    // block to the new block which will contain the Phi node for the select.
+    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+        e = BB->succ_end(); i != e; ++i)
+      sinkMBB->addSuccessor(*i);
+    // Next, remove all successors of the current block, and add the true
+    // and fallthrough blocks as its successors.
+    while(!BB->succ_empty())
+      BB->removeSuccessor(BB->succ_begin());
+    BB->addSuccessor(copy0MBB);
+    BB->addSuccessor(sinkMBB);
+
+    //  copy0MBB:
+    //   %FalseValue = ...
+    //   # fallthrough to sinkMBB
+    BB = copy0MBB;
+
+    // Update machine-CFG edges
+    BB->addSuccessor(sinkMBB);
+
+    //  sinkMBB:
+    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+    //  ...
+    BB = sinkMBB;
+    BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+      .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+
+    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+    return BB;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG)
+{
+  if (!Subtarget->isMips1())
+    return Op;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned CCReg = AddLiveIn(MF, Mips::FCR31, Mips::CCRRegisterClass);
+
+  SDValue Chain = DAG.getEntryNode();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Src = Op.getOperand(0);
+
+  // Set the condition register
+  SDValue CondReg = DAG.getCopyFromReg(Chain, dl, CCReg, MVT::i32);
+  CondReg = DAG.getCopyToReg(Chain, dl, Mips::AT, CondReg);
+  CondReg = DAG.getCopyFromReg(CondReg, dl, Mips::AT, MVT::i32);
+
+  SDValue Cst = DAG.getConstant(3, MVT::i32);
+  SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, CondReg, Cst);
+  Cst = DAG.getConstant(2, MVT::i32);
+  SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i32, Or, Cst);
+
+  SDValue InFlag(0, 0);
+  CondReg = DAG.getCopyToReg(Chain, dl, Mips::FCR31, Xor, InFlag);
+
+  // Emit the round instruction and bit convert to integer
+  SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32,
+                              Src, CondReg.getValue(1));
+  SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc);
+  return BitCvt;
+}
+
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Get a reference to the Mips stack pointer.
+  SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+  // Subtract the dynamic size from the actual stack size to
+  // obtain the new stack size.
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+  // The Sub result contains the new stack start address, so it
+  // must be placed in the stack pointer register.
+  Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub);
+
+  // This node always has two return values: a new stack pointer
+  // value and a chain
+  SDValue Ops[2] = { Sub, Chain };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue MipsTargetLowering::
+LowerANDOR(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp)
+    return Op;
+
+  SDValue True  = DAG.getConstant(1, MVT::i32);
+  SDValue False = DAG.getConstant(0, MVT::i32);
+
+  SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+                             LHS, True, False, LHS.getOperand(2));
+  SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+                             RHS, True, False, RHS.getOperand(2));
+
+  return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG)
+{
+  // The first operand is the chain, the second is the condition, the third is
+  // the block to branch to if the condition is true.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Dest = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
+    return Op;
+
+  SDValue CondRes = Op.getOperand(1);
+  SDValue CCNode  = CondRes.getOperand(2);
+  Mips::CondCode CC =
+    (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+  SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+  return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+                     Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSETCC(SDValue Op, SelectionDAG &DAG)
+{
+  // The operands to this are the left and right operands to compare (ops #0,
+  // and #1) and the condition code to compare them with (op #2) as a
+  // CondCodeSDNode.
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+  return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
+                     DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue Cond  = Op.getOperand(0);
+  SDValue True  = Op.getOperand(1);
+  SDValue False = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // If the incoming condition comes from an integer compare, the select
+  // operation must be SelectCC or a conditional move if the subtarget
+  // supports it.
+  if (Cond.getOpcode() != MipsISD::FPCmp) {
+    if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
+      return Op;
+    return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
+                       Cond, True, False);
+  }
+
+  // If the incoming condition comes from fpcmp, the select
+  // operation must use FPSelectCC.
+  SDValue CCNode = Cond.getOperand(2);
+  return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+                     Cond, True, False, CCNode);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+
+  if (!Subtarget->hasABICall()) {
+    SDVTList VTs = DAG.getVTList(MVT::i32);
+    SDValue Ops[] = { GA };
+    // %gp_rel relocation
+    if (!isa<Function>(GV) && IsGlobalInSmallSection(GV)) {
+      SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, Ops, 1);
+      SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+      return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+    }
+    // %hi/%lo relocation
+    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+    return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+  } else { // Abicall relocations, TODO: make this cleaner.
+    SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+                                  DAG.getEntryNode(), GA, NULL, 0);
+    // For functions, and for global targets that are not internally
+    // linked, only a load from the GOT/GP is necessary for PIC to work.
+    if (!GV->hasLocalLinkage() || isa<Function>(GV))
+      return ResNode;
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+    return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+  }
+
+  assert(0 && "Don't know how to handle GlobalAddress");
+  return SDValue(0,0);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+  assert(0 && "TLS not implemented for MIPS.");
+  return SDValue(); // Not reached
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue ResNode;
+  SDValue HiPart;
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  MVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+    SDVTList VTs = DAG.getVTList(MVT::i32);
+    SDValue Ops[] = { JTI };
+    HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+  } else // Emit Load from Global Pointer
+    HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0);
+
+  SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
+  ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+  return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue ResNode;
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  Constant *C = N->getConstVal();
+  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  // gp_rel relocation
+  // FIXME: we should reference the constant pool using small data sections,
+  // but the asm printer currently doesn't support this feature without
+  // hacking it. This feature should come soon so we can uncomment the
+  // stuff below.
+  //if (!Subtarget->hasABICall() &&
+  //    IsInSmallSection(getTargetData()->getTypeAllocSize(C->getType()))) {
+  //  SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+  //  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+  //  ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+  //} else { // %hi/%lo relocation
+    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+    ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+  //}
+
+  return ResNode;
+}
+
+//===----------------------------------------------------------------------===//
+//                      Calling Convention Implementation
+//
+//  The lower operations present on calling convention work in this order:
+//      LowerCALL (virt regs --> phys regs, virt regs --> stack)
+//      LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+//      LowerRET (virt regs --> phys regs)
+//      LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+//  i32 - Passed in A0, A1, A2, A3 and stack
+//  f32 - Only passed in f32 registers if no int reg has been used yet to hold
+//        an argument. Otherwise, passed in A1, A2, A3 and stack.
+//  f64 - Only passed in two aliased f32 registers if no int reg has been used
+//        yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+//        not used, it must be shadowed. If only A3 is available, shadow it
+//        and go to stack.
+//===----------------------------------------------------------------------===//
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+                       MVT LocVT, CCValAssign::LocInfo LocInfo,
+                       ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+  static const unsigned IntRegs[] = {
+      Mips::A0, Mips::A1, Mips::A2, Mips::A3
+  };
+  static const unsigned F32Regs[] = {
+      Mips::F12, Mips::F14
+  };
+  static const unsigned F64Regs[] = {
+      Mips::D6, Mips::D7
+  };
+
+  unsigned Reg=0;
+  unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
+  bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0)));
+
+  // Promote i8 and i16
+  if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+    LocVT = MVT::i32;
+    if (ArgFlags.isSExt())
+      LocInfo = CCValAssign::SExt;
+    else if (ArgFlags.isZExt())
+      LocInfo = CCValAssign::ZExt;
+    else
+      LocInfo = CCValAssign::AExt;
+  }
+
+  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) {
+    Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    IntRegUsed = true;
+    LocVT = MVT::i32;
+  }
+
+  if (ValVT.isFloatingPoint() && !IntRegUsed) {
+    if (ValVT == MVT::f32)
+      Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+    else
+      Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+  }
+
+  if (ValVT == MVT::f64 && IntRegUsed) {
+    if (UnallocIntReg != IntRegsSize) {
+      // If we hit register A3 as the first not allocated, we must
+      // mark it as allocated (shadow) and use the stack instead.
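+      // For example (a sketch): for arguments (i32, i32, i32, f64) the
+      // three i32s take A0..A2, A3 is then shadowed, and the f64 goes to
+      // the stack; for (i32, f64) the f64 lands in A2/A3 with A1 shadowed.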
+      if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3)))
+        Reg = Mips::A2;
+      for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg)
+        State.AllocateReg(IntRegs[UnallocIntReg]);
+    }
+    LocVT = MVT::i32;
+  }
+
+  if (!Reg) {
+    unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+    unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  } else
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+  return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+//                  CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCALL - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall.
+SDValue MipsTargetLowering::
+LowerCALL(SDValue Op, SelectionDAG &DAG)
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain = TheCall->getChain();
+  SDValue Callee = TheCall->getCallee();
+  bool isVarArg = TheCall->isVarArg();
+  unsigned CC = TheCall->getCallingConv();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  // To meet O32 ABI, Mips must always allocate 16 bytes on
+  // the stack (even if less than 4 are used as arguments)
+  if (Subtarget->isABI_O32()) {
+    int VTsize = MVT(MVT::i32).getSizeInBits()/8;
+    MFI->CreateFixedObject(VTsize, (VTsize*3));
+    CCInfo.AnalyzeCallOperands(TheCall, CC_MipsO32);
+  } else
+    CCInfo.AnalyzeCallOperands(TheCall, CC_Mips);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  // With EABI it is possible to have 16 args in registers.
+  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+  SmallVector<SDValue, 16> MemOpChains;
+
+  // First/LastArgStackLoc contains the first/last
+  // "at stack" argument location.
+  int LastArgStackLoc = 0;
+  unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    SDValue Arg = TheCall->getArg(i);
+    CCValAssign &VA = ArgLocs[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full:
+      if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+        if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+          Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+        if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+          Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+          SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+                                   DAG.getConstant(0, getPointerTy()));
+          SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+                                   DAG.getConstant(1, getPointerTy()));
+          RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+          RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+          continue;
+        }
+      }
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+
+    // Arguments that can be passed in a register are kept in
+    // the RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
+    }
+
+    // Registers can't get to this point...
+    assert(VA.isMemLoc());
+
+    // Create the frame index object for this incoming parameter.
+    // This guarantees that when allocating the Local Area the first
+    // 16 bytes, which are always reserved, won't be overwritten
+    // if the O32 ABI is used. For EABI the first address is zero.
+    LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+    int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                    LastArgStackLoc);
+
+    SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+    // Emit an ISD::STORE which stores the
+    // parameter value to a stack location.
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+  // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+  //             = Chain, Callee, Reg#1, Reg#2, ...
+  //
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Create a stack location to hold GP when PIC is used. This stack
+  // location is used in the function prologue to save GP and also after
+  // all emitted calls to restore GP.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    // The function can have an arbitrary number of calls, so
+    // hold the LastArgStackLoc with the biggest offset.
+    int FI;
+    MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+    if (LastArgStackLoc >= MipsFI->getGPStackOffset()) {
+      LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4);
+      // Create the frame index only once. SPOffset here can be anything
+      // (it will be fixed in processFunctionBeforeFrameFinalized).
+      if (MipsFI->getGPStackOffset() == -1) {
+        FI = MFI->CreateFixedObject(4, 0);
+        MipsFI->setGPFI(FI);
+      }
+      MipsFI->setGPStackOffset(LastArgStackLoc);
+    }
+
+    // Reload GP value.
+    FI = MipsFI->getGPFI();
+    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+    SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0);
+    Chain = GPLoad.getValue(1);
+    Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
+                             GPLoad, SDValue(0,0));
+    InFlag = Chain.getValue(1);
+  }
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+                 Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *MipsTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+                unsigned CallingConv, SelectionDAG &DAG) {
+
+  bool isVarArg = TheCall->isVarArg();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+  CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips);
+  SmallVector<SDValue, 8> ResultVals;
+
+  // Copy all of the result registers out of their specified physregs.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+                               RVLocs[i].getValVT(), InFlag).getValue(1);
+    InFlag = Chain.getValue(2);
+    ResultVals.push_back(Chain.getValue(0));
+  }
+
+  ResultVals.push_back(Chain);
+
+  // Merge everything together with a MERGE_VALUES node.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+                     &ResultVals[0], ResultVals.size()).getNode();
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg
+SDValue MipsTargetLowering::
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue Root = Op.getOperand(0);
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  DebugLoc dl = Op.getDebugLoc();
+
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+
+  unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+  // GP must be live into PIC and non-PIC call targets.
+  AddLiveIn(MF, Mips::GP, Mips::CPURegsRegisterClass);
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  if (Subtarget->isABI_O32())
+    CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MipsO32);
+  else
+    CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_Mips);
+
+  SmallVector<SDValue, 16> ArgValues;
+  SDValue StackPtr;
+
+  unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    // Arguments stored in registers
+    if (VA.isRegLoc()) {
+      MVT RegVT = VA.getLocVT();
+      TargetRegisterClass *RC = 0;
+
+      if (RegVT == MVT::i32)
+        RC = Mips::CPURegsRegisterClass;
+      else if (RegVT == MVT::f32)
+        RC = Mips::FGR32RegisterClass;
+      else if (RegVT == MVT::f64) {
+        if (!Subtarget->isSingleFloat())
+          RC = Mips::AFGR64RegisterClass;
+      } else
+        assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+      // Transform the arguments stored in
+      // physical registers into virtual ones.
+      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+      // If this is an 8- or 16-bit value, it has been passed promoted
+      // to 32 bits. Insert an assert[sz]ext to capture this, then
+      // truncate to the right size.
+      if (VA.getLocInfo() != CCValAssign::Full) {
+        unsigned Opcode = 0;
+        if (VA.getLocInfo() == CCValAssign::SExt)
+          Opcode = ISD::AssertSext;
+        else if (VA.getLocInfo() == CCValAssign::ZExt)
+          Opcode = ISD::AssertZext;
+        if (Opcode)
+          ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+                                 DAG.getValueType(VA.getValVT()));
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+      }
+
+      // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+      if (Subtarget->isABI_O32()) {
+        if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+        if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+          unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+                                    VA.getLocReg()+1, RC);
+          SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg2, RegVT);
+          SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+          SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
+          ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
+        }
+      }
+
+      ArgValues.push_back(ArgValue);
+
+      // To meet the ABI, when VARARGS are passed in registers, the registers
+      // must have their values written to the caller's stack frame.
+      if ((isVarArg) && (Subtarget->isABI_O32())) {
+        if (StackPtr.getNode() == 0)
+          StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+        // The stack pointer offset is relative to the caller's stack frame.
+        // Since the real stack size is unknown here, a negative SPOffset
+        // is used so there's a way to adjust these offsets when the stack
+        // size gets known (in EliminateFrameIndex). A dummy SPOffset is
+        // used instead of a direct negative address (which is recorded to
+        // be used in emitPrologue) to avoid miscalculation of the first
+        // stack offset in PEI::calculateFrameObjectOffsets.
+        // Arguments are always 32-bit.
+        int FI = MFI->CreateFixedObject(4, 0);
+        MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4)));
+        SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+        // Emit an ISD::STORE which stores the
+        // parameter value to a stack location.
+        ArgValues.push_back(DAG.getStore(Root, dl, ArgValue, PtrOff, NULL, 0));
+      }
+
+    } else { // VA.isRegLoc()
+
+      // sanity check
+      assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller's stack frame.
+      // Since the real stack size is unknown here, a negative SPOffset
+      // is used so there's a way to adjust these offsets when the stack
+      // size gets known (in EliminateFrameIndex). A dummy SPOffset is
+      // used instead of a direct negative address (which is recorded to
+      // be used in emitPrologue) to avoid miscalculation of the first
+      // stack offset in PEI::calculateFrameObjectOffsets.
+      // Arguments are always 32-bit.
+      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+      int FI = MFI->CreateFixedObject(ArgSize, 0);
+      MipsFI->recordLoadArgsFI(FI, -(ArgSize+
+                               (FirstStackArgLoc + VA.getLocMemOffset())));
+
+      // Create load nodes to retrieve arguments from the stack.
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+    }
+  }
+
+  // The Mips ABIs for returning structs by value require that we copy
+  // the sret argument into $v0 for the return. Save the argument into
+  // a virtual register so that we can access it from the return points.
+  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    unsigned Reg = MipsFI->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+      MipsFI->setSRetReturnReg(Reg);
+    }
+    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerRET(SDValue Op, SelectionDAG &DAG)
+{
+  // CCValAssign - represents the assignment of
+  // the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // CCState - Info about the registers and stack slots.
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze the return values of ISD::RET.
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Mips);
+
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function.
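Before the liveout registration below, note that AnalyzeReturn consults the tablegen-generated RetCC_Mips table, which lives in MipsCallingConv.td and is not shown in this hunk. As a rough sketch of its effect for integer results, the first two i32 values land in $v0 and $v1 (register identities here are illustrative placeholders, not the real LLVM encodings):

  #include <cassert>

  // Rough sketch of what RetCC_Mips does for i32 results: the first
  // value returns in V0, the second in V1.
  enum SketchReg { V0, V1, NoReg };

  static SketchReg retRegForValue(unsigned ValNo) {
    static const SketchReg IntRetRegs[] = { V0, V1 };
    return ValNo < 2 ? IntRetRegs[ValNo] : NoReg;
  }

  int main() {
    assert(retRegForValue(0) == V0);
    assert(retRegForValue(1) == V1);
    assert(retRegForValue(2) == NoReg);  // no third integer return register
    return 0;
  }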
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  // The chain is always operand #0.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (regnum1,val1), ...
+    // so i*2+1 indexes only the regnums.
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are
+    // stuck together, avoiding something bad.
+    Flag = Chain.getValue(1);
+  }
+
+  // The Mips ABIs for returning structs by value require that we copy
+  // the sret argument into $v0 for the return. We saved the argument into
+  // a virtual register in the entry block, so now we copy the value out
+  // and into $v0.
+  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+    unsigned Reg = MipsFI->getSRetReturnReg();
+
+    if (!Reg)
+      assert(0 && "sret virtual register not created in the entry block");
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+    Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  // Return on Mips is always a "jr $ra".
+  if (Flag.getNode())
+    return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+                       Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+  else // Return Void
+    return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+                       Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+  // Mips-specific constraints
+  // GCC config/mips/constraints.md
+  //
+  // 'd' : An address register. Equivalent to r
+  //       unless generating MIPS16 code.
+  // 'y' : Equivalent to r; retained for
+  //       backwards compatibility.
+  // 'f' : Floating Point registers.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+      default : break;
+      case 'd':
+      case 'y':
+      case 'f':
+        return C_RegisterClass;
+        break;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return a register and register class that can be used to satisfy the
+/// constraint. This should only be used for C_RegisterClass constraints.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
+{
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return std::make_pair(0U, Mips::CPURegsRegisterClass);
+    case 'f':
+      if (VT == MVT::f32)
+        return std::make_pair(0U, Mips::FGR32RegisterClass);
+      if (VT == MVT::f64)
+        if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+          return std::make_pair(0U, Mips::AFGR64RegisterClass);
+    }
+  }
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::vector<unsigned> MipsTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT VT) const
+{
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+
+  switch (Constraint[0]) {
+  default : break;
+  case 'r':
+  // GCC Mips Constraint Letters
+  case 'd':
+  case 'y':
+    return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+           Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
+           Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+           Mips::T8, 0);
+
+  case 'f':
+    if (VT == MVT::f32) {
+      if (Subtarget->isSingleFloat())
+        return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5,
+               Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11,
+               Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24,
+               Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+               Mips::F30, Mips::F31, 0);
+      else
+        return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
+               Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
+               Mips::F28, Mips::F30, 0);
+    }
+
+    if (VT == MVT::f64)
+      if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+        return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
+               Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
+               Mips::D14, Mips::D15, 0);
+  }
+  return std::vector<unsigned>();
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The Mips target isn't yet aware of offsets.
+  return false;
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 000000000000..55cd6eadd096
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,130 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+  namespace MipsISD {
+    enum NodeType {
+      // Start the numbering from where ISD NodeType finishes.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      // Jump and link (call)
+      JmpLink,
+
+      // Get the Higher 16 bits from a 32-bit immediate.
+      // No relation with Mips Hi register.
+      Hi,
+
+      // Get the Lower 16 bits from a 32-bit immediate.
+      // No relation with Mips Lo register.
+      Lo,
+
+      // Handle gp_rel (small data/bss sections) relocation.
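The Hi and Lo nodes above model the classic lui/addiu address split (the enum continues below with GPRel). A sketch of the arithmetic, which is normally performed by the assembler/linker when it resolves %hi/%lo relocations: because the low half is consumed sign-extended, the high half carries a +0x8000 adjustment (the cast to int16_t assumes the usual two's-complement behavior):

  #include <cassert>
  #include <cstdint>

  // Sketch of the %hi/%lo split behind MipsISD::Hi and MipsISD::Lo.
  // 'lo' is consumed sign-extended (addiu), so 'hi' gets a carry
  // adjustment of 0x8000 to compensate.
  static uint32_t hi16(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
  static uint32_t lo16(uint32_t Addr) { return Addr & 0xffff; }

  static uint32_t rebuild(uint32_t Addr) {
    // lui reg, hi16 ; addiu reg, reg, lo16  (lo16 sign-extends)
    return (hi16(Addr) << 16) + static_cast<int16_t>(lo16(Addr));
  }

  int main() {
    const uint32_t Tests[] = { 0x00000000u, 0x00018000u, 0x12348765u,
                               0xffffffffu };
    for (uint32_t A : Tests)
      assert(rebuild(A) == A);  // round-trips for every 32-bit address
    return 0;
  }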
+ GPRel, + + // Conditional Move + CMov, + + // Select CC Pseudo Instruction + SelectCC, + + // Floating Point Select CC Pseudo Instruction + FPSelectCC, + + // Floating Point Branch Conditional + FPBrcond, + + // Floating Point Compare + FPCmp, + + // Floating Point Rounding + FPRound, + + // Return + Ret + }; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Implementation + //===--------------------------------------------------------------------===// + class MipsTargetLowering : public TargetLowering + { + // FrameIndex for return slot. + int ReturnAddrIndex; + public: + + explicit MipsTargetLowering(MipsTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - get the ISD::SETCC result ValueType + MVT getSetCCResultType(MVT VT) const; + + private: + // Subtarget Info + const MipsSubtarget *Subtarget; + + // Lower Operand helpers + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG); + bool IsGlobalInSmallSection(GlobalValue *GV); + bool IsInSmallSection(unsigned Size); + + // Lower Operand specifics + SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + // Inline asm support + ConstraintType getConstraintType(const std::string &Constraint) const; + + std::pair + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + std::vector + getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + }; +} + +#endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td new file mode 100644 index 000000000000..b6a6d2f5c052 --- /dev/null +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -0,0 +1,304 @@ +//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Floating Point Instructions +// ------------------------ +// * 64bit fp: +// - 32 64-bit registers (default mode) +// - 16 even 32-bit registers (32-bit compatible mode) for +// single and double access. +// * 32bit fp: +// - 16 even 32-bit registers - single and double (aliased) +// - 32 32-bit registers (within single-only mode) +//===----------------------------------------------------------------------===// + +// Floating Point Compare and Branch +def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>, + SDTCisVT<1, OtherVT>]>; +def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>, + SDTCisInt<2>]>; +def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; + +def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>; +def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, + [SDNPHasChain]>; +def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>; +def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>; + +// Operand for printing out a condition code. +let PrintMethod = "printFCCOperand" in + def condcode : Operand; + +//===----------------------------------------------------------------------===// +// Feature predicates. +//===----------------------------------------------------------------------===// + +def In32BitMode : Predicate<"!Subtarget.isFP64bit()">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +// +// A set of multiclasses is used to address the register usage. +// +// S32 - single precision in 16 32bit even fp registers +// single precision in 32 32bit fp registers in SingleOnly mode +// S64 - single precision in 32 64bit fp registers (In64BitMode) +// D32 - double precision in 16 32bit even fp registers +// D64 - double precision in 32 64bit fp registers (In64BitMode) +// +// Only S32 and D32 are supported right now. 
+//===----------------------------------------------------------------------===// + +multiclass FFR1_1 funct, string asmstr> +{ + def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), + !strconcat(asmstr, ".s $fd, $fs"), []>; + + def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs), + !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>; +} + +multiclass FFR1_2 funct, string asmstr, SDNode FOp> +{ + def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), + !strconcat(asmstr, ".s $fd, $fs"), + [(set FGR32:$fd, (FOp FGR32:$fs))]>; + + def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), + !strconcat(asmstr, ".d $fd, $fs"), + [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>; +} + +class FFR1_3 funct, bits<5> fmt, RegisterClass RcSrc, + RegisterClass RcDst, string asmstr>: + FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), + !strconcat(asmstr, " $fd, $fs"), []>; + + +multiclass FFR1_4 funct, string asmstr, SDNode FOp> { + def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), + (ins FGR32:$fs, FGR32:$ft), + !strconcat(asmstr, ".s $fd, $fs, $ft"), + [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>; + + def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), + (ins AFGR64:$fs, AFGR64:$ft), + !strconcat(asmstr, ".d $fd, $fs, $ft"), + [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>, + Requires<[In32BitMode]>; +} + +//===----------------------------------------------------------------------===// +// Floating Point Instructions +//===----------------------------------------------------------------------===// + +let ft = 0 in { + defm FLOOR_W : FFR1_1<0b001111, "floor.w">; + defm CEIL_W : FFR1_1<0b001110, "ceil.w">; + defm ROUND_W : FFR1_1<0b001100, "round.w">; + defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; + defm CVTW : FFR1_1<0b100100, "cvt.w">; + defm FMOV : FFR1_1<0b000110, "mov">; + + defm FABS : FFR1_2<0b000101, "abs", fabs>; + defm FNEG : FFR1_2<0b000111, "neg", fneg>; + defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>; + + /// Convert to Single Precison + def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">; + + let Predicates = [IsNotSingleFloat] in { + /// Ceil to long signed integer + def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">; + def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">; + + /// Round to long signed integer + def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">; + def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">; + + /// Floor to long signed integer + def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">; + def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">; + + /// Trunc to long signed integer + def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">; + def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">; + + /// Convert to long signed integer + def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; + def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; + + /// Convert to Double Precison + def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; + def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; + def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; + + /// Convert to Single Precison + def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">; + def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; + } +} + +// The odd-numbered registers are only referenced when doing loads, +// stores, and moves between floating-point and integer registers. 
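In other words, in the 32-bit FPU mode described above each 64-bit value occupies an even/odd pair of single-precision registers, which is why the D32 variants only ever name even singles (the comment continues below with how instructions are defined). A sketch of that aliasing; the indices are illustrative, not LLVM's internal register numbering:

  #include <cassert>

  // Sketch of 32-bit FPU register aliasing: each 64-bit AFGR64 register
  // D<n> overlays the even/odd single pair F<2n> and F<2n+1>.
  struct SinglePair { unsigned Even, Odd; };

  static SinglePair singlesOf(unsigned DoubleIdx) {
    return { 2 * DoubleIdx, 2 * DoubleIdx + 1 };
  }

  int main() {
    SinglePair P = singlesOf(5);       // D5 overlays F10/F11
    assert(P.Even == 10 && P.Odd == 11);
    return 0;
  }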
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+  /// Move Control Registers From/To CPU Registers
+  def CFC1  : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+                  "cfc1 $rt, $fs", []>;
+
+  def CTC1  : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+                  "ctc1 $fs, $rt", []>;
+
+  def MFC1  : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+                  "mfc1 $rt, $fs", []>;
+
+  def MTC1  : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+                  "mtc1 $rt, $fs", []>;
+}
+
+/// Floating Point Memory Instructions
+let Predicates = [IsNotSingleFloat] in {
+  def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+                 "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
+
+  def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+                 "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+}
+
+// LWC1 and SWC1 can always be emitted with odd registers.
+def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr",
+               [(set FGR32:$ft, (load addr:$addr))]>;
+def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr",
+               [(store FGR32:$ft, addr:$addr)]>;
+
+/// Floating-point Arithmetic
+defm FADD : FFR1_4<0x10, "add", fadd>;
+defm FDIV : FFR1_4<0x03, "div", fdiv>;
+defm FMUL : FFR1_4<0x02, "mul", fmul>;
+defm FSUB : FFR1_4<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
+def MIPS_BRANCH_F  : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T  : PatLeaf<(i32 1)>;
+def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
+def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in {
+  class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+        (ins brtarget:$dst), !strconcat(asmstr, " $dst"),
+        [(MipsFPBrcond op, bb:$dst, FCR31)]>;
+}
+def BC1F  : FBRANCH<MIPS_BRANCH_F,  "bc1f">;
+def BC1T  : FBRANCH<MIPS_BRANCH_T,  "bc1t">;
+def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
+def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
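These sixteen codes, defined next as MIPS_FCOND_* leaves, fill the 4-bit cc field of c.cond.fmt. A compare-and-branch is inverted not by changing cc but by flipping bc1t/bc1f, which is what the paired FCOND_* values in MipsInstrInfo.h encode. A sketch of that inversion rule (toy types, drawn from the pairing visible in GetOppositeBranchCondition later in this patch):

  #include <cassert>

  // Each c.cond.fmt condition code is tested either with bc1t ("branch
  // on true") or bc1f; inversion keeps the cc field and flips the sense.
  enum Branch { BC1F, BC1T };

  struct FPBranch { unsigned CC; Branch Br; };  // CC is the 4-bit cond field

  static FPBranch invert(FPBranch B) {
    return { B.CC, B.Br == BC1T ? BC1F : BC1T };
  }

  int main() {
    FPBranch OLT = { 4, BC1T };        // "branch if ordered less-than"
    FPBranch NotOLT = invert(OLT);     // same cc, opposite sense (UGE)
    assert(NotOLT.CC == 4 && NotOLT.Br == BC1F);
    return 0;
  }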
+def MIPS_FCOND_F    : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN   : PatLeaf<(i32 1)>;
+def MIPS_FCOND_EQ   : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ  : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT  : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT  : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE  : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE  : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF   : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ  : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL  : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT   : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE  : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE   : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT  : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let hasDelaySlot = 1, Defs=[FCR31] in {
+  def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+                     "c.$cc.s $fs, $ft",
+                     [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc),
+                      (implicit FCR31)]>;
+
+  def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+                     "c.$cc.d $fs, $ft",
+                     [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc),
+                      (implicit FCR31)]>, Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+
+// For some explanation, see Select_CC at MipsInstrInfo.td. We also embed a
+// condition code to enable easy handling by the Custom Inserter.
+let usesCustomDAGSchedInserter = 1, Uses=[FCR31] in {
+  class PseudoFPSelCC<RegisterClass RC, string asmstr> :
+    MipsPseudo<(outs RC:$dst),
+               (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr,
+               [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F,
+                imm:$cc))]>;
+}
+
+// The values to be selected are fp but the condition test is with integers.
+def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_f32">;
+def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_f64">,
+                    Requires<[In32BitMode]>;
+
+// The values to be selected are int but the condition test is done with fp.
+def Select_FCC : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">;
+
+// The values to be selected and the condition test are both fp.
+def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_f32">;
+def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_f64">,
+                     Requires<[In32BitMode]>;
+
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+                             "# MOVCCRToCCR", []>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+
+def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
+def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
+
+let Predicates = [In32BitMode] in {
+  def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
+  def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+}
+
+// MipsFPRound is only emitted for MipsI targets.
+def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
+
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 000000000000..0853272f7280
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,182 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describes the MIPS instruction formats.
+//
+// CPU INSTRUCTION FORMATS
+//
+//   opcode - operation code.
+//   rs     - src reg.
+//   rt     - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+//   rd     - dst reg, only used on 3 regs instr.
+//   shamt  - only used on shift instructions, contains the shift amount.
+//   funct  - combined with opcode field gives us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+               InstrItinClass itin>: Instruction
+{
+  field bits<32> Inst;
+
+  let Namespace = "Mips";
+
+  bits<6> opcode;
+
+  // Top 6 bits are the 'opcode' field.
+  let Inst{31-26} = opcode;
+
+  dag OutOperandList = outs;
+  dag InOperandList  = ins;
+
+  let AsmString = asmstr;
+  let Pattern   = pattern;
+  let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+  MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+         list<dag> pattern, InstrItinClass itin>:
+  MipsInst<outs, ins, asmstr, pattern, itin>
+{
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<5> shamt;
+  bits<6> funct;
+
+  let opcode = op;
+  let funct  = _funct;
+
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = rd;
+  let Inst{10-6}  = shamt;
+  let Inst{5-0}   = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+  bits<5>  rt;
+  bits<5>  rs;
+  bits<16> imm16;
+
+  let opcode = op;
+
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-0}  = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+  bits<26> addr;
+
+  let opcode = op;
+
+  let Inst{25-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+//   opcode - operation code.
+//   fs     - src reg.
+//   ft     - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+//   fd     - dst reg, only used on 3 regs instr.
+//   fmt    - double or single precision.
+//   funct  - combined with opcode field gives us an operation code.
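As a concrete check of the R-format layout defined in class FR above (the floating-point formats follow below), the same bit packing can be written directly in C++. The field values for addu follow the MIPS convention of opcode 0 / funct 0x21; this sketch is hand-derived, not produced from the tablegen output:

  #include <cassert>
  #include <cstdint>

  // Sketch of the R-format packing performed by 'class FR' above:
  // |opcode|rs|rt|rd|shamt|funct| = 6+5+5+5+5+6 = 32 bits.
  static uint32_t encodeR(unsigned Opcode, unsigned Rs, unsigned Rt,
                          unsigned Rd, unsigned Shamt, unsigned Funct) {
    return (Opcode << 26) | (Rs << 21) | (Rt << 16) |
           (Rd << 11) | (Shamt << 6) | Funct;
  }

  int main() {
    // addu $1, $2, $3 : opcode 0, rs=2, rt=3, rd=1, shamt 0, funct 0x21.
    assert(encodeR(0, 2, 3, 1, 0, 0x21) == 0x00430821);
    return 0;
  }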
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|> +//===----------------------------------------------------------------------===// + +class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, + string asmstr, list pattern> : + MipsInst +{ + bits<5> fd; + bits<5> fs; + bits<5> ft; + bits<5> fmt; + bits<6> funct; + + let opcode = op; + let funct = _funct; + let fmt = _fmt; + + let Inst{25-21} = fmt; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// +// Format FI instruction class in Mips : <|opcode|base|ft|immediate|> +//===----------------------------------------------------------------------===// + +class FFI op, dag outs, dag ins, string asmstr, list pattern>: + MipsInst +{ + bits<5> ft; + bits<5> base; + bits<16> imm16; + + let opcode = op; + + let Inst{25-21} = base; + let Inst{20-16} = ft; + let Inst{15-0} = imm16; +} + +//===----------------------------------------------------------------------===// +// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|> +//===----------------------------------------------------------------------===// + +class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : + MipsInst +{ + bits<5> fs; + bits<5> ft; + bits<4> cc; + bits<5> fmt; + + let opcode = 0x11; + let fmt = _fmt; + + let Inst{25-21} = fmt; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-6} = 0; + let Inst{5-4} = 0b11; + let Inst{3-0} = cc; +} diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp new file mode 100644 index 000000000000..6225fa9c9884 --- /dev/null +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -0,0 +1,623 @@ +//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "MipsInstrInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "MipsGenInstrInfo.inc" + +using namespace llvm; + +MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) + : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +static bool isZeroImm(const MachineOperand &op) { + return op.isImm() && op.getImm() == 0; +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +bool MipsInstrInfo:: +isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const +{ + SrcSubIdx = DstSubIdx = 0; // No sub-registers. 
+ + // addu $dst, $src, $zero || addu $dst, $zero, $src + // or $dst, $src, $zero || or $dst, $zero, $src + if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) { + if (MI.getOperand(1).getReg() == Mips::ZERO) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + return true; + } else if (MI.getOperand(2).getReg() == Mips::ZERO) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + // mov $fpDst, $fpSrc + // mfc $gpDst, $fpSrc + // mtc $fpDst, $gpSrc + if (MI.getOpcode() == Mips::FMOV_S32 || + MI.getOpcode() == Mips::FMOV_D32 || + MI.getOpcode() == Mips::MFC1 || + MI.getOpcode() == Mips::MTC1 || + MI.getOpcode() == Mips::MOVCCRToCCR) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + + // addiu $dst, $src, 0 + if (MI.getOpcode() == Mips::ADDiu) { + if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MipsInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || + (MI->getOpcode() == Mips::LDC1)) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MipsInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || + (MI->getOpcode() == Mips::SDC1)) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// insertNoop - If data hazard condition is found insert the target nop +/// instruction. 
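isLoadFromStackSlot and isStoreToStackSlot above both match one concrete operand shape: the opcode of a load/store, a zero immediate, then a frame index (insertNoop itself follows next). A toy sketch of that test, using toy structs rather than the LLVM MachineInstr API:

  #include <cassert>

  // Sketch of the operand shape matched above: a stack reload is
  // "LW dst, 0, FI" -- destination register, a zero immediate, then a
  // frame index.
  struct ToyOperand { enum { Reg, Imm, FI } Kind; int Val; };
  struct ToyInstr  { unsigned Opcode; ToyOperand Ops[3]; };

  enum { LW = 1 };

  static int loadedFrameIndex(const ToyInstr &MI) {
    if (MI.Opcode == LW && MI.Ops[1].Kind == ToyOperand::Imm &&
        MI.Ops[1].Val == 0 && MI.Ops[2].Kind == ToyOperand::FI)
      return MI.Ops[2].Val;      // the slot being reloaded
    return -1;                   // not a simple stack reload
  }

  int main() {
    ToyInstr Reload = { LW, { { ToyOperand::Reg, 4 },
                              { ToyOperand::Imm, 0 },
                              { ToyOperand::FI,  7 } } };
    assert(loadedFrameIndex(Reload) == 7);
    return 0;
  }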
+void MipsInstrInfo:: +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const +{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(Mips::NOP)); +} + +bool MipsInstrInfo:: +copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (DestRC != SrcRC) { + + // Copy to/from FCR31 condition register + if ((DestRC == Mips::CPURegsRegisterClass) && + (SrcRC == Mips::CCRRegisterClass)) + BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg); + else if ((DestRC == Mips::CCRRegisterClass) && + (SrcRC == Mips::CPURegsRegisterClass)) + BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg); + + // Moves between coprocessors and cpu + else if ((DestRC == Mips::CPURegsRegisterClass) && + (SrcRC == Mips::FGR32RegisterClass)) + BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg); + else if ((DestRC == Mips::FGR32RegisterClass) && + (SrcRC == Mips::CPURegsRegisterClass)) + BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg); + + // Move from/to Hi/Lo registers + else if ((DestRC == Mips::HILORegisterClass) && + (SrcRC == Mips::CPURegsRegisterClass)) { + unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO; + BuildMI(MBB, I, DL, get(Opc), DestReg); + } else if ((SrcRC == Mips::HILORegisterClass) && + (DestRC == Mips::CPURegsRegisterClass)) { + unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO; + BuildMI(MBB, I, DL, get(Opc), DestReg); + + // Can't copy this register + } else + return false; + + return true; + } + + if (DestRC == Mips::CPURegsRegisterClass) + BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) + .addReg(SrcReg); + else if (DestRC == Mips::FGR32RegisterClass) + BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg); + else if (DestRC == Mips::AFGR64RegisterClass) + BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg); + else if (DestRC == Mips::CCRRegisterClass) + BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg); + else + // Can't copy this register + return false; + + return true; +} + +void MipsInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + unsigned Opc; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == Mips::CPURegsRegisterClass) + Opc = Mips::SW; + else if (RC == Mips::FGR32RegisterClass) + Opc = Mips::SWC1; + else { + assert(RC == Mips::AFGR64RegisterClass); + Opc = Mips::SDC1; + } + + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); +} + +void MipsInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, SmallVectorImpl &Addr, + const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const +{ + unsigned Opc; + if (RC == Mips::CPURegsRegisterClass) + Opc = Mips::SW; + else if (RC == Mips::FGR32RegisterClass) + Opc = Mips::SWC1; + else { + assert(RC == Mips::AFGR64RegisterClass); + Opc = Mips::SDC1; + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + 
NewMIs.push_back(MIB); + return; +} + +void MipsInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const +{ + unsigned Opc; + if (RC == Mips::CPURegsRegisterClass) + Opc = Mips::LW; + else if (RC == Mips::FGR32RegisterClass) + Opc = Mips::LWC1; + else { + assert(RC == Mips::AFGR64RegisterClass); + Opc = Mips::LDC1; + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI); +} + +void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + unsigned Opc; + if (RC == Mips::CPURegsRegisterClass) + Opc = Mips::LW; + else if (RC == Mips::FGR32RegisterClass) + Opc = Mips::LWC1; + else { + assert(RC == Mips::AFGR64RegisterClass); + Opc = Mips::LDC1; + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +MachineInstr *MipsInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, int FI) const +{ + if (Ops.size() != 1) return NULL; + + MachineInstr *NewMI = NULL; + + switch (MI->getOpcode()) { + case Mips::ADDu: + if ((MI->getOperand(0).isReg()) && + (MI->getOperand(1).isReg()) && + (MI->getOperand(1).getReg() == Mips::ZERO) && + (MI->getOperand(2).isReg())) { + if (Ops[0] == 0) { // COPY -> STORE + unsigned SrcReg = MI->getOperand(2).getReg(); + bool isKill = MI->getOperand(2).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW)) + .addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addImm(0).addFrameIndex(FI); + } + } + break; + case Mips::FMOV_S32: + case Mips::FMOV_D32: + if ((MI->getOperand(0).isReg()) && + (MI->getOperand(1).isReg())) { + const TargetRegisterClass + *RC = RI.getRegClass(MI->getOperand(0).getReg()); + unsigned StoreOpc, LoadOpc; + + if (RC == Mips::FGR32RegisterClass) { + LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1; + } else { + assert(RC == Mips::AFGR64RegisterClass); + LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1; + } + + if (Ops[0] == 0) { // COPY -> STORE + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI) ; + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addImm(0).addFrameIndex(FI); + } + } + break; + } + + return NewMI; +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// + +/// GetCondFromBranchOpc - Return the Mips CC that matches +/// the correspondent Branch instruction opcode. 
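GetCondFromBranchOpc below and GetCondBranchFromCond after it are mutual inverses for the integer branches. A compact sketch of that invariant with a toy table; the enums are placeholders, not the real Mips:: opcodes:

  #include <cassert>

  // Branch opcode -> condition code and back. Keeping one table makes
  // the round trip condToOpc(OpcToCond[Op]) == Op easy to audit.
  enum Opc  { BEQ, BNE, BGTZ, NUM_OPC };
  enum Cond { COND_E, COND_NE, COND_GZ, COND_INVALID };

  static const Cond OpcToCond[NUM_OPC] = { COND_E, COND_NE, COND_GZ };

  static Opc condToOpc(Cond C) {
    for (int I = 0; I != NUM_OPC; ++I)
      if (OpcToCond[I] == C)
        return static_cast<Opc>(I);
    assert(0 && "illegal condition code");
    return BEQ;
  }

  int main() {
    for (int I = 0; I != NUM_OPC; ++I)
      assert(condToOpc(OpcToCond[I]) == static_cast<Opc>(I));
    return 0;
  }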
+static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc) +{ + switch (BrOpc) { + default: return Mips::COND_INVALID; + case Mips::BEQ : return Mips::COND_E; + case Mips::BNE : return Mips::COND_NE; + case Mips::BGTZ : return Mips::COND_GZ; + case Mips::BGEZ : return Mips::COND_GEZ; + case Mips::BLTZ : return Mips::COND_LZ; + case Mips::BLEZ : return Mips::COND_LEZ; + + // We dont do fp branch analysis yet! + case Mips::BC1T : + case Mips::BC1F : return Mips::COND_INVALID; + } +} + +/// GetCondBranchFromCond - Return the Branch instruction +/// opcode that matches the cc. +unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) +{ + switch (CC) { + default: assert(0 && "Illegal condition code!"); + case Mips::COND_E : return Mips::BEQ; + case Mips::COND_NE : return Mips::BNE; + case Mips::COND_GZ : return Mips::BGTZ; + case Mips::COND_GEZ : return Mips::BGEZ; + case Mips::COND_LZ : return Mips::BLTZ; + case Mips::COND_LEZ : return Mips::BLEZ; + + case Mips::FCOND_F: + case Mips::FCOND_UN: + case Mips::FCOND_EQ: + case Mips::FCOND_UEQ: + case Mips::FCOND_OLT: + case Mips::FCOND_ULT: + case Mips::FCOND_OLE: + case Mips::FCOND_ULE: + case Mips::FCOND_SF: + case Mips::FCOND_NGLE: + case Mips::FCOND_SEQ: + case Mips::FCOND_NGL: + case Mips::FCOND_LT: + case Mips::FCOND_NGE: + case Mips::FCOND_LE: + case Mips::FCOND_NGT: return Mips::BC1T; + + case Mips::FCOND_T: + case Mips::FCOND_OR: + case Mips::FCOND_NEQ: + case Mips::FCOND_OGL: + case Mips::FCOND_UGE: + case Mips::FCOND_OGE: + case Mips::FCOND_UGT: + case Mips::FCOND_OGT: + case Mips::FCOND_ST: + case Mips::FCOND_GLE: + case Mips::FCOND_SNE: + case Mips::FCOND_GL: + case Mips::FCOND_NLT: + case Mips::FCOND_GE: + case Mips::FCOND_NLE: + case Mips::FCOND_GT: return Mips::BC1F; + } +} + +/// GetOppositeBranchCondition - Return the inverse of the specified +/// condition, e.g. turning COND_E to COND_NE. +Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC) +{ + switch (CC) { + default: assert(0 && "Illegal condition code!"); + case Mips::COND_E : return Mips::COND_NE; + case Mips::COND_NE : return Mips::COND_E; + case Mips::COND_GZ : return Mips::COND_LEZ; + case Mips::COND_GEZ : return Mips::COND_LZ; + case Mips::COND_LZ : return Mips::COND_GEZ; + case Mips::COND_LEZ : return Mips::COND_GZ; + case Mips::FCOND_F : return Mips::FCOND_T; + case Mips::FCOND_UN : return Mips::FCOND_OR; + case Mips::FCOND_EQ : return Mips::FCOND_NEQ; + case Mips::FCOND_UEQ: return Mips::FCOND_OGL; + case Mips::FCOND_OLT: return Mips::FCOND_UGE; + case Mips::FCOND_ULT: return Mips::FCOND_OGE; + case Mips::FCOND_OLE: return Mips::FCOND_UGT; + case Mips::FCOND_ULE: return Mips::FCOND_OGT; + case Mips::FCOND_SF: return Mips::FCOND_ST; + case Mips::FCOND_NGLE:return Mips::FCOND_GLE; + case Mips::FCOND_SEQ: return Mips::FCOND_SNE; + case Mips::FCOND_NGL: return Mips::FCOND_GL; + case Mips::FCOND_LT: return Mips::FCOND_NLT; + case Mips::FCOND_NGE: return Mips::FCOND_GE; + case Mips::FCOND_LE: return Mips::FCOND_NLE; + case Mips::FCOND_NGT: return Mips::FCOND_GT; + } +} + +bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const +{ + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. 
+ MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (!LastInst->getDesc().isBranch()) + return true; + + // Unconditional branch + if (LastOpc == Mips::J) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + + Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); + if (BranchCode == Mips::COND_INVALID) + return true; // Can't handle indirect branch. + + // Conditional branch + // Block ends with fall-through condbranch. + if (LastOpc != Mips::COND_INVALID) { + int LastNumOp = LastInst->getNumOperands(); + + TBB = LastInst->getOperand(LastNumOp-1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(BranchCode)); + + for (int i=0; igetOperand(i)); + } + + return false; + } + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); + + if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { + int SecondNumOp = SecondLastInst->getNumOperands(); + + TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(BranchCode)); + + for (int i=0; igetOperand(i)); + } + + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The last + // one is not executed, so remove it. + if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned MipsInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && + "Mips branch conditions can have two|three components!"); + + if (FBB == 0) { // One way branch. + if (Cond.empty()) { + // Unconditional branch? + BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB); + } else { + // Conditional branch. + unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); + const TargetInstrDesc &TID = get(Opc); + + if (TID.getNumOperands() == 3) + BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + .addReg(Cond[2].getReg()) + .addMBB(TBB); + else + BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + .addMBB(TBB); + + } + return 1; + } + + // Two-way Conditional branch. 
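The two-way case completed just below returns 2 because it appends both the conditional branch and the trailing unconditional J, while the one-way paths above return 1. A sketch of that counting contract, inferred from the code shown rather than from TargetInstrInfo documentation:

  #include <cassert>

  // InsertBranch returns how many branch instructions it appended:
  // 1 for a one-way branch (J, or a lone conditional with fallthrough),
  // 2 for a two-way branch (conditional to TBB plus "J FBB").
  static unsigned branchesInserted(bool HasFalseBlock, bool HasCondition) {
    if (!HasFalseBlock)
      return 1;                  // "J TBB" or "Bcc ..., TBB"
    assert(HasCondition && "two-way branch needs a condition");
    return 2;                    // "Bcc ..., TBB" then "J FBB"
  }

  int main() {
    assert(branchesInserted(false, false) == 1);  // unconditional
    assert(branchesInserted(false, true)  == 1);  // conditional + fallthrough
    assert(branchesInserted(true,  true)  == 2);  // two-way
    return 0;
  }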
+ unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); + const TargetInstrDesc &TID = get(Opc); + + if (TID.getNumOperands() == 3) + BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) + .addMBB(TBB); + else + BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB); + + BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB); + return 2; +} + +unsigned MipsInstrInfo:: +RemoveBranch(MachineBasicBlock &MBB) const +{ + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != Mips::J && + GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not +/// fall-through into its successor block. +bool MipsInstrInfo:: +BlockHasNoFallThrough(const MachineBasicBlock &MBB) const +{ + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case Mips::RET: // Return. + case Mips::JR: // Indirect branch. + case Mips::J: // Uncond branch. + return true; + default: return false; + } +} + +/// ReverseBranchCondition - Return the inverse opcode of the +/// specified Branch instruction. +bool MipsInstrInfo:: +ReverseBranchCondition(SmallVectorImpl &Cond) const +{ + assert( (Cond.size() == 3 || Cond.size() == 2) && + "Invalid Mips branch condition!"); + Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm())); + return false; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h new file mode 100644 index 000000000000..334244e6601a --- /dev/null +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -0,0 +1,223 @@ +//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSINSTRUCTIONINFO_H +#define MIPSINSTRUCTIONINFO_H + +#include "Mips.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "MipsRegisterInfo.h" + +namespace llvm { + +namespace Mips { + + // Mips Branch Codes + enum FPBranchCode { + BRANCH_F, + BRANCH_T, + BRANCH_FL, + BRANCH_TL, + BRANCH_INVALID + }; + + // Mips Condition Codes + enum CondCode { + // To be used with float branch True + FCOND_F, + FCOND_UN, + FCOND_EQ, + FCOND_UEQ, + FCOND_OLT, + FCOND_ULT, + FCOND_OLE, + FCOND_ULE, + FCOND_SF, + FCOND_NGLE, + FCOND_SEQ, + FCOND_NGL, + FCOND_LT, + FCOND_NGE, + FCOND_LE, + FCOND_NGT, + + // To be used with float branch False + // This conditions have the same mnemonic as the + // above ones, but are used with a branch False; + FCOND_T, + FCOND_OR, + FCOND_NEQ, + FCOND_OGL, + FCOND_UGE, + FCOND_OGE, + FCOND_UGT, + FCOND_OGT, + FCOND_ST, + FCOND_GLE, + FCOND_SNE, + FCOND_GL, + FCOND_NLT, + FCOND_GE, + FCOND_NLE, + FCOND_GT, + + // Only integer conditions + COND_E, + COND_GZ, + COND_GEZ, + COND_LZ, + COND_LEZ, + COND_NE, + COND_INVALID + }; + + // Turn condition code into conditional branch opcode. 
+  // Turn condition code into conditional branch opcode.
+  unsigned GetCondBranchFromCond(CondCode CC);
+
+  /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+  /// e.g. turning COND_E to COND_NE.
+  CondCode GetOppositeBranchCondition(Mips::CondCode CC);
+
+  /// MipsFCCToString - Map each FP condition code to its string
+  inline static const char *MipsFCCToString(Mips::CondCode CC)
+  {
+    switch (CC) {
+      default: assert(0 && "Unknown condition code");
+      case FCOND_F:
+      case FCOND_T:   return "f";
+      case FCOND_UN:
+      case FCOND_OR:  return "un";
+      case FCOND_EQ:
+      case FCOND_NEQ: return "eq";
+      case FCOND_UEQ:
+      case FCOND_OGL: return "ueq";
+      case FCOND_OLT:
+      case FCOND_UGE: return "olt";
+      case FCOND_ULT:
+      case FCOND_OGE: return "ult";
+      case FCOND_OLE:
+      case FCOND_UGT: return "ole";
+      case FCOND_ULE:
+      case FCOND_OGT: return "ule";
+      case FCOND_SF:
+      case FCOND_ST:  return "sf";
+      case FCOND_NGLE:
+      case FCOND_GLE: return "ngle";
+      case FCOND_SEQ:
+      case FCOND_SNE: return "seq";
+      case FCOND_NGL:
+      case FCOND_GL:  return "ngl";
+      case FCOND_LT:
+      case FCOND_NLT: return "lt";
+      case FCOND_NGE:
+      case FCOND_GE:  return "nge";
+      case FCOND_LE:
+      case FCOND_NLE: return "le";
+      case FCOND_NGT:
+      case FCOND_GT:  return "ngt";
+    }
+  }
+}
+
+class MipsInstrInfo : public TargetInstrInfoImpl {
+  MipsTargetMachine &TM;
+  const MipsRegisterInfo RI;
+public:
+  explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  /// Branch Analysis
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond) const;
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+  virtual
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// Insert a nop instruction when a hazard condition is found.
+  virtual void insertNoop(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI) const;
+};
+
+}
+
+#endif
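// Editor's sketch of the isLoadFromStackSlot contract declared above: a
// direct stack reload reports its frame index and returns the loaded
// register.  The operand layout below follows the `mem` operand
// (offset, base) used by LW in MipsInstrInfo.td, but this body is an
// illustration, not the imported implementation.
//
//   unsigned MipsInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
//                                               int &FrameIndex) const {
//     if (MI->getOpcode() == Mips::LW &&
//         MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0 &&
//         MI->getOperand(2).isFI()) {
//       FrameIndex = MI->getOperand(2).getIndex();
//       return MI->getOperand(0).getReg();
//     }
//     return 0;
//   }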
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 000000000000..b9276fe495eb
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,707 @@
+//===- MipsInstrInfo.td - Mips Instruction defs -------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
+                                            SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
+                                        SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink", SDT_MipsJmpLink, [SDNPHasChain,
+                                                               SDNPOutFlag]>;
+
+// Hi and Lo nodes are used to handle global addresses. They are used in
+// MipsISelLowering to lower things like GlobalAddress and ExternalSymbol
+// in the static model. (They have nothing to do with the Mips registers
+// Hi and Lo.)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+                                                   SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+                           [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+                         [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Select Condition Code
+def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
+
+// Conditional Move
+def MipsCMov : SDNode<"MipsISD::CMov", SDT_MipsCMov>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+  return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+  return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
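// Editor's example (not imported): how the LO16/HI16 transforms above
// combine in the "arbitrary immediate" pattern defined near the end of
// this file.  Materializing 0xDEADBEEF becomes:
//
//   lui $1, 0xDEAD        # HI16: value >> 16
//   ori $1, $1, 0xBEEF    # LO16: value & 0xFFFF
//
// since (0xDEAD << 16) | 0xBEEF == 0xDEADBEEF.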
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{
+  if (N->getValueType(0) == MVT::i32)
+    return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+  else
+    return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+  if (N->getValueType(0) == MVT::i32)
+    return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+  else
+    return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+  return N->getZExtValue() == (N->getZExtValue() & 0x1f);
+}]>;
+
+// Mips Address Mode! SDNode frameindex could possibly be a match
+// since load and store instructions from the stack use it.
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+let isCommutable = 1 in
+class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+             InstrItinClass itin>:
+  FR<op,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+let isCommutable = 1 in
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>:
+  FR<op,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [], IIAlu>;
+
+// Arithmetic 2 register operands
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+             Operand Od, PatLeaf imm_type> :
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+                     Operand Od, PatLeaf imm_type> :
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd = 0 in
+class MArithR<bits<6> func, string instr_asm> :
+  FR<0x1c,
+     func,
+     (outs CPURegs:$rs),
+     (ins CPURegs:$rt),
+     !strconcat(instr_asm, "\t$rs, $rt"),
+     [], IIImul>;
+
+// Logical
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+  FR<0x00,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, uimm16:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+  FR<op,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+
+// Shifts
+let rt = 0 in
+class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
+  FR<0x00,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, shamt:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+
+class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
+  FR<0x00,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
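// Editor's sketch: the immSExt16/immZExt16 PatLeaf guards defined
// earlier above, restated as plain C++ (illustrative helpers, not
// imported code):
//
//   static bool fitsSImm16(int64_t V)  { return V == (int16_t)V; }
//   static bool fitsUImm16(uint64_t V) { return V == (uint16_t)V; }
//
//   // fitsSImm16(-32768) and fitsUImm16(65535) hold; fitsSImm16(40000)
//   // does not, so 40000 can be matched by immZExt16 (ori) but not by
//   // immSExt16 (addiu).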
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins uimm16:$imm),
+     !strconcat(instr_asm, "\t$dst, $imm"),
+     [], IIAlu>;
+
+// Memory Load/Store
+let canFoldAsLoad = 1, hasDelaySlot = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+  FI<op,
+     (outs),
+     (ins CPURegs:$dst, mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+
+// Conditional Branch
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in {
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+  FI<op,
+     (outs),
+     (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+     !strconcat(instr_asm, "\t$a, $b, $offset"),
+     [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+     IIBranch>;
+
+class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
+  FI<op,
+     (outs),
+     (ins CPURegs:$src, brtarget:$offset),
+     !strconcat(instr_asm, "\t$src, $offset"),
+     [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+     IIBranch>;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+              PatFrag cond_op>:
+  FR<op,
+     func,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+     IIAlu>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+              Operand Od, PatLeaf imm_type>:
+  FI<op,
+     (outs CPURegs:$dst),
+     (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+     IIAlu>;
+
+// Unconditional branch
+let isBranch = 1, isTerminator = 1, isBarrier = 1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+  FJ<op,
+     (outs),
+     (ins brtarget:$target),
+     !strconcat(instr_asm, "\t$target"),
+     [(br bb:$target)], IIBranch>;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, rd = 0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+  FR<op,
+     func,
+     (outs),
+     (ins CPURegs:$target),
+     !strconcat(instr_asm, "\t$target"),
+     [(brind CPURegs:$target)], IIBranch>;
+
+// Jump and Link (Call)
+let isCall = 1, hasDelaySlot = 1,
+    // All calls clobber the non-callee saved registers...
+    Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+            K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
+            F14, F15, F16, F17, F18, F19], Uses = [GP] in {
+  class JumpLink<bits<6> op, string instr_asm>:
+    FJ<op,
+       (outs),
+       (ins calltarget:$target),
+       !strconcat(instr_asm, "\t$target"),
+       [(MipsJmpLink imm:$target)], IIBranch>;
+
+  let rd = 31 in
+  class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+    FR<op,
+       func,
+       (outs),
+       (ins CPURegs:$rs),
+       !strconcat(instr_asm, "\t$rs"),
+       [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+
+  class BranchLink<string instr_asm>:
+    FI<0x1,
+       (outs),
+       (ins CPURegs:$rs, brtarget:$target),
+       !strconcat(instr_asm, "\t$rs, $target"),
+       [], IIBranch>;
+}
+
+// Mul, Div
+class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
+  FR<0x00,
+     func,
+     (outs),
+     (ins CPURegs:$a, CPURegs:$b),
+     !strconcat(instr_asm, "\t$a, $b"),
+     [], itin>;
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+  FR<0x00,
+     func,
+     (outs CPURegs:$dst),
+     (ins),
+     !strconcat(instr_asm, "\t$dst"),
+     [], IIHiLo>;
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+  FR<0x00,
+     func,
+     (outs),
+     (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$src"),
+     [], IIHiLo>;
+
+class EffectiveAddress<string instr_asm> :
+  FI<0x09,
+     (outs CPURegs:$dst),
+     (ins mem:$addr),
+     instr_asm,
+     [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>:
+  FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"),
+     [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>;
+
+// Sign Extend in Register.
+class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
+  FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"),
+     [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+
+// Byte Swap
+class ByteSwap<bits<6> func, string instr_asm>:
+  FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"),
+     [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+
+// Conditional Move
+class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+     CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+     [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T,
+                          CPURegs:$cond, MovCode))], NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+                                  "!ADJCALLSTACKDOWN $amt",
+                                  [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+                                "!ADJCALLSTACKUP $amt1",
+                                [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudoinstructions and automatic
+// assembler reordering, so we should reorder ourselves.
+def MACRO     : MipsPseudo<(outs), (ins), ".set\tmacro",     []>;
+def REORDER   : MipsPseudo<(outs), (ins), ".set\treorder",   []>;
+def NOMACRO   : MipsPseudo<(outs), (ins), ".set\tnomacro",   []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
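// Editor's note on the call-frame pseudos defined above: lowering
// brackets each call with the markers (conceptually, illustrative
// only):
//
//   ADJCALLSTACKDOWN 16
//   jal callee
//   ADJCALLSTACKUP 16, 0
//
// On Mips the outgoing-argument area is folded into the fixed frame
// allocated in the prologue, so the target later just erases both
// markers; see MipsRegisterInfo::eliminateCallFramePseudoInstr further
// down in this import, whose whole body is MBB.erase(I).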
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
+
+// The supported Mips ISAs don't have any instruction close to the SELECT_CC
+// operation. The solution is to create a Mips pseudo SELECT_CC instruction
+// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
+// replace it with the actually supported nodes in EmitInstrWithCustomInserter.
+let usesCustomDAGSchedInserter = 1 in {
+  class PseudoSelCC<RegisterClass RC, string asmstr>:
+    MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
+               [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
+}
+
+def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
+def ADDi  : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
+def SLTi  : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
+def ANDi  : LogicI<0x0c, "andi", and>;
+def ORi   : LogicI<0x0d, "ori", or>;
+def XORi  : LogicI<0x0e, "xori", xor>;
+def LUi   : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
+def ADD  : ArithOverflowR<0x00, 0x20, "add">;
+def SUB  : ArithOverflowR<0x00, 0x22, "sub">;
+def SLT  : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def AND  : LogicR<0x24, "and", and>;
+def OR   : LogicR<0x25, "or", or>;
+def XOR  : LogicR<0x26, "xor", xor>;
+def NOR  : LogicNOR<0x00, 0x27, "nor">;
+
+/// Shift Instructions
+def SLL  : LogicR_shift_imm<0x00, "sll", shl>;
+def SRL  : LogicR_shift_imm<0x02, "srl", srl>;
+def SRA  : LogicR_shift_imm<0x03, "sra", sra>;
+def SLLV : LogicR_shift_reg<0x04, "sllv", shl>;
+def SRLV : LogicR_shift_reg<0x06, "srlv", srl>;
+def SRAV : LogicR_shift_reg<0x07, "srav", sra>;
+
+/// Load and Store Instructions
+def LB  : LoadM<0x20, "lb",  sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH  : LoadM<0x21, "lh",  sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW  : LoadM<0x23, "lw",  load>;
+def SB  : StoreM<0x28, "sb", truncstorei8>;
+def SH  : StoreM<0x29, "sh", truncstorei16>;
+def SW  : StoreM<0x2b, "sw", store>;
+
+/// Jump and Branch Instructions
+def J    : JumpFJ<0x02, "j">;
+def JR   : JumpFR<0x00, 0x08, "jr">;
+def JAL  : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ  : CBranch<0x04, "beq", seteq>;
+def BNE  : CBranch<0x05, "bne", setne>;
+
+let rt = 1 in
+  def BGEZ : CBranchZero<0x01, "bgez", setge>;
+
+let rt = 0 in {
+  def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
+  def BLEZ : CBranchZero<0x06, "blez", setle>;
+  def BLTZ : CBranchZero<0x01, "bltz", setlt>;
+}
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
+    isBarrier = 1, hasCtrlDep = 1, rs = 0, rt = 0, shamt = 0 in
+  def RET : FR<0x00, 0x02,
+               (outs), (ins CPURegs:$target),
+               "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+/// Multiply and Divide Instructions.
+let Defs = [HI, LO] in {
+  def MULT  : MulDiv<0x18, "mult", IIImul>;
+  def MULTu : MulDiv<0x19, "multu", IIImul>;
+  def DIV   : MulDiv<0x1a, "div", IIIdiv>;
+  def DIVu  : MulDiv<0x1b, "divu", IIIdiv>;
+}
+
+let Defs = [HI] in
+  def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+  def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+  def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+  def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+let Predicates = [HasSEInReg] in {
+  let shamt = 0x10, rs = 0 in
+    def SEB : SignExtInReg<0x21, "seb", i8>;
+
+  let shamt = 0x18, rs = 0 in
+    def SEH : SignExtInReg<0x20, "seh", i16>;
+}
+
+/// Count Leading
+let Predicates = [HasBitCount] in {
+  let rt = 0 in
+    def CLZ : CountLeading<0b010110, "clz", ctlz>;
+}
+
+/// Byte Swap
+let Predicates = [HasSwap] in {
+  let shamt = 0x3, rs = 0 in
+    def WSBW : ByteSwap<0x20, "wsbw">;
+}
+
+/// Conditional Move
+def MIPS_CMOV_ZERO  : PatLeaf<(i32 0)>;
+def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+
+let Predicates = [HasCondMov], isTwoAddress = 1 in {
+  def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
+  def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
+}
+
+/// No operation
+let addr = 0 in
+  def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands of load/store
+// instructions. The same does not happen for stack address copies, so an
+// add op with a mem ComplexPattern is used so that the stack address copy
+// can be matched. It is similar to Sparc's LEA_ADDRi.
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+
+// MADD*/MSUB* are not part of MipsI either.
+//def MADD  : MArithR<0x00, "madd">;
+//def MADDU : MArithR<0x01, "maddu">;
+//def MSUB  : MArithR<0x04, "msub">;
+//def MSUBU : MArithR<0x05, "msubu">;
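// Editor's example (illustrative assembly, not imported): the multiply
// and divide definitions above write the implicit HI/LO pair, and the
// MFHI/MFLO moves defined with them recover the result:
//
//   mult $t0, $t1    # HI:LO = $t0 * $t1 (64-bit product)
//   mflo $v0         # low 32 bits
//   mfhi $v1         # high 32 bits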
+// MUL is an assembly macro in the currently used ISAs. In recent ISAs
+// it is a real instruction.
+//def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+          (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+          (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+          (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+          (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+          (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, immSExt16:$imm),
+          (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+          (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+          (JAL texternalsym:$dst)>;
+def : Pat<(MipsJmpLink CPURegs:$dst),
+          (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+          (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+          (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+          (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+          (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+          (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// Mips does not have "not", so we expand it to a NOR with zero.
+def : Pat<(not CPURegs:$in),
+          (NOR CPURegs:$in, ZERO)>;
+
+// Extended loads and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+
+// Peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
+          (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
+          (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+          (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+          (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+          (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+          (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+          (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+          (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond CPURegs:$cond, bb:$dst),
+          (BNE CPURegs:$cond, ZERO, bb:$dst)>;
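// Editor's example of the setge brcond expansion above: Mips has no
// branch-on-greater-or-equal, so `if (a >= b) goto L;` is emitted as
// (illustrative assembly):
//
//   slt  $1, $a, $b       # $1 = (a < b)
//   beq  $1, $zero, L     # taken when the SLT failed, i.e. a >= b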
+// select patterns
+def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>;
+
+def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>;
+def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>;
+
+def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+          (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
+          (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+
+// setcc patterns
+def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
+          (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
+          (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
+          (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
+def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
+          (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
+
+def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
+          (SLT CPURegs:$rhs, CPURegs:$lhs)>;
+def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
+          (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
+
+def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
+          (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
+          (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
+
+def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
+          (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
+          (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 000000000000..b95394ec81ce
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,131 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ---*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+  /// Holds for each function where on the stack the Frame Pointer must be
+  /// saved. This is used in the Prologue and Epilogue to emit FP save/restore.
+  int FPStackOffset;
+
+  /// Holds for each function where on the stack the Return Address must be
+  /// saved. This is used in the Prologue and Epilogue to emit RA save/restore.
+  int RAStackOffset;
+
+  /// At each function entry, two special bitmask directives must be emitted
+  /// to help debugging, for CPU and FPU callee saved registers. Both need
+  /// the negative offset from the final stack size and their higher registers
+  /// location on the stack.
+  int CPUTopSavedRegOff;
+  int FPUTopSavedRegOff;
+
+  /// MipsFIHolder - Holds a FrameIndex and its Stack Pointer Offset
+  struct MipsFIHolder {
+
+    int FI;
+    int SPOffset;
+
+    MipsFIHolder(int FrameIndex, int StackPointerOffset)
+      : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+  };
+
+  /// When PIC is used the GP must be saved on the stack in the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
+  /// to be used in emitPrologue and processFunctionBeforeFrameFinalized.
+  MipsFIHolder GPHolder;
+
+  /// In LowerFORMAL_ARGUMENTS the stack size is unknown, so the Stack
+  /// Pointer Offset calculation of "not in register arguments" must be
+  /// postponed to emitPrologue.
+  SmallVector<MipsFIHolder, 16> FnLoadArgs;
+  bool HasLoadArgs;
+
+  // For VarArgs, we must write registers back to the caller stack, preserving
+  // the ones passed in registers. Since the stack size is unknown in
+  // LowerFORMAL_ARGUMENTS, the Stack Pointer Offset calculation must be
+  // postponed to emitPrologue.
+  SmallVector<MipsFIHolder, 16> FnStoreVarArgs;
+  bool HasStoreVarArgs;
+  /// SRetReturnReg - Some subtargets require that sret lowering includes
+  /// returning the value of the returned struct in a register. This field
+  /// holds the virtual register into which the sret argument is passed.
+  unsigned SRetReturnReg;
+
+public:
+  MipsFunctionInfo(MachineFunction& MF)
+    : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+      FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
+      HasStoreVarArgs(false), SRetReturnReg(0)
+  {}
+
+  int getFPStackOffset() const { return FPStackOffset; }
+  void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+  int getRAStackOffset() const { return RAStackOffset; }
+  void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+  int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
+  void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
+
+  int getFPUTopSavedRegOff() const { return FPUTopSavedRegOff; }
+  void setFPUTopSavedRegOff(int Off) { FPUTopSavedRegOff = Off; }
+
+  int getGPStackOffset() const { return GPHolder.SPOffset; }
+  int getGPFI() const { return GPHolder.FI; }
+  void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+  void setGPFI(int FI) { GPHolder.FI = FI; }
+
+  bool hasLoadArgs() const { return HasLoadArgs; }
+  bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+  void recordLoadArgsFI(int FI, int SPOffset) {
+    if (!HasLoadArgs) HasLoadArgs = true;
+    FnLoadArgs.push_back(MipsFIHolder(FI, SPOffset));
+  }
+  void recordStoreVarArgsFI(int FI, int SPOffset) {
+    if (!HasStoreVarArgs) HasStoreVarArgs = true;
+    FnStoreVarArgs.push_back(MipsFIHolder(FI, SPOffset));
+  }
+
+  void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+    if (!hasLoadArgs()) return;
+    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+      MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
+  }
+  void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+    if (!hasStoreVarArgs()) return;
+    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+      MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
+  }
+
+  unsigned getSRetReturnReg() const { return SRetReturnReg; }
+  void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
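// Editor's sketch of the two-phase frame-index fixup MipsFunctionInfo
// enables: argument lowering records dummy offsets while the final
// stack size is still unknown, and prologue emission rewrites them.
// The call sites shown are illustrative:
//
//   // In LowerFORMAL_ARGUMENTS, stack size not yet final:
//   MipsFI->recordLoadArgsFI(FI, SPOffset);
//   // Later, in emitPrologue via adjustMipsStackFrame:
//   MipsFI->adjustLoadArgsFI(MF.getFrameInfo());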
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 000000000000..579d4db6422f
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,535 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+                                   const TargetInstrInfo &tii)
+  : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+    Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+  switch (RegEnum) {
+    case Mips::ZERO : case Mips::F0 :  case Mips::D0 :  return 0;
+    case Mips::AT   : case Mips::F1 :                   return 1;
+    case Mips::V0   : case Mips::F2 :  case Mips::D1 :  return 2;
+    case Mips::V1   : case Mips::F3 :                   return 3;
+    case Mips::A0   : case Mips::F4 :  case Mips::D2 :  return 4;
+    case Mips::A1   : case Mips::F5 :                   return 5;
+    case Mips::A2   : case Mips::F6 :  case Mips::D3 :  return 6;
+    case Mips::A3   : case Mips::F7 :                   return 7;
+    case Mips::T0   : case Mips::F8 :  case Mips::D4 :  return 8;
+    case Mips::T1   : case Mips::F9 :                   return 9;
+    case Mips::T2   : case Mips::F10:  case Mips::D5 :  return 10;
+    case Mips::T3   : case Mips::F11:                   return 11;
+    case Mips::T4   : case Mips::F12:  case Mips::D6 :  return 12;
+    case Mips::T5   : case Mips::F13:                   return 13;
+    case Mips::T6   : case Mips::F14:  case Mips::D7 :  return 14;
+    case Mips::T7   : case Mips::F15:                   return 15;
+    case Mips::T8   : case Mips::F16:  case Mips::D8 :  return 16;
+    case Mips::T9   : case Mips::F17:                   return 17;
+    case Mips::S0   : case Mips::F18:  case Mips::D9 :  return 18;
+    case Mips::S1   : case Mips::F19:                   return 19;
+    case Mips::S2   : case Mips::F20:  case Mips::D10:  return 20;
+    case Mips::S3   : case Mips::F21:                   return 21;
+    case Mips::S4   : case Mips::F22:  case Mips::D11:  return 22;
+    case Mips::S5   : case Mips::F23:                   return 23;
+    case Mips::S6   : case Mips::F24:  case Mips::D12:  return 24;
+    case Mips::S7   : case Mips::F25:                   return 25;
+    case Mips::K0   : case Mips::F26:  case Mips::D13:  return 26;
+    case Mips::K1   : case Mips::F27:                   return 27;
+    case Mips::GP   : case Mips::F28:  case Mips::D14:  return 28;
+    case Mips::SP   : case Mips::F29:                   return 29;
+    case Mips::FP   : case Mips::F30:  case Mips::D15:  return 30;
+    case Mips::RA   : case Mips::F31:                   return 31;
+    default: assert(0 && "Unknown register number!");
+  }
+  return 0; // Not reached
+}
+
+unsigned MipsRegisterInfo::getPICCallReg(void) { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+  // Mips callee-save register range is $16-$23, $f20-$f30
+  static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+    Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+    Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+    Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
+    Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0
+  };
+
+  static const unsigned BitMode32CalleeSavedRegs[] = {
+    Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+    Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+    Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
+    Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15, 0
+  };
+
+  if (Subtarget.isSingleFloat())
+    return SingleFloatOnlyCalleeSavedRegs;
+  else
+    return BitMode32CalleeSavedRegs;
+}
+
+/// Mips Callee Saved Register Classes
+const TargetRegisterClass* const*
+MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+  static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
+  };
+
+  static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+    &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass,
+    &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0
+  };
+
+  if (Subtarget.isSingleFloat())
+    return SingleFloatOnlyCalleeSavedRC;
+  else
+    return BitMode32CalleeSavedRC;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const
+{
+  BitVector Reserved(getNumRegs());
+  Reserved.set(Mips::ZERO);
+  Reserved.set(Mips::AT);
+  Reserved.set(Mips::K0);
+  Reserved.set(Mips::K1);
+  Reserved.set(Mips::GP);
+  Reserved.set(Mips::SP);
+  Reserved.set(Mips::FP);
+  Reserved.set(Mips::RA);
+
+  // SVR4 requires that odd registers cannot be used.
+  if (!Subtarget.isSingleFloat())
+    for (unsigned FReg = (Mips::F0)+1; FReg < Mips::F30; FReg += 2)
+      Reserved.set(FReg);
+
+  return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer in
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0                 ----------
+// 4                 Args to pass
+// .                 saved $GP (used in PIC)
+// .                 Alloca allocations
+// .                 Local Area
+// .                 CPU "Callee Saved" Registers
+// .                 saved FP
+// .                 saved RA
FPU "Callee Saved" Registers +// StackSize ----------- +// +// Offset - offset from sp after stack allocation on function prologue +// +// The sp is the stack pointer subtracted/added from the stack size +// at the Prologue/Epilogue +// +// References to the previous stack (to obtain arguments) are done +// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) +// +// Examples: +// - reference to the actual stack frame +// for any local area var there is smt like : FI >= 0, StackOffset: 4 +// sw REGX, 4(SP) +// +// - reference to previous stack frame +// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. +// The emitted instruction will be something like: +// lw REGX, 16+StackSize(SP) +// +// Since the total stack size is unknown on LowerFORMAL_ARGUMENTS, all +// stack references (ObjectOffset) created to reference the function +// arguments, are negative numbers. This way, on eliminateFrameIndex it's +// possible to detect those references and the offsets are adjusted to +// their real location. +// +//===----------------------------------------------------------------------===// + +void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const +{ + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + + // Min and Max CSI FrameIndex. + int MinCSFI = -1, MaxCSFI = -1; + + // See the description at MipsMachineFunction.h + int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1; + + // Replace the dummy '0' SPOffset by the negative offsets, as explained on + // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid + // the approach done by calculateFrameObjectOffsets to the stack frame. + MipsFI->adjustLoadArgsFI(MFI); + MipsFI->adjustStoreVarArgsFI(MFI); + + // It happens that the default stack frame allocation order does not directly + // map to the convention used for mips. So we must fix it. We move the callee + // save register slots after the local variables area, as described in the + // stack frame above. + unsigned CalleeSavedAreaSize = 0; + if (!CSI.empty()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size()-1].getFrameIdx(); + } + for (unsigned i = 0, e = CSI.size(); i != e; ++i) + CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx()); + + // Adjust local variables. They should come on the stack right + // after the arguments. + int LastOffsetFI = -1; + for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (i >= MinCSFI && i <= MaxCSFI) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + unsigned Offset = MFI->getObjectOffset(i) - CalleeSavedAreaSize; + if (LastOffsetFI == -1) + LastOffsetFI = i; + if (Offset > MFI->getObjectOffset(LastOffsetFI)) + LastOffsetFI = i; + MFI->setObjectOffset(i, Offset); + } + + // Adjust CPU Callee Saved Registers Area. Registers RA and FP must + // be saved in this CPU Area there is the need. This whole Area must + // be aligned to the default Stack Alignment requirements. + unsigned StackOffset = 0; + unsigned RegSize = Subtarget.isGP32bit() ? 
+  unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
+
+  if (LastOffsetFI >= 0)
+    StackOffset = MFI->getObjectOffset(LastOffsetFI) +
+                  MFI->getObjectSize(LastOffsetFI);
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+      break;
+    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+  }
+
+  if (hasFP(MF)) {
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+                         StackOffset);
+    MipsFI->setFPStackOffset(StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += RegSize;
+  }
+
+  if (MFI->hasCalls()) {
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+                         StackOffset);
+    MipsFI->setRAStackOffset(StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += RegSize;
+  }
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  // Adjust FPU Callee Saved Registers Area. This Area must be
+  // aligned to the default Stack Alignment requirements.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+      continue;
+    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+    TopFPUSavedRegOff = StackOffset;
+    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+  }
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  // Update frame info
+  MFI->setStackSize(StackOffset);
+
+  // Recalculate the final tops offset. The final values must be '0'
+  // if there isn't a callee saved register for CPU or FPU, otherwise
+  // a negative offset is needed.
+  if (TopCPUSavedRegOff >= 0)
+    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+
+  if (TopFPUSavedRegOff >= 0)
+    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsRegisterInfo::
+hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
+// FrameIndexes represent objects inside an abstract stack.
+// We must replace a FrameIndex with a direct stack/frame pointer
+// reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                    RegScavenger *RS) const
+{
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+
+  unsigned i = 0;
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+#ifndef NDEBUG
+  DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+  DOUT << "<--------->\n";
+  MI.print(DOUT);
+#endif
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int stackSize  = MF.getFrameInfo()->getStackSize();
+  int spOffset   = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+#ifndef NDEBUG
+  DOUT << "FrameIndex : " << FrameIndex << "\n";
+  DOUT << "spOffset   : " << spOffset << "\n";
+  DOUT << "stackSize  : " << stackSize << "\n";
+#endif
+
+  // As explained on LowerFORMAL_ARGUMENTS, detect negative offsets
+  // and adjust SPOffsets considering the final stack size.
+  int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset));
+  Offset += MI.getOperand(i-1).getImm();
+
+#ifndef NDEBUG
+  DOUT << "Offset     : " << Offset << "\n";
+  DOUT << "<--------->\n";
+#endif
+
+  MI.getOperand(i-1).ChangeToImmediate(Offset);
+  MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MipsRegisterInfo::
+emitPrologue(MachineFunction &MF) const
+{
+  MachineBasicBlock &MBB   = MF.front();
+  MachineFrameInfo *MFI    = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+  bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+
+  // Get the right frame order for Mips.
+  adjustMipsStackFrame(MF);
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  unsigned StackSize = MFI->getStackSize();
+
+  // No need to allocate space on the stack.
+  if (StackSize == 0 && !MFI->hasCalls()) return;
+
+  int FPOffset = MipsFI->getFPStackOffset();
+  int RAOffset = MipsFI->getRAStackOffset();
+
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+  // TODO: check the need for the GP here.
+  if (isPIC && Subtarget.isABI_O32())
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg());
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+  // Adjust stack : addi sp, sp, (-imm)
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+    .addReg(Mips::SP).addImm(-StackSize);
+
+  // Save the return address only if the function isn't a leaf one.
+  // sw $ra, stack_loc($sp)
+  if (MFI->hasCalls()) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+      .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+  }
+
+  // If the frame pointer is enabled, save it and set it
+  // to point to the stack pointer.
+  if (hasFP(MF)) {
+    // sw $fp, stack_loc($sp)
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+      .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+    // move $fp, $sp
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+      .addReg(Mips::SP).addReg(Mips::ZERO);
+  }
+
+  // PIC specific function prologue
+  if ((isPIC) && (MFI->hasCalls())) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+      .addImm(MipsFI->getGPStackOffset());
+  }
+}
+
+void MipsRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  MachineFrameInfo *MFI            = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI         = MF.getInfo<MipsFunctionInfo>();
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  // Get the number of bytes from FrameInfo
+  int NumBytes = (int) MFI->getStackSize();
+
+  // Get the FI's where RA and FP are saved.
+  int FPOffset = MipsFI->getFPStackOffset();
+  int RAOffset = MipsFI->getRAStackOffset();
+
+  // If the frame pointer is enabled, restore it and the
+  // stack pointer.
+  if (hasFP(MF)) {
+    // move $sp, $fp
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
+      .addReg(Mips::FP).addReg(Mips::ZERO);
+
+    // lw $fp, stack_loc($sp)
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
+      .addImm(FPOffset).addReg(Mips::SP);
+  }
+
+  // Restore the return address only if the function isn't a leaf one.
+  // lw $ra, stack_loc($sp)
+  if (MFI->hasCalls()) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
+      .addImm(RAOffset).addReg(Mips::SP);
+  }
+
+  // Adjust stack : insert addi sp, sp, (imm)
+  if (NumBytes) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+      .addReg(Mips::SP).addImm(NumBytes);
+  }
+}
+
+
+void MipsRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+  // Set the SPOffset on the FI where GP must be saved/loaded.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+  if (MFI->hasCalls() && isPIC) {
+    MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+    MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset());
+  }
+}
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+  return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(MachineFunction &MF) const {
+  return hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+  assert(0 && "What is the exception register");
+  return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+  assert(0 && "What is the exception handler register");
+  return 0;
+}
+
+int MipsRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  assert(0 && "What is the dwarf register number");
+  return -1;
+}
+
+#include "MipsGenRegisterInfo.inc"
+
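// Editor's worked example of the offset rewrite in eliminateFrameIndex
// above, assuming a final stack size of 40: an incoming argument
// recorded with the dummy encoding spOffset = -(realOffset + 4) at
// realOffset 0 yields
//
//   Offset = 40 + (-(-4 + 4)) = 40   ->  lw $reg, 40($sp)
//
// while a local object at spOffset 8 is used as-is: lw $reg, 8($sp).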
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 000000000000..808e995b4ed3
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,78 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MipsGenRegisterInfo.h.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+  const MipsSubtarget &Subtarget;
+  const TargetInstrInfo &TII;
+
+  MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+  /// getRegisterNumbering - Given the enum value for some register, e.g.
+  /// Mips::RA, return the number that it corresponds to (e.g. 31).
+  static unsigned getRegisterNumbering(unsigned RegEnum);
+
+  /// Get PIC indirect call register
+  static unsigned getPICCallReg(void);
+
+  /// Adjust the Mips stack frame.
+  void adjustMipsStackFrame(MachineFunction &MF) const;
+
+  /// Code Generation virtual methods...
+  const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+  const TargetRegisterClass* const*
+  getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  /// Stack Frame Processing Methods
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  /// Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(MachineFunction &MF) const;
+
+  /// Exception handling queries.
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 000000000000..bbb275c66242
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,252 @@
+//===- MipsRegisterInfo.td - Mips Register defs -------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+  field bits<5> Num;
+  let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+  let Num = num;
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+  let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+class AFPR<bits<5> num, string n, list<Register> aliases> : MipsReg<n> {
+  let Num = num;
+  let Aliases = aliases;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+
+  // General Purpose Registers
+  def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+  def AT   : MipsGPRReg< 1, "AT">,   DwarfRegNum<[1]>;
+  def V0   : MipsGPRReg< 2, "2">,    DwarfRegNum<[2]>;
+  def V1   : MipsGPRReg< 3, "3">,    DwarfRegNum<[3]>;
+  def A0   : MipsGPRReg< 4, "4">,    DwarfRegNum<[4]>;
+  def A1   : MipsGPRReg< 5, "5">,    DwarfRegNum<[5]>;
+  def A2   : MipsGPRReg< 6, "6">,    DwarfRegNum<[6]>;
+  def A3   : MipsGPRReg< 7, "7">,    DwarfRegNum<[7]>;
+  def T0   : MipsGPRReg< 8, "8">,    DwarfRegNum<[8]>;
+  def T1   : MipsGPRReg< 9, "9">,    DwarfRegNum<[9]>;
+  def T2   : MipsGPRReg< 10, "10">,  DwarfRegNum<[10]>;
+  def T3   : MipsGPRReg< 11, "11">,  DwarfRegNum<[11]>;
+  def T4   : MipsGPRReg< 12, "12">,  DwarfRegNum<[12]>;
+  def T5   : MipsGPRReg< 13, "13">,  DwarfRegNum<[13]>;
+  def T6   : MipsGPRReg< 14, "14">,  DwarfRegNum<[14]>;
+  def T7   : MipsGPRReg< 15, "15">,  DwarfRegNum<[15]>;
+  def S0   : MipsGPRReg< 16, "16">,  DwarfRegNum<[16]>;
+  def S1   : MipsGPRReg< 17, "17">,  DwarfRegNum<[17]>;
+  def S2   : MipsGPRReg< 18, "18">,  DwarfRegNum<[18]>;
+  def S3   : MipsGPRReg< 19, "19">,  DwarfRegNum<[19]>;
+  def S4   : MipsGPRReg< 20, "20">,  DwarfRegNum<[20]>;
+  def S5   : MipsGPRReg< 21, "21">,  DwarfRegNum<[21]>;
+  def S6   : MipsGPRReg< 22, "22">,  DwarfRegNum<[22]>;
+  def S7   : MipsGPRReg< 23, "23">,  DwarfRegNum<[23]>;
+  def T8   : MipsGPRReg< 24, "24">,  DwarfRegNum<[24]>;
+  def T9   : MipsGPRReg< 25, "25">,  DwarfRegNum<[25]>;
+  def K0   : MipsGPRReg< 26, "26">,  DwarfRegNum<[26]>;
+  def K1   : MipsGPRReg< 27, "27">,  DwarfRegNum<[27]>;
+  def GP   : MipsGPRReg< 28, "GP">,  DwarfRegNum<[28]>;
+  def SP   : MipsGPRReg< 29, "SP">,  DwarfRegNum<[29]>;
+  def FP   : MipsGPRReg< 30, "FP">,  DwarfRegNum<[30]>;
+  def RA   : MipsGPRReg< 31, "RA">,  DwarfRegNum<[31]>;
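// Editor's note on the aliased double-precision bank defined further
// below: each D<n> overlays the even/odd single-precision pair
// F<2n>/F<2n+1>, which is why getRegisterNumbering() in
// MipsRegisterInfo.cpp maps, e.g., both D1 and F2 to hardware index 2:
//
//   assert(MipsRegisterInfo::getRegisterNumbering(Mips::D1) ==
//          MipsRegisterInfo::getRegisterNumbering(Mips::F2));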
DwarfRegNum<[57]>; + def F26 : FPR<26, "F26">, DwarfRegNum<[58]>; + def F27 : FPR<27, "F27">, DwarfRegNum<[59]>; + def F28 : FPR<28, "F28">, DwarfRegNum<[60]>; + def F29 : FPR<29, "F29">, DwarfRegNum<[61]>; + def F30 : FPR<30, "F30">, DwarfRegNum<[62]>; + def F31 : FPR<31, "F31">, DwarfRegNum<[63]>; + + /// Mips Double point precision FPU Registers (aliased + /// with the single precision to hold 64 bit values) + def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; + def D1 : AFPR< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>; + def D2 : AFPR< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>; + def D3 : AFPR< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>; + def D4 : AFPR< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>; + def D5 : AFPR<10, "F10", [F10, F11]>, DwarfRegNum<[42]>; + def D6 : AFPR<12, "F12", [F12, F13]>, DwarfRegNum<[44]>; + def D7 : AFPR<14, "F14", [F14, F15]>, DwarfRegNum<[46]>; + def D8 : AFPR<16, "F16", [F16, F17]>, DwarfRegNum<[48]>; + def D9 : AFPR<18, "F18", [F18, F19]>, DwarfRegNum<[50]>; + def D10 : AFPR<20, "F20", [F20, F21]>, DwarfRegNum<[52]>; + def D11 : AFPR<22, "F22", [F22, F23]>, DwarfRegNum<[54]>; + def D12 : AFPR<24, "F24", [F24, F25]>, DwarfRegNum<[56]>; + def D13 : AFPR<26, "F26", [F26, F27]>, DwarfRegNum<[58]>; + def D14 : AFPR<28, "F28", [F28, F29]>, DwarfRegNum<[60]>; + def D15 : AFPR<30, "F30", [F30, F31]>, DwarfRegNum<[62]>; + + // Hi/Lo registers + def HI : Register<"hi">, DwarfRegNum<[64]>; + def LO : Register<"lo">, DwarfRegNum<[65]>; + + // Status flags register + def FCR31 : Register<"31">; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"Mips", [i32], 32, + // Return Values and Arguments + [V0, V1, A0, A1, A2, A3, + // Not preserved across procedure calls + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + // Callee save + S0, S1, S2, S3, S4, S5, S6, S7, + // Reserved + ZERO, AT, K0, K1, GP, SP, FP, RA]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CPURegsClass::iterator + CPURegsClass::allocation_order_end(const MachineFunction &MF) const { + // The last 8 registers on the list above are reserved + return end()-8; + } + }]; +} + +// 64bit fp: +// * FGR64 - 32 64-bit registers +// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) +// +// 32bit fp: +// * FGR32 - 16 32-bit even registers +// * FGR32 - 32 32-bit registers (single float only mode) +def FGR32 : RegisterClass<"Mips", [f32], 32, + // Return Values and Arguments + [F0, F1, F2, F3, F12, F13, F14, F15, + // Not preserved across procedure calls + F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, + // Callee save + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, + // Reserved + F31]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + + static const unsigned MIPS_FGR32[] = { + Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13, + Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7, + Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17, + Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23, + Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, + Mips::F30 + }; + + static const unsigned MIPS_SVR4_FGR32[] = { + Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4, + Mips::F6, 
Mips::F8, Mips::F10, Mips::F16, Mips::F18, + Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, + }; + + FGR32Class::iterator + FGR32Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const MipsSubtarget &Subtarget = TM.getSubtarget(); + + if (Subtarget.isSingleFloat()) + return MIPS_FGR32; + else + return MIPS_SVR4_FGR32; + } + + FGR32Class::iterator + FGR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const MipsSubtarget &Subtarget = TM.getSubtarget(); + + if (Subtarget.isSingleFloat()) + return MIPS_FGR32 + (sizeof(MIPS_FGR32) / sizeof(unsigned)); + else + return MIPS_SVR4_FGR32 + (sizeof(MIPS_SVR4_FGR32) / sizeof(unsigned)); + } + }]; +} + +def AFGR64 : RegisterClass<"Mips", [f64], 64, + // Return Values and Arguments + [D0, D1, D6, D7, + // Not preserved across procedure calls + D2, D3, D4, D5, D8, D9, + // Callee save + D10, D11, D12, D13, D14, + // Reserved + D15]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + AFGR64Class::iterator + AFGR64Class::allocation_order_end(const MachineFunction &MF) const { + // The last register on the list above is reserved + return end()-1; + } + }]; +} + +// Condition Register for floating point operations +def CCR : RegisterClass<"Mips", [i32], 32, [FCR31]>; + +// Hi/Lo Registers +def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>; + diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td new file mode 100644 index 000000000000..0c3ca57361cd --- /dev/null +++ b/lib/Target/Mips/MipsSchedule.td @@ -0,0 +1,63 @@ +//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across Mips chips sets. Based on GCC/Mips backend files. +//===----------------------------------------------------------------------===// +def ALU : FuncUnit; +def IMULDIV : FuncUnit; + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for Mips +//===----------------------------------------------------------------------===// +def IIAlu : InstrItinClass; +def IILoad : InstrItinClass; +def IIStore : InstrItinClass; +def IIXfer : InstrItinClass; +def IIBranch : InstrItinClass; +def IIHiLo : InstrItinClass; +def IIImul : InstrItinClass; +def IIIdiv : InstrItinClass; +def IIFcvt : InstrItinClass; +def IIFmove : InstrItinClass; +def IIFcmp : InstrItinClass; +def IIFadd : InstrItinClass; +def IIFmulSingle : InstrItinClass; +def IIFmulDouble : InstrItinClass; +def IIFdivSingle : InstrItinClass; +def IIFdivDouble : InstrItinClass; +def IIFsqrtSingle : InstrItinClass; +def IIFsqrtDouble : InstrItinClass; +def IIFrecipFsqrtStep : InstrItinClass; +def IIPseudo : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Mips Generic instruction itineraries. 
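The FGR32 class above swaps its allocation order at run time: in single-float mode every 32-bit FPU register is a candidate, otherwise only the even registers are handed out so each value's odd partner stays free for the aliased 64-bit pairs in AFGR64. A condensed, self-contained analog of the begin/end hook pattern (the values are stand-ins, not real register numbers):

    #include <cstdio>

    // The allocator scans [begin, end); the subtarget picks the table.
    static const unsigned AllRegs[]  = { 0, 1, 2, 3 };
    static const unsigned EvenRegs[] = { 0, 2 };

    static const unsigned *orderBegin(bool SingleFloat) {
      return SingleFloat ? AllRegs : EvenRegs;
    }

    static const unsigned *orderEnd(bool SingleFloat) {
      // Same sizeof idiom the hunk uses to find the end of each table.
      return SingleFloat ? AllRegs  + sizeof(AllRegs)  / sizeof(unsigned)
                         : EvenRegs + sizeof(EvenRegs) / sizeof(unsigned);
    }

    int main() {
      printf("%ld candidates in single-float mode\n",
             (long)(orderEnd(true) - orderBegin(true)));  // prints 4
      return 0;
    }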
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[
+  InstrItinData<IIAlu             , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<IILoad            , [InstrStage<3,  [ALU]>]>,
+  InstrItinData<IIStore           , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<IIXfer            , [InstrStage<2,  [ALU]>]>,
+  InstrItinData<IIBranch          , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<IIHiLo            , [InstrStage<1,  [IMULDIV]>]>,
+  InstrItinData<IIImul            , [InstrStage<17, [IMULDIV]>]>,
+  InstrItinData<IIIdiv            , [InstrStage<38, [IMULDIV]>]>,
+  InstrItinData<IIFcvt            , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<IIFmove           , [InstrStage<2,  [ALU]>]>,
+  InstrItinData<IIFcmp            , [InstrStage<3,  [ALU]>]>,
+  InstrItinData<IIFadd            , [InstrStage<4,  [ALU]>]>,
+  InstrItinData<IIFmulSingle      , [InstrStage<7,  [ALU]>]>,
+  InstrItinData<IIFmulDouble      , [InstrStage<8,  [ALU]>]>,
+  InstrItinData<IIFdivSingle      , [InstrStage<23, [ALU]>]>,
+  InstrItinData<IIFdivDouble      , [InstrStage<36, [ALU]>]>,
+  InstrItinData<IIFsqrtSingle     , [InstrStage<54, [ALU]>]>,
+  InstrItinData<IIFsqrtDouble     , [InstrStage<12, [ALU]>]>,
+  InstrItinData<IIFrecipFsqrtStep , [InstrStage<5,  [ALU]>]>
+]>;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 000000000000..4245f274f8f0
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,77 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsGenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+NotABICall("disable-mips-abicall", cl::Hidden,
+           cl::desc("Disable code for SVR4-style dynamic objects"));
+static cl::opt<bool>
+AbsoluteCall("enable-mips-absolute-call", cl::Hidden,
+             cl::desc("Enable absolute call within abicall"));
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+            cl::desc("Small data and bss section threshold size (default=8)"),
+            cl::init(8));
+
+MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
+                             const std::string &FS, bool little) :
+  MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
+  IsFP64bit(false), IsGP64bit(false), HasVFPU(false), HasABICall(true),
+  HasAbsoluteCall(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
+  HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false)
+{
+  std::string CPU = "mips1";
+  MipsArchVersion = Mips1;
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+  const std::string& TT = M.getTargetTriple();
+
+  // Small section size threshold
+  SSectionThreshold = SSThreshold;
+
+  // Is the target system Linux?
+  if (TT.find("linux") == std::string::npos)
+    IsLinux = false;
+
+  // When only the target triple is specified and it is an Allegrex
+  // target, set the features. We also match big and little endian
+  // Allegrex cores (though it is not clear a big endian one exists).
+  if (TT.find("mipsallegrex") != std::string::npos ||
+      TT.find("psp") != std::string::npos) {
+    MipsABI = EABI;
+    IsSingleFloat = true;
+    MipsArchVersion = Mips2;
+    HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
+    HasSEInReg = true;
+    HasBitCount = true;
+    HasSwap = true;
+    HasCondMov = true;
+  }
+
+  // Abicall is the default for O32 ABI, but is disabled within EABI and in
+  // static code.
+  if (NotABICall || isABI_EABI() || (TM.getRelocationModel() == Reloc::Static))
+    HasABICall = false;
+
+  // TODO: disable when handling 64 bit symbols in the future.
+  if (HasABICall && AbsoluteCall)
+    HasAbsoluteCall = true;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 000000000000..61c37c1d377e
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,139 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class MipsSubtarget : public TargetSubtarget {
+
+public:
+  enum MipsABIEnum {
+    O32, O64, N32, N64, EABI
+  };
+
+protected:
+
+  enum MipsArchEnum {
+    Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2
+  };
+
+  // Mips architecture version
+  MipsArchEnum MipsArchVersion;
+
+  // Mips supported ABIs
+  MipsABIEnum MipsABI;
+
+  // IsLittle - The target is Little Endian
+  bool IsLittle;
+
+  // IsSingleFloat - The target only supports single precision floating
+  // point operations. This enables the target to use all 32 32-bit
+  // floating point registers instead of only the even ones.
+  bool IsSingleFloat;
+
+  // IsFP64bit - The target processor has 64-bit floating point registers.
+  bool IsFP64bit;
+
+  // IsGP64bit - General-purpose registers are 64 bits wide.
+  bool IsGP64bit;
+
+  // HasVFPU - Processor has a vector floating point unit.
+  bool HasVFPU;
+
+  // HasABICall - Enable SVR4 code for SVR4-style dynamic objects.
+  bool HasABICall;
+
+  // HasAbsoluteCall - Enable code that is not fully position-independent.
+  // Only works with HasABICall enabled.
+  bool HasAbsoluteCall;
+
+  // IsLinux - Target system is Linux. If false, we consider a generic
+  // ELF OS for now.
+  bool IsLinux;
+
+  // Put global and static items less than or equal to SSectionThreshold
+  // bytes into the small data or bss section. The default is 8.
+  unsigned SSectionThreshold;
+
+  /// Features related to the presence of specific instructions.
+
+  // HasSEInReg - SEB and SEH (signext in register) instructions.
+  bool HasSEInReg;
+
+  // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+  bool HasCondMov;
+
+  // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+  // instructions.
+  bool HasMulDivAdd;
+
+  // HasMinMax - MIN and MAX instructions.
+  bool HasMinMax;
+
+  // HasSwap - Byte and half swap instructions.
+  bool HasSwap;
+
+  // HasBitCount - Count leading '1' and '0' bits.
+  bool HasBitCount;
+
+  InstrItineraryData InstrItins;
+
+public:
+
+  /// Only O32 and EABI supported right now.
+  bool isABI_EABI() const { return MipsABI == EABI; }
+  bool isABI_O32() const { return MipsABI == O32; }
+  unsigned getTargetABI() const { return MipsABI; }
+
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  MipsSubtarget(const TargetMachine &TM, const Module &M,
+                const std::string &FS, bool little);
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options. Definition of function is auto generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+
+  bool isMips1() const { return MipsArchVersion == Mips1; }
+
+  bool isLittle() const { return IsLittle; }
+  bool isFP64bit() const { return IsFP64bit; }
+  bool isGP64bit() const { return IsGP64bit; }
+  bool isGP32bit() const { return !IsGP64bit; }
+  bool isSingleFloat() const { return IsSingleFloat; }
+  bool isNotSingleFloat() const { return !IsSingleFloat; }
+  bool hasVFPU() const { return HasVFPU; }
+  bool hasABICall() const { return HasABICall; }
+  bool hasAbsoluteCall() const { return HasAbsoluteCall; }
+  bool isLinux() const { return IsLinux; }
+  unsigned getSSectionThreshold() const { return SSectionThreshold; }
+
+  /// Features related to the presence of specific instructions.
+  bool hasSEInReg() const { return HasSEInReg; }
+  bool hasCondMov() const { return HasCondMov; }
+  bool hasMulDivAdd() const { return HasMulDivAdd; }
+  bool hasMinMax() const { return HasMinMax; }
+  bool hasSwap() const { return HasSwap; }
+  bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.cpp b/lib/Target/Mips/MipsTargetAsmInfo.cpp
new file mode 100644
index 000000000000..c197b0c2981c
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.cpp
@@ -0,0 +1,98 @@
+//===-- MipsTargetAsmInfo.cpp - Mips asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+
+using namespace llvm;
+
+MipsTargetAsmInfo::MipsTargetAsmInfo(const MipsTargetMachine &TM):
+  ELFTargetAsmInfo(TM) {
+
+  Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+  AlignmentIsInBytes = false;
+  COMMDirectiveTakesAlignment = true;
+  Data16bitsDirective = "\t.half\t";
+  Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = NULL;
+  PrivateGlobalPrefix = "$";
+  JumpTableDataSection = "\t.rdata";
+  CommentString = "#";
+  ZeroDirective = "\t.space\t";
+  BSSSection = "\t.section\t.bss";
+  CStringSection = ".rodata.str";
+
+  if (!Subtarget->hasABICall()) {
+    JumpTableDirective = "\t.word\t";
+    SmallDataSection = getNamedSection("\t.sdata", SectionFlags::Writeable);
+    SmallBSSSection = getNamedSection("\t.sbss",
+                                      SectionFlags::Writeable |
+                                      SectionFlags::BSS);
+  } else
+    JumpTableDirective = "\t.gpword\t";
+
+}
+
+unsigned MipsTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV, const char* Name) const {
+  unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+  // Mask out the Small Section flag bit; Mips doesn't support the 's'
+  // section symbol for its small sections.
+ return (Flags & (~SectionFlags::Small)); +} + +SectionKind::Kind MipsTargetAsmInfo:: +SectionKindForGlobal(const GlobalValue *GV) const { + SectionKind::Kind K = ELFTargetAsmInfo::SectionKindForGlobal(GV); + + if (Subtarget->hasABICall()) + return K; + + if (K != SectionKind::Data && K != SectionKind::BSS && + K != SectionKind::RODataMergeConst) + return K; + + if (isa(GV)) { + const TargetData *TD = TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + unsigned Threshold = Subtarget->getSSectionThreshold(); + + if (Size > 0 && Size <= Threshold) { + if (K == SectionKind::BSS) + return SectionKind::SmallBSS; + else + return SectionKind::SmallData; + } + } + + return K; +} + +const Section* MipsTargetAsmInfo:: +SelectSectionForGlobal(const GlobalValue *GV) const { + SectionKind::Kind K = SectionKindForGlobal(GV); + const GlobalVariable *GVA = dyn_cast(GV); + + if (GVA && (!GVA->isWeakForLinker())) + switch (K) { + case SectionKind::SmallData: + return getSmallDataSection(); + case SectionKind::SmallBSS: + return getSmallBSSSection(); + default: break; + } + + return ELFTargetAsmInfo::SelectSectionForGlobal(GV); +} diff --git a/lib/Target/Mips/MipsTargetAsmInfo.h b/lib/Target/Mips/MipsTargetAsmInfo.h new file mode 100644 index 000000000000..2b5a739e7219 --- /dev/null +++ b/lib/Target/Mips/MipsTargetAsmInfo.h @@ -0,0 +1,51 @@ +//=====-- MipsTargetAsmInfo.h - Mips asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsTargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSTARGETASMINFO_H +#define MIPSTARGETASMINFO_H + +#include "MipsSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/ELFTargetAsmInfo.h" + +namespace llvm { + + // Forward declaration. + class GlobalValue; + class MipsTargetMachine; + + struct MipsTargetAsmInfo : public ELFTargetAsmInfo { + explicit MipsTargetAsmInfo(const MipsTargetMachine &TM); + + /// SectionKindForGlobal - This hook allows the target to select proper + /// section kind used for global emission. + virtual SectionKind::Kind + SectionKindForGlobal(const GlobalValue *GV) const; + + /// SectionFlagsForGlobal - This hook allows the target to select proper + /// section flags either for given global or for section. + virtual unsigned + SectionFlagsForGlobal(const GlobalValue *GV = NULL, + const char* name = NULL) const; + + virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const; + + private: + const MipsSubtarget *Subtarget; + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp new file mode 100644 index 000000000000..ef524e3ecd72 --- /dev/null +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -0,0 +1,133 @@ +//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Mips target spec. 
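SectionKindForGlobal above implements the small-data heuristic: with abicall off, data and bss globals no larger than the subtarget threshold (default 8 bytes, from the -mips-ssection-threshold option in MipsSubtarget.cpp) are redirected to the small sections, which are reachable with gp-relative addressing. A simplified standalone restatement (it omits the RODataMergeConst case the real code also accepts):

    #include <cassert>

    enum Kind { Data, BSS, SmallData, SmallBSS };

    static Kind classify(Kind K, unsigned Size, unsigned Threshold) {
      if (K != Data && K != BSS)
        return K;                        // only data/bss are redirected
      if (Size == 0 || Size > Threshold)
        return K;                        // unknown or too large: keep section
      return K == BSS ? SmallBSS : SmallData;
    }

    int main() {
      assert(classify(BSS, 4, 8) == SmallBSS);
      assert(classify(Data, 64, 8) == Data);
      return 0;
    }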
+// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsTargetAsmInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachineRegistry.h" +using namespace llvm; + +/// MipsTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int MipsTargetMachineModule; +int MipsTargetMachineModule = 0; + +// Register the target. +static RegisterTarget X("mips", "Mips"); +static RegisterTarget Y("mipsel", "Mipsel"); + +const TargetAsmInfo *MipsTargetMachine:: +createTargetAsmInfo() const +{ + return new MipsTargetAsmInfo(*this); +} + +// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment +// The stack is always 8 byte aligned +// On function prologue, the stack is created by decrementing +// its pointer. Once decremented, all references are done with positive +// offset from the stack/frame pointer, using StackGrowsUp enables +// an easier handling. +// Using CodeModel::Large enables different CALL behavior. +MipsTargetMachine:: +MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false): + Subtarget(*this, M, FS, isLittle), + DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") : + std::string("E-p:32:32:32-i8:8:32-i16:16:32")), + InstrInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), + TLInfo(*this) +{ + // Abicall enables PIC by default + if (Subtarget.hasABICall()) + setRelocationModel(Reloc::PIC_); + + // TODO: create an option to enable long calls, like -mlong-calls, + // that would be our CodeModel::Large. It must not work with Abicall. + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); +} + +MipselTargetMachine:: +MipselTargetMachine(const Module &M, const std::string &FS) : + MipsTargetMachine(M, FS, true) {} + +// return 0 and must specify -march to gen MIPS code. +unsigned MipsTargetMachine:: +getModuleMatchQuality(const Module &M) +{ + // We strongly match "mips*-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-") + return 20; + + if (TT.size() >= 13 && std::string(TT.begin(), + TT.begin()+13) == "mipsallegrex-") + return 20; + + return 0; +} + +// return 0 and must specify -march to gen MIPSEL code. +unsigned MipselTargetMachine:: +getModuleMatchQuality(const Module &M) +{ + // We strongly match "mips*el-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 7 && std::string(TT.begin(), TT.begin()+7) == "mipsel-") + return 20; + + if (TT.size() >= 15 && std::string(TT.begin(), + TT.begin()+15) == "mipsallegrexel-") + return 20; + + if (TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "psp") + return 20; + + return 0; +} + +// Install an instruction selector pass using +// the ISelDag to gen Mips code. +bool MipsTargetMachine:: +addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +{ + PM.add(createMipsISelDag(*this)); + return false; +} + +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should +// print out the code after the passes. 
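The getModuleMatchQuality implementations above are plain prefix tests on the module's target triple: a score of 20 signals a strong match, while 0 means the user must select the backend with -march. An equivalent standalone check:

    #include <cassert>
    #include <string>

    static unsigned matchQuality(const std::string &TT) {
      if (TT.compare(0, 5, "mips-") == 0)          return 20;
      if (TT.compare(0, 13, "mipsallegrex-") == 0) return 20;
      return 0;  // no match: caller must pass -march explicitly
    }

    int main() {
      assert(matchQuality("mips-unknown-linux-gnu") == 20);
      assert(matchQuality("i386-pc-linux-gnu") == 0);
      return 0;
    }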
+bool MipsTargetMachine:: +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +{ + PM.add(createMipsDelaySlotFillerPass(*this)); + return true; +} + +// Implements the AssemblyEmitter for the target. Must return +// true if AssemblyEmitter is supported +bool MipsTargetMachine:: +addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out) +{ + // Output assembly language. + PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h new file mode 100644 index 000000000000..a9e1df27ae7f --- /dev/null +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -0,0 +1,80 @@ +//===-- MipsTargetMachine.h - Define TargetMachine for Mips -00--*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Mips specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSTARGETMACHINE_H +#define MIPSTARGETMACHINE_H + +#include "MipsSubtarget.h" +#include "MipsInstrInfo.h" +#include "MipsISelLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" + +namespace llvm { + class raw_ostream; + + class MipsTargetMachine : public LLVMTargetMachine { + MipsSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MipsInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + MipsTargetLowering TLInfo; + + protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + + public: + MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle); + + virtual const MipsInstrInfo *getInstrInfo() const + { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const + { return &FrameInfo; } + virtual const MipsSubtarget *getSubtargetImpl() const + { return &Subtarget; } + virtual const TargetData *getTargetData() const + { return &DataLayout;} + + virtual const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual MipsTargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + }; + +/// MipselTargetMachine - Mipsel target machine. 
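The getters in MipsTargetMachine above lean on C++ covariant return types: the base TargetMachine declares them with base-class pointer results, and the overrides narrow the type so callers get the Mips-specific classes without a cast. A minimal illustration with hypothetical names:

    struct TII { virtual ~TII() {} };
    struct MipsTII : TII {};

    struct TM {
      virtual ~TM() {}
      virtual const TII *getInstrInfo() const = 0;
    };

    struct MyTM : TM {
      MipsTII II;
      // Covariant override: narrower return type, no cast needed.
      virtual const MipsTII *getInstrInfo() const { return &II; }
    };

    int main() { MyTM T; (void)T.getInstrInfo(); return 0; }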
+/// +class MipselTargetMachine : public MipsTargetMachine { +public: + MipselTargetMachine(const Module &M, const std::string &FS); + + static unsigned getModuleMatchQuality(const Module &M); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt new file mode 100644 index 000000000000..00d737af4c2e --- /dev/null +++ b/lib/Target/PIC16/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_TARGET_DEFINITIONS PIC16.td) + +tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(PIC16GenRegisterNames.inc -gen-register-enums) +tablegen(PIC16GenRegisterInfo.inc -gen-register-desc) +tablegen(PIC16GenInstrNames.inc -gen-instr-enums) +tablegen(PIC16GenInstrInfo.inc -gen-instr-desc) +tablegen(PIC16GenAsmWriter.inc -gen-asm-writer) +tablegen(PIC16GenDAGISel.inc -gen-dag-isel) +tablegen(PIC16GenCallingConv.inc -gen-callingconv) +tablegen(PIC16GenSubtarget.inc -gen-subtarget) + +add_llvm_target(PIC16 + PIC16AsmPrinter.cpp + PIC16DebugInfo.cpp + PIC16InstrInfo.cpp + PIC16ISelDAGToDAG.cpp + PIC16ISelLowering.cpp + PIC16MemSelOpt.cpp + PIC16RegisterInfo.cpp + PIC16Subtarget.cpp + PIC16TargetAsmInfo.cpp + PIC16TargetMachine.cpp + ) diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile new file mode 100644 index 000000000000..c429324cc2d1 --- /dev/null +++ b/lib/Target/PIC16/Makefile @@ -0,0 +1,21 @@ +##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMPIC16 +TARGET = PIC16 + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ + PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \ + PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \ + PIC16GenDAGISel.inc PIC16GenCallingConv.inc \ + PIC16GenSubtarget.inc + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h new file mode 100644 index 000000000000..40bed2f50e10 --- /dev/null +++ b/lib/Target/PIC16/PIC16.h @@ -0,0 +1,345 @@ +//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM PIC16 back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PIC16_H +#define LLVM_TARGET_PIC16_H + +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include + +namespace llvm { + class PIC16TargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class raw_ostream; + +namespace PIC16CC { + enum CondCodes { + EQ, + NE, + LT, + LE, + GT, + GE, + ULT, + UGT, + ULE, + UGE + }; +} + // A Central class to manage all ABI naming conventions. + // PAN - [P]ic16 [A]BI [N]ames + class PAN { + public: + // Map the name of the symbol to its section name. + // Current ABI: + // ----------------------------------------------------- + // ALL Names are prefixed with the symobl '@'. 
+ // ------------------------------------------------------ + // Global variables do not have any '.' in their names. + // These are maily function names and global variable names. + // Example - @foo, @i + // ------------------------------------------------------- + // Functions and auto variables. + // Names are mangled as .. + // Where is '@' and is any one of + // the following + // .auto. - an automatic var of a function. + // .temp. - temproray data of a function. + // .ret. - return value label for a function. + // .frame. - Frame label for a function where retval, args + // and temps are stored. + // .args. - Label used to pass arguments to a direct call. + // Example - Function name: @foo + // Its frame: @foo.frame. + // Its retval: @foo.ret. + // Its local vars: @foo.auto.a + // Its temp data: @foo.temp. + // Its arg passing: @foo.args. + //---------------------------------------------- + // Libcall - compiler generated libcall names must start with .lib. + // This id will be used to emit extern decls for libcalls. + // Example - libcall name: @.lib.sra.i8 + // To pass args: @.lib.sra.i8.args. + // To return val: @.lib.sra.i8.ret. + //---------------------------------------------- + // SECTION Names + // uninitialized globals - @udata..# + // initialized globals - @idata..# + // Function frame - @.frame_section. + // Function autos - @.autos_section. + // Declarations - @section.0 + //---------------------------------------------------------- + + // Tags used to mangle different names. + enum TAGS { + PREFIX_SYMBOL, + GLOBAL, + STATIC_LOCAL, + AUTOS_LABEL, + FRAME_LABEL, + RET_LABEL, + ARGS_LABEL, + TEMPS_LABEL, + + LIBCALL, + + FRAME_SECTION, + AUTOS_SECTION, + CODE_SECTION + }; + + // Textual names of the tags. + inline static const char *getTagName(TAGS tag) { + switch (tag) { + default: return ""; + case PREFIX_SYMBOL: return "@"; + case AUTOS_LABEL: return ".auto."; + case FRAME_LABEL: return ".frame."; + case TEMPS_LABEL: return ".temp."; + case ARGS_LABEL: return ".args."; + case RET_LABEL: return ".ret."; + case LIBCALL: return ".lib."; + case FRAME_SECTION: return ".frame_section."; + case AUTOS_SECTION: return ".autos_section."; + case CODE_SECTION: return ".code_section."; + } + } + + // Get tag type for the Symbol. + inline static TAGS getSymbolTag(const std::string &Sym) { + if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos) + return TEMPS_LABEL; + + if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos) + return FRAME_LABEL; + + if (Sym.find(getTagName(RET_LABEL)) != std::string::npos) + return RET_LABEL; + + if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos) + return ARGS_LABEL; + + if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos) + return AUTOS_LABEL; + + if (Sym.find(getTagName(LIBCALL)) != std::string::npos) + return LIBCALL; + + // It does not have any Tag. So its a true global or static local. + if (Sym.find(".") == std::string::npos) + return GLOBAL; + + // If a . is there, then it may be static local. + // We should mangle these as well in clang. + if (Sym.find(".") != std::string::npos) + return STATIC_LOCAL; + + assert (0 && "Could not determine Symbol's tag"); + } + + // addPrefix - add prefix symbol to a name if there isn't one already. + inline static std::string addPrefix (const std::string &Name) { + std::string prefix = getTagName (PREFIX_SYMBOL); + + // If this name already has a prefix, nothing to do. 
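PAN::getSymbolTag above classifies a mangled name purely by substring search. A standalone analog covering a few of the tags (the real method also checks .frame., .ret., .args. and .temp., and asserts when nothing fits):

    #include <iostream>
    #include <string>

    static const char *classifySym(const std::string &Sym) {
      if (Sym.find(".auto.") != std::string::npos) return "AUTOS_LABEL";
      if (Sym.find(".lib.") != std::string::npos)  return "LIBCALL";
      if (Sym.find('.') == std::string::npos)      return "GLOBAL";
      return "STATIC_LOCAL";
    }

    int main() {
      std::cout << classifySym("@foo") << '\n'          // GLOBAL
                << classifySym("@foo.auto.a") << '\n'   // AUTOS_LABEL
                << classifySym("@.lib.sra.i8") << '\n'; // LIBCALL
      return 0;
    }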
+ if (Name.compare(0, prefix.size(), prefix) == 0) + return Name; + + return prefix + Name; + } + + // Get mangled func name from a mangled sym name. + // In all cases func name is the first component before a '.'. + static inline std::string getFuncNameForSym(const std::string &Sym1) { + assert (getSymbolTag(Sym1) != GLOBAL && "not belongs to a function"); + + std::string Sym = addPrefix(Sym1); + + // Position of the . after func name. That's where func name ends. + size_t func_name_end = Sym.find ('.'); + + return Sym.substr (0, func_name_end); + } + + // Get Frame start label for a func. + static std::string getFrameLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(FRAME_LABEL); + return Func1 + tag; + } + + static std::string getRetvalLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(RET_LABEL); + return Func1 + tag; + } + + static std::string getArgsLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(ARGS_LABEL); + return Func1 + tag; + } + + static std::string getTempdataLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(TEMPS_LABEL); + return Func1 + tag; + } + + static std::string getFrameSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(FRAME_SECTION); + return Func1 + tag + "# UDATA_OVR"; + } + + static std::string getAutosSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(AUTOS_SECTION); + return Func1 + tag + "# UDATA_OVR"; + } + + static std::string getCodeSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(CODE_SECTION); + return Func1 + tag + "# CODE"; + } + + // udata and idata section names are generated by a given number. + // @udata..# + static std::string getUdataSectionName(unsigned num) { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << "udata." << num << ".# UDATA"; + return o.str(); + } + + static std::string getIdataSectionName(unsigned num) { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << "idata." << num << ".# IDATA"; + return o.str(); + } + + inline static bool isLocalName (const std::string &Name) { + if (getSymbolTag(Name) == AUTOS_LABEL) + return true; + + return false; + } + + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { + if (! isLocalName(Var)) return false; + + std::string Func1 = addPrefix(Func); + // Extract func name of the varilable. + const std::string &fname = getFuncNameForSym(Var); + + if (fname.compare(Func1) == 0) + return true; + + return false; + } + + + // Get the section for the given external symbol names. + // This tries to find the type (Tag) of the symbol from its mangled name + // and return appropriate section name for it. + static inline std::string getSectionNameForSym(const std::string &Sym1) { + std::string Sym = addPrefix(Sym1); + + std::string SectionName; + + std::string Fname = getFuncNameForSym (Sym); + TAGS id = getSymbolTag (Sym); + + switch (id) { + default : assert (0 && "Could not determine external symbol type"); + case FRAME_LABEL: + case RET_LABEL: + case TEMPS_LABEL: + case ARGS_LABEL: { + return getFrameSectionName(Fname); + } + case AUTOS_LABEL: { + return getAutosSectionName(Fname); + } + } + } + }; // class PAN. + + + // External symbol names require memory to live till the program end. 
+ // So we have to allocate it and keep. + inline static const char *createESName (const std::string &name) { + char *tmpName = new char[name.size() + 1]; + strcpy (tmpName, name.c_str()); + return tmpName; + } + + + + inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case PIC16CC::NE: return "ne"; + case PIC16CC::EQ: return "eq"; + case PIC16CC::LT: return "lt"; + case PIC16CC::ULT: return "lt"; + case PIC16CC::LE: return "le"; + case PIC16CC::GT: return "gt"; + case PIC16CC::UGT: return "gt"; + case PIC16CC::GE: return "ge"; + } + } + + inline static bool isSignedComparison(PIC16CC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case PIC16CC::NE: + case PIC16CC::EQ: + case PIC16CC::LT: + case PIC16CC::LE: + case PIC16CC::GE: + case PIC16CC::GT: + return true; + case PIC16CC::ULT: + case PIC16CC::UGT: + case PIC16CC::ULE: + case PIC16CC::UGE: + return false; // condition codes for unsigned comparison. + } + } + + + + FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM); + FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS, + PIC16TargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); + // Banksel optimzer pass. + FunctionPass *createPIC16MemSelOptimizerPass(); +} // end namespace llvm; + +// Defines symbolic names for PIC16 registers. This defines a mapping from +// register name to register number. +#include "PIC16GenRegisterNames.inc" + +// Defines symbolic names for the PIC16 instructions. +#include "PIC16GenInstrNames.inc" + +#endif diff --git a/lib/Target/PIC16/PIC16.td b/lib/Target/PIC16/PIC16.td new file mode 100644 index 000000000000..b2b9b1cd171e --- /dev/null +++ b/lib/Target/PIC16/PIC16.td @@ -0,0 +1,40 @@ +//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the PIC16 target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +include "PIC16RegisterInfo.td" +include "PIC16InstrInfo.td" + +//===----------------------------------------------------------------------===// +// Subtarget Features. +//===----------------------------------------------------------------------===// +def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true", + "PIC16 Cooper ISA Support">; + +//===----------------------------------------------------------------------===// +// PIC16 supported processors. 
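Note the design choice in PIC16CondCodeToString above: signed and unsigned compares share one mnemonic (LT and ULT both print "lt"), and the signedness is recovered separately through isSignedComparison. A condensed check of that property:

    #include <cassert>
    #include <cstring>

    enum CondCodes { EQ, NE, LT, ULT };

    static const char *toString(CondCodes CC) {
      switch (CC) {
      case EQ:  return "eq";
      case NE:  return "ne";
      case LT:
      case ULT: return "lt";  // same mnemonic; signedness tracked elsewhere
      }
      return "";
    }

    int main() {
      assert(std::strcmp(toString(LT), toString(ULT)) == 0);
      return 0;
    }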
+//===----------------------------------------------------------------------===// + +def : Processor<"generic", NoItineraries, []>; +def : Processor<"cooper", NoItineraries, [FeatureCooper]>; + + +def PIC16InstrInfo : InstrInfo {} + +def PIC16 : Target { + let InstructionSet = PIC16InstrInfo; +} + diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp new file mode 100644 index 000000000000..ef3bc4b52a96 --- /dev/null +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -0,0 +1,404 @@ +//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PIC16 assembly language. +// +//===----------------------------------------------------------------------===// + +#include "PIC16AsmPrinter.h" +#include "PIC16TargetAsmInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" + +using namespace llvm; + +#include "PIC16GenAsmWriter.inc" + +bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + printInstruction(MI); + return true; +} + +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + // This calls the base class function required to be called at beginning + // of runOnMachineFunction. + SetupMachineFunction(MF); + + // Get the mangled name. + const Function *F = MF.getFunction(); + CurrentFnName = Mang->getValueName(F); + + // Emit the function variables. + EmitFunctionFrame(MF); + + // Emit function begin debug directives + DbgInfo.EmitFunctBeginDI(F); + + EmitAutos(CurrentFnName); + const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str(); + + const Section *fCodeSection = TAI->getNamedSection(codeSection, + SectionFlags::Code); + O << "\n"; + // Start the Code Section. + SwitchToSection (fCodeSection); + + // Emit the frame address of the function at the beginning of code. + O << "\tretlw low(" << PAN::getFrameLabel(CurrentFnName) << ")\n"; + O << "\tretlw high(" << PAN::getFrameLabel(CurrentFnName) << ")\n"; + + // Emit function start label. + O << CurrentFnName << ":\n"; + + // For emitting line directives, we need to keep track of the current + // source line. When it changes then only emit the line directive. + unsigned CurLine = 0; + O << "\n"; + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true); + O << '\n'; + } + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Emit the line directive if source line changed. 
+ const DebugLoc DL = II->getDebugLoc(); + if (!DL.isUnknown()) { + unsigned line = MF.getDebugLocTuple(DL).Line; + if (line != CurLine) { + O << "\t.line " << line << "\n"; + CurLine = line; + } + } + + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + // Emit function end debug directives. + DbgInfo.EmitFunctEndDI(F, CurLine); + return false; // we didn't modify anything. +} + +/// createPIC16CodePrinterPass - Returns a pass that prints the PIC16 +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o, + PIC16TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + + +// printOperand - print operand of insn. +void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + else + assert(0 && "not implemented"); + return; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + return; + + case MachineOperand::MO_GlobalAddress: { + O << Mang->getValueName(MO.getGlobal()); + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *Sname = MO.getSymbolName(); + + // If its a libcall name, record it to decls section. + if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) { + LibcallDecls.push_back(Sname); + } + + O << Sname; + break; + } + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + + default: + assert(0 && " Operand type not supported."); + } +} + +void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); +} + +void PIC16AsmPrinter::printLibcallDecls(void) { + // If no libcalls used, return. + if (LibcallDecls.empty()) return; + + O << TAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n"; + // Remove duplicate entries. + LibcallDecls.sort(); + LibcallDecls.unique(); + for (std::list::const_iterator I = LibcallDecls.begin(); + I != LibcallDecls.end(); I++) { + O << TAI->getExternDirective() << *I << "\n"; + O << TAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n"; + O << TAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n"; + } + O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n"; +} + +bool PIC16AsmPrinter::doInitialization (Module &M) { + bool Result = AsmPrinter::doInitialization(M); + DbgInfo.EmitFileDirective(M); + + // FIXME:: This is temporary solution to generate the include file. + // The processor should be passed to llc as in input and the header file + // should be generated accordingly. + O << "\n\t#include P16F1937.INC\n"; + MachineModuleInfo *MMI = getAnalysisIfAvailable(); + assert(MMI); + DwarfWriter *DW = getAnalysisIfAvailable(); + assert(DW && "Dwarf Writer is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // Set the section names for all globals. 
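printLibcallDecls above deduplicates with the classic list idiom: std::list::unique only removes adjacent duplicates, hence the sort() first. (The pass stores const char*, so it orders by pointer; the sketch below uses std::string for clarity.)

    #include <iostream>
    #include <list>
    #include <string>

    int main() {
      std::list<std::string> Decls;
      Decls.push_back(".lib.sra.i8");
      Decls.push_back(".lib.mul.i8");
      Decls.push_back(".lib.sra.i8");  // duplicate
      Decls.sort();                    // bring duplicates together
      Decls.unique();                  // then drop the adjacent copies
      for (std::list<std::string>::const_iterator I = Decls.begin();
           I != Decls.end(); ++I)
        std::cout << "extern " << *I << '\n';  // two unique decls remain
      return 0;
    }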
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + I->setSection(TAI->SectionForGlobal(I)->getName()); + } + + EmitFunctionDecls(M); + EmitUndefinedVars(M); + EmitDefinedVars(M); + EmitIData(M); + EmitUData(M); + EmitRomData(M); + DbgInfo.PopulateFunctsDI(M); + return Result; +} + +// Emit extern decls for functions imported from other modules, and emit +// global declarations for function defined in this module and which are +// available to other modules. +void PIC16AsmPrinter::EmitFunctionDecls (Module &M) { + // Emit declarations for external functions. + O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n"; + for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) { + std::string Name = Mang->getValueName(I); + if (Name.compare("@abort") == 0) + continue; + + // If it is llvm intrinsic call then don't emit + if (Name.find("llvm.") != std::string::npos) + continue; + + if (! (I->isDeclaration() || I->hasExternalLinkage())) + continue; + + const char *directive = I->isDeclaration() ? TAI->getExternDirective() : + TAI->getGlobalDirective(); + + O << directive << Name << "\n"; + O << directive << PAN::getRetvalLabel(Name) << "\n"; + O << directive << PAN::getArgsLabel(Name) << "\n"; + } + + O << TAI->getCommentString() << "Function Declarations - END." <<"\n"; +} + +// Emit variables imported from other Modules. +void PIC16AsmPrinter::EmitUndefinedVars (Module &M) +{ + std::vector Items = PTAI->ExternalVarDecls->Items; + if (! Items.size()) return; + + O << "\n" << TAI->getCommentString() << "Imported Variables - BEGIN" << "\n"; + for (unsigned j = 0; j < Items.size(); j++) { + O << TAI->getExternDirective() << Mang->getValueName(Items[j]) << "\n"; + } + O << TAI->getCommentString() << "Imported Variables - END" << "\n"; +} + +// Emit variables defined in this module and are available to other modules. +void PIC16AsmPrinter::EmitDefinedVars (Module &M) +{ + std::vector Items = PTAI->ExternalVarDefs->Items; + if (! Items.size()) return; + + O << "\n" << TAI->getCommentString() << "Exported Variables - BEGIN" << "\n"; + for (unsigned j = 0; j < Items.size(); j++) { + O << TAI->getGlobalDirective() << Mang->getValueName(Items[j]) << "\n"; + } + O << TAI->getCommentString() << "Exported Variables - END" << "\n"; +} + +// Emit initialized data placed in ROM. +void PIC16AsmPrinter::EmitRomData (Module &M) +{ + + std::vector Items = PTAI->ROSection->Items; + if (! Items.size()) return; + + // Print ROData ection. + O << "\n"; + SwitchToSection(PTAI->ROSection->S_); + for (unsigned j = 0; j < Items.size(); j++) { + O << Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + int AddrSpace = Items[j]->getType()->getAddressSpace(); + EmitGlobalConstant(C, AddrSpace); + } +} + +bool PIC16AsmPrinter::doFinalization(Module &M) { + printLibcallDecls(); + DbgInfo.EmitVarDebugInfo(M); + O << "\n\t" << ".EOF"; + O << "\n\t" << "END\n"; + bool Result = AsmPrinter::doFinalization(M); + return Result; +} + +void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { + const Function *F = MF.getFunction(); + std::string FuncName = Mang->getValueName(F); + const TargetData *TD = TM.getTargetData(); + // Emit the data section name. 
+ O << "\n"; + const char *SectionName = PAN::getFrameSectionName(CurrentFnName).c_str(); + + const Section *fPDataSection = TAI->getNamedSection(SectionName, + SectionFlags::Writeable); + SwitchToSection(fPDataSection); + + // Emit function frame label + O << PAN::getFrameLabel(CurrentFnName) << ":\n"; + + const Type *RetType = F->getReturnType(); + unsigned RetSize = 0; + if (RetType->getTypeID() != Type::VoidTyID) + RetSize = TD->getTypeAllocSize(RetType); + + //Emit function return value space + // FIXME: Do not emit RetvalLable when retsize is zero. To do this + // we will need to avoid printing a global directive for Retval label + // in emitExternandGloblas. + if(RetSize > 0) + O << PAN::getRetvalLabel(CurrentFnName) << " RES " << RetSize << "\n"; + else + O << PAN::getRetvalLabel(CurrentFnName) << ": \n"; + + // Emit variable to hold the space for function arguments + unsigned ArgSize = 0; + for (Function::const_arg_iterator argi = F->arg_begin(), + arge = F->arg_end(); argi != arge ; ++argi) { + const Type *Ty = argi->getType(); + ArgSize += TD->getTypeAllocSize(Ty); + } + + O << PAN::getArgsLabel(CurrentFnName) << " RES " << ArgSize << "\n"; + + // Emit temporary space + int TempSize = PTLI->GetTmpSize(); + if (TempSize > 0 ) + O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize <<"\n"; +} + +void PIC16AsmPrinter::EmitIData (Module &M) { + + // Print all IDATA sections. + std::vector IDATASections = PTAI->IDATASections; + for (unsigned i = 0; i < IDATASections.size(); i++) { + O << "\n"; + SwitchToSection(IDATASections[i]->S_); + std::vector Items = IDATASections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string Name = Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + int AddrSpace = Items[j]->getType()->getAddressSpace(); + O << Name; + EmitGlobalConstant(C, AddrSpace); + } + } +} + +void PIC16AsmPrinter::EmitUData (Module &M) { + const TargetData *TD = TM.getTargetData(); + + // Print all BSS sections. + std::vector BSSSections = PTAI->BSSSections; + for (unsigned i = 0; i < BSSSections.size(); i++) { + O << "\n"; + SwitchToSection(BSSSections[i]->S_); + std::vector Items = BSSSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string Name = Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + + O << Name << " " <<"RES"<< " " << Size ; + O << "\n"; + } + } +} + +void PIC16AsmPrinter::EmitAutos (std::string FunctName) +{ + // Section names for all globals are already set. + + const TargetData *TD = TM.getTargetData(); + + // Now print Autos section for this function. + std::string SectionName = PAN::getAutosSectionName(FunctName); + std::vector AutosSections = PTAI->AutosSections; + for (unsigned i = 0; i < AutosSections.size(); i++) { + O << "\n"; + if (AutosSections[i]->S_->getName() == SectionName) { + SwitchToSection(AutosSections[i]->S_); + std::vector Items = AutosSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string VarName = Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + // Emit memory reserve directive. 
+ O << VarName << " RES " << Size << "\n"; + } + break; + } + } +} + diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h new file mode 100644 index 000000000000..2545dfd680a8 --- /dev/null +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -0,0 +1,70 @@ +//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PIC16 assembly language. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16ASMPRINTER_H +#define PIC16ASMPRINTER_H + +#include "PIC16.h" +#include "PIC16TargetMachine.h" +#include "PIC16DebugInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "PIC16TargetAsmInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include +#include + +namespace llvm { + struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { + explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) { + PTLI = TM.getTargetLowering(); + PTAI = static_cast (T); + } + private : + virtual const char *getPassName() const { + return "PIC16 Assembly Printer"; + } + + bool runOnMachineFunction(MachineFunction &F); + void printOperand(const MachineInstr *MI, int opNum); + void printCCOperand(const MachineInstr *MI, int opNum); + bool printInstruction(const MachineInstr *MI); // definition autogenerated. + bool printMachineInstruction(const MachineInstr *MI); + void EmitFunctionDecls (Module &M); + void EmitUndefinedVars (Module &M); + void EmitDefinedVars (Module &M); + void EmitIData (Module &M); + void EmitUData (Module &M); + void EmitAutos (std::string FunctName); + void EmitRomData (Module &M); + void EmitFunctionFrame(MachineFunction &MF); + void printLibcallDecls(void); + protected: + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + private: + PIC16TargetLowering *PTLI; + PIC16DbgInfo DbgInfo; + const PIC16TargetAsmInfo *PTAI; + std::list LibcallDecls; // List of extern decls. + }; +} // end of namespace + +#endif diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp new file mode 100644 index 000000000000..4d43811f24a6 --- /dev/null +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -0,0 +1,270 @@ +//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the helper functions for representing debug information. 
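The EmitUData and EmitAutos loops above reduce to printing one reserve directive per global, sized by TargetData::getTypeAllocSize. A hypothetical standalone rendering (the symbol name below is made up):

    #include <iostream>
    #include <string>

    // Emit "<name> RES <size>", the memory reserve directive used above.
    static void emitRes(std::ostream &O, const std::string &Name,
                        unsigned SizeInBytes) {
      O << Name << " RES " << SizeInBytes << "\n";
    }

    int main() {
      emitRes(std::cout, "@i.udata.0", 2);  // hypothetical 2-byte global
      return 0;
    }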
+// +//===----------------------------------------------------------------------===// + +#include "PIC16.h" +#include "PIC16DebugInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +PIC16DbgInfo::~PIC16DbgInfo() { + for(std::map::iterator i = FunctNameMap.begin(); + i!=FunctNameMap.end(); i++) + delete i->second; + FunctNameMap.clear(); +} + +void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName) { + if (Ty.isBasicType(Ty.getTag())) { + std::string Name = ""; + Ty.getName(Name); + unsigned short BaseTy = GetTypeDebugNumber(Name); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | (0xffff & BaseTy); + } + else if (Ty.isDerivedType(Ty.getTag())) { + switch(Ty.getTag()) + { + case dwarf::DW_TAG_pointer_type: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_PTR; + break; + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } + DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); + } + else if (Ty.isCompositeType(Ty.getTag())) { + switch (Ty.getTag()) { + case dwarf::DW_TAG_array_type: { + DICompositeType CTy = DICompositeType(Ty.getGV()); + DIArray Elements = CTy.getTypeArray(); + unsigned short size = 1; + unsigned short Dimension[4]={0,0,0,0}; + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) { + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_ARY; + DISubrange SubRange = DISubrange(Element.getGV()); + Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; + // Each dimension is represented by 2 bytes starting at byte 9. + Aux[8+i*2+0] = Dimension[i]; + Aux[8+i*2+1] = Dimension[i] >> 8; + size = size * Dimension[i]; + } + } + HasAux = true; + // In auxillary entry for array, 7th and 8th byte represent array size. + Aux[6] = size; + Aux[7] = size >> 8; + DIType BaseType = CTy.getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); + + break; + } + case dwarf:: DW_TAG_union_type: + case dwarf::DW_TAG_structure_type: { + DICompositeType CTy = DICompositeType(Ty.getGV()); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + if (Ty.getTag() == dwarf::DW_TAG_structure_type) + TypeNo = TypeNo | PIC16Dbg::T_STRUCT; + else + TypeNo = TypeNo | PIC16Dbg::T_UNION; + CTy.getName(TypeName); + unsigned size = CTy.getSizeInBits()/8; + // 7th and 8th byte represent size. 
+ HasAux = true; + Aux[6] = size; + Aux[7] = size >> 8; + break; + } + case dwarf::DW_TAG_enumeration_type: { + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | PIC16Dbg::T_ENUM; + break; + } + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } + } + else { + TypeNo = PIC16Dbg::T_NULL; + HasAux = false; + } + return; +} + + +unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { + if (type == "char") + return PIC16Dbg::T_CHAR; + else if (type == "short") + return PIC16Dbg::T_SHORT; + else if (type == "int") + return PIC16Dbg::T_INT; + else if (type == "long") + return PIC16Dbg::T_LONG; + else if (type == "unsigned char") + return PIC16Dbg::T_UCHAR; + else if (type == "unsigned short") + return PIC16Dbg::T_USHORT; + else if (type == "unsigned int") + return PIC16Dbg::T_UINT; + else if (type == "unsigned long") + return PIC16Dbg::T_ULONG; + else + return 0; +} + +short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { + short ClassNo; + if (PAN::isLocalName(DIGV.getGlobal()->getName())) { + // Generating C_AUTO here fails due to error in linker. Change it once + // linker is fixed. + ClassNo = PIC16Dbg::C_STAT; + } + else if (DIGV.isLocalToUnit()) + ClassNo = PIC16Dbg::C_STAT; + else + ClassNo = PIC16Dbg::C_EXT; + return ClassNo; +} + +void PIC16DbgInfo::PopulateFunctsDI(Module &M) { + GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms"); + if (!Root) + return; + Constant *RootC = cast(*Root->use_begin()); + + for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end(); + UI != UE; ++UI) + for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end(); + UUI != UUE; ++UUI) { + GlobalVariable *GVSP = cast(*UUI); + DISubprogram *SP = new DISubprogram(GVSP); + std::string Name; + SP->getLinkageName(Name); + FunctNameMap[Name] = SP; + } + return; +} + +DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) { + return FunctNameMap[FunctName]; +} + +void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { + std::string FunctName = F->getName(); + DISubprogram *SP = getFunctDI(FunctName); + if (SP) { + std::string FunctBeginSym = ".bf." + FunctName; + std::string BlockBeginSym = ".bb." + FunctName; + + int FunctBeginLine = SP->getLineNumber(); + int BFAux[PIC16Dbg::AuxSize] = {0}; + BFAux[4] = FunctBeginLine; + BFAux[5] = FunctBeginLine >> 8; + // Emit debug directives for beginning of function. + EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN); + EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize); + EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK); + EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize); + } +} + +void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { + std::string FunctName = F->getName(); + DISubprogram *SP = getFunctDI(FunctName); + if (SP) { + std::string FunctEndSym = ".ef." + FunctName; + std::string BlockEndSym = ".eb." + FunctName; + + // Emit debug directives for end of function. + EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK); + int EFAux[PIC16Dbg::AuxSize] = {0}; + // 5th and 6th byte stand for line number. + EFAux[4] = Line; + EFAux[5] = Line >> 8; + EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize); + EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN); + EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize); + } +} + +/// EmitAuxEntry - Emit Auxiliary debug information. 
+/// +void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) { + O << "\n\t.dim " << VarName << ", 1" ; + for (int i = 0; i(*Root->use_begin()); + for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end(); + UI != UE; ++UI) { + for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end(); + UUI != UUE; ++UUI) { + DIGlobalVariable DIGV(cast(*UUI)); + DIType Ty = DIGV.getType(); + unsigned short TypeNo = 0; + bool HasAux = false; + int Aux[PIC16Dbg::AuxSize] = { 0 }; + std::string TypeName = ""; + std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName(); + PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName); + // Emit debug info only if type information is availaible. + if (TypeNo != PIC16Dbg::T_NULL) { + O << "\n\t.type " << VarName << ", " << TypeNo; + short ClassNo = getClass(DIGV); + O << "\n\t.class " << VarName << ", " << ClassNo; + if (HasAux) { + if (TypeName != "") { + // Emit debug info for structure and union objects after + // .dim directive supports structure/union tag name in aux entry. + /* O << "\n\t.dim " << VarName << ", 1," << TypeName; + for (int i = 0; i + +namespace llvm { + namespace PIC16Dbg { + enum VarType { + T_NULL, + T_VOID, + T_CHAR, + T_SHORT, + T_INT, + T_LONG, + T_FLOAT, + T_DOUBLE, + T_STRUCT, + T_UNION, + T_ENUM, + T_MOE, + T_UCHAR, + T_USHORT, + T_UINT, + T_ULONG + }; + enum DerivedType { + DT_NONE, + DT_PTR, + DT_FCN, + DT_ARY + }; + enum TypeSize { + S_BASIC = 5, + S_DERIVED = 3 + }; + enum DbgClass { + C_NULL, + C_AUTO, + C_EXT, + C_STAT, + C_REG, + C_EXTDEF, + C_LABEL, + C_ULABEL, + C_MOS, + C_ARG, + C_STRTAG, + C_MOU, + C_UNTAG, + C_TPDEF, + C_USTATIC, + C_ENTAG, + C_MOE, + C_REGPARM, + C_FIELD, + C_AUTOARG, + C_LASTENT, + C_BLOCK = 100, + C_FCN, + C_EOS, + C_FILE, + C_LINE, + C_ALIAS, + C_HIDDEN, + C_EOF, + C_LIST, + C_SECTION, + C_EFCN = 255 + }; + enum SymbolSize { + AuxSize =20 + }; + } + + class raw_ostream; + + class PIC16DbgInfo { + std::map FunctNameMap; + raw_ostream &O; + const TargetAsmInfo *TAI; + public: + PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {} + ~PIC16DbgInfo(); + void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, + int Aux[], std::string &TypeName); + unsigned GetTypeDebugNumber(std::string &type); + short getClass(DIGlobalVariable DIGV); + void PopulateFunctsDI(Module &M); + DISubprogram *getFunctDI(std::string FunctName); + void EmitFunctBeginDI(const Function *F); + void EmitFunctEndDI(const Function *F, unsigned Line); + void EmitAuxEntry(const std::string VarName, int Aux[], int num); + inline void EmitSymbol(std::string Name, int Class); + void EmitVarDebugInfo(Module &M); + void EmitFileDirective(Module &M); + }; +} // end namespace llvm; +#endif diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp new file mode 100644 index 000000000000..6c2b8ec9747a --- /dev/null +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -0,0 +1,59 @@ +//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the PIC16 target. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16ISelDAGToDAG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// createPIC16ISelDag - This pass converts a legalized DAG into a
+/// PIC16-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
+  return new PIC16DAGToDAGISel(TM);
+}
+
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PIC16DAGToDAGISel::InstructionSelect() {
+  DEBUG(BB->dump());
+  // Select target instructions for the DAG.
+  SelectRoot(*CurDAG);
+  CurDAG->RemoveDeadNodes();
+}
+
+/// Select - Select instructions not customized. Used for
+/// expanded, promoted and normal instructions.
+SDNode* PIC16DAGToDAGISel::Select(SDValue N) {
+
+  // Select the default instruction.
+  SDNode *ResNode = SelectCode(N);
+
+  return ResNode;
+}
+
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N,
+                                         SDValue &Address) {
+  // Return true if TGA or ES.
+  if (N.getOpcode() == ISD::TargetGlobalAddress
+      || N.getOpcode() == ISD::TargetExternalSymbol) {
+    Address = N;
+    return true;
+  }
+
+  return false;
+}
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
new file mode 100644
index 000000000000..83abed3958a4
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -0,0 +1,60 @@
+//===-- PIC16ISelDAGToDAG.h - A dag to dag inst selector for PIC16 --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16RegisterInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+
+  /// TM - Keep a reference to PIC16TargetMachine.
+  PIC16TargetMachine &TM;
+
+  /// PIC16Lowering - This object fully describes how to lower LLVM code to a
+  /// PIC16-specific SelectionDAG.
+  PIC16TargetLowering PIC16Lowering;
+
+public:
+  explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
+        SelectionDAGISel(tm),
+        TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
+
+  // Pass Name
+  virtual const char *getPassName() const {
+    return "PIC16 DAG->DAG Pattern Instruction Selection";
+  }
+
+  virtual void InstructionSelect();
+
+private:
+  // Include the pieces autogenerated from the target description.
+#include "PIC16GenDAGISel.inc"
+
+  SDNode *Select(SDValue N);
+
+  // Match direct address complex pattern.
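+  // For illustration: any TargetGlobalAddress or TargetExternalSymbol node
+  // qualifies and is passed back unchanged through Address, so the generated
+  // matcher can fold a global's address straight into a memory operand (the
+  // pattern name used in the .td files is not shown in this header).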
+ bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address); + +}; + +} + diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp new file mode 100644 index 000000000000..92fdcb2c0c15 --- /dev/null +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -0,0 +1,1756 @@ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that PIC16 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pic16-lower" + +#include "PIC16ISelLowering.h" +#include "PIC16TargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Function.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + + +using namespace llvm; + +static const char *getIntrinsicName(unsigned opcode) { + std::string Basename; + switch(opcode) { + default: assert (0 && "do not know intrinsic name"); + case PIC16ISD::SRA_I8: Basename = "sra.i8"; break; + case RTLIB::SRA_I16: Basename = "sra.i16"; break; + case RTLIB::SRA_I32: Basename = "sra.i32"; break; + + case PIC16ISD::SLL_I8: Basename = "sll.i8"; break; + case RTLIB::SHL_I16: Basename = "sll.i16"; break; + case RTLIB::SHL_I32: Basename = "sll.i32"; break; + + case PIC16ISD::SRL_I8: Basename = "srl.i8"; break; + case RTLIB::SRL_I16: Basename = "srl.i16"; break; + case RTLIB::SRL_I32: Basename = "srl.i32"; break; + + case PIC16ISD::MUL_I8: Basename = "mul.i8"; break; + case RTLIB::MUL_I16: Basename = "mul.i16"; break; + case RTLIB::MUL_I32: Basename = "mul.i32"; break; + } + + std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); + std::string tagname = PAN::getTagName(PAN::LIBCALL); + std::string Fullname = prefix + tagname + Basename; + + // The name has to live through program life. + char *tmp = new char[Fullname.size() + 1]; + strcpy (tmp, Fullname.c_str()); + + return tmp; +} + +// PIC16TargetLowering Constructor. 
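+// For illustration: i16 is illegal here, so the type legalizer expands an
+// i16 shift into a call to the symbol composed by getIntrinsicName() above
+// (registered below via setLibcallName(RTLIB::SHL_I16, ...)). The strcpy'd
+// buffer is deliberately never freed, as only the raw char* is kept.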
+PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) + : TargetLowering(TM), TmpSize(0) { + + Subtarget = &TM.getSubtarget(); + + addRegisterClass(MVT::i8, PIC16::GPRRegisterClass); + + setShiftAmountType(MVT::i8); + setShiftAmountFlavor(Extend); + + // SRA library call names + setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8)); + setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16)); + setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32)); + + // SHL library call names + setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8)); + setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16)); + setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32)); + + // SRL library call names + setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8)); + setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16)); + setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32)); + + // MUL Library call names + setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8)); + setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16)); + setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32)); + + setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); + setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); + + setOperationAction(ISD::LOAD, MVT::i8, Legal); + setOperationAction(ISD::LOAD, MVT::i16, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + + setOperationAction(ISD::STORE, MVT::i8, Legal); + setOperationAction(ISD::STORE, MVT::i16, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + setOperationAction(ISD::ADDE, MVT::i8, Custom); + setOperationAction(ISD::ADDC, MVT::i8, Custom); + setOperationAction(ISD::SUBE, MVT::i8, Custom); + setOperationAction(ISD::SUBC, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i16, Custom); + + setOperationAction(ISD::OR, MVT::i8, Custom); + setOperationAction(ISD::AND, MVT::i8, Custom); + setOperationAction(ISD::XOR, MVT::i8, Custom); + + setOperationAction(ISD::FrameIndex, MVT::i16, Custom); + setOperationAction(ISD::CALL, MVT::i16, Custom); + setOperationAction(ISD::RET, MVT::Other, Custom); + + setOperationAction(ISD::MUL, MVT::i8, Custom); + setOperationAction(ISD::MUL, MVT::i16, Expand); + setOperationAction(ISD::MUL, MVT::i32, Expand); + + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + + setOperationAction(ISD::SRA, MVT::i8, Custom); + setOperationAction(ISD::SRA, MVT::i16, Expand); + setOperationAction(ISD::SRA, MVT::i32, Expand); + setOperationAction(ISD::SHL, MVT::i8, Custom); + setOperationAction(ISD::SHL, MVT::i16, Expand); + setOperationAction(ISD::SHL, MVT::i32, Expand); + setOperationAction(ISD::SRL, MVT::i8, Custom); + setOperationAction(ISD::SRL, MVT::i16, Expand); + setOperationAction(ISD::SRL, MVT::i32, Expand); + + // PIC16 does not support 
shift parts + setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + + + // PIC16 does not have a SETCC, expand it to SELECT_CC. + setOperationAction(ISD::SETCC, MVT::i8, Expand); + setOperationAction(ISD::SELECT, MVT::i8, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + + setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); + setOperationAction(ISD::BR_CC, MVT::i8, Custom); + + //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom); + setTruncStoreAction(MVT::i16, MVT::i8, Custom); + + // Now deduce the information based on the above mentioned + // actions + computeRegisterProperties(); +} + +// getOutFlag - Extract the flag result if the Op has it. +static SDValue getOutFlag(SDValue &Op) { + // Flag is the last value of the node. + SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1); + + assert (Flag.getValueType() == MVT::Flag + && "Node does not have an out Flag"); + + return Flag; +} +// Get the TmpOffset for FrameIndex +unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) { + std::map::iterator + MapIt = FiTmpOffsetMap.find(FI); + if (MapIt != FiTmpOffsetMap.end()) + return MapIt->second; + + // This FI (FrameIndex) is not yet mapped, so map it + FiTmpOffsetMap[FI] = TmpSize; + TmpSize += size; + return FiTmpOffsetMap[FI]; +} + +// To extract chain value from the SDValue Nodes +// This function will help to maintain the chain extracting +// code at one place. In case of any change in future it will +// help maintain the code. +static SDValue getChain(SDValue &Op) { + SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1); + + // If the last value returned in Flag then the chain is + // second last value returned. + if (Chain.getValueType() == MVT::Flag) + Chain = Op.getValue(Op.getNode()->getNumValues() - 2); + + // All nodes may not produce a chain. Therefore following assert + // verifies that the node is returning a chain only. + assert (Chain.getValueType() == MVT::Other + && "Node does not have a chain"); + + return Chain; +} + +/// PopulateResults - Helper function to LowerOperation. +/// If a node wants to return multiple results after lowering, +/// it stuffs them into an array of SDValue called Results. + +static void PopulateResults(SDValue N, SmallVectorImpl&Results) { + if (N.getOpcode() == ISD::MERGE_VALUES) { + int NumResults = N.getNumOperands(); + for( int i = 0; i < NumResults; i++) + Results.push_back(N.getOperand(i)); + } + else + Results.push_back(N); +} + +MVT PIC16TargetLowering::getSetCCResultType(MVT ValType) const { + return MVT::i8; +} + +/// The type legalizer framework of generating legalizer can generate libcalls +/// only when the operand/result types are illegal. +/// PIC16 needs to generate libcalls even for the legal types (i8) for some ops. +/// For example an arithmetic right shift. These functions are used to lower +/// such operations that generate libcall for legal types. 
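+/// For illustration: i8 is legal, so the type legalizer would never emit a
+/// libcall for (sra i8:%x, %n); LowerShift below instead builds the call by
+/// hand through MakePIC16Libcall(PIC16ISD::SRA_I8, ...).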
+ +void +PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, + const char *Name) { + PIC16LibcallNames[Call] = Name; +} + +const char * +PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) { + return PIC16LibcallNames[Call]; +} + +SDValue +PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, + MVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, + SelectionDAG &DAG, DebugLoc dl) { + + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i8); + + const Type *RetTy = RetVT.getTypeForMVT(); + std::pair CallInfo = + LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, CallingConv::C, false, Callee, Args, DAG, dl); + + return CallInfo.first; +} + +const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return NULL; + case PIC16ISD::Lo: return "PIC16ISD::Lo"; + case PIC16ISD::Hi: return "PIC16ISD::Hi"; + case PIC16ISD::MTLO: return "PIC16ISD::MTLO"; + case PIC16ISD::MTHI: return "PIC16ISD::MTHI"; + case PIC16ISD::MTPCLATH: return "PIC16ISD::MTPCLATH"; + case PIC16ISD::PIC16Connect: return "PIC16ISD::PIC16Connect"; + case PIC16ISD::Banksel: return "PIC16ISD::Banksel"; + case PIC16ISD::PIC16Load: return "PIC16ISD::PIC16Load"; + case PIC16ISD::PIC16LdArg: return "PIC16ISD::PIC16LdArg"; + case PIC16ISD::PIC16LdWF: return "PIC16ISD::PIC16LdWF"; + case PIC16ISD::PIC16Store: return "PIC16ISD::PIC16Store"; + case PIC16ISD::PIC16StWF: return "PIC16ISD::PIC16StWF"; + case PIC16ISD::BCF: return "PIC16ISD::BCF"; + case PIC16ISD::LSLF: return "PIC16ISD::LSLF"; + case PIC16ISD::LRLF: return "PIC16ISD::LRLF"; + case PIC16ISD::RLF: return "PIC16ISD::RLF"; + case PIC16ISD::RRF: return "PIC16ISD::RRF"; + case PIC16ISD::CALL: return "PIC16ISD::CALL"; + case PIC16ISD::CALLW: return "PIC16ISD::CALLW"; + case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC"; + case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC"; + case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND"; + case PIC16ISD::Dummy: return "PIC16ISD::Dummy"; + } +} + +void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) { + + switch (N->getOpcode()) { + case ISD::GlobalAddress: + Results.push_back(ExpandGlobalAddress(N, DAG)); + return; + case ISD::ExternalSymbol: + Results.push_back(ExpandExternalSymbol(N, DAG)); + return; + case ISD::STORE: + Results.push_back(ExpandStore(N, DAG)); + return; + case ISD::LOAD: + PopulateResults(ExpandLoad(N, DAG), Results); + return; + case ISD::ADD: + // Results.push_back(ExpandAdd(N, DAG)); + return; + case ISD::FrameIndex: + Results.push_back(ExpandFrameIndex(N, DAG)); + return; + default: + assert (0 && "not implemented"); + return; + } +} + +SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { + + // Currently handling FrameIndex of size MVT::i16 only + // One example of this scenario is when return value is written on + // FrameIndex#0 + + if (N->getValueType(0) != MVT::i16) + return SDValue(); + + // Expand the FrameIndex into ExternalSymbol and a Constant node + // The constant will represent the frame index number + // Get the current function frame + MachineFunction &MF = DAG.getMachineFunction(); 
+ const Function *Func = MF.getFunction(); + const std::string Name = Func->getName(); + + FrameIndexSDNode *FR = dyn_cast(SDValue(N,0)); + // FIXME there isn't really debug info here + DebugLoc dl = FR->getDebugLoc(); + int Index = FR->getIndex(); + + // Expand FrameIndex like GlobalAddress and ExternalSymbol + // Also use Offset field for lo and hi parts. The default + // offset is zero. + SDValue Offset = DAG.getConstant(0, MVT::i8); + SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8); + SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset); + SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset); + return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi); +} + + +SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { + StoreSDNode *St = cast(N); + SDValue Chain = St->getChain(); + SDValue Src = St->getValue(); + SDValue Ptr = St->getBasePtr(); + MVT ValueType = Src.getValueType(); + unsigned StoreOffset = 0; + DebugLoc dl = N->getDebugLoc(); + + SDValue PtrLo, PtrHi; + LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl); + + if (ValueType == MVT::i8) { + return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src, + PtrLo, PtrHi, + DAG.getConstant (0 + StoreOffset, MVT::i8)); + } + else if (ValueType == MVT::i16) { + // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR. + SDValue SrcLo, SrcHi; + GetExpandedParts(Src, DAG, SrcLo, SrcHi); + SDValue ChainLo = Chain, ChainHi = Chain; + if (Chain.getOpcode() == ISD::TokenFactor) { + ChainLo = Chain.getOperand(0); + ChainHi = Chain.getOperand(1); + } + SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, + ChainLo, + SrcLo, PtrLo, PtrHi, + DAG.getConstant (0 + StoreOffset, MVT::i8)); + + SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi, + SrcHi, PtrLo, PtrHi, + DAG.getConstant (1 + StoreOffset, MVT::i8)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1), + getChain(Store2)); + } + else if (ValueType == MVT::i32) { + // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR. + SDValue SrcLo, SrcHi; + GetExpandedParts(Src, DAG, SrcLo, SrcHi); + + // Get the expanded parts of each of SrcLo and SrcHi. 
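+    // For illustration: storing i32 0x12345678 at offset 0 becomes four byte
+    // stores (0x78 at 0, 0x56 at 1, 0x34 at 2, 0x12 at 3), whose chains are
+    // joined back pairwise with TokenFactor nodes below.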
+ SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2; + GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2); + GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2); + + SDValue ChainLo = Chain, ChainHi = Chain; + if (Chain.getOpcode() == ISD::TokenFactor) { + ChainLo = Chain.getOperand(0); + ChainHi = Chain.getOperand(1); + } + SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi, + ChainHi2 = ChainHi; + if (ChainLo.getOpcode() == ISD::TokenFactor) { + ChainLo1 = ChainLo.getOperand(0); + ChainLo2 = ChainLo.getOperand(1); + } + if (ChainHi.getOpcode() == ISD::TokenFactor) { + ChainHi1 = ChainHi.getOperand(0); + ChainHi2 = ChainHi.getOperand(1); + } + SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, + ChainLo1, + SrcLo1, PtrLo, PtrHi, + DAG.getConstant (0 + StoreOffset, MVT::i8)); + + SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2, + SrcLo2, PtrLo, PtrHi, + DAG.getConstant (1 + StoreOffset, MVT::i8)); + + SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1, + SrcHi1, PtrLo, PtrHi, + DAG.getConstant (2 + StoreOffset, MVT::i8)); + + SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2, + SrcHi2, PtrLo, PtrHi, + DAG.getConstant (3 + StoreOffset, MVT::i8)); + + SDValue RetLo = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + getChain(Store1), getChain(Store2)); + SDValue RetHi = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + getChain(Store3), getChain(Store4)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi); + + } + else { + assert (0 && "value type not supported"); + return SDValue(); + } +} + +SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) +{ + ExternalSymbolSDNode *ES = dyn_cast(SDValue(N, 0)); + // FIXME there isn't really debug info here + DebugLoc dl = ES->getDebugLoc(); + + SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8); + SDValue Offset = DAG.getConstant(0, MVT::i8); + SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset); + SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset); + + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi); +} + +// ExpandGlobalAddress - +SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { + GlobalAddressSDNode *G = dyn_cast(SDValue(N, 0)); + // FIXME there isn't really debug info here + DebugLoc dl = G->getDebugLoc(); + + SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8, + G->getOffset()); + + SDValue Offset = DAG.getConstant(0, MVT::i8); + SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset); + SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset); + + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi); +} + +bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { + assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!"); + + if (Op.getOpcode() == ISD::BUILD_PAIR) { + if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo) + return true; + } + return false; +} + +// Return true if DirectAddress is in ROM_SPACE +bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { + + // RomAddress is a GlobalAddress in ROM_SPACE_ + // If the Op is not a GlobalAddress return NULL without checking + // anything further. + if (!isDirectAddress(Op)) + return false; + + // Its a GlobalAddress. 
+  // It is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)) and Op is BUILD_PAIR.
+  SDValue TGA = Op.getOperand(0).getOperand(0);
+  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(TGA);
+
+  if (GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
+    return true;
+
+  // For any other address space return false.
+  return false;
+}
+
+
+// GetExpandedParts - This function works along the same lines as
+// GetExpandedInteger in the type legalizer. It returns the expanded
+// parts of Op in Lo and Hi.
+
+void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDNode *N = Op.getNode();
+  DebugLoc dl = N->getDebugLoc();
+  MVT NewVT = getTypeToTransformTo(N->getValueType(0));
+
+  // Extract the lo component.
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+                   DAG.getConstant(0, MVT::i8));
+
+  // Extract the hi component.
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+                   DAG.getConstant(1, MVT::i8));
+}
+
+// Legalize FrameIndex into ExternalSymbol and offset.
+void
+PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
+                                        SDValue &ES, int &Offset) {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const Function *Func = MF.getFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const std::string Name = Func->getName();
+
+  FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
+
+  // FrameIndices are not stack offsets. But they represent the request
+  // for space on stack. That space requested may be more than one byte.
+  // Therefore, to calculate the stack offset that a FrameIndex aligns
+  // with, we need to traverse all the FrameIndices available earlier in
+  // the list and add their requested size.
+  unsigned FIndex = FR->getIndex();
+  const char *tmpName;
+  if (FIndex < ReservedFrameCount) {
+    tmpName = createESName(PAN::getFrameLabel(Name));
+    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+    Offset = 0;
+    for (unsigned i = 0; i < FIndex; ++i) {
+      Offset += MFI->getObjectSize(i);
+    }
+  } else {
+    // FrameIndex has been made for some temporary storage.
+    tmpName = createESName(PAN::getTempdataLabel(Name));
+    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+    Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex));
+  }
+
+  return;
+}
+
+// This function legalizes the PIC16 addresses. If the pointer is
+//  -- a direct address variable
+//     --> then a Banksel for that variable will be created.
+//  -- a ROM variable
+//     --> then it will be treated as an indirect address.
+//  -- an indirect address
+//     --> then the address will be loaded into FSR.
+//  -- an ADD with a constant operand
+//     --> then the constant operand of ADD will be returned as Offset
+//         and the non-constant operand of ADD will be treated as the pointer.
+// Returns the hi and lo parts of the address, and the offset (in case of ADD).
+
+void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
+                                          SDValue &Lo, SDValue &Hi,
+                                          unsigned &Offset, DebugLoc dl) {
+
+  // Offset, by default, should be 0.
+  Offset = 0;
+
+  // If the pointer is ADD with constant,
+  // return the constant value as the offset.
+  if (Ptr.getOpcode() == ISD::ADD) {
+    SDValue OperLeft = Ptr.getOperand(0);
+    SDValue OperRight = Ptr.getOperand(1);
+    if (OperLeft.getOpcode() == ISD::Constant) {
+      Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
+      Ptr = OperRight;
+    } else if (OperRight.getOpcode() == ISD::Constant) {
+      Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
+      Ptr = OperLeft;
+    }
+  }
+
+  // If the pointer is of type i8 and an external symbol,
+  // then treat it as a direct address.
+ // One example for such case is storing and loading + // from function frame during a call + if (Ptr.getValueType() == MVT::i8) { + switch (Ptr.getOpcode()) { + case ISD::TargetExternalSymbol: + Lo = Ptr; + Hi = DAG.getConstant(1, MVT::i8); + return; + } + } + + // Expansion of FrameIndex has Lo/Hi parts + if (isDirectAddress(Ptr)) { + SDValue TFI = Ptr.getOperand(0).getOperand(0); + if (TFI.getOpcode() == ISD::TargetFrameIndex) { + int FrameOffset; + LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset); + Hi = DAG.getConstant(1, MVT::i8); + Offset += FrameOffset; + return; + } + } + + if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) { + // Direct addressing case for RAM variables. The Hi part is constant + // and the Lo part is the TGA itself. + Lo = Ptr.getOperand(0).getOperand(0); + + // For direct addresses Hi is a constant. Value 1 for the constant + // signifies that banksel needs to generated for it. Value 0 for + // the constant signifies that banksel does not need to be generated + // for it. Mark it as 1 now and optimize later. + Hi = DAG.getConstant(1, MVT::i8); + return; + } + + // Indirect addresses. Get the hi and lo parts of ptr. + GetExpandedParts(Ptr, DAG, Lo, Hi); + + // Put the hi and lo parts into FSR. + Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo); + Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi); + + return; +} + +SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { + LoadSDNode *LD = dyn_cast(SDValue(N, 0)); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = LD->getDebugLoc(); + + SDValue Load, Offset; + SDVTList Tys; + MVT VT, NewVT; + SDValue PtrLo, PtrHi; + unsigned LoadOffset; + + // Legalize direct/indirect addresses. This will give the lo and hi parts + // of the address and the offset. + LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl); + + // Load from the pointer (direct address or FSR) + VT = N->getValueType(0); + unsigned NumLoads = VT.getSizeInBits() / 8; + std::vector PICLoads; + unsigned iter; + MVT MemVT = LD->getMemoryVT(); + if(ISD::isNON_EXTLoad(N)) { + for (iter=0; itergetMemoryVT(); + unsigned MemBytes = MemVT.getSizeInBits() / 8; + unsigned ExtdBytes = VT.getSizeInBits() / 8; + Offset = DAG.getConstant(LoadOffset, MVT::i8); + + Tys = DAG.getVTList(MVT::i8, MVT::Other); + // For MemBytes generate PIC16Load with proper offset + for (iter=0; itergetOperand(0); + SDValue Amt = N->getOperand(1); + PIC16ISD::PIC16Libcall CallCode; + switch (N->getOpcode()) { + case ISD::SRA: + CallCode = PIC16ISD::SRA_I8; + break; + case ISD::SHL: + CallCode = PIC16ISD::SLL_I8; + break; + case ISD::SRL: + CallCode = PIC16ISD::SRL_I8; + break; + default: + assert ( 0 && "This shift is not implemented yet."); + return SDValue(); + } + SmallVector Ops(2); + Ops[0] = Value; + Ops[1] = Amt; + SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2, + true, DAG, N->getDebugLoc()); + return Call; +} + +void +PIC16TargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) { + SDValue Op = SDValue(N, 0); + SDValue Res; + unsigned i; + switch (Op.getOpcode()) { + case ISD::FORMAL_ARGUMENTS: + Res = LowerFORMAL_ARGUMENTS(Op, DAG); break; + case ISD::LOAD: + Res = ExpandLoad(Op.getNode(), DAG); break; + case ISD::CALL: + Res = LowerCALL(Op, DAG); break; + default: { + // All other operations are handled in LowerOperation. 
+ Res = LowerOperation(Op, DAG); + if (Res.getNode()) + Results.push_back(Res); + + return; + } + } + + N = Res.getNode(); + unsigned NumValues = N->getNumValues(); + for (i = 0; i < NumValues ; i++) { + Results.push_back(SDValue(N, i)); + } +} + +SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + case ISD::FORMAL_ARGUMENTS: + return LowerFORMAL_ARGUMENTS(Op, DAG); + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + return LowerADD(Op, DAG); + case ISD::SUB: + case ISD::SUBC: + case ISD::SUBE: + return LowerSUB(Op, DAG); + case ISD::LOAD: + return ExpandLoad(Op.getNode(), DAG); + case ISD::STORE: + return ExpandStore(Op.getNode(), DAG); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return LowerShift(Op, DAG); + case ISD::OR: + case ISD::AND: + case ISD::XOR: + return LowerBinOp(Op, DAG); + case ISD::CALL: + return LowerCALL(Op, DAG); + case ISD::RET: + return LowerRET(Op, DAG); + case ISD::BR_CC: + return LowerBR_CC(Op, DAG); + case ISD::SELECT_CC: + return LowerSELECT_CC(Op, DAG); + } + return SDValue(); +} + +SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, + SelectionDAG &DAG, + DebugLoc dl) { + assert (Op.getValueType() == MVT::i8 + && "illegal value type to store on stack."); + + MachineFunction &MF = DAG.getMachineFunction(); + const Function *Func = MF.getFunction(); + const std::string FuncName = Func->getName(); + + + // Put the value on stack. + // Get a stack slot index and convert to es. + int FI = MF.getFrameInfo()->CreateStackObject(1, 1); + const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); + + // Store the value to ES. + SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, + DAG.getEntryNode(), + Op, ES, + DAG.getConstant (1, MVT::i8), // Banksel. + DAG.getConstant (GetTmpOffsetForFI(FI, 1), + MVT::i8)); + + // Load the value from ES. + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other); + SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store, + ES, DAG.getConstant (1, MVT::i8), + DAG.getConstant (GetTmpOffsetForFI(FI, 1), + MVT::i8)); + + return Load.getValue(0); +} + +SDValue PIC16TargetLowering:: +LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag, + SDValue DataAddr_Lo, SDValue DataAddr_Hi, + SelectionDAG &DAG) { + CallSDNode *TheCall = dyn_cast(Op); + unsigned NumOps = TheCall->getNumArgs(); + DebugLoc dl = TheCall->getDebugLoc(); + + // If call has no arguments then do nothing and return. + if (NumOps == 0) + return Chain; + + std::vector Ops; + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Arg, StoreRet; + + // For PIC16 ABI the arguments come after the return value. 
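+  // For illustration: for a callee "i16 foo(i16 a, i8 b)" the callee's frame
+  // holds ret.lo at 0, ret.hi at 1, a.lo at 2, a.hi at 3 and b at 4, so
+  // ArgOffset starts at RetVals and advances one byte per stored part.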
+ unsigned RetVals = TheCall->getNumRetVals(); + for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) { + // Get the arguments + Arg = TheCall->getArg(i); + + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(Arg); + Ops.push_back(DataAddr_Lo); + Ops.push_back(DataAddr_Hi); + Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8)); + Ops.push_back(InFlag); + + StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size()); + + Chain = getChain(StoreRet); + InFlag = getOutFlag(StoreRet); + ArgOffset++; + } + return Chain; +} + +SDValue PIC16TargetLowering:: +LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel, + SDValue InFlag, SelectionDAG &DAG) { + CallSDNode *TheCall = dyn_cast(Op); + unsigned NumOps = TheCall->getNumArgs(); + DebugLoc dl = TheCall->getDebugLoc(); + std::string Name; + SDValue Arg, StoreAt; + MVT ArgVT; + unsigned Size=0; + unsigned ArgCount=0; + + // If call has no arguments then do nothing and return. + if (NumOps == 0) + return Chain; + + // FIXME: This portion of code currently assumes only + // primitive types being passed as arguments. + + // Legalize the address before use + SDValue PtrLo, PtrHi; + unsigned AddressOffset; + int StoreOffset = 0; + LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl); + SDValue StoreRet; + + std::vector Ops; + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + for (unsigned i=ArgCount, Offset = 0; igetArg(i); + StoreOffset = (Offset + AddressOffset); + + // Store the argument on frame + + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(Arg); + Ops.push_back(PtrLo); + Ops.push_back(PtrHi); + Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8)); + Ops.push_back(InFlag); + + StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size()); + + Chain = getChain(StoreRet); + InFlag = getOutFlag(StoreRet); + + // Update the frame offset to be used for next argument + ArgVT = Arg.getValueType(); + Size = ArgVT.getSizeInBits(); + Size = Size/8; // Calculate size in bytes + Offset += Size; // Increase the frame offset + } + return Chain; +} + +SDValue PIC16TargetLowering:: +LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag, + SDValue DataAddr_Lo, SDValue DataAddr_Hi, + SelectionDAG &DAG) { + CallSDNode *TheCall = dyn_cast(Op); + DebugLoc dl = TheCall->getDebugLoc(); + unsigned RetVals = TheCall->getNumRetVals(); + + // If call does not have anything to return + // then do nothing and go back. + if (RetVals == 0) + return Chain; + + // Call has something to return + std::vector ResultVals; + SDValue LoadRet; + + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag); + for(unsigned i=0;i(Op); + DebugLoc dl = TheCall->getDebugLoc(); + // Currently handling primitive types only. 
They will come in + // i8 parts + unsigned RetVals = TheCall->getNumRetVals(); + + std::vector ResultVals; + + // Return immediately if the return type is void + if (RetVals == 0) + return Chain; + + // Call has something to return + + // Legalize the address before use + SDValue LdLo, LdHi; + unsigned LdOffset; + LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl); + + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag); + SDValue LoadRet; + + for(unsigned i=0, Offset=0;igetName(); + + const char *tmpName = createESName(PAN::getFrameLabel(FuncName)); + SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other); + SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); + SDValue BS = DAG.getConstant(1, MVT::i8); + SDValue RetVal; + for(unsigned i=0;igetOperand(2*i + 1); + Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal, + ES, BS, + DAG.getConstant (i, MVT::i8)); + + } + return DAG.getNode(ISD::RET, dl, MVT::Other, Chain); +} + +// CALL node may have some operands non-legal to PIC16. Generate new CALL +// node with all the operands legal. +// Currently only Callee operand of the CALL node is non-legal. This function +// legalizes the Callee operand and uses all other operands as are to generate +// new CALL node. + +SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) { + CallSDNode *TheCall = dyn_cast(Op); + SDValue Chain = TheCall->getChain(); + SDValue Callee = TheCall->getCallee(); + DebugLoc dl = TheCall->getDebugLoc(); + unsigned i =0; + + assert(Callee.getValueType() == MVT::i16 && + "Don't know how to legalize this call node!!!"); + assert(Callee.getOpcode() == ISD::BUILD_PAIR && + "Don't know how to legalize this call node!!!"); + + if (isDirectAddress(Callee)) { + // Come here for direct calls + Callee = Callee.getOperand(0).getOperand(0); + } else { + // Come here for indirect calls + SDValue Lo, Hi; + // Indirect addresses. Get the hi and lo parts of ptr. 
+ GetExpandedParts(Callee, DAG, Lo, Hi); + // Connect Lo and Hi parts of the callee with the PIC16Connect + Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi); + } + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add the call arguments and their flags + unsigned NumArgs = TheCall->getNumArgs(); + for(i=0;igetArg(i)); + Ops.push_back(TheCall->getArgFlagsVal(i)); + } + std::vector NodeTys; + unsigned NumRets = TheCall->getNumRetVals(); + for(i=0;igetRetValType(i)); + + // Return a Chain as well + NodeTys.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(&NodeTys[0], NodeTys.size()); + // Generate new call with all the operands legal + return DAG.getCall(TheCall->getCallingConv(), dl, + TheCall->isVarArg(), TheCall->isTailCall(), + TheCall->isInreg(), VTs, &Ops[0], Ops.size()); +} + +void PIC16TargetLowering:: +GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, + SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, + SelectionDAG &DAG) { + assert (Callee.getOpcode() == PIC16ISD::PIC16Connect + && "Don't know what to do of such callee!!"); + SDValue ZeroOperand = DAG.getConstant(0, MVT::i8); + SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand); + Chain = getChain(SeqStart); + SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency + + // Get the Lo and Hi part of code address + SDValue Lo = Callee.getOperand(0); + SDValue Hi = Callee.getOperand(1); + + SDValue Data_Lo, Data_Hi; + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag); + // Subtract 2 from Address to get the Lower part of DataAddress. + SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag); + Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo, + DAG.getConstant(2, MVT::i8)); + SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)}; + Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3); + SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi); + Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH); + SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, + OperFlag); + Chain = getChain(Call); + OperFlag = getOutFlag(Call); + SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand, + OperFlag); + Chain = getChain(SeqEnd); + OperFlag = getOutFlag(SeqEnd); + + // Low part of Data Address + DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag); + + // Make the second call. + SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand); + Chain = getChain(SeqStart); + OperFlag = getOutFlag(SeqStart); // To manage the data dependency + + // Subtract 1 from Address to get high part of data address. 
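+  // For illustration: if the callee's code label is C, the two CALLW
+  // sequences fetch the bytes kept at C-2 and C-1, the lo and hi bytes of
+  // the callee's data frame address; the SUBC/SUBE pair below forms C-1
+  // with the borrow propagated into the high address byte.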
+  Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
+                        DAG.getConstant(1, MVT::i8));
+  SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1) };
+  Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+  PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
+
+  // Use new Lo to make another CALLW.
+  Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
+  Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
+  Chain = getChain(Call);
+  OperFlag = getOutFlag(Call);
+  SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+                              OperFlag);
+  Chain = getChain(SeqEnd);
+  OperFlag = getOutFlag(SeqEnd);
+  // Hi part of Data Address.
+  DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
+}
+
+
+SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+  CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+  SDValue Chain = TheCall->getChain();
+  SDValue Callee = TheCall->getCallee();
+  DebugLoc dl = TheCall->getDebugLoc();
+  if (Callee.getValueType() == MVT::i16 &&
+      Callee.getOpcode() == ISD::BUILD_PAIR) {
+    // Control should come here only from the TypeLegalizer for lowering.
+
+    // Legalize the non-legal arguments of the call and return the
+    // new call with legal arguments.
+    return LegalizeCALL(Op, DAG);
+  }
+  // Control should come here from Legalize DAG.
+  // Here all the operands of the CALL node should be legal.
+
+  // If this is an indirect call then to pass the arguments
+  // and read the return value back, we need the data address
+  // of the function being called.
+  // To get the data address two more calls need to be made.
+
+  // The flag to track if this is a direct or indirect call.
+  bool IsDirectCall = true;
+  unsigned RetVals = TheCall->getNumRetVals();
+  unsigned NumArgs = TheCall->getNumArgs();
+
+  SDValue DataAddr_Lo, DataAddr_Hi;
+  if (Callee.getOpcode() == PIC16ISD::PIC16Connect) {
+    IsDirectCall = false;    // This is an indirect call.
+    // Read the data address only if we have to pass arguments or
+    // read a return value.
+    if ((RetVals > 0) || (NumArgs > 0))
+      GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
+  }
+
+  SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
+
+  // Start the call sequence.
+  // Carrying the Constant 0 along the CALLSEQ_START
+  // because there is nothing else to carry.
+  SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+  Chain = getChain(SeqStart);
+  SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency.
+  std::string Name;
+
+  // For any direct call - the callee will be a GlobalAddressNode or
+  // an ExternalSymbol.
+  SDValue ArgLabel, RetLabel;
+  if (IsDirectCall) {
+    // Considering the GlobalAddressNode case here.
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+      GlobalValue *GV = G->getGlobal();
+      Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
+      Name = G->getGlobal()->getName();
+    } else { // Considering the ExternalSymbol case here.
+      ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
+      Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
+      Name = ES->getSymbol();
+    }
+
+    // Label for argument passing.
+    const char *argFrame = createESName(PAN::getArgsLabel(Name));
+    ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
+
+    // Label for reading the return value.
+    const char *retName = createESName(PAN::getRetvalLabel(Name));
+    RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
+  } else {
+    // Indirect call.
+    SDValue CodeAddr_Lo = Callee.getOperand(0);
+    SDValue CodeAddr_Hi = Callee.getOperand(1);
+
+    /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
+                                DAG.getConstant(2, MVT::i8));*/
+
+    // Move the Hi part into PCLATH.
+    CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
+    Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
+                         CodeAddr_Hi);
+  }
+
+  // Pass the arguments to the function before making the call.
+  SDValue CallArgs;
+  if (IsDirectCall) {
+    CallArgs = LowerDirectCallArguments(Op, Chain, ArgLabel, OperFlag, DAG);
+    Chain = getChain(CallArgs);
+    OperFlag = getOutFlag(CallArgs);
+  } else {
+    CallArgs = LowerIndirectCallArguments(Op, Chain, OperFlag, DataAddr_Lo,
+                                          DataAddr_Hi, DAG);
+    Chain = getChain(CallArgs);
+    OperFlag = getOutFlag(CallArgs);
+  }
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
+                                OperFlag);
+  Chain = getChain(PICCall);
+  OperFlag = getOutFlag(PICCall);
+
+
+  // Carrying the Constant 0 along the CALLSEQ_END
+  // because there is nothing else to carry.
+  SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+                                      OperFlag);
+  Chain = getChain(SeqEnd);
+  OperFlag = getOutFlag(SeqEnd);
+
+  // Lower the return value reading after the call.
+  if (IsDirectCall)
+    return LowerDirectCallReturn(Op, Chain, RetLabel, OperFlag, DAG);
+  else
+    return LowerIndirectCallReturn(Op, Chain, OperFlag, DataAddr_Lo,
+                                   DataAddr_Hi, DAG);
+}
+
+bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
+  if (Op.getOpcode() == PIC16ISD::PIC16Load)
+    if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
+        || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
+      return true;
+  return false;
+}
+
+// NeedToConvertToMemOp - Returns true if one of the operands of the
+// operation 'Op' needs to be put into memory. Also returns the
+// operand number of the operand to be converted in 'MemOp'. Remember, PIC16
+// has no instruction that can operate on two registers. Most insns take
+// one register and one memory operand (addwf) / Constant (addlw).
+bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp) {
+  // If one of the operands is a constant, return false.
+  if (Op.getOperand(0).getOpcode() == ISD::Constant ||
+      Op.getOperand(1).getOpcode() == ISD::Constant)
+    return false;
+
+  // Return false if one of the operands is already a direct
+  // load and that operand has only one use.
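+  // For illustration: (add %x, %y) with both operands in registers has no
+  // two-register encoding, so one operand is spilled to a temporary slot by
+  // ConvertToMemOperand() and the node is rebuilt as a register-with-memory
+  // operation (see LowerBinOp and LowerADD below).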
+ if (isDirectLoad(Op.getOperand(0))) { + if (Op.getOperand(0).hasOneUse()) + return false; + else + MemOp = 0; + } + if (isDirectLoad(Op.getOperand(1))) { + if (Op.getOperand(1).hasOneUse()) + return false; + else + MemOp = 1; + } + return true; +} + +// LowerBinOp - Lower a commutative binary operation that does not +// affect status flag carry. +SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + + // We should have handled larger operands in type legalizer itself. + assert (Op.getValueType() == MVT::i8 && "illegal Op to lower"); + + unsigned MemOp = 1; + if (NeedToConvertToMemOp(Op, MemOp)) { + // Put one value on stack. + SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl); + + return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1), + NewVal); + } + else { + return Op; + } +} + +// LowerADD - Lower all types of ADD operations including the ones +// that affects carry. +SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { + // We should have handled larger operands in type legalizer itself. + assert (Op.getValueType() == MVT::i8 && "illegal add to lower"); + DebugLoc dl = Op.getDebugLoc(); + unsigned MemOp = 1; + if (NeedToConvertToMemOp(Op, MemOp)) { + // Put one value on stack. + SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl); + + // ADDC and ADDE produce two results. + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag); + + // ADDE has three operands, the last one is the carry bit. + if (Op.getOpcode() == ISD::ADDE) + return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1), + NewVal, Op.getOperand(2)); + // ADDC has two operands. + else if (Op.getOpcode() == ISD::ADDC) + return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1), + NewVal); + // ADD it is. It produces only one result. + else + return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1), + NewVal); + } + else + return Op; +} + +SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // We should have handled larger operands in type legalizer itself. + assert (Op.getValueType() == MVT::i8 && "illegal sub to lower"); + + // Nothing to do if the first operand is already a direct load and it has + // only one use. + if (isDirectLoad(Op.getOperand(0)) && Op.getOperand(0).hasOneUse()) + return Op; + + // Put first operand on stack. + SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl); + + SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag); + if (Op.getOpcode() == ISD::SUBE) + return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1), + Op.getOperand(2)); + else + return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1)); +} + +void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { + unsigned NumArgs = F->arg_size(); + + bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID); + + if (isVoidFunc) + ReservedFrameCount = NumArgs; + else + ReservedFrameCount = NumArgs + 1; +} + +// LowerFORMAL_ARGUMENTS - Argument values are loaded from the +// .args + offset. All arguments are already broken to leaglized +// types, so the offset just runs from 0 to NumArgVals - 1. 
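+// For illustration: "void foo(i16 a, i8 b)" has NumArgVals == 3 after
+// legalization; a.lo, a.hi and b are loaded from the function's args label
+// at offsets 0, 1 and 2, one PIC16LdArg node per byte.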
+ +SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, + SelectionDAG &DAG) { + SmallVector ArgValues; + unsigned NumArgVals = Op.getNode()->getNumValues() - 1; + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); // Formal arguments' chain + + + // Get the callee's name to create the .args label to pass args. + MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + std::string FuncName = F->getName(); + + // Reset the map of FI and TmpOffset + ResetTmpOffsetMap(); + // Initialize the ReserveFrameCount + InitReservedFrameCount(F); + + // Create the .args external symbol. + const char *tmpName = createESName(PAN::getArgsLabel(FuncName)); + SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); + + // Load arg values from the label + offset. + SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other); + SDValue BS = DAG.getConstant(1, MVT::i8); + for (unsigned i = 0; i < NumArgVals ; ++i) { + SDValue Offset = DAG.getConstant(i, MVT::i8); + SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS, + Offset); + Chain = getChain(PICLoad); + ArgValues.push_back(PICLoad); + } + + // Return a MERGE_VALUE node. + ArgValues.push_back(Op.getOperand(0)); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +// Perform DAGCombine of PIC16Load. +// FIXME - Need a more elaborate comment here. +SDValue PIC16TargetLowering:: +PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue Chain = N->getOperand(0); + if (N->hasNUsesOfValue(0, 0)) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain); + } + return SDValue(); +} + +// For all the functions with arguments some STORE nodes are generated +// that store the argument on the frameindex. However in PIC16 the arguments +// are passed on stack only. Therefore these STORE nodes are redundant. +// To remove these STORE nodes will be removed in PerformStoreCombine +// +// Currently this function is doint nothing and will be updated for removing +// unwanted store operations +SDValue PIC16TargetLowering:: +PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const { + return SDValue(N, 0); + /* + // Storing an undef value is of no use, so remove it + if (isStoringUndef(N, Chain, DAG)) { + return Chain; // remove the store and return the chain + } + //else everything is ok. + return SDValue(N, 0); + */ +} + +SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + case ISD::STORE: + return PerformStoreCombine(N, DCI); + case PIC16ISD::PIC16Load: + return PerformPIC16LoadCombine(N, DCI); + } + return SDValue(); +} + +static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown condition code!"); + case ISD::SETNE: return PIC16CC::NE; + case ISD::SETEQ: return PIC16CC::EQ; + case ISD::SETGT: return PIC16CC::GT; + case ISD::SETGE: return PIC16CC::GE; + case ISD::SETLT: return PIC16CC::LT; + case ISD::SETLE: return PIC16CC::LE; + case ISD::SETULT: return PIC16CC::ULT; + case ISD::SETULE: return PIC16CC::LE; + case ISD::SETUGE: return PIC16CC::GE; + case ISD::SETUGT: return PIC16CC::UGT; + } +} + +// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so +// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition. 
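+// For illustration, the shape being undone is:
+//   select_cc (select_icc 1, 0, cc, (subcc %lhs, %rhs)), 0, ..., setne
+// i.e. a setcc that was already lowered to a 1/0 materialization; recovering
+// %lhs, %rhs and cc lets the caller emit a single compare instead of two.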
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+                             ISD::CondCode CC, unsigned &SPCC) {
+  if (isa<ConstantSDNode>(RHS) &&
+      cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+      CC == ISD::SETNE &&
+      (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
+       LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
+      isa<ConstantSDNode>(LHS.getOperand(0)) &&
+      isa<ConstantSDNode>(LHS.getOperand(1)) &&
+      cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+      cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+    SDValue CMPCC = LHS.getOperand(3);
+    SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+    LHS = CMPCC.getOperand(0);
+    RHS = CMPCC.getOperand(1);
+  }
+}
+
+// Returns the appropriate CMP insn and the corresponding condition code in
+// PIC16CC.
+SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
+                                         unsigned CC, SDValue &PIC16CC,
+                                         SelectionDAG &DAG, DebugLoc dl) {
+  PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
+
+  // PIC16 sub is literal - W. So swap the operands and the condition if
+  // needed, i.e. a < 12 can be rewritten as 12 > a.
+  if (RHS.getOpcode() == ISD::Constant) {
+
+    SDValue Tmp = LHS;
+    LHS = RHS;
+    RHS = Tmp;
+
+    switch (CondCode) {
+    default: break;
+    case PIC16CC::LT:
+      CondCode = PIC16CC::GT;
+      break;
+    case PIC16CC::GT:
+      CondCode = PIC16CC::LT;
+      break;
+    case PIC16CC::ULT:
+      CondCode = PIC16CC::UGT;
+      break;
+    case PIC16CC::UGT:
+      CondCode = PIC16CC::ULT;
+      break;
+    case PIC16CC::GE:
+      CondCode = PIC16CC::LE;
+      break;
+    case PIC16CC::LE:
+      CondCode = PIC16CC::GE;
+      break;
+    case PIC16CC::ULE:
+      CondCode = PIC16CC::UGE;
+      break;
+    case PIC16CC::UGE:
+      CondCode = PIC16CC::ULE;
+      break;
+    }
+  }
+
+  PIC16CC = DAG.getConstant(CondCode, MVT::i8);
+
+  // Bias the operands of signed comparisons so that the unsigned subtract
+  // orders them correctly (see the aside above).
+  SDValue Mask = DAG.getConstant(128, MVT::i8);
+  if (isSignedComparison(CondCode)) {
+    LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
+    RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask);
+  }
+
+  SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
+  // We can use a subtract operation to set the condition codes, but
+  // we need to put one operand in memory if required.
+  // Nothing to do if the first operand is already a valid type (direct load
+  // for subwf and literal for sublw) and it is used by this operation only.
+  if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS))
+      && LHS.hasOneUse())
+    return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+
+  // else convert the first operand to mem.
+  LHS = ConvertToMemOperand (LHS, DAG, dl);
+  return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+}
+
+
+SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue TrueVal = Op.getOperand(2);
+  SDValue FalseVal = Op.getOperand(3);
+  unsigned ORIGCC = ~0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  // If this is a select_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+  // i.e.
+  // A setcc: lhs, rhs, cc is expanded by llvm to
+  // select_cc: result of setcc, 0, 1, 0, setne
+  // We can think of it as:
+  // select_cc: lhs, rhs, 1, 0, cc
+  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+  SDValue PIC16CC;
+  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+  return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
+                      FalseVal, PIC16CC, Cmp.getValue(1));
+}
+
+MachineBasicBlock *
+PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                 MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern. The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  // thisMBB:
+  // ...
+  //  TrueVal = ...
+  //  [f]bCC copy1MBB
+  //  fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the new block which will contain the Phi node for the select.
+  sinkMBB->transferSuccessors(BB);
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
+
+  // copy0MBB:
+  //  %FalseValue = ...
+  //  # fallthrough to sinkMBB
+  BB = copy0MBB;
+
+  // Update machine-CFG edges
+  BB->addSuccessor(sinkMBB);
+
+  // sinkMBB:
+  //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  // ...
+  BB = sinkMBB;
+  BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  return BB;
+}
+
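+
+// Illustrative aside: in C terms, the diamond built by the inserter above
+// turns the SELECT_CC pseudo into the following shape (hypothetical sketch,
+// not the emitted code):
+//
+//   int select_cc(int cc_true, int tv, int fv) {
+//     int dst;
+//     if (cc_true) dst = tv;  // thisMBB branches to sinkMBB with TrueValue
+//     else         dst = fv;  // copy0MBB computes FalseValue, falls through
+//     return dst;             // sinkMBB's PHI merges the two definitions
+//   }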
+
+SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);   // LHS of the condition.
+  SDValue RHS = Op.getOperand(3);   // RHS of the condition.
+  SDValue Dest = Op.getOperand(4);  // BB to jump to
+  unsigned ORIGCC = ~0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  // If this is a br_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+  // Get the Compare insn and condition code.
+  SDValue PIC16CC;
+  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+  return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC,
+                     Cmp.getValue(1));
+}
+
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
new file mode 100644
index 000000000000..ca9650d6b19e
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -0,0 +1,227 @@
+//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PIC16 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ISELLOWERING_H
+#define PIC16ISELLOWERING_H
+
+#include "PIC16.h"
+#include "PIC16Subtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include <map>
+
+namespace llvm {
+  namespace PIC16ISD {
+    enum NodeType {
+      // Start the numbering from where ISD NodeType finishes.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      Lo,            // Low 8-bits of GlobalAddress.
+      Hi,            // High 8-bits of GlobalAddress.
+      PIC16Load,
+      PIC16LdArg,    // A replica of PIC16Load used to load function
+                     // arguments; it facilitates some store-removal
+                     // optimizations.
+
+      PIC16LdWF,
+      PIC16Store,
+      PIC16StWF,
+      Banksel,
+      MTLO,          // Move to low part of FSR
+      MTHI,          // Move to high part of FSR
+      MTPCLATH,      // Move to PCLATH
+      PIC16Connect,  // General connector for PIC16 nodes
+      BCF,
+      LSLF,          // PIC16 Logical shift left
+      LRLF,          // PIC16 Logical shift right
+      RLF,           // Rotate left through carry
+      RRF,           // Rotate right through carry
+      CALL,          // PIC16 Call instruction
+      CALLW,         // PIC16 CALLW instruction
+      SUBCC,         // Compare for equality or inequality.
+      SELECT_ICC,    // Pseudo to be caught in the scheduler and expanded
+                     // to brcond.
+      BRCOND,        // Conditional branch.
+      Dummy
+    };
+
+    // Keep track of different address spaces.
+    enum AddressSpace {
+      RAM_SPACE = 0,   // RAM address space
+      ROM_SPACE = 1    // ROM address space number is 1
+    };
+    enum PIC16Libcall {
+      MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
+      SRA_I8,
+      SLL_I8,
+      SRL_I8,
+      PIC16UnknownCall
+    };
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  // TargetLowering Implementation
+  //===--------------------------------------------------------------------===//
+  class PIC16TargetLowering : public TargetLowering {
+  public:
+    explicit PIC16TargetLowering(PIC16TargetMachine &TM);
+
+    /// getTargetNodeName - This method returns the name of a target specific
+    /// DAG node.
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+    /// getSetCCResultType - Return the ISD::SETCC ValueType
+    virtual MVT getSetCCResultType(MVT ValType) const;
+    SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerADD(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+    // Call returns
+    SDValue
+    LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue FrameAddress,
+                          SDValue InFlag, SelectionDAG &DAG);
+    SDValue
+    LowerIndirectCallReturn(SDValue Op, SDValue Chain, SDValue InFlag,
+                            SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+                            SelectionDAG &DAG);
+
+    // Call arguments
+    SDValue
+    LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue FrameAddress,
+                             SDValue InFlag, SelectionDAG &DAG);
+
+    SDValue
+    LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+                               SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+                               SelectionDAG &DAG);
+
+    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
+                        SelectionDAG &DAG, DebugLoc dl);
+    virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                  MachineBasicBlock *MBB) const;
+
+
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+    virtual void ReplaceNodeResults(SDNode *N,
+                                    SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG);
+    virtual void LowerOperationWrapper(SDNode *N,
+                                       SmallVectorImpl<SDValue> &Results,
+                                       SelectionDAG &DAG);
+
+    SDValue ExpandStore(SDNode *N, SelectionDAG &DAG);
+    SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG);
+    SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG);
+    SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG);
+    SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG);
+
+    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    // This function returns the Tmp Offset for a FrameIndex. If a TmpOffset
+    // already exists for the FI, it returns that; otherwise it creates a new
+    // offset and returns it (see the sketch below).
+    unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size);
+    void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); }
+    void InitReservedFrameCount(const Function *F);
+
+    // Return the size of the Tmp variable
+    unsigned GetTmpSize() { return TmpSize; }
+    void SetTmpSize(unsigned Size) { TmpSize = Size; }
+
+  private:
+    // If the Node is a BUILD_PAIR representing a direct Address,
+    // then this function will return true.
+    bool isDirectAddress(const SDValue &Op);
+
+    // If the Node is a DirectAddress in ROM_SPACE then this
+    // function will return true
+    bool isRomAddress(const SDValue &Op);
+
+    // Extract the Lo and Hi components of Op.
+    void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo,
+                          SDValue &Hi);
+
+
+    // A load pointer can be a direct or indirect address. In PIC16, direct
+    // addresses need Banksel and indirect addresses need to be loaded into
+    // FSR first. Handle address specific cases here.
+    void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain,
+                         SDValue &NewPtr, unsigned &Offset, DebugLoc dl);
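+
+    // Illustrative aside: a minimal sketch of the behavior GetTmpOffsetForFI
+    // promises above (the bump-allocation policy shown here is an assumption,
+    // not the verbatim implementation):
+    //
+    //   unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size) {
+    //     std::map<unsigned, unsigned>::iterator It = FiTmpOffsetMap.find(FI);
+    //     if (It != FiTmpOffsetMap.end())
+    //       return It->second;            // reuse the existing offset
+    //     unsigned Off = GetTmpSize();    // next free zero-based slot
+    //     FiTmpOffsetMap[FI] = Off;
+    //     SetTmpSize(Off + slot_size);    // reserve slot_size bytes
+    //     return Off;
+    //   }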
+
+    // A FrameIndex is broken down into an ExternalSymbol and a FrameOffset.
+    void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
+                            int &Offset);
+
+
+    // A CALL node should have only legal operands. Legalize all non-legal
+    // operands of the CALL node and then return the new call with all
+    // operands legal.
+    SDValue LegalizeCALL(SDValue Op, SelectionDAG &DAG);
+
+    // For indirect calls, the data address of the callee frame needs to be
+    // extracted. This function fills the arguments DataAddr_Lo and
+    // DataAddr_Hi with the address of the callee frame.
+    void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
+                        SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
+                        SelectionDAG &DAG);
+
+    // We cannot have both operands of a binary operation in W.
+    // This function is used to put one operand on the stack and generate a
+    // load.
+    SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+
+    // This function checks whether we need to put an operand of an operation
+    // on the stack and generate a load, or not.
+    bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp);
+
+    /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const PIC16Subtarget *Subtarget;
+
+
+    // Extending the LIB Call framework of LLVM
+    // to hold the names of PIC16Libcalls.
+    const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall];
+
+    // To set and retrieve the lib call names.
+    void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
+    const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call);
+
+    // Make a PIC16 Libcall.
+    SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, MVT RetVT,
+                             const SDValue *Ops, unsigned NumOps, bool isSigned,
+                             SelectionDAG &DAG, DebugLoc dl);
+
+    // Check if the operation has a direct load operand.
+    inline bool isDirectLoad(const SDValue Op);
+
+  private:
+    // The frameindexes generated for spill/reload are stack based.
+    // This map maintains zero-based indexes for these FIs.
+    std::map<unsigned, unsigned> FiTmpOffsetMap;
+    unsigned TmpSize;
+
+    // These are the frames for return value and argument passing.
+    // These FrameIndices will be expanded to the foo.frame external symbol,
+    // and all others will be expanded to the foo.tmp external symbol.
+    unsigned ReservedFrameCount;
+  };
+} // namespace llvm
+
+#endif // PIC16ISELLOWERING_H
diff --git a/lib/Target/PIC16/PIC16InstrFormats.td b/lib/Target/PIC16/PIC16InstrFormats.td
new file mode 100644
index 000000000000..e213ea847fc8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrFormats.td
@@ -0,0 +1,117 @@
+//===- PIC16InstrFormats.td - PIC16 Instruction Formats -------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe PIC16 instructions format
+//
+// All the possible PIC16 fields are:
+//
+// opcode - operation code.
+// f      - 7-bit register file address.
+// d      - 1-bit direction specifier.
+// k      - 8/11-bit literals.
+// b      - 3-bit bit number specifier.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic PIC16 Format
+// PIC16 instructions are 14 bits wide.
+
+// FIXME: Add Cooper specific formats if any.
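+
+// Illustrative aside: as a worked example of the 14-bit layout, a
+// byte-oriented instruction with opcode 000111 (addwf), d = 1 and f = 0x25
+// would encode, assuming the ByteFormat field placement defined below, as
+//
+//   Inst{13-8} = 0b000111, Inst{7} = 1, Inst{6-0} = 0b0100101
+//   => 0b00011110100101 = 0x07A5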
+
+class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : Instruction {
+  field bits<14> Inst;
+
+  let Namespace = "PIC16";
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Byte Oriented instruction class in PIC16 : <|opcode|d|f|>
+// opcode = 6 bits.
+// d = direction = 1 bit.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+                 list<dag> pattern>
+  : PIC16Inst<outs, ins, asmstr, pattern> {
+  bits<1> d;
+  bits<7> f;
+
+  let Inst{13-8} = opcode;
+
+  let Inst{7} = d;
+  let Inst{6-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit Oriented instruction class in PIC16 : <|opcode|b|f|>
+// opcode = 4 bits.
+// b = bit specifier = 3 bits.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr,
+                list<dag> pattern>
+  : PIC16Inst<outs, ins, asmstr, pattern> {
+  bits<3> b;
+  bits<7> f;
+
+  let Inst{13-10} = opcode;
+
+  let Inst{9-7} = b;
+  let Inst{6-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Literal Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 6 bits.
+// k = literal = 8 bits.
+//===----------------------------------------------------------------------===//
+
+class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+                    list<dag> pattern>
+  : PIC16Inst<outs, ins, asmstr, pattern> {
+  bits<8> k;
+
+  let Inst{13-8} = opcode;
+
+  let Inst{7-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 3 bits.
+// k = jump address = 11 bits.
+//===----------------------------------------------------------------------===//
+
+class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr,
+                    list<dag> pattern>
+  : PIC16Inst<outs, ins, asmstr, pattern> {
+  bits<11> k;
+
+  let Inst{13-11} = opcode;
+
+  let Inst{10-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class in PIC16
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : PIC16Inst<outs, ins, asmstr, pattern> {
+  let Inst{13-6} = 0;
+}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
new file mode 100644
index 000000000000..2a769e8ad16e
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -0,0 +1,186 @@
+//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support in this constructor.
+PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm)
+  : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)),
+    TM(tm),
+    RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+  if (MI->getOpcode() == PIC16::movwf
+      && MI->getOperand(0).isReg()
+      && MI->getOperand(1).isSymbol()) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the dest reg along with the FrameIndex of the stack slot.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  if (MI->getOpcode() == PIC16::movf
+      && MI->getOperand(0).isReg()
+      && MI->getOperand(1).isSymbol()) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+
+void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator I,
+                                         unsigned SrcReg, bool isKill, int FI,
+                                         const TargetRegisterClass *RC) const {
+  PIC16TargetLowering *PTLI = TM.getTargetLowering();
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  const Function *Func = MBB.getParent()->getFunction();
+  const std::string FuncName = Func->getName();
+
+  const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+  // On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
+  if (RC == PIC16::GPRRegisterClass) {
+    //MachineFunction &MF = *MBB.getParent();
+    //MachineRegisterInfo &RI = MF.getRegInfo();
+    BuildMI(MBB, I, DL, get(PIC16::movwf))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+      .addExternalSymbol(tmpName)
+      .addImm(1); // Emit banksel for it.
+  }
+  else if (RC == PIC16::FSR16RegisterClass) {
+    // This is a 16-bit register, and the frameindex given by llvm is of
+    // size two here. Break this index N into two zero-based indexes and
+    // put one into the map. The second one is always obtained by adding 1
+    // to the first zero-based index. In fact it is going to use 3 slots,
+    // since saving FSRs corrupts W as well, and hence we need to
+    // save/restore W too.
+
+    unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0
+                                              : PIC16::save_fsr1;
+    BuildMI(MBB, I, DL, get(opcode))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+      .addExternalSymbol(tmpName)
+      .addImm(1); // Emit banksel for it.
+  }
+  else
+    assert(0 && "Can't store this register to stack slot");
+}
+
+void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          unsigned DestReg, int FI,
+                                          const TargetRegisterClass *RC) const {
+  PIC16TargetLowering *PTLI = TM.getTargetLowering();
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  const Function *Func = MBB.getParent()->getFunction();
+  const std::string FuncName = Func->getName();
+
+  const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+  // On the order of operands here: think "movf FrameIndex, W".
+  if (RC == PIC16::GPRRegisterClass) {
+    //MachineFunction &MF = *MBB.getParent();
+    //MachineRegisterInfo &RI = MF.getRegInfo();
+    BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
+      .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+      .addExternalSymbol(tmpName)
+      .addImm(1); // Emit banksel for it.
+  }
+  else if (RC == PIC16::FSR16RegisterClass) {
+    // This is a 16-bit register, and the frameindex given by llvm is of
+    // size two here. Break this index N into two zero-based indexes and
+    // put one into the map. The second one is always obtained by adding 1
+    // to the first zero-based index. In fact it is going to use 3 slots,
+    // since saving FSRs corrupts W as well, and hence we need to
+    // save/restore W too.
+
+    unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0
+                                               : PIC16::restore_fsr1;
+    BuildMI(MBB, I, DL, get(opcode), DestReg)
+      .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+      .addExternalSymbol(tmpName)
+      .addImm(1); // Emit banksel for it.
+  }
+  else
+    assert(0 && "Can't load this register from stack slot");
+}
+
+bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   const TargetRegisterClass *DestRC,
+                                   const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (DestRC == PIC16::FSR16RegisterClass) {
+    BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
+    return true;
+  }
+
+  if (DestRC == PIC16::GPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg);
+    return true;
+  }
+
+  // Not yet supported.
+  return false;
+}
+
+bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
+                                 unsigned &SrcReg, unsigned &DestReg,
+                                 unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  if (MI.getOpcode() == PIC16::copy_fsr
+      || MI.getOpcode() == PIC16::copy_w) {
+    DestReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+    return true;
+  }
+
+  return false;
+}
+
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
new file mode 100644
index 000000000000..0b6767969875
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -0,0 +1,70 @@
+//===- PIC16InstrInfo.h - PIC16 Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16INSTRUCTIONINFO_H
+#define PIC16INSTRUCTIONINFO_H
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+
+class PIC16InstrInfo : public TargetInstrInfoImpl
+{
+  PIC16TargetMachine &TM;
+  const PIC16RegisterInfo RegInfo;
+public:
+  explicit PIC16InstrInfo(PIC16TargetMachine &TM);
+
+  virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+};
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
new file mode 100644
index 000000000000..c572188cef22
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -0,0 +1,522 @@
+//===- PIC16InstrInfo.td - PIC16 Instruction defs -------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PIC16 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Profiles.
+//===----------------------------------------------------------------------===//
+
+// Generic type profiles for i8/i16 unary/binary operations.
+// Taking one i8 or i16 and producing void.
+def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
+def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
+
+// Taking one value and producing an output of the same type.
+def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
+def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
+
+// Taking two values and producing an output of the same type.
+def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
+def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>,
+                                       SDTCisI16<2>]>;
+
+// Node specific type profiles.
+def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>,
+                                         SDTCisI8<2>, SDTCisI8<3>]>;
+
+def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>,
+                                          SDTCisI8<2>, SDTCisI8<3>]>;
+
+def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
+                                            SDTCisI8<2>]>;
+
+// PIC16ISD::CALL type profile
+def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
+
+// PIC16ISD::BRCOND
+def SDT_PIC16Brcond: SDTypeProfile<0, 2,
+                                   [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
+
+// PIC16ISD::SELECT_ICC
+def SDT_PIC16Selecticc: SDTypeProfile<1, 3,
+                                      [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
+                                       SDTCisI8<3>]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 addressing modes matching via DAG.
+//===----------------------------------------------------------------------===//
+def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
+                                [SDNPHasChain, SDNPOutFlag]>;
+def PIC16callseq_end : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp,
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Low 8-bits of GlobalAddress.
+def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;
+
+// High 8-bits of GlobalAddress.
+def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
+
+// The MTHI and MTLO nodes are used only to match them in the incoming
+// DAG for replacement by the corresponding set_fsrhi, set_fsrlo instructions.
+// These nodes are not used for defining any instructions.
+def MTLO : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
+def MTHI : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
+def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
+
+// Node to generate Bank Select for a GlobalAddress.
+def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
+
+// Node to match a direct store operation.
+def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
+def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store,
+                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+// Node to match a direct load operation.
+def PIC16Load : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdArg : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdWF : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load,
+                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>;
+
+// Node to match PIC16 call
+def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call,
+                       [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
+def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw,
+                        [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
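+
+// Illustrative aside: reading a node and its profile together, SDT_PIC16Load
+// above means "1 result, 3 operands, all i8"; with SDNPHasChain, a PIC16Load
+// node in a pattern therefore takes (ptrlo, ptrhi, offset) plus an implicit
+// chain. The load patterns later in this file match fragments like:
+//
+//   (set GPR:$dst, (PIC16Load diraddr:$ptrlo, (i8 imm:$ptrhi),
+//                             (i8 imm:$offset)))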
+
+// Node to match a comparison instruction.
+def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>;
+
+// Node to match a conditional branch.
+def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond,
+                         [SDNPHasChain, SDNPInFlag]>;
+
+def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc,
+                            [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Operand Definitions.
+//===----------------------------------------------------------------------===//
+def i8mem : Operand<i8>;
+def brtarget: Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+  def CCOp : Operand<i8>;
+
+include "PIC16InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PIC16 Common Classes.
+//===----------------------------------------------------------------------===//
+
+// W = W Op F : Load the value from F and do Op to W.
+let isTwoAddress = 1, mayLoad = 1 in
+class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+  ByteFormat<OpCode, (outs GPR:$dst),
+             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             !strconcat(OpcStr, " $ptrlo + $offset, W"),
+             [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+                                                (i8 imm:$ptrhi),
+                                                (i8 imm:$offset))))]>;
+
+// F = F Op W : Load the value from F, do op with W and store in F.
+// This insn class is not marked as TwoAddress because the reg is
+// being used as a source operand only. (Remember a TwoAddress insn
+// needs a copyRegToReg.)
+let mayStore = 1 in
+class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+  ByteFormat<OpCode, (outs),
+             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             !strconcat(OpcStr, " $ptrlo + $offset, F"),
+             [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+                                             (i8 imm:$ptrhi),
+                                             (i8 imm:$offset))),
+                          diraddr:$ptrlo, (i8 imm:$ptrhi),
+                          (i8 imm:$offset))]>;
+
+// W = W Op L : Do Op of L with W and place result in W.
+let isTwoAddress = 1 in
+class BinOpLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
+  LiteralFormat<opcode, (outs GPR:$dst),
+                (ins GPR:$src, i8imm:$literal),
+                !strconcat(OpcStr, " $literal"),
+                [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Instructions.
+//===----------------------------------------------------------------------===//
+
+// Pseudo-instructions.
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt),
+                              "!ADJCALLSTACKDOWN $amt",
+                              [(PIC16callseq_start imm:$amt)]>;
+
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt),
+                            "!ADJCALLSTACKUP $amt",
+                            [(PIC16callseq_end imm:$amt)]>;
+
+//-----------------------------------
+// Various movlw insn patterns.
+//-----------------------------------
+let isReMaterializable = 1 in {
+// Move 8-bit literal to W.
+def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src),
+                      "movlw $src",
+                      [(set GPR:$dst, (i8 imm:$src))]>;
+
+// Move a Lo(TGA) to W.
+def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+                           "movlw LOW(${src}) + ${src2}",
+                           [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>;
+
+// Move a Lo(TES) to W.
+def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+                           "movlw LOW(${src}) + ${src2}",
+                           [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>;
+
+// Move a Hi(TGA) to W.
+def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+                           "movlw HIGH(${src}) + ${src2}",
+                           [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>;
+
+// Move a Hi(TES) to W.
+def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+                           "movlw HIGH(${src}) + ${src2}",
+                           [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>;
+}
+
+//-------------------
+// FSR setting insns.
+//-------------------
+// These insns are matched via a DAG replacement pattern.
+def set_fsrlo:
+  ByteFormat<0, (outs FSR16:$fsr),
+             (ins GPR:$val),
+             "movwf ${fsr}L",
+             []>;
+
+let isTwoAddress = 1 in
+def set_fsrhi:
+  ByteFormat<0, (outs FSR16:$dst),
+             (ins FSR16:$src, GPR:$val),
+             "movwf ${dst}H",
+             []>;
+
+def set_pclath:
+  ByteFormat<0, (outs PCLATHR:$dst),
+             (ins GPR:$val),
+             "movwf ${dst}",
+             [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
+
+//----------------------------
+// copyRegToReg
+// copyRegToReg insns. These are dummies. They should always be deleted
+// by the optimizer and never be present in the final generated code.
+// If they are, then we have to write correct macros for these insns.
+//----------------------------
+def copy_fsr:
+  Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>;
+
+def copy_w:
+  Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>;
+
+class SAVE_FSR<string OpcStr>:
+  Pseudo<(outs),
+         (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+         !strconcat(OpcStr, " $ptrlo, $offset"),
+         []>;
+
+def save_fsr0: SAVE_FSR<"save_fsr0">;
+def save_fsr1: SAVE_FSR<"save_fsr1">;
+
+class RESTORE_FSR<string OpcStr>:
+  Pseudo<(outs FSR16:$dst),
+         (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+         !strconcat(OpcStr, " $ptrlo, $offset"),
+         []>;
+
+def restore_fsr0: RESTORE_FSR<"restore_fsr0">;
+def restore_fsr1: RESTORE_FSR<"restore_fsr1">;
+
+//--------------------------
+// Store to memory
+//-------------------------
+
+// Direct store.
+// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel.
+let mayStore = 1 in
+class MOVWF_INSN<bits<3> OpCode, SDNode OpNodeDest, SDNode Op>:
+  ByteFormat<0, (outs),
+             (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             "movwf ${ptrlo} + ${offset}",
+             [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi),
+               (i8 imm:$offset))]>;
+
+// Store W to a Global Address.
+def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>;
+
+// Store W to an External Symbol.
+def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>;
+
+// Store with InFlag and OutFlag.
+// This is the same as movwf_1 but has a flag. A flag is required to
+// order the stores while passing the params to a function.
+def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>;
+
+// Indirect store. Matched via a DAG replacement pattern.
+def store_indirect :
+  ByteFormat<0, (outs),
+             (ins GPR:$val, FSR16:$fsr, i8imm:$offset),
+             "movwi $offset[$fsr]",
+             []>;
+
+//----------------------------
+// Load from memory
+//----------------------------
+// Direct load.
+// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
+// Output: dst = W
+let mayLoad = 1 in
+class MOVF_INSN<bits<3> OpCode, SDNode OpNodeSrc, SDNode Op>:
+  ByteFormat<0, (outs GPR:$dst),
+             (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             "movf ${ptrlo} + ${offset}, W",
+             [(set GPR:$dst,
+               (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi),
+                   (i8 imm:$offset)))]>;
+
+// Load from a GA.
+def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>;
+
+// Load from an ES.
+def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>;
+def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>;
+
+// Load with InFlag and OutFlag.
+// This is the same as movf_1 but has a flag. A flag is required to
+// order the loads while copying the return value of a function.
+def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>;
+
+// Indirect load. Matched via a DAG replacement pattern.
+def load_indirect :
+  ByteFormat<0, (outs GPR:$dst),
+             (ins FSR16:$fsr, i8imm:$offset),
+             "moviw $offset[$fsr]",
+             []>;
+
+//-------------------------
+// Bitwise operation patterns.
+//--------------------------
+// W = W op [F]
+let Defs = [STATUS] in {
+def OrFW : BinOpFW<0, "iorwf", or>;
+def XOrFW : BinOpFW<0, "xorwf", xor>;
+def AndFW : BinOpFW<0, "andwf", and>;
+
+// F = W op [F]
+def OrWF : BinOpWF<0, "iorwf", or>;
+def XOrWF : BinOpWF<0, "xorwf", xor>;
+def AndWF : BinOpWF<0, "andwf", and>;
+
+//-------------------------
+// Various add/sub patterns.
+//-------------------------
+
+// W = W + [F]
+def addfw_1: BinOpFW<0, "addwf", add>;
+def addfw_2: BinOpFW<0, "addwf", addc>;
+
+let Uses = [STATUS] in
+def addfwc: BinOpFW<0, "addwfc", adde>;  // With Carry.
+
+// F = W + [F]
+def addwf_1: BinOpWF<0, "addwf", add>;
+def addwf_2: BinOpWF<0, "addwf", addc>;
+let Uses = [STATUS] in
+def addwfc: BinOpWF<0, "addwfc", adde>;  // With Carry.
+}
+
+// W -= [F] ; load from F and sub the value from W.
+let isTwoAddress = 1, mayLoad = 1 in
+class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+  ByteFormat<OpCode, (outs GPR:$dst),
+             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             !strconcat(OpcStr, " $ptrlo + $offset, W"),
+             [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo,
+                                      (i8 imm:$ptrhi), (i8 imm:$offset)),
+                                     GPR:$src))]>;
+let Defs = [STATUS] in {
+def subfw_1: SUBFW<0, "subwf", sub>;
+def subfw_2: SUBFW<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+def subfwb: SUBFW<0, "subwfb", sube>;  // With Borrow.
+
+def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
+}
+
+// [F] -= W ;
+let mayStore = 1 in
+class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+  ByteFormat<OpCode, (outs),
+             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+             !strconcat(OpcStr, " $ptrlo + $offset"),
+             [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo,
+                                   (i8 imm:$ptrhi), (i8 imm:$offset)),
+                                  GPR:$src),
+                          diraddr:$ptrlo, (i8 imm:$ptrhi),
+                          (i8 imm:$offset))]>;
+
+let Defs = [STATUS] in {
+def subwf_1: SUBWF<0, "subwf", sub>;
+def subwf_2: SUBWF<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+  def subwfb: SUBWF<0, "subwfb", sube>;  // With Borrow.
+
+def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>;
+}
+
+// addlw
+let Defs = [STATUS] in {
+def addlw_1 : BinOpLW<0, "addlw", add>;
+def addlw_2 : BinOpLW<0, "addlw", addc>;
+
+let Uses = [STATUS] in
+def addlwc : BinOpLW<0, "addlwc", adde>;  // With Carry. (Assembler macro).
+
+// Bitwise operations involving a literal and W.
+def andlw : BinOpLW<0, "andlw", and>;
+def xorlw : BinOpLW<0, "xorlw", xor>;
+def orlw : BinOpLW<0, "iorlw", or>;
+}
+
+// sublw
+// W = C - W ; sub W from literal. (Without borrow).
+let isTwoAddress = 1 in
+class SUBLW<bits<6> opcode, SDNode OpNode> :
+  LiteralFormat<opcode, (outs GPR:$dst),
+                (ins GPR:$src, i8imm:$literal),
+                "sublw $literal",
+                [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
+
+let Defs = [STATUS] in {
+def sublw_1 : SUBLW<0, sub>;
+def sublw_2 : SUBLW<0, subc>;
+def sublw_cc : SUBLW<0, PIC16Subcc>;
+}
+
+// Call instruction.
+let isCall = 1,
+    Defs = [W, FSR0, FSR1] in {
+  def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func),
+                          //"call ${func} + 2",
+                          "call ${func}",
+                          [(PIC16call diraddr:$func)]>;
+}
+
+let isCall = 1,
+    Defs = [W, FSR0, FSR1] in {
+  def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc),
+                            "callw",
+                            [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>;
+}
+
+let isCall = 1,
+    Defs = [FSR0, FSR1] in {
+  def CALLW: LiteralFormat<0x1, (outs GPR:$dest),
+                           (ins GPR:$func, PCLATHR:$pc),
+                           "callw",
+                           [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>;
+}
+
+let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc),
+                               "b$cc $dst",
+                               [(PIC16Brcond bb:$dst, imm:$cc)]>;
+
+// Unconditional branch.
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
+                             "goto $dst",
+                             [(br bb:$dst)]>;
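+
+// Illustrative aside: the compare and branch above cooperate through the
+// implicit flag: PIC16Subcc is marked [SDNPOutFlag] and PIC16Brcond
+// [SDNPInFlag], so a lowered "br_cc lt, a, b, dest" selects to a sequence
+// like (hypothetical ordering, for illustration):
+//
+//   subwf_cc  a, b     ; SUBCC sets STATUS via its flag result
+//   blt       dest     ; pic16brcond with $cc = PIC16CC::LT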
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by
+// the scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in {   // Expanded by the scheduler.
+  def SELECT_CC_Int_ICC
+   : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
+            "; SELECT_CC_Int_ICC PSEUDO!",
+            [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F,
+                                            imm:$Cond))]>;
+}
+
+
+// Banksel.
+def banksel :
+  Pseudo<(outs),
+         (ins i8mem:$ptr),
+         "banksel $ptr",
+         []>;
+
+def pagesel :
+  Pseudo<(outs),
+         (ins i8mem:$ptr),
+         "movlp $ptr",
+         []>;
+
+
+// Return insn.
+def Return :
+  ControlFormat<0, (outs), (ins), "return", [(ret)]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Replacement Patterns.
+//===----------------------------------------------------------------------===//
+
+// Identify an indirect store and select insns for it.
+def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+           imm:$offset),
+          (store_indirect GPR:$val,
+           (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+           imm:$offset)>;
+
+def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+           imm:$offset),
+          (store_indirect GPR:$val,
+           (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+           imm:$offset)>;
+
+// Identify an indirect load and select insns for it.
+def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+           imm:$offset),
+          (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+           imm:$offset)>;
+
+def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+           imm:$offset),
+          (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+           imm:$offset)>;
+
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
new file mode 100644
index 000000000000..20f926def398
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -0,0 +1,169 @@
+//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which optimizes the emission of banksel
+// instructions before accessing data memory. This currently works within
+// a basic block only and keeps track of the last accessed memory bank.
+// If memory accesses stay within the same bank, it just changes the banksel
+// immediate, which is part of the insn accessing the data memory, from 1
+// to 0. The asm printer emits a banksel only if that immediate is 1.
+//
+// FIXME: this is not implemented yet. The banksel pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-codegen"
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN MemSelOpt : public MachineFunctionPass {
+    static char ID;
+    MemSelOpt() : MachineFunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const {
+      return "PIC16 Memsel Optimizer";
+    }
+
+    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+    bool processInstruction(MachineInstr *MI);
+
+  private:
+    const TargetInstrInfo *TII; // Machine instruction info.
+    MachineBasicBlock *MBB;     // Current basic block
+    std::string CurBank;
+
+  };
+  char MemSelOpt::ID = 0;
+}
+
+FunctionPass *llvm::createPIC16MemSelOptimizerPass() {
+  return new MemSelOpt();
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// banksel instructions where needed.
+///
+bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  bool Changed = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    Changed |= processBasicBlock(MF, *I);
+  }
+
+  return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// tracking the current bank and inserting banksels at bank changes.
+///
+bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+  bool Changed = false;
+  MBB = &BB;
+
+  // Let us assume that when entering a basic block no bank is selected.
+  // Ideally we should look at the predecessors for this information.
+  CurBank="";
+
+  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+    Changed |= processInstruction(I);
+  }
+  return Changed;
+}
+
+bool MemSelOpt::processInstruction(MachineInstr *MI) {
+  bool Changed = false;
+
+  unsigned NumOperands = MI->getNumOperands();
+  if (NumOperands == 0) return false;
+
+
+  // If this insn is not going to access any memory, return.
+  const TargetInstrDesc &TID = TII->get(MI->getOpcode());
+  if (! (TID.isCall() || TID.mayLoad() || TID.mayStore()))
+    return false;
+
+  // Scan for the memory address operand.
+  // FIXME: Should we use standard interfaces like memoperands_iterator,
+  // hasMemOperand() etc ?
+  int MemOpPos = -1;
+  for (unsigned i = 0; i < NumOperands; i++) {
+    MachineOperand Op = MI->getOperand(i);
+    if (Op.getType() == MachineOperand::MO_GlobalAddress ||
+        Op.getType() == MachineOperand::MO_ExternalSymbol) {
+      // We found one mem operand. The next one should be BS.
+      MemOpPos = i;
+      break;
+    }
+  }
+
+  // If we did not find an operand accessing memory, we are done.
+  if (MemOpPos == -1) return Changed;
+
+  // Get the MemOp.
+  MachineOperand &Op = MI->getOperand(MemOpPos);
+
+  // If this needs a pagesel (i.e. it is a call), handle it first.
+  if (MI->getOpcode() == PIC16::CALL) {
+    DebugLoc dl = MI->getDebugLoc();
+    BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
+      addOperand(Op);
+    return true;
+  }
+
+  // Get the section name (NewBank) for MemOp.
+  // This assumes that the section names for globals are already set by
+  // AsmPrinter->doInitialization.
+  std::string NewBank = CurBank;
+  if (Op.getType() == MachineOperand::MO_GlobalAddress &&
+      Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
+    NewBank = Op.getGlobal()->getSection();
+  } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) {
+    // An External Symbol is generated for temp data and arguments. They are
+    // in the fpdata.<fname>.# section.
+    std::string Sym = Op.getSymbolName();
+    NewBank = PAN::getSectionNameForSym(Sym);
+  }
+
+  // If the previous and new section names are the same, we don't need to
+  // emit a banksel.
+  if (NewBank.compare(CurBank) != 0 ) {
+    DebugLoc dl = MI->getDebugLoc();
+    BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
+      addOperand(Op);
+    Changed = true;
+    CurBank = NewBank;
+  }
+
+  return Changed;
+}
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
new file mode 100644
index 000000000000..eb758d8543d0
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -0,0 +1,91 @@
+//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-reg-info"
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+
+
+using namespace llvm;
+
+PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii,
+                                     const PIC16Subtarget &st)
+  : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP),
+    TII(tii),
+    ST(st) {}
+
+#include "PIC16GenRegisterInfo.inc"
+
+/// PIC16 Callee Saved Registers
+const unsigned* PIC16RegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+  static const unsigned CalleeSavedRegs[] = { 0 };
+  return CalleeSavedRegs;
+}
+
+// PIC16 Callee Saved Reg Classes
+const TargetRegisterClass* const*
+PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+  return CalleeSavedRegClasses;
+}
+
+BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  return Reserved;
+}
+
+bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
+  return false;
+}
+
+void PIC16RegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                    RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{ /* NOT YET IMPLEMENTED */ }
+
+int PIC16RegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  assert(0 && "Not keeping track of debug information yet!!");
+  return -1;
+}
+
+unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+  assert(0 && "PIC16 Does not have any frame register");
+
+  return 0;
+}
+
+unsigned PIC16RegisterInfo::getRARegister() const {
+  assert(0 && "PIC16 Does not have any return address register");
+  return 0;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void PIC16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  // Simply discard ADJCALLSTACKDOWN,
+  // ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
new file mode 100644
index 000000000000..83689d0486b1
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -0,0 +1,68 @@
+//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16REGISTERINFO_H
+#define PIC16REGISTERINFO_H
+
+#include "PIC16GenRegisterInfo.h.inc"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+// Forward Declarations.
+  class PIC16Subtarget;
+  class TargetInstrInfo;
+
+class PIC16RegisterInfo : public PIC16GenRegisterInfo {
+  private:
+    const TargetInstrInfo &TII;
+    const PIC16Subtarget &ST;
+
+  public:
+    PIC16RegisterInfo(const TargetInstrInfo &tii,
+                      const PIC16Subtarget &st);
+
+
+    //------------------------------------------------------
+    // Pure virtual functions from TargetRegisterInfo
+    //------------------------------------------------------
+
+    // PIC16 callee saved registers
+    virtual const unsigned*
+    getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+    // PIC16 callee saved register classes
+    virtual const TargetRegisterClass* const *
+    getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+    virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+    virtual bool hasFP(const MachineFunction &MF) const;
+
+    virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                     int SPAdj, RegScavenger *RS=NULL) const;
+
+    void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                       MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I) const;
+
+    virtual void emitPrologue(MachineFunction &MF) const;
+    virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+    virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+    virtual unsigned getFrameRegister(MachineFunction &MF) const;
+    virtual unsigned getRARegister() const;
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.td b/lib/Target/PIC16/PIC16RegisterInfo.td
new file mode 100644
index 000000000000..2959d912ec32
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.td
@@ -0,0 +1,33 @@
+//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PIC16 register file
+//===----------------------------------------------------------------------===//
+
+class PIC16Reg<string n> : Register<n> {
+  let Namespace = "PIC16";
+}
+
+// PIC16 Registers.
+def W : PIC16Reg<"W">;
+def FSR0 : PIC16Reg<"FSR0">;
+def FSR1 : PIC16Reg<"FSR1">;
+def BS : PIC16Reg<"BS">;
+def PCLATH : PIC16Reg<"PCLATH">;
+
+def STATUS : PIC16Reg<"STATUS">;
+
+// PIC16 Register classes.
+def GPR : RegisterClass<"PIC16", [i8], 8, [W]>;
+def FSR16 : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>;
+def BSR : RegisterClass<"PIC16", [i8], 8, [BS]>;
+def PCLATHR : RegisterClass<"PIC16", [i8], 8, [PCLATH]>;
+def STATUSR : RegisterClass<"PIC16", [i8], 8, [STATUS]>;
+
diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp
new file mode 100644
index 000000000000..db8a5d84a4bf
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.cpp
@@ -0,0 +1,27 @@
+//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16Subtarget.h"
+#include "PIC16GenSubtarget.inc"
+
+using namespace llvm;
+
+PIC16Subtarget::PIC16Subtarget(const Module &M, const std::string &FS,
+                               bool Cooper)
+  :IsCooper(Cooper)
+{
+  std::string CPU = "generic";
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
new file mode 100644
index 000000000000..e5147a0cf892
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.h
@@ -0,0 +1,45 @@
+//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16SUBTARGET_H
+#define PIC16SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class PIC16Subtarget : public TargetSubtarget {
+
+  // IsCooper - Target ISA is Cooper.
+  bool IsCooper;
+
+public:
+  /// This constructor initializes the data members to match those
+  /// of the specified module.
+  ///
+  PIC16Subtarget(const Module &M, const std::string &FS, bool Cooper);
+
+  /// isCooper - Returns true if the target ISA is Cooper.
+  bool isCooper() const { return IsCooper; }
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options. The definition of this function is
+  /// auto-generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif // PIC16SUBTARGET_H
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.cpp b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
new file mode 100644
index 000000000000..d2657f018f88
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
@@ -0,0 +1,264 @@
+//===-- PIC16TargetAsmInfo.cpp - PIC16 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PIC16TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+
+using namespace llvm;
+
+PIC16TargetAsmInfo::
+PIC16TargetAsmInfo(const PIC16TargetMachine &TM)
+  : TargetAsmInfo(TM) {
+  CommentString = ";";
+  GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+  GlobalDirective = "\tglobal\t";
+  ExternDirective = "\textern\t";
+
+  Data8bitsDirective = " db ";
+  Data16bitsDirective = " dw ";
+  Data32bitsDirective = " dl ";
+  RomData8bitsDirective = " dw ";
+  RomData16bitsDirective = " rom_di ";
+  RomData32bitsDirective = " rom_dl ";
+  ZeroDirective = NULL;
+  AsciiDirective = " dt ";
+  AscizDirective = NULL;
+  BSSSection_ = getNamedSection("udata.# UDATA",
+                                SectionFlags::Writeable | SectionFlags::BSS);
+  ReadOnlySection = getNamedSection("romdata.# ROMDATA", SectionFlags::None);
+  DataSection = getNamedSection("idata.# IDATA", SectionFlags::Writeable);
+  SwitchToSectionDirective = "";
+  // Needed because otherwise a .text symbol is emitted by DwarfWriter
+  // in BeginModule, and gpasm complains about that .text symbol.
+  TextSection = getUnnamedSection("", SectionFlags::Code);
+  ROSection = new PIC16Section(getReadOnlySection());
+  ExternalVarDecls = new PIC16Section(getNamedSection("ExternalVarDecls"));
+  ExternalVarDefs = new PIC16Section(getNamedSection("ExternalVarDefs"));
+  // Set to false because we need to emit the C file name, not the bitcode
+  // file name.
+  HasSingleParameterDotFile = false;
+}
+
+const char *PIC16TargetAsmInfo::getRomDirective(unsigned size) const
+{
+  if (size == 8)
+    return RomData8bitsDirective;
+  else if (size == 16)
+    return RomData16bitsDirective;
+  else if (size == 32)
+    return RomData32bitsDirective;
+  else
+    return NULL;
+}
+
+
+const char *PIC16TargetAsmInfo::getASDirective(unsigned size,
+                                               unsigned AS) const {
+  if (AS == PIC16ISD::ROM_SPACE)
+    return getRomDirective(size);
+  else
+    return NULL;
+}
+
+const Section *
+PIC16TargetAsmInfo::getBSSSectionForGlobal(const GlobalVariable *GV) const {
+  assert(GV->hasInitializer() && "This global doesn't need space");
+  Constant *C = GV->getInitializer();
+  assert(C->isNullValue() && "Uninitialized global has non-zero initializer");
+
+  // Find how much space this global needs.
+  const TargetData *TD = TM.getTargetData();
+  const Type *Ty = C->getType();
+  unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+  // Go through all BSS sections and assign this variable
+  // to the first available section having enough space.
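
The first-fit search performed here (and repeated for idata further down) boils down to a few lines. The following is a minimal standalone sketch, not part of the patch: Bank and assignToBank are hypothetical stand-ins for PIC16Section and the two getters, and BankCapacity mirrors the DataBankSize macro from the header. The sketch tests Size + ValSize <= BankCapacity, which sidesteps the unsigned wrap-around that DataBankSize - Size can produce once a bank already holds more than DataBankSize bytes.

    #include <cassert>
    #include <string>
    #include <vector>

    // Stand-in for PIC16Section: one bank-sized data section.
    struct Bank {
      std::string Name;
      unsigned Size;  // bytes already assigned to this bank
    };

    static const unsigned BankCapacity = 80;  // mirrors DataBankSize

    // First-fit: reuse the first bank with enough head room, else open a
    // new one, just as getBSSSectionForGlobal does.
    Bank *assignToBank(std::vector<Bank*> &Banks, unsigned ValSize,
                       const std::string &NewName) {
      for (unsigned i = 0; i < Banks.size(); i++) {
        if (Banks[i]->Size + ValSize <= BankCapacity) {
          Banks[i]->Size += ValSize;
          return Banks[i];
        }
      }
      Bank *B = new Bank;
      B->Name = NewName;
      B->Size = ValSize;  // may exceed BankCapacity, as in the patch
      Banks.push_back(B);
      return B;
    }

    int main() {
      std::vector<Bank*> Banks;
      Bank *A = assignToBank(Banks, 60, "udata.0");
      Bank *B = assignToBank(Banks, 30, "udata.1"); // 60+30 > 80: new bank
      Bank *C = assignToBank(Banks, 20, "udata.2"); // fits in first bank
      assert(A == C && A != B && Banks.size() == 2);
      return 0;
    }
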
+  PIC16Section *FoundBSS = NULL;
+  for (unsigned i = 0; i < BSSSections.size(); i++) {
+    if (DataBankSize - BSSSections[i]->Size >= ValSize) {
+      FoundBSS = BSSSections[i];
+      break;
+    }
+  }
+
+  // No BSS section spacious enough was found. Create a new one.
+  if (!FoundBSS) {
+    std::string name = PAN::getUdataSectionName(BSSSections.size());
+    const Section *NewSection = getNamedSection(name.c_str());
+
+    FoundBSS = new PIC16Section(NewSection);
+
+    // Add this newly created BSS section to the list of BSSSections.
+    BSSSections.push_back(FoundBSS);
+  }
+
+  // Insert the GV into this BSS.
+  FoundBSS->Items.push_back(GV);
+  FoundBSS->Size += ValSize;
+
+  // We can't do this here because GV is const.
+  // const std::string SName = FoundBSS->S_->getName();
+  // GV->setSection(SName);
+
+  return FoundBSS->S_;
+}
+
+const Section *
+PIC16TargetAsmInfo::getIDATASectionForGlobal(const GlobalVariable *GV) const {
+  assert(GV->hasInitializer() && "This global doesn't need space");
+  Constant *C = GV->getInitializer();
+  assert(!C->isNullValue() && "Initialized global has zero initializer");
+  assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+         "can split initialized RAM data only");
+
+  // Find how much space this global needs.
+  const TargetData *TD = TM.getTargetData();
+  const Type *Ty = C->getType();
+  unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+  // Go through all IDATA sections and assign this variable
+  // to the first available section having enough space.
+  PIC16Section *FoundIDATA = NULL;
+  for (unsigned i = 0; i < IDATASections.size(); i++) {
+    if (DataBankSize - IDATASections[i]->Size >= ValSize) {
+      FoundIDATA = IDATASections[i];
+      break;
+    }
+  }
+
+  // No IDATA section spacious enough was found. Create a new one.
+  if (!FoundIDATA) {
+    std::string name = PAN::getIdataSectionName(IDATASections.size());
+    const Section *NewSection = getNamedSection(name.c_str());
+
+    FoundIDATA = new PIC16Section(NewSection);
+
+    // Add this newly created IDATA section to the list of IDATASections.
+    IDATASections.push_back(FoundIDATA);
+  }
+
+  // Insert the GV into this IDATA.
+  FoundIDATA->Items.push_back(GV);
+  FoundIDATA->Size += ValSize;
+
+  // We can't do this here because GV is const.
+  // GV->setSection(FoundIDATA->S->getName());
+
+  return FoundIDATA->S_;
+}
+
+// Get the section for an automatic variable of a function.
+// For PIC16 they are globals only with mangled names.
+const Section *
+PIC16TargetAsmInfo::getSectionForAuto(const GlobalVariable *GV) const {
+
+  const std::string name = PAN::getSectionNameForSym(GV->getName());
+
+  // Go through all Autos sections and assign this variable
+  // to the appropriate section.
+  PIC16Section *FoundAutoSec = NULL;
+  for (unsigned i = 0; i < AutosSections.size(); i++) {
+    if (AutosSections[i]->S_->getName() == name) {
+      FoundAutoSec = AutosSections[i];
+      break;
+    }
+  }
+
+  // No Autos section was found. Create a new one.
+  if (!FoundAutoSec) {
+    const Section *NewSection = getNamedSection(name.c_str());
+
+    FoundAutoSec = new PIC16Section(NewSection);
+
+    // Add this newly created Autos section to the list of AutosSections.
+    AutosSections.push_back(FoundAutoSec);
+  }
+
+  // Insert the auto into this section.
+  FoundAutoSec->Items.push_back(GV);
+
+  return FoundAutoSec->S_;
+}
+
+
+// Override the default implementation to put the true globals into
+// multiple data sections if required.
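
The SelectSectionForGlobal override that follows implements a fixed decision ladder. Condensed as a hypothetical sketch: the enum and the address-space codes are illustrative stand-ins, not LLVM API, and the side effect of recording external defs is omitted.

    #include <cassert>

    enum Placement { ExternDecl, AutoSection, BSSBank, IDataBank, ROMData,
                     DefaultSection };

    Placement place(bool IsDeclaration, bool IsLocalName, bool HasNullInit,
                    unsigned AddrSpace) {
      const unsigned RAM = 0, ROM = 1;  // stand-ins for PIC16ISD::*_SPACE
      if (IsDeclaration)    return ExternDecl;   // only recorded, no storage
      if (IsLocalName)      return AutoSection;  // mangled function locals
      if (HasNullInit)      return BSSBank;      // zero-init -> udata banks
      if (AddrSpace == RAM) return IDataBank;    // initialized RAM -> idata
      if (AddrSpace == ROM) return ROMData;      // initialized ROM -> romdata
      return DefaultSection;                     // defer to TargetAsmInfo
    }

    int main() {
      assert(place(true,  false, false, 0) == ExternDecl);
      assert(place(false, false, true,  0) == BSSBank);
      assert(place(false, false, false, 1) == ROMData);
      return 0;
    }
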
+const Section *
+PIC16TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV1) const {
+  // We select the section based on the initializer here, so it really
+  // has to be a GlobalVariable.
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
+
+  if (!GV)
+    return TargetAsmInfo::SelectSectionForGlobal(GV1);
+
+  // Record external var decls.
+  if (GV->isDeclaration()) {
+    ExternalVarDecls->Items.push_back(GV);
+    return ExternalVarDecls->S_;
+  }
+
+  assert(GV->hasInitializer() && "A def without initializer?");
+
+  // First, if this is an automatic variable for a function, get the section
+  // name for it and return.
+  const std::string name = GV->getName();
+  if (PAN::isLocalName(name)) {
+    return getSectionForAuto(GV);
+  }
+
+  // Record external var defs.
+  if (GV->hasExternalLinkage() || GV->hasCommonLinkage()) {
+    ExternalVarDefs->Items.push_back(GV);
+  }
+
+  // See if this is an uninitialized global.
+  const Constant *C = GV->getInitializer();
+  if (C->isNullValue())
+    return getBSSSectionForGlobal(GV);
+
+  // If this is initialized data in RAM, put it in the correct IDATA section.
+  if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
+    return getIDATASectionForGlobal(GV);
+
+  // This is initialized data in ROM, put it in the read-only section.
+  if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) {
+    ROSection->Items.push_back(GV);
+    return ROSection->S_;
+  }
+
+  // Else let the default implementation take care of it.
+  return TargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+PIC16TargetAsmInfo::~PIC16TargetAsmInfo() {
+
+  for (unsigned i = 0; i < BSSSections.size(); i++) {
+    delete BSSSections[i];
+  }
+
+  for (unsigned i = 0; i < IDATASections.size(); i++) {
+    delete IDATASections[i];
+  }
+
+  for (unsigned i = 0; i < AutosSections.size(); i++) {
+    delete AutosSections[i];
+  }
+
+  delete ROSection;
+  delete ExternalVarDecls;
+  delete ExternalVarDefs;
+}
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h
new file mode 100644
index 000000000000..e464e36f7887
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h
@@ -0,0 +1,79 @@
+//=====-- PIC16TargetAsmInfo.h - PIC16 asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PIC16TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16TARGETASMINFO_H
+#define PIC16TARGETASMINFO_H
+
+#include "PIC16.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <vector>
+#include "llvm/Module.h"
+
+#define DataBankSize 80
+
+namespace llvm {
+
+  // Forward declarations.
+  class PIC16TargetMachine;
+  class GlobalVariable;
+
+  // PIC16 splits global data into multiple udata and idata sections.
+  // Each udata and idata section keeps a list of the globals it contains,
+  // to avoid scanning over all the global values again and printing only
+  // those that match the current section. Keeping values inside the
+  // sections makes printing a section much easier.
+  struct PIC16Section {
+    const Section *S_;        // Connection to the actual Section.
+    unsigned Size;            // Total size of the objects contained.
+    std::vector<const GlobalVariable*> Items;
+
+    PIC16Section(const Section *s) { S_ = s; Size = 0; }
+  };
+
+  struct PIC16TargetAsmInfo : public TargetAsmInfo {
+    std::string getSectionNameForSym(const std::string &Sym) const;
+    PIC16TargetAsmInfo(const PIC16TargetMachine &TM);
+    mutable std::vector<PIC16Section*> BSSSections;
+    mutable std::vector<PIC16Section*> IDATASections;
+    mutable std::vector<PIC16Section*> AutosSections;
+    mutable PIC16Section *ROSection;
+    mutable PIC16Section *ExternalVarDecls;
+    mutable PIC16Section *ExternalVarDefs;
+    virtual ~PIC16TargetAsmInfo();
+
+  private:
+    const char *RomData8bitsDirective;
+    const char *RomData16bitsDirective;
+    const char *RomData32bitsDirective;
+    const char *getRomDirective(unsigned size) const;
+    virtual const char *getASDirective(unsigned size, unsigned AS) const;
+    const Section *getBSSSectionForGlobal(const GlobalVariable *GV) const;
+    const Section *getIDATASectionForGlobal(const GlobalVariable *GV) const;
+    const Section *getSectionForAuto(const GlobalVariable *GV) const;
+    virtual const Section *SelectSectionForGlobal(const GlobalValue *GV) const;
+
+  public:
+    void SetSectionForGVs(Module &M);
+    std::vector<PIC16Section*> getBSSSections() const {
+      return BSSSections;
+    }
+    std::vector<PIC16Section*> getIDATASections() const {
+      return IDATASections;
+    }
+    std::vector<PIC16Section*> getAutosSections() const {
+      return AutosSections;
+    }
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
new file mode 100644
index 000000000000..bda632608ea7
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -0,0 +1,79 @@
+//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+/// PIC16TargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int PIC16TargetMachineModule;
+int PIC16TargetMachineModule = 0;
+
+
+// Register the targets.
+static RegisterTarget<PIC16TargetMachine>
+X("pic16", "PIC16 14-bit [experimental].");
+static RegisterTarget<CooperTargetMachine>
+Y("cooper", "PIC16 Cooper [experimental].");
+
+// PIC16TargetMachine - Traditional PIC16 machine.
+PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
+                                       bool Cooper)
+  : Subtarget(M, FS, Cooper),
+    DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+    InstrInfo(*this), TLInfo(*this),
+    FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
+
+// CooperTargetMachine - Uses the same PIC16TargetMachine, but sets
+// IsCooper to true.
+CooperTargetMachine::CooperTargetMachine(const Module &M, const std::string &FS) + : PIC16TargetMachine(M, FS, true) {} + + +const TargetAsmInfo *PIC16TargetMachine::createTargetAsmInfo() const { + return new PIC16TargetAsmInfo(*this); +} + +bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. + PM.add(createPIC16ISelDag(*this)); + return false; +} + +bool PIC16TargetMachine:: +addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out) { + // Output assembly language. + PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} + +bool PIC16TargetMachine::addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createPIC16MemSelOptimizerPass()); + return true; // -print-machineinstr should print after this. +} + + diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h new file mode 100644 index 000000000000..7f62d5c13d64 --- /dev/null +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -0,0 +1,76 @@ +//===-- PIC16TargetMachine.h - Define TargetMachine for PIC16 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PIC16 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef PIC16_TARGETMACHINE_H +#define PIC16_TARGETMACHINE_H + +#include "PIC16InstrInfo.h" +#include "PIC16ISelLowering.h" +#include "PIC16RegisterInfo.h" +#include "PIC16Subtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +/// PIC16TargetMachine +/// +class PIC16TargetMachine : public LLVMTargetMachine { + PIC16Subtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + PIC16InstrInfo InstrInfo; + PIC16TargetLowering TLInfo; + + // PIC16 does not have any call stack frame, therefore not having + // any PIC16 specific FrameInfo class. + TargetFrameInfo FrameInfo; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + PIC16TargetMachine(const Module &M, const std::string &FS, + bool Cooper = false); + + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; } + + virtual const PIC16RegisterInfo *getRegisterInfo() const { + return &(InstrInfo.getRegisterInfo()); + } + + virtual PIC16TargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); +}; // PIC16TargetMachine. + +/// CooperTargetMachine +class CooperTargetMachine : public PIC16TargetMachine { +public: + CooperTargetMachine(const Module &M, const std::string &FS); +}; // CooperTargetMachine. 
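
The addInstSelector/addPostRegAlloc/addAssemblyEmitter overrides above are extension points invoked by the generic code-generation driver. A simplified, hypothetical sketch of that protocol follows; the names are stand-ins, not LLVM API, and the real pipeline contains many more stages.

    #include <iostream>

    struct PassManagerBase {};  // stand-in for the real pass manager

    struct TargetHooks {
      virtual bool addInstSelector(PassManagerBase &PM) = 0;
      virtual bool addPostRegAlloc(PassManagerBase &PM) = 0;
      virtual bool addAssemblyEmitter(PassManagerBase &PM) = 0;
      virtual ~TargetHooks() {}
    };

    struct PIC16LikeTarget : TargetHooks {
      bool addInstSelector(PassManagerBase &) {
        std::cout << "isel: DAG-to-DAG selector\n"; return false;
      }
      bool addPostRegAlloc(PassManagerBase &) {
        // Returning true is how addPostRegAlloc above asks for a
        // -print-machineinstrs dump after its added pass.
        std::cout << "post-RA: mem sel optimizer\n"; return true;
      }
      bool addAssemblyEmitter(PassManagerBase &) {
        std::cout << "emit: target code printer\n"; return false;
      }
    };

    void buildPipeline(TargetHooks &TM, PassManagerBase &PM) {
      TM.addInstSelector(PM);    // instruction selection
      /* ...register allocation and other common passes run here... */
      TM.addPostRegAlloc(PM);    // target cleanups after register allocation
      TM.addAssemblyEmitter(PM); // final assembly printing
    }

    int main() {
      PassManagerBase PM;
      PIC16LikeTarget TM;
      buildPipeline(TM, PM);
      return 0;
    }
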
+ +} // end namespace llvm + +#endif diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..1ed483ab21e9 --- /dev/null +++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMPowerPCAsmPrinter + PPCAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMPowerPCCodeGen n) + +add_dependencies(LLVMPowerPCAsmPrinter ${n}) diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile new file mode 100644 index 000000000000..269ef9204433 --- /dev/null +++ b/lib/Target/PowerPC/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCAsmPrinter + +# Hack: we need to include 'main' PowerPC target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp new file mode 100644 index 000000000000..7723982f69ad --- /dev/null +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -0,0 +1,1204 @@ +//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PowerPC assembly language. This printer is +// the output mechanism used by `llc'. 
+// +// Documentation at http://developer.apple.com/documentation/DeveloperTools/ +// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asmprinter" +#include "PPC.h" +#include "PPCPredicates.h" +#include "PPCTargetMachine.h" +#include "PPCSubtarget.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter { + protected: + StringSet<> FnStubs, GVStubs, HiddenGVStubs; + const PPCSubtarget &Subtarget; + public: + explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V), + Subtarget(TM.getSubtarget()) {} + + virtual const char *getPassName() const { + return "PowerPC Assembly Printer"; + } + + PPCTargetMachine &getTM() { + return static_cast(TM); + } + + unsigned enumRegToMachineReg(unsigned enumReg) { + switch (enumReg) { + default: assert(0 && "Unhandled register!"); break; + case PPC::CR0: return 0; + case PPC::CR1: return 1; + case PPC::CR2: return 2; + case PPC::CR3: return 3; + case PPC::CR4: return 4; + case PPC::CR5: return 5; + case PPC::CR6: return 6; + case PPC::CR7: return 7; + } + abort(); + } + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + bool printInstruction(const MachineInstr *MI); + + void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO); + + /// stripRegisterPrefix - This method strips the character prefix from a + /// register name so that only the number is left. Used by for linux asm. + const char *stripRegisterPrefix(const char *RegName) { + switch (RegName[0]) { + case 'r': + case 'f': + case 'v': return RegName + 1; + case 'c': if (RegName[1] == 'r') return RegName + 2; + } + + return RegName; + } + + /// printRegister - Print register according to target requirements. + /// + void printRegister(const MachineOperand &MO, bool R0AsZero) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + + // If we should use 0 for R0. + if (R0AsZero && RegNo == PPC::R0) { + O << "0"; + return; + } + + const char *RegName = TM.getRegisterInfo()->get(RegNo).AsmName; + // Linux assembler (Others?) does not take register mnemonics. + // FIXME - What about special registers used in mfspr/mtspr? 
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName); + O << RegName; + } + + void printOperand(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + printRegister(MO, false); + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO); + } + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + + + void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo) { + char value = MI->getOperand(OpNo).getImm(); + value = (value << (32-5)) >> (32-5); + O << (int)value; + } + void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo) { + unsigned char value = MI->getOperand(OpNo).getImm(); + assert(value <= 31 && "Invalid u5imm argument!"); + O << (unsigned int)value; + } + void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo) { + unsigned char value = MI->getOperand(OpNo).getImm(); + assert(value <= 63 && "Invalid u6imm argument!"); + O << (unsigned int)value; + } + void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo) { + O << (short)MI->getOperand(OpNo).getImm(); + } + void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo) { + O << (unsigned short)MI->getOperand(OpNo).getImm(); + } + void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImm()) { + O << (short)(MI->getOperand(OpNo).getImm()*4); + } else { + O << "lo16("; + printOp(MI->getOperand(OpNo)); + if (TM.getRelocationModel() == Reloc::PIC_) + O << "-\"L" << getFunctionNumber() << "$pb\")"; + else + O << ')'; + } + } + void printBranchOperand(const MachineInstr *MI, unsigned OpNo) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + if (MI->getOperand(OpNo).isImm()) { + O << "$+" << MI->getOperand(OpNo).getImm()*4; + } else { + printOp(MI->getOperand(OpNo)); + } + } + void printCallOperand(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (TM.getRelocationModel() != Reloc::Static) { + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + GlobalValue *GV = MO.getGlobal(); + if (((GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + // Dynamically-resolved functions need a stub for the function. 
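
The shift pair in printS5ImmOperand above, (value << (32-5)) >> (32-5), is the classic idiom for sign-extending a narrow immediate. A minimal standalone sketch, assuming two's complement and an arithmetic right shift on signed int, as the original code does:

    #include <cassert>

    // Sign-extend the low N bits of x (N < 32); mirrors printS5ImmOperand.
    int signExtend(unsigned x, unsigned N) {
      int shift = 32 - (int)N;
      // Conversion of the shifted value to int and the right shift of a
      // negative int are two's-complement/arithmetic on mainstream targets.
      return ((int)(x << shift)) >> shift;
    }

    int main() {
      assert(signExtend(0x1F, 5) == -1);   // 0b11111 -> -1
      assert(signExtend(0x10, 5) == -16);  // 0b10000 -> -16
      assert(signExtend(0x0F, 5) == 15);   // 0b01111 -> +15
      return 0;
    }
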
+ std::string Name = Mang->getValueName(GV); + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + } + if (MO.getType() == MachineOperand::MO_ExternalSymbol) { + std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + return; + } + } + + printOp(MI->getOperand(OpNo)); + } + void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo) { + O << (int)MI->getOperand(OpNo).getImm()*4; + } + void printPICLabel(const MachineInstr *MI, unsigned OpNo) { + O << "\"L" << getFunctionNumber() << "$pb\"\n"; + O << "\"L" << getFunctionNumber() << "$pb\":"; + } + void printSymbolHi(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo); + } else { + if (Subtarget.isDarwin()) O << "ha16("; + printOp(MI->getOperand(OpNo)); + if (TM.getRelocationModel() == Reloc::PIC_) + O << "-\"L" << getFunctionNumber() << "$pb\""; + if (Subtarget.isDarwin()) + O << ')'; + else + O << "@ha"; + } + } + void printSymbolLo(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo); + } else { + if (Subtarget.isDarwin()) O << "lo16("; + printOp(MI->getOperand(OpNo)); + if (TM.getRelocationModel() == Reloc::PIC_) + O << "-\"L" << getFunctionNumber() << "$pb\""; + if (Subtarget.isDarwin()) + O << ')'; + else + O << "@l"; + } + } + void printcrbitm(const MachineInstr *MI, unsigned OpNo) { + unsigned CCReg = MI->getOperand(OpNo).getReg(); + unsigned RegNo = enumRegToMachineReg(CCReg); + O << (0x80 >> RegNo); + } + // The new addressing mode printers. + void printMemRegImm(const MachineInstr *MI, unsigned OpNo) { + printSymbolLo(MI, OpNo); + O << '('; + if (MI->getOperand(OpNo+1).isReg() && + MI->getOperand(OpNo+1).getReg() == PPC::R0) + O << "0"; + else + printOperand(MI, OpNo+1); + O << ')'; + } + void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImm()) + printS16X4ImmOperand(MI, OpNo); + else + printSymbolLo(MI, OpNo); + O << '('; + if (MI->getOperand(OpNo+1).isReg() && + MI->getOperand(OpNo+1).getReg() == PPC::R0) + O << "0"; + else + printOperand(MI, OpNo+1); + O << ')'; + } + + void printMemRegReg(const MachineInstr *MI, unsigned OpNo) { + // When used as the base register, r0 reads constant zero rather than + // the value contained in the register. For this reason, the darwin + // assembler requires that we print r0 as 0 (no r) when used as the base. 
+ const MachineOperand &MO = MI->getOperand(OpNo); + printRegister(MO, true); + O << ", "; + printOperand(MI, OpNo+1); + } + + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier); + + virtual bool runOnMachineFunction(MachineFunction &F) = 0; + virtual bool doFinalization(Module &M) = 0; + + virtual void EmitExternalGlobal(const GlobalVariable *GV); + }; + + /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux + class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter { + DwarfWriter *DW; + MachineModuleInfo *MMI; + public: + explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {} + + virtual const char *getPassName() const { + return "Linux PPC Assembly Printer"; + } + + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + PPCAsmPrinter::getAnalysisUsage(AU); + } + + void printModuleLevelGV(const GlobalVariable* GVar); + }; + + /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac + /// OS X + class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter { + DwarfWriter *DW; + MachineModuleInfo *MMI; + raw_ostream &OS; + public: + explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0), OS(O) {} + + virtual const char *getPassName() const { + return "Darwin PPC Assembly Printer"; + } + + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + PPCAsmPrinter::getAnalysisUsage(AU); + } + + void printModuleLevelGV(const GlobalVariable* GVar); + }; +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer +#include "PPCGenAsmWriter.inc" + +void PPCAsmPrinter::printOp(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + cerr << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + // FIXME: PIC relocation model + return; + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ExternalSymbol: + // Computing the address of an external symbol, not calling it. + if (TM.getRelocationModel() != Reloc::Static) { + std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + return; + } + O << TAI->getGlobalPrefix() << MO.getSymbolName(); + return; + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. 
+ GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + + // External or weakly linked global variables need non-lazily-resolved stubs + if (TM.getRelocationModel() != Reloc::Static) { + if (GV->isDeclaration() || GV->isWeakForLinker()) { + if (GV->hasHiddenVisibility()) { + if (!GV->isDeclaration() && !GV->hasCommonLinkage()) + O << Name; + else { + HiddenGVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + } + O << Name; + + printOffset(MO.getOffset()); + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + + default: + O << ""; + return; + } +} + +/// EmitExternalGlobal - In this case we need to use the indirect symbol. +/// +void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { + std::string Name; + getGlobalLinkName(GV, Name); + if (TM.getRelocationModel() != Reloc::Static) { + if (GV->hasHiddenVisibility()) + HiddenGVStubs.insert(Name); + else + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + return; + } + O << Name; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'c': // Don't print "$" before a global var name or constant. + // PPC never has a prefix. + printOperand(MI, OpNo); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; + } + } + + printOperand(MI, OpNo); + return false; +} + +bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + if (MI->getOperand(OpNo).isReg()) + printMemRegReg(MI, OpNo); + else + printMemRegImm(MI, OpNo); + return false; +} + +void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier) { + assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!"); + unsigned Code = MI->getOperand(OpNo).getImm(); + if (!strcmp(Modifier, "cc")) { + switch ((PPC::Predicate)Code) { + case PPC::PRED_ALWAYS: return; // Don't print anything for always. + case PPC::PRED_LT: O << "lt"; return; + case PPC::PRED_LE: O << "le"; return; + case PPC::PRED_EQ: O << "eq"; return; + case PPC::PRED_GE: O << "ge"; return; + case PPC::PRED_GT: O << "gt"; return; + case PPC::PRED_NE: O << "ne"; return; + case PPC::PRED_UN: O << "un"; return; + case PPC::PRED_NU: O << "nu"; return; + } + + } else { + assert(!strcmp(Modifier, "reg") && + "Need to specify 'cc' or 'reg' as predicate op modifier!"); + // Don't print the register for 'always'. 
+ if (Code == PPC::PRED_ALWAYS) return; + printOperand(MI, OpNo+1); + } +} + + +/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax to +/// the current output stream. +/// +void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + // Check for slwi/srwi mnemonics. + if (MI->getOpcode() == PPC::RLWINM) { + bool FoundMnemonic = false; + unsigned char SH = MI->getOperand(2).getImm(); + unsigned char MB = MI->getOperand(3).getImm(); + unsigned char ME = MI->getOperand(4).getImm(); + if (SH <= 31 && MB == 0 && ME == (31-SH)) { + O << "\tslwi "; FoundMnemonic = true; + } + if (SH <= 31 && MB == (32-SH) && ME == 31) { + O << "\tsrwi "; FoundMnemonic = true; + SH = 32-SH; + } + if (FoundMnemonic) { + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + O << ", " << (unsigned int)SH << '\n'; + return; + } + } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) { + if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + O << "\tmr "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + O << '\n'; + return; + } + } else if (MI->getOpcode() == PPC::RLDICR) { + unsigned char SH = MI->getOperand(2).getImm(); + unsigned char ME = MI->getOperand(3).getImm(); + // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH + if (63-SH == ME) { + O << "\tsldi "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + O << ", " << (unsigned int)SH << '\n'; + return; + } + } + + if (printInstruction(MI)) + return; // Printer was automatically generated + + assert(0 && "Unhandled instruction in asm writer!"); + abort(); + return; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out labels for the function. + const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: // Symbols default to internal. + break; + case Function::ExternalLinkage: + O << "\t.global\t" << CurrentFnName << '\n' + << "\t.type\t" << CurrentFnName << ", @function\n"; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + O << "\t.global\t" << CurrentFnName << '\n'; + O << "\t.weak\t" << CurrentFnName << '\n'; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + EmitAlignment(2, F); + O << CurrentFnName << ":\n"; + + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n'; + + // Print out jump tables referenced by the function. 
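
The slwi/srwi detection in printMachineInstruction above relies on two standard rotate-and-mask identities: slwi rA,rS,n is rlwinm rA,rS,n,0,31-n, and srwi rA,rS,n is rlwinm rA,rS,32-n,n,31. A small sketch of the operand checks (PowerPC bit numbering: bit 0 is the most significant bit of the mask):

    #include <cassert>

    // SH = rotate amount, MB/ME = first/last bit of the AND mask.
    bool isSlwi(unsigned SH, unsigned MB, unsigned ME) {
      return SH <= 31 && MB == 0 && ME == 31 - SH;  // prints slwi rA,rS,SH
    }
    bool isSrwi(unsigned SH, unsigned MB, unsigned ME) {
      return SH <= 31 && MB == 32 - SH && ME == 31; // prints srwi rA,rS,32-SH
    }

    int main() {
      assert(isSlwi(4, 0, 27));   // rlwinm rA,rS,4,0,27  == slwi rA,rS,4
      assert(isSrwi(28, 4, 31));  // rlwinm rA,rS,28,4,31 == srwi rA,rS,4
      return 0;
    }
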
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + SwitchToSection(TAI->SectionForGlobal(F)); + + // Emit post-function debug information. + DW->EndFunction(&MF); + + O.flush(); + + // We didn't modify anything. + return false; +} + +bool PPCLinuxAsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + + // Emit initial debug information. + MMI = getAnalysisIfAvailable(); + assert(MMI); + DW = getAnalysisIfAvailable(); + assert(DW && "DwarfWriter is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // GNU as handles section names wrapped in quotes + Mang->setUseQuotes(true); + + SwitchToSection(TAI->getTextSection()); + + return Result; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. +static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; +} + +void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + std::string name = Mang->getValueName(GVar); + + printVisibility(name, GVar->getVisibility()); + + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && /* FIXME: Verify correct */ + !GVar->hasSection() && + (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() || + GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasExternalLinkage()) { + O << "\t.global " << name << '\n'; + O << "\t.type " << name << ", @object\n"; + O << name << ":\n"; + O << "\t.zero " << Size << '\n'; + } else if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << ',' << Size; + } else { + O << ".comm " << name << ',' << Size; + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << " '"; + PrintUnmangledNameSafely(GVar, O); + O << "'"; + } + O << '\n'; + return; + } + + switch (GVar->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::CommonLinkage: + O << "\t.global " << name << '\n' + << "\t.type " << name << ", @object\n" + << "\t.weak " << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.global " << name << '\n' + << "\t.type " << name << ", @object\n"; + // FALL THROUGH + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + default: + cerr << "Unknown linkage type!"; + abort(); + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " '"; + PrintUnmangledNameSafely(GVar, O); + O << "'"; + } + O << '\n'; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! 
+ if (const GlobalValue *GV = dyn_cast(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + +bool PPCLinuxAsmPrinter::doFinalization(Module &M) { + // Print out module-level global variables here. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + // TODO + + // Emit initial debug information. + DW->EndModule(); + + return AsmPrinter::doFinalization(M); +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out labels for the function. + const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: // Symbols default to internal. + break; + case Function::ExternalLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + O << "\t.weak_definition\t" << CurrentFnName << '\n'; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F); + O << CurrentFnName << ":\n"; + + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + // If the function is empty, then we need to emit *something*. Otherwise, the + // function's label might be associated with something that it wasn't meant to + // be associated with. We emit a noop in this situation. + MachineFunction::iterator I = MF.begin(); + + if (++I == MF.end() && MF.front().empty()) + O << "\tnop\n"; + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // Emit post-function debug information. + DW->EndFunction(&MF); + + // We didn't modify anything. + return false; +} + + +bool PPCDarwinAsmPrinter::doInitialization(Module &M) { + static const char *const CPUDirectives[] = { + "", + "ppc", + "ppc601", + "ppc602", + "ppc603", + "ppc7400", + "ppc750", + "ppc970", + "ppc64" + }; + + unsigned Directive = Subtarget.getDarwinDirective(); + if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970) + Directive = PPC::DIR_970; + if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) + Directive = PPC::DIR_7400; + if (Subtarget.isPPC64() && Directive < PPC::DIR_970) + Directive = PPC::DIR_64; + assert(Directive <= PPC::DIR_64 && "Directive out of range."); + O << "\t.machine " << CPUDirectives[Directive] << '\n'; + + bool Result = AsmPrinter::doInitialization(M); + + // Emit initial debug information. + // We need this for Personality functions. 
+ // AsmPrinter::doInitialization should have done this analysis. + MMI = getAnalysisIfAvailable(); + assert(MMI); + DW = getAnalysisIfAvailable(); + assert(DW && "DwarfWriter is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // Darwin wants symbols to be quoted if they have complex names. + Mang->setUseQuotes(true); + + // Prime text sections so they are adjacent. This reduces the likelihood a + // large data or debug section causes a branch to exceed 16M limit. + SwitchToTextSection("\t.section __TEXT,__textcoal_nt,coalesced," + "pure_instructions"); + if (TM.getRelocationModel() == Reloc::PIC_) { + SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32"); + } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { + SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16"); + } + SwitchToSection(TAI->getTextSection()); + + return Result; +} + +void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) { + if (TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + std::string name = Mang->getValueName(GVar); + + printVisibility(name, GVar->getVisibility()); + + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && /* FIXME: Verify correct */ + !GVar->hasSection() && + (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() || + GVar->isWeakForLinker()) && + TAI->SectionKindForGlobal(GVar) != SectionKind::RODataMergeStr) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasExternalLinkage()) { + O << "\t.globl " << name << '\n'; + O << "\t.zerofill __DATA, __common, " << name << ", " + << Size << ", " << Align; + } else if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << ',' << Size << ',' << Align; + } else if (!GVar->hasCommonLinkage()) { + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } else { + O << ".comm " << name << ',' << Size; + // Darwin 9 and above support aligned common data. + if (Subtarget.isDarwin9()) + O << ',' << Align; + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << " '"; + PrintUnmangledNameSafely(GVar, O); + O << "'"; + } + O << '\n'; + return; + } + + switch (GVar->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::CommonLinkage: + O << "\t.globl " << name << '\n' + << "\t.weak_definition " << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + default: + cerr << "Unknown linkage type!"; + abort(); + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " '"; + PrintUnmangledNameSafely(GVar, O); + O << "'"; + } + O << '\n'; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + +bool PPCDarwinAsmPrinter::doFinalization(Module &M) { + const TargetData *TD = TM.getTargetData(); + + // Print out module-level global variables here. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + bool isPPC64 = TD->getPointerSizeInBits() == 64; + + // Output stubs for dynamically-linked functions + if (TM.getRelocationModel() == Reloc::PIC_) { + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32"); + EmitAlignment(4); + const char *p = i->getKeyData(); + bool hasQuote = p[0]=='\"'; + printSuffixedName(p, "$stub"); + O << ":\n"; + O << "\t.indirect_symbol " << p << '\n'; + O << "\tmflr r0\n"; + O << "\tbcl 20,31,"; + if (hasQuote) + O << "\"L0$" << &p[1]; + else + O << "L0$" << p; + O << '\n'; + if (hasQuote) + O << "\"L0$" << &p[1]; + else + O << "L0$" << p; + O << ":\n"; + O << "\tmflr r11\n"; + O << "\taddis r11,r11,ha16("; + printSuffixedName(p, "$lazy_ptr"); + O << "-"; + if (hasQuote) + O << "\"L0$" << &p[1]; + else + O << "L0$" << p; + O << ")\n"; + O << "\tmtlr r0\n"; + if (isPPC64) + O << "\tldu r12,lo16("; + else + O << "\tlwzu r12,lo16("; + printSuffixedName(p, "$lazy_ptr"); + O << "-"; + if (hasQuote) + O << "\"L0$" << &p[1]; + else + O << "L0$" << p; + O << ")(r11)\n"; + O << "\tmtctr r12\n"; + O << "\tbctr\n"; + SwitchToDataSection(".lazy_symbol_pointer"); + printSuffixedName(p, "$lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << '\n'; + if (isPPC64) + O << "\t.quad dyld_stub_binding_helper\n"; + else + O << "\t.long dyld_stub_binding_helper\n"; + } + } else { + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16"); + EmitAlignment(4); + const char *p = i->getKeyData(); + printSuffixedName(p, "$stub"); + O << ":\n"; + O << "\t.indirect_symbol " << p << '\n'; + O << "\tlis r11,ha16("; + printSuffixedName(p, "$lazy_ptr"); + O << ")\n"; + if (isPPC64) + O << "\tldu r12,lo16("; + else + O << "\tlwzu r12,lo16("; + printSuffixedName(p, "$lazy_ptr"); + O << ")(r11)\n"; + O << "\tmtctr r12\n"; + O << "\tbctr\n"; + SwitchToDataSection(".lazy_symbol_pointer"); + printSuffixedName(p, "$lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << '\n'; + if (isPPC64) + O << "\t.quad dyld_stub_binding_helper\n"; + else + O << "\t.long dyld_stub_binding_helper\n"; + } + } + + O << '\n'; + + if (TAI->doesSupportExceptionHandling() && MMI) { + // Add the (possibly multiple) personalities to the set of global values. + // Only referenced functions get into the Personalities list. 
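
For reference, the non-PIC lazy stub emitted by the loop above takes the following shape. This sketch simply replays the O << calls for a hypothetical 32-bit symbol _foo; the section-switch directives and quoted-name handling are omitted, and the function is not LLVM API.

    #include <iostream>
    #include <string>

    // Reconstruct the static-relocation-model stub text printed above.
    void printLazyStub(std::ostream &O, const std::string &Sym) {
      O << Sym << "$stub:\n"
        << "\t.indirect_symbol " << Sym << '\n'
        << "\tlis r11,ha16(" << Sym << "$lazy_ptr)\n"
        << "\tlwzu r12,lo16(" << Sym << "$lazy_ptr)(r11)\n"  // ldu on ppc64
        << "\tmtctr r12\n"
        << "\tbctr\n"
        << Sym << "$lazy_ptr:\n"
        << "\t.indirect_symbol " << Sym << '\n'
        << "\t.long dyld_stub_binding_helper\n";  // .quad on ppc64
    }

    int main() {
      printLazyStub(std::cout, "_foo");
      return 0;
    }
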
+ const std::vector& Personalities = MMI->getPersonalities(); + + for (std::vector::const_iterator I = Personalities.begin(), + E = Personalities.end(); I != E; ++I) + if (*I) GVStubs.insert("_" + (*I)->getName()); + } + + // Output stubs for external and common global variables. + if (!GVStubs.empty()) { + SwitchToDataSection(".non_lazy_symbol_pointer"); + for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end(); + i != e; ++i) { + std::string p = i->getKeyData(); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << '\n'; + if (isPPC64) + O << "\t.quad\t0\n"; + else + O << "\t.long\t0\n"; + } + } + + if (!HiddenGVStubs.empty()) { + SwitchToSection(TAI->getDataSection()); + for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end(); + i != e; ++i) { + std::string p = i->getKeyData(); + EmitAlignment(isPPC64 ? 3 : 2); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + if (isPPC64) + O << "\t.quad\t"; + else + O << "\t.long\t"; + O << p << '\n'; + } + } + + + // Emit initial debug information. + DW->EndModule(); + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never generates + // code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + + return AsmPrinter::doFinalization(M); +} + + + +/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code +/// for a MachineFunction to the given output stream, in a format that the +/// Darwin assembler can deal with. +/// +FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o, + PPCTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + const PPCSubtarget *Subtarget = &tm.getSubtarget(); + + if (Subtarget->isDarwin()) { + return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), + OptLevel, verbose); + } else { + return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), + OptLevel, verbose); + } +} + +namespace { + static struct Register { + Register() { + PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass); + } + } Registrator; +} + +extern "C" int PowerPCAsmPrinterForceLink; +int PowerPCAsmPrinterForceLink = 0; diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt new file mode 100644 index 000000000000..0b67aff2156f --- /dev/null +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -0,0 +1,28 @@ +set(LLVM_TARGET_DEFINITIONS PPC.td) + +tablegen(PPCGenInstrNames.inc -gen-instr-enums) +tablegen(PPCGenRegisterNames.inc -gen-register-enums) +tablegen(PPCGenAsmWriter.inc -gen-asm-writer) +tablegen(PPCGenCodeEmitter.inc -gen-emitter) +tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(PPCGenRegisterInfo.inc -gen-register-desc) +tablegen(PPCGenInstrInfo.inc -gen-instr-desc) +tablegen(PPCGenDAGISel.inc -gen-dag-isel) +tablegen(PPCGenCallingConv.inc -gen-callingconv) +tablegen(PPCGenSubtarget.inc -gen-subtarget) + +add_llvm_target(PowerPCCodeGen + PPCBranchSelector.cpp + PPCCodeEmitter.cpp + PPCHazardRecognizers.cpp + PPCInstrInfo.cpp + PPCISelDAGToDAG.cpp + PPCISelLowering.cpp + PPCJITInfo.cpp + PPCMachOWriterInfo.cpp + PPCPredicates.cpp + PPCRegisterInfo.cpp + PPCSubtarget.cpp + PPCTargetAsmInfo.cpp + PPCTargetMachine.cpp + ) diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile new file mode 100644 index 
000000000000..db688970e9e7 --- /dev/null +++ b/lib/Target/PowerPC/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/PowerPC/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMPowerPCCodeGen +TARGET = PPC + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ + PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ + PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \ + PPCGenInstrInfo.inc PPCGenDAGISel.inc \ + PPCGenSubtarget.inc PPCGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h new file mode 100644 index 000000000000..c844e21990b3 --- /dev/null +++ b/lib/Target/PowerPC/PPC.h @@ -0,0 +1,49 @@ +//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// PowerPC back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_POWERPC_H +#define LLVM_TARGET_POWERPC_H + +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class PPCTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class raw_ostream; + +FunctionPass *createPPCBranchSelectionPass(); +FunctionPass *createPPCISelDag(PPCTargetMachine &TM); +FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, + PPCTargetMachine &TM, + CodeGenOpt::Level OptLevel, bool Verbose); +FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, + MachineCodeEmitter &MCE); +FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &MCE); +} // end namespace llvm; + +// Defines symbolic names for PowerPC registers. This defines a mapping from +// register name to register number. +// +#include "PPCGenRegisterNames.inc" + +// Defines symbolic names for the PowerPC instructions. +// +#include "PPCGenInstrNames.inc" + +#endif diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td new file mode 100644 index 000000000000..08f5bb43087f --- /dev/null +++ b/lib/Target/PowerPC/PPC.td @@ -0,0 +1,114 @@ +//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the PowerPC target. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing. +// +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// PowerPC Subtarget features. 
+// + +//===----------------------------------------------------------------------===// +// CPU Directives // +//===----------------------------------------------------------------------===// + +def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">; +def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">; +def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">; +def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; +def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; +def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; +def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; + +def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", + "Enable 64-bit instructions">; +def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", + "Enable 64-bit registers usage for ppc32 [beta]">; +def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", + "Enable Altivec instructions">; +def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true", + "Enable GPUL instructions">; +def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", + "Enable the fsqrt instruction">; +def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", + "Enable the stfiwx instruction">; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "PPCRegisterInfo.td" +include "PPCSchedule.td" +include "PPCInstrInfo.td" + +//===----------------------------------------------------------------------===// +// PowerPC processors supported. 
+// + +def : Processor<"generic", G3Itineraries, [Directive32]>; +def : Processor<"601", G3Itineraries, [Directive601]>; +def : Processor<"602", G3Itineraries, [Directive602]>; +def : Processor<"603", G3Itineraries, [Directive603]>; +def : Processor<"603e", G3Itineraries, [Directive603]>; +def : Processor<"603ev", G3Itineraries, [Directive603]>; +def : Processor<"604", G3Itineraries, [Directive604]>; +def : Processor<"604e", G3Itineraries, [Directive604]>; +def : Processor<"620", G3Itineraries, [Directive620]>; +def : Processor<"g3", G3Itineraries, [Directive750]>; +def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>; +def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>; +def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>; +def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>; +def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>; +def : Processor<"970", G5Itineraries, + [Directive970, FeatureAltivec, + FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"g5", G5Itineraries, + [Directive970, FeatureAltivec, + FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"ppc", G3Itineraries, [Directive32]>; +def : Processor<"ppc64", G5Itineraries, + [Directive64, FeatureAltivec, + FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */]>; + + +//===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PPCCallingConv.td" + +def PPCInstrInfo : InstrInfo { + // Define how we want to layout our TargetSpecific information field... This + // should be kept up-to-date with the fields in the PPCInstrInfo.h file. + let TSFlagsFields = ["PPC970_First", + "PPC970_Single", + "PPC970_Cracked", + "PPC970_Unit"]; + let TSFlagsShifts = [0, 1, 2, 3]; + + let isLittleEndianEncoding = 1; +} + + +def PPC : Target { + // Information about the instructions. + let InstructionSet = PPCInstrInfo; +} diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp new file mode 100644 index 000000000000..b95a502d9187 --- /dev/null +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -0,0 +1,174 @@ +//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that scans a machine function to determine which +// conditional branches need more than 16 bits of displacement to reach their +// target basic block. It does this in two passes; a calculation of basic block +// positions pass, and a branch pseudo op to machine branch opcode pass. This +// pass should be run last, just before the assembly printer.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppc-branch-select" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" +#include "PPCPredicates.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +STATISTIC(NumExpanded, "Number of branches expanded to long format"); + +namespace { + struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass { + static char ID; + PPCBSel() : MachineFunctionPass(&ID) {} + + /// BlockSizes - The sizes of the basic blocks in the function. + std::vector<unsigned> BlockSizes; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "PowerPC Branch Selector"; + } + }; + char PPCBSel::ID = 0; +} + +/// createPPCBranchSelectionPass - returns an instance of the Branch Selection +/// Pass +/// +FunctionPass *llvm::createPPCBranchSelectionPass() { + return new PPCBSel(); +} + +bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + // Give the blocks of the function a dense, in-order, numbering. + Fn.RenumberBlocks(); + BlockSizes.resize(Fn.getNumBlockIDs()); + + // Measure each MBB and compute a size for the entire function. + unsigned FuncSize = 0; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = MFI; + + unsigned BlockSize = 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); + MBBI != EE; ++MBBI) + BlockSize += TII->GetInstSizeInBytes(MBBI); + + BlockSizes[MBB->getNumber()] = BlockSize; + FuncSize += BlockSize; + } + + // If the entire function is smaller than the displacement of a branch field, + // we know we don't need to shrink any branches in this function. This is a + // common case. + if (FuncSize < (1 << 15)) { + BlockSizes.clear(); + return false; + } + + // For each conditional branch, if the offset to its destination is larger + // than the offset field allows, transform it into a long branch sequence + // like this: + // short branch: + // bCC MBB + // long branch: + // b!CC $PC+8 + // b MBB + // + bool MadeChange = true; + bool EverMadeChange = false; + while (MadeChange) { + // Iteratively expand branches until we reach a fixed point. + MadeChange = false; + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + unsigned MBBStartOffset = 0; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) { + MBBStartOffset += TII->GetInstSizeInBytes(I); + continue; + } + + // Determine the offset from the current branch to the destination + // block. + MachineBasicBlock *Dest = I->getOperand(2).getMBB(); + + int BranchSize; + if (Dest->getNumber() <= MBB.getNumber()) { + // If this is a backwards branch, the delta is the offset from the + // start of this block to this branch, plus the sizes of all blocks + // from this block to the dest. + BranchSize = MBBStartOffset; + + for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) + BranchSize += BlockSizes[i]; + } else { + // Otherwise, add the size of the blocks between this block and the + // dest to the number of bytes left in this block.
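+ // For example (sizes made up purely for illustration): if this branch + // sits 8 bytes into its own 24-byte block and the blocks between here + // and Dest total 300 bytes, the loop below computes + // BranchSize = -8 + 24 + 300 = 316, + // which still fits the signed 16-bit BCC displacement checked below.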
+ BranchSize = -MBBStartOffset; + + for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) + BranchSize += BlockSizes[i]; + } + + // If this branch is in range, ignore it. + if (isInt16(BranchSize)) { + MBBStartOffset += 4; + continue; + } + + // Otherwise, we have to expand it to a long branch. + // The BCC operands are: + // 0. PPC branch predicate + // 1. CR register + // 2. Target MBB + PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); + unsigned CRReg = I->getOperand(1).getReg(); + + MachineInstr *OldBranch = I; + DebugLoc dl = OldBranch->getDebugLoc(); + + // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. + BuildMI(MBB, I, dl, TII->get(PPC::BCC)) + .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); + + // Uncond branch to the real destination. + I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); + + // Remove the old branch from the function. + OldBranch->eraseFromParent(); + + // Remember that this instruction is 8-bytes, increase the size of the + // block by 4, remember to iterate. + BlockSizes[MBB.getNumber()] += 4; + MBBStartOffset += 8; + ++NumExpanded; + MadeChange = true; + } + } + EverMadeChange |= MadeChange; + } + + BlockSizes.clear(); + return true; +} + diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td new file mode 100644 index 000000000000..9f916f38d5e4 --- /dev/null +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -0,0 +1,66 @@ +//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PowerPC 32- and 64-bit +// architectures. +// +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention +//===----------------------------------------------------------------------===// + +// Return-value convention for PowerPC +def RetCC_PPC : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + + CCIfType<[f32], CCAssignToReg<[F1]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + + // Vector types are always returned in V2. + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> +]>; + + +//===----------------------------------------------------------------------===// +// PowerPC Argument Calling Conventions +//===----------------------------------------------------------------------===// +/* +def CC_PPC : CallingConv<[ + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, + + // Common sub-targets pass FP values in F1 - F13 + CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()", + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>, + // ELF32 sub-target passes FP values in F1 - F8. + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + + // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> + +/* + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 16>>*/ +]>; + +*/ + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp new file mode 100644 index 000000000000..aa3dce19e505 --- /dev/null +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -0,0 +1,266 @@ +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to +// JIT-compile bitcode to native PowerPC. +// +//===----------------------------------------------------------------------===// + +#include "PPCTargetMachine.h" +#include "PPCRelocations.h" +#include "PPC.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +namespace { + class PPCCodeEmitter { + TargetMachine &TM; + MachineCodeEmitter &MCE; + public: + PPCCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce): + TM(tm), MCE(mce) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + + unsigned getBinaryCodeForInstr(const MachineInstr &MI); + + /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr + + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO); + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. 
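+ /// For illustration (PIC JIT path only): the "bl 1" emitted for MovePCtoLR + /// in emitBasicBlock below leaves &MovePCtoLR + 4 in LR, and + /// getMachineOpValue folds -(intptr_t)MovePCtoLROffset - 4 into absolute + /// relocations, so that after relocation LR + imm reproduces &GV.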
+ + void *MovePCtoLROffset; + }; + + template <class CodeEmitter> + class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass, + public PPCCodeEmitter + { + TargetMachine &TM; + CodeEmitter &MCE; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + public: + static char ID; + Emitter(TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(&ID), PPCCodeEmitter(tm, mce), TM(tm), MCE(mce) {} + + const char *getPassName() const { return "PowerPC Machine Code Emitter"; } + + /// runOnMachineFunction - emits the given MachineFunction to memory + /// + bool runOnMachineFunction(MachineFunction &MF); + + /// emitBasicBlock - emits the given MachineBasicBlock to memory + /// + void emitBasicBlock(MachineBasicBlock &MBB); + + /// getValueBit - return the particular bit of Val + /// + unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; } + }; + + template <class CodeEmitter> + char Emitter<CodeEmitter>::ID = 0; +} + +/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code +/// to the specified MCE object. +FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM, + MachineCodeEmitter &MCE) { + return new Emitter<MachineCodeEmitter>(TM, MCE); +} + +FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter<JITCodeEmitter>(TM, JCE); +} + +template <class CodeEmitter> +bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + + MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>()); + do { + MovePCtoLROffset = 0; + MCE.startFunction(MF); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + emitBasicBlock(*BB); + } while (MCE.finishFunction(MF)); + + return false; +} + +template <class CodeEmitter> +void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { + MCE.StartMachineBasicBlock(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ + const MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: + MCE.emitWordBE(getBinaryCodeForInstr(MI)); + break; + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getImm()); + break; + case TargetInstrInfo::IMPLICIT_DEF: + break; // pseudo opcode, no side effects + case PPC::MovePCtoLR: + case PPC::MovePCtoLR8: + assert(TM.getRelocationModel() == Reloc::PIC_); + MovePCtoLROffset = (void*)MCE.getCurrentPCValue(); + MCE.emitWordBE(0x48000005); // bl 1 + break; + } + } +} + +unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { + + unsigned rv = 0; // Return value; defaults to 0 for unhandled cases + // or things that get fixed up later by the JIT. + if (MO.isReg()) { + rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + + // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the + // register, not the register number directly.
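+ // For example, CR2 encodes as register number 2, and the mask form used + // below is (0x80 >> 2) == 0x20, i.e. one mask bit per CR field.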
+ if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) { + rv = 0x80 >> rv; + } + } else if (MO.isImm()) { + rv = MO.getImm(); + } else if (MO.isGlobal() || MO.isSymbol() || + MO.isCPI() || MO.isJTI()) { + unsigned Reloc = 0; + if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho || + MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF || + MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) + Reloc = PPC::reloc_pcrel_bx; + else { + if (TM.getRelocationModel() == Reloc::PIC_) { + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + } + switch (MI.getOpcode()) { + default: MI.dump(); assert(0 && "Unknown instruction for relocation!"); + case PPC::LIS: + case PPC::LIS8: + case PPC::ADDIS: + case PPC::ADDIS8: + Reloc = PPC::reloc_absolute_high; // Pointer to symbol + break; + case PPC::LI: + case PPC::LI8: + case PPC::LA: + // Loads. + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + case PPC::LFS: + case PPC::LFD: + + // Stores. + case PPC::STB: + case PPC::STB8: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + case PPC::STFS: + case PPC::STFD: + Reloc = PPC::reloc_absolute_low; + break; + + case PPC::LWA: + case PPC::LD: + case PPC::STD: + case PPC::STD_32: + Reloc = PPC::reloc_absolute_low_ix; + break; + } + } + + MachineRelocation R; + if (MO.isGlobal()) { + R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + MO.getGlobal(), 0, + isa<Function>(MO.getGlobal())); + } else if (MO.isSymbol()) { + R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, MO.getSymbolName(), 0); + } else if (MO.isCPI()) { + R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, MO.getIndex(), 0); + } else { + assert(MO.isJTI()); + R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, MO.getIndex(), 0); + } + + // If in PIC mode, we need to encode the negated address of the + // 'movepctolr' into the unrelocated field. After relocation, we'll have + // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm + // field, we get &gv. This doesn't happen for branch relocations, which are + // always implicitly pc relative. + if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){ + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4); + } + MCE.addRelocation(R); + + } else if (MO.isMBB()) { + unsigned Reloc = 0; + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::B || Opcode == PPC::BL_Macho || + Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF || + Opcode == PPC::BLA_ELF) + Reloc = PPC::reloc_pcrel_bx; + else // BCC instruction + Reloc = PPC::reloc_pcrel_bcx; + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, MO.getMBB())); + } else { + cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; + abort(); + } + + return rv; +} + +#include "PPCGenCodeEmitter.inc" + diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h new file mode 100644 index 000000000000..1b5893da0ce2 --- /dev/null +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -0,0 +1,93 @@ +//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_FRAMEINFO_H +#define POWERPC_FRAMEINFO_H + +#include "PPC.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class PPCFrameInfo: public TargetFrameInfo { + const TargetMachine &TM; + +public: + PPCFrameInfo(const TargetMachine &tm, bool LP64) + : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) { + } + + /// getReturnSaveOffset - Return the previous frame offset to save the + /// return address. + static unsigned getReturnSaveOffset(bool LP64, bool isMacho) { + if (isMacho) + return LP64 ? 16 : 8; + // For ELF 32 ABI: + return 4; + } + + /// getFramePointerSaveOffset - Return the previous frame offset to save the + /// frame pointer. + static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) { + // For MachO ABI: + // Use the TOC save slot in the PowerPC linkage area for saving the frame + // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 + // is treated as a caller saved register.) + if (isMacho) + return LP64 ? 40 : 20; + + // For ELF 32 ABI: + // Save it right before the link register + return -4U; + } + + /// getLinkageSize - Return the size of the PowerPC ABI linkage area. + /// + static unsigned getLinkageSize(bool LP64, bool isMacho) { + if (isMacho) + return 6 * (LP64 ? 8 : 4); + + // For ELF 32 ABI: + return 8; + } + + /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI + /// argument area. + static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) { + // For Macho ABI: + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if it's varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + if (isMacho) + return 8 * (LP64 ? 8 : 4); + + // For ELF 32 ABI: + // There is no default stack allocated for the 8 first GPR arguments. + return 0; + } + + /// getMinCallFrameSize - Return the minimum size a call frame can be using + /// the PowerPC ABI. + static unsigned getMinCallFrameSize(bool LP64, bool isMacho) { + // The call frame needs to be at least big enough for linkage and 8 args. + return getLinkageSize(LP64, isMacho) + + getMinCallArgumentsSize(LP64, isMacho); + } + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp new file mode 100644 index 000000000000..e7658fc9d4ae --- /dev/null +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -0,0 +1,304 @@ +//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on PowerPC processors.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "PPCHazardRecognizers.h" +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// PowerPC 970 Hazard Recognizer +// +// This models the dispatch group formation of the PPC970 processor. Dispatch +// groups are bundles of up to five instructions that can contain various mixes +// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one +// branch instruction per-cycle. +// +// There are a number of restrictions to dispatch group formation: some +// instructions can only be issued in the first slot of a dispatch group, and some +// instructions fill an entire dispatch group. Additionally, only branches can +// issue in the 5th (last) slot. +// +// Finally, there are a number of "structural" hazards on the PPC970. These +// conditions cause large performance penalties due to misprediction, recovery, +// and replay logic that has to happen. These cases include setting a CTR and +// branching through it in the same dispatch group, and storing to an address, +// then loading from the same address within a dispatch group. To avoid these +// conditions, we insert no-op instructions when appropriate. +// +// FIXME: This is missing some significant cases: +// 1. Modeling of microcoded instructions. +// 2. Handling of serialized operations. +// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". +// + +PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) + : TII(tii) { + EndDispatchGroup(); +} + +void PPCHazardRecognizer970::EndDispatchGroup() { + DOUT << "=== Start of dispatch group\n"; + NumIssued = 0; + + // Structural hazard info. + HasCTRSet = false; + NumStores = 0; +} + + +PPCII::PPC970_Unit +PPCHazardRecognizer970::GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle, + bool &isCracked, + bool &isLoad, bool &isStore) { + if ((int)Opcode >= 0) { + isFirst = isSingle = isCracked = isLoad = isStore = false; + return PPCII::PPC970_Pseudo; + } + Opcode = ~Opcode; + + const TargetInstrDesc &TID = TII.get(Opcode); + + isLoad = TID.mayLoad(); + isStore = TID.mayStore(); + + unsigned TSFlags = TID.TSFlags; + + isFirst = TSFlags & PPCII::PPC970_First; + isSingle = TSFlags & PPCII::PPC970_Single; + isCracked = TSFlags & PPCII::PPC970_Cracked; + return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); +} + +/// isLoadOfStoredAddress - If we have a load from the previously stored pointer +/// as indicated by StorePtr1/StorePtr2/StoreSize, return true. +bool PPCHazardRecognizer970:: +isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { + for (unsigned i = 0, e = NumStores; i != e; ++i) { + // Handle exact and commuted addresses. + if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) + return true; + if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + return true; + + // Okay, we don't have an exact match, if this is an indexed offset, see if + // we have overlap (which happens during fp->int conversion for example). + if (StorePtr2[i] == Ptr2) { + if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) + if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap.
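+ // For example (illustrative offsets): a 1-byte store to [r3+4] + // followed by a 4-byte load from [r3+2] overlaps (2 + 4 > 4), while + // a 4-byte load from [r3+8] does not (4 + 1 <= 8).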
+ int StoreOffs = StoreOffset->getZExtValue(); + int LoadOffs = LoadOffset->getZExtValue(); + if (StoreOffs < LoadOffs) { + if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; + } else { + if (int(LoadOffs+LoadSize) > StoreOffs) return true; + } + } + } + } + return false; +} + +/// getHazardType - We return hazard for any non-branch instruction that would +/// terminate the dispatch group. We return NoopHazard for any +/// instructions that wouldn't terminate the dispatch group but would cause a +/// pipeline flush. +ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: +getHazardType(SUnit *SU) { + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); + bool isFirst, isSingle, isCracked, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; + unsigned Opcode = Node->getMachineOpcode(); + + // We can only issue a PPC970_First/PPC970_Single instruction (such as + // crand/mtspr/etc) if this is the first cycle of the dispatch group. + if (NumIssued != 0 && (isFirst || isSingle)) + return Hazard; + + // If this instruction is cracked into two ops by the decoder, we know that + // it is not a branch and that it cannot issue if 3 other instructions are + // already in the dispatch group. + if (isCracked && NumIssued > 2) + return Hazard; + + switch (InstrType) { + default: assert(0 && "Unknown instruction type!"); + case PPCII::PPC970_FXU: + case PPCII::PPC970_LSU: + case PPCII::PPC970_FPU: + case PPCII::PPC970_VALU: + case PPCII::PPC970_VPERM: + // We can only issue a branch as the last instruction in a group. + if (NumIssued == 4) return Hazard; + break; + case PPCII::PPC970_CRU: + // We can only issue a CR instruction in the first two slots. + if (NumIssued >= 2) return Hazard; + break; + case PPCII::PPC970_BRU: + break; + } + + // Do not allow MTCTR and BCTRL to be in the same dispatch group. + if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF)) + return NoopHazard; + + // If this is a load following a store, make sure it's not to the same or + // overlapping address.
+ if (isLoad && NumStores) { + unsigned LoadSize; + switch (Opcode) { + default: assert(0 && "Unknown load!"); + case PPC::LBZ: case PPC::LBZU: + case PPC::LBZX: + case PPC::LBZ8: case PPC::LBZU8: + case PPC::LBZX8: + case PPC::LVEBX: + LoadSize = 1; + break; + case PPC::LHA: case PPC::LHAU: + case PPC::LHAX: + case PPC::LHZ: case PPC::LHZU: + case PPC::LHZX: + case PPC::LVEHX: + case PPC::LHBRX: + case PPC::LHA8: case PPC::LHAU8: + case PPC::LHAX8: + case PPC::LHZ8: case PPC::LHZU8: + case PPC::LHZX8: + LoadSize = 2; + break; + case PPC::LFS: case PPC::LFSU: + case PPC::LFSX: + case PPC::LWZ: case PPC::LWZU: + case PPC::LWZX: + case PPC::LWA: + case PPC::LWAX: + case PPC::LVEWX: + case PPC::LWBRX: + case PPC::LWZ8: + case PPC::LWZX8: + LoadSize = 4; + break; + case PPC::LFD: case PPC::LFDU: + case PPC::LFDX: + case PPC::LD: case PPC::LDU: + case PPC::LDX: + LoadSize = 8; + break; + case PPC::LVX: + case PPC::LVXL: + LoadSize = 16; + break; + } + + if (isLoadOfStoredAddress(LoadSize, + Node->getOperand(0), Node->getOperand(1))) + return NoopHazard; + } + + return NoHazard; +} + +void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); + bool isFirst, isSingle, isCracked, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return; + unsigned Opcode = Node->getMachineOpcode(); + + // Update structural hazard information. + if (Opcode == PPC::MTCTR) HasCTRSet = true; + + // Track the address stored to. + if (isStore) { + unsigned ThisStoreSize; + switch (Opcode) { + default: assert(0 && "Unknown store instruction!"); + case PPC::STB: case PPC::STB8: + case PPC::STBU: case PPC::STBU8: + case PPC::STBX: case PPC::STBX8: + case PPC::STVEBX: + ThisStoreSize = 1; + break; + case PPC::STH: case PPC::STH8: + case PPC::STHU: case PPC::STHU8: + case PPC::STHX: case PPC::STHX8: + case PPC::STVEHX: + case PPC::STHBRX: + ThisStoreSize = 2; + break; + case PPC::STFS: + case PPC::STFSU: + case PPC::STFSX: + case PPC::STWX: case PPC::STWX8: + case PPC::STWUX: + case PPC::STW: case PPC::STW8: + case PPC::STWU: case PPC::STWU8: + case PPC::STVEWX: + case PPC::STFIWX: + case PPC::STWBRX: + ThisStoreSize = 4; + break; + case PPC::STD_32: + case PPC::STDX_32: + case PPC::STD: + case PPC::STDU: + case PPC::STFD: + case PPC::STFDX: + case PPC::STDX: + case PPC::STDUX: + ThisStoreSize = 8; + break; + case PPC::STVX: + case PPC::STVXL: + ThisStoreSize = 16; + break; + } + + StoreSize[NumStores] = ThisStoreSize; + StorePtr1[NumStores] = Node->getOperand(1); + StorePtr2[NumStores] = Node->getOperand(2); + ++NumStores; + } + + if (InstrType == PPCII::PPC970_BRU || isSingle) + NumIssued = 4; // Terminate a d-group. + ++NumIssued; + + // If this instruction is cracked into two ops by the decoder, remember that + // we issued two pieces. 
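+ // For example, a cracked instruction that issues as the fourth op of a + // group takes NumIssued from 3 to 5, so the check below closes the group.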
+ if (isCracked) + ++NumIssued; + + if (NumIssued == 5) + EndDispatchGroup(); +} + +void PPCHazardRecognizer970::AdvanceCycle() { + assert(NumIssued < 5 && "Illegal dispatch group!"); + ++NumIssued; + if (NumIssued == 5) + EndDispatchGroup(); +} diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h new file mode 100644 index 000000000000..74bf8e52d8fa --- /dev/null +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -0,0 +1,73 @@ +//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling on PowerPC processors. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCHAZRECS_H +#define PPCHAZRECS_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "PPCInstrInfo.h" + +namespace llvm { + +/// PPCHazardRecognizer970 - This class defines a finite state automata that +/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This +/// promotes good dispatch group formation and implements noop insertion to +/// avoid structural hazards that cause significant performance penalties (e.g. +/// setting the CTR register then branching through it within a dispatch group), +/// or storing then loading from the same address within a dispatch group. +class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { + const TargetInstrInfo &TII; + + unsigned NumIssued; // Number of insts issued, including advanced cycles. + + // Various things that can cause a structural hazard. + + // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. + bool HasCTRSet; + + // StoredPtr - Keep track of the address of any store. If we see a load from + // the same address (or one that aliases it), disallow the store. We can have + // up to four stores in one dispatch group, hence we track up to 4. + // + // This is null if we haven't seen a store yet. We keep track of both + // operands of the store here, since we support [r+r] and [r+i] addressing. + SDValue StorePtr1[4], StorePtr2[4]; + unsigned StoreSize[4]; + unsigned NumStores; + +public: + PPCHazardRecognizer970(const TargetInstrInfo &TII); + virtual HazardType getHazardType(SUnit *SU); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + +private: + /// EndDispatchGroup - Called when we are finishing a new dispatch group. + /// + void EndDispatchGroup(); + + /// GetInstrType - Classify the specified powerpc opcode according to its + /// pipeline. + PPCII::PPC970_Unit GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle,bool &isCracked, + bool &isLoad, bool &isStore); + + bool isLoadOfStoredAddress(unsigned LoadSize, + SDValue Ptr1, SDValue Ptr2) const; +}; + +} // end namespace llvm + +#endif + diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp new file mode 100644 index 000000000000..823e3162191e --- /dev/null +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -0,0 +1,1170 @@ +//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for PowerPC, +// converting from a legalized dag to a PPC dag. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppc-codegen" +#include "PPC.h" +#include "PPCPredicates.h" +#include "PPCTargetMachine.h" +#include "PPCISelLowering.h" +#include "PPCHazardRecognizers.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalValue.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + //===--------------------------------------------------------------------===// + /// PPCDAGToDAGISel - PPC specific code to select PPC machine + /// instructions for SelectionDAG operations. + /// + class VISIBILITY_HIDDEN PPCDAGToDAGISel : public SelectionDAGISel { + PPCTargetMachine &TM; + PPCTargetLowering &PPCLowering; + const PPCSubtarget &PPCSubTarget; + unsigned GlobalBaseReg; + public: + explicit PPCDAGToDAGISel(PPCTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm), + PPCLowering(*TM.getTargetLowering()), + PPCSubTarget(*TM.getSubtargetImpl()) {} + + virtual bool runOnFunction(Function &Fn) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (Fn.hasAvailableExternallyLinkage()) + return false; + + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnFunction(Fn); + + InsertVRSaveCode(Fn); + return true; + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getSmallIPtrImm - Return a target constant of pointer type. + inline SDValue getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); + } + + /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s + /// with any number of 0s on either side. The 1s are allowed to wrap from + /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. + /// 0x0F0F0000 is not, since all 1s are not contiguous. + static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); + + + /// isRotateAndMask - Returns true if Mask and Shift can be folded into a + /// rotate and mask opcode and mask operation. + static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask, + unsigned &SH, unsigned &MB, unsigned &ME); + + /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC + /// base register. Return the virtual register that holds this value. + SDNode *getGlobalBaseReg(); + + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. 
+ SDNode *Select(SDValue Op); + + SDNode *SelectBitfieldInsert(SDNode *N); + + /// SelectCC - Select a comparison of the specified values with the + /// specified condition code, returning the CR# of the expression. + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl); + + /// SelectAddrImm - Returns true if the address N can be represented by + /// a base register plus a signed 16-bit displacement [r+imm]. + bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); + } + + /// SelectAddrImmOffs - Return true if the operand is valid for a preinc + /// immediate field. Because preinc imms have already been validated, just + /// accept it. + bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const { + Out = N; + return true; + } + + /// SelectAddrIdx - Given the specified address, check to see if it can be + /// represented as an indexed [r+r] operation. Returns false if it can + /// be represented by [r+imm], which are preferred. + bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index) { + return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); + } + + /// SelectAddrIdxOnly - Given the specified address, force it to be + /// represented as an indexed [r+r] operation. + bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index) { + return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + } + + /// SelectAddrImmShift - Returns true if the address N can be represented by + /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable + /// for use by STD and friends. + bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectAddrIdx(Op, Op, Op0, Op1)) + SelectAddrImm(Op, Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectAddrImm(Op, Op, Op0, Op1)) { + Op0 = Op; + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable + SelectAddrIdxOnly(Op, Op, Op0, Op1); + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } + + SDValue BuildSDIVSequence(SDNode *N); + SDValue BuildUDIVSequence(SDNode *N); + + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); + + void InsertVRSaveCode(Function &Fn); + + virtual const char *getPassName() const { + return "PowerPC DAG->DAG Pattern Instruction Selection"; + } + + /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for + /// this target when scheduling the DAG. + virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { + // Should use subtarget info to pick the right hazard recognizer. For + // now, always return a PPC970 recognizer. + const TargetInstrInfo *II = TM.getInstrInfo(); + assert(II && "No InstrInfo?"); + return new PPCHazardRecognizer970(*II); + } + +// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc" + +private: + SDNode *SelectSETCC(SDValue Op); + }; +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void PPCDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + // Select target instructions for the DAG. + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +/// InsertVRSaveCode - Once the entire function has been instruction selected, +/// all virtual registers are created and all machine instructions are built, +/// check to see if we need to save/restore VRSAVE. If so, do it. +void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) { + // Check to see if this function uses vector registers, which means we have to + // save and restore the VRSAVE register and update it with the regs we use. + // + // In this case, there will be virtual registers of vector type type created + // by the scheduler. Detect them now. + MachineFunction &Fn = MachineFunction::get(&F); + bool HasVectorVReg = false; + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = RegInfo->getLastVirtReg()+1; i != e; ++i) + if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) { + HasVectorVReg = true; + break; + } + if (!HasVectorVReg) return; // nothing to do. + + // If we have a vector register, we want to emit code into the entry and exit + // blocks to save and restore the VRSAVE register. We do this here (instead + // of marking all vector instructions as clobbering VRSAVE) for two reasons: + // + // 1. This (trivially) reduces the load on the register allocator, by not + // having to represent the live range of the VRSAVE register. + // 2. This (more significantly) allows us to create a temporary virtual + // register to hold the saved VRSAVE value, allowing this temporary to be + // register allocated, instead of forcing it to be spilled to the stack. + + // Create two vregs - one to hold the VRSAVE register that is live-in to the + // function and one for the value after having bits or'd into it. + unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + + const TargetInstrInfo &TII = *TM.getInstrInfo(); + MachineBasicBlock &EntryBB = *Fn.begin(); + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Emit the following code into the entry block: + // InVRSAVE = MFVRSAVE + // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE + // MTVRSAVE UpdatedVRSAVE + MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point + BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); + BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), + UpdatedVRSAVE).addReg(InVRSAVE); + BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); + + // Find all return blocks, outputting a restore in each epilog. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (!BB->empty() && BB->back().getDesc().isReturn()) { + IP = BB->end(); --IP; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = IP; + while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) + IP = I2; + + // Emit: MTVRSAVE InVRSave + BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); + } + } +} + + +/// getGlobalBaseReg - Output the instructions required to put the +/// base address to use for accessing globals into a register. 
+/// +SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { + if (!GlobalBaseReg) { + const TargetInstrInfo &TII = *TM.getInstrInfo(); + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = BB->getParent()->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc dl = DebugLoc::getUnknownLoc(); + + if (PPCLowering.getPointerTy() == MVT::i32) { + GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR), PPC::LR); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + } else { + GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8), PPC::LR8); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); + } + } + return CurDAG->getRegister(GlobalBaseReg, + PPCLowering.getPointerTy()).getNode(); +} + +/// isIntS16Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 16-bit value. If so, this returns true and the +/// immediate. +static bool isIntS16Immediate(SDNode *N, short &Imm) { + if (N->getOpcode() != ISD::Constant) + return false; + + Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS16Immediate(SDValue Op, short &Imm) { + return isIntS16Immediate(Op.getNode(), Imm); +} + + +/// isInt32Immediate - This method tests to see if the node is a 32-bit constant +/// operand. If so Imm will receive the 32-bit value. +static bool isInt32Immediate(SDNode *N, unsigned &Imm) { + if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { + Imm = cast<ConstantSDNode>(N)->getZExtValue(); + return true; + } + return false; +} + +/// isInt64Immediate - This method tests to see if the node is a 64-bit constant +/// operand. If so Imm will receive the 64-bit value. +static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { + if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { + Imm = cast<ConstantSDNode>(N)->getZExtValue(); + return true; + } + return false; +} + +// isInt32Immediate - This method tests to see if the operand is a 32-bit +// constant. If so Imm will receive the 32 bit value. +static bool isInt32Immediate(SDValue N, unsigned &Imm) { + return isInt32Immediate(N.getNode(), Imm); +} + + +// isOpcWithIntImmediate - This method tests to see if the node is a specific +// opcode and that it has an immediate integer right operand. +// If so Imm will receive the 32 bit value.
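+// For example, isOpcWithIntImmediate(N, ISD::AND, Imm) matches a node of the +// form (and x, 0xFF00) and leaves Imm == 0xFF00.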
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { + return N->getOpcode() == Opc + && isInt32Immediate(N->getOperand(1).getNode(), Imm); +} + +bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { + if (isShiftedMask_32(Val)) { + // look for the first non-zero bit + MB = CountLeadingZeros_32(Val); + // look for the first zero bit after the run of ones + ME = CountLeadingZeros_32((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_32(Val)) { + // effectively look for the first zero bit + ME = CountLeadingZeros_32(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + +bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, + bool IsShiftMask, unsigned &SH, + unsigned &MB, unsigned &ME) { + // Don't even go down this path for i64, since different logic will be + // necessary for rldicl/rldicr/rldimi. + if (N->getValueType(0) != MVT::i32) + return false; + + unsigned Shift = 32; + unsigned Indeterminant = ~0; // bit mask marking indeterminant results + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() != 2 || + !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) + return false; + + if (Opcode == ISD::SHL) { + // apply shift left to mask if it comes first + if (IsShiftMask) Mask = Mask << Shift; + // determine which bits are made indeterminant by shift + Indeterminant = ~(0xFFFFFFFFu << Shift); + } else if (Opcode == ISD::SRL) { + // apply shift right to mask if it comes first + if (IsShiftMask) Mask = Mask >> Shift; + // determine which bits are made indeterminant by shift + Indeterminant = ~(0xFFFFFFFFu >> Shift); + // adjust for the left rotate + Shift = 32 - Shift; + } else if (Opcode == ISD::ROTL) { + Indeterminant = 0; + } else { + return false; + } + + // if the mask doesn't intersect any Indeterminant bits + if (Mask && !(Mask & Indeterminant)) { + SH = Shift & 31; + // make sure the mask is still a mask (wrap arounds may not be) + return isRunOfOnes(Mask, MB, ME); + } + return false; +} + +/// SelectBitfieldInsert - turn an or of two masked values into +/// the rotate left word immediate then mask insert (rlwimi) instruction. +SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + APInt LKZ, LKO, RKZ, RKO; + CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); + + unsigned TargetMask = LKZ.getZExtValue(); + unsigned InsertMask = RKZ.getZExtValue(); + + if ((TargetMask | InsertMask) == 0xFFFFFFFF) { + unsigned Op0Opc = Op0.getOpcode(); + unsigned Op1Opc = Op1.getOpcode(); + unsigned Value, SH = 0; + TargetMask = ~TargetMask; + InsertMask = ~InsertMask; + + // If the LHS has a foldable shift and the RHS does not, then swap it to the + // RHS so that we can fold the shift into the insert. 
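+ // For illustration (made-up masks): or(and(a, 0xFFFF0000), + // and(shl(b, 4), 0x0000FFF0)) already has its only shift on the RHS, and + // folds below to RLWIMI(and(a, 0xFFFF0000), b, SH=4, MB=16, ME=27).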
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { + if (Op0.getOperand(0).getOpcode() == ISD::SHL || + Op0.getOperand(0).getOpcode() == ISD::SRL) { + if (Op1.getOperand(0).getOpcode() != ISD::SHL && + Op1.getOperand(0).getOpcode() != ISD::SRL) { + std::swap(Op0, Op1); + std::swap(Op0Opc, Op1Opc); + std::swap(TargetMask, InsertMask); + } + } + } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { + if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && + Op1.getOperand(0).getOpcode() != ISD::SRL) { + std::swap(Op0, Op1); + std::swap(Op0Opc, Op1Opc); + std::swap(TargetMask, InsertMask); + } + } + + unsigned MB, ME; + if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) { + SDValue Tmp1, Tmp2, Tmp3; + bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF; + + if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && + isInt32Immediate(Op1.getOperand(1), Value)) { + Op1 = Op1.getOperand(0); + SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; + } + if (Op1Opc == ISD::AND) { + unsigned SHOpc = Op1.getOperand(0).getOpcode(); + if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && + isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { + Op1 = Op1.getOperand(0).getOperand(0); + SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; + } else { + Op1 = Op1.getOperand(0); + } + } + + Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0; + SH &= 31; + SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB), + getI32Imm(ME) }; + return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + } + } + return 0; +} + +/// SelectCC - Select a comparison of the specified values with the specified +/// condition code, returning the CR# of the expression. +SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, DebugLoc dl) { + // Always select the LHS. + unsigned Opc; + + if (LHS.getValueType() == MVT::i32) { + unsigned Imm; + if (CC == ISD::SETEQ || CC == ISD::SETNE) { + if (isInt32Immediate(RHS, Imm)) { + // SETEQ/SETNE comparison with 16-bit immediate, fold it. + if (isUInt16(Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); + // If this is a 16-bit signed immediate, fold it. + if (isInt16((int)Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); + + // For non-equality comparisons, the default code would materialize the + // constant, then compare against it, like this: + // lis r2, 4660 + // ori r2, r2, 22136 + // cmpw cr0, r3, r2 + // Since we are just comparing for equality, we can emit this instead: + // xoris r0,r3,0x1234 + // cmplwi cr0,r0,0x5678 + // beq cr0,L6 + SDValue Xor(CurDAG->getTargetNode(PPC::XORIS, dl, MVT::i32, LHS, + getI32Imm(Imm >> 16)), 0); + return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, Xor, + getI32Imm(Imm & 0xFFFF)), 0); + } + Opc = PPC::CMPLW; + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (isInt32Immediate(RHS, Imm) && isUInt16(Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); + Opc = PPC::CMPLW; + } else { + short SImm; + if (isIntS16Immediate(RHS, SImm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS, + getI32Imm((int)SImm & 0xFFFF)), + 0); + Opc = PPC::CMPW; + } + } else if (LHS.getValueType() == MVT::i64) { + uint64_t Imm; + if (CC == ISD::SETEQ || CC == ISD::SETNE) { + if (isInt64Immediate(RHS.getNode(), Imm)) { + // SETEQ/SETNE comparison with 16-bit immediate, fold it. 
+ if (isUInt16(Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS, + getI32Imm(Imm & 0xFFFF)), 0); + // If this is a 16-bit signed immediate, fold it. + if (isInt16(Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS, + getI32Imm(Imm & 0xFFFF)), 0); + + // For non-equality comparisons, the default code would materialize the + // constant, then compare against it, like this: + // lis r2, 4660 + // ori r2, r2, 22136 + // cmpd cr0, r3, r2 + // Since we are just comparing for equality, we can emit this instead: + // xoris r0,r3,0x1234 + // cmpldi cr0,r0,0x5678 + // beq cr0,L6 + if (isUInt32(Imm)) { + SDValue Xor(CurDAG->getTargetNode(PPC::XORIS8, dl, MVT::i64, LHS, + getI64Imm(Imm >> 16)), 0); + return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, Xor, + getI64Imm(Imm & 0xFFFF)), 0); + } + } + Opc = PPC::CMPLD; + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS, + getI64Imm(Imm & 0xFFFF)), 0); + Opc = PPC::CMPLD; + } else { + short SImm; + if (isIntS16Immediate(RHS, SImm)) + return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS, + getI64Imm(SImm & 0xFFFF)), + 0); + Opc = PPC::CMPD; + } + } else if (LHS.getValueType() == MVT::f32) { + Opc = PPC::FCMPUS; + } else { + assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); + Opc = PPC::FCMPUD; + } + return SDValue(CurDAG->getTargetNode(Opc, dl, MVT::i32, LHS, RHS), 0); +} + +static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { + switch (CC) { + case ISD::SETUEQ: + case ISD::SETONE: + case ISD::SETOLE: + case ISD::SETOGE: + assert(0 && "Should be lowered by legalize!"); + default: assert(0 && "Unknown condition!"); abort(); + case ISD::SETOEQ: + case ISD::SETEQ: return PPC::PRED_EQ; + case ISD::SETUNE: + case ISD::SETNE: return PPC::PRED_NE; + case ISD::SETOLT: + case ISD::SETLT: return PPC::PRED_LT; + case ISD::SETULE: + case ISD::SETLE: return PPC::PRED_LE; + case ISD::SETOGT: + case ISD::SETGT: return PPC::PRED_GT; + case ISD::SETUGE: + case ISD::SETGE: return PPC::PRED_GE; + case ISD::SETO: return PPC::PRED_NU; + case ISD::SETUO: return PPC::PRED_UN; + // These two are invalid for floating point. Assume we have int. + case ISD::SETULT: return PPC::PRED_LT; + case ISD::SETUGT: return PPC::PRED_GT; + } +} + +/// getCRIdxForSetCC - Return the index of the condition register field +/// associated with the SetCC condition, and whether or not the field is +/// treated as inverted. That is, lt = 0; ge = 0 inverted. +/// +/// If this returns with Other != -1, then the returned comparison is an or of +/// two simpler comparisons. In this case, Invert is guaranteed to be false. 
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { + Invert = false; + Other = -1; + switch (CC) { + default: assert(0 && "Unknown condition!"); abort(); + case ISD::SETOLT: + case ISD::SETLT: return 0; // Bit #0 = SETOLT + case ISD::SETOGT: + case ISD::SETGT: return 1; // Bit #1 = SETOGT + case ISD::SETOEQ: + case ISD::SETEQ: return 2; // Bit #2 = SETOEQ + case ISD::SETUO: return 3; // Bit #3 = SETUO + case ISD::SETUGE: + case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE + case ISD::SETULE: + case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE + case ISD::SETUNE: + case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE + case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO + case ISD::SETUEQ: + case ISD::SETOGE: + case ISD::SETOLE: + case ISD::SETONE: + assert(0 && "Invalid branch code: should be expanded by legalize"); + // These are invalid for floating point. Assume integer. + case ISD::SETULT: return 0; + case ISD::SETUGT: return 1; + } + return 0; +} + +SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + unsigned Imm; + ISD::CondCode CC = cast(N->getOperand(2))->get(); + if (isInt32Immediate(N->getOperand(1), Imm)) { + // We can codegen setcc op, imm very efficiently compared to a brcond. + // Check for those cases here. + // setcc op, 0 + if (Imm == 0) { + SDValue Op = N->getOperand(0); + switch (CC) { + default: break; + case ISD::SETEQ: { + Op = SDValue(CurDAG->getTargetNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); + SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + case ISD::SETNE: { + SDValue AD = + SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)), 0); + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, + AD.getValue(1)); + } + case ISD::SETLT: { + SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + case ISD::SETGT: { + SDValue T = + SDValue(CurDAG->getTargetNode(PPC::NEG, dl, MVT::i32, Op), 0); + T = SDValue(CurDAG->getTargetNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); + SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + } + } else if (Imm == ~0U) { // setcc op, -1 + SDValue Op = N->getOperand(0); + switch (CC) { + default: break; + case ISD::SETEQ: + Op = SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(1)), 0); + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(CurDAG->getTargetNode(PPC::LI, dl, + MVT::i32, + getI32Imm(0)), 0), + Op.getValue(1)); + case ISD::SETNE: { + Op = SDValue(CurDAG->getTargetNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); + SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)); + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), + Op, SDValue(AD, 1)); + } + case ISD::SETLT: { + SDValue AD = SDValue(CurDAG->getTargetNode(PPC::ADDI, dl, MVT::i32, Op, + getI32Imm(1)), 0); + SDValue AN = SDValue(CurDAG->getTargetNode(PPC::AND, dl, MVT::i32, AD, + Op), 0); + SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + case ISD::SETGT: { + SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; + Op = SDValue(CurDAG->getTargetNode(PPC::RLWINM, 
dl, MVT::i32, Ops, 4), + 0); + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, + getI32Imm(1)); + } + } + } + } + + bool Inv; + int OtherCondIdx; + unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx); + SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + SDValue IntCR; + + // Force the ccreg into CR7. + SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); + + SDValue InFlag(0, 0); // Null incoming flag value. + CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, + InFlag).getValue(1); + + if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) + IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, + CCReg), 0); + else + IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); + + SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), + getI32Imm(31), getI32Imm(31) }; + if (OtherCondIdx == -1 && !Inv) + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + + // Get the specified bit. + SDValue Tmp = + SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + if (Inv) { + assert(OtherCondIdx == -1 && "Can't have split plus negation"); + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); + } + + // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT. + // We already got the bit for the first part of the comparison (e.g. SETULE). + + // Get the other bit of the comparison. + Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); + SDValue OtherCond = + SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + + return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); +} + + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +SDNode *PPCDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = Op.getDebugLoc(); + if (N->isMachineOpcode()) + return NULL; // Already selected. + + switch (N->getOpcode()) { + default: break; + + case ISD::Constant: { + if (N->getValueType(0) == MVT::i64) { + // Get 64 bit value. + int64_t Imm = cast(N)->getZExtValue(); + // Assume no remaining bits. + unsigned Remainder = 0; + // Assume no shift required. + unsigned Shift = 0; + + // If it can't be represented as a 32 bit value. + if (!isInt32(Imm)) { + Shift = CountTrailingZeros_64(Imm); + int64_t ImmSh = static_cast(Imm) >> Shift; + + // If the shifted value fits 32 bits. + if (isInt32(ImmSh)) { + // Go with the shifted value. + Imm = ImmSh; + } else { + // Still stuck with a 64 bit value. + Remainder = Imm; + Shift = 32; + Imm >>= 32; + } + } + + // Intermediate operand. + SDNode *Result; + + // Handle first 32 bits. + unsigned Lo = Imm & 0xFFFF; + unsigned Hi = (Imm >> 16) & 0xFFFF; + + // Simple value. + if (isInt16(Imm)) { + // Just the Lo bits. + Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); + } else if (Lo) { + // Handle the Hi bits. + unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi)); + // And Lo bits. + Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); + } else { + // Just the Hi bits. + Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); + } + + // If no shift, we're done. + if (!Shift) return Result; + + // Shift for next step if the upper 32-bits were not zero. 
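+      // For example, 0x1234567800000000 has 35 trailing zero bits, so the
+      // shifted value 0x2468ACF fits in 32 bits: it is built above with
+      // LIS8/ORI8 and moved back into place by the RLDICR below.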
+ if (Imm) { + Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64, + SDValue(Result, 0), + getI32Imm(Shift), getI32Imm(63 - Shift)); + } + + // Add in the last bits as required. + if ((Hi = (Remainder >> 16) & 0xFFFF)) { + Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Hi)); + } + if ((Lo = Remainder & 0xFFFF)) { + Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); + } + + return Result; + } + break; + } + + case ISD::SETCC: + return SelectSETCC(Op); + case PPCISD::GlobalBaseReg: + return getGlobalBaseReg(); + + case ISD::FrameIndex: { + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); + unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8; + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI, + getSmallIPtrImm(0)); + return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI, + getSmallIPtrImm(0)); + } + + case PPCISD::MFCR: { + SDValue InFlag = N->getOperand(1); + // Use MFOCRF if supported. + if (PPCSubTarget.isGigaProcessor()) + return CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, + N->getOperand(0), InFlag); + else + return CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, InFlag); + } + + case ISD::SDIV: { + // FIXME: since this depends on the setting of the carry flag from the srawi + // we should really be making notes about that for the scheduler. + // FIXME: It sure would be nice if we could cheaply recognize the + // srl/add/sra pattern the dag combiner will generate for this as + // sra/addze rather than having to handle sdiv ourselves. oh well. + unsigned Imm; + if (isInt32Immediate(N->getOperand(1), Imm)) { + SDValue N0 = N->getOperand(0); + if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { + SDNode *Op = + CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag, + N0, getI32Imm(Log2_32(Imm))); + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(Op, 0), SDValue(Op, 1)); + } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) { + SDNode *Op = + CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag, + N0, getI32Imm(Log2_32(-Imm))); + SDValue PT = + SDValue(CurDAG->getTargetNode(PPC::ADDZE, dl, MVT::i32, + SDValue(Op, 0), SDValue(Op, 1)), + 0); + return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT); + } + } + + // Other cases are autogenerated. + break; + } + + case ISD::LOAD: { + // Handle preincrement loads. + LoadSDNode *LD = cast(Op); + MVT LoadedVT = LD->getMemoryVT(); + + // Normal loads are handled by code generated from the .td file. + if (LD->getAddressingMode() != ISD::PRE_INC) + break; + + SDValue Offset = LD->getOffset(); + if (isa(Offset) || + Offset.getOpcode() == ISD::TargetGlobalAddress) { + + unsigned Opcode; + bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; + if (LD->getValueType(0) != MVT::i64) { + // Handle PPC32 integer and normal FP loads. + assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); + switch (LoadedVT.getSimpleVT()) { + default: assert(0 && "Invalid PPC load type!"); + case MVT::f64: Opcode = PPC::LFDU; break; + case MVT::f32: Opcode = PPC::LFSU; break; + case MVT::i32: Opcode = PPC::LWZU; break; + case MVT::i16: Opcode = isSExt ? 
PPC::LHAU : PPC::LHZU; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZU; break; + } + } else { + assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); + assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); + switch (LoadedVT.getSimpleVT()) { + default: assert(0 && "Invalid PPC load type!"); + case MVT::i64: Opcode = PPC::LDU; break; + case MVT::i32: Opcode = PPC::LWZU8; break; + case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZU8; break; + } + } + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = { Offset, Base, Chain }; + // FIXME: PPC64 + return CurDAG->getTargetNode(Opcode, dl, LD->getValueType(0), + PPCLowering.getPointerTy(), + MVT::Other, Ops, 3); + } else { + assert(0 && "R+R preindex loads not supported yet!"); + } + } + + case ISD::AND: { + unsigned Imm, Imm2, SH, MB, ME; + + // If this is an and of a value rotated between 0 and 31 bits and then and'd + // with a mask, emit rlwinm + if (isInt32Immediate(N->getOperand(1), Imm) && + isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { + SDValue Val = N->getOperand(0).getOperand(0); + SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + // If this is just a masked value where the input is not handled above, and + // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm + if (isInt32Immediate(N->getOperand(1), Imm) && + isRunOfOnes(Imm, MB, ME) && + N->getOperand(0).getOpcode() != ISD::ROTL) { + SDValue Val = N->getOperand(0); + SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + // AND X, 0 -> 0, not "rlwinm 32". + if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { + ReplaceUses(SDValue(N, 0), N->getOperand(1)); + return NULL; + } + // ISD::OR doesn't get all the bitfield insertion fun. + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert + if (isInt32Immediate(N->getOperand(1), Imm) && + N->getOperand(0).getOpcode() == ISD::OR && + isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + unsigned MB, ME; + Imm = ~(Imm^Imm2); + if (isRunOfOnes(Imm, MB, ME)) { + SDValue Ops[] = { N->getOperand(0).getOperand(0), + N->getOperand(0).getOperand(1), + getI32Imm(0), getI32Imm(MB),getI32Imm(ME) }; + return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + } + } + + // Other cases are autogenerated. + break; + } + case ISD::OR: + if (N->getValueType(0) == MVT::i32) + if (SDNode *I = SelectBitfieldInsert(N)) + return I; + + // Other cases are autogenerated. + break; + case ISD::SHL: { + unsigned Imm, SH, MB, ME; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && + isRotateAndMask(N, Imm, true, SH, MB, ME)) { + SDValue Ops[] = { N->getOperand(0).getOperand(0), + getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + + // Other cases are autogenerated. + break; + } + case ISD::SRL: { + unsigned Imm, SH, MB, ME; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && + isRotateAndMask(N, Imm, true, SH, MB, ME)) { + SDValue Ops[] = { N->getOperand(0).getOperand(0), + getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + } + + // Other cases are autogenerated. 
+    break;
+  }
+  case ISD::SELECT_CC: {
+    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+      if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+        if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+          if (N1C->isNullValue() && N3C->isNullValue() &&
+              N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+              // FIXME: Implement this optimization for PPC64.
+              N->getValueType(0) == MVT::i32) {
+            SDNode *Tmp =
+              CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+                                    N->getOperand(0), getI32Imm(~0U));
+            return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+                                        SDValue(Tmp, 0), N->getOperand(0),
+                                        SDValue(Tmp, 1));
+          }
+
+    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+    unsigned BROpc = getPredicateForSetCC(CC);
+
+    unsigned SelectCCOp;
+    if (N->getValueType(0) == MVT::i32)
+      SelectCCOp = PPC::SELECT_CC_I4;
+    else if (N->getValueType(0) == MVT::i64)
+      SelectCCOp = PPC::SELECT_CC_I8;
+    else if (N->getValueType(0) == MVT::f32)
+      SelectCCOp = PPC::SELECT_CC_F4;
+    else if (N->getValueType(0) == MVT::f64)
+      SelectCCOp = PPC::SELECT_CC_F8;
+    else
+      SelectCCOp = PPC::SELECT_CC_VRRC;
+
+    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+                        getI32Imm(BROpc) };
+    return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+  }
+  case PPCISD::COND_BRANCH: {
+    // Op #0 is the Chain.
+    // Op #1 is the PPC::PRED_* number.
+    // Op #2 is the CR#
+    // Op #3 is the Dest MBB
+    // Op #4 is the Flag.
+    // Prevent PPC::PRED_* from being selected into LI.
+    SDValue Pred =
+      getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+      N->getOperand(0), N->getOperand(4) };
+    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+  }
+  case ISD::BR_CC: {
+    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+    SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+                        N->getOperand(4), N->getOperand(0) };
+    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+  }
+  case ISD::BRIND: {
+    // FIXME: Should custom lower this.
+    SDValue Chain = N->getOperand(0);
+    SDValue Target = N->getOperand(1);
+    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+    Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Target,
+                                          Chain), 0);
+    return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
+  }
+  case ISD::DECLARE: {
+    SDValue Chain = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    SDValue N2 = N->getOperand(2);
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+    // FIXME: We need to handle this for VLAs.
+    if (!FINode) {
+      ReplaceUses(Op.getValue(0), Chain);
+      return NULL;
+    }
+
+    if (N2.getOpcode() == ISD::ADD) {
+      if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+          N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+          N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Hi &&
+          N2.getOperand(1).getOpcode() == PPCISD::Lo)
+        N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+      else if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+               N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+               N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Lo &&
+               N2.getOperand(1).getOpcode() == PPCISD::Hi)
+        N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+      else if (N2.getOperand(0).getOpcode() == PPCISD::Hi &&
+               N2.getOperand(1).getOpcode() == PPCISD::Lo)
+        N2 = N2.getOperand(0).getOperand(0);
+    }
+
+    // If we don't have a global address here, the debug info is mangled;
+    // just drop it.
+    if (!isa<GlobalAddressSDNode>(N2)) {
+      ReplaceUses(Op.getValue(0), Chain);
+      return NULL;
+    }
+    int FI = cast<FrameIndexSDNode>(N1)->getIndex();
+    GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal();
+    SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
+    return CurDAG->SelectNodeTo(N, TargetInstrInfo::DECLARE,
+                                MVT::Other, Tmp1, Tmp2, Chain);
+  }
+  }
+
+  return SelectCode(Op);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+  return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..a7744b8f7a4e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,4878 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCPerfectShuffle.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+                                     cl::Hidden);
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+  setPow2DivIsCheap();
+
+  // Use _setjmp/_longjmp instead of setjmp/longjmp.
+  setUseUnderscoreSetJmp(true);
+  setUseUnderscoreLongJmp(true);
+
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // PowerPC has pre-inc loads and stores.
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+  // This is used in the ppcf128->int sequence.  Note it has different semantics
+  // from FP_ROUND: that rounds to nearest, this rounds to zero.
+  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+  // PowerPC has no SREM/UREM instructions.
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+  // We don't support sin/cos/sqrt/fmod/pow.
+  setOperationAction(ISD::FSIN , MVT::f64, Expand);
+  setOperationAction(ISD::FCOS , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FSIN , MVT::f32, Expand);
+  setOperationAction(ISD::FCOS , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+  // Expand FSQRT unless the target has a hardware square root instruction.
+  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+  }
+
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+  // PowerPC does not have BSWAP, CTPOP or CTTZ.
+  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+  setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+  // PowerPC does not have ROTR.
+  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+  setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+  // PowerPC does not have SELECT.
+  setOperationAction(ISD::SELECT, MVT::i32, Expand);
+  setOperationAction(ISD::SELECT, MVT::i64, Expand);
+
setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + + // PowerPC wants to turn select_cc of FP into fsel when possible. + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + // PowerPC wants to optimize integer setcc a bit + setOperationAction(ISD::SETCC, MVT::i32, Custom); + + // PowerPC does not have BRCOND which requires SetCC + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + + // PowerPC does not have [U|S]INT_TO_FP + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); + + // We cannot sextinreg(i1). Expand to shifts. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Support label based line numbers. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + + + // We want to legalize GlobalAddress and ConstantPool nodes into the + // appropriate instructions to materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + // RET must be custom lowered, to meet ABI requirements. + setOperationAction(ISD::RET , MVT::Other, Custom); + + // TRAP is legal. + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // TRAMPOLINE is custom lowered. + setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // VAARG is custom lowered with ELF 32 ABI + if (TM.getSubtarget().isELF32_ABI()) + setOperationAction(ISD::VAARG, MVT::Other, Custom); + else + setOperationAction(ISD::VAARG, MVT::Other, Expand); + + // Use the default implementation. + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); + + // We want to custom lower some of our intrinsics. + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // Comparisons that require checking two conditions. 
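+  // For example, SETUEQ is "unordered or equal"; fcmpu records UN and EQ in
+  // separate CR bits, so matching it directly would take two bit tests.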
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); + setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); + setCondCodeAction(ISD::SETONE, MVT::f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::f64, Expand); + + if (TM.getSubtarget().has64BitSupport()) { + // They also have instructions for converting between i64 and fp. + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + + // FIXME: disable this lowered code. This generates 64-bit register values, + // and we don't model the fact that the top part is clobbered by calls. We + // need to flag these together so that the value isn't live across a call. + //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + } else { + // PowerPC does not have FP_TO_UINT on 32-bit implementations. + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + } + + if (TM.getSubtarget().use64BitRegs()) { + // 64-bit PowerPC implementations can support i64 types directly + addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); + // BUILD_PAIR can't be handled natively, and should be expanded to shl/or + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + // 64-bit PowerPC wants to expand i128 shifts itself. + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + } else { + // 32-bit PowerPC wants to expand i64 shifts itself. + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + } + + if (TM.getSubtarget().hasAltivec()) { + // First set operation action for all vector types to expand. Then we + // will selectively turn on ones that can be effectively codegen'd. + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + + // add/sub are legal for all supported vector VT's. + setOperationAction(ISD::ADD , VT, Legal); + setOperationAction(ISD::SUB , VT, Legal); + + // We promote all shuffles to v16i8. + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); + AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); + + // We promote all non-typed operations to v4i32. 
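+    // For example, an AND of two v8i16 values is bitcast to v4i32 and uses
+    // the v4i32 AND marked Legal below; the bit pattern is unchanged.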
+ setOperationAction(ISD::AND , VT, Promote); + AddPromotedToType (ISD::AND , VT, MVT::v4i32); + setOperationAction(ISD::OR , VT, Promote); + AddPromotedToType (ISD::OR , VT, MVT::v4i32); + setOperationAction(ISD::XOR , VT, Promote); + AddPromotedToType (ISD::XOR , VT, MVT::v4i32); + setOperationAction(ISD::LOAD , VT, Promote); + AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, MVT::v4i32); + + // No other operations are legal. + setOperationAction(ISD::MUL , VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + } + + // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle + // with merges, splats, etc. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); + + setOperationAction(ISD::AND , MVT::v4i32, Legal); + setOperationAction(ISD::OR , MVT::v4i32, Legal); + setOperationAction(ISD::XOR , MVT::v4i32, Legal); + setOperationAction(ISD::LOAD , MVT::v4i32, Legal); + setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::STORE , MVT::v4i32, Legal); + + addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); + addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); + addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); + addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); + + setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v8i16, Custom); + setOperationAction(ISD::MUL, MVT::v16i8, Custom); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + } + + setShiftAmountType(MVT::i32); + setBooleanContents(ZeroOrOneBooleanContent); + + if (TM.getSubtarget().isPPC64()) { + setStackPointerRegisterToSaveRestore(PPC::X1); + setExceptionPointerRegister(PPC::X3); + setExceptionSelectorRegister(PPC::X4); + } else { + setStackPointerRegisterToSaveRestore(PPC::R1); + setExceptionPointerRegister(PPC::R3); + setExceptionSelectorRegister(PPC::R4); + } + + // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::BR_CC); + setTargetDAGCombine(ISD::BSWAP); + + // Darwin long double math library functions have $LDBL128 appended. 
+ if (TM.getSubtarget().isDarwin()) { + setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); + setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); + setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); + setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); + setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); + setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); + setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); + setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); + setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); + setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); + } + + computeRegisterProperties(); +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. +unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { + TargetMachine &TM = getTargetMachine(); + // Darwin passes everything on 4 byte boundary. + if (TM.getSubtarget().isDarwin()) + return 4; + // FIXME Elf TBD + return 4; +} + +const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case PPCISD::FSEL: return "PPCISD::FSEL"; + case PPCISD::FCFID: return "PPCISD::FCFID"; + case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; + case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; + case PPCISD::STFIWX: return "PPCISD::STFIWX"; + case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; + case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; + case PPCISD::VPERM: return "PPCISD::VPERM"; + case PPCISD::Hi: return "PPCISD::Hi"; + case PPCISD::Lo: return "PPCISD::Lo"; + case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; + case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; + case PPCISD::SRL: return "PPCISD::SRL"; + case PPCISD::SRA: return "PPCISD::SRA"; + case PPCISD::SHL: return "PPCISD::SHL"; + case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; + case PPCISD::STD_32: return "PPCISD::STD_32"; + case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF"; + case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; + case PPCISD::MTCTR: return "PPCISD::MTCTR"; + case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; + case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; + case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::MFCR: return "PPCISD::MFCR"; + case PPCISD::VCMP: return "PPCISD::VCMP"; + case PPCISD::VCMPo: return "PPCISD::VCMPo"; + case PPCISD::LBRX: return "PPCISD::LBRX"; + case PPCISD::STBRX: return "PPCISD::STBRX"; + case PPCISD::LARX: return "PPCISD::LARX"; + case PPCISD::STCX: return "PPCISD::STCX"; + case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; + case PPCISD::MFFS: return "PPCISD::MFFS"; + case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; + case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; + case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; + case PPCISD::MTFSF: return "PPCISD::MTFSF"; + case PPCISD::TAILCALL: return "PPCISD::TAILCALL"; + case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + } +} + + +MVT PPCTargetLowering::getSetCCResultType(MVT VT) const { + return MVT::i32; +} + + +//===----------------------------------------------------------------------===// +// Node matching predicates, for use by the tblgen matching code. +//===----------------------------------------------------------------------===// + +/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 
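+/// This also looks through a (possibly extending) load of a constant pool
+/// entry, since the zero may already have been legalized into memory.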
+static bool isFloatingPointZero(SDValue Op) { + if (ConstantFPSDNode *CFP = dyn_cast(Op)) + return CFP->getValueAPF().isZero(); + else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { + // Maybe this has already been legalized into the constant pool? + if (ConstantPoolSDNode *CP = dyn_cast(Op.getOperand(1))) + if (ConstantFP *CFP = dyn_cast(CP->getConstVal())) + return CFP->getValueAPF().isZero(); + } + return false; +} + +/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if it matches the specified value. +static bool isConstantOrUndef(int Op, int Val) { + return Op < 0 || Op == Val; +} + +/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a +/// VPKUHUM instruction. +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { + if (!isUnary) { + for (unsigned i = 0; i != 16; ++i) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) + return false; + } else { + for (unsigned i = 0; i != 8; ++i) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+1)) + return false; + } + return true; +} + +/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a +/// VPKUWUM instruction. +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { + if (!isUnary) { + for (unsigned i = 0; i != 16; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) + return false; + } else { + for (unsigned i = 0; i != 8; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) + return false; + } + return true; +} + +/// isVMerge - Common function, used to match vmrg* shuffles. +/// +static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, + unsigned LHSStart, unsigned RHSStart) { + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && + "Unsupported merge size!"); + + for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units + for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit + if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), + LHSStart+j+i*UnitSize) || + !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), + RHSStart+j+i*UnitSize)) + return false; + } + return true; +} + +/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for +/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { + if (!isUnary) + return isVMerge(N, UnitSize, 8, 24); + return isVMerge(N, UnitSize, 8, 8); +} + +/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for +/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). +bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { + if (!isUnary) + return isVMerge(N, UnitSize, 0, 16); + return isVMerge(N, UnitSize, 0, 0); +} + + +/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift +/// amount, otherwise return -1. +int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + + ShuffleVectorSDNode *SVOp = cast(N); + + // Find the first non-undef value in the shuffle mask. 
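+  // For example, a mask whose first defined element is 4 and whose later
+  // elements count up consecutively (5, 6, ...) is a vsldoi by 4 bytes.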
+  unsigned i;
+  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+    /*search*/;
+
+  if (i == 16) return -1;  // all undef.
+
+  // Otherwise, check to see if the rest of the elements are consecutively
+  // numbered from this value.
+  unsigned ShiftAmt = SVOp->getMaskElt(i);
+  if (ShiftAmt < i) return -1;
+  ShiftAmt -= i;
+
+  if (!isUnary) {
+    // Check the rest of the elements to see if they are consecutive.
+    for (++i; i != 16; ++i)
+      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+        return -1;
+  } else {
+    // Check the rest of the elements to see if they are consecutive.
+    for (++i; i != 16; ++i)
+      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+        return -1;
+  }
+  return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+  assert(N->getValueType(0) == MVT::v16i8 &&
+         (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+  // This is a splat operation if each element of the permute is the same, and
+  // if the value doesn't reference the second vector.
+  unsigned ElementBase = N->getMaskElt(0);
+
+  // FIXME: Handle UNDEF elements too!
+  if (ElementBase >= 16)
+    return false;
+
+  // Check that the indices are consecutive, in the case of a multi-byte element
+  // splatted with a v16i8 mask.
+  for (unsigned i = 1; i != EltSize; ++i)
+    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+      return false;
+
+  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+    if (N->getMaskElt(i) < 0) continue;
+    for (unsigned j = 0; j != EltSize; ++j)
+      if (N->getMaskElt(i+j) != N->getMaskElt(j))
+        return false;
+  }
+  return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+  APInt APVal, APUndef;
+  unsigned BitSize;
+  bool HasAnyUndefs;
+
+  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+      return CFP->getValueAPF().isNegZero();
+
+  return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  assert(isSplatShuffleMask(SVOp, EltSize));
+  return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted.  The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+  SDValue OpVal(0, 0);
+
+  // If ByteSize of the splat is bigger than the element size of the
+  // build_vector, then we have a case where we are checking for a splat where
+  // multiple elements of the buildvector are folded together into a single
+  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+  unsigned EltSize = 16/N->getNumOperands();
+  if (EltSize < ByteSize) {
+    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
+ SDValue UniquedVals[4]; + assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); + + // See if all of the elements in the buildvector agree across. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + // If the element isn't a constant, bail fully out. + if (!isa(N->getOperand(i))) return SDValue(); + + + if (UniquedVals[i&(Multiple-1)].getNode() == 0) + UniquedVals[i&(Multiple-1)] = N->getOperand(i); + else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) + return SDValue(); // no match. + } + + // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains + // either constant or undef values that are identical for each chunk. See + // if these chunks can form into a larger vspltis*. + + // Check to see if all of the leading entries are either 0 or -1. If + // neither, then this won't fit into the immediate field. + bool LeadingZero = true; + bool LeadingOnes = true; + for (unsigned i = 0; i != Multiple-1; ++i) { + if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. + + LeadingZero &= cast(UniquedVals[i])->isNullValue(); + LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); + } + // Finally, check the least significant entry. + if (LeadingZero) { + if (UniquedVals[Multiple-1].getNode() == 0) + return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef + int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); + if (Val < 16) + return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) + } + if (LeadingOnes) { + if (UniquedVals[Multiple-1].getNode() == 0) + return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef + int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); + if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) + return DAG.getTargetConstant(Val, MVT::i32); + } + + return SDValue(); + } + + // Check to see if this buildvec has a single non-undef value in its elements. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.getNode() == 0) + OpVal = N->getOperand(i); + else if (OpVal != N->getOperand(i)) + return SDValue(); + } + + if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. + + unsigned ValSizeInBytes = EltSize; + uint64_t Value = 0; + if (ConstantSDNode *CN = dyn_cast(OpVal)) { + Value = CN->getZExtValue(); + } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { + assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); + Value = FloatToBits(CN->getValueAPF().convertToFloat()); + } + + // If the splat value is larger than the element value, then we can never do + // this splat. The only case that we could fit the replicated bits into our + // immediate field for would be zero, and we prefer to use vxor for it. + if (ValSizeInBytes < ByteSize) return SDValue(); + + // If the element value is larger than the splat value, cut it in half and + // check to see if the two halves are equal. Continue doing this until we + // get to ByteSize. This allows us to handle 0x01010101 as 0x01. + while (ValSizeInBytes > ByteSize) { + ValSizeInBytes >>= 1; + + // If the top half equals the bottom half, we're still ok. + if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != + (Value & ((1 << (8*ValSizeInBytes))-1))) + return SDValue(); + } + + // Properly sign extend the value. 
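+  // For example, with ByteSize == 1, ShAmt is 24, so a splatted byte of
+  // 0xFF sign extends to MaskVal == -1 and is emitted as vspltisb -1.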
+  int ShAmt = (4-ByteSize)*8;
+  int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+  if (MaskVal == 0) return SDValue();
+
+  // Finally, if this value fits in a 5 bit sext field, return it.
+  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+    return DAG.getTargetConstant(MaskVal, MVT::i32);
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value.  If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+  if (N->getOpcode() != ISD::Constant)
+    return false;
+
+  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+  if (N->getValueType(0) == MVT::i32)
+    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+  else
+    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+  return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation.  Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+                                            SDValue &Index,
+                                            SelectionDAG &DAG) const {
+  short imm = 0;
+  if (N.getOpcode() == ISD::ADD) {
+    if (isIntS16Immediate(N.getOperand(1), imm))
+      return false;    // r+i
+    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+      return false;    // r+i
+
+    Base = N.getOperand(0);
+    Index = N.getOperand(1);
+    return true;
+  } else if (N.getOpcode() == ISD::OR) {
+    if (isIntS16Immediate(N.getOperand(1), imm))
+      return false;    // r+i can fold it if we can.
+
+    // If this is an or of disjoint bitfields, we can codegen this as an add
+    // (for better address arithmetic) if the LHS and RHS of the OR are
+    // provably disjoint.
+    APInt LHSKnownZero, LHSKnownOne;
+    APInt RHSKnownZero, RHSKnownOne;
+    DAG.ComputeMaskedBits(N.getOperand(0),
+                          APInt::getAllOnesValue(N.getOperand(0)
+                            .getValueSizeInBits()),
+                          LHSKnownZero, LHSKnownOne);
+
+    if (LHSKnownZero.getBoolValue()) {
+      DAG.ComputeMaskedBits(N.getOperand(1),
+                            APInt::getAllOnesValue(N.getOperand(1)
+                              .getValueSizeInBits()),
+                            RHSKnownZero, RHSKnownOne);
+      // If all of the bits are known zero on the LHS or RHS, the add won't
+      // carry.
+      if (~(LHSKnownZero | RHSKnownZero) == 0) {
+        Base = N.getOperand(0);
+        Index = N.getOperand(1);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+                                            SDValue &Base,
+                                            SelectionDAG &DAG) const {
+  // FIXME dl should come from parent load or store, not from address
+  DebugLoc dl = N.getDebugLoc();
+  // If this can be more profitably realized as r+r, fail.
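+  // For example, (add x, y) with no 16-bit immediate operand was already
+  // matched as [r+r] above, so the indexed form is used instead of [r+imm].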
+ if (SelectAddressRegReg(N, Disp, Base, DAG)) + return false; + + if (N.getOpcode() == ISD::ADD) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm)) { + Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + return true; // [r+i] + } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { + // Match LOAD (ADD (X, Lo(G))). + assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() + && "Cannot handle constant offsets yet!"); + Disp = N.getOperand(1).getOperand(0); // The global address. + assert(Disp.getOpcode() == ISD::TargetGlobalAddress || + Disp.getOpcode() == ISD::TargetConstantPool || + Disp.getOpcode() == ISD::TargetJumpTable); + Base = N.getOperand(0); + return true; // [&g+r] + } + } else if (N.getOpcode() == ISD::OR) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm)) { + // If this is an or of disjoint bitfields, we can codegen this as an add + // (for better address arithmetic) if the LHS and RHS of the OR are + // provably disjoint. + APInt LHSKnownZero, LHSKnownOne; + DAG.ComputeMaskedBits(N.getOperand(0), + APInt::getAllOnesValue(N.getOperand(0) + .getValueSizeInBits()), + LHSKnownZero, LHSKnownOne); + + if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { + // If all of the bits are known zero on the LHS or RHS, the add won't + // carry. + Base = N.getOperand(0); + Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + return true; + } + } + } else if (ConstantSDNode *CN = dyn_cast(N)) { + // Loading from a constant address. + + // If this address fits entirely in a 16-bit sext immediate field, codegen + // this as "d, 0" + short Imm; + if (isIntS16Immediate(CN, Imm)) { + Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); + Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); + return true; + } + + // Handle 32-bit sext immediates with LIS + addr mode. + if (CN->getValueType(0) == MVT::i32 || + (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { + int Addr = (int)CN->getZExtValue(); + + // Otherwise, break this down into an LIS + disp. + Disp = DAG.getTargetConstant((short)Addr, MVT::i32); + + Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); + unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; + Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0); + return true; + } + } + + Disp = DAG.getTargetConstant(0, getPointerTy()); + if (FrameIndexSDNode *FI = dyn_cast(N)) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N; + return true; // [r+0] +} + +/// SelectAddressRegRegOnly - Given the specified addressed, force it to be +/// represented as an indexed [r+r] operation. +bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, + SDValue &Index, + SelectionDAG &DAG) const { + // Check to see if we can easily represent this as an [r+r] address. This + // will fail if it thinks that the address is more profitably represented as + // reg+imm, e.g. where imm = 0. + if (SelectAddressRegReg(N, Base, Index, DAG)) + return true; + + // If the operand is an addition, always emit this as [r+r], since this is + // better (for code size, and execution, as the memop does the add for free) + // than emitting an explicit add. 
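+  // For example, lwzx rD, rA, rB computes rA+rB as part of the load itself,
+  // so no separate add instruction is needed for (load (add rA, rB)).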
+ if (N.getOpcode() == ISD::ADD) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + } + + // Otherwise, do it the hard way, using R0 as the base register. + Base = DAG.getRegister(PPC::R0, N.getValueType()); + Index = N; + return true; +} + +/// SelectAddressRegImmShift - Returns true if the address N can be +/// represented by a base register plus a signed 14-bit displacement +/// [r+imm*4]. Suitable for use by STD and friends. +bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, + SDValue &Base, + SelectionDAG &DAG) const { + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = N.getDebugLoc(); + // If this can be more profitably realized as r+r, fail. + if (SelectAddressRegReg(N, Disp, Base, DAG)) + return false; + + if (N.getOpcode() == ISD::ADD) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + return true; // [r+i] + } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { + // Match LOAD (ADD (X, Lo(G))). + assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() + && "Cannot handle constant offsets yet!"); + Disp = N.getOperand(1).getOperand(0); // The global address. + assert(Disp.getOpcode() == ISD::TargetGlobalAddress || + Disp.getOpcode() == ISD::TargetConstantPool || + Disp.getOpcode() == ISD::TargetJumpTable); + Base = N.getOperand(0); + return true; // [&g+r] + } + } else if (N.getOpcode() == ISD::OR) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { + // If this is an or of disjoint bitfields, we can codegen this as an add + // (for better address arithmetic) if the LHS and RHS of the OR are + // provably disjoint. + APInt LHSKnownZero, LHSKnownOne; + DAG.ComputeMaskedBits(N.getOperand(0), + APInt::getAllOnesValue(N.getOperand(0) + .getValueSizeInBits()), + LHSKnownZero, LHSKnownOne); + if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { + // If all of the bits are known zero on the LHS or RHS, the add won't + // carry. + Base = N.getOperand(0); + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + return true; + } + } + } else if (ConstantSDNode *CN = dyn_cast(N)) { + // Loading from a constant address. Verify low two bits are clear. + if ((CN->getZExtValue() & 3) == 0) { + // If this address fits entirely in a 14-bit sext immediate field, codegen + // this as "d, 0" + short Imm; + if (isIntS16Immediate(CN, Imm)) { + Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); + Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); + return true; + } + + // Fold the low-part of 32-bit absolute addresses into addr mode. + if (CN->getValueType(0) == MVT::i32 || + (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { + int Addr = (int)CN->getZExtValue(); + + // Otherwise, break this down into an LIS + disp. + Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); + Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); + unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
+        Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
+        return true;
+      }
+    }
+  }
+
+  Disp = DAG.getTargetConstant(0, getPointerTy());
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+  else
+    Base = N;
+  return true;  // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                                  SDValue &Offset,
+                                                  ISD::MemIndexedMode &AM,
+                                                  SelectionDAG &DAG) const {
+  // Disabled by default for now.
+  if (!EnablePPCPreinc) return false;
+
+  SDValue Ptr;
+  MVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    VT = LD->getMemoryVT();
+
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    ST = ST;
+    Ptr = ST->getBasePtr();
+    VT  = ST->getMemoryVT();
+  } else
+    return false;
+
+  // PowerPC doesn't have preinc load/store instructions for vectors.
+  if (VT.isVector())
+    return false;
+
+  // TODO: Check reg+reg first.
+
+  // LDU/STU use reg+imm*4, others use reg+imm.
+  if (VT != MVT::i64) {
+    // reg + imm
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+      return false;
+  } else {
+    // reg + imm * 4.
+    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+      return false;
+  }
+
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
+    // sext i32 to i64 when addr mode is r+i.
+    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+        LD->getExtensionType() == ISD::SEXTLOAD &&
+        isa<ConstantSDNode>(Offset))
+      return false;
+  }
+
+  AM = ISD::PRE_INC;
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+                                             SelectionDAG &DAG) {
+  MVT PtrVT = Op.getValueType();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  Constant *C = CP->getConstVal();
+  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  const TargetMachine &TM = DAG.getTarget();
+
+  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
+  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
+
+  // If this is a non-darwin platform, we don't support non-static relo models
+  // yet.
+  if (TM.getRelocationModel() == Reloc::Static ||
+      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+    // Generate non-pic code that has direct accesses to the constant pool.
+    // The address of the global is just (hi(&g)+lo(&g)).
+    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  }
+
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // With PIC, the first instruction is actually "GR+hi(&G)".
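+    // Concretely (illustrative only), the pair lowers to roughly
+    // "addis rD, rPICBASE, ha16(&G)" followed by the lo16 add built below.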
+    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+                     DAG.getNode(PPCISD::GlobalBaseReg,
+                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
+  }
+
+  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  return Lo;
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+  MVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  // FIXME there isn't really any debug loc here
+  DebugLoc dl = Op.getDebugLoc();
+
+  const TargetMachine &TM = DAG.getTarget();
+
+  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
+  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
+
+  // If this is a non-darwin platform, we don't support non-static relo models
+  // yet.
+  if (TM.getRelocationModel() == Reloc::Static ||
+      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+    // Generate non-pic code that has direct accesses to the constant pool.
+    // The address of the global is just (hi(&g)+lo(&g)).
+    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  }
+
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // With PIC, the first instruction is actually "GR+hi(&G)".
+    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+                     DAG.getNode(PPCISD::GlobalBaseReg,
+                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
+  }
+
+  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  return Lo;
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                 SelectionDAG &DAG) {
+  assert(0 && "TLS not implemented for PPC.");
+  return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+                                              SelectionDAG &DAG) {
+  MVT PtrVT = Op.getValueType();
+  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+  GlobalValue *GV = GSDN->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = GSDN->getDebugLoc();
+
+  const TargetMachine &TM = DAG.getTarget();
+
+  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
+  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
+
+  // If this is a non-darwin platform, we don't support non-static relo models
+  // yet.
+  if (TM.getRelocationModel() == Reloc::Static ||
+      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+    // Generate non-pic code that has direct accesses to globals.
+    // The address of the global is just (hi(&g)+lo(&g)).
+    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  }
+
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // With PIC, the first instruction is actually "GR+hi(&G)".
+    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+                     DAG.getNode(PPCISD::GlobalBaseReg,
+                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
+  }
+
+  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+
+  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
+    return Lo;
+
+  // If the global is weak or external, we have to go through the lazy
+  // resolution stub.
+  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // If we're comparing for equality to zero, expose the fact that this is
+  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+  // fold the new nodes.
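+  // For i32 this works because ctlz(x) == 32 exactly when x == 0, so
+  // ctlz(x) >> 5 yields the required 0/1 result.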
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (C->isNullValue() && CC == ISD::SETEQ) {
+      MVT VT = Op.getOperand(0).getValueType();
+      SDValue Zext = Op.getOperand(0);
+      if (VT.bitsLT(MVT::i32)) {
+        VT = MVT::i32;
+        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+      }
+      unsigned Log2b = Log2_32(VT.getSizeInBits());
+      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+                                DAG.getConstant(Log2b, MVT::i32));
+      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+    }
+    // Leave comparisons against 0 and -1 alone for now, since they're usually
+    // optimized.  FIXME: revisit this when we can custom lower all setcc
+    // optimizations.
+    if (C->isAllOnesValue() || C->isNullValue())
+      return SDValue();
+  }
+
+  // If we have an integer seteq/setne, turn it into a compare against zero
+  // by xor'ing the rhs with the lhs, which is faster than setting a
+  // condition register, reading it back out, and masking the correct bit.  The
+  // normal approach here uses sub to do this instead of xor.  Using xor exposes
+  // the result to other bit-twiddling opportunities.
+  MVT LHSVT = Op.getOperand(0).getValueType();
+  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    MVT VT = Op.getValueType();
+    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+                              Op.getOperand(1));
+    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+  }
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+                                      int VarArgsFrameIndex,
+                                      int VarArgsStackOffset,
+                                      unsigned VarArgsNumGPR,
+                                      unsigned VarArgsNumFPR,
+                                      const PPCSubtarget &Subtarget) {
+
+  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+  return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+  DebugLoc dl = Op.getDebugLoc();
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  bool isPPC64 = (PtrVT == MVT::i64);
+  const Type *IntPtrTy =
+    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  Entry.Ty = IntPtrTy;
+  Entry.Node = Trmp; Args.push_back(Entry);
+
+  // TrampSize == (isPPC64 ? 48 : 40);
+  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+                               isPPC64 ? MVT::i64 : MVT::i32);
+  Args.push_back(Entry);
+
+  Entry.Node = FPtr; Args.push_back(Entry);
+  Entry.Node = Nest; Args.push_back(Entry);
+
+  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+  std::pair<SDValue, SDValue> CallResult =
+    LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
+                false, false, CallingConv::C, false,
+                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+                Args, DAG, dl);
+
+  SDValue Ops[] =
+    { CallResult.first, CallResult.second };
+
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+                                        int VarArgsFrameIndex,
+                                        int VarArgsStackOffset,
+                                        unsigned VarArgsNumGPR,
+                                        unsigned VarArgsNumFPR,
+                                        const PPCSubtarget &Subtarget) {
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Subtarget.isMachoABI()) {
+    // vastart just stores the address of the VarArgsFrameIndex slot into the
+    // memory location argument.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+  }
+
+  // For ELF 32 ABI we follow the layout of the va_list struct.
+  // We suppose the given va_list is already allocated.
+  //
+  // typedef struct {
+  //  char gpr;     /* index into the array of 8 GPRs
+  //                 * stored in the register save area
+  //                 * gpr=0 corresponds to r3,
+  //                 * gpr=1 to r4, etc.
+  //                 */
+  //  char fpr;     /* index into the array of 8 FPRs
+  //                 * stored in the register save area
+  //                 * fpr=0 corresponds to f1,
+  //                 * fpr=1 to f2, etc.
+  //                 */
+  //  char *overflow_arg_area;
+  //                /* location on stack that holds
+  //                 * the next overflow argument
+  //                 */
+  //  char *reg_save_area;
+  //                /* where r3:r10 and f1:f8 (if saved)
+  //                 * are stored
+  //                 */
+  // } va_list[1];
+
+
+  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
+  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
+
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
+  SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+  uint64_t FPROffset = 1;
+  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+  // Store first byte : number of int regs
+  SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
+                                    Op.getOperand(1), SV, 0);
+  uint64_t nextOffset = FPROffset;
+  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+                                ConstFPROffset);
+
+  // Store second byte : number of float regs
+  SDValue secondStore =
+    DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
+  nextOffset += StackOffset;
+  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+  // Store second word : arguments given on stack
+  SDValue thirdStore =
+    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
+  nextOffset += FrameOffset;
+  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+  // Store third word : arguments given in registers
+  return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+/// GetFPR - Get the set of FP registers that should be allocated for
+/// arguments, depending on which subtarget is selected.
+static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
+  if (Subtarget.isMachoABI()) {
+    static const unsigned FPR[] = {
+      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+    };
+    return FPR;
+  }
+
+
+  static const unsigned FPR[] = {
+    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+    PPC::F8
+  };
+  return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
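+/// For example, with PtrByteSize == 4, a 10-byte byval argument reserves
+/// ((10 + 4 - 1) / 4) * 4 == 12 bytes.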
+static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
+                                       bool isVarArg, unsigned PtrByteSize) {
+  MVT ArgVT = Arg.getValueType();
+  unsigned ArgSize = ArgVT.getSizeInBits()/8;
+  if (Flags.isByVal())
+    ArgSize = Flags.getByValSize();
+  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+  return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+                                         SelectionDAG &DAG,
+                                         int &VarArgsFrameIndex,
+                                         int &VarArgsStackOffset,
+                                         unsigned &VarArgsNumGPR,
+                                         unsigned &VarArgsNumFPR,
+                                         const PPCSubtarget &Subtarget) {
+  // TODO: add description of PPC stack frame format, or at least some docs.
+  //
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  SmallVector<SDValue, 8> ArgValues;
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  bool isPPC64 = PtrVT == MVT::i64;
+  bool isMachoABI = Subtarget.isMachoABI();
+  bool isELF32_ABI = Subtarget.isELF32_ABI();
+  // Potential tail calls could cause overwriting of argument stack slots.
+  unsigned CC = MF.getFunction()->getCallingConv();
+  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  // Area that is at least reserved in caller of this function.
+  unsigned MinReservedArea = ArgOffset;
+
+  static const unsigned GPR_32[] = {           // 32-bit registers.
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  static const unsigned GPR_64[] = {           // 64-bit registers.
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+
+  static const unsigned *FPR = GetFPR(Subtarget);
+
+  static const unsigned VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+
+  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+  const unsigned Num_VR_Regs  = array_lengthof(VR);
+
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+  // In 32-bit non-varargs functions, the stack space for vectors is after the
+  // stack space for non-vectors.  We do not use this space unless we have
+  // too many vectors to fit in registers, something that only occurs in
+  // constructed examples :), but we have to walk the arglist to figure
+  // that out... for the pathological case, compute VecArgOffset as the
+  // start of the vector parameter area.  Computing VecArgOffset is the
+  // entire point of the following loop.
+  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
+  // to handle Elf here.
+  unsigned VecArgOffset = ArgOffset;
+  if (!isVarArg && !isPPC64) {
+    for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
+         ++ArgNo) {
+      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+      ISD::ArgFlagsTy Flags =
+        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+
+      if (Flags.isByVal()) {
+        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
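+        // e.g. a 6-byte byval with 4-byte registers rounds up to ArgSize == 8,
+        // i.e. two register-sized slots.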
+        ObjSize = Flags.getByValSize();
+        unsigned ArgSize =
+          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+        VecArgOffset += ArgSize;
+        continue;
+      }
+
+      switch(ObjectVT.getSimpleVT()) {
+      default: assert(0 && "Unhandled argument type!");
+      case MVT::i32:
+      case MVT::f32:
+        VecArgOffset += isPPC64 ? 8 : 4;
+        break;
+      case MVT::i64:  // PPC64
+      case MVT::f64:
+        VecArgOffset += 8;
+        break;
+      case MVT::v4f32:
+      case MVT::v4i32:
+      case MVT::v8i16:
+      case MVT::v16i8:
+        // Nothing to do, we're only looking at Nonvector args here.
+        break;
+      }
+    }
+  }
+  // We've found where the vector parameter area in memory is.  Skip the
+  // first 12 parameters; these don't use that memory.
+  VecArgOffset = ((VecArgOffset+15)/16)*16;
+  VecArgOffset += 12*16;
+
+  // Add DAG nodes to load the arguments or copy them out of registers.  On
+  // entry to a function on PPC, the arguments start after the linkage area,
+  // although the first ones are often in registers.
+  //
+  // In the ELF 32 ABI, GPRs and stack are double word align: an argument
+  // represented with two words (long long or double) must be copied to an
+  // even GPR_idx value or to an even ArgOffset value.
+
+  SmallVector<SDValue, 8> MemOps;
+  unsigned nAltivecParamsAtEnd = 0;
+  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+       ArgNo != e; ++ArgNo) {
+    SDValue ArgVal;
+    bool needsLoad = false;
+    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+    unsigned ArgSize = ObjSize;
+    ISD::ArgFlagsTy Flags =
+      cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+    // See if next argument requires stack alignment in ELF
+    bool Align = Flags.isSplit();
+
+    unsigned CurArgOffset = ArgOffset;
+
+    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+      if (isVarArg || isPPC64) {
+        MinReservedArea = ((MinReservedArea+15)/16)*16;
+        MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+                                                  Flags,
+                                                  isVarArg,
+                                                  PtrByteSize);
+      } else  nAltivecParamsAtEnd++;
+    } else
+      // Calculate min reserved area.
+      MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+                                                Flags,
+                                                isVarArg,
+                                                PtrByteSize);
+
+    // FIXME alignment for ELF may not be right
+    // FIXME the codegen can be much improved in some cases.
+    // We do not have to keep everything in memory.
+    if (Flags.isByVal()) {
+      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+      ObjSize = Flags.getByValSize();
+      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      // Double word align in ELF
+      if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+      // Objects of size 1 and 2 are right justified, everything else is
+      // left justified.  This means the memory address is adjusted forwards.
+      if (ObjSize==1 || ObjSize==2) {
+        CurArgOffset = CurArgOffset + (4 - ObjSize);
+      }
+      // The value of the object is its address.
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgValues.push_back(FIN);
+      if (ObjSize==1 || ObjSize==2) {
+        if (GPR_idx != Num_GPR_Regs) {
+          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+                                            NULL, 0,
+                                            ObjSize==1 ? MVT::i8 : MVT::i16);
+          MemOps.push_back(Store);
+          ++GPR_idx;
+          if (isMachoABI) ArgOffset += PtrByteSize;
+        } else {
+          ArgOffset += PtrByteSize;
+        }
+        continue;
+      }
+      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+        // Store whatever pieces of the object are in registers
+        // to memory.  ArgVal will be address of the beginning of
+        // the object.
+        if (GPR_idx != Num_GPR_Regs) {
+          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+          MemOps.push_back(Store);
+          ++GPR_idx;
+          if (isMachoABI) ArgOffset += PtrByteSize;
+        } else {
+          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+          break;
+        }
+      }
+      continue;
+    }
+
+    switch (ObjectVT.getSimpleVT()) {
+    default: assert(0 && "Unhandled argument type!");
+    case MVT::i32:
+      if (!isPPC64) {
+        // Double word align in ELF
+        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+
+        if (GPR_idx != Num_GPR_Regs) {
+          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+          ++GPR_idx;
+        } else {
+          needsLoad = true;
+          ArgSize = PtrByteSize;
+        }
+        // Stack align in ELF
+        if (needsLoad && Align && isELF32_ABI)
+          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+        // All int arguments reserve stack space in Macho ABI.
+        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+        break;
+      }
+      // FALLTHROUGH
+    case MVT::i64:  // PPC64
+      if (GPR_idx != Num_GPR_Regs) {
+        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+
+        if (ObjectVT == MVT::i32) {
+          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+          // value to MVT::i64 and then truncate to the correct register size.
+          if (Flags.isSExt())
+            ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+                                 DAG.getValueType(ObjectVT));
+          else if (Flags.isZExt())
+            ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+                                 DAG.getValueType(ObjectVT));
+
+          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+        }
+
+        ++GPR_idx;
+      } else {
+        needsLoad = true;
+        ArgSize = PtrByteSize;
+      }
+      // All int arguments reserve stack space in Macho ABI.
+      if (isMachoABI || needsLoad) ArgOffset += 8;
+      break;
+
+    case MVT::f32:
+    case MVT::f64:
+      // Every 4 bytes of argument space consumes one of the GPRs available for
+      // argument passing.
+      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
+        ++GPR_idx;
+        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+          ++GPR_idx;
+      }
+      if (FPR_idx != Num_FPR_Regs) {
+        unsigned VReg;
+        if (ObjectVT == MVT::f32)
+          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
+        else
+          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+        ++FPR_idx;
+      } else {
+        needsLoad = true;
+      }
+
+      // Stack align in ELF
+      if (needsLoad && Align && isELF32_ABI)
+        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+      // All FP arguments reserve stack space in Macho ABI.
+      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
+      break;
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+      // Note that vector arguments in registers don't reserve stack space,
+      // except in varargs functions.
+      if (VR_idx != Num_VR_Regs) {
+        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
+        RegInfo.addLiveIn(VR[VR_idx], VReg);
+        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+        if (isVarArg) {
+          while ((ArgOffset % 16) != 0) {
+            ArgOffset += PtrByteSize;
+            if (GPR_idx != Num_GPR_Regs)
+              GPR_idx++;
+          }
+          ArgOffset += 16;
+          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+        }
+        ++VR_idx;
+      } else {
+        if (!isVarArg && !isPPC64) {
+          // Vectors go after all the nonvectors.
+          CurArgOffset = VecArgOffset;
+          VecArgOffset += 16;
+        } else {
+          // Vectors are aligned.
+          ArgOffset = ((ArgOffset+15)/16)*16;
+          CurArgOffset = ArgOffset;
+          ArgOffset += 16;
+        }
+        needsLoad = true;
+      }
+      break;
+    }
+
+    // We need to load the argument to a virtual register if we determined
+    // above that we ran out of physical registers of the appropriate type.
+    if (needsLoad) {
+      int FI = MFI->CreateFixedObject(ObjSize,
+                                      CurArgOffset + (ArgSize - ObjSize),
+                                      isImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+    }
+
+    ArgValues.push_back(ArgVal);
+  }
+
+  // Set the size that is at least reserved in caller of this function.  Tail
+  // call optimized function's reserved stack space needs to be aligned so that
+  // taking the difference between two stack areas will result in an aligned
+  // stack.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  // Add the Altivec parameters at the end, if needed.
+  if (nAltivecParamsAtEnd) {
+    MinReservedArea = ((MinReservedArea+15)/16)*16;
+    MinReservedArea += 16*nAltivecParamsAtEnd;
+  }
+  MinReservedArea =
+    std::max(MinReservedArea,
+             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+    getStackAlignment();
+  unsigned AlignMask = TargetAlign-1;
+  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+  FI->setMinReservedArea(MinReservedArea);
+
+  // If the function takes a variable number of arguments, make a frame index
+  // for the start of the first vararg value... for expansion of llvm.va_start.
+  if (isVarArg) {
+
+    int depth;
+    if (isELF32_ABI) {
+      VarArgsNumGPR = GPR_idx;
+      VarArgsNumFPR = FPR_idx;
+
+      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
+      // pointer.
+      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
+                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
+                PtrVT.getSizeInBits()/8);
+
+      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+                                                  ArgOffset);
+
+    }
+    else
+      depth = ArgOffset;
+
+    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+                                               depth);
+    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
+    // stored to the VarArgsFrameIndex on the stack.
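+    // For instance (illustrative), for "int f(int a, ...)" VarArgsNumGPR is 1,
+    // so the loop below spills r3 and va_arg starts scanning at r4's slot.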
+    if (isELF32_ABI) {
+      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
+        SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
+        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+        MemOps.push_back(Store);
+        // Increment the address by four for the next argument to store
+        SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+      }
+    }
+
+    // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by dereferencing
+    // the result of va_next.
+    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+      unsigned VReg;
+      if (isPPC64)
+        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+      else
+        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+
+      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+      SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+      MemOps.push_back(Store);
+      // Increment the address by four for the next argument to store
+      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+
+    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
+    // on the stack.
+    if (isELF32_ABI) {
+      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
+        SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
+        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+        MemOps.push_back(Store);
+        // Increment the address by eight for the next argument to store
+        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+                                         PtrVT);
+        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+      }
+
+      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
+        unsigned VReg;
+        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
+        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+        MemOps.push_back(Store);
+        // Increment the address by eight for the next argument to store
+        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+                                         PtrVT);
+        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+      }
+    }
+  }
+
+  if (!MemOps.empty())
+    Root = DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &MemOps[0], MemOps.size());
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size());
+}
+
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+                                     bool isPPC64,
+                                     bool isMachoABI,
+                                     bool isVarArg,
+                                     unsigned CC,
+                                     CallSDNode *TheCall,
+                                     unsigned &nAltivecParamsAtEnd) {
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, and parameter passing area.  We start with 24/48 bytes, which is
+  // prereserved space for [SP][CR][LR][3 x unused].
+  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  unsigned NumOps = TheCall->getNumArgs();
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+  // Add up all the space actually used.
+  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+  // they all go in registers, but we must reserve stack space for them for
+  // possible use by the caller.
+  // In varargs or 64-bit calls, parameters are assigned stack space in order,
+  // with padding so Altivec parameters are 16-byte aligned.
+  nAltivecParamsAtEnd = 0;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Arg = TheCall->getArg(i);
+    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    MVT ArgVT = Arg.getValueType();
+    // Varargs Altivec parameters are padded to a 16 byte boundary.
+    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+      if (!isVarArg && !isPPC64) {
+        // Non-varargs Altivec parameters go after all the non-Altivec
+        // parameters; handle those later so we know how much padding we need.
+        nAltivecParamsAtEnd++;
+        continue;
+      }
+      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+      NumBytes = ((NumBytes+15)/16)*16;
+    }
+    NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
+  }
+
+  // Allow for Altivec parameters at the end, if needed.
+  if (nAltivecParamsAtEnd) {
+    NumBytes = ((NumBytes+15)/16)*16;
+    NumBytes += 16*nAltivecParamsAtEnd;
+  }
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is a
+  // varargs function.  Because we cannot tell if this is needed on the caller
+  // side, we have to conservatively assume that it is needed.  As such, make
+  // sure we have at least enough stack space for the caller to store the 8
+  // GPRs.
+  NumBytes = std::max(NumBytes,
+                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+  // Tail call needs the stack to be aligned.
+  if (CC==CallingConv::Fast && PerformTailCallOpt) {
+    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+      getStackAlignment();
+    unsigned AlignMask = TargetAlign-1;
+    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+  }
+
+  return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tail call.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+                                   unsigned ParamSize) {
+
+  if (!IsTailCall) return 0;
+
+  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+  unsigned CallerMinReservedArea = FI->getMinReservedArea();
+  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+  // Remember only if the new adjustment is bigger.
+  if (SPDiff < FI->getTailCallSPDelta())
+    FI->setTailCallSPDelta(SPDiff);
+
+  return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return.  A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+                                                     SDValue Ret,
+                                                     SelectionDAG& DAG) const {
+  // Variable argument functions are not supported.
+  if (!PerformTailCallOpt || TheCall->isVarArg())
+    return false;
+
+  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    unsigned CallerCC = MF.getFunction()->getCallingConv();
+    unsigned CalleeCC = TheCall->getCallingConv();
+    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+      // Functions containing by val parameters are not supported.
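+      // (A byval argument's copy would live in stack space that the tail
+      // call sequence below reuses, so it cannot be preserved reliably.)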
+      for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
+        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+        if (Flags.isByVal()) return false;
+      }
+
+      SDValue Callee = TheCall->getCallee();
+      // Non PIC/GOT tail calls are supported.
+      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+        return true;
+
+      // At the moment we can only do local tail calls (in same module, hidden
+      // or protected) if we are generating PIC.
+      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+        return G->getGlobal()->hasHiddenVisibility()
+            || G->getGlobal()->hasProtectedVisibility();
+    }
+  }
+
+  return false;
+}
+
+/// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+  if (!C) return 0;
+
+  int Addr = C->getZExtValue();
+  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
+      (Addr << 6 >> 6) != Addr)
+    return 0;  // Top 6 bits have to be sext of immediate.
+
+  return DAG.getConstant((int)C->getZExtValue() >> 2,
+                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+  SDValue Arg;
+  SDValue FrameIdxOp;
+  int     FrameIdx;
+
+  TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+                                  SDValue Chain,
+                                  const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+                                  SmallVector<SDValue, 8> &MemOpChains,
+                                  DebugLoc dl) {
+  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+    SDValue Arg = TailCallArgs[i].Arg;
+    SDValue FIN = TailCallArgs[i].FrameIdxOp;
+    int FI = TailCallArgs[i].FrameIdx;
+    // Store relative to framepointer.
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+                                       PseudoSourceValue::getFixedStack(FI),
+                                       0));
+  }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+                                             MachineFunction &MF,
+                                             SDValue Chain,
+                                             SDValue OldRetAddr,
+                                             SDValue OldFP,
+                                             int SPDiff,
+                                             bool isPPC64,
+                                             bool isMachoABI,
+                                             DebugLoc dl) {
+  if (SPDiff) {
+    // Calculate the new stack slot for the return address.
+    int SlotSize = isPPC64 ? 8 : 4;
+    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+                                                                   isMachoABI);
+    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+                                                          NewRetAddrLoc);
+    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+                                                                    isMachoABI);
+    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+
+    MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+                         PseudoSourceValue::getFixedStack(NewRetAddr), 0);
+    SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+    Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+                         PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+  }
+  return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
+/// the position of the argument.
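+/// For illustration: with ArgOffset 24 and SPDiff -16 the fixed object is
+/// created at offset 8, i.e. relative to the adjusted (smaller) frame.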
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+                         SDValue Arg, int SPDiff, unsigned ArgOffset,
+                         SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+  int Offset = ArgOffset + SPDiff;
+  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+  MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+  SDValue FIN = DAG.getFrameIndex(FI, VT);
+  TailCallArgumentInfo Info;
+  Info.Arg = Arg;
+  Info.FrameIdxOp = FIN;
+  Info.FrameIdx = FI;
+  TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit a load from the frame pointer and
+/// return address stack slot. Returns the chain as result and the loaded
+/// frame pointers in LROpOut/FPOpout. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+                                                        int SPDiff,
+                                                        SDValue Chain,
+                                                        SDValue &LROpOut,
+                                                        SDValue &FPOpOut,
+                                                        DebugLoc dl) {
+  if (SPDiff) {
+    // Load the LR and FP stack slot for later adjusting.
+    MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+    LROpOut = getReturnAddrFrameIndex(DAG);
+    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
+    Chain = SDValue(LROpOut.getNode(), 1);
+    FPOpOut = getFramePointerFrameIndex(DAG);
+    FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
+    Chain = SDValue(FPOpOut.getNode(), 1);
+  }
+  return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size".  Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+                          unsigned Size, DebugLoc dl) {
+  SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+                       false, NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
+/// of tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+                 SDValue Arg, SDValue PtrOff, int SPDiff,
+                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
+                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+                 DebugLoc dl) {
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  if (!isTailCall) {
+    if (isVector) {
+      SDValue StackPtr;
+      if (isPPC64)
+        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+      else
+        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+                           DAG.getConstant(ArgOffset, PtrVT));
+    }
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+  // Calculate and remember argument location.
+  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+                                  TailCallArguments);
+}
+
+SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
+                                     const PPCSubtarget &Subtarget,
+                                     TargetMachine &TM) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain = TheCall->getChain();
+  bool isVarArg = TheCall->isVarArg();
+  unsigned CC = TheCall->getCallingConv();
+  bool isTailCall = TheCall->isTailCall()
+                    && CC == CallingConv::Fast && PerformTailCallOpt;
+  SDValue Callee = TheCall->getCallee();
+  unsigned NumOps = TheCall->getNumArgs();
+  DebugLoc dl = TheCall->getDebugLoc();
+
+  bool isMachoABI = Subtarget.isMachoABI();
+  bool isELF32_ABI = Subtarget.isELF32_ABI();
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  bool isPPC64 = PtrVT == MVT::i64;
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
+  // SelectExpr to use to put the arguments in the appropriate registers.
+  std::vector<SDValue> args_to_use;
+
+  // Mark this function as potentially containing a function that contains a
+  // tail call. As a consequence the frame pointer will be used for dynamic
+  // stack allocation and for restoring the caller's stack pointer in this
+  // function's epilog. This is done because by tail calling the called
+  // function might overwrite the value in this function's (MF) stack pointer
+  // stack slot 0(SP).
+  if (PerformTailCallOpt && CC==CallingConv::Fast)
+    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+  unsigned nAltivecParamsAtEnd = 0;
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, and parameter passing area.  We start with 24/48 bytes, which is
+  // prereserved space for [SP][CR][LR][3 x unused].
+  unsigned NumBytes =
+    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
+                                         TheCall, nAltivecParamsAtEnd);
+
+  // Calculate by how many bytes the stack has to be adjusted in case of tail
+  // call optimization.
+  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog pass
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  SDValue CallSeqStart = Chain;
+
+  // Load the return address and frame pointer so they can be moved somewhere
+  // else later.
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
+
+  // Set up a copy of the stack pointer for use loading and storing any
+  // arguments that may not fit in the registers available for argument
+  // passing.
+  SDValue StackPtr;
+  if (isPPC64)
+    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+  else
+    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+  // Figure out which arguments are going to go in registers, and which in
+  // memory.  Also, if this is a vararg function, floating point operations
+  // must be stored to our stack, and loaded into integer regs as well, if
+  // any integer regs are available for argument passing.
+  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  static const unsigned GPR_32[] = {           // 32-bit registers.
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  static const unsigned GPR_64[] = {           // 64-bit registers.
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+  static const unsigned *FPR = GetFPR(Subtarget);
+
+  static const unsigned VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+  const unsigned NumGPRs = array_lengthof(GPR_32);
+  const unsigned NumFPRs = isMachoABI ? 13 : 8;
+  const unsigned NumVRs  = array_lengthof(VR);
+
+  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+  SmallVector<SDValue, 8> MemOpChains;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    bool inMem = false;
+    SDValue Arg = TheCall->getArg(i);
+    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    // See if next argument requires stack alignment in ELF
+    bool Align = Flags.isSplit();
+
+    // PtrOff will be used to store the current argument to the stack if a
+    // register cannot be found for it.
+    SDValue PtrOff;
+
+    // Stack align in ELF 32
+    if (isELF32_ABI && Align)
+      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
+                               StackPtr.getValueType());
+    else
+      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+    // On PPC64, promote integers to 64-bit values.
+    if (isPPC64 && Arg.getValueType() == MVT::i32) {
+      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+    }
+
+    // FIXME Elf untested, what are alignment rules?
+    // FIXME memcpy is used way more than necessary.  Correctness first.
+    if (Flags.isByVal()) {
+      unsigned Size = Flags.getByValSize();
+      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+      if (Size==1 || Size==2) {
+        // Very small objects are passed right-justified.
+        // Everything else is passed left-justified.
+        MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+        if (GPR_idx != NumGPRs) {
+          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+                                        NULL, 0, VT);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+          if (isMachoABI)
+            ArgOffset += PtrByteSize;
+        } else {
+          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+                                CallSeqStart.getNode()->getOperand(0),
+                                Flags, DAG, Size, dl);
+          // This must go outside the CALLSEQ_START..END.
+          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                               CallSeqStart.getNode()->getOperand(1));
+          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+                                 NewCallSeqStart.getNode());
+          Chain = CallSeqStart = NewCallSeqStart;
+          ArgOffset += PtrByteSize;
+        }
+        continue;
+      }
+      // Copy entire object into memory.  There are cases where gcc-generated
+      // code assumes it is there, even if it could be put entirely into
+      // registers.  (This is not what the doc says.)
+      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+                            CallSeqStart.getNode()->getOperand(0),
+                            Flags, DAG, Size, dl);
+      // This must go outside the CALLSEQ_START..END.
+      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                           CallSeqStart.getNode()->getOperand(1));
+      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+      Chain = CallSeqStart = NewCallSeqStart;
+      // And copy the pieces of it that fit into registers.
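+      // e.g. a 12-byte aggregate with 4-byte pointers is reloaded as words at
+      // offsets 0, 4 and 8, each placed in the next free GPR (or left in
+      // memory once the GPRs run out).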
+      for (unsigned j=0; j NumVRs) {
+    unsigned j = 0;
+    // Offset is aligned; skip 1st 12 params which go in V registers.
+    ArgOffset = ((ArgOffset+15)/16)*16;
+    ArgOffset += 12*16;
+    for (unsigned i = 0; i != NumOps; ++i) {
+      SDValue Arg = TheCall->getArg(i);
+      MVT ArgType = Arg.getValueType();
+      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+        if (++j > NumVRs) {
+          SDValue PtrOff;
+          // We are emitting Altivec params in order.
+          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+                           isPPC64, isTailCall, true, MemOpChains,
+                           TailCallArguments, dl);
+          ArgOffset += 16;
+        }
+      }
+    }
+  }
+
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
+  if (isVarArg && isELF32_ABI) {
+    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+  // might overwrite each other in case of tail call optimization.
+  if (isTailCall) {
+    SmallVector<SDValue, 8> MemOpChains2;
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+    InFlag = SDValue();
+    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+                                      MemOpChains2, dl);
+    if (!MemOpChains2.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                          &MemOpChains2[0], MemOpChains2.size());
+
+    // Store the return address to the appropriate stack slot.
+    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+                                          isPPC64, isMachoABI, dl);
+  }
+
+  // Emit callseq_end just before tailcall node.
+  if (isTailCall) {
+    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               DAG.getIntPtrConstant(0, true), InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  std::vector<MVT> NodeTys;
+  NodeTys.push_back(MVT::Other);   // Returns a chain
+  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+
+  SmallVector<SDValue, 8> Ops;
+  unsigned CallOpc = isMachoABI ? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+    // If this is an absolute destination address, use the munged value.
+    Callee = SDValue(Dest, 0);
+  else {
+    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
+    // to do the call, we can't use PPCISD::CALL.
+    SDValue MTCTROps[] = {Chain, Callee, InFlag};
+    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+                        2 + (InFlag.getNode() != 0));
+    InFlag = Chain.getValue(1);
+
+    // Copy the callee address into R12/X12 on darwin.
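+    // (The Darwin lazy-binding stubs expect the target of an indirect call
+    // in r12/x12, hence the extra copy below.)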
+    if (isMachoABI) {
+      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
+      Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::Other);
+    NodeTys.push_back(MVT::Flag);
+    Ops.push_back(Chain);
+    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
+    Callee.setNode(0);
+    // Add CTR register as callee so a bctr can be emitted later.
+    if (isTailCall)
+      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
+  }
+
+  // If this is a direct call, pass the chain and the callee.
+  if (Callee.getNode()) {
+    Ops.push_back(Chain);
+    Ops.push_back(Callee);
+  }
+  // If this is a tail call add stack pointer delta.
+  if (isTailCall)
+    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // When performing tail call optimization the callee pops its arguments off
+  // the stack. Account for this here so these bytes can be pushed back on in
+  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+  int BytesCalleePops =
+    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  // Emit tail call.
+  if (isTailCall) {
+    assert(InFlag.getNode() &&
+           "Flag must be set. Depend on flag being set in LowerRET");
+    Chain = DAG.getNode(PPCISD::TAILCALL, dl,
+                        TheCall->getVTList(), &Ops[0], Ops.size());
+    return SDValue(Chain.getNode(), Op.getResNo());
+  }
+
+  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(BytesCalleePops, true),
+                             InFlag);
+  if (TheCall->getValueType(0) != MVT::Other)
+    InFlag = Chain.getValue(1);
+
+  SmallVector<SDValue, 16> ResultVals;
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
+  CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &VA = RVLocs[i];
+    MVT VT = VA.getValVT();
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyFromReg(Chain, dl,
+                               VA.getLocReg(), VT, InFlag).getValue(1);
+    ResultVals.push_back(Chain.getValue(0));
+    InFlag = Chain.getValue(2);
+  }
+
+  // If the function returns void, just return the chain.
+  if (RVLocs.empty())
+    return Chain;
+
+  // Otherwise, merge everything together with a MERGE_VALUES node.
+  ResultVals.push_back(Chain);
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+                            &ResultVals[0], ResultVals.size());
+  return Res.getValue(Op.getResNo());
+}
+
+SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
+                                    TargetMachine &TM) {
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+  CCState CCInfo(CC, isVarArg, TM, RVLocs);
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Chain = Op.getOperand(0);
+
+  Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
+  if (Chain.getOpcode() == PPCISD::TAILCALL) {
+    SDValue TailCall = Chain;
+    SDValue TargetAddress = TailCall.getOperand(1);
+    SDValue StackAdjustment = TailCall.getOperand(2);
+
+    assert(((TargetAddress.getOpcode() == ISD::Register &&
+             cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
+            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+            TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
+            isa<ConstantSDNode>(TargetAddress)) &&
+           "Expecting a global address, external symbol, absolute value or register");
+
+    assert(StackAdjustment.getOpcode() == ISD::Constant &&
+           "Expecting a const value");
+
+    SmallVector<SDValue, 8> Operands;
+    Operands.push_back(Chain.getOperand(0));
+    Operands.push_back(TargetAddress);
+    Operands.push_back(StackAdjustment);
+    // Copy registers used by the call. Last operand is a flag so it is not
+    // copied.
+    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+      Operands.push_back(Chain.getOperand(i));
+    }
+    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+                       Operands.size());
+  }
+
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Op.getOperand(i*2+1), Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  if (Flag.getNode())
+    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+  else
+    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+                                             const PPCSubtarget &Subtarget) {
+  // When we pop the dynamic allocation we need to restore the SP link.
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Get the correct type for pointers.
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  // Construct the stack pointer operand.
+  bool IsPPC64 = Subtarget.isPPC64();
+  unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+  // Get the operands for the STACKRESTORE.
+  SDValue Chain = Op.getOperand(0);
+  SDValue SaveSP = Op.getOperand(1);
+
+  // Load the old link SP.
+  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
+
+  // Restore the stack pointer.
+  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+  // Store the old link SP.
+  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool IsPPC64 = PPCSubTarget.isPPC64();
+  bool isMachoABI = PPCSubTarget.isMachoABI();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  // Get the current return address save index.  The users of this index will
+  // be primarily DYNALLOC instructions.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  int RASI = FI->getReturnAddrSaveIndex();
+
+  // If the return address save index hasn't been defined yet.
+  if (!RASI) {
+    // Find out the fixed offset of the return address save area.
+    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
+    // Allocate the frame index for the return address save area.
+    RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+    // Save the result.
+    FI->setReturnAddrSaveIndex(RASI);
+  }
+  return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool IsPPC64 = PPCSubTarget.isPPC64();
+  bool isMachoABI = PPCSubTarget.isMachoABI();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+  // Get the current frame pointer save index.  The users of this index will
+  // be primarily DYNALLOC instructions.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  int FPSI = FI->getFramePointerSaveIndex();
+
+  // If the frame pointer save index hasn't been defined yet.
+  if (!FPSI) {
+    // Find out the fixed offset of the frame pointer save area.
+    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
+
+    // Allocate the frame index for the frame pointer save area.
+    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+    // Save the result.
+    FI->setFramePointerSaveIndex(FPSI);
+  }
+  return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                                   SelectionDAG &DAG,
+                                                   const PPCSubtarget &Subtarget) {
+  // Get the inputs.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size  = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Get the correct type for pointers.
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  // Negate the size.
+  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+                                DAG.getConstant(0, PtrVT), Size);
+  // Construct a node for the frame pointer save index.
+  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+  // Build a DYNALLOC node.
+  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
+/// possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  // Not FP? Not a fsel.
+  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+      !Op.getOperand(2).getValueType().isFloatingPoint())
+    return Op;
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  // Cannot handle SETEQ/SETNE.
+  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+  MVT ResVT = Op.getValueType();
+  MVT CmpVT = Op.getOperand(0).getValueType();
+  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // If the RHS of the comparison is a 0.0, we don't need to do the
+  // subtraction at all.
+  if (isFloatingPointZero(RHS))
+    switch (CC) {
+    default: break;       // SETUO etc aren't handled by fsel.
+    case ISD::SETULT:
+    case ISD::SETLT:
+      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
+    case ISD::SETOGE:
+    case ISD::SETGE:
+      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+    case ISD::SETUGT:
+    case ISD::SETGT:
+      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
+    case ISD::SETOLE:
+    case ISD::SETLE:
+      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+    }
+
+  SDValue Cmp;
+  switch (CC) {
+  default: break;       // SETUO etc aren't handled by fsel.
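+  // fsel selects on "operand >= 0", so each case below materializes LHS-RHS
+  // (or RHS-LHS) and orders TV/FV to match; e.g. setge becomes
+  // fsel(LHS-RHS, TV, FV), since LHS-RHS >= 0 iff LHS >= RHS (NaNs aside,
+  // which is why the ordered/unordered pairs share a case).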
+ case ISD::SETULT: + case ISD::SETLT: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + case ISD::SETOGE: + case ISD::SETGE: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + case ISD::SETUGT: + case ISD::SETGT: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + case ISD::SETOLE: + case ISD::SETLE: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + } + return Op; +} + +// FIXME: Split this code up when LegalizeDAGTypes lands. +SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, + DebugLoc dl) { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getValueType().getSimpleVT()) { + default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src); + break; + case MVT::i64: + Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); + break; + } + + // Convert the FP value to an int value through memory. + SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); + + // Emit a store to the stack slot. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + + // Result is a load from the stack slot. If loading 4 bytes, make sure to + // add in a bias. + if (Op.getValueType() == MVT::i32) + FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, + DAG.getConstant(4, FIPtr.getValueType())); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); +} + +SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // Don't handle ppc_fp128 here; let it be lowered to a libcall. + if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) + return SDValue(); + + if (Op.getOperand(0).getValueType() == MVT::i64) { + SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f64, Op.getOperand(0)); + SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); + if (Op.getValueType() == MVT::f32) + FP = DAG.getNode(ISD::FP_ROUND, dl, + MVT::f32, FP, DAG.getIntPtrConstant(0)); + return FP; + } + + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "Unhandled SINT_TO_FP type in custom expander!"); + // Since we only generate this in 64-bit mode, we can take advantage of + // 64-bit registers. In particular, sign extend the input value into the + // 64-bit register with extsw, store the WHOLE 64-bit value into the stack + // then lfd it and fcfid it. 
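+  // Roughly the following sequence (a sketch, not the literal emitted asm):
+  //   extsw rT, rSrc     ; sign-extend the i32 to 64 bits
+  //   std   rT, <slot>   ; spill the whole 64-bit value
+  //   lfd   fT, <slot>   ; reload it into an FP register
+  //   fcfid fD, fT       ; convert the i64 bit pattern to f64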
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+                              Op.getOperand(0));
+
+  // STD the extended value into the stack slot.
+  MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
+                       MachineMemOperand::MOStore, 0, 8, 8);
+  SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
+                              DAG.getEntryNode(), Ext64, FIdx,
+                              DAG.getMemOperand(MO));
+  // Load the value as a double.
+  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
+
+  // FCFID it and return it.
+  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+  if (Op.getValueType() == MVT::f32)
+    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+  return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  /*
+   The rounding mode is in bits 30:31 of FPSCR, and has the following
+   settings:
+     00 Round to nearest
+     01 Round to 0
+     10 Round to +inf
+     11 Round to -inf
+
+  FLT_ROUNDS, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+
+  To perform the conversion, we do:
+    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+  */
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MVT VT = Op.getValueType();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  std::vector<MVT> NodeTys;
+  SDValue MFFSreg, InFlag;
+
+  // Save FP Control Word to register
+  NodeTys.push_back(MVT::f64);    // return register
+  NodeTys.push_back(MVT::Flag);   // unused in this context
+  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+  // Save FP register to stack slot
+  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+                               StackSlot, NULL, 0);
+
+  // Load FP Control Word from low 32 bits of stack slot.
+  SDValue Four = DAG.getConstant(4, PtrVT);
+  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
+
+  // Transform as necessary
+  SDValue CWD1 =
+    DAG.getNode(ISD::AND, dl, MVT::i32,
+                CWD, DAG.getConstant(3, MVT::i32));
+  SDValue CWD2 =
+    DAG.getNode(ISD::SRL, dl, MVT::i32,
+                DAG.getNode(ISD::AND, dl, MVT::i32,
+                            DAG.getNode(ISD::XOR, dl, MVT::i32,
+                                        CWD, DAG.getConstant(3, MVT::i32)),
+                            DAG.getConstant(3, MVT::i32)),
+                DAG.getConstant(1, MVT::i32));
+
+  SDValue RetVal =
+    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+  return DAG.getNode((VT.getSizeInBits() < 16 ?
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  unsigned BitWidth = VT.getSizeInBits();
+  DebugLoc dl = Op.getDebugLoc();
+  assert(Op.getNumOperands() == 3 &&
+         VT == Op.getOperand(1).getValueType() &&
+         "Unexpected SHL!");
+
+  // Expand into a bunch of logical ops.  Note that these ops
+  // depend on the PPC behavior for oversized shift amounts.
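+  // Sketch of the expansion (Lo/Hi are the value's halves, Amt the shift):
+  //   OutLo = Lo << Amt
+  //   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
+  // At most one of the two Lo terms contributes, because PPC shifts produce
+  // 0 for out-of-range amounts, so no select is needed.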
+ SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + MVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); + SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned BitWidth = VT.getSizeInBits(); + assert(Op.getNumOperands() == 3 && + VT == Op.getOperand(1).getValueType() && + "Unexpected SRL!"); + + // Expand into a bunch of logical ops. Note that these ops + // depend on the PPC behavior for oversized shift amounts. + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + MVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); + SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + MVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + assert(Op.getNumOperands() == 3 && + VT == Op.getOperand(1).getValueType() && + "Unexpected SRA!"); + + // Expand into a bunch of logical ops, followed by a select_cc. + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + MVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); + SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); + SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), + Tmp4, Tmp6, ISD::SETLE); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +//===----------------------------------------------------------------------===// +// Vector related lowering. +// + +/// BuildSplatI - Build a canonical splati of Val with an element size of +/// SplatSize. Cast the result to VT. 
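+/// For example, BuildSplatI(-1, 4, MVT::v4i32, ...) builds the constant splat
+/// that the selector should match as "vspltisw vD, -1" (illustrative usage
+/// only; the canonicalization below folds any -1 splat to the byte form).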
+static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
+                           SelectionDAG &DAG, DebugLoc dl) {
+  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+  static const MVT VTys[] = { // canonical VT to use for each size.
+    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+  };
+
+  MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+  if (Val == -1)
+    SplatSize = 1;
+
+  MVT CanonicalVT = VTys[SplatSize-1];
+
+  // Build a canonical splat for this value.
+  SDValue Elt = DAG.getConstant(Val, MVT::i32);
+  SmallVector<SDValue, 8> Ops;
+  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+                            &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+                                SelectionDAG &DAG, DebugLoc dl,
+                                MVT DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+                                SDValue Op2, SelectionDAG &DAG,
+                                DebugLoc dl, MVT DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount.  The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+                           MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+  // Force LHS/RHS to be the right type.
+  LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
+  RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
+
+  int Ops[16];
+  for (unsigned i = 0; i != 16; ++i)
+    Ops[i] = i + Amt;
+  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.  If we CAN select this case, and if it
+// selects to a single instruction, return Op.  Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+  // Check if this is a splat of a constant value.
+  APInt APSplatBits, APSplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                            HasAnyUndefs) || SplatBitSize > 32)
+    return SDValue();
+
+  unsigned SplatBits = APSplatBits.getZExtValue();
+  unsigned SplatUndef = APSplatUndef.getZExtValue();
+  unsigned SplatSize = SplatBitSize / 8;
+
+  // First, handle single instruction cases.
+
+  // All zeros?
+  if (SplatBits == 0) {
+    // Canonicalize all zero vectors to be v4i32.
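+    // (Presumably so that identical zero vectors of any element type CSE to
+    // a single node and a single instruction.)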
+    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+      SDValue Z = DAG.getConstant(0, MVT::i32);
+      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+      Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+    }
+    return Op;
+  }
+
+  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
+                     (32-SplatBitSize));
+  if (SextVal >= -16 && SextVal <= 15)
+    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+  // Two instruction sequences.
+
+  // If this value is in the range [-32,30] and is even, use:
+  //    tmp = VSPLTI[bhw], result = add tmp, tmp
+  if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+    SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+    Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+  }
+
+  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
+  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
+  // for fneg/fabs.
+  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF & ~SplatUndef)) {
+    // Make -1 and vspltisw -1:
+    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+    // Make the VSLW intrinsic, computing 0x8000_0000.
+    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+                                   OnesV, DAG, dl);
+
+    // xor by OnesV to invert it.
+    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+  }
+
+  // Check to see if this is a wide variety of vsplti*, binop self cases.
+  static const signed char SplatCsts[] = {
+    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+  };
+
+  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+    // cases which are ambiguous (e.g. formation of 0x8000_0000).
+    int i = SplatCsts[idx];
+
+    // Figure out what shift amount will be used by altivec if shifted by i in
+    // this splat size.
+    unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+    // vsplti + shl self.
+    if (SextVal == (i << (int)TypeShiftAmt)) {
+      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+      static const unsigned IIDs[] = { // Intrinsic to use for each size.
+        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+        Intrinsic::ppc_altivec_vslw
+      };
+      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+    }
+
+    // vsplti + srl self.
+    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+      static const unsigned IIDs[] = { // Intrinsic to use for each size.
+        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+        Intrinsic::ppc_altivec_vsrw
+      };
+      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+    }
+
+    // vsplti + sra self.  (Note: an arithmetic shift, unlike the srl case
+    // above, so the sign bit of i propagates.)
+    if (SextVal == (int)((int)i >> TypeShiftAmt)) {
+      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+      static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, + Intrinsic::ppc_altivec_vsraw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } + + // vsplti + rol self. + if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | + ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, + Intrinsic::ppc_altivec_vrlw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } + + // t = vsplti c, result = vsldoi t, t, 1 + if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); + } + // t = vsplti c, result = vsldoi t, t, 2 + if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); + } + // t = vsplti c, result = vsldoi t, t, 3 + if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); + } + } + + // Three instruction sequences. + + // Odd, in range [17,31]: (vsplti C)-(vsplti -16). + if (SextVal >= 0 && SextVal <= 31) { + SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + } + // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). + if (SextVal >= -31 && SextVal <= 0) { + SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + } + + return SDValue(); +} + +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. 
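+/// As decoded below, a PFEntry packs its cost into bits 31:30, the operation
+/// into bits 29:26, and two 13-bit operand IDs into the low 26 bits; each ID
+/// is a base-9 encoding of four source elements, where digit 8 means undef.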
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+                                      SDValue RHS, SelectionDAG &DAG,
+                                      DebugLoc dl) {
+  unsigned OpNum = (PFEntry >> 26) & 0x0F;
+  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
+
+  enum {
+    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+    OP_VMRGHW,
+    OP_VMRGLW,
+    OP_VSPLTISW0,
+    OP_VSPLTISW1,
+    OP_VSPLTISW2,
+    OP_VSPLTISW3,
+    OP_VSLDOI4,
+    OP_VSLDOI8,
+    OP_VSLDOI12
+  };
+
+  if (OpNum == OP_COPY) {
+    if (LHSID == (1*9+2)*9+3) return LHS;
+    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+    return RHS;
+  }
+
+  SDValue OpLHS, OpRHS;
+  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+  int ShufIdxs[16];
+  switch (OpNum) {
+  default: assert(0 && "Unknown i32 permute!");
+  case OP_VMRGHW:
+    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
+    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
+    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+    break;
+  case OP_VMRGLW:
+    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+    break;
+  case OP_VSPLTISW0:
+    for (unsigned i = 0; i != 16; ++i)
+      ShufIdxs[i] = (i&3)+0;
+    break;
+  case OP_VSPLTISW1:
+    for (unsigned i = 0; i != 16; ++i)
+      ShufIdxs[i] = (i&3)+4;
+    break;
+  case OP_VSPLTISW2:
+    for (unsigned i = 0; i != 16; ++i)
+      ShufIdxs[i] = (i&3)+8;
+    break;
+  case OP_VSPLTISW3:
+    for (unsigned i = 0; i != 16; ++i)
+      ShufIdxs[i] = (i&3)+12;
+    break;
+  case OP_VSLDOI4:
+    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+  case OP_VSLDOI8:
+    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+  case OP_VSLDOI12:
+    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+  }
+  MVT VT = OpLHS.getValueType();
+  OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
+  OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
+/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
+/// return the code it can be lowered into.  Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+                                               SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  MVT VT = Op.getValueType();
+
+  // Cases that are handled by instructions that take permute immediates
+  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+  // selected by the instruction selector.
+  if (V2.getOpcode() == ISD::UNDEF) {
+    if (PPC::isSplatShuffleMask(SVOp, 1) ||
+        PPC::isSplatShuffleMask(SVOp, 2) ||
+        PPC::isSplatShuffleMask(SVOp, 4) ||
+        PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+        PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+        PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+        PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+        PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+        PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+        PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+        PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+        PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+      return Op;
+    }
+  }
+
+  // Altivec has a variety of "shuffle immediates" that take two vector inputs
+  // and produce a fixed permutation.  If any of these match, do not lower to
+  // VPERM.
+  if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+      PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+      PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+      PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+      PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+      PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+      PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+      PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+      PPC::isVMRGHShuffleMask(SVOp, 4, false))
+    return Op;
+
+  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
+  // perfect shuffle table to emit an optimal matching sequence.
+  SmallVector<int, 16> PermMask;
+  SVOp->getMask(PermMask);
+
+  unsigned PFIndexes[4];
+  bool isFourElementShuffle = true;
+  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+    unsigned EltNo = 8;   // Start out undef.
+    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
+      if (PermMask[i*4+j] < 0)
+        continue;   // Undef, ignore it.
+
+      unsigned ByteSource = PermMask[i*4+j];
+      if ((ByteSource & 3) != j) {
+        isFourElementShuffle = false;
+        break;
+      }
+
+      if (EltNo == 8) {
+        EltNo = ByteSource/4;
+      } else if (EltNo != ByteSource/4) {
+        isFourElementShuffle = false;
+        break;
+      }
+    }
+    PFIndexes[i] = EltNo;
+  }
+
+  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+  // perfect shuffle table to determine if it is cost effective to do this as
+  // discrete instructions, or whether we should use a vperm.
+  if (isFourElementShuffle) {
+    // Compute the index in the perfect shuffle table.
+    unsigned PFTableIndex =
+      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+    unsigned Cost = (PFEntry >> 30);
+
+    // Determining when to avoid vperm is tricky.  Many things affect the cost
+    // of vperm, particularly how many times the perm mask needs to be
+    // computed.  For example, if the perm mask can be hoisted out of a loop or
+    // is already used (perhaps because there are multiple permutes with the
+    // same shuffle mask?), the vperm has a cost of 1.  OTOH, hoisting the
+    // permute mask out of the loop requires an extra register.
+    //
+    // As a compromise, we only emit discrete instructions if the shuffle can
+    // be generated in 3 or fewer operations.  When we have loop information
+    // available, if this block is within a loop, we should avoid using vperm
+    // for 3-operation perms and use a constant pool load instead.
+    if (Cost < 3)
+      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+  }
+
+  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+  // vector that will get spilled to the constant pool.
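+  // vperm selects each result byte from the 32-byte concatenation of the two
+  // input vectors, so the mask built below is in byte units: the source
+  // element index scaled by the element's byte width, plus the byte offset
+  // within the element.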
+  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+  // that it is in input element units, not in bytes.  Convert now.
+  MVT EltVT = V1.getValueType().getVectorElementType();
+  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+  SmallVector<SDValue, 16> ResultMask;
+  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+    for (unsigned j = 0; j != BytesPerElement; ++j)
+      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+                                           MVT::i32));
+  }
+
+  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+                                  &ResultMask[0], ResultMask.size());
+  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison.  If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+                                  bool &isDot) {
+  unsigned IntrinsicID =
+    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+  CompareOpc = -1;
+  isDot = false;
+  switch (IntrinsicID) {
+  default: return false;
+    // Comparison predicates.
+  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+    // Normal Comparisons.
+  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
+  }
+  return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
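+/// (For the "_p" predicate forms the compare executes in record form, so a
+/// summary of the result lands in CR6; the code below copies CR6 into a GPR
+/// with MFCR and extracts the requested bit.)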
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                   SelectionDAG &DAG) {
+  // If this is a lowered altivec predicate compare, CompareOpc is set to the
+  // opcode number of the comparison.
+  DebugLoc dl = Op.getDebugLoc();
+  int CompareOpc;
+  bool isDot;
+  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+    return SDValue();    // Don't custom lower most intrinsics.
+
+  // If this is a non-dot comparison, make the VCMP node and we are done.
+  if (!isDot) {
+    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+                              Op.getOperand(1), Op.getOperand(2),
+                              DAG.getConstant(CompareOpc, MVT::i32));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
+  }
+
+  // Create the PPCISD altivec 'dot' comparison node.
+  SDValue Ops[] = {
+    Op.getOperand(2),  // LHS
+    Op.getOperand(3),  // RHS
+    DAG.getConstant(CompareOpc, MVT::i32)
+  };
+  std::vector<MVT> VTs;
+  VTs.push_back(Op.getOperand(2).getValueType());
+  VTs.push_back(MVT::Flag);
+  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+  // Now that we have the comparison, emit a copy from the CR to a GPR.
+  // This is flagged to the above dot comparison.
+  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+                              DAG.getRegister(PPC::CR6, MVT::i32),
+                              CompNode.getValue(1));
+
+  // Unpack the result based on how the target uses it.
+  unsigned BitNo;   // Bit # of CR6.
+  bool InvertBit;   // Invert result?
+  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+  default:  // Can't happen, don't crash on invalid number though.
+  case 0:   // Return the value of the EQ bit of CR6.
+    BitNo = 0; InvertBit = false;
+    break;
+  case 1:   // Return the inverted value of the EQ bit of CR6.
+    BitNo = 0; InvertBit = true;
+    break;
+  case 2:   // Return the value of the LT bit of CR6.
+    BitNo = 2; InvertBit = false;
+    break;
+  case 3:   // Return the inverted value of the LT bit of CR6.
+    BitNo = 2; InvertBit = true;
+    break;
+  }
+
+  // Shift the bit into the low position.
+  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+                      DAG.getConstant(8-(3-BitNo), MVT::i32));
+  // Isolate the bit.
+  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+                      DAG.getConstant(1, MVT::i32));
+
+  // If we are supposed to, toggle the bit.
+  if (InvertBit)
+    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+                        DAG.getConstant(1, MVT::i32));
+  return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+                                                 SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Create a stack slot that is 16-byte aligned.
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+  // Store the input value into Value#0 of the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+                               Op.getOperand(0), FIdx, NULL, 0);
+  // Load it out.
+  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  if (Op.getValueType() == MVT::v4i32) {
+    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
+    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
+
+    SDValue RHSSwap =   // = vrlw RHS, 16
+      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+    // Shrinkify inputs to v8i16.
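+    // Sketch of the math: with a = aHi:aLo and b = bHi:bLo per 32-bit lane,
+    //   a*b mod 2^32 = aLo*bLo + ((aLo*bHi + aHi*bLo) << 16).
+    // vmulouh forms aLo*bLo, vmsumuhm against the halfword-rotated RHS forms
+    // aLo*bHi + aHi*bLo, and the vslw below shifts that sum into the high half.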
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS); + RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS); + RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap); + + // Low parts multiplied together, generating 32-bit results (we ignore the + // top parts). + SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, + LHS, RHS, DAG, dl, MVT::v4i32); + + SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, + LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); + // Shift the high parts up 16 bits. + HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, + Neg16, DAG, dl); + return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); + } else if (Op.getValueType() == MVT::v8i16) { + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + + SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); + + return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, + LHS, RHS, Zero, DAG, dl); + } else if (Op.getValueType() == MVT::v16i8) { + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + + // Multiply the even 8-bit parts, producing 16-bit sums. + SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, + LHS, RHS, DAG, dl, MVT::v8i16); + EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts); + + // Multiply the odd 8-bit parts, producing 16-bit sums. + SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, + LHS, RHS, DAG, dl, MVT::v8i16); + OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); + + // Merge the results together. + int Ops[16]; + for (unsigned i = 0; i != 8; ++i) { + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; + } + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); + } else { + assert(0 && "Unknown mul to lower!"); + abort(); + } +} + +/// LowerOperation - Provide custom lowering hooks for some operations. +/// +SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: assert(0 && "Wasn't expecting to be able to lower this!"); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, + VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + + case ISD::VAARG: + return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, + VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + + case ISD::FORMAL_ARGUMENTS: + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, + VarArgsStackOffset, VarArgsNumGPR, + VarArgsNumFPR, PPCSubTarget); + + case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, + getTargetMachine()); + case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, + Op.getDebugLoc()); + case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + + // Lower 64-bit shifts. 
+  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
+  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
+  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
+
+  // Vector-related lowering.
+  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::MUL:                return LowerMUL(Op, DAG);
+
+  // Frame & Return address.
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+  }
+  return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  DebugLoc dl = N->getDebugLoc();
+  switch (N->getOpcode()) {
+  default:
+    assert(false && "Do not know how to custom type legalize this operation!");
+    return;
+  case ISD::FP_ROUND_INREG: {
+    assert(N->getValueType(0) == MVT::ppcf128);
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                             MVT::f64, N->getOperand(0),
+                             DAG.getIntPtrConstant(0));
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                             MVT::f64, N->getOperand(0),
+                             DAG.getIntPtrConstant(1));
+
+    // This sequence changes FPSCR to do round-to-zero, adds the two halves
+    // of the long double, and puts FPSCR back the way it was.  We do not
+    // actually model FPSCR.
+    std::vector<MVT> NodeTys;
+    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+    NodeTys.push_back(MVT::f64);   // Return register
+    NodeTys.push_back(MVT::Flag);  // Returns a flag for later insns
+    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+    MFFSreg = Result.getValue(0);
+    InFlag = Result.getValue(1);
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::Flag);  // Returns a flag
+    Ops[0] = DAG.getConstant(31, MVT::i32);
+    Ops[1] = InFlag;
+    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+    InFlag = Result.getValue(0);
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::Flag);  // Returns a flag
+    Ops[0] = DAG.getConstant(30, MVT::i32);
+    Ops[1] = InFlag;
+    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+    InFlag = Result.getValue(0);
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::f64);   // result of add
+    NodeTys.push_back(MVT::Flag);  // Returns a flag
+    Ops[0] = Lo;
+    Ops[1] = Hi;
+    Ops[2] = InFlag;
+    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+    FPreg = Result.getValue(0);
+    InFlag = Result.getValue(1);
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::f64);
+    Ops[0] = DAG.getConstant(1, MVT::i32);
+    Ops[1] = MFFSreg;
+    Ops[2] = FPreg;
+    Ops[3] = InFlag;
+    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+    FPreg = Result.getValue(0);
+
+    // We know the low half is about to be thrown away, so just use something
+    // convenient.
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+                                  FPreg, FPreg));
+    return;
+  }
+  case ISD::FP_TO_SINT:
+    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+    return;
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+                                    bool is64bit, unsigned BinOpcode) const {
+  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned TmpReg = (!BinOpcode) ? incr : + RegInfo.createVirtualRegister( + is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // l[wd]arx dest, ptr + // add r0, dest, incr + // st[wd]cx. r0, ptr + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + .addReg(ptrA).addReg(ptrB); + if (BinOpcode) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(TmpReg).addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * +PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *BB, + bool is8bit, // operation + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + // In 64 bit mode we have to use 64 bits for addresses, even though the + // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address + // registers without caring whether they're 32 or 64, but here we're + // doing actual arithmetic on the addresses. + bool is64bit = PPCSubTarget.isPPC64(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = + is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned ShiftReg = RegInfo.createVirtualRegister(RC); + unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + unsigned Ptr1Reg; + unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // The 4-byte load must be aligned, while a char or short may be + // anywhere in the word. Hence all this nasty bookkeeping code. + // add ptr1, ptrA, ptrB [copy if ptrA==0] + // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] + // xori shift, shift1, 24 [16] + // rlwinm ptr, ptr1, 0, 0, 29 + // slw incr2, incr, shift + // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] + // slw mask, mask2, shift + // loopMBB: + // lwarx tmpDest, ptr + // add tmp, tmpDest, incr2 + // andc tmp2, tmpDest, mask + // and tmp3, tmp, mask + // or tmp4, tmp3, tmp2 + // stwcx. tmp4, ptr + // bne- loopMBB + // fallthrough --> exitMBB + // srw dest, tmpDest, shift + + if (ptrA!=PPC::R0) { + Ptr1Reg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + .addReg(ptrA).addReg(ptrB); + } else { + Ptr1Reg = ptrB; + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); + else + BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); + BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) + .addReg(incr).addReg(ShiftReg); + if (is8bit) + BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); + else { + BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); + BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); + } + BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) + .addReg(Mask2Reg).addReg(ShiftReg); + + BB = loopMBB; + BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) + .addReg(PPC::R0).addReg(PtrReg); + if (BinOpcode) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) + .addReg(Incr2Reg).addReg(TmpDestReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) + .addReg(TmpReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) + .addReg(Tmp3Reg).addReg(Tmp2Reg); + BuildMI(BB, dl, TII->get(PPC::STWCX)) + .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... 
+ BB = exitMBB; + BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); + return BB; +} + +MachineBasicBlock * +PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // To "insert" these instructions we actually have to insert their + // control-flow patterns. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + MachineFunction *F = BB->getParent(); + + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + + // The incoming instruction knows the destination vreg to set, the + // condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + sinkMBB->transferSuccessors(BB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... 
+ BB = sinkMBB; + BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + } + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) + BB = EmitAtomicBinary(MI, BB, false, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) + BB = EmitAtomicBinary(MI, BB, true, 0); + + else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + 
unsigned oldval = MI->getOperand(3).getReg(); + unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loop1MBB); + F->insert(It, loop2MBB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // l[wd]arx dest, ptr + // cmp[wd] dest, oldval + // bne- midMBB + // loop2MBB: + // st[wd]cx. newval, ptr + // bne- loopMBB + // b exitBB + // midMBB: + // st[wd]cx. dest, ptr + // exitBB: + BB = loop1MBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + .addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) + .addReg(oldval).addReg(dest); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(midMBB); + + BB = loop2MBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(newval).addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(dest).addReg(ptrA).addReg(ptrB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { + // We must use 64-bit registers for addresses when targeting 64-bit, + // since we're actually doing arithmetic on them. Other registers + // can be 32-bit. + bool is64bit = PPCSubTarget.isPPC64(); + bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned oldval = MI->getOperand(3).getReg(); + unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loop1MBB); + F->insert(It, loop2MBB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = + is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned ShiftReg = RegInfo.createVirtualRegister(RC); + unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + unsigned Ptr1Reg; + unsigned TmpReg = RegInfo.createVirtualRegister(RC); + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loop1MBB); + + // The 4-byte load must be aligned, while a char or short may be + // anywhere in the word. Hence all this nasty bookkeeping code. + // add ptr1, ptrA, ptrB [copy if ptrA==0] + // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] + // xori shift, shift1, 24 [16] + // rlwinm ptr, ptr1, 0, 0, 29 + // slw newval2, newval, shift + // slw oldval2, oldval,shift + // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] + // slw mask, mask2, shift + // and newval3, newval2, mask + // and oldval3, oldval2, mask + // loop1MBB: + // lwarx tmpDest, ptr + // and tmp, tmpDest, mask + // cmpw tmp, oldval3 + // bne- midMBB + // loop2MBB: + // andc tmp2, tmpDest, mask + // or tmp4, tmp2, newval3 + // stwcx. tmp4, ptr + // bne- loop1MBB + // b exitBB + // midMBB: + // stwcx. tmpDest, ptr + // exitBB: + // srw dest, tmpDest, shift + if (ptrA!=PPC::R0) { + Ptr1Reg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + .addReg(ptrA).addReg(ptrB); + } else { + Ptr1Reg = ptrB; + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + .addReg(Shift1Reg).addImm(is8bit ? 
+                                       24 : 16);
+    if (is64bit)
+      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+        .addReg(Ptr1Reg).addImm(0).addImm(61);
+    else
+      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+      .addReg(newval).addReg(ShiftReg);
+    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+      .addReg(oldval).addReg(ShiftReg);
+    if (is8bit)
+      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+    else {
+      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+        .addReg(Mask3Reg).addImm(65535);
+    }
+    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+      .addReg(Mask2Reg).addReg(ShiftReg);
+    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+      .addReg(NewVal2Reg).addReg(MaskReg);
+    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+      .addReg(OldVal2Reg).addReg(MaskReg);
+
+    BB = loop1MBB;
+    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+      .addReg(PPC::R0).addReg(PtrReg);
+    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+      .addReg(TmpDestReg).addReg(MaskReg);
+    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+      .addReg(TmpReg).addReg(OldVal3Reg);
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+    BB->addSuccessor(loop2MBB);
+    BB->addSuccessor(midMBB);
+
+    BB = loop2MBB;
+    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+      .addReg(TmpDestReg).addReg(MaskReg);
+    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+      .addReg(Tmp2Reg).addReg(NewVal3Reg);
+    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+      .addReg(PPC::R0).addReg(PtrReg);
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+    BB->addSuccessor(loop1MBB);
+    BB->addSuccessor(exitMBB);
+
+    BB = midMBB;
+    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+      .addReg(PPC::R0).addReg(PtrReg);
+    BB->addSuccessor(exitMBB);
+
+    // exitMBB:
+    //  ...
+    BB = exitMBB;
+    BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpReg).addReg(ShiftReg);
+  } else {
+    assert(0 && "Unexpected instr type to insert");
+  }
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+                                             DAGCombinerInfo &DCI) const {
+  TargetMachine &TM = getTargetMachine();
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc dl = N->getDebugLoc();
+  switch (N->getOpcode()) {
+  default: break;
+  case PPCISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+      if (C->getZExtValue() == 0)   // 0 << V -> 0.
+        return N->getOperand(0);
+    }
+    break;
+  case PPCISD::SRL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+      if (C->getZExtValue() == 0)   // 0 >>u V -> 0.
+        return N->getOperand(0);
+    }
+    break;
+  case PPCISD::SRA:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+      if (C->getZExtValue() == 0 ||   //  0 >>s V -> 0.
+          C->isAllOnesValue())        // -1 >>s V -> -1.
+        return N->getOperand(0);
+    }
+    break;
+
+  case ISD::SINT_TO_FP:
+    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+        // We allow the src/dst to be either f32/f64, but the intermediate
+        // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 && + N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { + SDValue Val = N->getOperand(0).getOperand(0); + if (Val.getValueType() == MVT::f32) { + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + } + + Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + if (N->getValueType(0) == MVT::f32) { + Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, + DAG.getIntPtrConstant(0)); + DCI.AddToWorklist(Val.getNode()); + } + return Val; + } else if (N->getOperand(0).getValueType() == MVT::i32) { + // If the intermediate type is i32, we can avoid the load/store here + // too. + } + } + } + break; + case ISD::STORE: + // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). + if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && + !cast<StoreSDNode>(N)->isTruncatingStore() && + N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && + N->getOperand(1).getValueType() == MVT::i32 && + N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { + SDValue Val = N->getOperand(1).getOperand(0); + if (Val.getValueType() == MVT::f32) { + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + } + Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + + Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, + N->getOperand(2), N->getOperand(3)); + DCI.AddToWorklist(Val.getNode()); + return Val; + } + + // Turn STORE (BSWAP) -> sthbrx/stwbrx. + if (N->getOperand(1).getOpcode() == ISD::BSWAP && + N->getOperand(1).getNode()->hasOneUse() && + (N->getOperand(1).getValueType() == MVT::i32 || + N->getOperand(1).getValueType() == MVT::i16)) { + SDValue BSwapOp = N->getOperand(1).getOperand(0); + // Do an any-extend to 32-bits if this is a half-word input. + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); + + return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0), + BSwapOp, N->getOperand(2), N->getOperand(3), + DAG.getValueType(N->getOperand(1).getValueType())); + } + break; + case ISD::BSWAP: + // Turn BSWAP (LOAD) -> lhbrx/lwbrx. + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + // Create the byte-swapping load. + std::vector<MVT> VTs; + VTs.push_back(MVT::i32); + VTs.push_back(MVT::Other); + SDValue MO = DAG.getMemOperand(LD->getMemOperand()); + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + MO, // MemOperand + DAG.getValueType(N->getValueType(0)) // VT + }; + SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4); + + // If this is an i16 load, insert the truncate. + SDValue ResVal = BSLoad; + if (N->getValueType(0) == MVT::i16) + ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); + + // First, combine the bswap away. This makes the value produced by the + // load dead. + DCI.CombineTo(N, ResVal); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the bswap is dead. + DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); + + // Return N so it doesn't get rechecked!
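+ // (Editor's note, illustrative; not in the imported file.) At the source level + // the BSWAP (LOAD) pattern handled above typically comes from code such as + // 'uint32_t v = __builtin_bswap32(*p);', which now selects a single lwbrx.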
+ return SDValue(N, 0); + } + + break; + case PPCISD::VCMP: { + // If a VCMPo node already exists with exactly the same operands as this + // node, use its result instead of this node (VCMPo computes both a CR6 and + // a normal output). + // + if (!N->getOperand(0).hasOneUse() && + !N->getOperand(1).hasOneUse() && + !N->getOperand(2).hasOneUse()) { + + // Scan all of the users of the LHS, looking for VCMPo's that match. + SDNode *VCMPoNode = 0; + + SDNode *LHSN = N->getOperand(0).getNode(); + for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); + UI != E; ++UI) + if (UI->getOpcode() == PPCISD::VCMPo && + UI->getOperand(1) == N->getOperand(1) && + UI->getOperand(2) == N->getOperand(2) && + UI->getOperand(0) == N->getOperand(0)) { + VCMPoNode = *UI; + break; + } + + // If there is no VCMPo node, or if the flag value has a single use, don't + // transform this. + if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) + break; + + // Look at the (necessarily single) use of the flag value. If it has a + // chain, this transformation is more complex. Note that multiple things + // could use the value result, which we should ignore. + SDNode *FlagUser = 0; + for (SDNode::use_iterator UI = VCMPoNode->use_begin(); + FlagUser == 0; ++UI) { + assert(UI != VCMPoNode->use_end() && "Didn't find user!"); + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { + FlagUser = User; + break; + } + } + } + + // If the user is a MFCR instruction, we know this is safe. Otherwise we + // give up for right now. + if (FlagUser->getOpcode() == PPCISD::MFCR) + return SDValue(VCMPoNode, 0); + } + break; + } + case ISD::BR_CC: { + // If this is a branch on an altivec predicate comparison, lower this so + // that we don't have to do a MFCR: instead, branch directly on CR6. This + // lowering is done pre-legalize, because the legalizer lowers the predicate + // compare down to code that is difficult to reassemble. + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); + int CompareOpc; + bool isDot; + + if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && + getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + assert(isDot && "Can't compare against a vector result!"); + + // If this is a comparison against something other than 0/1, then we know + // that the condition is never/always true. + unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); + if (Val != 0 && Val != 1) { + if (CC == ISD::SETEQ) // Cond never true, remove branch. + return N->getOperand(0); + // Always !=, turn it into an unconditional branch. + return DAG.getNode(ISD::BR, dl, MVT::Other, + N->getOperand(0), N->getOperand(4)); + } + + bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); + + // Create the PPCISD altivec 'dot' comparison node. + std::vector<MVT> VTs; + SDValue Ops[] = { + LHS.getOperand(2), // LHS of compare + LHS.getOperand(3), // RHS of compare + DAG.getConstant(CompareOpc, MVT::i32) + }; + VTs.push_back(LHS.getOperand(2).getValueType()); + VTs.push_back(MVT::Flag); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + + // Unpack the result based on how the target uses it. + PPC::Predicate CompOpc; + switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { + default: // Can't happen, don't crash on invalid number though. + case 0: // Branch on the value of the EQ bit of CR6. + CompOpc = BranchOnWhenPredTrue ?
PPC::PRED_EQ : PPC::PRED_NE; + break; + case 1: // Branch on the inverted value of the EQ bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; + break; + case 2: // Branch on the value of the LT bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; + break; + case 3: // Branch on the inverted value of the LT bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; + break; + } + + return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), + DAG.getConstant(CompOpc, MVT::i32), + DAG.getRegister(PPC::CR6, MVT::i32), + N->getOperand(4), CompNode.getValue(1)); + } + break; + } + } + + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case PPCISD::LBRX: { + // lhbrx is known to have the top bits cleared out. + if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) + KnownZero = 0xFFFF0000; + break; + } + case ISD::INTRINSIC_WO_CHAIN: { + switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { + default: break; + case Intrinsic::ppc_altivec_vcmpbfp_p: + case Intrinsic::ppc_altivec_vcmpeqfp_p: + case Intrinsic::ppc_altivec_vcmpequb_p: + case Intrinsic::ppc_altivec_vcmpequh_p: + case Intrinsic::ppc_altivec_vcmpequw_p: + case Intrinsic::ppc_altivec_vcmpgefp_p: + case Intrinsic::ppc_altivec_vcmpgtfp_p: + case Intrinsic::ppc_altivec_vcmpgtsb_p: + case Intrinsic::ppc_altivec_vcmpgtsh_p: + case Intrinsic::ppc_altivec_vcmpgtsw_p: + case Intrinsic::ppc_altivec_vcmpgtub_p: + case Intrinsic::ppc_altivec_vcmpgtuh_p: + case Intrinsic::ppc_altivec_vcmpgtuw_p: + KnownZero = ~1U; // All bits but the low one are known to be zero. + break; + } + } + } +} + + +/// getConstraintType - Given a constraint, return the type of +/// constraint it is for this target. +PPCTargetLowering::ConstraintType +PPCTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'b': + case 'r': + case 'f': + case 'v': + case 'y': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass*> +PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'b': // R1-R31 + case 'r': // R0-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, PPC::G8RCRegisterClass); + return std::make_pair(0U, PPC::GPRCRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, PPC::F4RCRegisterClass); + else if (VT == MVT::f64) + return std::make_pair(0U, PPC::F8RCRegisterClass); + break; + case 'v': + return std::make_pair(0U, PPC::VRRCRegisterClass); + case 'y': // crrc + return std::make_pair(0U, PPC::CRRCRegisterClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops.
If hasMemory is true +/// it means one of the asm constraints of the inline asm instruction being +/// processed is 'm'. +void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, + bool hasMemory, + std::vector<SDValue>&Ops, + SelectionDAG &DAG) const { + SDValue Result(0,0); + switch (Letter) { + default: break; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': { + ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); + if (!CST) return; // Must be an immediate to match. + unsigned Value = CST->getZExtValue(); + switch (Letter) { + default: assert(0 && "Unknown constraint letter!"); + case 'I': // "I" is a signed 16-bit constant. + if ((short)Value == (int)Value) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'J': // "J" is a constant with only the high-order 16 bits nonzero. + case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. + if ((short)Value == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'K': // "K" is a constant with only the low-order 16 bits nonzero. + if ((Value >> 16) == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'M': // "M" is a constant that is greater than 31. + if (Value > 31) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'N': // "N" is a positive constant that is an exact power of two. + if ((int)Value > 0 && isPowerOf2_32(Value)) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'O': // "O" is the constant zero. + if (Value == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'P': // "P" is a constant whose negation is a signed 16-bit constant. + if ((short)-Value == (int)-Value) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + } + break; + } + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // Handle standard constraint letters. + TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG); +} + +// isLegalAddressingMode - Return true if the addressing mode represented +// by AM is legal for this target, for a load/store of the specified type. +bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // FIXME: PPC does not allow r+i addressing modes for vectors! + + // PPC allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // PPC only supports r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + default: + // No other scales are supported. + return false; + } + + return true; +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. +bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ + // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1); +} + +bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { + return false; +} + +SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // Depths > 0 not supported yet! + if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + return SDValue(); + + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + // Just load the return address off the stack. + SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); + + // Make sure the function really does not optimize away the store of the RA + // to the stack. + FuncInfo->setLRStoreRequired(); + return DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), RetAddrFI, NULL, 0); +} + +SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // Depths > 0 not supported yet! + if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + return SDValue(); + + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = PtrVT == MVT::i64; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) + && MFI->getStackSize(); + + if (isPPC64) + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1, + MVT::i64); + else + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1, + MVT::i32); +} + +bool +PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The PowerPC target isn't yet aware of offsets. + return false; +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h new file mode 100644 index 000000000000..79464749724e --- /dev/null +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -0,0 +1,394 @@ +//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that PPC uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H +#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "PPC.h" +#include "PPCSubtarget.h" + +namespace llvm { + namespace PPCISD { + enum NodeType { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// FSEL - Traditional three-operand fsel node. + /// + FSEL, + + /// FCFID - The FCFID instruction, taking an f64 operand and producing + /// an f64 value containing the FP representation of the integer that + /// was temporarily in the f64 operand. + FCFID, + + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + /// operand, producing an f64 value containing the integer representation + /// of that FP value. + FCTIDZ, FCTIWZ, + + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to, + /// then a SRCVALUE for the address.
+ STFIWX, + + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking + // three v4f32 operands and producing a v4f32 result. + VMADDFP, VNMSUBFP, + + /// VPERM - The PPC VPERM Instruction. + /// + VPERM, + + /// Hi/Lo - These represent the high and low 16-bit parts of a global + /// address respectively. These nodes have two operands, the first of + /// which must be a TargetGlobalAddress, and the second of which must be a + /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', + /// though these are usually folded into other nodes. + Hi, Lo, + + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an allocation on the stack. + DYNALLOC, + + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// These nodes represent the 32-bit PPC shifts that operate on 6-bit + /// shift amounts. These nodes are generated by the multi-precision shift + /// code. + SRL, SRA, SHL, + + /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" + /// registers. + EXTSW_32, + + /// STD_32 - This is the STD instruction for use with "32-bit" registers. + STD_32, + + /// CALL - A direct function call. + CALL_Macho, CALL_ELF, + + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a + /// MTCTR instruction. + MTCTR, + + /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a + /// BCTRL instruction. + BCTRL_Macho, BCTRL_ELF, + + /// Return with a flag operand, matched by 'blr' + RET_FLAG, + + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined. + MFCR, + + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* + /// instructions. For lack of better number, we use the opcode number + /// encoding for the OPC field to identify the compare. For example, 838 + /// is VCMPGTSH. + VCMP, + + /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the + /// altivec VCMP*o instructions. For lack of better number, we use the + /// opcode number encoding for the OPC field to identify the compare. For + /// example, 838 is VCMPGTSH. + VCMPo, + + /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This + /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the + /// condition register to branch on, OPC is the branch opcode to use (e.g. + /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is + /// an optional input flag argument. + COND_BRANCH, + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a + /// byte-swapping store instruction. It byte-swaps the low "Type" bits of + /// the GPRC input, then stores it through Ptr. Type can be either i16 or + /// i32. + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a + /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, + /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 + /// or i32. + LBRX, + + // The following 5 instructions are used only as part of the + // long double-to-int conversion sequence. + + /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the + /// register. + MFFS, + + /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. + MTFSB0, + + /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. 
+ MTFSB1, + + /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with + /// rounding towards zero. It has flags added so it won't move past the + /// FPSCR-setting instructions. + FADDRTZ, + + /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. + MTFSF, + + /// LARX = This corresponds to PPC l{w|d}arx instruction: load and + /// reserve indexed. This is used to implement atomic operations. + LARX, + + /// STCX = This corresponds to PPC stcx. instruction: store conditional + /// indexed. This is used to implement atomic operations. + STCX, + + /// TAILCALL - Indicates a tail call should be taken. + TAILCALL, + /// TC_RETURN - A tail call return. + /// operand #0 chain + /// operand #1 callee (register or absolute) + /// operand #2 stack adjustment + /// operand #3 optional in flag + TC_RETURN + }; + } + + /// Define some predicates that are used for node matching. + namespace PPC { + /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUHUM instruction. + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + + /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUWUM instruction. + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + + /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for + /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); + + /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for + /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); + + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift + /// amount, otherwise return -1. + int isVSLDOIShuffleMask(SDNode *N, bool isUnary); + + /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of a single element that is suitable for input to + /// VSPLTB/VSPLTH/VSPLTW. + bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + + /// isAllNegativeZeroVector - Returns true if all elements of build_vector + /// are -0.0. + bool isAllNegativeZeroVector(SDNode *N); + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the + /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. + unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); + + /// get_VSPLTI_elt - If this is a build_vector of constants which can be + /// formed by using a vspltis[bhw] instruction of the specified element + /// size, return the constant being splatted. The ByteSize field indicates + /// the number of bytes of each element [124] -> [bhw]. + SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + } + + class PPCTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + int VarArgsStackOffset; // StackOffset for start of stack + // arguments. + unsigned VarArgsNumGPR; // Index of the first unused integer + // register for parameter passing. + unsigned VarArgsNumFPR; // Index of the first unused double + // register for parameter passing. + int ReturnAddrIndex; // FrameIndex for return slot. + const PPCSubtarget &PPCSubTarget; + public: + explicit PPCTargetLowering(PPCTargetMachine &TM); + + /// getTargetNodeName() - This method returns the name of a target specific + /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - Return the ISD::SETCC ValueType + virtual MVT getSetCCResultType(MVT VT) const; + + /// getPreIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if the node's address + /// can be legally represented as pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + /// SelectAddressRegReg - Given the specified address, check to see if it + /// can be represented as an indexed [r+r] operation. Returns false if it + /// can be more efficiently represented with [r+imm]. + bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + /// SelectAddressRegImm - Returns true if the address N can be represented + /// by a base register plus a signed 16-bit displacement [r+imm], and if it + /// is not better represented as reg+reg. + bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; + + /// SelectAddressRegRegOnly - Given the specified address, force it to be + /// represented as an indexed [r+r] operation. + bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + /// SelectAddressRegImmShift - Returns true if the address N can be + /// represented by a base register plus a signed 14-bit displacement + /// [r+imm*4]. Suitable for use by STD and friends. + bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; + + + /// LowerOperation - Provide custom lowering hooks for some operations. + /// + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG); + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, bool is64Bit, + unsigned BinOpcode) const; + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, + bool is8bit, unsigned Opcode) const; + + ConstraintType getConstraintType(const std::string &Constraint) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate + /// function arguments in the caller parameter area. This is the actual + /// alignment, not its logarithm. + unsigned getByValTypeAlignment(const Type *Ty) const; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraints of the inline asm instruction + /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + + /// isLegalAddressImmediate - Return true if the integer value can be used + /// as the offset of the target addressing mode for load / store of the + /// given type. + virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + + /// isLegalAddressImmediate - Return true if the GlobalValue can be used as + /// the offset of the target addressing mode. + virtual bool isLegalAddressImmediate(GlobalValue *GV) const; + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall, + SDValue Ret, + SelectionDAG &DAG) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + private: + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; + SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, + int SPDiff, + SDValue Chain, + SDValue &LROpOut, + SDValue &FPOpOut, + DebugLoc dl); + + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + int VarArgsFrameIndex, int VarArgsStackOffset, + unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, + const PPCSubtarget &Subtarget); + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, + int VarArgsStackOffset, unsigned VarArgsNumGPR, + unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM); + SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl); + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG); + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); + }; +} + +#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td new file mode 100644 index 000000000000..417c8ed6e906 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -0,0 +1,723 @@ +//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PowerPC 64-bit instructions. These patterns are used +// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 64-bit operands. +// +def s16imm64 : Operand<i64> { + let PrintMethod = "printS16ImmOperand"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; +} +def symbolHi64 : Operand<i64> { + let PrintMethod = "printSymbolHi"; +} +def symbolLo64 : Operand<i64> { + let PrintMethod = "printSymbolLo"; +} + +//===----------------------------------------------------------------------===// +// 64-bit transformation functions. +// + +def SHL64 : SDNodeXForm<imm, [{ + // Transformation function: 63 - imm + return getI32Imm(63 - N->getZExtValue()); +}]>; + +def SRL64 : SDNodeXForm<imm, [{ + // Transformation function: 64 - imm + return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0); +}]>; + +def HI32_48 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned short)(N->getZExtValue() >> 32)); +}]>; + +def HI48_64 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned short)(N->getZExtValue() >> 48)); +}]>; + + +//===----------------------------------------------------------------------===// +// Calls. +// + +let Defs = [LR8] in + def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, + PPC970_Unit_BRU; + +// Macho ABI Calls. +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the PPC64 non-callee saved registers. + Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR8,CTR8, + CR0,CR1,CR5,CR6,CR7] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL8_Macho : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA8_Macho : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>; + } + let Uses = [CTR8, RM] in { + def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>; + } +} + +// ELF 64 ABI Calls = Macho ABI Calls +// Used to define BL8_ELF and BLA8_ELF +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the PPC64 non-callee saved registers. + Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR8,CTR8, + CR0,CR1,CR5,CR6,CR7] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL8_ELF : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>; + } + let Uses = [CTR8, RM] in { + def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>; + } +} + + +// Calls +def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)), + (BL8_Macho tglobaladdr:$dst)>; +def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)), + (BL8_Macho texternalsym:$dst)>; + +def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)), + (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)), + (BL8_ELF texternalsym:$dst)>; + +// Atomic operations +let usesCustomDAGSchedInserter = 1 in { + let Uses = [CR0] in { + def ATOMIC_LOAD_ADD_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_SUB_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_OR_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_XOR_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_AND_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_NAND_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), + "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; + + def ATOMIC_CMP_SWAP_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), + "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!", + [(set G8RC:$dst, + (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; + + def ATOMIC_SWAP_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), + "${:comment} ATOMIC_SWAP_I64 PSEUDO!", + [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; + } +} + +// Instructions to support atomic operations +def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr", LdStLDARX, + [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>; + +let Defs = [CR0] in +def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), + "stdcx. 
$rS, $dst", LdStSTDCX, + [(PPCstcx G8RC:$rS, xoaddr:$dst)]>, + isDOT; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNdi8 :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNd8 $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), + "#TC_RETURNa8 $func $offset", + [(PPCtc_return (i64 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNr8 $dst $offset", + []>; + + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; + + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", BrB, + []>; + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), + "ba $dst", BrB, + []>; + +def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), + (TCRETURNdi8 texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), + (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; + + +//===----------------------------------------------------------------------===// +// 64-bit SPR manipulation instrs. + +let Uses = [CTR8] in { +def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), + "mfctr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in { +def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let Defs = [X1], Uses = [X1] in +def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi), + "${:comment} DYNALLOC8 $result, $negsize, $fpsi", + [(set G8RC:$result, + (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; + +let Defs = [LR8] in { +def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), + "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Uses = [LR8] in { +def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), + "mflr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +//===----------------------------------------------------------------------===// +// Fixed point instructions. +// + +let PPC970_Unit = 1 in { // FXU Operations. + +// Copies, extends, truncates. +def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB), + "or $rA, $rS, $rB", IntGeneral, + []>; +def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB), + "or $rA, $rS, $rB", IntGeneral, + []>; + +def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), + "li $rD, $imm", IntGeneral, + [(set G8RC:$rD, immSExt16:$imm)]>; +def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), + "lis $rD, $imm", IntGeneral, + [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; + +// Logical ops. 
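+// (Editor's note.) The definitions below mirror the 32-bit GPRC versions in +// PPCInstrInfo.td, but operate on the 64-bit G8RC register class.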
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "nand $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; +def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "and $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; +def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "andc $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; +def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "or $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; +def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "nor $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; +def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "orc $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; +def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "eqv $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; +def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "xor $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; + +// Logical ops with immediate. +def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "andi. $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>, + isDOT; +def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "andis. $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, + isDOT; +def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "ori $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; +def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "oris $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; +def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "xori $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; +def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "xoris $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; + +def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "add $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; + +def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "addc $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, + PPC970_DGroup_Cracked; +def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "adde $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; + +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "addi $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + "addis $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + +def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "subfic $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; +def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subf $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; +def 
SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subfc $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, + PPC970_DGroup_Cracked; + +def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subfe $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; +def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "addme $rT, $rA", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>; +def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "addze $rT, $rA", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; +def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "neg $rT, $rA", IntGeneral, + [(set G8RC:$rT, (ineg G8RC:$rA))]>; +def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "subfme $rT, $rA", IntGeneral, + [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>; +def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "subfze $rT, $rA", IntGeneral, + [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; + + + +def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulhd $rT, $rA, $rB", IntMulHW, + [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; +def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulhdu $rT, $rA, $rB", IntMulHWU, + [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>; + +def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), + "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; +def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), + "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; +def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm), + "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; +def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), + "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; + +def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "sld $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64; +def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "srd $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; +def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "srad $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; + +def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), + "extsb $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; +def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), + "extsh $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; + +def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; +/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. 
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; +def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; + +def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), + "sradi $rA, $rS, $SH", IntRotateD, + [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; +def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), + "cntlzd $rA, $rS", IntGeneral, + [(set G8RC:$rA, (ctlz G8RC:$rS))]>; + +def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "divd $rT, $rA, $rB", IntDivD, + [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "divdu $rT, $rA, $rB", IntDivD, + [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulld $rT, $rA, $rB", IntMulHD, + [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64; + + +let isCommutable = 1 in { +def RLDIMI : MDForm_1<30, 3, + (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), + "rldimi $rA, $rS, $SH, $MB", IntRotateD, + []>, isPPC64, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; +} + +// Rotate instructions. +def RLDCL : MDForm_1<30, 0, + (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB), + "rldcl $rA, $rS, $rB, $MB", IntRotateD, + []>, isPPC64; +def RLDICL : MDForm_1<30, 0, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), + "rldicl $rA, $rS, $SH, $MB", IntRotateD, + []>, isPPC64; +def RLDICR : MDForm_1<30, 1, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), + "rldicr $rA, $rS, $SH, $ME", IntRotateD, + []>, isPPC64; +} // End FXU Operations. + + +//===----------------------------------------------------------------------===// +// Load/Store instructions. +// + + +// Sign extending loads. +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), + "lha $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), + "lwa $rD, $src", LdStLWA, + [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; +def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), + "lhax $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), + "lwax $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; + +// Update forms. +let mayLoad = 1 in +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, + ptr_rc:$rA), + "lhau $rD, $disp($rA)", LdStGeneral, + []>, RegConstraint<"$rA = $ea_result">, + NoEncode<"$ea_result">; +// NO LWAU! + +} + +// Zero extending loads. 
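+// (Editor's note.) In 64-bit mode lbz/lhz/lwz clear the high bits of the +// destination register, so they implement the zextloadi8/i16/i32 patterns +// below directly.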
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), + "lbz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; +def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), + "lhz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; +def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), + "lwz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + +def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), + "lbzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; +def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), + "lhzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; +def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), + "lwzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; + + +// Update forms. +let mayLoad = 1 in { +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lbzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lhzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lwzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +} +} + + +// Full 8-byte loads. +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), + "ld $rD, $src", LdStLD, + [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; +def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), + "ldx $rD, $src", LdStLD, + [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; + +let mayLoad = 1 in +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), + "ldu $rD, $addr", LdStLD, + []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, + NoEncode<"$ea_result">; + +} + +let PPC970_Unit = 2 in { +// Truncating stores. +def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), + "stb $rS, $src", LdStGeneral, + [(truncstorei8 G8RC:$rS, iaddr:$src)]>; +def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), + "sth $rS, $src", LdStGeneral, + [(truncstorei16 G8RC:$rS, iaddr:$src)]>; +def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), + "stw $rS, $src", LdStGeneral, + [(truncstorei32 G8RC:$rS, iaddr:$src)]>; +def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), + "stbx $rS, $dst", LdStGeneral, + [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), + "sthx $rS, $dst", LdStGeneral, + [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), + "stwx $rS, $dst", LdStGeneral, + [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +// Normal 8-byte stores. 
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), + "std $rS, $dst", LdStSTD, + [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; +def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), + "stdx $rS, $dst", LdStSTD, + [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +} + +let PPC970_Unit = 2 in { + +def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; + + +def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + s16immX4:$ptroff, ptr_rc:$ptrreg), + "stdu $rS, $ptroff($ptrreg)", LdStSTD, + [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +let mayStore = 1 in +def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTD, + []>, isPPC64; + +// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. +def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), + "std $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; +def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), + "stdx $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +} + + + +//===----------------------------------------------------------------------===// +// Floating point instructions. +// + + +let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. +def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfid $frD, $frB", FPGeneral, + [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; +def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), + "fctidz $frD, $frB", FPGeneral, + [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64; +} + + +//===----------------------------------------------------------------------===// +// Instruction Patterns +// + +// Extensions and truncates to/from 32-bit regs. +def : Pat<(i64 (zext GPRC:$in)), + (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>; +def : Pat<(i64 (anyext GPRC:$in)), + (OR4To8 GPRC:$in, GPRC:$in)>; +def : Pat<(i32 (trunc G8RC:$in)), + (OR8To4 G8RC:$in, G8RC:$in)>; + +// Extending loads with i64 targets. 
+def : Pat<(zextloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(zextloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), + (LHZ8 iaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), + (LHZX8 xaddr:$src)>; +def : Pat<(extloadi32 iaddr:$src), + (LWZ8 iaddr:$src)>; +def : Pat<(extloadi32 xaddr:$src), + (LWZX8 xaddr:$src)>; + +// Standard shifts. These are represented separately from the real shifts above +// so that we can distinguish between shifts that allow 6-bit and 7-bit shift +// amounts. +def : Pat<(sra G8RC:$rS, GPRC:$rB), + (SRAD G8RC:$rS, GPRC:$rB)>; +def : Pat<(srl G8RC:$rS, GPRC:$rB), + (SRD G8RC:$rS, GPRC:$rB)>; +def : Pat<(shl G8RC:$rS, GPRC:$rB), + (SLD G8RC:$rS, GPRC:$rB)>; + +// SHL/SRL +def : Pat<(shl G8RC:$in, (i32 imm:$imm)), + (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl G8RC:$in, (i32 imm:$imm)), + (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>; + +// ROTL +def : Pat<(rotl G8RC:$in, GPRC:$sh), + (RLDCL G8RC:$in, GPRC:$sh, 0)>; +def : Pat<(rotl G8RC:$in, (i32 imm:$imm)), + (RLDICL G8RC:$in, imm:$imm, 0)>; + +// Hi and Lo for Darwin Global Addresses. +def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; +def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>; +def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>; +def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>; +def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; +def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; +def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 G8RC:$in, tglobaladdr:$g)>; +def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 G8RC:$in, tconstpool:$g)>; +def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 G8RC:$in, tjumptable:$g)>; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td new file mode 100644 index 000000000000..9a5be79e816f --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -0,0 +1,668 @@ +//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Altivec extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Altivec transformation functions and pattern fragments. 
+//
+
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                              (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                              (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                    (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                    (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
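+// (The immediate produced is the index of the element being splatted, in
+// units of the element size the particular vsplt* instruction operates on.)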
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+  return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+  return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+  return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+  return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+  return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+  return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+def V_immneg0 : PatLeaf<(build_vector), [{
+  return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+              !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+              [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+             !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+             [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+  : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+             !strconcat(opc, " $vD, $vB"), VecFP,
+             [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+ +def DSS : DSS_Form<822, (outs), + (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), + "dss $STRM", LdStGeneral /*FIXME*/, []>; +def DSSALL : DSS_Form<822, (outs), + (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), + "dssall", LdStGeneral /*FIXME*/, []>; +def DST : DSS_Form<342, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTT : DSS_Form<342, (outs), + (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTST : DSS_Form<374, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTSTT : DSS_Form<374, (outs), + (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + +def DST64 : DSS_Form<342, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTT64 : DSS_Form<342, (outs), + (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTST64 : DSS_Form<374, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTSTT64 : DSS_Form<374, (outs), + (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + +def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), + "mfvscr $vD", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; +def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), + "mtvscr $vB", LdStGeneral, + [(int_ppc_altivec_mtvscr VRRC:$vB)]>; + +let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. +def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), + "lvebx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; +def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), + "lvehx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; +def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), + "lvewx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; +def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), + "lvx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; +def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), + "lvxl $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; +} + +def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), + "lvsl $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + PPC970_Unit_LSU; +def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), + "lvsr $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + PPC970_Unit_LSU; + +let PPC970_Unit = 2 in { // Stores. 
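+// Like the Altivec loads above, these stores only support register+register
+// (xoaddr) addressing; there is no reg+imm form for vector memory ops.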
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), + "stvebx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; +def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), + "stvehx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; +def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), + "stvewx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; +def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), + "stvx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; +def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), + "stvxl $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; +} + +let PPC970_Unit = 5 in { // VALU Operations. +// VA-Form instructions. 3-input AltiVec ops. +def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), + "vmaddfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), + VRRC:$vB))]>, + Requires<[FPContractions]>; +def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), + "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fsub V_immneg0, + (fsub (fmul VRRC:$vA, VRRC:$vC), + VRRC:$vB)))]>, + Requires<[FPContractions]>; + +def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; +def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; +def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>; +def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>; +def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; + +// Shuffles. +def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), + "vsldoi $vD, $vA, $vB, $SH", VecFP, + [(set VRRC:$vD, + (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; + +// VX-Form instructions. AltiVec arithmetic ops. 
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vaddfp $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>; + +def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vaddubm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vadduhm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>; +def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vadduwm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>; + +def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>; +def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>; +def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>; +def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>; +def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>; +def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>; +def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>; + + +def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vand $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>; +def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vandc $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>; + +def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vcfsx $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>; +def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vcfux $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>; +def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vctsxs $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>; +def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vctuxs $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; +def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; + +def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>; +def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>; +def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>; +def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>; +def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>; +def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>; + +def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>; +def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>; +def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>; +def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>; +def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>; +def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>; +def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>; +def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>; +def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>; +def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>; +def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>; +def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>; +def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>; +def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; + +def VMRGHB : 
VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrghb $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrghh $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrghw $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrglb $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrglh $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vmrglw $vD, $vA, $vB", VecFP,
+                      [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int<  8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP     : VX2_Int<266, "vrefp",     int_ppc_altivec_vrefp>;
+def VRFIM     : VX2_Int<714, "vrfim",     int_ppc_altivec_vrfim>;
+def VRFIN     : VX2_Int<522, "vrfin",     int_ppc_altivec_vrfin>;
+def VRFIP     : VX2_Int<650, "vrfip",     int_ppc_altivec_vrfip>;
+def VRFIZ     : VX2_Int<586, "vrfiz",     int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP  : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vsubfp $vD, $vA, $vB", VecGeneral,
+                      [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vsububm $vD, $vA, $vB", VecGeneral,
+                      [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vsubuhm $vD, $vA, $vB", VecGeneral,
+                      [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+                      "vsubuwm $vD, $vA, $vB", VecGeneral,
+                      [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800,
"vsum4sbs", int_ppc_altivec_vsum4sbs>; +def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>; +def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; + +def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vnor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>; +def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>; +def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vxor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>; + +def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>; +def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>; +def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>; + +def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >; +def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>; +def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>; +def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>; +def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; + +def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vspltb $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; +def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vsplth $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; +def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vspltw $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + +def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; +def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; +def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>; +def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>; +def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>; +def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>; +def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>; +def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>; + + +def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltisb $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>; +def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltish $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>; +def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltisw $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>; + +// Vector Pack. +def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>; +def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>; +def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>; +def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; +def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; +def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vpkuhum $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, + (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; +def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vpkuwum $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, + (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; + +// Vector Unpack. 
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+  : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), asmstr,
+              VecFPCompare,
+              [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+  : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), asmstr,
+              VecFPCompare,
+              [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+  let Defs = [CR6];
+  let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP   : VCMP <966, "vcmpbfp $vD, $vA, $vB"  , v4f32>;
+def VCMPBFPo  : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP  : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP  : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP  : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB  : VCMP <  6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo<  6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB  : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB  : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH  : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH  : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH  : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW  : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW  : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw.
$vD, $vA, $vB", v4i32>; + +def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), + "vxor $vD, $vD, $vD", VecFP, + [(set VRRC:$vD, (v4i32 immAllZerosV))]>; +} + +//===----------------------------------------------------------------------===// +// Additional Altivec Patterns +// + +// DS* intrinsics +def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; +def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; + +// * 32-bit +def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM), + (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; + +// * 64-bit +def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM), + (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; + +// Loads. +def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; + +// Stores. +def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), + (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; + +// Bit conversions. +def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; + +// Shuffles. 
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+        (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+        (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+          (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+          (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+          (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply add and multiply sub for packed float.  These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Raphson (used by divide, sqrt).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+          (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+          (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+          (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+          (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+          (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..1de69116cd58
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  The
+/// reference uses the abstract FrameIndex as its base register until the index
+/// is resolved.  This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+                  bool mem = true) {
+  if (mem)
+    return MIB.addImm(Offset).addFrameIndex(FI);
+  else
+    return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..54cebcdecd61
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,875 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+        : Instruction {
+  field bits<32> Inst;
+
+  bit PPC64 = 0;  // Default value, override with isPPC64
+
+  let Namespace = "PPC";
+  let Inst{0-5} = opcode;
+  let OutOperandList = OOL;
+  let InOperandList = IOL;
+  let AsmString = asmstr;
+  let Itinerary = itin;
+
+  /// These fields correspond to the fields in PPCInstrInfo.h.  Any changes to
+  /// these must be reflected there!  See comments there for what these are.
+  bits<1> PPC970_First = 0;
+  bits<1> PPC970_Single = 0;
+  bits<1> PPC970_Cracked = 0;
+  bits<3> PPC970_Unit = 0;
+}
+
+class PPC970_DGroup_First   { bits<1> PPC970_First = 1;  }
+class PPC970_DGroup_Single  { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo   { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU      { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU      { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU      { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU      { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU     { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM    { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU      { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+            InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  let Pattern = pattern;
+  bits<24> LI;
+
+  let Inst{6-29} = LI;
+  let Inst{30}   = aa;
+  let Inst{31}   = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+  : I<opcode, OOL, IOL, asmstr, BrB> {
+  bits<7> BIBO;  // 2 bits of BI and 5 bits of BO.
+ bits<3> CR; + bits<14> BD; + + bits<5> BI; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; + + let Inst{6-10} = BIBO{4-0}; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + + +// 1.7.4 D-Form +class DForm_base opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> A; + bits<5> B; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_1 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> A; + bits<16> C; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_2 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : DForm_base; + +class DForm_2_r0 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> A; + bits<16> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = 0; + let Inst{16-31} = B; +} + +class DForm_4 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> B; + bits<5> A; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_4_zero opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : DForm_1 { + let A = 0; + let B = 0; + let C = 0; +} + +class DForm_5 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<16> I; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-31} = I; +} + +class DForm_5_ext opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5 { + let L = PPC64; +} + +class DForm_6 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5; + +class DForm_6_ext opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_6 { + let L = PPC64; +} + + +// 1.7.5 DS-Form +class DSForm_1 opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> RST; + bits<14> DS; + bits<5> RA; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = RA; + let Inst{16-29} = DS; + let Inst{30-31} = xo; +} + +// 1.7.6 X-Form +class XForm_base_r3xo opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> RST; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// This is the same as XForm_base_r3xo, but the first two operands are swapped +// when code is emitted. 
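+// (Used for instructions such as or/and/xor, whose assembly syntax lists the
+// destination rA first even though rS is what lands in bits 6-10.)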
+class XForm_base_r3xo_swapped + opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<5> A; + bits<5> RST; + bits<5> B; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + + +class XForm_1 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo; + +class XForm_6 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo_swapped { + let Pattern = pattern; +} + +class XForm_8 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo; + +class XForm_10 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo_swapped { + let Pattern = pattern; +} + +class XForm_11 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo_swapped { + let B = 0; + let Pattern = pattern; +} + +class XForm_16 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<5> RB; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_16_ext opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_16 { + let L = PPC64; +} + +class XForm_17 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<3> BF; + bits<5> FRA; + bits<5> FRB; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + let Pattern = pattern; + let Inst{6-10} = 31; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24_sync opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list pattern> + : I { + let Pattern = pattern; + let Inst{6-10} = 0; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_25 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { +} + +class XForm_26 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let A = 0; +} + +class XForm_28 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { +} + +// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of +// numbers presumably relates to some document, but I haven't found it. +class XForm_42 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} +class XForm_43 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let Pattern = pattern; + bits<5> FM; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FM; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// DCB_Form - Form X instruction, used for dcb* instructions. 
+class DCB_Form xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = immfield; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + + +// DSS_Form - Form X instruction, used for altivec dss* instructions. +class DSS_Form xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<1> T; + bits<2> STRM; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6} = T; + let Inst{7-8} = 0; + let Inst{9-10} = STRM; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// 1.7.7 XL-Form +class XLForm_1 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> CRD; + bits<5> CRA; + bits<5> CRB; + + let Pattern = pattern; + + let Inst{6-10} = CRD; + let Inst{11-15} = CRA; + let Inst{16-20} = CRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_1_ext opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> CRD; + + let Pattern = pattern; + + let Inst{6-10} = CRD; + let Inst{11-15} = CRD; + let Inst{16-20} = CRD; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_2 opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> BO; + bits<5> BI; + bits<2> BH; + + let Pattern = pattern; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-18} = 0; + let Inst{19-20} = BH; + let Inst{21-30} = xo; + let Inst{31} = lk; +} + +class XLForm_2_br opcode, bits<10> xo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> + : XLForm_2 { + bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
+ bits<3> CR; + + let BO = BIBO{2-6}; + let BI{0-1} = BIBO{0-1}; + let BI{2-4} = CR; + let BH = 0; +} + + +class XLForm_2_ext opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> + : XLForm_2 { + let BO = bo; + let BI = bi; + let BH = 0; +} + +class XLForm_3 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<3> BF; + bits<3> BFA; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-13} = BFA; + let Inst{14-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// 1.7.8 XFX-Form +class XFXForm_1 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<5> RT; + bits<10> SPR; + + let Inst{6-10} = RT; + let Inst{11} = SPR{4}; + let Inst{12} = SPR{3}; + let Inst{13} = SPR{2}; + let Inst{14} = SPR{1}; + let Inst{15} = SPR{0}; + let Inst{16} = SPR{9}; + let Inst{17} = SPR{8}; + let Inst{18} = SPR{7}; + let Inst{19} = SPR{6}; + let Inst{20} = SPR{5}; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_1_ext opcode, bits<10> xo, bits<10> spr, + dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : XFXForm_1 { + let SPR = spr; +} + +class XFXForm_3 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<5> RT; + + let Inst{6-10} = RT; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_5 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<8> FXM; + bits<5> ST; + + let Inst{6-10} = ST; + let Inst{11} = 0; + let Inst{12-19} = FXM; + let Inst{20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_5a opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<5> ST; + bits<8> FXM; + + let Inst{6-10} = ST; + let Inst{11} = 1; + let Inst{12-19} = FXM; + let Inst{20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_7 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XFXForm_1; + +class XFXForm_7_ext opcode, bits<10> xo, bits<10> spr, + dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : XFXForm_7 { + let SPR = spr; +} + +// XFL-Form - MTFSF +// This is probably 1.7.9, but I don't have the reference that uses this +// numbering scheme... +class XFLForm opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + string cstr, InstrItinClass itin, listpattern> + : I { + bits<8> FM; + bits<5> RT; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + let Constraints = cstr; + + let Inst{6} = 0; + let Inst{7-14} = FM; + let Inst{15} = 0; + let Inst{16-20} = RT; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// 1.7.10 XS-Form - SRADI. 
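+// (The 6-bit shift amount is split across the encoding: the low five bits of
+// SH go in Inst{16-20} and the sixth bit in Inst{30}, as set below.)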
+class XSForm_1 opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> A; + bits<5> RS; + bits<6> SH; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + + let Inst{6-10} = RS; + let Inst{11-15} = A; + let Inst{16-20} = SH{4,3,2,1,0}; + let Inst{21-29} = xo; + let Inst{30} = SH{5}; + let Inst{31} = RC; +} + +// 1.7.11 XO-Form +class XOForm_1 opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21} = oe; + let Inst{22-30} = xo; + let Inst{31} = RC; +} + +class XOForm_3 opcode, bits<9> xo, bit oe, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> + : XOForm_1 { + let RB = 0; +} + +// 1.7.12 A-Form +class AForm_1 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRC; + bits<5> FRB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-25} = FRC; + let Inst{26-30} = xo; + let Inst{31} = RC; +} + +class AForm_2 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : AForm_1 { + let FRC = 0; +} + +class AForm_3 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : AForm_1 { + let FRB = 0; +} + +// 1.7.13 M-Form +class MForm_1 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> RA; + bits<5> RS; + bits<5> RB; + bits<5> MB; + bits<5> ME; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-25} = MB; + let Inst{26-30} = ME; + let Inst{31} = RC; +} + +class MForm_2 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : MForm_1 { +} + +// 1.7.14 MD-Form +class MDForm_1 opcode, bits<3> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> RA; + bits<5> RS; + bits<6> SH; + bits<6> MBE; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = SH{4,3,2,1,0}; + let Inst{21-26} = MBE{4,3,2,1,0,5}; + let Inst{27-29} = xo; + let Inst{30} = SH{5}; + let Inst{31} = RC; +} + + + +// E-1 VA-Form + +// VAForm_1 - DACB ordering. +class VAForm_1 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VC; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-25} = VC; + let Inst{26-31} = xo; +} + +// VAForm_1a - DABC ordering. 
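+// (Encodes the same fields as VAForm_1 above; only the order of the register
+// operands in the operand lists differs.)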
+class VAForm_1a xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bits<5> VC; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-25} = VC; + let Inst{26-31} = xo; +} + +class VAForm_2 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bits<4> SH; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = 0; + let Inst{22-25} = SH; + let Inst{26-31} = xo; +} + +// E-2 VX-Form +class VXForm_1 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +class VXForm_setzero xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : VXForm_1 { + let VA = VD; + let VB = VD; +} + + +class VXForm_2 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = 0; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +class VXForm_3 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> IMM; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = IMM; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + +/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr. +class VXForm_4 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + +/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr. +class VXForm_5 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = 0; + let Inst{11-15} = 0; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +// E-4 VXR-Form +class VXRForm_1 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bit RC = 0; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = RC; + let Inst{22-31} = xo; +} + +//===----------------------------------------------------------------------===// +class Pseudo pattern> + : I<0, OOL, IOL, asmstr, NoItinerary> { + let PPC64 = 0; + let Pattern = pattern; + let Inst{31-0} = 0; +} diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp new file mode 100644 index 000000000000..778f0349d10f --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -0,0 +1,818 @@ +//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPCInstrBuilder.h" +#include "PPCMachineFunctionInfo.h" +#include "PPCPredicates.h" +#include "PPCGenInstrInfo.inc" +#include "PPCTargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetAsmInfo.h" +using namespace llvm; + +extern cl::opt EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +extern cl::opt EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. + +PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) + : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), + RI(*TM.getSubtargetImpl(), *this) {} + +bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned& sourceReg, + unsigned& destReg, + unsigned& sourceSubIdx, + unsigned& destSubIdx) const { + sourceSubIdx = destSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR || + oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2 + assert(MI.getNumOperands() >= 3 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + MI.getOperand(2).isReg() && + "invalid PPC OR instruction!"); + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + } else if (oc == PPC::ADDI) { // addi r1, r2, 0 + assert(MI.getNumOperands() >= 3 && + MI.getOperand(0).isReg() && + MI.getOperand(2).isImm() && + "invalid PPC ADDI instruction!"); + if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + } else if (oc == PPC::ORI) { // ori r1, r2, 0 + assert(MI.getNumOperands() >= 3 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && + "invalid PPC ORI instruction!"); + if (MI.getOperand(2).getImm() == 0) { + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + } else if (oc == PPC::FMRS || oc == PPC::FMRD || + oc == PPC::FMRSD) { // fmr r1, r2 + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid PPC FMR instruction"); + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } else if (oc == PPC::MCRF) { // mcrf cr1, cr2 + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid PPC MCRF instruction"); + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + return false; +} + +unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case PPC::LD: + case PPC::LWZ: + case PPC::LFS: + case PPC::LFD: + if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && + MI->getOperand(2).isFI()) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case PPC::STD: + case PPC::STW: + case 
PPC::STFS: + case PPC::STFD: + if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && + MI->getOperand(2).isFI()) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +// commuteInstruction - We can commute rlwimi instructions, but only if the +// rotate amt is zero. We also have to munge the immediates a bit. +MachineInstr * +PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + + // Normal instructions can be commuted the obvious way. + if (MI->getOpcode() != PPC::RLWIMI) + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + + // Cannot commute if it has a non-zero rotate count. + if (MI->getOperand(3).getImm() != 0) + return 0; + + // If we have a zero rotate count, we have: + // M = mask(MB,ME) + // Op0 = (Op1 & ~M) | (Op2 & M) + // Change this to: + // M = mask((ME+1)&31, (MB-1)&31) + // Op0 = (Op2 & ~M) | (Op1 & M) + + // Swap op1/op2 + unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); + bool Reg1IsKill = MI->getOperand(1).isKill(); + bool Reg2IsKill = MI->getOperand(2).isKill(); + bool ChangeReg0 = false; + // If machine instrs are no longer in two-address forms, update + // destination register as well. + if (Reg0 == Reg1) { + // Must be two address instruction! + assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + "Expecting a two-address instruction!"); + Reg2IsKill = false; + ChangeReg0 = true; + } + + // Masks. + unsigned MB = MI->getOperand(4).getImm(); + unsigned ME = MI->getOperand(5).getImm(); + + if (NewMI) { + // Create a new instruction. + unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); + bool Reg0IsDead = MI->getOperand(0).isDead(); + return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) + .addReg(Reg2, getKillRegState(Reg2IsKill)) + .addReg(Reg1, getKillRegState(Reg1IsKill)) + .addImm((ME+1) & 31) + .addImm((MB-1) & 31); + } + + if (ChangeReg0) + MI->getOperand(0).setReg(Reg2); + MI->getOperand(2).setReg(Reg1); + MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setIsKill(Reg1IsKill); + MI->getOperand(1).setIsKill(Reg2IsKill); + + // Swap the mask around. + MI->getOperand(4).setImm((ME+1) & 31); + MI->getOperand(5).setImm((MB-1) & 31); + return MI; +} + +void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + BuildMI(MBB, MI, DL, get(PPC::NOP)); +} + + +// Branch analysis. +bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. 
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastInst->getOpcode() == PPC::B) { + if (!LastInst->getOperand(0).isMBB()) + return true; + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (LastInst->getOpcode() == PPC::BCC) { + if (!LastInst->getOperand(2).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with PPC::B and PPC:BCC, handle it. + if (SecondLastInst->getOpcode() == PPC::BCC && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(2).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two PPC:Bs, handle it. The second one is not + // executed, so remove it. + if (SecondLastInst->getOpcode() == PPC::B && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != PPC::BCC) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "PPC branch conditions have two components!"); + + // One-way branch. + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch + BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, dl, get(PPC::BCC)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + return 1; + } + + // Two-way Conditional Branch. + BuildMI(&MBB, dl, get(PPC::BCC)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB); + return 2; +} + +bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + if (DestRC != SrcRC) { + // Not yet supported! 
+ return false; + } + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + if (DestRC == PPC::GPRCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); + } else if (DestRC == PPC::G8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); + } else if (DestRC == PPC::F4RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::F8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::CRRCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::VRRCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg); + } else if (DestRC == PPC::CRBITRCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg); + } else { + // Attempt to copy register that is not GPR or FPR + return false; + } + + return true; +} + +bool +PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (RC == PPC::GPRCRegisterClass) { + if (SrcReg != PPC::LR) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else { + // FIXME: this spills LR immediately to memory in one step. To do this, + // we use R11, which we know cannot be used in the prolog/epilog. This is + // a hack. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(PPC::R11, + getKillRegState(isKill)), + FrameIdx)); + } + } else if (RC == PPC::G8RCRegisterClass) { + if (SrcReg != PPC::LR8) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else { + // FIXME: this spills LR immediately to memory in one step. To do this, + // we use R11, which we know cannot be used in the prolog/epilog. This is + // a hack. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(PPC::X11, + getKillRegState(isKill)), + FrameIdx)); + } + } else if (RC == PPC::F8RCRegisterClass) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else if (RC == PPC::F4RCRegisterClass) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else if (RC == PPC::CRRCRegisterClass) { + if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { + // FIXME (64-bit): Enable + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; + } else { + // FIXME: We use R0 here, because it isn't available for RA. We need to + // store the CR in the low 4-bits of the saved value. First, issue a MFCR + // to save all of the CRBits. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + + // If the saved register wasn't CR0, shift the bits left so that they are + // in CR0's slot. 
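+      // e.g. spilling CR3 gives ShiftBits = 12 below, and the rlwinm
+      // rotates CR3's four-bit field into the position CR0's field occupies
+      // in the mfcr result; the reload path undoes this with a rotate of
+      // 32 - ShiftBits in the opposite direction.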
+      if (SrcReg != PPC::CR0) {
+        unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+        // rlwinm r0, r0, ShiftBits, 0, 31.
+        NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+                       .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+      }
+
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+                                         .addReg(PPC::R0,
+                                                 getKillRegState(isKill)),
+                                         FrameIdx));
+    }
+  } else if (RC == PPC::CRBITRCRegisterClass) {
+    // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+    // backend currently only uses CR1EQ as an individual bit, this should
+    // not cause any bug. If we need other uses of CR bits, the following
+    // code may be invalid.
+    unsigned Reg = 0;
+    if (SrcReg >= PPC::CR0LT && SrcReg <= PPC::CR0UN)
+      Reg = PPC::CR0;
+    else if (SrcReg >= PPC::CR1LT && SrcReg <= PPC::CR1UN)
+      Reg = PPC::CR1;
+    else if (SrcReg >= PPC::CR2LT && SrcReg <= PPC::CR2UN)
+      Reg = PPC::CR2;
+    else if (SrcReg >= PPC::CR3LT && SrcReg <= PPC::CR3UN)
+      Reg = PPC::CR3;
+    else if (SrcReg >= PPC::CR4LT && SrcReg <= PPC::CR4UN)
+      Reg = PPC::CR4;
+    else if (SrcReg >= PPC::CR5LT && SrcReg <= PPC::CR5UN)
+      Reg = PPC::CR5;
+    else if (SrcReg >= PPC::CR6LT && SrcReg <= PPC::CR6UN)
+      Reg = PPC::CR6;
+    else if (SrcReg >= PPC::CR7LT && SrcReg <= PPC::CR7UN)
+      Reg = PPC::CR7;
+
+    return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+                               PPC::CRRCRegisterClass, NewMIs);
+
+  } else if (RC == PPC::VRRCRegisterClass) {
+    // We don't have indexed addressing for vector loads.  Emit:
+    // R0 = ADDI FI#
+    // STVX VAL, 0, R0
+    //
+    // FIXME: We use R0 here, because it isn't available for RA.
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+                                       FrameIdx, 0, 0));
+    NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+                     .addReg(SrcReg, getKillRegState(isKill))
+                     .addReg(PPC::R0)
+                     .addReg(PPC::R0));
+  } else {
+    assert(0 && "Unknown regclass!");
+    abort();
+  }
+
+  return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MI,
+                                  unsigned SrcReg, bool isKill, int FrameIdx,
+                                  const TargetRegisterClass *RC) const {
+  MachineFunction &MF = *MBB.getParent();
+  SmallVector<MachineInstr*, 4> NewMIs;
+
+  if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+    FuncInfo->setSpillsCR();
+  }
+
+  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+    MBB.insert(MI, NewMIs[i]);
+}
+
+void PPCInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                  bool isKill,
+                                  SmallVectorImpl<MachineOperand> &Addr,
+                                  const TargetRegisterClass *RC,
+                                  SmallVectorImpl<MachineInstr*> &NewMIs) const{
+  if (Addr[0].isFI()) {
+    if (StoreRegToStackSlot(MF, SrcReg, isKill,
+                            Addr[0].getIndex(), RC, NewMIs)) {
+      PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+      FuncInfo->setSpillsCR();
+    }
+
+    return;
+  }
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+  if (RC == PPC::GPRCRegisterClass) {
+    Opc = PPC::STW;
+  } else if (RC == PPC::G8RCRegisterClass) {
+    Opc = PPC::STD;
+  } else if (RC == PPC::F8RCRegisterClass) {
+    Opc = PPC::STFD;
+  } else if (RC == PPC::F4RCRegisterClass) {
+    Opc = PPC::STFS;
+  } else if (RC == PPC::VRRCRegisterClass) {
+    Opc = PPC::STVX;
+  } else {
+    assert(0 && "Unknown regclass!");
+    abort();
+  }
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+    .addReg(SrcReg, getKillRegState(isKill));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+                                   unsigned DestReg, int FrameIdx,
+                                   const TargetRegisterClass *RC,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs)const{
+  if (RC == PPC::GPRCRegisterClass) {
+    if (DestReg != PPC::LR) {
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+                                                 DestReg), FrameIdx));
+    } else {
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+                                                 PPC::R11), FrameIdx));
+      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+    }
+  } else if (RC == PPC::G8RCRegisterClass) {
+    if (DestReg != PPC::LR8) {
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+                                         FrameIdx));
+    } else {
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+                                                 PPC::R11), FrameIdx));
+      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+    }
+  } else if (RC == PPC::F8RCRegisterClass) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+                                       FrameIdx));
+  } else if (RC == PPC::F4RCRegisterClass) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+                                       FrameIdx));
+  } else if (RC == PPC::CRRCRegisterClass) {
+    // FIXME: We use R0 here, because it isn't available for RA.
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
+                                       FrameIdx));
+
+    // If the reloaded register isn't CR0, shift the bits right so that they
+    // are in the right CR's slot.
+    if (DestReg != PPC::CR0) {
+      unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+      // rlwinm r0, r0, 32-ShiftBits, 0, 31.
+      NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+                    .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+    }
+
+    NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+  } else if (RC == PPC::CRBITRCRegisterClass) {
+
+    unsigned Reg = 0;
+    if (DestReg >= PPC::CR0LT && DestReg <= PPC::CR0UN)
+      Reg = PPC::CR0;
+    else if (DestReg >= PPC::CR1LT && DestReg <= PPC::CR1UN)
+      Reg = PPC::CR1;
+    else if (DestReg >= PPC::CR2LT && DestReg <= PPC::CR2UN)
+      Reg = PPC::CR2;
+    else if (DestReg >= PPC::CR3LT && DestReg <= PPC::CR3UN)
+      Reg = PPC::CR3;
+    else if (DestReg >= PPC::CR4LT && DestReg <= PPC::CR4UN)
+      Reg = PPC::CR4;
+    else if (DestReg >= PPC::CR5LT && DestReg <= PPC::CR5UN)
+      Reg = PPC::CR5;
+    else if (DestReg >= PPC::CR6LT && DestReg <= PPC::CR6UN)
+      Reg = PPC::CR6;
+    else if (DestReg >= PPC::CR7LT && DestReg <= PPC::CR7UN)
+      Reg = PPC::CR7;
+
+    return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+                                PPC::CRRCRegisterClass, NewMIs);
+
+  } else if (RC == PPC::VRRCRegisterClass) {
+    // We don't have indexed addressing for vector loads.  Emit:
+    // R0 = ADDI FI#
+    // Dest = LVX 0, R0
+    //
+    // FIXME: We use R0 here, because it isn't available for RA.
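+    // In the indexed form an rA of 0 reads as the literal value 0, so
+    // "LVX vD, 0, R0" loads from EA = 0 + R0, the frame address that the
+    // ADDI below materializes into R0.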
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + FrameIdx, 0, 0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) + .addReg(PPC::R0)); + } else { + assert(0 && "Unknown regclass!"); + abort(); + } +} + +void +PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC) const { + MachineFunction &MF = *MBB.getParent(); + SmallVector NewMIs; + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) + MBB.insert(MI, NewMIs[i]); +} + +void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs)const{ + if (Addr[0].isFI()) { + LoadRegFromStackSlot(MF, DebugLoc::getUnknownLoc(), + DestReg, Addr[0].getIndex(), RC, NewMIs); + return; + } + + unsigned Opc = 0; + if (RC == PPC::GPRCRegisterClass) { + assert(DestReg != PPC::LR && "Can't handle this yet!"); + Opc = PPC::LWZ; + } else if (RC == PPC::G8RCRegisterClass) { + assert(DestReg != PPC::LR8 && "Can't handle this yet!"); + Opc = PPC::LD; + } else if (RC == PPC::F8RCRegisterClass) { + Opc = PPC::LFD; + } else if (RC == PPC::F4RCRegisterClass) { + Opc = PPC::LFS; + } else if (RC == PPC::VRRCRegisterClass) { + Opc = PPC::LVX; + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into +/// copy instructions, turning them into load/store instructions. +MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { + if (Ops.size() != 1) return NULL; + + // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because + // it takes more than one instruction to store it. 
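+  // For example, with Ops[0] == 0 the GPR copy "or r3, r4, r4" being spilled
+  // folds to "stw r4, <fi>", and with Ops[0] == 1 the reload of its source
+  // folds to "lwz r3, <fi>", as emitted below.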
+ unsigned Opc = MI->getOpcode(); + unsigned OpNum = Ops[0]; + + MachineInstr *NewMI = NULL; + if ((Opc == PPC::OR && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) + .addReg(InReg, getKillRegState(isKill)), + FrameIndex); + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) + .addReg(OutReg, + RegState::Define | + getDeadRegState(isDead)), + FrameIndex); + } + } else if ((Opc == PPC::OR8 && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) + .addReg(InReg, getKillRegState(isKill)), + FrameIndex); + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) + .addReg(OutReg, + RegState::Define | + getDeadRegState(isDead)), + FrameIndex); + } + } else if (Opc == PPC::FMRD) { + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) + .addReg(InReg, getKillRegState(isKill)), + FrameIndex); + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) + .addReg(OutReg, + RegState::Define | + getDeadRegState(isDead)), + FrameIndex); + } + } else if (Opc == PPC::FMRS) { + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) + .addReg(InReg, getKillRegState(isKill)), + FrameIndex); + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) + .addReg(OutReg, + RegState::Define | + getDeadRegState(isDead)), + FrameIndex); + } + } + + return NewMI; +} + +bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + if (Ops.size() != 1) return false; + + // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because + // it takes more than one instruction to store it. + unsigned Opc = MI->getOpcode(); + + if ((Opc == PPC::OR && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) + return true; + else if ((Opc == PPC::OR8 && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) + return true; + else if (Opc == PPC::FMRD || Opc == PPC::FMRS) + return true; + + return false; +} + + +bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case PPC::BLR: // Return. + case PPC::B: // Uncond branch. + case PPC::BCTR: // Indirect branch. 
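+    // Each of these terminators unconditionally leaves the block, so there
+    // is no fall-through past them.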
+ return true; + default: return false; + } +} + +bool PPCInstrInfo:: +ReverseBranchCondition(SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); + // Leave the CR# the same, but invert the condition. + Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); + return false; +} + +/// GetInstSize - Return the number of bytes of code the specified +/// instruction may be. This returns the maximum number of bytes. +/// +unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case PPC::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr); + } + case PPC::DBG_LABEL: + case PPC::EH_LABEL: + case PPC::GC_LABEL: + return 0; + default: + return 4; // PowerPC instructions are all 4 bytes + } +} diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h new file mode 100644 index 000000000000..492634c979eb --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -0,0 +1,168 @@ +//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC32_INSTRUCTIONINFO_H +#define POWERPC32_INSTRUCTIONINFO_H + +#include "PPC.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "PPCRegisterInfo.h" + +namespace llvm { + +/// PPCII - This namespace holds all of the PowerPC target-specific +/// per-instruction flags. These must match the corresponding definitions in +/// PPC.td and PPCInstrFormats.td. +namespace PPCII { +enum { + // PPC970 Instruction Flags. These flags describe the characteristics of the + // PowerPC 970 (aka G5) dispatch groups and how they are formed out of + // raw machine instructions. + + /// PPC970_First - This instruction starts a new dispatch group, so it will + /// always be the first one in the group. + PPC970_First = 0x1, + + /// PPC970_Single - This instruction starts a new dispatch group and + /// terminates it, so it will be the sole instruction in the group. + PPC970_Single = 0x2, + + /// PPC970_Cracked - This instruction is cracked into two pieces, requiring + /// two dispatch pipes to be available to issue. + PPC970_Cracked = 0x4, + + /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that + /// an instruction is issued to. + PPC970_Shift = 3, + PPC970_Mask = 0x07 << PPC970_Shift +}; +enum PPC970_Unit { + /// These are the various PPC970 execution unit pipelines. Each instruction + /// is one of these. 
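+  /// Only eight values fit in the three mask bits; for example
+  /// (Flags & PPC970_Mask) == PPC970_LSU identifies an instruction issued
+  /// to the load/store unit.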
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction + PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit + PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit + PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit + PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit + PPC970_VALU = 5 << PPC970_Shift, // Vector ALU + PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit + PPC970_BRU = 7 << PPC970_Shift // Branch Unit +}; +} + + +class PPCInstrInfo : public TargetInstrInfoImpl { + PPCTargetMachine &TM; + const PPCRegisterInfo RI; + + bool StoreRegToStackSlot(MachineFunction &MF, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; +public: + explicit PPCInstrInfo(PPCTargetMachine &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + // commuteInstruction - We can commute rlwimi instructions, but only if the + // rotate amt is zero. We also have to munge the immediates a bit. + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into + /// copy instructions, turning them into load/store instructions. 
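+  /// For example, an FMRD copy can fold to an stfd (when the copy's def is
+  /// spilled) or an lfd (when its source is reloaded).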
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const; + + virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + virtual + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + + /// GetInstSize - Return the number of bytes of code the specified + /// instruction may be. This returns the maximum number of bytes. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; +}; + +} + +#endif diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td new file mode 100644 index 000000000000..772e25ad232f --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -0,0 +1,1475 @@ +//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the subset of the 32-bit PowerPC instruction set, as used +// by the PowerPC instruction selector. +// +//===----------------------------------------------------------------------===// + +include "PPCInstrFormats.td" + +//===----------------------------------------------------------------------===// +// PowerPC specific type constraints. +// +def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; +def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; +def SDT_PPCvperm : SDTypeProfile<1, 3, [ + SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> +]>; + +def SDT_PPCvcmp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> +]>; + +def SDT_PPCcondbr : SDTypeProfile<0, 3, [ + SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> +]>; + +def SDT_PPClbrx : SDTypeProfile<1, 3, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +]>; +def SDT_PPCstbrx : SDTypeProfile<0, 4, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +]>; + +def SDT_PPClarx : SDTypeProfile<1, 1, [ + SDTCisInt<0>, SDTCisPtrTy<1> +]>; +def SDT_PPCstcx : SDTypeProfile<0, 2, [ + SDTCisInt<0>, SDTCisPtrTy<1> +]>; + +def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32> +]>; + +//===----------------------------------------------------------------------===// +// PowerPC specific DAG Nodes. +// + +def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>; +def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; +def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, + [SDNPHasChain, SDNPMayStore]>; + +// This sequence is used for long double->int conversions. It changes the +// bits in the FPSCR which is not modelled. 
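+// Sketch of the expected sequence: MFFS saves the FPSCR, MTFSB1/MTFSB0 force
+// round-toward-zero, FADDRTZ performs the add, and MTFSF restores the saved
+// FPSCR.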
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, + [SDNPOutFlag]>; +def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPInFlag, SDNPOutFlag]>; +def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPInFlag, SDNPOutFlag]>; +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, + [SDNPInFlag, SDNPOutFlag]>; +def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, + [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, + SDTCisVT<3, f64>]>, + [SDNPInFlag]>; + +def PPCfsel : SDNode<"PPCISD::FSEL", + // Type constraint for fsel. + SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; + +def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; +def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; +def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; +def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; + +def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; + +// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift +// amounts. These nodes are generated by the multi-precision shift code. +def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; +def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; +def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; + +def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>; +def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, + [SDNPHasChain, SDNPMayStore]>; + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; +def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; + +def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, + [SDNPHasChain, SDNPOptInFlag]>; + +def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall, + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + +def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; +def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; + +def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, + [SDNPHasChain, SDNPOptInFlag]>; + +def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, + [SDNPHasChain, SDNPMayLoad]>; +def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, + [SDNPHasChain, SDNPMayStore]>; + +// Instructions to support atomic operations +def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, + [SDNPHasChain, SDNPMayLoad]>; +def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, + [SDNPHasChain, SDNPMayStore]>; + +// Instructions to support dynamic alloca. 
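+// (SDTypeProfile<1, 2, []> below means one result and two unconstrained
+// operands: the allocated address, plus the negated size and the frame
+// pointer slot it is computed from.)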
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+  // Transformation function: 31 - imm
+  return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+  // Transformation function: 32 - imm
+  return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+  // Transformation function: get the low 16 bits.
+  return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+  // Transformation function: shift the immediate value down into the low bits.
+  return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+  signed int Val = N->getZExtValue();
+  return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+  // Transformation function: get the start bit of a mask
+  unsigned mb, me;
+  isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+  // Transformation function: get the end bit of a mask
+  unsigned mb, me;
+  isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+  // maskImm predicate - True if immediate is a run of ones.
+  unsigned mb, me;
+  if (N->getValueType(0) == MVT::i32)
+    return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  else
+    return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+  // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+  // field.  Used by instructions like 'addi'.
+  if (N->getValueType(0) == MVT::i32)
+    return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+  else
+    return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+  // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+  // field.  Used by instructions like 'ori'.
+  return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero.  There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt.  These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+  // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+  // immediate are set.  Used by instructions like 'xoris'.
+  return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+  // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+  // immediate are set.  Used by instructions like 'addis'.  Identical to
+  // imm16ShiftedZExt in 32-bit mode.
+  if (N->getZExtValue() & 0xFFFF) return false;
+  if (N->getValueType(0) == MVT::i32)
+    return true;
+  // For 64-bit, make sure it is sext right.
+  return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+  list<Register> Defs = [CR0];
+  bit RC = 1;
+}
+
+class RegConstraint<string C> {
+  string Constraints = C;
+}
+class NoEncode<string E> {
+  string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+  let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+  let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+  let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+  let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+  let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> {   // Multiply imm by 4 before printing.
+  let PrintMethod = "printS16X4ImmOperand";
+}
+def target : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+}
+def calltarget : Operand<iPTR> {
+  let PrintMethod = "printCallOperand";
+}
+def aaddr : Operand<iPTR> {
+  let PrintMethod = "printAbsAddrOperand";
+}
+def piclabel: Operand<iPTR> {
+  let PrintMethod = "printPICLabel";
+}
+def symbolHi: Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+}
+def symbolLo: Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+}
+def crbitm: Operand<i8> {
+  let PrintMethod = "printcrbitm";
+}
+// Address operands
+def memri : Operand<iPTR> {
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+def memrr : Operand<iPTR> {
+  let PrintMethod = "printMemRegReg";
+  let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
+  let PrintMethod = "printMemRegImmShifted";
+  let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+
+// PowerPC Predicate operand.  20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC:$reg), (ops (i32 20), CR0)> {
+  let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",     [], []>;
+def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",     [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly", [], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
+                              "${:comment} ADJCALLSTACKDOWN",
+                              [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+                              "${:comment} ADJCALLSTACKUP",
+                              [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+                           "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
+                      "${:comment} DYNALLOC $result, $negsize, $fpsi",
+                      [(set GPRC:$result,
+                            (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1,    // Expanded by the scheduler.
+    PPC970_Single = 1 in {
+  def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+                             i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                             []>;
+  def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+                             i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                             []>;
+  def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+                             i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                             []>;
+  def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+                             i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                             []>;
+  def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+                             i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                             []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
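+// (Presumably expanded once the scavenger can supply a free GPR; the direct
+// mfcr/stw sequence in PPCInstrInfo.cpp is the non-scavenged path.)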
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F), + "${:comment} SPILL_CR $cond $F", []>; + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isReturn = 1, Uses = [LR, RM] in + def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), + "b${p:cc}lr ${p:reg}", BrB, + [(retflag)]>; + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; +} + +let Defs = [LR] in + def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, + PPC970_Unit_BRU; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let isBarrier = 1 in { + def B : IForm<18, 0, 0, (outs), (ins target:$dst), + "b $dst", BrB, + [(br bb:$dst)]>; + } + + // BCC represents an arbitrary conditional branch on a predicate. + // FIXME: should be able to write a pattern for PPCcondbranch, but can't use + // a two-value operand where a dag node expects two operands. :( + def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst), + "b${cond:cc} ${cond:reg}, $dst" + /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; +} + +// Macho ABI Calls. +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the non-callee saved registers... + Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR,CTR, + CR0,CR1,CR5,CR6,CR7, + CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, + CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL_Macho : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA_Macho : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>; + } + let Uses = [CTR, RM] in { + def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>; + } +} + +// ELF ABI Calls. +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the non-callee saved registers... + Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, + F0,F1,F2,F3,F4,F5,F6,F7,F8, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR,CTR, + CR0,CR1,CR5,CR6,CR7, + CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, + CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL_ELF : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. 
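+  // (Note the ELF clobber list above differs from the Macho one: only
+  // F0-F8 are treated as caller-saved here, vs. F0-F13 for Macho.)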
+ def BLA_ELF : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, + [(PPCcall_ELF (i32 imm:$func))]>; + } + let Uses = [CTR, RM] in { + def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>; + } +} + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNdi :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNd $dst $offset", + []>; + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), + "#TC_RETURNa $func $offset", + [(PPCtc_return (i32 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNr $dst $offset", + []>; + + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In32BitMode]>; + + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", BrB, + []>; + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), + "ba $dst", BrB, + []>; + + +// DCB* instructions. +def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), + "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), + "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), + "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), + "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), + "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), + "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), + "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), + "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, + PPC970_DGroup_Single; + +// Atomic operations +let usesCustomDAGSchedInserter = 1 in { + let Uses = [CR0] in { + def ATOMIC_LOAD_ADD_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_SUB_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_AND_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_OR_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} 
ATOMIC_LOAD_OR_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_XOR_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_NAND_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_ADD_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_SUB_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_AND_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_OR_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_XOR_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_NAND_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_ADD_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_SUB_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_AND_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_OR_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_XOR_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; + def ATOMIC_LOAD_NAND_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), + "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; + + def ATOMIC_CMP_SWAP_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), + "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", + [(set GPRC:$dst, + (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + def ATOMIC_CMP_SWAP_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), + "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!", + [(set GPRC:$dst, + (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + def ATOMIC_CMP_SWAP_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), + "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!", + [(set GPRC:$dst, + (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + + def 
ATOMIC_SWAP_I8 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), + "${:comment} ATOMIC_SWAP_I8 PSEUDO!", + [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; + def ATOMIC_SWAP_I16 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), + "${:comment} ATOMIC_SWAP_I16 PSEUDO!", + [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; + def ATOMIC_SWAP_I32 : Pseudo< + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), + "${:comment} ATOMIC_SWAP_I32 PSEUDO!", + [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; + } +} + +// Instructions to support atomic operations +def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), + "lwarx $rD, $src", LdStLWARX, + [(set GPRC:$rD, (PPClarx xoaddr:$src))]>; + +let Defs = [CR0] in +def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), + "stwcx. $rS, $dst", LdStSTWCX, + [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, + isDOT; + +let isBarrier = 1, hasCtrlDep = 1 in +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; + +//===----------------------------------------------------------------------===// +// PPC32 Load Instructions. +// + +// Unindexed (r+i) Loads. +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), + "lbz $rD, $src", LdStGeneral, + [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; +def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), + "lha $rD, $src", LdStLHA, + [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, + PPC970_DGroup_Cracked; +def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), + "lhz $rD, $src", LdStGeneral, + [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; +def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), + "lwz $rD, $src", LdStGeneral, + [(set GPRC:$rD, (load iaddr:$src))]>; + +def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), + "lfs $rD, $src", LdStLFDU, + [(set F4RC:$rD, (load iaddr:$src))]>; +def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), + "lfd $rD, $src", LdStLFD, + [(set F8RC:$rD, (load iaddr:$src))]>; + + +// Unindexed (r+i) Loads with Update (preinc). +let mayLoad = 1 in { +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lbzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lhau $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lhzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lwzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lfs $rD, $addr", LdStLFDU, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lfd $rD, $addr", LdStLFD, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +} +} + +// Indexed (r+r) Loads. 
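+// (These compute the effective address as rA + rB via the memrr operand,
+// e.g. "lwzx r3, r4, r5" loads from r4 + r5, reaching where the 16-bit
+// displacement forms above cannot.)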
+// +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), + "lbzx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; +def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), + "lhax $rD, $src", LdStLHA, + [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, + PPC970_DGroup_Cracked; +def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), + "lhzx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; +def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), + "lwzx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (load xaddr:$src))]>; + + +def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), + "lhbrx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>; +def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), + "lwbrx $rD, $src", LdStGeneral, + [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>; + +def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), + "lfsx $frD, $src", LdStLFDU, + [(set F4RC:$frD, (load xaddr:$src))]>; +def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), + "lfdx $frD, $src", LdStLFDU, + [(set F8RC:$frD, (load xaddr:$src))]>; +} + +//===----------------------------------------------------------------------===// +// PPC32 Store Instructions. +// + +// Unindexed (r+i) Stores. +let PPC970_Unit = 2 in { +def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), + "stb $rS, $src", LdStGeneral, + [(truncstorei8 GPRC:$rS, iaddr:$src)]>; +def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), + "sth $rS, $src", LdStGeneral, + [(truncstorei16 GPRC:$rS, iaddr:$src)]>; +def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), + "stw $rS, $src", LdStGeneral, + [(store GPRC:$rS, iaddr:$src)]>; +def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), + "stfs $rS, $dst", LdStUX, + [(store F4RC:$rS, iaddr:$dst)]>; +def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), + "stfd $rS, $dst", LdStUX, + [(store F8RC:$rS, iaddr:$dst)]>; +} + +// Unindexed (r+i) Stores with Update (preinc). 
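+// (Each of these also writes the effective address back into the base
+// register, e.g. "stwu r3, -16(r1)" stores r3 and leaves r1 decremented by
+// 16; the RegConstraint below ties $ea_res to $ptrreg to model this.)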
+let PPC970_Unit = 2 in { +def STBU : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stfsu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +} + + +// Indexed (r+r) Stores. +// +let PPC970_Unit = 2 in { +def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), + "stbx $rS, $dst", LdStGeneral, + [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), + "sthx $rS, $dst", LdStGeneral, + [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), + "stwx $rS, $dst", LdStGeneral, + [(store GPRC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; + +let mayStore = 1 in { +def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), + "stwux $rS, $rA, $rB", LdStGeneral, + []>; +} +def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), + "sthbrx $rS, $dst", LdStGeneral, + [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>, + PPC970_DGroup_Cracked; +def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), + "stwbrx $rS, $dst", LdStGeneral, + [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>, + PPC970_DGroup_Cracked; + +def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), + "stfiwx $frS, $dst", LdStUX, + [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; + +def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), + "stfsx $frS, $dst", LdStUX, + [(store F4RC:$frS, xaddr:$dst)]>; +def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), + "stfdx $frS, $dst", LdStUX, + [(store F8RC:$frS, xaddr:$dst)]>; +} + +let isBarrier = 1 in +def SYNC : XForm_24_sync<31, 598, (outs), (ins), + "sync", LdStSync, + [(int_ppc_sync)]>; + +//===----------------------------------------------------------------------===// +// PPC32 Arithmetic Instructions. +// + +let PPC970_Unit = 1 in { // FXU Operations. 
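+// (The immediate forms below pair with the LO16/HI16/HA16 transforms above;
+// e.g. an arbitrary 32-bit constant is materialized as "lis rD, HI16(imm)"
+// followed by "ori rD, rD, LO16(imm)".)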
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + "addi $rD, $rA, $imm", IntGeneral, + [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; +def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + "addic $rD, $rA, $imm", IntGeneral, + [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>, + PPC970_DGroup_Cracked; +def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + "addic. $rD, $rA, $imm", IntGeneral, + []>; +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), + "addis $rD, $rA, $imm", IntGeneral, + [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + "la $rD, $sym($rA)", IntGeneral, + [(set GPRC:$rD, (add GPRC:$rA, + (PPClo tglobaladdr:$sym, 0)))]>; +def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + "mulli $rD, $rA, $imm", IntMulLI, + [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>; +def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + "subfic $rD, $rA, $imm", IntGeneral, + [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; + +let isReMaterializable = 1 in { + def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), + "li $rD, $imm", IntGeneral, + [(set GPRC:$rD, immSExt16:$imm)]>; + def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), + "lis $rD, $imm", IntGeneral, + [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>; +} +} + +let PPC970_Unit = 1 in { // FXU Operations. +def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "andi. $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>, + isDOT; +def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "andis. $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>, + isDOT; +def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "ori $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>; +def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "oris $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>; +def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "xori $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>; +def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "xoris $dst, $src1, $src2", IntGeneral, + [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>; +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral, + []>; +def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), + "cmpwi $crD, $rA, $imm", IntCompare>; +def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), + "cmplwi $dst, $src1, $src2", IntCompare>; +} + + +let PPC970_Unit = 1 in { // FXU Operations. 
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "nand $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; +def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "and $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>; +def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "andc $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>; +def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "or $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>; +def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "nor $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>; +def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "orc $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>; +def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "eqv $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>; +def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "xor $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>; +def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "slw $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>; +def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "srw $rA, $rS, $rB", IntGeneral, + [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>; +def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), + "sraw $rA, $rS, $rB", IntShift, + [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; +} + +let PPC970_Unit = 1 in { // FXU Operations. +def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), + "srawi $rA, $rS, $SH", IntShift, + [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; +def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), + "cntlzw $rA, $rS", IntGeneral, + [(set GPRC:$rA, (ctlz GPRC:$rS))]>; +def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), + "extsb $rA, $rS", IntGeneral, + [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>; +def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), + "extsh $rA, $rS", IntGeneral, + [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>; + +def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), + "cmpw $crD, $rA, $rB", IntCompare>; +def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), + "cmplw $crD, $rA, $rB", IntCompare>; +} +let PPC970_Unit = 3 in { // FPU Operations. 
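+// (fcmpu is a single hardware opcode; FCMPUS and FCMPUD below share its
+// encoding and differ only in the FP register class of their operands.)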
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), +// "fcmpo $crD, $fA, $fB", FPCompare>; +def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; +def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; + +let Uses = [RM] in { + def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiwz $frD, $frB", FPGeneral, + [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>; + def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), + "frsp $frD, $frB", FPGeneral, + [(set F4RC:$frD, (fround F8RC:$frB))]>; + def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), + "fsqrt $frD, $frB", FPSqrt, + [(set F8RC:$frD, (fsqrt F8RC:$frB))]>; + def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), + "fsqrts $frD, $frB", FPSqrt, + [(set F4RC:$frD, (fsqrt F4RC:$frB))]>; + } +} + +/// FMR is split into 3 versions: one each for 4- and 8-byte FP, and one for +/// extending. +/// +/// Note that these are defined as pseudo-ops on the PPC970 because they are +/// often coalesced away and we don't want the dispatch group builder to think +/// that they will fill slots (which could cause the load of an LSU reject to +/// sneak into a d-group with a store). +def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), + "fmr $frD, $frB", FPGeneral, + []>, // (set F4RC:$frD, F4RC:$frB) + PPC970_Unit_Pseudo; +def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB), + "fmr $frD, $frB", FPGeneral, + []>, // (set F8RC:$frD, F8RC:$frB) + PPC970_Unit_Pseudo; +def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), + "fmr $frD, $frB", FPGeneral, + [(set F8RC:$frD, (fextend F4RC:$frB))]>, + PPC970_Unit_Pseudo; + +let PPC970_Unit = 3 in { // FPU Operations. +// These are artificially split into two different forms, for 4/8 byte FP. +def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), + "fabs $frD, $frB", FPGeneral, + [(set F4RC:$frD, (fabs F4RC:$frB))]>; +def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), + "fabs $frD, $frB", FPGeneral, + [(set F8RC:$frD, (fabs F8RC:$frB))]>; +def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), + "fnabs $frD, $frB", FPGeneral, + [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>; +def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), + "fnabs $frD, $frB", FPGeneral, + [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>; +def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), + "fneg $frD, $frB", FPGeneral, + [(set F4RC:$frD, (fneg F4RC:$frB))]>; +def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), + "fneg $frD, $frB", FPGeneral, + [(set F8RC:$frD, (fneg F8RC:$frB))]>; +} + + +// XL-Form instructions. Condition register logical ops. +// +def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA), + "mcrf $BF, $BFA", BrMCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD), + (ins CRBITRC:$CRA, CRBITRC:$CRB), + "creqv $CRD, $CRA, $CRB", BrCR, + []>; + +def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), + (ins CRBITRC:$CRA, CRBITRC:$CRB), + "cror $CRD, $CRA, $CRB", BrCR, + []>; + +def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), + "creqv $dst, $dst, $dst", BrCR, + []>; + +// XFX-Form instructions. Instructions that deal with SPRs.
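+// In the XFX defs below, the third template parameter is the SPR number being +// encoded: 8 is LR, 9 is CTR, and 256 is VRSAVE.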
+// +let Uses = [CTR] in { +def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), + "mfctr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in { +def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let Defs = [LR] in { +def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS), + "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Uses = [LR] in { +def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins), + "mflr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like +// a GPR on the PPC970. As such, copies in and out have the same performance +// characteristics as an OR instruction. +def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; +def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; + +def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), + "mtcrf $FXM, $rS", BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; +// FIXME: this Uses all the CR registers. Marking it as such is +// necessary for DeadMachineInstructionElim to do the right thing. +// However, marking it also exposes PR 2964, and causes crashes in +// the Local RA because it doesn't like this sequence: +// vreg = MCRF CR0 +// MFCR +// For now DeadMachineInstructionElim is turned off, so don't do the marking. +def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; +def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), + "mfcr $rT, $FXM", SprMFCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// Instructions to manipulate FPSCR. Only long double handling uses these. +// FPSCR is not modelled; we use the SDNode Flag to keep things in order. + +let Uses = [RM], Defs = [RM] in { + def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), + "mtfsb0 $FM", IntMTFSB0, + [(PPCmtfsb0 (i32 imm:$FM))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), + "mtfsb1 $FM", IntMTFSB0, + [(PPCmtfsb1 (i32 imm:$FM))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + // MTFSF does not actually produce an FP result. We pretend it copies + // input reg B to the output. If we didn't do this it would look like the + // instruction had no outputs (because we aren't modelling the FPSCR) and + // it would be deleted. + def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), + (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), + "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, + [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), + F8RC:$rT, F8RC:$FRB))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +} +let Uses = [RM] in { + def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), + "mffs $rT", IntMFFS, + [(set F8RC:$rT, (PPCmffs))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + def FADDrtz: AForm_2<63, 21, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fadd $FRT, $FRA, $FRB", FPGeneral, + [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +} + + +let PPC970_Unit = 1 in { // FXU Operations. + +// XO-Form instructions. 
Arithmetic instructions that can set overflow bit +// +def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "add $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; +def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "addc $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>, + PPC970_DGroup_Cracked; +def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "adde $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; +def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "divw $rT, $rA, $rB", IntDivW, + [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "divwu $rT, $rA, $rB", IntDivW, + [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "mulhw $rT, $rA, $rB", IntMulHW, + [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>; +def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "mulhwu $rT, $rA, $rB", IntMulHWU, + [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>; +def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "mullw $rT, $rA, $rB", IntMulHW, + [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>; +def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "subf $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>; +def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "subfc $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>, + PPC970_DGroup_Cracked; +def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "subfe $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; +def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "addme $rT, $rA", IntGeneral, + [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>; +def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "addze $rT, $rA", IntGeneral, + [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; +def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "neg $rT, $rA", IntGeneral, + [(set GPRC:$rT, (ineg GPRC:$rA))]>; +def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "subfme $rT, $rA", IntGeneral, + [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>; +def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "subfze $rT, $rA", IntGeneral, + [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; +} + +// A-Form instructions. Most of the instructions executed in the FPU are of +// this type. +// +let PPC970_Unit = 3 in { // FPU Operations. 
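+// The fused forms below compute FRA*FRC +/- FRB with a single rounding step, +// which is why matching a separate (fadd (fmul ...)) pair onto them is gated +// on the FPContractions predicate.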
+let Uses = [RM] in { + def FMADD : AForm_1<63, 29, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), + "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, + [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC), + F8RC:$FRB))]>, + Requires<[FPContractions]>; + def FMADDS : AForm_1<59, 29, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB))]>, + Requires<[FPContractions]>; + def FMSUB : AForm_1<63, 28, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), + "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, + [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC), + F8RC:$FRB))]>, + Requires<[FPContractions]>; + def FMSUBS : AForm_1<59, 28, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB))]>, + Requires<[FPContractions]>; + def FNMADD : AForm_1<63, 31, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), + "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, + [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), + F8RC:$FRB)))]>, + Requires<[FPContractions]>; + def FNMADDS : AForm_1<59, 31, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB)))]>, + Requires<[FPContractions]>; + def FNMSUB : AForm_1<63, 30, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), + "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, + [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), + F8RC:$FRB)))]>, + Requires<[FPContractions]>; + def FNMSUBS : AForm_1<59, 30, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB)))]>, + Requires<[FPContractions]>; +} +// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid +// having 4 of these, force the comparison to always be an 8-byte double (code +// should use an FMRSD if the input comparison value really wants to be a float), +// and provide 4/8 byte forms for the result and operand type.
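+// (fsel FRT, FRA, FRC, FRB copies FRC into FRT when FRA >= 0.0 and FRB +// otherwise; a NaN comparison value selects FRB.)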
+def FSELD : AForm_1<63, 23, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), + "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>; +def FSELS : AForm_1<63, 23, + (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>; +let Uses = [RM] in { + def FADD : AForm_2<63, 21, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fadd $FRT, $FRA, $FRB", FPGeneral, + [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; + def FADDS : AForm_2<59, 21, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), + "fadds $FRT, $FRA, $FRB", FPGeneral, + [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; + def FDIV : AForm_2<63, 18, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fdiv $FRT, $FRA, $FRB", FPDivD, + [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>; + def FDIVS : AForm_2<59, 18, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), + "fdivs $FRT, $FRA, $FRB", FPDivS, + [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; + def FMUL : AForm_3<63, 25, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fmul $FRT, $FRA, $FRB", FPFused, + [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>; + def FMULS : AForm_3<59, 25, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), + "fmuls $FRT, $FRA, $FRB", FPGeneral, + [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; + def FSUB : AForm_2<63, 20, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fsub $FRT, $FRA, $FRB", FPGeneral, + [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; + def FSUBS : AForm_2<59, 20, + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), + "fsubs $FRT, $FRA, $FRB", FPGeneral, + [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>; + } +} + +let PPC970_Unit = 1 in { // FXU Operations. +// M-Form instructions. rotate and mask instructions. +// +let isCommutable = 1 in { +// RLWIMI can be commuted if the rotate amount is zero. +def RLWIMI : MForm_2<20, + (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate, + []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; +} +def RLWINM : MForm_2<21, + (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; +def RLWINMo : MForm_2<21, + (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>, isDOT, PPC970_DGroup_Cracked; +def RLWNM : MForm_2<23, + (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME), + "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral, + []>; +} + + +//===----------------------------------------------------------------------===// +// DWARF Pseudo Instructions +// + +def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), + "${:comment} .loc $file, $line, $col", + [(dwarf_loc (i32 imm:$line), (i32 imm:$col), + (i32 imm:$file))]>; + +//===----------------------------------------------------------------------===// +// PowerPC Instruction Patterns +// + +// Arbitrary immediate support. Implement in terms of LIS/ORI. +def : Pat<(i32 imm:$imm), + (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; + +// Implement the 'not' operation with the NOR instruction. +def NOT : Pat<(not GPRC:$in), + (NOR GPRC:$in, GPRC:$in)>; + +// ADD an arbitrary immediate. +def : Pat<(add GPRC:$in, imm:$imm), + (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>; +// OR an arbitrary immediate. 
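+// (ori/oris zero-extend, so the plain high half HI16 suffices here; the ADD +// pattern above instead needs HA16 = (imm + 0x8000) >> 16, because addi +// sign-extends its 16-bit operand and the high half must absorb the borrow +// from a negative low half.)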
+def : Pat<(or GPRC:$in, imm:$imm), + (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +// XOR an arbitrary immediate. +def : Pat<(xor GPRC:$in, imm:$imm), + (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +// SUBFIC +def : Pat<(sub immSExt16:$imm, GPRC:$in), + (SUBFIC GPRC:$in, imm:$imm)>; + +// SHL/SRL +def : Pat<(shl GPRC:$in, (i32 imm:$imm)), + (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>; +def : Pat<(srl GPRC:$in, (i32 imm:$imm)), + (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>; + +// ROTL +def : Pat<(rotl GPRC:$in, GPRC:$sh), + (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>; +def : Pat<(rotl GPRC:$in, (i32 imm:$imm)), + (RLWINM GPRC:$in, imm:$imm, 0, 31)>; + +// RLWNM +def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), + (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; + +// Calls +def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)), + (BL_Macho tglobaladdr:$dst)>; +def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)), + (BL_Macho texternalsym:$dst)>; +def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)), + (BL_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)), + (BL_ELF texternalsym:$dst)>; + + +def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), + (TCRETURNdi texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), + (TCRETURNri CTRRC:$dst, imm:$imm)>; + + + +// Hi and Lo for Darwin Global Addresses. +def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; +def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>; +def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>; +def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>; +def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; +def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; +def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS GPRC:$in, tglobaladdr:$g)>; +def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), + (ADDIS GPRC:$in, tconstpool:$g)>; +def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), + (ADDIS GPRC:$in, tjumptable:$g)>; + +// Fused negative multiply subtract, alternate pattern +def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), + (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, + Requires<[FPContractions]>; +def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), + (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, + Requires<[FPContractions]>; + +// Standard shifts. These are represented separately from the real shifts above +// so that we can distinguish between shifts that allow 5-bit and 6-bit shift +// amounts. 
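+// slw/srw/sraw honor six bits of the shift amount, so variable amounts of +// 32..63 produce 0 (or a word of sign bits for sraw), whereas the +// rotate-based rlwinm/rlwnm forms only take a 5-bit amount.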
+def : Pat<(sra GPRC:$rS, GPRC:$rB), + (SRAW GPRC:$rS, GPRC:$rB)>; +def : Pat<(srl GPRC:$rS, GPRC:$rB), + (SRW GPRC:$rS, GPRC:$rB)>; +def : Pat<(shl GPRC:$rS, GPRC:$rB), + (SLW GPRC:$rS, GPRC:$rB)>; + +def : Pat<(zextloadi1 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(zextloadi1 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi1 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), + (LHZ iaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), + (LHZX xaddr:$src)>; +def : Pat<(extloadf32 iaddr:$src), + (FMRSD (LFS iaddr:$src))>; +def : Pat<(extloadf32 xaddr:$src), + (FMRSD (LFSX xaddr:$src))>; + +// Memory barriers +def : Pat<(membarrier (i32 imm:$ll), + (i32 imm:$ls), + (i32 imm:$sl), + (i32 imm:$ss), + (i32 imm:$device)), + (SYNC)>; + +include "PPCInstrAltivec.td" +include "PPCInstr64Bit.td" diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp new file mode 100644 index 000000000000..035647ec5a30 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -0,0 +1,437 @@ +//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the 32-bit PowerPC target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "PPCJITInfo.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" +#include "llvm/Function.h" +#include "llvm/System/Memory.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +#define BUILD_ADDIS(RD,RS,IMM16) \ + ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) +#define BUILD_ORI(RD,RS,UIMM16) \ + ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_ORIS(RD,RS,UIMM16) \ + ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_RLDICR(RD,RS,SH,ME) \ + ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \ + (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1)) +#define BUILD_MTSPR(RS,SPR) \ + ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) +#define BUILD_BCCTRx(BO,BI,LINK) \ + ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1)) +#define BUILD_B(TARGET, LINK) \ + ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1)) + +// Pseudo-ops +#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) +#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6) +#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) +#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) + +static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ + intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2; + unsigned *AtI = (unsigned*)(intptr_t)At; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? 
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target + } else if (!is64Bit) { + AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[2] = BUILD_MTCTR(12); // mtctr r12 + AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl + } else { + AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address) + AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32 + AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address) + AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[5] = BUILD_MTCTR(12); // mtctr r12 + AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl + } +} + +extern "C" void PPC32CompilationCallback(); +extern "C" void PPC64CompilationCallback(); + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ + !(defined(__ppc64__) || defined(__FreeBSD__)) +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl _PPC32CompilationCallback\n" +"_PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 13 doubles f[1-13]. + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // PowerPC64 ABI linkage - 24 bytes + // parameters - 32 bytes + // 13 double registers - 104 bytes + // 8 int registers - 32 bytes + "mflr r0\n" + "stw r0, 8(r1)\n" + "stwu r1, -208(r1)\n" + // Save all int arg registers + "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" + "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" + "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" + "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" + // Save all call-clobbered FP regs. + "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" + "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" + "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" + "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" + "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" + "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" + "stfd f1, 72(r1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr r3, r0\n" + "lwz r2, 208(r1)\n" // stub's frame + "lwz r4, 8(r2)\n" // stub's lr + "li r5, 0\n" // 0 == 32 bit + "bl _PPCCompilationCallbackC\n" + "mtctr r3\n" + // Restore all int arg registers + "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" + "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" + "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" + "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" + // Restore all FP arg registers + "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" + "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" + "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" + "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" + "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" + "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" + "lfd f1, 72(r1)\n" + // Pop 3 frames off the stack and branch to target + "lwz r1, 208(r1)\n" + "lwz r2, 8(r1)\n" + "mtlr r2\n" + "bctr\n" + ); + +#elif defined(__PPC__) && !defined(__ppc64__) +// Linux & FreeBSD / PPC 32 support + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us.
Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl PPC32CompilationCallback\n" +"PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 8 doubles f[1-8]. + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // 8 double registers - 64 bytes + // 8 int registers - 32 bytes + "mflr 0\n" + "stw 0, 4(1)\n" + "stwu 1, -104(1)\n" + // Save all int arg registers + "stw 10, 100(1)\n" "stw 9, 96(1)\n" + "stw 8, 92(1)\n" "stw 7, 88(1)\n" + "stw 6, 84(1)\n" "stw 5, 80(1)\n" + "stw 4, 76(1)\n" "stw 3, 72(1)\n" + // Save all call-clobbered FP regs. + "stfd 8, 64(1)\n" + "stfd 7, 56(1)\n" "stfd 6, 48(1)\n" + "stfd 5, 40(1)\n" "stfd 4, 32(1)\n" + "stfd 3, 24(1)\n" "stfd 2, 16(1)\n" + "stfd 1, 8(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr 3, 0\n" + "lwz 5, 104(1)\n" // stub's frame + "lwz 4, 4(5)\n" // stub's lr + "li 5, 0\n" // 0 == 32 bit + "bl PPCCompilationCallbackC\n" + "mtctr 3\n" + // Restore all int arg registers + "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" + "lwz 8, 92(1)\n" "lwz 7, 88(1)\n" + "lwz 6, 84(1)\n" "lwz 5, 80(1)\n" + "lwz 4, 76(1)\n" "lwz 3, 72(1)\n" + // Restore all FP arg registers + "lfd 8, 64(1)\n" + "lfd 7, 56(1)\n" "lfd 6, 48(1)\n" + "lfd 5, 40(1)\n" "lfd 4, 32(1)\n" + "lfd 3, 24(1)\n" "lfd 2, 16(1)\n" + "lfd 1, 8(1)\n" + // Pop 3 frames off the stack and branch to target + "lwz 1, 104(1)\n" + "lwz 0, 4(1)\n" + "mtlr 0\n" + "bctr\n" + ); +#else +void PPC32CompilationCallback() { + assert(0 && "This is not a power pc, you can't execute this!"); + abort(); +} +#endif + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ + defined(__ppc64__) +asm( + ".text\n" + ".align 2\n" + ".globl _PPC64CompilationCallback\n" +"_PPC64CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 13 doubles f[1-13]. + // FIXME: need to save v[0-19] for altivec? + // Set up a proper stack frame + // Layout + // PowerPC64 ABI linkage - 48 bytes + // parameters - 64 bytes + // 13 double registers - 104 bytes + // 8 int registers - 64 bytes + "mflr r0\n" + "std r0, 16(r1)\n" + "stdu r1, -280(r1)\n" + // Save all int arg registers + "std r10, 272(r1)\n" "std r9, 264(r1)\n" + "std r8, 256(r1)\n" "std r7, 248(r1)\n" + "std r6, 240(r1)\n" "std r5, 232(r1)\n" + "std r4, 224(r1)\n" "std r3, 216(r1)\n" + // Save all call-clobbered FP regs. + "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n" + "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n" + "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n" + "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n" + "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n" + "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n" + "stfd f1, 112(r1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 1.
+ "mr r3, r0\n" + "ld r2, 280(r1)\n" // stub's frame + "ld r4, 16(r2)\n" // stub's lr + "li r5, 1\n" // 1 == 64 bit + "bl _PPCCompilationCallbackC\n" + "mtctr r3\n" + // Restore all int arg registers + "ld r10, 272(r1)\n" "ld r9, 264(r1)\n" + "ld r8, 256(r1)\n" "ld r7, 248(r1)\n" + "ld r6, 240(r1)\n" "ld r5, 232(r1)\n" + "ld r4, 224(r1)\n" "ld r3, 216(r1)\n" + // Restore all FP arg registers + "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n" + "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n" + "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n" + "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n" + "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n" + "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n" + "lfd f1, 112(r1)\n" + // Pop 3 frames off the stack and branch to target + "ld r1, 280(r1)\n" + "ld r2, 16(r1)\n" + "mtlr r2\n" + "bctr\n" + ); +#else +void PPC64CompilationCallback() { + assert(0 && "This is not a power pc, you can't execute this!"); + abort(); +} +#endif + +extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { + // Adjust the pointer to the address of the call instruction in the stub + // emitted by emitFunctionStub, rather than the instruction after it. + unsigned *StubCallAddr = StubCallAddrPlus4 - 1; + unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; + + void *Target = JITCompilerFunction(StubCallAddr); + + // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite + // it to branch directly to the destination. If so, rewrite it so it does not + // need to go through the stub anymore. + unsigned OrigCallInst = *OrigCallAddr; + if ((OrigCallInst >> 26) == 18) { // Direct call. + intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? + // Clear the original target out. + OrigCallInst &= (63 << 26) | 3; + // Fill in the new target. + OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; + // Replace the call. + *OrigCallAddr = OrigCallInst; + } + } + + // Assert that we are coming from a stub that was created with our + // emitFunctionStub. + if ((*StubCallAddr >> 26) == 18) + StubCallAddr -= 3; + else { + assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); + StubCallAddr -= is64Bit ? 9 : 6; + } + + // Rewrite the stub with an unconditional branch to the target, for any users + // who took the address of the stub. + EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + + // Put the address of the target function to call and the address to return to + // after calling the target function in a place that is easy to get on the + // stack after we restore all regs. + return Target; +} + + + +TargetJITInfo::LazyResolverFn +PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { + JITCompilerFunction = Fn; + return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; +} + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ +defined(__APPLE__) +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#endif + +void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. 
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback && + Fn != (void*)(intptr_t)PPC64CompilationCallback) { + JCE.startGVStub(F, 7*4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4); + return JCE.finishGVStub(F); + } + + JCE.startGVStub(F, 10*4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + if (is64Bit) { + JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0xf9610060); // std r11, 96(r1) + } else if (TM.getSubtargetImpl()->isMachoABI()){ + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610028); // stw r11, 40(r1) + } else { + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610024); // stw r11, 36(r1) + } + intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); + sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4); + return JCE.finishGVStub(F); +} + + +void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((PPC::RelocationType)MR->getRelocationType()) { + default: assert(0 && "Unknown relocation type!"); + case PPC::reloc_pcrel_bx: + // PC-relative relocation for b and bl instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; + break; + case PPC::reloc_pcrel_bcx: + // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other + // bcx instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2; + break; + case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr + case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr + ResultPtr += MR->getConstantVal(); + + // If this is a high-part access, get the high-part. + if (MR->getRelocationType() == PPC::reloc_absolute_high) { + // If the low part will have a carry (really a borrow) from the low + // 16-bits into the high 16, add a bit to borrow from. + if (((int)ResultPtr << 16) < 0) + ResultPtr += 1 << 16; + ResultPtr >>= 16; + } + + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 65535; + unsigned HighBits = *RelocPos & ~65535; + *RelocPos = LowBits | HighBits; // Slam into low 16-bits + break; + } + case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr + ResultPtr += MR->getConstantVal(); + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. 
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; + unsigned HighBits = *RelocPos & 0xFFFF0003; + *RelocPos = LowBits | HighBits; // Slam into low 14-bits. + break; + } + } + } +} + +void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); +} diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h new file mode 100644 index 000000000000..2e25b295f432 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -0,0 +1,48 @@ +//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_JITINFO_H +#define POWERPC_JITINFO_H + +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/CodeGen/JITCodeEmitter.h" + +namespace llvm { + class PPCTargetMachine; + + class PPCJITInfo : public TargetJITInfo { + protected: + PPCTargetMachine &TM; + bool is64Bit; + public: + PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) { + useGOT = 0; + is64Bit = tmIs64Bit; + } + + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + }; +} + +#endif diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp new file mode 100644 index 000000000000..3bfa6d719105 --- /dev/null +++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp @@ -0,0 +1,151 @@ +//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Mach-O writer information for the PowerPC backend. +// +//===----------------------------------------------------------------------===// + +#include "PPCMachOWriterInfo.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachORelocation.h" +#include "llvm/Support/OutputBuffer.h" +#include <cstdio> +using namespace llvm; + +PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM) + : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ? + HDR_CPU_TYPE_POWERPC64 : + HDR_CPU_TYPE_POWERPC, + HDR_CPU_SUBTYPE_POWERPC_ALL) {} +PPCMachOWriterInfo::~PPCMachOWriterInfo() {} + +/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or +/// more PowerPC MachORelocation(s), add the new relocations to the +/// MachOSection, and rewrite the instruction at the section offset if required +/// by that relocation type.
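+/// The return value is the number of MachORelocation entries written to +/// RelocOut.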
+unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, + unsigned ToAddr, + unsigned ToIdx, + OutputBuffer &RelocOut, + OutputBuffer &SecOut, + bool Scattered, + bool isExtern) const { + unsigned NumRelocs = 0; + uint64_t Addr = 0; + + // Get the address of whatever it is we're relocating, if possible. + if (!isExtern) + Addr = (uintptr_t)MR.getResultPointer() + ToAddr; + + switch ((PPC::RelocationType)MR.getRelocationType()) { + default: assert(0 && "Unknown PPC relocation type!"); + case PPC::reloc_absolute_low_ix: + assert(0 && "Unhandled PPC relocation type!"); + break; + case PPC::reloc_vanilla: + { + // FIXME: need to handle 64 bit vanilla relocs + MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx, + false, 2, isExtern, + PPC_RELOC_VANILLA, + Scattered, (intptr_t)MR.getResultPointer()); + ++NumRelocs; + + if (Scattered) { + RelocOut.outword(VANILLA.getPackedFields()); + RelocOut.outword(VANILLA.getAddress()); + } else { + RelocOut.outword(VANILLA.getAddress()); + RelocOut.outword(VANILLA.getPackedFields()); + } + + intptr_t SymbolOffset; + + if (Scattered) + SymbolOffset = Addr + MR.getConstantVal(); + else + SymbolOffset = Addr; + + printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx, + unsigned(MR.getMachineCodeOffset()), + unsigned(SymbolOffset)); + SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset()); + } + break; + case PPC::reloc_pcrel_bx: + { + // FIXME: Presumably someday we will need to branch to other, non-extern + // functions too. Need to figure out some way to distinguish between + // target is BB and target is function. + if (isExtern) { + MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2, + isExtern, PPC_RELOC_BR24, Scattered, + (intptr_t)MR.getMachineCodeOffset()); + RelocOut.outword(BR24.getAddress()); + RelocOut.outword(BR24.getPackedFields()); + ++NumRelocs; + } + + Addr -= MR.getMachineCodeOffset(); + Addr >>= 2; + Addr &= 0xFFFFFF; + Addr <<= 2; + Addr |= (SecOut[MR.getMachineCodeOffset()] << 24); + Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3); + SecOut.fixword(Addr, MR.getMachineCodeOffset()); + break; + } + case PPC::reloc_pcrel_bcx: + { + Addr -= MR.getMachineCodeOffset(); + Addr &= 0xFFFC; + + SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); + break; + } + case PPC::reloc_absolute_high: + { + MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2, + isExtern, PPC_RELOC_HA16); + MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern, + PPC_RELOC_PAIR); + NumRelocs = 2; + + RelocOut.outword(HA16.getRawAddress()); + RelocOut.outword(HA16.getPackedFields()); + RelocOut.outword(PAIR.getRawAddress()); + RelocOut.outword(PAIR.getPackedFields()); + + Addr += 0x8000; + + SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2); + break; + } + case PPC::reloc_absolute_low: + { + MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2, + isExtern, PPC_RELOC_LO16); + MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern, + PPC_RELOC_PAIR); + NumRelocs = 2; + + RelocOut.outword(LO16.getRawAddress()); + RelocOut.outword(LO16.getPackedFields()); + RelocOut.outword(PAIR.getRawAddress()); + RelocOut.outword(PAIR.getPackedFields()); + + SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); + break; + } + } + + return NumRelocs; +} diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h new file mode 100644 index 000000000000..d46334df2602 --- /dev/null +++ b/lib/Target/PowerPC/PPCMachOWriterInfo.h @@ -0,0 +1,55 @@ +//===-- 
PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Mach-O writer information for the PowerPC backend. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC_MACHO_WRITER_INFO_H +#define PPC_MACHO_WRITER_INFO_H + +#include "llvm/Target/TargetMachOWriterInfo.h" + +namespace llvm { + + // Forward declarations + class MachineRelocation; + class OutputBuffer; + class PPCTargetMachine; + + class PPCMachOWriterInfo : public TargetMachOWriterInfo { + public: + PPCMachOWriterInfo(const PPCTargetMachine &TM); + virtual ~PPCMachOWriterInfo(); + + virtual unsigned GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, + unsigned ToAddr, + unsigned ToIdx, + OutputBuffer &RelocOut, + OutputBuffer &SecOut, + bool Scattered, bool Extern) const; + + // Constants for the relocation r_type field. + // See <mach-o/ppc/reloc.h>. + enum { + PPC_RELOC_VANILLA, // generic relocation + PPC_RELOC_PAIR, // the second relocation entry of a pair + PPC_RELOC_BR14, // 14 bit branch displacement to word address + PPC_RELOC_BR24, // 24 bit branch displacement to word address + PPC_RELOC_HI16, // a PAIR follows with the low 16 bits + PPC_RELOC_LO16, // a PAIR follows with the high 16 bits + PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b + PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero + }; + }; + +} // end llvm namespace + +#endif // PPC_MACHO_WRITER_INFO_H diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h new file mode 100644 index 000000000000..42883d78728e --- /dev/null +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -0,0 +1,104 @@ +//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PowerPC specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC_MACHINE_FUNCTION_INFO_H +#define PPC_MACHINE_FUNCTION_INFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and +/// contains private PowerPC target-specific information for each +/// MachineFunction. +class PPCFunctionInfo : public MachineFunctionInfo { +private: + /// FramePointerSaveIndex - Frame index of where the old frame pointer is + /// stored. Also used as an anchor for instructions that need to be altered + /// when using frame pointers (dyna_add, dyna_sub.) + int FramePointerSaveIndex; + + /// ReturnAddrSaveIndex - Frame index of where the return address is stored. + /// + int ReturnAddrSaveIndex; + + /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current + /// function. This is only valid after the initial scan of the function by + /// PEI. + bool MustSaveLR; + + /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR; + + /// LRStoreRequired - The bool indicates whether there is some explicit use of + /// the LR/LR8 stack slot that is not obvious from scanning the code. This + /// requires that the code generator produce a store of LR to the stack on + /// entry, even though LR may otherwise apparently not be used. + bool LRStoreRequired; + + /// MinReservedArea - This is the frame size that is at least reserved in a + /// potential caller (parameter+linkage area). + unsigned MinReservedArea; + + /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum + /// amount the stack pointer is adjusted to make the frame bigger for tail + /// calls. Used for creating an area before the register spill area. + int TailCallSPDelta; + + /// HasFastCall - Does this function contain a fast call. Used to determine + /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). + bool HasFastCall; + +public: + PPCFunctionInfo(MachineFunction &MF) + : FramePointerSaveIndex(0), + ReturnAddrSaveIndex(0), + SpillsCR(false), + LRStoreRequired(false), + MinReservedArea(0), + TailCallSPDelta(0), + HasFastCall(false) {} + + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + + int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } + void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } + + unsigned getMinReservedArea() const { return MinReservedArea; } + void setMinReservedArea(unsigned size) { MinReservedArea = size; } + + int getTailCallSPDelta() const { return TailCallSPDelta; } + void setTailCallSPDelta(int size) { TailCallSPDelta = size; } + + /// MustSaveLR - This is set when the prolog/epilog inserter does its initial + /// scan of the function. It is true if the LR/LR8 register is ever explicitly + /// defined/clobbered in the machine function (e.g. by calls and movpctolr, + /// which is used in PIC generation), or if the LR stack slot is explicitly + /// referenced by builtin_return_address. + void setMustSaveLR(bool U) { MustSaveLR = U; } + bool mustSaveLR() const { return MustSaveLR; } + + void setSpillsCR() { SpillsCR = true; } + bool isCRSpilled() const { return SpillsCR; } + + void setLRStoreRequired() { LRStoreRequired = true; } + bool isLRStoreRequired() const { return LRStoreRequired; } + + void setHasFastCall() { HasFastCall = true; } + bool hasFastCall() const { return HasFastCall;} +}; + +} // end of namespace llvm + + +#endif diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h new file mode 100644 index 000000000000..3164e33faae9 --- /dev/null +++ b/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -0,0 +1,6586 @@ +//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file, which was autogenerated by llvm-PerfectShuffle, contains data +// for the optimal way to build a perfect shuffle without using vperm. +// +//===----------------------------------------------------------------------===// + +// 31 entries have cost 0 +// 292 entries have cost 1 +// 1384 entries have cost 2 +// 3061 entries have cost 3 +// 1733 entries have cost 4 +// 60 entries have cost 5 + +// This table is 6561*4 = 26244 bytes in size. 
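+// Each of the four mask elements takes one of nine values (0-7 or 'u' for +// undef), so the table is indexed base-9 by the four elements; 9^4 = 6561 +// entries, plus one sentinel entry.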
+static const unsigned PerfectShuffleTable[6561+1] = { + 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS + 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS + 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0> + 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0> + 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS + 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5> + 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0> + 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0> + 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS + 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1> + 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS + 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS + 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0> + 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5> + 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7> + 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7> + 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1> + 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS + 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0> + 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS + 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2> + 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0> + 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5> + 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1> + 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7> + 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1> + 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS + 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0> + 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS + 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3> + 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0> + 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6> + 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0> + 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7> + 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7> + 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS + 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0> + 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS + 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2> + 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5> + 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS + 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2> + 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5> + 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS + 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0> + 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS + 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3> + 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3> + 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5> + 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5> + 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7> + 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7> + 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7> + 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0> + 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS + 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 
<0,0,0,0>, <6,2,7,3> + 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5> + 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS + 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1> + 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6> + 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0> + 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0> + 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0> + 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS + 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3> + 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3> + 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6> + 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0> + 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7> + 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7> + 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0> + 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS + 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS + 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS + 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0> + 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5> + 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS + 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7> + 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u> + 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS + 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0> + 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS + 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1> + 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0> + 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5> + 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5> + 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7> + 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0> + 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS + 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1> + 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1> + 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3> + 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS + 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5> + 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1> + 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1> + 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0> + 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS + 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2> + 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2> + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS + 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2> + 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7> + 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2> + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0> + 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1> + 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0> + 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3> + 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6> + 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0> + 
2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0> + 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1> + 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2> + 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS + 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1> + 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0> + 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4> + 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS + 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1> + 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4> + 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1> + 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1> + 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2> + 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0> + 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5> + 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1> + 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1> + 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1> + 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1> + 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS + 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7> + 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3> + 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0> + 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS + 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7> + 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6> + 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1> + 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1> + 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0> + 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1> + 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0> + 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1> + 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6> + 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5> + 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0> + 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7> + 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2> + 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS + 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1> + 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS + 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7> + 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u> + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0> + 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1> + 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2> + 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS + 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS + 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5> + 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7> + 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1> + 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS 
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0> + 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1> + 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1> + 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5> + 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6> + 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3> + 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0> + 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1> + 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2> + 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1> + 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5> + 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6> + 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7> + 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2> + 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3> + 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1> + 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2> + 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2> + 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0> + 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5> + 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6> + 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7> + 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0> + 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0> + 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0> + 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5> + 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2> + 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1> + 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1> + 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6> + 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7> + 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0> + 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1> + 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0> + 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2> + 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2> + 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS + 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4> + 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5> + 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0> + 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7> + 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS + 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS + 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0> + 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7> + 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7> + 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS + 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0> + 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7> + 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2> + 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7> + 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2> + 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1> + 3377398590U, // 
<0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2> + 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS + 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS + 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5> + 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2> + 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1> + 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS + 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0> + 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1> + 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1> + 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5> + 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6> + 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1> + 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0> + 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2> + 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0> + 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3> + 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5> + 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0> + 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7> + 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7> + 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2> + 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1> + 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2> + 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5> + 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7> + 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7> + 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2> + 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3> + 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0> + 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2> + 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0> + 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6> + 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5> + 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3> + 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7> + 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0> + 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2> + 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1> + 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3> + 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3> + 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6> + 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0> + 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0> + 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7> + 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2> + 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2> + 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1> + 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2> + 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3> + 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6> + 2758986242U, // 
<0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6> + 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4> + 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7> + 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2> + 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6> + 3405916003U, // <0,3,5,1>: Cost 4 vmrglw , <2,5,3,1> + 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2> + 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3> + 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4> + 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5> + 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0> + 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7> + 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0> + 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7> + 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0> + 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3> + 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6> + 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7> + 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5> + 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6> + 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0> + 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7> + 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0> + 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 , <7,1,2,u> + 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3> + 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3> + 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4> + 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7> + 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6> + 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7> + 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3> + 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1> + 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2> + 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6> + 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 , <3,u,6,7> + 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7> + 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2> + 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0> + 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS + 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3> + 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0> + 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5> + 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS + 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7> + 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1> + 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS + 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1> + 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3> + 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2> + 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1> + 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS + 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2> + 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1> + 
67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS + 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4> + 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5> + 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2> + 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4> + 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS + 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS + 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7> + 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4> + 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS + 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2> + 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4> + 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4> + 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3> + 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6> + 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS + 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4> + 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4> + 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS + 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4> + 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0> + 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5> + 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4> + 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4> + 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS + 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5> + 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5> + 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS + 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS + 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0> + 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2> + 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5> + 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS + 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6> + 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5> + 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7> + 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5> + 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3> + 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4> + 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS + 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS + 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6> + 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4> + 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4> + 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0> + 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4> + 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3> + 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4> + 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS + 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0> + 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6> + 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0> + 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0> + 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1> + 2220076002U, // 
<0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0> + 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2> + 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u> + 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS + 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u> + 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS + 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0> + 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3> + 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0> + 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5> + 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5> + 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1> + 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0> + 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS + 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1> + 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3> + 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2> + 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1> + 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5> + 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0> + 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2> + 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5> + 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0> + 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2> + 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2> + 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1> + 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5> + 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3> + 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7> + 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1> + 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5> + 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2> + 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5> + 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5> + 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1> + 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5> + 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0> + 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6> + 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5> + 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5> + 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1> + 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4> + 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4> + 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3> + 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6> + 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS + 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5> + 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7> + 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS + 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0> + 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1> + 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3> + 3376054482U, // <0,5,5,3>: Cost 4 vmrglw 
<3,4,0,5>, <0,2,5,3> + 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5> + 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5> + 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0> + 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0> + 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 , <5,u,7,0> + 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS + 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7> + 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3> + 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4> + 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS + 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0> + 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7> + 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0> + 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0> + 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2> + 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0> + 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3> + 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1> + 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5> + 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0> + 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6> + 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0> + 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5> + 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS + 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3> + 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2> + 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u> + 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5> + 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0> + 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0> + 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0> + 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0> + 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1> + 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3> + 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4> + 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5> + 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0> + 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6> + 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS + 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS + 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2> + 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1> + 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3> + 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5> + 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5> + 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1> + 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6> + 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS + 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS + 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3> + 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3> + 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3> + 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0> + 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS + 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0> + 3777390522U, // 
<0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7> + 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3> + 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3> + 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2> + 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0> + 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6> + 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5> + 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6> + 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6> + 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0> + 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS + 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS + 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1> + 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5> + 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3> + 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5> + 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4> + 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS + 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6> + 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS + 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS + 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6> + 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1> + 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0> + 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3> + 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS + 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5> + 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0> + 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS + 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS + 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0> + 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1> + 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3> + 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5> + 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS + 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6> + 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6> + 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7> + 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7> + 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1> + 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2> + 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2> + 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0> + 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5> + 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6> + 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2> + 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS + 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS + 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1> + 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1> + 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3> + 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0> + 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5> + 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6> + 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6> + 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS + 1221963063U, // 
<0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS + 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0> + 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7> + 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3> + 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5> + 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6> + 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0> + 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7> + 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2> + 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1> + 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0> + 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3> + 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6> + 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5> + 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1> + 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7> + 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2> + 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3> + 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2> + 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2> + 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0> + 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3> + 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0> + 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7> + 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2> + 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0> + 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0> + 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3> + 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3> + 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7> + 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6> + 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7> + 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7> + 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7> + 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7> + 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2> + 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0> + 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3> + 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7> + 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6> + 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7> + 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7> + 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0> + 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1> + 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3> + 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2> + 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS + 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5> + 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7> + 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7> + 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7> + 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7> + 3780055420U, 
// <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3> + 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7> + 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 , <7,6,3,7> + 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7> + 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0> + 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6> + 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0> + 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0> + 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0> + 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0> + 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3> + 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3> + 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6> + 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6> + 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7> + 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7> + 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7> + 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2> + 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u> + 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3> + 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6> + 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u> + 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7> + 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2> + 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS + 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS + 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0> + 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS + 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS + 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS + 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7> + 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS + 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS + 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, + 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS + 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, + 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, + 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, + 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS + 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, + 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS + 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS + 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS + 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS + 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2> + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS + 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS + 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7> + 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2> + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, + 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS + 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u> + 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, + 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, + 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS + 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u> + 2300972360U, // <0,u,3,7>: 
Cost 3 vmrglw <3,2,0,3>, RHS + 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS + 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, + 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS + 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, + 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, + 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS + 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, + 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS + 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS + 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS + 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0> + 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2> + 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5> + 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS + 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u> + 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS + 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, + 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS + 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3> + 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, + 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS + 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0> + 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6> + 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u> + 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u> + 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, + 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 , + 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u> + 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS + 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, + 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, + 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u> + 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS + 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS + 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS + 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS + 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, + 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS + 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, + 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0> + 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1> + 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0> + 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0> + 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1> + 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0> + 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7> + 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0> + 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1> + 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0> + 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS + 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0> + 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS 
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1> + 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7> + 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2> + 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0> + 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS + 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2> + 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1> + 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5> + 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5> + 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7> + 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7> + 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS + 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2> + 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1> + 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3> + 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1> + 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6> + 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0> + 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7> + 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7> + 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0> + 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 , <4,0,5,1> + 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5> + 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5> + 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4> + 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5> + 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7> + 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 , <4,7,5,0> + 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0> + 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1> + 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 , <5,2,1,3> + 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4> + 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4> + 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5> + 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0> + 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0> + 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u> + 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0> + 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS + 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6> + 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0> + 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5> + 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 , <6,5,7,1> + 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6> + 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1> + 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1> + 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0> + 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1> + 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 , <7,2,u,0> + 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 , <7,3,0,1> + 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6> + 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0> + 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0> + 3799979628U, // 
<1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7> + 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0> + 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0> + 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1> + 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 , + 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1> + 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7> + 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 , + 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1> + 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS + 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2> + 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3> + 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5> + 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5> + 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7> + 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7> + 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1> + 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS + 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS + 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2> + 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3> + 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS + 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5> + 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7> + 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1> + 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS + 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1> + 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3> + 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1> + 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS + 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5> + 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7> + 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7> + 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2> + 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1> + 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2> + 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3> + 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6> + 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5> + 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 , <3,6,0,7> + 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7> + 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2> + 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS + 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5> + 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0> + 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2> + 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4> + 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7> + 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7> + 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1> + 2284470282U, // 
<1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1> + 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2> + 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3> + 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4> + 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 , <5,6,7,0> + 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7> + 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS + 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7> + 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 , <6,2,7,3> + 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7> + 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS + 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5> + 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6> + 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0> + 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0> + 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 , <7,0,1,2> + 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1> + 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2> + 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3> + 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 , <7,4,5,6> + 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5> + 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6> + 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7> + 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 , <7,u,1,2> + 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS + 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS + 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, + 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS + 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, + 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 , + 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS + 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0> + 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS + 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2> + 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS + 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5> + 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0> + 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2> + 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0> + 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS + 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2> + 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1> + 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2> + 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS + 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5> + 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6> + 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7> + 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7> + 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS + 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS + 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2> + 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2> + 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS + 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS + 3356860763U, 
// <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5> + 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7> + 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7> + 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS + 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS + 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3> + 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS + 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5> + 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS + 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2> + 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5> + 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2> + 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS + 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4> + 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7> + 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0> + 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2> + 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5> + 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2> + 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS + 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6> + 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5> + 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0> + 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0> + 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS + 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2> + 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2> + 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3> + 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS + 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS + 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7> + 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6> + 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2> + 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS + 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2> + 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1> + 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2> + 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS + 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6> + 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1> + 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1> + 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1> + 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2> + 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS + 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2> + 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS + 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS + 2769389708U, // 
<1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1> + 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3> + 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3> + 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1> + 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5> + 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7> + 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7> + 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0> + 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1> + 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3> + 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3> + 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4> + 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5> + 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7> + 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7> + 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u> + 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2> + 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3> + 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2> + 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1> + 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6> + 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5> + 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7> + 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7> + 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2> + 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1> + 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, + 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0> + 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3> + 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4> + 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3> + 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7> + 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7> + 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1> + 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1> + 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1> + 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3> + 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3> + 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4> + 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS + 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, + 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7> + 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS + 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0> + 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1> + 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5> + 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3> + 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4> + 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5> + 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, + 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7> + 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u> + 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2> + 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7> + 3703866916U, 
// <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>,
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw , <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 , <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 , <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 , <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 , <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 , <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 ,
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>,
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 ,
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 , <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 , <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 , <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 , <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 , <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 , <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 , <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 , <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 , <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>,
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>,
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>,
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 ,
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>,
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw , <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 , <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw , <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw , <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>,
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>,
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw , <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw , <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw , <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw , <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw , <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 ,
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>,
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>,
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>,
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw , <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>,
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>,
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>,
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 , <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 , RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 , <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 , <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw , <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw , <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw , <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw , <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS,
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS,
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS,
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 , <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 , <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 , <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw , RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 , LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 , RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 , <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>,
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>,
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>,
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 , <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw , <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 , <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost
4 vsldoi12 LHS, <0,5,7,0> + 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5> + 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7> + 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS + 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 , <6,2,7,3> + 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7> + 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5> + 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7> + 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 , <6,6,6,6> + 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0> + 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS + 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0> + 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1> + 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0> + 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3> + 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 , <7,4,5,6> + 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0> + 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3> + 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 , <7,7,7,7> + 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u> + 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS + 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1> + 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS + 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, + 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS + 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS + 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, + 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u> + 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS + 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0> + 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2> + 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2> + 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2> + 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS + 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5> + 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7> + 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7> + 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS + 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1> + 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1> + 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3> + 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5> + 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5> + 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1> + 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1> + 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1> + 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3> + 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2> + 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0> + 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS + 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3> + 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3> + 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0> + 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0> + 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3> + 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1> + 2293229718U, // <3,1,3,2>: Cost 3 vmrglw 
<1,u,3,3>, <3,0,1,2> + 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3> + 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS + 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5> + 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7> + 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7> + 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS + 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4> + 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1> + 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2> + 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6> + 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS + 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6> + 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6> + 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7> + 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS + 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1> + 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1> + 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2> + 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0> + 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5> + 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5> + 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 , <5,6,7,0> + 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1> + 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1> + 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1> + 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7> + 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0> + 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7> + 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5> + 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7> + 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7> + 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0> + 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7> + 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS + 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1> + 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2> + 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3> + 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS + 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5> + 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6> + 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7> + 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u> + 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u> + 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2> + 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0> + 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS + 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3> + 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6> + 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0> + 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0> + 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0> + 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1> + 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2> + 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS + 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4> + 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5> + 
2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7> + 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7> + 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS + 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS + 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1> + 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2> + 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1> + 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS + 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0> + 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0> + 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2> + 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1> + 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS + 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2> + 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2> + 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3> + 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS + 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5> + 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7> + 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7> + 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3> + 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1> + 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1> + 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2> + 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS + 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5> + 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5> + 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7> + 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0> + 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1> + 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS + 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1> + 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2> + 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS + 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4> + 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS + 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7> + 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4> + 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS + 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS + 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1> + 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2> + 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6> + 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS + 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6> + 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7> + 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7> + 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6> + 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS + 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1> + 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6> + 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7> + 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS + 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5> + 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7> + 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2> + 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7> + 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, 
<2,7,0,1> + 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1> + 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2> + 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS + 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS + 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5> + 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6> + 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7> + 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS + 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1> + 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1> + 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2> + 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3> + 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5> + 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS + 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7> + 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0> + 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1> + 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0> + 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2> + 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0> + 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3> + 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1> + 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5> + 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6> + 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7> + 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2> + 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0> + 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1> + 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3> + 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3> + 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0> + 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0> + 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1> + 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7> + 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3> + 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS + 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3> + 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3> + 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0> + 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS + 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u> + 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3> + 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3> + 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3> + 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS + 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1> + 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3> + 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS + 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS + 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5> + 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7> + 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7> + 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS + 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0> + 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4> + 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4> + 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3> + 1215513498U, // <3,3,4,4>: Cost 2 vmrglw 
<1,2,3,4>, <1,2,3,4> + 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6> + 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6> + 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7> + 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6> + 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1> + 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1> + 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3> + 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3> + 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5> + 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5> + 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7> + 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7> + 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7> + 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7> + 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3> + 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7> + 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7> + 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7> + 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7> + 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6> + 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3> + 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3> + 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS + 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1> + 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7> + 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3> + 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS + 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5> + 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6> + 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0> + 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2> + 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 , + 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS + 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS + 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6> + 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3> + 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS + 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0> + 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0> + 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0> + 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5> + 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS + 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7> + 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7> + 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS + 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1> + 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4> + 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4> + 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS + 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0> + 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7> + 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1> + 
1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4> + 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0> + 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3> + 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2> + 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1> + 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4> + 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS + 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7> + 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4> + 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1> + 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2> + 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1> + 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3> + 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3> + 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1> + 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS + 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7> + 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7> + 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS + 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS + 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4> + 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4> + 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3> + 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4> + 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6> + 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7> + 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS + 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1> + 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5> + 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2> + 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS + 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5> + 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS + 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5> + 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS + 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS + 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7> + 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 , <6,2,7,3> + 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7> + 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7> + 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS + 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 , <6,6,6,6> + 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4> + 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS + 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 , <7,0,1,2> + 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1> + 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4> + 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3> + 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 , <7,4,5,6> + 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5> + 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6> + 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 , <7,7,7,7> + 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u> + 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS + 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 1557800657U, // <3,4,u,2>: 
Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u> + 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, + 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS + 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS + 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u> + 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS + 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS + 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2> + 2331029019U, // <3,5,0,2>: Cost 3 vmrglw , <4,u,5,2> + 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2> + 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1> + 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1> + 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6> + 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7> + 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1> + 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1> + 2333027218U, // <3,5,1,1>: Cost 3 vmrglw , <4,0,5,1> + 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5> + 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5> + 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5> + 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0> + 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5> + 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0> + 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1> + 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4> + 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5> + 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2> + 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5> + 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5> + 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3> + 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3> + 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3> + 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5> + 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS + 2331052946U, // <3,5,3,1>: Cost 3 vmrglw , <4,0,5,1> + 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3> + 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3> + 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5> + 2331053274U, // <3,5,3,5>: Cost 3 vmrglw , <4,4,5,5> + 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6> + 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7> + 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u> + 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS + 2331061138U, // <3,5,4,1>: Cost 3 vmrglw , <4,0,5,1> + 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3> + 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6> + 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 , <4,4,5,5> + 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6> + 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6> + 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7> + 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6> + 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1> + 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1> + 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5> + 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3> + 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5> + 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2800078862U, // 
<3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6> + 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7> + 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1> + 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7> + 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6> + 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4> + 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5> + 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7> + 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6> + 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0> + 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0> + 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS + 2333739922U, // <3,5,7,1>: Cost 3 vmrglw , <4,0,5,1> + 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7> + 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3> + 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS + 2333740250U, // <3,5,7,5>: Cost 3 vmrglw , <4,4,5,5> + 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6> + 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7> + 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u> + 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1> + 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2> + 2331029019U, // <3,5,u,2>: Cost 3 vmrglw , <4,u,5,2> + 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4> + 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4> + 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6> + 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0> + 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0> + 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0> + 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS + 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1> + 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3> + 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5> + 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0> + 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1> + 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS + 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS + 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1> + 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1> + 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6> + 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6> + 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5> + 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6> + 3403453240U, // <3,6,1,6>: Cost 4 vmrglw , <6,6,6,6> + 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS + 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS + 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS + 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6> + 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2> + 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6> + 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS + 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6> + 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3> + 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3> + 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3> + 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2> + 
3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1> + 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3> + 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3> + 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6> + 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6> + 2331054904U, // <3,6,3,6>: Cost 3 vmrglw , <6,6,6,6> + 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS + 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS + 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2> + 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4> + 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5> + 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5> + 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6> + 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS + 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5> + 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS + 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS + 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1> + 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1> + 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0> + 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0> + 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5> + 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5> + 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7> + 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS + 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS + 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1> + 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1> + 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2> + 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3> + 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5> + 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6> + 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7> + 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7> + 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1> + 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1> + 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3> + 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3> + 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5> + 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5> + 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6> + 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS + 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS + 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1> + 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS + 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1> + 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3> + 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5> + 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS + 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS + 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS + 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0> + 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6> + 2331030010U, // <3,7,0,3>: Cost 3 vmrglw , <6,2,7,3> + 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 
<2,6,3,7>, <0,4,1,5> + 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6> + 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0> + 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1> + 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1> + 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1> + 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0> + 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7> + 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5> + 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1> + 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7> + 3403453250U, // <3,7,1,7>: Cost 4 vmrglw , <6,6,7,7> + 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0> + 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1> + 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7> + 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2> + 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7> + 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5> + 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7> + 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7> + 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3> + 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7> + 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2> + 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1> + 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3> + 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3> + 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6> + 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6> + 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7> + 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1> + 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2> + 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS + 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3> + 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7> + 2331062778U, // <3,7,4,3>: Cost 3 vmrglw , <6,2,7,3> + 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS + 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4> + 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5> + 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2> + 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0> + 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4> + 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1> + 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6> + 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5> + 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7> + 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0> + 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7> + 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7> + 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7> + 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7> + 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0> + 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7> + 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6> + 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, 
<6,6,6,6> + 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0> + 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7> + 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS + 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1> + 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7> + 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3> + 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS + 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5> + 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7> + 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, + 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, + 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, + 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, + 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 , + 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0> + 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, + 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2> + 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS + 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, + 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS + 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6> + 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS + 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, + 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS + 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS + 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, + 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS + 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, + 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, + 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS + 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS + 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u> + 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, + 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u> + 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, + 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS + 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u> + 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u> + 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, + 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, + 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, + 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS + 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, + 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS + 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, + 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS + 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7> + 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS + 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS + 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0> + 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS + 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2> + 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS + 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4> + 1679005759U, // <3,u,4,5>: Cost 2 
vsldoi12 LHS, + 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6> + 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS + 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, + 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS + 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1> + 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5> + 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, + 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS + 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS + 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS + 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS + 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS + 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, + 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6> + 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, + 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS + 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS + 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, + 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, + 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, + 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1> + 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7> + 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS + 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, + 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5> + 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6> + 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS + 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS + 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, + 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, + 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS + 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS + 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, + 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, + 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS + 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS + 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS + 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0> + 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS + 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0> + 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3> + 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4> + 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0> + 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7> + 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7> + 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS + 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS + 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4> + 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1> + 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS + 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4> + 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1> + 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1> + 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS + 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS + 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0> + 2698585801U, // <4,0,2,3>: Cost 
3 vsldoi8 <2,3,4,0>, <2,3,4,0> + 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4> + 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 , <2,5,3,1> + 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7> + 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7> + 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0> + 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS + 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4> + 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3> + 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4> + 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS + 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0> + 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7> + 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0> + 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4> + 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1> + 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS + 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3> + 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4> + 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5> + 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS + 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7> + 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4> + 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS + 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS + 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2> + 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5> + 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5> + 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7> + 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5> + 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS + 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1> + 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS + 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3> + 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0> + 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7> + 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1> + 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6> + 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0> + 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS + 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0> + 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS + 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3> + 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0> + 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5> + 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0> + 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7> + 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7> + 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS + 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS + 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u> + 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS + 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7> + 
2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u> + 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS + 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 , LHS + 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS + 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4> + 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0> + 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5> + 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5> + 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 , <6,2,7,3> + 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 , <7,0,1,2> + 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1> + 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1> + 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4> + 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4> + 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3> + 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4> + 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5> + 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7> + 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7> + 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4> + 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS + 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4> + 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2> + 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4> + 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS + 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5> + 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7> + 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2> + 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4> + 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1> + 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4> + 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4> + 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3> + 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6> + 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5> + 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7> + 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1> + 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1> + 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 , LHS + 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1> + 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4> + 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4> + 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 , <4,4,4,4> + 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS + 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7> + 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0> + 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS + 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5> + 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2> + 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS + 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5> + 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7> + 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 , <7,0,1,2> + 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS + 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, 
<6,0,1,2> + 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7> + 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4> + 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5> + 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7> + 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5> + 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6> + 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1> + 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7> + 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1> + 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1> + 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2> + 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1> + 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6> + 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5> + 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6> + 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7> + 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1> + 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u> + 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4> + 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS + 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5> + 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7> + 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 , <7,0,1,2> + 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS + 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS + 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS + 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2> + 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS + 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5> + 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1> + 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7> + 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1> + 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS + 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS + 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5> + 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2> + 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS + 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5> + 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0> + 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4> + 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7> + 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS + 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1> + 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4> + 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2> + 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3> + 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0> + 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5> + 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7> + 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2> + 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3> + 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1> + 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1> + 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, 
<2,3,2,2> + 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4> + 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0> + 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4> + 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7> + 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4> + 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1> + 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1> + 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5> + 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2> + 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS + 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4> + 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5> + 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7> + 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 , <7,0,u,2> + 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS + 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS + 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5> + 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS + 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS + 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6> + 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1> + 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS + 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS + 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4> + 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7> + 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7> + 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7> + 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4> + 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7> + 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2> + 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7> + 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 , <2,7,0,1> + 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2> + 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2> + 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS + 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4> + 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5> + 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7> + 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1> + 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS + 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1> + 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u> + 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS + 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5> + 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6> + 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4> + 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS + 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2> + 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2> + 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3> + 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3> + 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4> + 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 
<7,4,3,0>, <5,6,7,4> + 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7> + 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7> + 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2> + 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0> + 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1> + 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4> + 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3> + 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4> + 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5> + 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6> + 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7> + 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4> + 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3> + 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4> + 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2> + 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4> + 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3> + 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u> + 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3> + 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3> + 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4> + 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2> + 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3> + 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3> + 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3> + 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4> + 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5> + 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7> + 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7> + 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3> + 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1> + 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2> + 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4> + 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3> + 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5> + 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6> + 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7> + 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7> + 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1> + 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1> + 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5> + 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5> + 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7> + 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7> + 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2> + 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7> + 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4> + 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3> + 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6> + 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7> + 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5> + 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6> 
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4> + 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7> + 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0> + 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1> + 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3> + 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3> + 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4> + 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 , <5,6,7,4> + 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6> + 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7> + 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3> + 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2> + 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u> + 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6> + 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7> + 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7> + 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2> + 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS + 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4> + 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1> + 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5> + 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS + 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2> + 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7> + 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS + 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0> + 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1> + 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3> + 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3> + 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4> + 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS + 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7> + 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7> + 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3> + 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS + 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4> + 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3> + 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4> + 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4> + 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS + 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7> + 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3> + 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4> + 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2> + 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4> + 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4> + 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3> + 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4> + 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS + 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4> + 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7> + 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3> + 1570373734U, 
// <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS + 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1> + 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2> + 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4> + 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS + 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS + 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3> + 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4> + 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS + 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1> + 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4> + 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2> + 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5> + 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5> + 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS + 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0> + 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS + 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1> + 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5> + 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3> + 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4> + 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4> + 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS + 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2> + 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4> + 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS + 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2> + 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1> + 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3> + 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3> + 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4> + 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS + 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7> + 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7> + 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS + 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1> + 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS + 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2> + 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u> + 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS + 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS + 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0> + 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS + 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0> + 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5> + 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0> + 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5> + 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5> + 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5> + 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0> + 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0> + 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5> + 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0> + 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3> + 3761087567U, // <4,5,1,4>: 
Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5> + 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5> + 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6> + 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7> + 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5> + 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS + 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5> + 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5> + 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5> + 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5> + 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6> + 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7> + 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5> + 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5> + 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2> + 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4> + 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4> + 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3> + 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0> + 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5> + 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4> + 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5> + 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2> + 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS + 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1> + 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3> + 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4> + 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5> + 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7> + 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4> + 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS + 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1> + 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5> + 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3> + 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5> + 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5> + 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0> + 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7> + 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0> + 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS + 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6> + 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6> + 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6> + 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS + 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3> + 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6> + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2> + 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4> + 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4> + 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5> + 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0> + 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5> + 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, 
<7,6,7,4> + 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7> + 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2> + 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS + 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, + 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u> + 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS + 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0> + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0> + 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS + 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3> + 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4> + 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5> + 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1> + 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6> + 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS + 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS + 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0> + 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1> + 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4> + 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3> + 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4> + 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6> + 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6> + 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS + 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS + 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS + 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5> + 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2> + 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5> + 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3> + 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4> + 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7> + 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3> + 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3> + 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4> + 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2> + 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4> + 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4> + 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6> + 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6> + 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6> + 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS + 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS + 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1> + 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1> + 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3> + 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4> + 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4> + 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5> + 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6> + 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS + 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS + 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2> + 
2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1> + 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3> + 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5> + 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6> + 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1> + 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6> + 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS + 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS + 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2> + 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1> + 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3> + 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5> + 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4> + 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1> + 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6> + 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7> + 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7> + 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1> + 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1> + 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3> + 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4> + 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5> + 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2> + 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6> + 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS + 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS + 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2> + 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1> + 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3> + 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5> + 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6> + 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1> + 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6> + 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS + 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS + 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2> + 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3> + 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4> + 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5> + 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6> + 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0> + 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7> + 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0> + 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1> + 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4> + 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3> + 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4> + 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5> + 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7> + 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7> + 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4> + 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3> + 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4> + 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2> + 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4> + 
2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3> + 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4> + 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7> + 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2> + 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4> + 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2> + 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4> + 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3> + 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7> + 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4> + 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7> + 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7> + 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7> + 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7> + 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1> + 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3> + 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3> + 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3> + 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4> + 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4> + 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7> + 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2> + 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1> + 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3> + 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1> + 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6> + 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5> + 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7> + 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7> + 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2> + 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2> + 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1> + 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4> + 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3> + 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7> + 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4> + 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7> + 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4> + 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4> + 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2> + 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4> + 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3> + 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3> + 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6> + 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5> + 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7> + 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7> + 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2> + 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2> + 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2> + 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1> + 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6> + 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 2244294126U, // <4,7,u,6>: Cost 3 
vmrghw RHS, <7,6,2,7> + 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7> + 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2> + 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS + 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u> + 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS + 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u> + 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS + 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u> + 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS + 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS + 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS + 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u> + 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS + 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4> + 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u> + 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6> + 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS + 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS + 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS + 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u> + 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u> + 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, + 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS + 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7> + 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, + 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u> + 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, + 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 , + 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u> + 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, + 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, + 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS + 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u> + 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS + 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, + 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS + 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS + 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3> + 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS + 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS + 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS + 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7> + 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS + 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS + 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, + 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS + 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, + 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS + 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, + 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS + 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS + 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS + 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS + 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6> + 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6> + 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6> + 
1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS + 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS + 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6> + 27705344U, // <4,u,6,7>: Cost 0 copy RHS + 27705344U, // <4,u,6,u>: Cost 0 copy RHS + 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, + 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS + 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u> + 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS + 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, + 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS + 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u> + 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS + 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS + 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS + 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS + 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS + 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS + 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS + 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 27705344U, // <4,u,u,7>: Cost 0 copy RHS + 27705344U, // <4,u,u,u>: Cost 0 copy RHS + 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0> + 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1> + 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5> + 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3> + 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1> + 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5> + 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7> + 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7> + 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1> + 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS + 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1> + 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0> + 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS + 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1> + 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7> + 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7> + 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0> + 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS + 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2> + 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5> + 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1> + 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3> + 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7> + 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 , <2,7,0,1> + 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS + 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2> + 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0> + 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4> + 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3> + 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0> + 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5> + 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4> + 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7> + 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 
<3,u,5,0>, <3,u,5,0> + 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS + 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5> + 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5> + 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0> + 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5> + 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS + 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7> + 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0> + 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5> + 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0> + 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS + 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3> + 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0> + 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5> + 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5> + 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7> + 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7> + 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS + 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0> + 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS + 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3> + 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3> + 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5> + 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6> + 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6> + 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5> + 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS + 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2> + 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1> + 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 , <7,2,u,4> + 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1> + 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0> + 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0> + 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4> + 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1> + 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0> + 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS + 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5> + 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5> + 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1> + 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS + 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, + 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 , + 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0> + 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2> + 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0> + 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5> + 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5> + 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7> + 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0> + 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS + 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1> + 2311276694U, 
// <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2> + 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3> + 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5> + 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5> + 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6> + 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7> + 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5> + 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS + 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3> + 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2> + 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0> + 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5> + 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3> + 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7> + 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0> + 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0> + 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2> + 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1> + 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2> + 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3> + 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6> + 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5> + 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7> + 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1> + 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2> + 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1> + 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5> + 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5> + 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0> + 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4> + 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS + 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1> + 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1> + 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1> + 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1> + 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1> + 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3> + 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0> + 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5> + 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5> + 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7> + 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0> + 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1> + 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 , LHS + 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1> + 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2> + 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3> + 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4> + 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7> + 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6> + 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1> + 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u> + 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2> + 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1> + 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2> + 3787674851U, // 
<5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1> + 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6> + 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5> + 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6> + 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7> + 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2> + 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 , + 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5> + 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0> + 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5> + 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS + 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, + 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, + 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0> + 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS + 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1> + 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS + 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5> + 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5> + 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1> + 3408232554U, // <5,2,0,7>: Cost 4 vmrglw , <0,1,2,7> + 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS + 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS + 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2> + 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2> + 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS + 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS + 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5> + 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6> + 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7> + 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS + 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS + 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2> + 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2> + 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3> + 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5> + 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3> + 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6> + 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7> + 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3> + 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1> + 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1> + 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5> + 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS + 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5> + 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5> + 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6> + 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5> + 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5> + 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5> + 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2> + 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5> + 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5> + 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5> + 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 
<3,4,5,2>, RHS + 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2> + 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2> + 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5> + 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1> + 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2> + 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2> + 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS + 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5> + 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5> + 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7> + 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3> + 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS + 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS + 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6> + 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2> + 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS + 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS + 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7> + 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7> + 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7> + 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS + 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2> + 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3> + 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2> + 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS + 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5> + 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5> + 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7> + 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7> + 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS + 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1> + 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS + 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2> + 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS + 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5> + 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS + 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5> + 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7> + 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5> + 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0> + 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2> + 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0> + 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2> + 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1> + 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5> + 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7> + 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7> + 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2> + 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0> + 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1> + 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3> + 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3> + 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4> + 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1> + 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7> + 2311276474U, // <5,3,1,7>: Cost 3 
vmrglw <4,u,5,1>, <2,6,3,7> + 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3> + 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, + 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5> + 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2> + 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4> + 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6> + 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1> + 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3> + 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3> + 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4> + 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1> + 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5> + 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2> + 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3> + 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5> + 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5> + 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6> + 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7> + 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5> + 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5> + 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0> + 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3> + 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5> + 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5> + 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6> + 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5> + 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7> + 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6> + 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0> + 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5> + 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3> + 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3> + 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4> + 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5> + 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7> + 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7> + 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u> + 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS + 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1> + 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6> + 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6> + 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS + 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7> + 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6> + 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7> + 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS + 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0> + 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7> + 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7> + 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3> + 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5> + 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5> + 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6> + 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7> + 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 
<3,7,u,5>, <3,7,u,5> + 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS + 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2> + 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u> + 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u> + 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS + 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6> + 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6> + 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7> + 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2> + 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4> + 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS + 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5> + 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4> + 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5> + 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5> + 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6> + 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, + 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS + 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0> + 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4> + 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4> + 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4> + 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4> + 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0> + 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7> + 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7> + 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u> + 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS + 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5> + 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2> + 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5> + 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4> + 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3> + 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7> + 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7> + 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1> + 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2> + 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1> + 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4> + 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3> + 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4> + 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5> + 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7> + 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7> + 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4> + 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4> + 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4> + 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3> + 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4> + 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4> + 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5> + 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7> + 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4> + 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5> + 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS + 
2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5> + 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3> + 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2> + 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS + 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS + 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7> + 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1> + 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1> + 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2> + 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6> + 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4> + 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS + 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6> + 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5> + 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS + 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2> + 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1> + 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 , <7,2,u,4> + 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5> + 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6> + 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5> + 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6> + 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5> + 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4> + 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS + 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u> + 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5> + 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5> + 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS + 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5> + 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 , + 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0> + 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2> + 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3> + 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1> + 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5> + 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0> + 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7> + 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0> + 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0> + 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5> + 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5> + 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5> + 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6> + 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7> + 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0> + 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3> + 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2> + 2765737726U, // <5,5,2,3>: 
Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4> + 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3> + 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3> + 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7> + 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7> + 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4> + 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2> + 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5> + 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3> + 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3> + 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6> + 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5> + 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6> + 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7> + 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2> + 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1> + 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5> + 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5> + 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3> + 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5> + 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS + 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6> + 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7> + 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5> + 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1> + 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3> + 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3> + 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS + 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6> + 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7> + 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS + 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1> + 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1> + 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3> + 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6> + 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5> + 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7> + 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1> + 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2> + 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1> + 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5> + 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3> + 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6> + 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5> + 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6> + 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7> + 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2> + 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, + 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, + 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,5,u,5>: Cost 1 
vspltisw1 RHS + 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, + 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS + 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0> + 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2> + 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4> + 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5> + 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6> + 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7> + 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS + 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2> + 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1> + 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0> + 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4> + 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6> + 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6> + 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6> + 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS + 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS + 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2> + 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1> + 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2> + 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1> + 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6> + 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6> + 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7> + 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS + 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6> + 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2> + 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5> + 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2> + 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3> + 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6> + 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7> + 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1> + 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS + 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6> + 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS + 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3> + 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5> + 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6> + 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6> + 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6> + 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS + 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1> + 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1> + 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3> + 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4> + 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6> + 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6> + 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1> + 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS + 
1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS + 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS + 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1> + 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3> + 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3> + 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS + 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6> + 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6> + 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS + 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS + 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS + 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1> + 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS + 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5> + 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7> + 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS + 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS + 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS + 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS + 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS + 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0> + 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5> + 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3> + 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5> + 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5> + 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0> + 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2> + 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0> + 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1> + 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0> + 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3> + 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4> + 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1> + 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7> + 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7> + 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7> + 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0> + 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5> + 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2> + 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3> + 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4> + 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7> + 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7> + 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7> + 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3> + 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2> + 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5> + 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, 
<3,2,3,4> + 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3> + 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5> + 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5> + 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7> + 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4> + 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5> + 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5> + 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0> + 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5> + 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7> + 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5> + 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7> + 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7> + 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0> + 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5> + 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3> + 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3> + 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4> + 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5> + 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7> + 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7> + 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u> + 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2> + 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1> + 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3> + 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6> + 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6> + 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7> + 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6> + 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7> + 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2> + 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS + 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7> + 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5> + 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3> + 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5> + 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5> + 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7> + 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7> + 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5> + 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0> + 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3> + 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3> + 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4> + 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u> + 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7> + 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0> + 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2> + 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS + 2754238189U, // <5,u,0,4>: Cost 
3 vsldoi12 <0,4,1,5>, + 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5> + 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0> + 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS + 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS + 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS + 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u> + 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, + 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u> + 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS + 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, + 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS + 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2> + 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, + 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u> + 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, + 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7> + 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS + 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, + 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, + 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, + 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5> + 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS + 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u> + 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, + 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6> + 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS + 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u> + 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u> + 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 , + 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u> + 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, + 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u> + 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS + 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u> + 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS + 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS + 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS + 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, + 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS + 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS + 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS + 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS + 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS + 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS + 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6> + 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS + 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS + 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS + 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS + 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS + 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS + 1571357492U, // <5,u,7,1>: 
Cost 2 vsldoi4 RHS, <1,1,1,1> + 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS + 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5> + 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2> + 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS + 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS + 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS + 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS + 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS + 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS + 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS + 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0> + 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1> + 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6> + 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3> + 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1> + 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0> + 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6> + 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7> + 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1> + 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS + 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS + 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6> + 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS + 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1> + 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1> + 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2> + 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0> + 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS + 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6> + 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6> + 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5> + 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6> + 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2> + 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2> + 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS + 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0> + 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4> + 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0> + 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4> + 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6> + 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3> + 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7> + 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6> + 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4> + 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0> + 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5> + 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6> + 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6> + 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5> + 
2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0> + 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2> + 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7> + 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5> + 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0> + 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS + 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3> + 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0> + 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5> + 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6> + 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1> + 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7> + 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS + 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0> + 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS + 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3> + 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5> + 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5> + 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6> + 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0> + 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1> + 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS + 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0> + 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1> + 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2> + 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7> + 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS + 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5> + 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0> + 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u> + 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0> + 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1> + 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u> + 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS + 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS + 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u> + 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0> + 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS + 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, + 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6> + 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5> + 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5> + 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6> + 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7> + 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS + 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1> + 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1> + 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6> + 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3> + 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5> + 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5> + 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6> + 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, 
<1,6,1,7> + 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1> + 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 , LHS + 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1> + 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0> + 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0> + 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 , RHS + 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3> + 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7> + 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0> + 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1> + 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2> + 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1> + 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3> + 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3> + 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6> + 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5> + 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7> + 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1> + 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2> + 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1> + 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5> + 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5> + 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4> + 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5> + 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6> + 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6> + 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4> + 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6> + 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1> + 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6> + 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6> + 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4> + 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6> + 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6> + 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0> + 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7> + 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1> + 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 , LHS + 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1> + 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0> + 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7> + 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 , RHS + 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5> + 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6> + 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1> + 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0> + 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS + 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3> + 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS + 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5> + 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS + 1235673098U, // 
<6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0> + 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS + 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5> + 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0> + 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS + 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6> + 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS + 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS + 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1> + 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1> + 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0> + 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS + 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0> + 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1> + 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2> + 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS + 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS + 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2> + 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0> + 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7> + 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS + 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS + 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2> + 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2> + 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3> + 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS + 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6> + 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7> + 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0> + 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3> + 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1> + 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4> + 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6> + 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS + 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5> + 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6> + 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7> + 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4> + 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6> + 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS + 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1> + 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5> + 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS + 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS + 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6> + 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5> + 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0> + 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6> + 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS + 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2> + 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6> + 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, 
<2,5,3,6> + 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS + 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2> + 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6> + 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2> + 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6> + 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS + 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6> + 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2> + 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS + 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS + 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6> + 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7> + 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2> + 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS + 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS + 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS + 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS + 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5> + 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6> + 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS + 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS + 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS + 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS + 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6> + 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6> + 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS + 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0> + 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS + 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6> + 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6> + 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6> + 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0> + 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6> + 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7> + 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS + 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2> + 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1> + 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3> + 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3> + 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5> + 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5> + 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6> + 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7> + 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2> + 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2> + 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1> + 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3> + 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3> + 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6> + 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3> + 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7> + 3368511418U, // <6,3,2,7>: Cost 4 
vmrglw <2,1,6,2>, <2,6,3,7> + 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2> + 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS + 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6> + 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3> + 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3> + 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5> + 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6> + 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6> + 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7> + 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5> + 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS + 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2> + 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6> + 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6> + 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS + 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6> + 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6> + 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7> + 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6> + 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0> + 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7> + 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3> + 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3> + 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6> + 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6> + 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6> + 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7> + 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3> + 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2> + 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1> + 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6> + 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3> + 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6> + 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6> + 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6> + 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7> + 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2> + 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS + 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1> + 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7> + 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS + 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5> + 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0> + 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS + 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS + 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1> + 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u> + 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS + 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6> + 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0> + 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS + 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 
<4,5,6,4>, <0,0,0,0> + 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS + 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6> + 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1> + 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5> + 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS + 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0> + 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7> + 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS + 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1> + 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1> + 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3> + 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0> + 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2> + 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS + 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1> + 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4> + 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS + 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1> + 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4> + 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2> + 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6> + 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4> + 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS + 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2> + 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0> + 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS + 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2> + 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1> + 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4> + 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3> + 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6> + 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS + 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7> + 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6> + 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS + 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1> + 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1> + 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3> + 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4> + 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4> + 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6> + 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6> + 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7> + 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6> + 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS + 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5> + 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7> + 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6> + 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS + 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5> + 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2> + 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1> + 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1> + 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 
<4,5,6,4>, <6,2,4,3> + 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5> + 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4> + 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS + 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6> + 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1> + 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS + 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS + 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4> + 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7> + 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7> + 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0> + 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u> + 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS + 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4> + 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7> + 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u> + 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0> + 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS + 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6> + 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6> + 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5> + 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5> + 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6> + 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7> + 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS + 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1> + 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1> + 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0> + 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4> + 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1> + 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6> + 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1> + 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4> + 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5> + 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS + 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1> + 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2> + 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1> + 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2> + 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5> + 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7> + 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2> + 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5> + 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2> + 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5> + 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3> + 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3> + 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, 
<3,4,5,6> + 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5> + 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6> + 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1> + 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6> + 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS + 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1> + 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3> + 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6> + 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4> + 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS + 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6> + 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4> + 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS + 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS + 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5> + 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3> + 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3> + 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5> + 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5> + 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6> + 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7> + 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5> + 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1> + 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1> + 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2> + 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4> + 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5> + 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5> + 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7> + 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6> + 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1> + 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS + 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1> + 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2> + 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3> + 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS + 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5> + 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6> + 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7> + 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u> + 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS + 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1> + 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2> + 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3> + 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS + 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5> + 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6> + 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7> + 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u> + 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0> + 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3> + 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1> + 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5> + 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0> + 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1> + 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, 
RHS + 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1> + 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1> + 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0> + 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3> + 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5> + 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5> + 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6> + 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS + 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS + 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1> + 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1> + 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1> + 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5> + 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3> + 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2> + 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3> + 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2> + 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1> + 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2> + 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3> + 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5> + 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5> + 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6> + 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS + 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS + 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1> + 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1> + 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3> + 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3> + 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4> + 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6> + 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS + 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS + 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1> + 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3> + 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3> + 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS + 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5> + 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0> + 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS + 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS + 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1> + 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3> + 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3> + 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5> + 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS + 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS + 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS + 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0> + 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1> + 
2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2> + 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3> + 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4> + 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5> + 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6> + 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS + 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS + 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3> + 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS + 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS + 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS + 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0> + 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS + 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2> + 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0> + 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5> + 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0> + 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7> + 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2> + 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS + 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1> + 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1> + 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0> + 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7> + 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5> + 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1> + 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7> + 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7> + 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0> + 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2> + 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1> + 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5> + 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6> + 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7> + 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7> + 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1> + 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1> + 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3> + 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5> + 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7> + 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1> + 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2> + 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0> + 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4> + 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS + 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6> + 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7> + 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS 
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2> + 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4> + 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3> + 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6> + 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5> + 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0> + 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7> + 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0> + 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2> + 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1> + 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3> + 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5> + 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6> + 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1> + 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6> + 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1> + 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3> + 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0> + 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1> + 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7> + 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3> + 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4> + 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5> + 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7> + 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7> + 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u> + 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, + 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS + 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, + 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, + 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, + 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS + 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, + 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7> + 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS + 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0> + 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS + 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2> + 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS + 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5> + 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0> + 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7> + 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS + 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS + 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1> + 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1> + 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS + 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5> + 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1> + 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7> + 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS + 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2> + 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS + 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5> + 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS + 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7> + 2252298496U, // 
<6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, + 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1> + 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1> + 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u> + 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5> + 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7> + 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS + 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2> + 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS + 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4> + 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS + 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2> + 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS + 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS + 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2> + 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4> + 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS + 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6> + 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5> + 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS + 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS + 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3> + 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS + 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS + 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS + 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1> + 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS + 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS + 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1> + 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7> + 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS + 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS + 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5> + 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6> + 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS + 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS + 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS + 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS + 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u> + 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS + 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS + 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS + 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS + 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS + 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS + 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0> + 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1> + 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7> + 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3> + 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1> + 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, 
<5,6,7,0> + 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0> + 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7> + 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1> + 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS + 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1> + 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS + 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1> + 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS + 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5> + 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1> + 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0> + 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS + 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3> + 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6> + 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2> + 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1> + 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6> + 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3> + 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7> + 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3> + 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6> + 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2> + 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0> + 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0> + 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3> + 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6> + 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7> + 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0> + 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0> + 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0> + 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1> + 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5> + 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7> + 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4> + 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5> + 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS + 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4> + 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4> + 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5> + 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1> + 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS + 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3> + 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0> + 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5> + 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5> + 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0> + 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0> + 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0> + 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0> + 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7> + 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3> + 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0> + 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7> + 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7> + 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6> + 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1> + 2779169333U, // 
<7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7> + 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2> + 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS + 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3> + 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6> + 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6> + 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7> + 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7> + 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0> + 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS + 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS + 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1> + 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS + 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u> + 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS + 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS + 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u> + 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, + 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS + 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1> + 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1> + 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1> + 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0> + 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 , RHS + 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5> + 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6> + 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 , <7,0,1,2> + 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1> + 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1> + 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2> + 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1> + 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5> + 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5> + 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1> + 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1> + 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1> + 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3> + 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3> + 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0> + 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5> + 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3> + 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2> + 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0> + 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0> + 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1> + 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1> + 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2> + 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0> + 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0> + 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5> + 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7> + 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7> + 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u> + 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1> + 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5> + 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5> + 3852911672U, 
// <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0> + 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 , RHS + 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6> + 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5> + 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 , <7,4,5,6> + 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5> + 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1> + 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6> + 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6> + 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0> + 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5> + 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5> + 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1> + 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1> + 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1> + 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1> + 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7> + 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7> + 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7> + 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7> + 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7> + 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7> + 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0> + 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7> + 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 , LHS + 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1> + 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2> + 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3> + 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 , RHS + 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5> + 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6> + 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7> + 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u> + 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1> + 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1> + 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0> + 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5> + 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7> + 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u> + 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0> + 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0> + 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS + 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2> + 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1> + 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS + 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS + 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1> + 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1> + 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1> + 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS + 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1> + 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1> + 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0> + 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1> + 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5> + 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6> + 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, 
<2,1,6,0> + 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0> + 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1> + 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1> + 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1> + 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3> + 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5> + 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2> + 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6> + 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2> + 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3> + 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1> + 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1> + 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6> + 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS + 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5> + 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5> + 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3> + 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7> + 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1> + 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS + 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5> + 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5> + 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS + 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS + 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6> + 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5> + 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7> + 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS + 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1> + 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2> + 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2> + 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6> + 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5> + 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5> + 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2> + 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6> + 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6> + 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS + 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2> + 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7> + 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7> + 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS + 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7> + 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6> + 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7> + 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7> + 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1> + 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6> + 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2> + 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS + 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5> + 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5> + 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7> + 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7> + 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS + 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1> + 2779170884U, // 
<7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1> + 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7> + 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5> + 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5> + 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1> + 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0> + 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7> + 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0> + 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2> + 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0> + 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1> + 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1> + 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5> + 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0> + 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7> + 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2> + 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0> + 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1> + 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3> + 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0> + 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0> + 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0> + 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1> + 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1> + 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3> + 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3> + 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2> + 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2> + 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0> + 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3> + 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3> + 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3> + 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3> + 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0> + 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0> + 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1> + 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3> + 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4> + 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3> + 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3> + 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7> + 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1> + 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2> + 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4> + 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5> + 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4> + 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6> + 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2> + 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7> + 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6> + 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1> + 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1> + 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1> + 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3> + 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5> + 
2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5> + 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3> + 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6> + 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3> + 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7> + 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7> + 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3> + 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7> + 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7> + 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7> + 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2> + 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7> + 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3> + 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1> + 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1> + 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6> + 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2> + 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5> + 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5> + 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7> + 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7> + 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1> + 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7> + 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2> + 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 , + 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7> + 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6> + 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3> + 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3> + 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2> + 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0> + 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS + 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1> + 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1> + 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4> + 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1> + 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0> + 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0> + 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1> + 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1> + 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1> + 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3> + 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0> + 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2> + 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0> + 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7> + 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7> + 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0> + 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3> + 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3> + 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2> + 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1> + 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3> + 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2> + 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7> + 3847457864U, // <7,4,2,7>: Cost 4 
vsldoi12 <3,6,4,7>, <4,2,7,3> + 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3> + 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2> + 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2> + 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3> + 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3> + 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6> + 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0> + 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4> + 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4> + 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0> + 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1> + 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1> + 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2> + 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3> + 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4> + 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5> + 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4> + 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7> + 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5> + 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS + 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1> + 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7> + 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5> + 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS + 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0> + 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS + 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2> + 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS + 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7> + 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7> + 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3> + 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4> + 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4> + 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6> + 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6> + 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1> + 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7> + 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2> + 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2> + 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3> + 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6> + 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1> + 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS + 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7> + 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4> + 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS + 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS + 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS + 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7> + 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u> + 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS + 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1> + 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS + 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, + 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS + 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS + 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2> + 
3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3> + 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4> + 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1> + 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5> + 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6> + 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2> + 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1> + 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1> + 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1> + 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3> + 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5> + 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5> + 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0> + 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5> + 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5> + 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1> + 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1> + 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3> + 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2> + 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4> + 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5> + 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3> + 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3> + 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3> + 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3> + 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1> + 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1> + 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2> + 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3> + 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5> + 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5> + 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6> + 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4> + 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u> + 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS + 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1> + 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3> + 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4> + 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4> + 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6> + 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6> + 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6> + 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6> + 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1> + 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1> + 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2> + 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3> + 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5> + 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6> + 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7> + 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1> + 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7> + 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2> + 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4> + 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5> + 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 
RHS, <5,6,5,7> + 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7> + 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0> + 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0> + 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS + 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1> + 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2> + 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2> + 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7> + 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5> + 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6> + 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7> + 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS + 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1> + 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2> + 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2> + 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4> + 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4> + 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3> + 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0> + 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0> + 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0> + 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2> + 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3> + 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1> + 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1> + 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5> + 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6> + 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS + 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS + 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1> + 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1> + 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3> + 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6> + 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5> + 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5> + 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6> + 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1> + 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1> + 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS + 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3> + 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3> + 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2> + 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS + 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3> + 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2> + 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3> + 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3> + 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS + 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6> + 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2> + 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3> + 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5> + 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5> + 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6> + 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS + 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS + 3852915291U, // <7,6,4,0>: 
Cost 4 vsldoi12 RHS, <6,4,0,1> + 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1> + 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3> + 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4> + 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4> + 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6> + 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6> + 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS + 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS + 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1> + 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4> + 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6> + 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6> + 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5> + 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5> + 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6> + 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1> + 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1> + 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS + 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1> + 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3> + 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6> + 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS + 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6> + 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6> + 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7> + 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7> + 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1> + 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1> + 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2> + 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0> + 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5> + 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5> + 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2> + 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS + 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1> + 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1> + 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2> + 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2> + 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0> + 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5> + 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6> + 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6> + 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3> + 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1> + 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0> + 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2> + 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1> + 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3> + 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1> + 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0> + 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0> + 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7> + 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2> + 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1> + 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1> + 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3> + 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3> + 3852915813U, // <7,7,1,4>: Cost 4 
vsldoi12 RHS, <7,1,4,1> + 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0> + 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1> + 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7> + 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3> + 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3> + 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3> + 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2> + 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0> + 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3> + 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3> + 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3> + 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3> + 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3> + 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1> + 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1> + 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6> + 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5> + 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5> + 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7> + 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7> + 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5> + 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2> + 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7> + 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3> + 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4> + 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6> + 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4> + 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7> + 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6> + 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1> + 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0> + 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3> + 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3> + 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5> + 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5> + 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7> + 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7> + 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7> + 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS + 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7> + 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7> + 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3> + 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS + 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7> + 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7> + 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0> + 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7> + 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS + 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1> + 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3> + 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3> + 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS + 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5> + 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7> + 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS + 
430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS + 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0> + 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2> + 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3> + 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4> + 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6> + 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 , + 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS + 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS + 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0> + 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, + 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2> + 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS + 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, + 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS + 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6> + 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS + 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, + 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS + 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS + 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1> + 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS + 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, + 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1> + 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, + 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS + 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, + 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, + 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, + 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, + 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, + 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, + 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, + 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, + 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, + 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, + 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, + 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS + 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, + 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, + 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u> + 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS + 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, + 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, + 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS + 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2> + 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS + 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4> + 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, + 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6> + 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS + 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, + 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS + 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, + 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7> + 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5> + 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS + 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS + 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, + 631691436U, // <7,u,5,u>: Cost 1 
vsldoi12 RHS, RHS + 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, + 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, + 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u> + 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, + 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, + 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, + 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u> + 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, + 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, + 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, + 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS + 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, + 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS + 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, + 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS + 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, + 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS + 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS + 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, + 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, + 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS + 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, + 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, + 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, + 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS + 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS + 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS + 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS + 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1> + 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0> + 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, + 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS + 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0> + 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0> + 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, + 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS + 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS + 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS + 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS + 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2> + 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS + 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1> + 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1> + 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2> + 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS + 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0> + 1190174822U, // <u,0,2,1>: Cost 2 vmrghw , LHS + 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2> + 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1> + 2263916882U, // <u,0,2,4>: Cost 3 vmrghw , <0,4,1,5> + 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5> + 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7> + 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0> + 1190175389U, // <u,0,2,u>: Cost 2 vmrghw , LHS + 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0> + 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1> + 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0> + 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3> + 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4> + 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0> + 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7> + 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3> + 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u> + 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0> + 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5> + 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4> + 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4> + 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1618505014U, // <u,0,4,5>
: Cost 2 vsldoi8 <1,2,u,0>, RHS + 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4> + 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0> + 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5> + 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS + 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2> + 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5> + 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5> + 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0> + 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5> + 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS + 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0> + 1192886374U, // <u,0,6,1>: Cost 2 vmrghw , LHS + 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3> + 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0> + 2266628434U, // <u,0,6,4>: Cost 3 vmrghw , <0,4,1,5> + 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6> + 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6> + 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1> + 1192886941U, // <u,0,6,u>: Cost 2 vmrghw , LHS + 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0> + 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1> + 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0> + 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7> + 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS + 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5> + 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0> + 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u> + 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS + 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS + 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS + 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, + 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS + 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS + 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u> + 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS + 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1> + 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS + 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, + 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0> + 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS + 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5> + 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1> + 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0> + 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1> + 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1> + 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS + 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2> + 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS + 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5> + 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1> + 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, + 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS + 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS + 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3> + 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 835584U, // <u,1,2,3>: Cost 0 copy LHS + 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS + 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3> + 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7> + 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2> + 835584U, // <u,1,2,u>: Cost 0 copy LHS + 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0> + 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1> + 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3> + 
2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4> + 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7> + 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u> + 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1> + 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1> + 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2> + 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4> + 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS + 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS + 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1> + 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4> + 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS + 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5> + 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2> + 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS + 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0> + 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 , <7,0,1,2> + 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS + 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6> + 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7> + 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2> + 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2> + 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS + 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7> + 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6> + 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1> + 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1> + 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS + 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3> + 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS + 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5> + 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u> + 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS + 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 835584U, // <u,1,u,3>: Cost 0 copy LHS + 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS + 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u> + 835584U, // <u,1,u,u>: Cost 0 copy LHS + 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0> + 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS + 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2> + 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS + 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5> + 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5> + 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1> + 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0> + 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS + 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2> + 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1> + 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS + 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS + 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6> + 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS + 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS + 2697569869U, // <u,2,2,1>: Cost 3 
vsldoi8 <2,1,u,2>, <2,1,u,2> + 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS + 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3> + 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS + 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5> + 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7> + 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, + 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS + 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS + 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2> + 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS + 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS + 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5> + 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS + 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2> + 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5> + 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2> + 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS + 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4> + 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS + 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5> + 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0> + 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS + 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS + 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5> + 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS + 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS + 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5> + 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1> + 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS + 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS + 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2> + 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2> + 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7> + 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS + 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5> + 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7> + 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2> + 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7> + 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2> + 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS + 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS + 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5> + 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6> + 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS + 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS + 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS + 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS + 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS + 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS + 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS + 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0> + 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS + 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2> + 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0> + 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5> + 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7> + 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0> + 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7> + 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS + 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 1611891508U, 
// <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1> + 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0> + 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1> + 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7> + 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7> + 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0> + 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0> + 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1> + 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2> + 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1> + 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5> + 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6> + 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7> + 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7> + 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1> + 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0> + 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1> + 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3> + 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS + 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4> + 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5> + 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, + 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS + 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS + 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4> + 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2> + 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3> + 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS + 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS + 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4> + 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7> + 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS + 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1> + 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5> + 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5> + 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0> + 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7> + 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0> + 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS + 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1> + 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3> + 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6> + 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS + 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1> + 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6> + 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1> + 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3> + 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS + 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1> + 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7> + 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS + 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5> + 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6> + 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS + 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, + 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS + 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, + 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS + 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, + 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS + 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, + 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS + 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 
1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS + 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0> + 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4> + 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5> + 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1> + 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0> + 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0> + 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS + 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1> + 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1> + 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4> + 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1> + 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS + 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2> + 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1> + 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS + 2263919506U, // <u,4,2,0>: Cost 3 vmrghw , <4,0,5,1> + 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2> + 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2> + 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1> + 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4> + 1190178102U, // <u,4,2,5>: Cost 2 vmrghw , RHS + 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7> + 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4> + 1190178345U, // <u,4,2,u>: Cost 2 vmrghw , RHS + 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2> + 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1> + 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4> + 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3> + 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4> + 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5> + 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7> + 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3> + 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u> + 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS + 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4> + 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4> + 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, + 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS + 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS + 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4> + 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, + 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS + 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS + 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5> + 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5> + 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2> + 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS + 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS + 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS + 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2> + 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS + 2266631058U, // <u,4,6,0>: Cost 3 vmrghw , <4,0,5,1> + 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1> + 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3> + 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4> + 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4> + 1192889654U, // <u,4,6,5>: Cost 2 vmrghw , RHS + 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6> + 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1> + 1192889897U, // <u,4,6,u>: Cost 2 vmrghw , RHS + 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS + 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4> + 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4> + 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7> + 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4> + 
2309556892U, // : Cost 3 vmrglw RHS, <3,6,4,7> + 1235814097U, // : Cost 2 vmrglw RHS, <2,3,4,u> + 1561116774U, // : Cost 2 vsldoi4 <2,u,4,u>, LHS + 1618540334U, // : Cost 2 vsldoi8 <1,2,u,4>, LHS + 1561118822U, // : Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u> + 2692282300U, // : Cost 3 vsldoi8 <1,2,u,4>, + 229035318U, // : Cost 1 vspltisw0 RHS + 120368438U, // : Cost 1 vmrghw LHS, RHS + 604859945U, // : Cost 1 vsldoi12 LHS, RHS + 2309565084U, // : Cost 3 vmrglw RHS, <3,6,4,7> + 604859963U, // : Cost 1 vsldoi12 LHS, RHS + 2690293760U, // : Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0> + 1616552038U, // : Cost 2 vsldoi8 <0,u,u,5>, LHS + 2640840434U, // : Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5> + 2640841536U, // : Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0> + 1613381970U, // : Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5> + 2316135642U, // : Cost 3 vmrglw <5,6,u,0>, <4,4,5,5> + 2289592834U, // : Cost 3 vmrglw <1,2,u,0>, <3,4,5,6> + 2664732324U, // : Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0> + 1616552661U, // : Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5> + 1573077094U, // : Cost 2 vsldoi4 <4,u,5,1>, LHS + 1237536282U, // : Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 2690294678U, // : Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0> + 2646821014U, // : Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2> + 1573080602U, // : Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1> + 1189466116U, // : Cost 2 vmrghw LHS, <5,5,5,5> + 1189466210U, // : Cost 2 vmrghw LHS, <5,6,7,0> + 2646823930U, // : Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2> + 1573082926U, // : Cost 2 vsldoi4 <4,u,5,1>, LHS + 2640855142U, // : Cost 3 vsldoi4 <3,u,5,2>, LHS + 2697594448U, // : Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5> + 2690295400U, // : Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2> + 1625179890U, // : Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5> + 2699585347U, // : Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5> + 2781171471U, // : Cost 3 vsldoi12 RHS, <5,2,5,3> + 2690295738U, // : Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7> + 3775318070U, // : Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5> + 1628498055U, // : Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5> + 2287627234U, // : Cost 3 vmrglw LHS, <4,1,5,0> + 1257016210U, // : Cost 2 vmrglw LHS, <4,0,5,1> + 2646836942U, // : Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5> + 2287625131U, // : Cost 3 vmrglw LHS, <1,2,5,3> + 2287627238U, // : Cost 3 vmrglw LHS, <4,1,5,4> + 1257016538U, // : Cost 2 vmrglw LHS, <4,4,5,5> + 1209240066U, // : Cost 2 vmrglw LHS, <3,4,5,6> + 2287625459U, // : Cost 3 vmrglw LHS, <1,6,5,7> + 1209240068U, // : Cost 2 vmrglw LHS, <3,4,5,u> + 2640871526U, // : Cost 3 vsldoi4 <3,u,5,4>, LHS + 2316168082U, // : Cost 3 vmrglw <5,6,u,4>, <4,0,5,1> + 2640873202U, // : Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5> + 2640874308U, // : Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4> + 1637788917U, // : Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5> + 1616555318U, // : Cost 2 vsldoi8 <0,u,u,5>, RHS + 2287638591U, // : Cost 3 vmrglw <0,u,u,4>, + 2664765096U, // : Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4> + 1616555561U, // : Cost 2 vsldoi8 <0,u,u,5>, RHS + 1573109862U, // : Cost 2 vsldoi4 <4,u,5,5>, LHS + 2646852404U, // : Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1> + 2646853224U, // : Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2> + 2287646618U, // : Cost 3 vmrglw <0,u,u,5>, + 1573113374U, // : Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5> + 296144182U, // : Cost 1 vspltisw1 RHS + 1192448098U, // : Cost 2 vmrghw RHS, <5,6,7,0> + 2287646946U, // : Cost 3 vmrglw <0,u,u,5>, + 296144182U, // : Cost 1 vspltisw1 RHS + 1567146086U, // : Cost 2 vsldoi4 <3,u,5,6>, LHS + 2628945300U, // : Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6> + 2634917997U, // : Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6> + 1567148870U, // : Cost 2 vsldoi4 <3,u,5,6>, 
<3,u,5,6> + 1567149366U, // : Cost 2 vsldoi4 <3,u,5,6>, RHS + 2781171799U, // : Cost 3 vsldoi12 RHS, <5,6,5,7> + 1228950018U, // : Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 2628952166U, // : Cost 3 vsldoi4 <1,u,5,7>, LHS + 1235815314U, // : Cost 2 vmrglw RHS, <4,0,5,1> + 2309556734U, // : Cost 3 vmrglw RHS, <3,4,5,2> + 2309555115U, // : Cost 3 vmrglw RHS, <1,2,5,3> + 2628955446U, // : Cost 3 vsldoi4 <1,u,5,7>, RHS + 1235815642U, // : Cost 2 vmrglw RHS, <4,4,5,5> + 1235814914U, // : Cost 2 vmrglw RHS, <3,4,5,6> + 2309555443U, // : Cost 3 vmrglw RHS, <1,6,5,7> + 1235814916U, // : Cost 2 vmrglw RHS, <3,4,5,u> + 1567162470U, // : Cost 2 vsldoi4 <3,u,5,u>, LHS + 1616557870U, // : Cost 2 vsldoi8 <0,u,u,5>, LHS + 2690299781U, // : Cost 3 vsldoi8 <0,u,u,5>, + 1567165256U, // : Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u> + 1567165750U, // : Cost 2 vsldoi4 <3,u,5,u>, RHS + 296144182U, // : Cost 1 vspltisw1 RHS + 1209281026U, // : Cost 2 vmrglw LHS, <3,4,5,6> + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 2705563648U, // : Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0> + 1631821926U, // : Cost 2 vsldoi8 <3,4,u,6>, LHS + 2262462970U, // : Cost 3 vmrghw , <6,2,7,3> + 2646886941U, // : Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6> + 2705563986U, // : Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5> + 2316062652U, // : Cost 3 vmrglw <5,6,7,0>, <5,4,6,5> + 2316137272U, // : Cost 3 vmrglw <5,6,u,0>, <6,6,6,6> + 1215851830U, // : Cost 2 vmrglw <1,2,u,0>, RHS + 1215851831U, // : Cost 2 vmrglw <1,2,u,0>, RHS + 2634948710U, // : Cost 3 vsldoi4 <2,u,6,1>, LHS + 2705564468U, // : Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1> + 1189466618U, // : Cost 2 vmrghw LHS, <6,2,7,3> + 2263208498U, // : Cost 3 vmrghw LHS, <6,3,4,5> + 2693620843U, // : Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6> + 2652868860U, // : Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1> + 1189466936U, // : Cost 2 vmrghw LHS, <6,6,6,6> + 1213869366U, // : Cost 2 vmrglw <0,u,u,1>, RHS + 1213869367U, // : Cost 2 vmrglw <0,u,u,1>, RHS + 2658844774U, // : Cost 3 vsldoi4 <6,u,6,2>, LHS + 3771344465U, // : Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6> + 1178554874U, // : Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2698929907U, // : Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6> + 2699593540U, // : Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6> + 2700257173U, // : Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6> + 2705565626U, // : Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7> + 1226485046U, // : Cost 2 vmrglw <3,0,u,2>, RHS + 1226485047U, // : Cost 2 vmrglw <3,0,u,2>, RHS + 2705565846U, // : Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2> + 2330756585U, // : Cost 3 vmrglw LHS, <2,0,6,1> + 2330756829U, // : Cost 3 vmrglw LHS, <2,3,6,2> + 2282981734U, // : Cost 3 vmrglw LHS, <3,2,6,3> + 1631824413U, // : Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6> + 2652885246U, // : Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3> + 1257018168U, // : Cost 2 vmrglw LHS, <6,6,6,6> + 135499062U, // : Cost 1 vmrglw LHS, RHS + 135499063U, // : Cost 1 vmrglw LHS, RHS + 2646917222U, // : Cost 3 vsldoi4 <4,u,6,4>, LHS + 2217365931U, // : Cost 3 vmrghw <0,4,1,5>, <6,1,7,5> + 2790167156U, // : Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u> + 2646919709U, // : Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6> + 2711538934U, // : Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6> + 1631825206U, // : Cost 2 vsldoi8 <3,4,u,6>, RHS + 2316170040U, // : Cost 3 vmrglw <5,6,u,4>, <6,6,6,6> + 1215884598U, // : Cost 2 vmrglw <1,2,u,4>, RHS + 1215884599U, // : Cost 2 vmrglw <1,2,u,4>, RHS + 2634981478U, // : Cost 3 vsldoi4 <2,u,6,5>, LHS + 2266190247U, // : Cost 3 vmrghw RHS, <6,1,7,1> + 1192448506U, // : Cost 2 
vmrghw RHS, <6,2,7,3> + 2266190386U, // : Cost 3 vmrghw RHS, <6,3,4,5> + 2634984758U, // : Cost 3 vsldoi4 <2,u,6,5>, RHS + 2652901632U, // : Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5> + 1192448824U, // : Cost 2 vmrghw RHS, <6,6,6,6> + 1213902134U, // : Cost 2 vmrglw <0,u,u,5>, RHS + 1213902135U, // : Cost 2 vmrglw <0,u,u,5>, RHS + 1583808614U, // : Cost 2 vsldoi4 <6,6,6,6>, LHS + 2322010445U, // : Cost 3 vmrglw <6,6,6,6>, <6,0,6,1> + 2718839290U, // : Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3> + 2670823965U, // : Cost 3 vsldoi4 , <3,4,u,6> + 1583811894U, // : Cost 2 vsldoi4 <6,6,6,6>, RHS + 2724147961U, // : Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6> + 363253046U, // : Cost 1 vspltisw2 RHS + 1229172022U, // : Cost 2 vmrglw <3,4,u,6>, RHS + 363253046U, // : Cost 1 vspltisw2 RHS + 499458150U, // : Cost 1 vsldoi4 RHS, LHS + 1573200692U, // : Cost 2 vsldoi4 RHS, <1,1,1,1> + 1573201512U, // : Cost 2 vsldoi4 RHS, <2,2,2,2> + 1573202070U, // : Cost 2 vsldoi4 RHS, <3,0,1,2> + 499461673U, // : Cost 1 vsldoi4 RHS, RHS + 1573203972U, // : Cost 2 vsldoi4 RHS, <5,5,5,5> + 1235817272U, // : Cost 2 vmrglw RHS, <6,6,6,6> + 162073910U, // : Cost 1 vmrglw RHS, RHS + 162073911U, // : Cost 1 vmrglw RHS, RHS + 499466342U, // : Cost 1 vsldoi4 RHS, LHS + 1631827758U, // : Cost 2 vsldoi8 <3,4,u,6>, LHS + 1573209704U, // : Cost 2 vsldoi4 RHS, <2,2,2,2> + 1573210262U, // : Cost 2 vsldoi4 RHS, <3,0,1,2> + 499469866U, // : Cost 1 vsldoi4 RHS, RHS + 1631828122U, // : Cost 2 vsldoi8 <3,4,u,6>, RHS + 363253046U, // : Cost 1 vspltisw2 RHS + 135540022U, // : Cost 1 vmrglw LHS, RHS + 135540023U, // : Cost 1 vmrglw LHS, RHS + 1638465536U, // : Cost 2 vsldoi8 RHS, <0,0,0,0> + 564723814U, // : Cost 1 vsldoi8 RHS, LHS + 2712207533U, // : Cost 3 vsldoi8 RHS, <0,2,1,2> + 2712207612U, // : Cost 3 vsldoi8 RHS, <0,3,1,0> + 1638465874U, // : Cost 2 vsldoi8 RHS, <0,4,1,5> + 1579192580U, // : Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0> + 2712207862U, // : Cost 3 vsldoi8 RHS, <0,6,1,7> + 2316137282U, // : Cost 3 vmrglw <5,6,u,0>, <6,6,7,7> + 564724381U, // : Cost 1 vsldoi8 RHS, LHS + 1189467130U, // : Cost 2 vmrghw LHS, <7,0,1,2> + 1638466356U, // : Cost 2 vsldoi8 RHS, <1,1,1,1> + 1638466454U, // : Cost 2 vsldoi8 RHS, <1,2,3,0> + 2311500282U, // : Cost 3 vmrglw <4,u,u,1>, <6,2,7,3> + 1189467494U, // : Cost 2 vmrghw LHS, <7,4,5,6> + 2712208495U, // : Cost 3 vsldoi8 RHS, <1,5,0,1> + 2694956302U, // : Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7> + 1189467756U, // : Cost 2 vmrghw LHS, <7,7,7,7> + 1638466940U, // : Cost 2 vsldoi8 RHS, <1,u,3,0> + 2712208829U, // : Cost 3 vsldoi8 RHS, <2,0,1,2> + 2712208927U, // : Cost 3 vsldoi8 RHS, <2,1,3,1> + 1638467176U, // : Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638467238U, // : Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712209165U, // : Cost 3 vsldoi8 RHS, <2,4,2,5> + 2712209256U, // : Cost 3 vsldoi8 RHS, <2,5,3,6> + 1627187175U, // : Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7> + 2324116290U, // : Cost 3 vmrglw <7,0,u,2>, <6,6,7,7> + 1628514441U, // : Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7> + 1638467734U, // : Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712209638U, // : Cost 3 vsldoi8 RHS, <3,1,1,1> + 2700929387U, // : Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u> + 1638467996U, // : Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638468098U, // : Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712210002U, // : Cost 3 vsldoi8 RHS, <3,5,5,5> + 1585189856U, // : Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3> + 1257018178U, // : Cost 2 vmrglw LHS, <6,6,7,7> + 1638468382U, // : Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638468498U, // : Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712210378U, // : Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712210485U, // : Cost 
3 vsldoi8 RHS, <4,2,5,2> + 2712210564U, // : Cost 3 vsldoi8 RHS, <4,3,5,0> + 1638468816U, // : Cost 2 vsldoi8 RHS, <4,4,4,4> + 564727112U, // : Cost 1 vsldoi8 RHS, RHS + 2712210809U, // : Cost 3 vsldoi8 RHS, <4,6,5,2> + 2712210888U, // : Cost 3 vsldoi8 RHS, <4,7,5,0> + 564727337U, // : Cost 1 vsldoi8 RHS, RHS + 1192449018U, // : Cost 2 vmrghw RHS, <7,0,1,2> + 2714201743U, // : Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712211198U, // : Cost 3 vsldoi8 RHS, <5,2,3,4> + 2311533050U, // : Cost 3 vmrglw <4,u,u,5>, <6,2,7,3> + 1192449382U, // : Cost 2 vmrghw RHS, <7,4,5,6> + 1638469636U, // : Cost 2 vsldoi8 RHS, <5,5,5,5> + 1638469730U, // : Cost 2 vsldoi8 RHS, <5,6,7,0> + 1192449644U, // : Cost 2 vmrghw RHS, <7,7,7,7> + 1638469892U, // : Cost 2 vsldoi8 RHS, <5,u,7,0> + 2712211745U, // : Cost 3 vsldoi8 RHS, <6,0,1,2> + 2712211879U, // : Cost 3 vsldoi8 RHS, <6,1,7,1> + 1638470138U, // : Cost 2 vsldoi8 RHS, <6,2,7,3> + 2712212018U, // : Cost 3 vsldoi8 RHS, <6,3,4,5> + 2712212109U, // : Cost 3 vsldoi8 RHS, <6,4,5,6> + 2712212203U, // : Cost 3 vsldoi8 RHS, <6,5,7,1> + 1638470456U, // : Cost 2 vsldoi8 RHS, <6,6,6,6> + 1638470478U, // : Cost 2 vsldoi8 RHS, <6,7,0,1> + 1638470559U, // : Cost 2 vsldoi8 RHS, <6,u,0,1> + 1235816546U, // : Cost 2 vmrglw RHS, <5,6,7,0> + 2309558371U, // : Cost 3 vmrglw RHS, <5,6,7,1> + 2641045434U, // : Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7> + 1235816954U, // : Cost 2 vmrglw RHS, <6,2,7,3> + 1235816550U, // : Cost 2 vmrglw RHS, <5,6,7,4> + 2309558375U, // : Cost 3 vmrglw RHS, <5,6,7,5> + 1585222628U, // : Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7> + 430361910U, // : Cost 1 vspltisw3 RHS + 430361910U, // : Cost 1 vspltisw3 RHS + 1638471379U, // : Cost 2 vsldoi8 RHS, + 564729646U, // : Cost 1 vsldoi8 RHS, LHS + 1638471557U, // : Cost 2 vsldoi8 RHS, + 1638471612U, // : Cost 2 vsldoi8 RHS, + 1638471743U, // : Cost 2 vsldoi8 RHS, + 564730010U, // : Cost 1 vsldoi8 RHS, RHS + 1638471888U, // : Cost 2 vsldoi8 RHS, + 430361910U, // : Cost 1 vspltisw3 RHS + 564730213U, // : Cost 1 vsldoi8 RHS, LHS + 202162278U, // : Cost 1 vspltisw0 LHS + 538189985U, // : Cost 1 vsldoi8 LHS, LHS + 2685673645U, // : Cost 3 vsldoi8 LHS, <0,2,1,2> + 1215848604U, // : Cost 2 vmrglw <1,2,u,0>, LHS + 1611931986U, // : Cost 2 vsldoi8 LHS, <0,4,1,5> + 1579266317U, // : Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0> + 2289592861U, // : Cost 3 vmrglw <1,2,u,0>, <3,4,u,6> + 1215851848U, // : Cost 2 vmrglw <1,2,u,0>, RHS + 538190493U, // : Cost 1 vsldoi8 LHS, LHS + 1549411025U, // : Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1> + 115726126U, // : Cost 1 vmrghw LHS, LHS + 604862254U, // : Cost 1 vsldoi12 LHS, LHS + 1213866140U, // : Cost 2 vmrglw <0,u,u,1>, LHS + 1549413686U, // : Cost 2 vsldoi4 <0,u,u,1>, RHS + 115726490U, // : Cost 1 vmrghw LHS, RHS + 1585247207U, // : Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1> + 1213869384U, // : Cost 2 vmrglw <0,u,u,1>, RHS + 604862308U, // : Cost 1 vsldoi12 LHS, LHS + 1567334502U, // : Cost 2 vsldoi4 <3,u,u,2>, LHS + 1190180654U, // : Cost 2 vmrghw , LHS + 336380006U, // : Cost 1 vspltisw2 LHS + 835584U, // : Cost 0 copy LHS + 1567337782U, // : Cost 2 vsldoi4 <3,u,u,2>, RHS + 1190181018U, // : Cost 2 vmrghw , RHS + 1611933626U, // : Cost 2 vsldoi8 LHS, <2,6,3,7> + 1226485064U, // : Cost 2 vmrglw <3,0,u,2>, RHS + 835584U, // : Cost 0 copy LHS + 475685587U, // : Cost 1 vsldoi4 LHS, LHS + 1209239278U, // : Cost 2 vmrglw LHS, <2,3,u,1> + 1209239765U, // : Cost 2 vmrglw LHS, <3,0,u,2> + 135495836U, // : Cost 1 vmrglw LHS, LHS + 475688246U, // : Cost 1 vsldoi4 LHS, RHS + 1209239282U, // : Cost 2 vmrglw LHS, <2,3,u,5> + 
1209240093U, // : Cost 2 vmrglw LHS, <3,4,u,6> + 135499080U, // : Cost 1 vmrglw LHS, RHS + 135495841U, // : Cost 1 vmrglw LHS, LHS + 1555406950U, // : Cost 2 vsldoi4 <1,u,u,4>, LHS + 1555408301U, // : Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4> + 2289625301U, // : Cost 3 vmrglw <1,2,u,4>, <3,0,u,2> + 1215881372U, // : Cost 2 vmrglw <1,2,u,4>, LHS + 229035318U, // : Cost 1 vspltisw0 RHS + 538193206U, // : Cost 1 vsldoi8 LHS, RHS + 2289625629U, // : Cost 3 vmrglw <1,2,u,4>, <3,4,u,6> + 1215884616U, // : Cost 2 vmrglw <1,2,u,4>, RHS + 538193449U, // : Cost 1 vsldoi8 LHS, RHS + 1549443797U, // : Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5> + 118708014U, // : Cost 1 vmrghw RHS, LHS + 1561389191U, // : Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5> + 1213898908U, // : Cost 2 vmrglw <0,u,u,5>, LHS + 1549446454U, // : Cost 2 vsldoi4 <0,u,u,5>, RHS + 118708378U, // : Cost 1 vmrghw RHS, RHS + 604862618U, // : Cost 1 vsldoi12 LHS, RHS + 1213902152U, // : Cost 2 vmrglw <0,u,u,5>, RHS + 604862636U, // : Cost 1 vsldoi12 LHS, RHS + 1567367270U, // : Cost 2 vsldoi4 <3,u,u,6>, LHS + 1192892206U, // : Cost 2 vmrghw , LHS + 1638478330U, // : Cost 2 vsldoi8 RHS, <6,2,7,3> + 1679046864U, // : Cost 2 vsldoi12 LHS, + 1567370550U, // : Cost 2 vsldoi4 <3,u,u,6>, RHS + 1192892570U, // : Cost 2 vmrghw , RHS + 363253046U, // : Cost 1 vspltisw2 RHS + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 499605606U, // : Cost 1 vsldoi4 RHS, LHS + 1235812425U, // : Cost 2 vmrglw RHS, <0,0,u,1> + 1561405577U, // : Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7> + 162070684U, // : Cost 1 vmrglw RHS, LHS + 499609147U, // : Cost 1 vsldoi4 RHS, RHS + 1235812753U, // : Cost 2 vmrglw RHS, <0,4,u,5> + 1235814941U, // : Cost 2 vmrglw RHS, <3,4,u,6> + 162073928U, // : Cost 1 vmrglw RHS, RHS + 162070689U, // : Cost 1 vmrglw RHS, LHS + 475726552U, // : Cost 1 vsldoi4 LHS, LHS + 538195758U, // : Cost 1 vsldoi8 LHS, LHS + 604862821U, // : Cost 1 vsldoi12 LHS, LHS + 835584U, // : Cost 0 copy LHS + 475729206U, // : Cost 1 vsldoi4 LHS, RHS + 538196122U, // : Cost 1 vsldoi8 LHS, RHS + 604862861U, // : Cost 1 vsldoi12 LHS, RHS + 27705344U, // : Cost 0 copy RHS + 835584U, // : Cost 0 copy LHS + 0 +}; diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp new file mode 100644 index 000000000000..08a281259e1f --- /dev/null +++ b/lib/Target/PowerPC/PPCPredicates.cpp @@ -0,0 +1,30 @@ +//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PowerPC branch predicates. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Unknown PPC branch opcode!");
+  case PPC::PRED_EQ: return PPC::PRED_NE;
+  case PPC::PRED_NE: return PPC::PRED_EQ;
+  case PPC::PRED_LT: return PPC::PRED_GE;
+  case PPC::PRED_GE: return PPC::PRED_LT;
+  case PPC::PRED_GT: return PPC::PRED_LE;
+  case PPC::PRED_LE: return PPC::PRED_GT;
+  case PPC::PRED_NU: return PPC::PRED_UN;
+  case PPC::PRED_UN: return PPC::PRED_NU;
+  }
+}
diff --git a/lib/Target/PowerPC/PPCPredicates.h b/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 000000000000..b2c831579f79
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+  /// Predicate - These are "(BI << 5) | BO" for various predicates.
+  enum Predicate {
+    PRED_ALWAYS = (0 << 5) | 20,
+    PRED_LT     = (0 << 5) | 12,
+    PRED_LE     = (1 << 5) |  4,
+    PRED_EQ     = (2 << 5) | 12,
+    PRED_GE     = (0 << 5) |  4,
+    PRED_GT     = (1 << 5) | 12,
+    PRED_NE     = (2 << 5) |  4,
+    PRED_UN     = (3 << 5) | 12,
+    PRED_NU     = (3 << 5) |  4
+  };
+
+  /// Invert the specified predicate. != -> ==, < -> >=.
+  Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
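With PPCPredicates.h in view, a minimal sketch of how the "(BI << 5) | BO" encoding unpacks; the getBO/getBI helpers are hypothetical, not part of the patch:

    #include "PPCPredicates.h"

    // BO=12 branches when the selected CR bit is set, BO=4 when it is clear;
    // BI picks the bit within the CR field (0=LT, 1=GT, 2=EQ, 3=UN).
    static unsigned getBO(llvm::PPC::Predicate P) { return unsigned(P) & 31; }
    static unsigned getBI(llvm::PPC::Predicate P) { return unsigned(P) >> 5; }

    // So PRED_GE == (0 << 5) | 4 reads "test the LT bit, branch if clear",
    // and InvertPredicate amounts to toggling bit 3 of BO (12 ^ 8 == 4) for
    // every predicate except PRED_ALWAYS, which the switch asserts on.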
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..5d5beebda705
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1446 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary
+// bigger than the default (16 bytes on Darwin) when there is a stack local
+// of greater alignment. This does not currently work, because the delta
+// between old and new stack pointers is added to offsets that reference
+// incoming parameters after the prolog is generated, and the code that
+// does that doesn't handle a variable delta. You don't want to do that
+// anyway; a better approach is to reserve another register that retains
+// the incoming stack pointer, and reference parameters relative to that.
+#define ALIGN_STACK 0
+
+// FIXME (64-bit): Eventually enable by default.
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+                            cl::init(false),
+                            cl::desc("Enable PPC32 register scavenger"),
+                            cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+                            cl::init(false),
+                            cl::desc("Enable PPC64 register scavenger"),
+                            cl::Hidden);
+#define EnableRegisterScavenging \
+  ((EnablePPC32RS && !Subtarget.isPPC64()) || \
+   (EnablePPC64RS && Subtarget.isPPC64()))
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+  return EnableRegisterScavenging;
+}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
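As a reader aid for the function that follows (an editorial sketch, not part of the patch; the values come straight from its switch):

    // Each register file (R, X, F, V) collapses onto 0..31, and a CR bit
    // register numbers as 4 * CRfield + {LT=0, GT=1, EQ=2, UN=3}:
    unsigned GPR = PPCRegisterInfo::getRegisterNumbering(PPC::R7);    // == 7
    unsigned FPR = PPCRegisterInfo::getRegisterNumbering(PPC::F14);   // == 14
    unsigned Bit = PPCRegisterInfo::getRegisterNumbering(PPC::CR2EQ); // == 4*2+2 == 10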
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace PPC; + switch (RegEnum) { + case 0: return 0; + case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; + case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; + case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; + case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; + case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; + case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; + case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; + case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; + case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; + case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; + case R10: case X10: case F10: case V10: case CR2EQ: return 10; + case R11: case X11: case F11: case V11: case CR2UN: return 11; + case R12: case X12: case F12: case V12: case CR3LT: return 12; + case R13: case X13: case F13: case V13: case CR3GT: return 13; + case R14: case X14: case F14: case V14: case CR3EQ: return 14; + case R15: case X15: case F15: case V15: case CR3UN: return 15; + case R16: case X16: case F16: case V16: case CR4LT: return 16; + case R17: case X17: case F17: case V17: case CR4GT: return 17; + case R18: case X18: case F18: case V18: case CR4EQ: return 18; + case R19: case X19: case F19: case V19: case CR4UN: return 19; + case R20: case X20: case F20: case V20: case CR5LT: return 20; + case R21: case X21: case F21: case V21: case CR5GT: return 21; + case R22: case X22: case F22: case V22: case CR5EQ: return 22; + case R23: case X23: case F23: case V23: case CR5UN: return 23; + case R24: case X24: case F24: case V24: case CR6LT: return 24; + case R25: case X25: case F25: case V25: case CR6GT: return 25; + case R26: case X26: case F26: case V26: case CR6EQ: return 26; + case R27: case X27: case F27: case V27: case CR6UN: return 27; + case R28: case X28: case F28: case V28: case CR7LT: return 28; + case R29: case X29: case F29: case V29: case CR7GT: return 29; + case R30: case X30: case F30: case V30: case CR7EQ: return 30; + case R31: case X31: case F31: case V31: case CR7UN: return 31; + default: + cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n"; + abort(); + } +} + +PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, + const TargetInstrInfo &tii) + : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + Subtarget(ST), TII(tii) { + ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; + ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; + ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; + ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX; + ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX; + ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX; + ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX; + ImmToIdxMap[PPC::ADDI] = PPC::ADD4; + + // 64-bit + ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8; + ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8; + ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; + ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; + ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; +} + +/// getPointerRegClass - Return the 
register class to use to hold pointers. +/// This is used for addressing modes. +const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const { + if (Subtarget.isPPC64()) + return &PPC::G8RCRegClass; + else + return &PPC::GPRCRegClass; +} + +const unsigned* +PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + // 32-bit Darwin calling convention. + static const unsigned Macho32_CalleeSavedRegs[] = { + PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + PPC::LR, 0 + }; + + static const unsigned ELF32_CalleeSavedRegs[] = { + PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31, + + PPC::F9, + PPC::F10, PPC::F11, PPC::F12, PPC::F13, + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + PPC::LR, 0 + }; + // 64-bit Darwin calling convention. + static const unsigned Macho64_CalleeSavedRegs[] = { + PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + PPC::LR8, 0 + }; + + if (Subtarget.isMachoABI()) + return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs : + Macho32_CalleeSavedRegs; + + // ELF 32. + return ELF32_CalleeSavedRegs; +} + +const TargetRegisterClass* const* +PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + // 32-bit Macho calling convention. 
+ static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = { + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass, + + &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + + &PPC::GPRCRegClass, 0 + }; + + static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = { + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + + &PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass, + + &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + + &PPC::GPRCRegClass, 0 + }; + + // 64-bit Macho calling convention. 
+  static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = {
+    &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+    &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+    &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+    &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+    &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+    &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+    &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+    &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+    &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+    &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+    &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+    &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+    &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+    &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+    &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+    &PPC::CRBITRCRegClass,
+    &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+    &PPC::CRBITRCRegClass,
+    &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+    &PPC::CRBITRCRegClass,
+
+    &PPC::G8RCRegClass, 0
+  };
+
+  if (Subtarget.isMachoABI())
+    return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses :
+                                 Macho32_CalleeSavedRegClasses;
+
+  // ELF 32.
+  return ELF32_CalleeSavedRegClasses;
+}
+
+// needsFP - Return true if the specified function should have a dedicated
+// frame pointer register. This is true if the function has variable sized
+// allocas or if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+    (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  return FuncInfo->isCRSpilled();
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  Reserved.set(PPC::R0);
+  Reserved.set(PPC::R1);
+  Reserved.set(PPC::LR);
+  Reserved.set(PPC::LR8);
+  Reserved.set(PPC::RM);
+
+  // In Linux, r2 is reserved for the OS.
+  if (!Subtarget.isDarwin())
+    Reserved.set(PPC::R2);
+
+  // On PPC64, r13 is the thread pointer. Never allocate this register. Note
+  // that this is over conservative, as it also prevents allocation of R31 when
+  // the FP is not needed.
+  if (Subtarget.isPPC64()) {
+    Reserved.set(PPC::R13);
+    Reserved.set(PPC::R31);
+
+    if (!EnableRegisterScavenging)
+      Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+    Reserved.set(PPC::X0);
+    Reserved.set(PPC::X1);
+    Reserved.set(PPC::X13);
+    Reserved.set(PPC::X31);
+  }
+
+  if (needsFP(MF))
+    Reserved.set(PPC::R31);
+
+  return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
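A usage sketch before the frame-processing code (illustrative, not part of the patch; it assumes a MachineFunction MF and a PPCRegisterInfo TRI in scope), showing how the reserved set above ties into the frame-pointer decision:

    // R31 drops out of the allocatable set exactly when needsFP(MF) holds;
    // the hasFP() check below is stricter, also requiring a non-zero frame.
    llvm::BitVector Reserved = TRI.getReservedRegs(MF);
    bool R31Allocatable = !Reserved[PPC::R31];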
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->getStackSize() && needsFP(MF);
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+  // We need a save/restore of LR if there is any def of LR (which is
+  // defined by calls, including the PIC setup sequence), or if there is
+  // some use of the LR stack slot (e.g. for builtin_return_address).
+  // (LR comes in 32 and 64 bit versions.)
+  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+    // Add (actually subtract) back the amount the callee popped on return.
+    if (int CalleeAmt = I->getOperand(1).getImm()) {
+      bool is64Bit = Subtarget.isPPC64();
+      CalleeAmt *= -1;
+      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+      MachineInstr *MI = I;
+      DebugLoc dl = MI->getDebugLoc();
+
+      if (isInt16(CalleeAmt)) {
+        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+          addImm(CalleeAmt);
+      } else {
+        MachineBasicBlock::iterator MBBI = I;
+        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+          .addImm(CalleeAmt >> 16);
+        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+          .addReg(TmpReg, RegState::Kill)
+          .addImm(CalleeAmt & 0xFFFF);
+        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+          .addReg(StackReg)
+          .addReg(StackReg)
+          .addReg(TmpReg);
+      }
+    }
+  }
+  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+                             const TargetRegisterClass *RC, int SPAdj) {
+  assert(RS && "Register scavenging must be on");
+  unsigned Reg = RS->FindUnusedReg(RC, true);
+  // FIXME: move ARM callee-saved reg scan to target independent code, then
+  // search for already spilled CS register here.
+  if (Reg == 0)
+    Reg = RS->scavengeRegister(RC, II, SPAdj);
+  return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+///   addi   R0, SP, #frameSize  ; get the address of the previous frame
+///   stwux  R0, SP, Rnegsize    ; add and update the SP with the negated size
+///   addi   Rnew, SP, #maxCallFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+                                        int SPAdj, RegScavenger *RS) const {
+  // Get the instruction.
+  MachineInstr &MI = *II;
+  // Get the instruction's basic block.
+  MachineBasicBlock &MBB = *MI.getParent();
+  // Get the basic block's function.
+  MachineFunction &MF = *MBB.getParent();
+  // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + // Determine whether 64-bit pointers are used. + bool LP64 = Subtarget.isPPC64(); + DebugLoc dl = MI.getDebugLoc(); + + // Get the maximum call stack size. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + // Get the total frame size. + unsigned FrameSize = MFI->getStackSize(); + + // Get stack alignments. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + assert(MaxAlign <= TargetAlign && + "Dynamic alloca with large aligns not supported"); + + // Determine the previous frame's address. If FrameSize can't be + // represented as 16 bits or we need special alignment, then we load the + // previous frame's address from 0(SP). Why not do an addis of the hi? + // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. + // Constructing the constant and adding would take 3 instructions. + // Fortunately, a frame greater than 32K is rare. + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; + + // FIXME (64-bit): Use "findScratchRegister" + unsigned Reg; + if (EnableRegisterScavenging) + Reg = findScratchRegister(II, RS, RC, SPAdj); + else + Reg = PPC::R0; + + if (MaxAlign < TargetAlign && isInt16(FrameSize)) { + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) + .addReg(PPC::R31) + .addImm(FrameSize); + } else if (LP64) { + if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part. + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); + else + BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) + .addImm(0) + .addReg(PPC::X1); + } else { + BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) + .addImm(0) + .addReg(PPC::R1); + } + + // Grow the stack and update the stack pointer link, then determine the + // address of new allocated space. + if (LP64) { + if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part. + BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); + else + BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X0, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); + + if (!MI.getOperand(1).isKill()) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize); + else + // Implicitly kill the register. + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize) + .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + } else { + BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) + .addReg(Reg, RegState::Kill) + .addReg(PPC::R1) + .addReg(MI.getOperand(1).getReg()); + + if (!MI.getOperand(1).isKill()) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize); + else + // Implicitly kill the register. + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize) + .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + } + + // Discard the DYNALLOC instruction. + MBB.erase(II); +} + +/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of +/// reserving a whole register (R0), we scrounge for one here. This generates +/// code like this: +/// +/// mfcr rA ; Move the conditional register into GPR rA. 
+///   rlwinm rA, rA, SB, 0, 31  ; Shift the bits left so they are in CR0's slot.
+///   stw    rA, FI             ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+                                      unsigned FrameIndex, int SPAdj,
+                                      RegScavenger *RS) const {
+  // Get the instruction.
+  MachineInstr &MI = *II;       // ; SPILL_CR <SrcReg>, <offset>, <FI>
+  // Get the instruction's basic block.
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc dl = MI.getDebugLoc();
+
+  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+  const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+  unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+
+  // We need to store the CR in the low 4-bits of the saved value. First, issue
+  // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
+  if (!MI.getOperand(0).isKill())
+    BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
+  else
+    // Implicitly kill the CR register.
+    BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
+      .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+
+  // If the saved register wasn't CR0, shift the bits left so that they are in
+  // CR0's slot.
+  unsigned SrcReg = MI.getOperand(0).getReg();
+  if (SrcReg != PPC::CR0)
+    // rlwinm rA, rA, ShiftBits, 0, 31.
+    BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+      .addReg(Reg, RegState::Kill)
+      .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+      .addImm(0)
+      .addImm(31);
+
+  addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+                    .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+                    FrameIndex);
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
+void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          int SPAdj, RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  // Get the instruction.
+  MachineInstr &MI = *II;
+  // Get the instruction's basic block.
+  MachineBasicBlock &MBB = *MI.getParent();
+  // Get the basic block's function.
+  MachineFunction &MF = *MBB.getParent();
+  // Get the frame info.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  DebugLoc dl = MI.getDebugLoc();
+
+  // Find out which operand is the frame index.
+  unsigned FIOperandNo = 0;
+  while (!MI.getOperand(FIOperandNo).isFI()) {
+    ++FIOperandNo;
+    assert(FIOperandNo != MI.getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+  // Take into account whether it's an add or mem instruction
+  unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+  if (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+    OffsetOperandNo = FIOperandNo-1;
+
+  // Get the frame index.
+  int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+  // Get the frame pointer save index. Users of this index are primarily
+  // DYNALLOC instructions.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  int FPSI = FI->getFramePointerSaveIndex();
+  // Get the instruction opcode.
+  unsigned OpC = MI.getOpcode();
+
+  // Special case for dynamic alloca.
+  if (FPSI && FrameIndex == FPSI &&
+      (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+    lowerDynamicAlloc(II, SPAdj, RS);
+    return;
+  }
+
+  // Special case for pseudo-op SPILL_CR.
+  if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
+    if (OpC == PPC::SPILL_CR) {
+      lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+      return;
+    }
+
+  // Replace the FrameIndex with the base register, GPR1 (SP) or GPR31 (FP).
+  MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1,
+                                              false);
+
+  // Figure out if the offset in the instruction is shifted right two bits.
This + // is true for instructions like "STD", which the machine implicitly adds two + // low zeros to. + bool isIXAddr = false; + switch (OpC) { + case PPC::LWA: + case PPC::LD: + case PPC::STD: + case PPC::STD_32: + isIXAddr = true; + break; + } + + // Now add the frame object offset to the offset from r1. + int Offset = MFI->getObjectOffset(FrameIndex); + if (!isIXAddr) + Offset += MI.getOperand(OffsetOperandNo).getImm(); + else + Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + + // If we're not using a Frame Pointer that has been set to the value of the + // SP before having the stack size subtracted from it, then add the stack size + // to Offset to get the correct offset. + Offset += MFI->getStackSize(); + + // If we can, encode the offset directly into the instruction. If this is a + // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If + // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits + // clear can be encoded. This is extremely uncommon, because normally you + // only "std" to a stack slot that is at least 4-byte aligned, but it can + // happen in invalid code. + if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (isIXAddr) + Offset >>= 2; // The actual encoded value has the low two bits zero. + MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); + return; + } + + // The offset doesn't fit into a single register, scavenge one to build the + // offset in. + // FIXME: figure out what SPAdj is doing here. + + // FIXME (64-bit): Use "findScratchRegister". + unsigned SReg; + if (EnableRegisterScavenging) + SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); + else + SReg = PPC::R0; + + // Insert a set of rA with the full offset value before the ld, st, or add + BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg) + .addImm(Offset >> 16); + BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg) + .addReg(SReg, RegState::Kill) + .addImm(Offset); + + // Convert into indexed form of the instruction: + // + // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0 + // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 + unsigned OperandBase; + + if (OpC != TargetInstrInfo::INLINEASM) { + assert(ImmToIdxMap.count(OpC) && + "No indexed form of load or store available!"); + unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; + MI.setDesc(TII.get(NewOpcode)); + OperandBase = 1; + } else { + OperandBase = OffsetOperandNo; + } + + unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); + MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); +} + +/// VRRegNo - Map from a numbered VR register to its enum value. +/// +static const unsigned short VRRegNo[] = { + PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , + PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +/// RemoveVRSaveCode - We have found that this function does not need any code +/// to manipulate the VRSAVE register, even though it uses vector registers. +/// This can happen when the only registers used are known to be live in or out +/// of the function. Remove all of the VRSAVE related code from the function. 
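One detail worth calling out before the VRSAVE removal and mask-update code below (an illustrative fragment, not patch content): the VRSAVE mask uses reversed bit numbering.

    // Vector register Vn is tracked in VRSAVE bit (31 - n), counting from
    // the LSB, i.e. V0 owns the most significant bit:
    unsigned UsedRegMask = 0;
    UsedRegMask |= 1u << (31 - 0);   // V0  -> 0x80000000
    UsedRegMask |= 1u << (31 - 31);  // V31 -> 0x00000001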
+static void RemoveVRSaveCode(MachineInstr *MI) { + MachineBasicBlock *Entry = MI->getParent(); + MachineFunction *MF = Entry->getParent(); + + // We know that the MTVRSAVE instruction immediately follows MI. Remove it. + MachineBasicBlock::iterator MBBI = MI; + ++MBBI; + assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); + MBBI->eraseFromParent(); + + bool RemovedAllMTVRSAVEs = true; + // See if we can find and remove the MTVRSAVE instruction from all of the + // epilog blocks. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) { + bool FoundIt = false; + for (MBBI = I->end(); MBBI != I->begin(); ) { + --MBBI; + if (MBBI->getOpcode() == PPC::MTVRSAVE) { + MBBI->eraseFromParent(); // remove it. + FoundIt = true; + break; + } + } + RemovedAllMTVRSAVEs &= FoundIt; + } + } + + // If we found and removed all MTVRSAVE instructions, remove the read of + // VRSAVE as well. + if (RemovedAllMTVRSAVEs) { + MBBI = MI; + assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); + --MBBI; + assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); + MBBI->eraseFromParent(); + } + + // Finally, nuke the UPDATE_VRSAVE. + MI->eraseFromParent(); +} + +// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the +// instruction selector. Based on the vector registers that have been used, +// transform this into the appropriate ORI instruction. +static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { + MachineFunction *MF = MI->getParent()->getParent(); + DebugLoc dl = MI->getDebugLoc(); + + unsigned UsedRegMask = 0; + for (unsigned i = 0; i != 32; ++i) + if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + UsedRegMask |= 1 << (31-i); + + // Live in and live out values already must be in the mask, so don't bother + // marking them. + for (MachineRegisterInfo::livein_iterator + I = MF->getRegInfo().livein_begin(), + E = MF->getRegInfo().livein_end(); I != E; ++I) { + unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first); + if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I); + if (VRRegNo[RegNo] == *I) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + + // If no registers are used, turn this into a copy. + if (UsedRegMask == 0) { + // Remove all VRSAVE code. 
+ RemoveVRSaveCode(MI); + return; + } + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + if ((UsedRegMask & 0xFFFF) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask); + } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + } else { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(UsedRegMask & 0xFFFF); + } + + // Remove the old UPDATE_VRSAVE instruction. + MI->eraseFromParent(); +} + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. + unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned AlignMask = TargetAlign - 1; // + + // If we are a leaf function, and use up to 224 bytes of stack space, + // don't have a frame pointer, calls, or dynamic alloca then we do not need + // to adjust the stack pointer (we fit in the Red Zone). + if (!DisableRedZone && + FrameSize <= 224 && // Fits in red zone. + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->hasCalls() && // No calls. + (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. + // No need for frame + MFI->setStackSize(0); + return; + } + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // Maximum call frame needs to be at least big enough for linkage and 8 args. + unsigned minCallFrameSize = + PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), + Subtarget.isMachoABI()); + maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} + +void +PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // Save and clear the LR state. 
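+ // (The answer is cached in PPCFunctionInfo, and LR's "used" bit is then + // cleared so the prologue/epilogue code below keys off mustSaveLR() alone.)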
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + unsigned LR = getRARegister(); + FI->setMustSaveLR(MustSaveLR(MF, LR)); + MF.getRegInfo().setPhysRegUnused(LR); + + // Save R31 if necessary + int FPSI = FI->getFramePointerSaveIndex(); + bool IsPPC64 = Subtarget.isPPC64(); + bool IsELF32_ABI = Subtarget.isELF32_ABI(); + bool IsMachoABI = Subtarget.isMachoABI(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // If the frame pointer save index hasn't been defined yet. + if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) && + IsELF32_ABI) { + // Find out the fixed offset of the frame pointer save area. + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + IsMachoABI); + // Allocate the frame index for the frame pointer save area. + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + // Save the result. + FI->setFramePointerSaveIndex(FPSI); + } + + // Reserve stack space to move the linkage area to in case of a tail call. + int TCSPDelta = 0; + if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) { + int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0; + MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta, + AddFPOffsetAmount + TCSPDelta); + } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + // FIXME: this doesn't actually check stack size, so is a bit pessimistic + // FIXME: doesn't detect whether or not we need to spill vXX, which requires + // r0 for now. + + if (EnableRegisterScavenging) // FIXME (64-bit): Enable. + if (needsFP(MF) || spillsCR(MF)) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment())); + } +} + +void +PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + DebugLoc dl = DebugLoc::getUnknownLoc(); + bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || + !MF.getFunction()->doesNotThrow() || + UnwindTablesMandatory; + + // Prepare for frame info. + unsigned FrameLabelId = 0; + + // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, + // process it. + for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, TII); + break; + } + } + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + + // Work out frame sizes. + determineFrameLayout(MF); + unsigned FrameSize = MFI->getStackSize(); + + int NegFrameSize = -FrameSize; + + // Get processor type. + bool IsPPC64 = Subtarget.isPPC64(); + // Get the ABI. + bool IsMachoABI = Subtarget.isMachoABI(); + // Check if the link register (LR) must be saved. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + bool MustSaveLR = FI->mustSaveLR(); + // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize; + + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + + if (IsPPC64) { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(PPC::X31) + .addImm(FPOffset/4) + .addReg(PPC::X1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(PPC::X0) + .addImm(LROffset / 4) + .addReg(PPC::X1); + } else { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(PPC::R31) + .addImm(FPOffset) + .addReg(PPC::R1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(PPC::R0) + .addImm(LROffset) + .addReg(PPC::R1); + } + + // Skip if a leaf routine. + if (!FrameSize) return; + + // Get stack alignments. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId); + } + + // Adjust stack pointer: r1 += NegFrameSize. + // If there is a preferred stack alignment, align R1 now + if (!IsPPC64) { + // PPC32. + if (ALIGN_STACK && MaxAlign > TargetAlign) { + assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); + assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); + + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) + .addReg(PPC::R1) + .addImm(0) + .addImm(32 - Log2_32(MaxAlign)) + .addImm(31); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + .addReg(PPC::R1) + .addReg(PPC::R1) + .addReg(PPC::R0); + } else if (isInt16(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) + .addReg(PPC::R1) + .addImm(NegFrameSize) + .addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + .addReg(PPC::R1) + .addReg(PPC::R1) + .addReg(PPC::R0); + } + } else { // PPC64. 
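+ // The 64-bit path below mirrors the 32-bit sequence using the doubleword + // forms; schematically, a small frame becomes + // stdu r1, -FrameSize(r1) + // and a frame too large for a 16-bit displacement becomes + // lis r0, hi16 ; ori r0, r0, lo16 ; stdux r1, r1, r0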
+ if (ALIGN_STACK && MaxAlign > TargetAlign) { + assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); + assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); + + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) + .addReg(PPC::X1) + .addImm(0) + .addImm(64 - Log2_32(MaxAlign)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) + .addReg(PPC::X0) + .addImm(NegFrameSize); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X1) + .addReg(PPC::X1) + .addReg(PPC::X0); + } else if (isInt16(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) + .addReg(PPC::X1) + .addImm(NegFrameSize / 4) + .addReg(PPC::X1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X1) + .addReg(PPC::X1) + .addReg(PPC::X0); + } + } + + if (needsFrameMoves) { + std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + + if (NegFrameSize) { + // Show update of SP. + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } else { + MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31); + Moves.push_back(MachineMove(FrameLabelId, SP, SP)); + } + + if (HasFP) { + MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); + MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31); + Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); + } + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); + } + + MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); + MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); + Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); + + // Mark effective beginning of when frame pointer is ready. + unsigned ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); + + MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : + (IsPPC64 ?
PPC::X1 : PPC::R1)); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + } + + // If there is a frame pointer, copy R1 into R31 + if (HasFP) { + if (!IsPPC64) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) + .addReg(PPC::R1) + .addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) + .addReg(PPC::X1) + .addReg(PPC::X1); + } + } +} + +void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl = DebugLoc::getUnknownLoc(); + + assert( (RetOpcode == PPC::BLR || + RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8) && + "Can only insert epilog into returning blocks"); + + // Get alignment info so we know how to restore r1 + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Get the number of bytes allocated from the FrameInfo. + int FrameSize = MFI->getStackSize(); + + // Get processor type. + bool IsPPC64 = Subtarget.isPPC64(); + // Get the ABI. + bool IsMachoABI = Subtarget.isMachoABI(); + // Check if the link register (LR) has been saved. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + bool MustSaveLR = FI->mustSaveLR(); + // Do we have a frame pointer for this function? + bool HasFP = hasFP(MF) && FrameSize; + + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8; + + if (UsesTCRet) { + int MaxTCRetDelta = FI->getTailCallSPDelta(); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int Delta = StackAdj - MaxTCRetDelta; + assert((Delta >= 0) && "Delta must be non-negative"); + if (MaxTCRetDelta > 0) + FrameSize += (StackAdj + Delta); + else + FrameSize += StackAdj; + } + + if (FrameSize) { + // The loaded (or persistent) stack pointer value is offset by the 'stwu' + // on entry to the function. Add this offset back now. + if (!IsPPC64) { + // If this function contained a fastcc call and PerformTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case.
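+ // Schematically, the restore below is "addi r1, r31/r1, FrameSize" when + // the size fits in 16 bits, and otherwise either materializes the size in + // r0 or reloads the saved back chain with "lwz r1, 0(r1)".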
+ if (FI->hasFastCall() && isInt16(FrameSize)) { + assert(hasFP(MF) && "Expecting a valid frame pointer."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) + .addReg(PPC::R31).addImm(FrameSize); + } else if (FI->hasFastCall()) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) + .addReg(PPC::R1) + .addReg(PPC::R31) + .addReg(PPC::R0); + } else if (isInt16(FrameSize) && + (!ALIGN_STACK || TargetAlign >= MaxAlign) && + !MFI->hasVarSizedObjects()) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) + .addReg(PPC::R1).addImm(FrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R1) + .addImm(0).addReg(PPC::R1); + } + } else { + if (FI->hasFastCall() && isInt16(FrameSize)) { + assert(hasFP(MF) && "Expecting a valid frame pointer."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) + .addReg(PPC::X31).addImm(FrameSize); + } else if (FI->hasFastCall()) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8)) + .addReg(PPC::X1) + .addReg(PPC::X31) + .addReg(PPC::X0); + } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign && + !MFI->hasVarSizedObjects()) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) + .addReg(PPC::X1).addImm(FrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1) + .addImm(0).addReg(PPC::X1); + } + } + } + + if (IsPPC64) { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) + .addImm(LROffset/4).addReg(PPC::X1); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) + .addImm(FPOffset/4).addReg(PPC::X1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); + } else { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) + .addImm(LROffset).addReg(PPC::R1); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) + .addImm(FPOffset).addReg(PPC::R1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); + } + + // Callee pop calling convention. Pop parameter/linkage area. Used for tail + // call optimization. + if (PerformTailCallOpt && RetOpcode == PPC::BLR && + MF.getFunction()->getCallingConv() == CallingConv::Fast) { + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + unsigned CallerAllocatedAmt = FI->getMinReservedArea(); + unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1; + unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31; + unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = IsPPC64 ?
PPC::ORI8 : PPC::ORI; + + if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) { + BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg).addImm(CallerAllocatedAmt); + } else { + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CallerAllocatedAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CallerAllocatedAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) + .addReg(StackReg) + .addReg(FPReg) + .addReg(TmpReg); + } + } else if (RetOpcode == PPC::TCRETURNdi) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri) { + MBBI = prior(MBB.end()); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); + } else if (RetOpcode == PPC::TCRETURNai) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); + } else if (RetOpcode == PPC::TCRETURNdi8) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri8) { + MBBI = prior(MBB.end()); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); + } else if (RetOpcode == PPC::TCRETURNai8) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } +} + +unsigned PPCRegisterInfo::getRARegister() const { + return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8; +} + +unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const { + if (!Subtarget.isPPC64()) + return hasFP(MF) ? PPC::R31 : PPC::R1; + else + return hasFP(MF) ? PPC::X31 : PPC::X1; +} + +void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) + const { + // Initial state of the frame pointer is R1. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(PPC::R1, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +unsigned PPCRegisterInfo::getEHExceptionRegister() const { + return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3; +} + +unsigned PPCRegisterInfo::getEHHandlerRegister() const { + return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; +} + +int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + // FIXME: Most probably dwarf numbers differ for Linux and Darwin + return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); +} + +#include "PPCGenRegisterInfo.inc" + diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h new file mode 100644 index 000000000000..9506b651c5b3 --- /dev/null +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -0,0 +1,95 @@ +//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetRegisterInfo +// class.
+// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC32_REGISTERINFO_H +#define POWERPC32_REGISTERINFO_H + +#include "PPC.h" +#include "PPCGenRegisterInfo.h.inc" +#include <map> + +namespace llvm { +class PPCSubtarget; +class TargetInstrInfo; +class Type; + +class PPCRegisterInfo : public PPCGenRegisterInfo { + std::map<unsigned, unsigned> ImmToIdxMap; + const PPCSubtarget &Subtarget; + const TargetInstrInfo &TII; +public: + PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// PPC::F14, return the number that it corresponds to (e.g. 14). + static unsigned getRegisterNumbering(unsigned RegEnum); + + /// getPointerRegClass - Return the register class to use to hold pointers. + /// This is used for addressing modes. + virtual const TargetRegisterClass *getPointerRegClass() const; + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + /// requiresRegisterScavenging - We require a register scavenger. + /// FIXME (64-bit): Should be inlined. + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void lowerDynamicAlloc(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const; + void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + /// determineFrameLayout - Determine the size of the frame and maximum call + /// frame size. + void determineFrameLayout(MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td new file mode 100644 index 000000000000..9e15a55781c8 --- /dev/null +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -0,0 +1,360 @@ +//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +class PPCReg<string n> : Register<n> { + let Namespace = "PPC"; +} + +// We identify all our registers with a 5-bit ID, for consistency's sake. + +// GPR - One of the 32 32-bit general-purpose registers +class GPR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// GP8 - One of the 32 64-bit general-purpose registers +class GP8<GPR SubReg, string n> : PPCReg<n> { + field bits<5> Num = SubReg.Num; + let SubRegs = [SubReg]; +} + +// SPR - One of the 32-bit special-purpose registers +class SPR<bits<10> num, string n> : PPCReg<n> { + field bits<10> Num = num; +} + +// FPR - One of the 32 64-bit floating-point registers +class FPR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// VR - One of the 32 128-bit vector registers +class VR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// CR - One of the 8 4-bit condition registers +class CR<bits<3> num, string n> : PPCReg<n> { + field bits<3> Num = num; +} + +// CRBIT - One of the 32 1-bit condition register fields +class CRBIT<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + + +// General-purpose registers +def R0 : GPR< 0, "r0">, DwarfRegNum<[0]>; +def R1 : GPR< 1, "r1">, DwarfRegNum<[1]>; +def R2 : GPR< 2, "r2">, DwarfRegNum<[2]>; +def R3 : GPR< 3, "r3">, DwarfRegNum<[3]>; +def R4 : GPR< 4, "r4">, DwarfRegNum<[4]>; +def R5 : GPR< 5, "r5">, DwarfRegNum<[5]>; +def R6 : GPR< 6, "r6">, DwarfRegNum<[6]>; +def R7 : GPR< 7, "r7">, DwarfRegNum<[7]>; +def R8 : GPR< 8, "r8">, DwarfRegNum<[8]>; +def R9 : GPR< 9, "r9">, DwarfRegNum<[9]>; +def R10 : GPR<10, "r10">, DwarfRegNum<[10]>; +def R11 : GPR<11, "r11">, DwarfRegNum<[11]>; +def R12 : GPR<12, "r12">, DwarfRegNum<[12]>; +def R13 : GPR<13, "r13">, DwarfRegNum<[13]>; +def R14 : GPR<14, "r14">, DwarfRegNum<[14]>; +def R15 : GPR<15, "r15">, DwarfRegNum<[15]>; +def R16 : GPR<16, "r16">, DwarfRegNum<[16]>; +def R17 : GPR<17, "r17">, DwarfRegNum<[17]>; +def R18 : GPR<18, "r18">, DwarfRegNum<[18]>; +def R19 : GPR<19, "r19">, DwarfRegNum<[19]>; +def R20 : GPR<20, "r20">, DwarfRegNum<[20]>; +def R21 : GPR<21, "r21">, DwarfRegNum<[21]>; +def R22 : GPR<22, "r22">, DwarfRegNum<[22]>; +def R23 : GPR<23, "r23">, DwarfRegNum<[23]>; +def R24 : GPR<24, "r24">, DwarfRegNum<[24]>; +def R25 : GPR<25, "r25">, DwarfRegNum<[25]>; +def R26 : GPR<26, "r26">, DwarfRegNum<[26]>; +def R27 : GPR<27, "r27">, DwarfRegNum<[27]>; +def R28 : GPR<28, "r28">, DwarfRegNum<[28]>; +def R29 : GPR<29, "r29">, DwarfRegNum<[29]>; +def R30 : GPR<30, "r30">, DwarfRegNum<[30]>; +def R31 : GPR<31, "r31">, DwarfRegNum<[31]>; + +// 64-bit General-purpose registers +def X0 : GP8< R0, "r0">, DwarfRegNum<[0]>; +def X1 : GP8< R1, "r1">, DwarfRegNum<[1]>; +def X2 : GP8< R2, "r2">, DwarfRegNum<[2]>; +def X3 : GP8< R3, "r3">, DwarfRegNum<[3]>; +def X4 : GP8< R4, "r4">, DwarfRegNum<[4]>; +def X5 : GP8< R5, "r5">, DwarfRegNum<[5]>; +def X6 : GP8< R6, "r6">, DwarfRegNum<[6]>; +def X7 : GP8< R7, "r7">, DwarfRegNum<[7]>; +def X8 : GP8< R8, "r8">, DwarfRegNum<[8]>; +def X9 : GP8< R9, "r9">, DwarfRegNum<[9]>; +def X10 : GP8<R10, "r10">, DwarfRegNum<[10]>; +def X11 : GP8<R11, "r11">, DwarfRegNum<[11]>; +def X12 : GP8<R12, "r12">, DwarfRegNum<[12]>; +def X13 : GP8<R13, "r13">, DwarfRegNum<[13]>; +def X14 : GP8<R14, "r14">, DwarfRegNum<[14]>; +def X15 : GP8<R15, "r15">, DwarfRegNum<[15]>; +def X16 : GP8<R16, "r16">, DwarfRegNum<[16]>; +def X17 : GP8<R17, "r17">, DwarfRegNum<[17]>; +def X18 : GP8<R18, "r18">, DwarfRegNum<[18]>; +def X19 : GP8<R19, "r19">, DwarfRegNum<[19]>; +def X20 : GP8<R20, "r20">, DwarfRegNum<[20]>; +def X21 : GP8<R21, "r21">,
DwarfRegNum<[21]>; +def X22 : GP8<R22, "r22">, DwarfRegNum<[22]>; +def X23 : GP8<R23, "r23">, DwarfRegNum<[23]>; +def X24 : GP8<R24, "r24">, DwarfRegNum<[24]>; +def X25 : GP8<R25, "r25">, DwarfRegNum<[25]>; +def X26 : GP8<R26, "r26">, DwarfRegNum<[26]>; +def X27 : GP8<R27, "r27">, DwarfRegNum<[27]>; +def X28 : GP8<R28, "r28">, DwarfRegNum<[28]>; +def X29 : GP8<R29, "r29">, DwarfRegNum<[29]>; +def X30 : GP8<R30, "r30">, DwarfRegNum<[30]>; +def X31 : GP8<R31, "r31">, DwarfRegNum<[31]>; + +// Floating-point registers +def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>; +def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>; +def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>; +def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>; +def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>; +def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>; +def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>; +def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>; +def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>; +def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>; +def F10 : FPR<10, "f10">, DwarfRegNum<[42]>; +def F11 : FPR<11, "f11">, DwarfRegNum<[43]>; +def F12 : FPR<12, "f12">, DwarfRegNum<[44]>; +def F13 : FPR<13, "f13">, DwarfRegNum<[45]>; +def F14 : FPR<14, "f14">, DwarfRegNum<[46]>; +def F15 : FPR<15, "f15">, DwarfRegNum<[47]>; +def F16 : FPR<16, "f16">, DwarfRegNum<[48]>; +def F17 : FPR<17, "f17">, DwarfRegNum<[49]>; +def F18 : FPR<18, "f18">, DwarfRegNum<[50]>; +def F19 : FPR<19, "f19">, DwarfRegNum<[51]>; +def F20 : FPR<20, "f20">, DwarfRegNum<[52]>; +def F21 : FPR<21, "f21">, DwarfRegNum<[53]>; +def F22 : FPR<22, "f22">, DwarfRegNum<[54]>; +def F23 : FPR<23, "f23">, DwarfRegNum<[55]>; +def F24 : FPR<24, "f24">, DwarfRegNum<[56]>; +def F25 : FPR<25, "f25">, DwarfRegNum<[57]>; +def F26 : FPR<26, "f26">, DwarfRegNum<[58]>; +def F27 : FPR<27, "f27">, DwarfRegNum<[59]>; +def F28 : FPR<28, "f28">, DwarfRegNum<[60]>; +def F29 : FPR<29, "f29">, DwarfRegNum<[61]>; +def F30 : FPR<30, "f30">, DwarfRegNum<[62]>; +def F31 : FPR<31, "f31">, DwarfRegNum<[63]>; + +// Vector registers +def V0 : VR< 0, "v0">, DwarfRegNum<[77]>; +def V1 : VR< 1, "v1">, DwarfRegNum<[78]>; +def V2 : VR< 2, "v2">, DwarfRegNum<[79]>; +def V3 : VR< 3, "v3">, DwarfRegNum<[80]>; +def V4 : VR< 4, "v4">, DwarfRegNum<[81]>; +def V5 : VR< 5, "v5">, DwarfRegNum<[82]>; +def V6 : VR< 6, "v6">, DwarfRegNum<[83]>; +def V7 : VR< 7, "v7">, DwarfRegNum<[84]>; +def V8 : VR< 8, "v8">, DwarfRegNum<[85]>; +def V9 : VR< 9, "v9">, DwarfRegNum<[86]>; +def V10 : VR<10, "v10">, DwarfRegNum<[87]>; +def V11 : VR<11, "v11">, DwarfRegNum<[88]>; +def V12 : VR<12, "v12">, DwarfRegNum<[89]>; +def V13 : VR<13, "v13">, DwarfRegNum<[90]>; +def V14 : VR<14, "v14">, DwarfRegNum<[91]>; +def V15 : VR<15, "v15">, DwarfRegNum<[92]>; +def V16 : VR<16, "v16">, DwarfRegNum<[93]>; +def V17 : VR<17, "v17">, DwarfRegNum<[94]>; +def V18 : VR<18, "v18">, DwarfRegNum<[95]>; +def V19 : VR<19, "v19">, DwarfRegNum<[96]>; +def V20 : VR<20, "v20">, DwarfRegNum<[97]>; +def V21 : VR<21, "v21">, DwarfRegNum<[98]>; +def V22 : VR<22, "v22">, DwarfRegNum<[99]>; +def V23 : VR<23, "v23">, DwarfRegNum<[100]>; +def V24 : VR<24, "v24">, DwarfRegNum<[101]>; +def V25 : VR<25, "v25">, DwarfRegNum<[102]>; +def V26 : VR<26, "v26">, DwarfRegNum<[103]>; +def V27 : VR<27, "v27">, DwarfRegNum<[104]>; +def V28 : VR<28, "v28">, DwarfRegNum<[105]>; +def V29 : VR<29, "v29">, DwarfRegNum<[106]>; +def V30 : VR<30, "v30">, DwarfRegNum<[107]>; +def V31 : VR<31, "v31">, DwarfRegNum<[108]>; + +// Condition registers +def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>; +def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>; +def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>; +def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>; +def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>; +def
CR5 : CR<5, "cr5">, DwarfRegNum<[73]>; +def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>; +def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>; + +// Condition register bits +def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>; +def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>; +def CR0EQ : CRBIT< 2, "2">, DwarfRegNum<[0]>; +def CR0UN : CRBIT< 3, "3">, DwarfRegNum<[0]>; +def CR1LT : CRBIT< 4, "4">, DwarfRegNum<[0]>; +def CR1GT : CRBIT< 5, "5">, DwarfRegNum<[0]>; +def CR1EQ : CRBIT< 6, "6">, DwarfRegNum<[0]>; +def CR1UN : CRBIT< 7, "7">, DwarfRegNum<[0]>; +def CR2LT : CRBIT< 8, "8">, DwarfRegNum<[0]>; +def CR2GT : CRBIT< 9, "9">, DwarfRegNum<[0]>; +def CR2EQ : CRBIT<10, "10">, DwarfRegNum<[0]>; +def CR2UN : CRBIT<11, "11">, DwarfRegNum<[0]>; +def CR3LT : CRBIT<12, "12">, DwarfRegNum<[0]>; +def CR3GT : CRBIT<13, "13">, DwarfRegNum<[0]>; +def CR3EQ : CRBIT<14, "14">, DwarfRegNum<[0]>; +def CR3UN : CRBIT<15, "15">, DwarfRegNum<[0]>; +def CR4LT : CRBIT<16, "16">, DwarfRegNum<[0]>; +def CR4GT : CRBIT<17, "17">, DwarfRegNum<[0]>; +def CR4EQ : CRBIT<18, "18">, DwarfRegNum<[0]>; +def CR4UN : CRBIT<19, "19">, DwarfRegNum<[0]>; +def CR5LT : CRBIT<20, "20">, DwarfRegNum<[0]>; +def CR5GT : CRBIT<21, "21">, DwarfRegNum<[0]>; +def CR5EQ : CRBIT<22, "22">, DwarfRegNum<[0]>; +def CR5UN : CRBIT<23, "23">, DwarfRegNum<[0]>; +def CR6LT : CRBIT<24, "24">, DwarfRegNum<[0]>; +def CR6GT : CRBIT<25, "25">, DwarfRegNum<[0]>; +def CR6EQ : CRBIT<26, "26">, DwarfRegNum<[0]>; +def CR6UN : CRBIT<27, "27">, DwarfRegNum<[0]>; +def CR7LT : CRBIT<28, "28">, DwarfRegNum<[0]>; +def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>; +def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>; +def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>; + +def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], + [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>; +def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], + [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>; +def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], + [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>; +def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], + [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>; + +// Link register +def LR : SPR<8, "lr">, DwarfRegNum<[65]>; +//let Aliases = [LR] in +def LR8 : SPR<8, "lr">, DwarfRegNum<[65]>; + +// Count register +def CTR : SPR<9, "ctr">, DwarfRegNum<[66]>; +def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66]>; + +// VRsave register +def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[107]>; + +// FP rounding mode: bits 30 and 31 of the FP status and control register +// This is not allocated as a normal register; it appears only in +// Uses and Defs. The ABI says it needs to be preserved by a function, +// but this is not achieved by saving and restoring it as with +// most registers, it has to be done in code; to make this work all the +// return and call instructions are described as Uses of RM, so instructions +// that do nothing but change RM will not get deleted. +// Also, in the architecture it is not really a SPR; 512 is arbitrary. 
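+// (For instance, a plain BLR return is modeled with RM in its Uses list, so +// an mtfsb1 that only flips a rounding-mode bit is not deleted as dead.)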
+def RM: SPR<512, "**ROUNDING MODE**">, DwarfRegNum<[0]>; + +/// Register classes +// Allocate volatiles first +// then nonvolatiles in reverse order since stmw/lmw save from rN to r31 +def GPRC : RegisterClass<"PPC", [i32], 32, + [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, + R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17, + R16, R15, R14, R13, R31, R0, R1, LR]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GPRCClass::iterator + GPRCClass::allocation_order_begin(const MachineFunction &MF) const { + // In Linux, r2 is reserved for the OS. + if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) + return begin()+1; + + return begin(); + } + GPRCClass::iterator + GPRCClass::allocation_order_end(const MachineFunction &MF) const { + // On PPC64, r13 is the thread pointer. Never allocate this register. + // Note that this is overconservative, as it also prevents allocation of + // R31 when the FP is not needed. + if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64()) + return end()-5; // don't allocate R13, R31, R0, R1, LR + + if (needsFP(MF)) + return end()-4; // don't allocate R31, R0, R1, LR + else + return end()-3; // don't allocate R0, R1, LR + } + }]; +} +def G8RC : RegisterClass<"PPC", [i64], 64, + [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, + X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17, + X16, X15, X14, X31, X13, X0, X1, LR8]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + G8RCClass::iterator + G8RCClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + G8RCClass::iterator + G8RCClass::allocation_order_end(const MachineFunction &MF) const { + if (needsFP(MF)) + return end()-5; + else + return end()-4; + } + }]; +} + + + +def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, + F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; +def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, + F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, + [V2, V3, V4, V5, V0, V1, + V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, + V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>; + +def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, + CR3, CR4]>; + +def CRBITRC : RegisterClass<"PPC", [i32], 32, + [CR0LT, CR0GT, CR0EQ, CR0UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR2LT, CR2GT, CR2EQ, CR2UN, + CR3LT, CR3GT, CR3EQ, CR3UN, + CR4LT, CR4GT, CR4EQ, CR4UN, + CR5LT, CR5GT, CR5EQ, CR5UN, + CR6LT, CR6GT, CR6EQ, CR6UN, + CR7LT, CR7GT, CR7EQ, CR7UN + ]> +{ + let CopyCost = -1; +} + + +def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; +def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h new file mode 100644 index 000000000000..a33e7e03370c --- /dev/null +++ b/lib/Target/PowerPC/PPCRelocations.h @@ -0,0 +1,56 @@ +//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC32RELOCATIONS_H +#define PPC32RELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +// Hack to rid us of a PPC pre-processor symbol which is erroneously +// defined in a PowerPC header file (bug in Linux/PPC) +#ifdef PPC +#undef PPC +#endif + +namespace llvm { + namespace PPC { + enum RelocationType { + // reloc_vanilla - A standard relocation, where the address of the + // relocated object completely overwrites the address of the relocation. + reloc_vanilla, + + // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions. + reloc_pcrel_bx, + + // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE, + // and other bcx instructions. + reloc_pcrel_bcx, + + // reloc_absolute_high - Absolute relocation, for the loadhi instruction + // (which is really addis). Add the high 16-bits of the specified global + // address into the low 16-bits of the instruction. + reloc_absolute_high, + + // reloc_absolute_low - Absolute relocation, for the la instruction (which + // is really an addi). Add the low 16-bits of the specified global + // address into the low 16-bits of the instruction. + reloc_absolute_low, + + // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store + // instructions which have two implicit zero bits. + reloc_absolute_low_ix + }; + } +} + +#endif diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td new file mode 100644 index 000000000000..d589414c0154 --- /dev/null +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -0,0 +1,508 @@ +//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across PowerPC chips sets +// +def BPU : FuncUnit; // Branch unit +def SLU : FuncUnit; // Store/load unit +def SRU : FuncUnit; // special register unit +def IU1 : FuncUnit; // integer unit 1 (simple) +def IU2 : FuncUnit; // integer unit 2 (complex) +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) +def FPU1 : FuncUnit; // floating point unit 1 +def FPU2 : FuncUnit; // floating point unit 2 +def VPU : FuncUnit; // vector permutation unit +def VIU1 : FuncUnit; // vector integer unit 1 (simple) +def VIU2 : FuncUnit; // vector integer unit 2 (complex) +def VFPU : FuncUnit; // vector floating point unit + + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for PowerPC +// +def IntGeneral : InstrItinClass; +def IntCompare : InstrItinClass; +def IntDivD : InstrItinClass; +def IntDivW : InstrItinClass; +def IntMFFS : InstrItinClass; +def IntMFVSCR : InstrItinClass; +def IntMTFSB0 : InstrItinClass; +def IntMTSRD : InstrItinClass; +def IntMulHD : InstrItinClass; +def IntMulHW : InstrItinClass; +def IntMulHWU : InstrItinClass; +def IntMulLI : InstrItinClass; +def IntRFID : InstrItinClass; +def IntRotateD : InstrItinClass; +def IntRotate : InstrItinClass; +def IntShift : InstrItinClass; +def IntTrapD : InstrItinClass; +def IntTrapW : InstrItinClass; +def BrB : InstrItinClass; +def BrCR : InstrItinClass; +def BrMCR : InstrItinClass; +def BrMCRX : InstrItinClass; +def LdStDCBA : InstrItinClass; +def LdStDCBF : InstrItinClass; +def LdStDCBI : InstrItinClass; +def LdStGeneral : InstrItinClass; +def LdStDSS : InstrItinClass; +def LdStICBI : InstrItinClass; +def LdStUX : InstrItinClass; +def LdStLD : InstrItinClass; +def LdStLDARX : InstrItinClass; +def LdStLFD : InstrItinClass; +def LdStLFDU : InstrItinClass; +def LdStLHA : InstrItinClass; +def LdStLMW : InstrItinClass; +def LdStLVecX : InstrItinClass; +def LdStLWA : InstrItinClass; +def LdStLWARX : InstrItinClass; +def LdStSLBIA : InstrItinClass; +def LdStSLBIE : InstrItinClass; +def LdStSTD : InstrItinClass; +def LdStSTDCX : InstrItinClass; +def LdStSTVEBX : InstrItinClass; +def LdStSTWCX : InstrItinClass; +def LdStSync : InstrItinClass; +def SprISYNC : InstrItinClass; +def SprMFSR : InstrItinClass; +def SprMTMSR : InstrItinClass; +def SprMTSR : InstrItinClass; +def SprTLBSYNC : InstrItinClass; +def SprMFCR : InstrItinClass; +def SprMFMSR : InstrItinClass; +def SprMFSPR : InstrItinClass; +def SprMFTB : InstrItinClass; +def SprMTSPR : InstrItinClass; +def SprMTSRIN : InstrItinClass; +def SprRFI : InstrItinClass; +def SprSC : InstrItinClass; +def FPGeneral : InstrItinClass; +def FPCompare : InstrItinClass; +def FPDivD : InstrItinClass; +def FPDivS : InstrItinClass; +def FPFused : InstrItinClass; +def FPRes : InstrItinClass; +def FPSqrt : InstrItinClass; +def VecGeneral : InstrItinClass; +def VecFP : InstrItinClass; +def VecFPCompare : InstrItinClass; +def VecComplex : InstrItinClass; +def VecPerm : InstrItinClass; +def VecFPRound : InstrItinClass; +def VecVSL : InstrItinClass; +def VecVSR : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. 
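+// Each per-processor file included below binds these classes to pipeline +// stages with entries of the (schematic) form +// InstrItinData<IntGeneral, [InstrStage<1, [IU1, IU2]>]>, +// read as: an IntGeneral instruction occupies one of IU1/IU2 for one cycle.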
+ +include "PPCScheduleG3.td" +include "PPCScheduleG4.td" +include "PPCScheduleG4Plus.td" +include "PPCScheduleG5.td" + +//===----------------------------------------------------------------------===// +// Instruction to itinerary class map - When add new opcodes to the supported +// set, refer to the following table to determine which itinerary class the +// opcode belongs. +// +// opcode itinerary class +// ====== =============== +// add IntGeneral +// addc IntGeneral +// adde IntGeneral +// addi IntGeneral +// addic IntGeneral +// addic. IntGeneral +// addis IntGeneral +// addme IntGeneral +// addze IntGeneral +// and IntGeneral +// andc IntGeneral +// andi. IntGeneral +// andis. IntGeneral +// b BrB +// bc BrB +// bcctr BrB +// bclr BrB +// cmp IntCompare +// cmpi IntCompare +// cmpl IntCompare +// cmpli IntCompare +// cntlzd IntRotateD +// cntlzw IntGeneral +// crand BrCR +// crandc BrCR +// creqv BrCR +// crnand BrCR +// crnor BrCR +// cror BrCR +// crorc BrCR +// crxor BrCR +// dcba LdStDCBA +// dcbf LdStDCBF +// dcbi LdStDCBI +// dcbst LdStDCBF +// dcbt LdStGeneral +// dcbtst LdStGeneral +// dcbz LdStDCBF +// divd IntDivD +// divdu IntDivD +// divw IntDivW +// divwu IntDivW +// dss LdStDSS +// dst LdStDSS +// dstst LdStDSS +// eciwx LdStGeneral +// ecowx LdStGeneral +// eieio LdStGeneral +// eqv IntGeneral +// extsb IntGeneral +// extsh IntGeneral +// extsw IntRotateD +// fabs FPGeneral +// fadd FPGeneral +// fadds FPGeneral +// fcfid FPGeneral +// fcmpo FPCompare +// fcmpu FPCompare +// fctid FPGeneral +// fctidz FPGeneral +// fctiw FPGeneral +// fctiwz FPGeneral +// fdiv FPDivD +// fdivs FPDivS +// fmadd FPFused +// fmadds FPGeneral +// fmr FPGeneral +// fmsub FPFused +// fmsubs FPGeneral +// fmul FPFused +// fmuls FPGeneral +// fnabs FPGeneral +// fneg FPGeneral +// fnmadd FPFused +// fnmadds FPGeneral +// fnmsub FPFused +// fnmsubs FPGeneral +// fres FPRes +// frsp FPGeneral +// frsqrte FPGeneral +// fsel FPGeneral +// fsqrt FPSqrt +// fsqrts FPSqrt +// fsub FPGeneral +// fsubs FPGeneral +// icbi LdStICBI +// isync SprISYNC +// lbz LdStGeneral +// lbzu LdStGeneral +// lbzux LdStUX +// lbzx LdStGeneral +// ld LdStLD +// ldarx LdStLDARX +// ldu LdStLD +// ldux LdStLD +// ldx LdStLD +// lfd LdStLFD +// lfdu LdStLFDU +// lfdux LdStLFDU +// lfdx LdStLFDU +// lfs LdStLFDU +// lfsu LdStLFDU +// lfsux LdStLFDU +// lfsx LdStLFDU +// lha LdStLHA +// lhau LdStLHA +// lhaux LdStLHA +// lhax LdStLHA +// lhbrx LdStGeneral +// lhz LdStGeneral +// lhzu LdStGeneral +// lhzux LdStUX +// lhzx LdStGeneral +// lmw LdStLMW +// lswi LdStLMW +// lswx LdStLMW +// lvebx LdStLVecX +// lvehx LdStLVecX +// lvewx LdStLVecX +// lvsl LdStLVecX +// lvsr LdStLVecX +// lvx LdStLVecX +// lvxl LdStLVecX +// lwa LdStLWA +// lwarx LdStLWARX +// lwaux LdStLHA +// lwax LdStLHA +// lwbrx LdStGeneral +// lwz LdStGeneral +// lwzu LdStGeneral +// lwzux LdStUX +// lwzx LdStGeneral +// mcrf BrMCR +// mcrfs FPGeneral +// mcrxr BrMCRX +// mfcr SprMFCR +// mffs IntMFFS +// mfmsr SprMFMSR +// mfspr SprMFSPR +// mfsr SprMFSR +// mfsrin SprMFSR +// mftb SprMFTB +// mfvscr IntMFVSCR +// mtcrf BrMCRX +// mtfsb0 IntMTFSB0 +// mtfsb1 IntMTFSB0 +// mtfsf IntMTFSB0 +// mtfsfi IntMTFSB0 +// mtmsr SprMTMSR +// mtmsrd LdStLD +// mtspr SprMTSPR +// mtsr SprMTSR +// mtsrd IntMTSRD +// mtsrdin IntMTSRD +// mtsrin SprMTSRIN +// mtvscr IntMFVSCR +// mulhd IntMulHD +// mulhdu IntMulHD +// mulhw IntMulHW +// mulhwu IntMulHWU +// mulld IntMulHD +// mulli IntMulLI +// mullw IntMulHW +// nand IntGeneral +// neg IntGeneral +// nor IntGeneral +// or 
IntGeneral +// orc IntGeneral +// ori IntGeneral +// oris IntGeneral +// rfi SprRFI +// rfid IntRFID +// rldcl IntRotateD +// rldcr IntRotateD +// rldic IntRotateD +// rldicl IntRotateD +// rldicr IntRotateD +// rldimi IntRotateD +// rlwimi IntRotate +// rlwinm IntGeneral +// rlwnm IntGeneral +// sc SprSC +// slbia LdStSLBIA +// slbie LdStSLBIE +// sld IntRotateD +// slw IntGeneral +// srad IntRotateD +// sradi IntRotateD +// sraw IntShift +// srawi IntShift +// srd IntRotateD +// srw IntGeneral +// stb LdStGeneral +// stbu LdStGeneral +// stbux LdStGeneral +// stbx LdStGeneral +// std LdStSTD +// stdcx. LdStSTDCX +// stdu LdStSTD +// stdux LdStSTD +// stdx LdStSTD +// stfd LdStUX +// stfdu LdStUX +// stfdux LdStUX +// stfdx LdStUX +// stfiwx LdStUX +// stfs LdStUX +// stfsu LdStUX +// stfsux LdStUX +// stfsx LdStUX +// sth LdStGeneral +// sthbrx LdStGeneral +// sthu LdStGeneral +// sthux LdStGeneral +// sthx LdStGeneral +// stmw LdStLMW +// stswi LdStLMW +// stswx LdStLMW +// stvebx LdStSTVEBX +// stvehx LdStSTVEBX +// stvewx LdStSTVEBX +// stvx LdStSTVEBX +// stvxl LdStSTVEBX +// stw LdStGeneral +// stwbrx LdStGeneral +// stwcx. LdStSTWCX +// stwu LdStGeneral +// stwux LdStGeneral +// stwx LdStGeneral +// subf IntGeneral +// subfc IntGeneral +// subfe IntGeneral +// subfic IntGeneral +// subfme IntGeneral +// subfze IntGeneral +// sync LdStSync +// td IntTrapD +// tdi IntTrapD +// tlbia LdStSLBIA +// tlbie LdStDCBF +// tlbsync SprTLBSYNC +// tw IntTrapW +// twi IntTrapW +// vaddcuw VecGeneral +// vaddfp VecFP +// vaddsbs VecGeneral +// vaddshs VecGeneral +// vaddsws VecGeneral +// vaddubm VecGeneral +// vaddubs VecGeneral +// vadduhm VecGeneral +// vadduhs VecGeneral +// vadduwm VecGeneral +// vadduws VecGeneral +// vand VecGeneral +// vandc VecGeneral +// vavgsb VecGeneral +// vavgsh VecGeneral +// vavgsw VecGeneral +// vavgub VecGeneral +// vavguh VecGeneral +// vavguw VecGeneral +// vcfsx VecFP +// vcfux VecFP +// vcmpbfp VecFPCompare +// vcmpeqfp VecFPCompare +// vcmpequb VecGeneral +// vcmpequh VecGeneral +// vcmpequw VecGeneral +// vcmpgefp VecFPCompare +// vcmpgtfp VecFPCompare +// vcmpgtsb VecGeneral +// vcmpgtsh VecGeneral +// vcmpgtsw VecGeneral +// vcmpgtub VecGeneral +// vcmpgtuh VecGeneral +// vcmpgtuw VecGeneral +// vctsxs VecFP +// vctuxs VecFP +// vexptefp VecFP +// vlogefp VecFP +// vmaddfp VecFP +// vmaxfp VecFPCompare +// vmaxsb VecGeneral +// vmaxsh VecGeneral +// vmaxsw VecGeneral +// vmaxub VecGeneral +// vmaxuh VecGeneral +// vmaxuw VecGeneral +// vmhaddshs VecComplex +// vmhraddshs VecComplex +// vminfp VecFPCompare +// vminsb VecGeneral +// vminsh VecGeneral +// vminsw VecGeneral +// vminub VecGeneral +// vminuh VecGeneral +// vminuw VecGeneral +// vmladduhm VecComplex +// vmrghb VecPerm +// vmrghh VecPerm +// vmrghw VecPerm +// vmrglb VecPerm +// vmrglh VecPerm +// vmrglw VecPerm +// vmsubfp VecFP +// vmsummbm VecComplex +// vmsumshm VecComplex +// vmsumshs VecComplex +// vmsumubm VecComplex +// vmsumuhm VecComplex +// vmsumuhs VecComplex +// vmulesb VecComplex +// vmulesh VecComplex +// vmuleub VecComplex +// vmuleuh VecComplex +// vmulosb VecComplex +// vmulosh VecComplex +// vmuloub VecComplex +// vmulouh VecComplex +// vnor VecGeneral +// vor VecGeneral +// vperm VecPerm +// vpkpx VecPerm +// vpkshss VecPerm +// vpkshus VecPerm +// vpkswss VecPerm +// vpkswus VecPerm +// vpkuhum VecPerm +// vpkuhus VecPerm +// vpkuwum VecPerm +// vpkuwus VecPerm +// vrefp VecFPRound +// vrfim VecFPRound +// vrfin VecFPRound +// vrfip VecFPRound +// vrfiz VecFPRound +// vrlb 
VecGeneral +// vrlh VecGeneral +// vrlw VecGeneral +// vrsqrtefp VecFP +// vsel VecGeneral +// vsl VecVSL +// vslb VecGeneral +// vsldoi VecPerm +// vslh VecGeneral +// vslo VecPerm +// vslw VecGeneral +// vspltb VecPerm +// vsplth VecPerm +// vspltisb VecPerm +// vspltish VecPerm +// vspltisw VecPerm +// vspltw VecPerm +// vsr VecVSR +// vsrab VecGeneral +// vsrah VecGeneral +// vsraw VecGeneral +// vsrb VecGeneral +// vsrh VecGeneral +// vsro VecPerm +// vsrw VecGeneral +// vsubcuw VecGeneral +// vsubfp VecFP +// vsubsbs VecGeneral +// vsubshs VecGeneral +// vsubsws VecGeneral +// vsububm VecGeneral +// vsububs VecGeneral +// vsubuhm VecGeneral +// vsubuhs VecGeneral +// vsubuwm VecGeneral +// vsubuws VecGeneral +// vsum2sws VecComplex +// vsum4sbs VecComplex +// vsum4shs VecComplex +// vsum4ubs VecComplex +// vsumsws VecComplex +// vupkhpx VecPerm +// vupkhsb VecPerm +// vupkhsh VecPerm +// vupklpx VecPerm +// vupklsb VecPerm +// vupklsh VecPerm +// vxor VecGeneral +// xor IntGeneral +// xori IntGeneral +// xoris IntGeneral +// diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td new file mode 100644 index 000000000000..f72194d6de0e --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -0,0 +1,63 @@ +//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G3 (750) processor. +// +//===----------------------------------------------------------------------===// + + +def G3Itineraries : ProcessorItineraries<[ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td new file mode 100644 index 000000000000..92ed20f17ce5 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -0,0 +1,73 @@ +//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4 (7400) processor. 
+// +//===----------------------------------------------------------------------===// + +def G4Itineraries : ProcessorItineraries<[ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td new file mode 100644 index 000000000000..7474ba494d10 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -0,0 +1,76 @@ +//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4+ (7450) processor. 
+// +//===----------------------------------------------------------------------===// + +def G4PlusItineraries : ProcessorItineraries<[ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td new file mode 100644 index 000000000000..d28214715a76 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -0,0 +1,83 @@ +//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G5 (970) processor. 
+// +//===----------------------------------------------------------------------===// + +def G5Itineraries : ProcessorItineraries<[ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> +]>; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp new file mode 100644 index 000000000000..425d8e6195c6 --- /dev/null +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -0,0 +1,152 @@ +//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPC specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "PPCSubtarget.h" +#include "PPC.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "PPCGenSubtarget.inc" +#include +using namespace llvm; + +#if defined(__APPLE__) +#include +#include +#include +#include + +/// GetCurrentPowerPCFeatures - Returns the current CPUs features. 
+static const char *GetCurrentPowerPCCPU() {
+  host_basic_info_data_t hostInfo;
+  mach_msg_type_number_t infoCount;
+
+  infoCount = HOST_BASIC_INFO_COUNT;
+  host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+            &infoCount);
+
+  if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+  switch(hostInfo.cpu_subtype) {
+  case CPU_SUBTYPE_POWERPC_601:   return "601";
+  case CPU_SUBTYPE_POWERPC_602:   return "602";
+  case CPU_SUBTYPE_POWERPC_603:   return "603";
+  case CPU_SUBTYPE_POWERPC_603e:  return "603e";
+  case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+  case CPU_SUBTYPE_POWERPC_604:   return "604";
+  case CPU_SUBTYPE_POWERPC_604e:  return "604e";
+  case CPU_SUBTYPE_POWERPC_620:   return "620";
+  case CPU_SUBTYPE_POWERPC_750:   return "750";
+  case CPU_SUBTYPE_POWERPC_7400:  return "7400";
+  case CPU_SUBTYPE_POWERPC_7450:  return "7450";
+  case CPU_SUBTYPE_POWERPC_970:   return "970";
+  default: ;
+  }
+
+  return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
+                           const std::string &FS, bool is64Bit)
+  : TM(tm)
+  , StackAlignment(16)
+  , DarwinDirective(PPC::DIR_NONE)
+  , IsGigaProcessor(false)
+  , Has64BitSupport(false)
+  , Use64BitRegs(false)
+  , IsPPC64(is64Bit)
+  , HasAltivec(false)
+  , HasFSQRT(false)
+  , HasSTFIWX(false)
+  , HasLazyResolverStubs(false)
+  , DarwinVers(0) {
+
+  // Determine default and user specified characteristics
+  std::string CPU = "generic";
+#if defined(__APPLE__)
+  CPU = GetCurrentPowerPCCPU();
+#endif
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+
+  // If we are generating code for ppc64, verify that options make sense.
+  if (is64Bit) {
+    Has64BitSupport = true;
+    // Silently force 64-bit register use on ppc64.
+    Use64BitRegs = true;
+  }
+
+  // If the user requested use of 64-bit regs, but the cpu selected doesn't
+  // support it, ignore.
+  if (use64BitRegs() && !has64BitSupport())
+    Use64BitRegs = false;
+
+  // Set the boolean corresponding to the current target triple, or the default
+  // if one cannot be determined, to true.
+  const std::string &TT = M.getTargetTriple();
+  if (TT.length() > 7) {
+    // Determine which version of darwin this is.
+    size_t DarwinPos = TT.find("-darwin");
+    if (DarwinPos != std::string::npos) {
+      if (isdigit(TT[DarwinPos+7]))
+        DarwinVers = atoi(&TT[DarwinPos+7]);
+      else
+        DarwinVers = 8;  // Minimum supported darwin is Tiger.
+    }
+  } else if (TT.empty()) {
+    // Try to autosense the subtarget from the host compiler.
+#if defined(__APPLE__)
+#if __APPLE_CC__ > 5400
+    DarwinVers = 9;  // GCC 5400+ is Leopard.
+#else
+    DarwinVers = 8;  // Minimum supported darwin is Tiger.
+#endif
+#endif
+  }
+
+  // Set up darwin-specific properties.
+  if (isDarwin()) {
+    HasLazyResolverStubs = true;
+    AsmFlavor = NewMnemonic;
+  } else {
+    AsmFlavor = OldMnemonic;
+  }
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+  // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+  // everything is.  This matters for PPC64, which codegens in PIC mode without
+  // stubs.
+  HasLazyResolverStubs = false;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub.  This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+  // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+  if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+    return false;
+  // If symbol visibility is hidden, the extra load is not needed if
+  // the symbol is definitely defined in the current translation unit.
+  bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+  if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+    return false;
+  return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+         GV->hasCommonLinkage() || isDecl;
+}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..176f3e19477a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,160 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+
+namespace PPC {
+  // -m directive values.
+  enum {
+    DIR_NONE,
+    DIR_32,
+    DIR_601,
+    DIR_602,
+    DIR_603,
+    DIR_7400,
+    DIR_750,
+    DIR_970,
+    DIR_64
+  };
+}
+
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public TargetSubtarget {
+public:
+  enum AsmWriterFlavorTy {
+    OldMnemonic, NewMnemonic, Unset
+  };
+protected:
+  const TargetMachine &TM;
+
+  /// stackAlignment - The minimum alignment known to hold of the stack frame
+  /// on entry to the function and which must be maintained by every function.
+  unsigned StackAlignment;
+
+  /// Selected instruction itineraries (one entry per itinerary class.)
+  InstrItineraryData InstrItins;
+
+  /// Which cpu directive was used.
+  unsigned DarwinDirective;
+
+  /// AsmFlavor - Which PPC asm dialect to use.
+  AsmWriterFlavorTy AsmFlavor;
+
+  /// Used by the ISel to turn on optimizations for POWER4-derived
+  /// architectures.
+  bool IsGigaProcessor;
+  bool Has64BitSupport;
+  bool Use64BitRegs;
+  bool IsPPC64;
+  bool HasAltivec;
+  bool HasFSQRT;
+  bool HasSTFIWX;
+  bool HasLazyResolverStubs;
+
+  /// DarwinVers - Nonzero iff this is a darwin platform; holds the numeric
+  /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
+  unsigned char DarwinVers; // Is any darwin-ppc platform.
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  ///
+  PPCSubtarget(const TargetMachine &TM, const Module &M,
+               const std::string &FS, bool is64Bit);
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options.  Definition of function is auto generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+
+
+  /// SetJITMode - This is called to inform the subtarget info that we are
+  /// producing code for the JIT.
+  void SetJITMode();
+
+  /// getStackAlignment - Returns the minimum alignment known to hold of the
+  /// stack frame on entry to the function and which must be maintained by
+  /// every function for this subtarget.
+  unsigned getStackAlignment() const { return StackAlignment; }
+
+  /// getDarwinDirective - Returns the -m directive specified for the cpu.
+  ///
+  unsigned getDarwinDirective() const { return DarwinDirective; }
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  /// getTargetDataString - Return the pointer size and type alignment
+  /// properties of this subtarget.
+  const char *getTargetDataString() const {
+    // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+    // documentation are wrong; these are correct (i.e. "what gcc does").
+    return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128"
+                     : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128";
+  }
+
+  /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+  ///
+  bool isPPC64() const { return IsPPC64; }
+
+  /// has64BitSupport - Return true if the selected CPU supports 64-bit
+  /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+  bool has64BitSupport() const { return Has64BitSupport; }
+
+  /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+  /// registers in 32-bit mode when possible.  This can only be true if
+  /// has64BitSupport() returns true.
+  bool use64BitRegs() const { return Use64BitRegs; }
+
+  /// hasLazyResolverStub - Return true if accesses to the specified global have
+  /// to go through a dyld lazy resolution stub.  This means that an extra load
+  /// is required to get the address of the global.
+  bool hasLazyResolverStub(const GlobalValue *GV) const;
+
+  // Specific obvious features.
+  bool hasFSQRT() const { return HasFSQRT; }
+  bool hasSTFIWX() const { return HasSTFIWX; }
+  bool hasAltivec() const { return HasAltivec; }
+  bool isGigaProcessor() const { return IsGigaProcessor; }
+
+  /// isDarwin - True if this is any darwin platform.
+  bool isDarwin() const { return DarwinVers != 0; }
+  /// isDarwin9 - True if this is darwin9 (leopard, 10.5) or above.
+  bool isDarwin9() const { return DarwinVers >= 9; }
+
+  /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+  unsigned getDarwinVers() const { return DarwinVers; }
+
+  bool isMachoABI() const { return isDarwin() || IsPPC64; }
+  bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; }
+
+  unsigned getAsmFlavor() const {
+    return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+  }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
new file mode 100644
index 000000000000..c69e591a6632
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -0,0 +1,161 @@
+//===-- PPCTargetAsmInfo.cpp - PPC asm properties ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the DarwinTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+  PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+  PCSymbol = ".";
+  CommentString = ";";
+  GlobalPrefix = "_";
+  PrivateGlobalPrefix = "L";
+  LessPrivateGlobalPrefix = "l";
+  StringConstantPrefix = "\1LC";
+  ConstantPoolSection = "\t.const\t";
+  JumpTableDataSection = ".const";
+  CStringSection = "\t.cstring";
+  if (TM.getRelocationModel() == Reloc::Static) {
+    StaticCtorsSection = ".constructor";
+    StaticDtorsSection = ".destructor";
+  } else {
+    StaticCtorsSection = ".mod_init_func";
+    StaticDtorsSection = ".mod_term_func";
+  }
+  HasSingleParameterDotFile = false;
+  SwitchToSectionDirective = "\t.section ";
+  UsedDirective = "\t.no_dead_strip\t";
+  WeakDefDirective = "\t.weak_definition ";
+  WeakRefDirective = "\t.weak_reference ";
+  HiddenDirective = "\t.private_extern ";
+  SupportsExceptionHandling = true;
+  NeedsIndirectEncoding = true;
+  NeedsSet = true;
+  BSSSection = 0;
+
+  DwarfEHFrameSection =
+    ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+  DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+  GlobalEHDirective = "\t.globl\t";
+  SupportsWeakOmittedEHFrame = false;
+
+  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+  DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+
+  // In non-PIC modes, emit a special label before jump tables so that the
+  // linker can perform more accurate dead code stripping.
+  if (TM.getRelocationModel() != Reloc::PIC_) {
+    // Emit a local label that is preserved until the linker runs.
+    JumpTableSpecialLabelPrefix = "l";
+  }
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCDarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                              bool Global) const {
+  if (Reason == DwarfEncoding::Functions && Global)
+    return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4);
+  else if (Reason == DwarfEncoding::CodeLabels || !Global)
+    return DW_EH_PE_pcrel;
+  else
+    return DW_EH_PE_absptr;
+}
+
+const char *
+PPCDarwinTargetAsmInfo::getEHGlobalPrefix() const
+{
+  const PPCSubtarget* Subtarget = &TM.getSubtarget<PPCSubtarget>();
+  if (Subtarget->getDarwinVers() > 9)
+    return PrivateGlobalPrefix;
+  else
+    return "";
+}
+
+PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
+  PPCTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+  CommentString = "#";
+  GlobalPrefix = "";
+  PrivateGlobalPrefix = ".L";
+  ConstantPoolSection = "\t.section .rodata.cst4\t";
+  JumpTableDataSection = ".section .rodata.cst4";
+  CStringSection = ".rodata.str";
+  StaticCtorsSection = ".section\t.ctors,\"aw\",@progbits";
+  StaticDtorsSection = ".section\t.dtors,\"aw\",@progbits";
+  UsedDirective = "\t# .no_dead_strip\t";
+  WeakRefDirective = "\t.weak\t";
+  BSSSection = "\t.section\t\".sbss\",\"aw\",@nobits";
+
+  // PPC/Linux normally uses named section for BSS.
+  BSSSection_ = getNamedSection("\t.bss",
+                                SectionFlags::Writeable | SectionFlags::BSS,
+                                /* Override */ true);
+
+  // Debug Information
+  AbsoluteDebugSectionOffsets = true;
+  SupportsDebugInformation = true;
+  DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+  DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+  DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+  DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+  DwarfPubNamesSection = "\t.section\t.debug_pubnames,\"\",@progbits";
+  DwarfPubTypesSection = "\t.section\t.debug_pubtypes,\"\",@progbits";
+  DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+  DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+  DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+  DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+  PCSymbol = ".";
+
+  // Set up DWARF directives
+  HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
+
+  // Exception handling
+  if (!TM.getSubtargetImpl()->isPPC64())
+    SupportsExceptionHandling = true;
+  AbsoluteEHSectionOffsets = false;
+  DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+  DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCLinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                             bool Global) const {
+  // We really need to write something here.
+  return TargetAsmInfo::PreferredEHDataFormat(Reason, Global);
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.h b/lib/Target/PowerPC/PPCTargetAsmInfo.h
new file mode 100644
index 000000000000..edf40c9346cf
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.h
@@ -0,0 +1,62 @@
+//=====-- PPCTargetAsmInfo.h - PPC asm properties -------------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the DarwinTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "PPCTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+  template <class BaseTAI>
+  struct PPCTargetAsmInfo : public BaseTAI {
+    explicit PPCTargetAsmInfo(const PPCTargetMachine &TM):
+      BaseTAI(TM) {
+      const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+      bool isPPC64 = Subtarget->isPPC64();
+
+      BaseTAI::ZeroDirective = "\t.space\t";
+      BaseTAI::SetDirective = "\t.set";
+      BaseTAI::Data64bitsDirective = isPPC64 ? "\t.quad\t" : 0;
+      BaseTAI::AlignmentIsInBytes = false;
+      BaseTAI::LCOMMDirective = "\t.lcomm\t";
+      BaseTAI::InlineAsmStart = "# InlineAsm Start";
+      BaseTAI::InlineAsmEnd = "# InlineAsm End";
+      BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor();
+    }
+  };
+
+  typedef PPCTargetAsmInfo<TargetAsmInfo> PPCGenericTargetAsmInfo;
+
+  EXTERN_TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
+
+  struct PPCDarwinTargetAsmInfo : public PPCTargetAsmInfo<DarwinTargetAsmInfo> {
+    explicit PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM);
+    virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                           bool Global) const;
+    virtual const char *getEHGlobalPrefix() const;
+  };
+
+  struct PPCLinuxTargetAsmInfo : public PPCTargetAsmInfo<ELFTargetAsmInfo> {
+    explicit PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM);
+    virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                           bool Global) const;
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..ef3f0fc04219
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,250 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// PowerPCTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library.  In particular, it seems that it is not possible to get
+/// things to work on Win32 without this.  Though it is unused, do not
+/// remove it.
+extern "C" int PowerPCTargetMachineModule; +int PowerPCTargetMachineModule = 0; + +// Register the targets +static RegisterTarget +X("ppc32", "PowerPC 32"); +static RegisterTarget +Y("ppc64", "PowerPC 64"); + +// No assembler printer by default +PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0; + +const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const { + if (Subtarget.isDarwin()) + return new PPCDarwinTargetAsmInfo(*this); + else + return new PPCLinuxTargetAsmInfo(*this); +} + +unsigned PPC32TargetMachine::getJITMatchQuality() { +#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__) + if (sizeof(void*) == 4) + return 10; +#endif + return 0; +} +unsigned PPC64TargetMachine::getJITMatchQuality() { +#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__) + if (sizeof(void*) == 8) + return 10; +#endif + return 0; +} + +unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "powerpc-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-") + return 20; + + // If the target triple is something non-powerpc, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::BigEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "powerpc64-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-") + return 20; + + if (M.getEndianness() == Module::BigEndian && + M.getPointerSize() == Module::Pointer64) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + + +PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS, + bool is64Bit) + : Subtarget(*this, M, FS, is64Bit), + DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), + FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) { + + if (getRelocationModel() == Reloc::Default) { + if (Subtarget.isDarwin()) + setRelocationModel(Reloc::DynamicNoPIC); + else + setRelocationModel(Reloc::Static); + } +} + +/// Override this for PowerPC. Tail merging happily breaks up instruction issue +/// groups, which typically degrades performance. +bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } + +PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS) + : PPCTargetMachine(M, FS, false) { +} + + +PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS) + : PPCTargetMachine(M, FS, true) { +} + + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool PPCTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. 
+ PM.add(createPPCISelDag(*this)); + return false; +} + +bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Must run branch selection immediately preceding the asm printer. + PM.add(createPPCBranchSelectionPass()); + return false; +} + +bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + + return false; +} + +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE) { + // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. + // FIXME: This should be moved to TargetJITInfo!! + if (Subtarget.isPPC64()) { + // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many + // instructions to materialize arbitrary global variable + function + + // constant pool addresses. + setRelocationModel(Reloc::PIC_); + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + DisableJumpTables = true; + } else { + setRelocationModel(Reloc::Static); + } + + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. + PM.add(createPPCCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &JCE) { + // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. + // FIXME: This should be moved to TargetJITInfo!! + if (Subtarget.isPPC64()) { + // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many + // instructions to materialize arbitrary global variable + function + + // constant pool addresses. + setRelocationModel(Reloc::PIC_); + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + DisableJumpTables = true; + } else { + setRelocationModel(Reloc::Static); + } + + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. + PM.add(createPPCJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // Machine code emitter pass for PowerPC. + PM.add(createPPCCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // Machine code emitter pass for PowerPC. 
+  PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
+
+  return false;
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..086d2f4cf81d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,120 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCMachOWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+  PPCSubtarget        Subtarget;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  PPCInstrInfo        InstrInfo;
+  PPCFrameInfo        FrameInfo;
+  PPCJITInfo          JITInfo;
+  PPCTargetLowering   TLInfo;
+  InstrItineraryData  InstrItins;
+  PPCMachOWriterInfo  MachOWriterInfo;
+
+protected:
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor function here at startup time if
+  // it is linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            PPCTargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+  PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+  virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+  virtual PPCTargetLowering *getTargetLowering() const {
+    return const_cast<PPCTargetLowering*>(&TLInfo);
+  }
+  virtual const PPCRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+  virtual const InstrItineraryData getInstrItineraryData() const {
+    return InstrItins;
+  }
+  virtual const PPCMachOWriterInfo *getMachOWriterInfo() const {
+    return &MachOWriterInfo;
+  }
+
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
+
+  // Pass Pipeline Configuration
+  virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addAssemblyEmitter(PassManagerBase &PM,
+                                  CodeGenOpt::Level OptLevel,
+                                  bool Verbose, raw_ostream &Out);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              bool DumpAsm, MachineCodeEmitter &MCE);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              bool DumpAsm, JITCodeEmitter &JCE);
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool DumpAsm, MachineCodeEmitter &MCE);
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool DumpAsm, JITCodeEmitter &JCE);
+  virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+  PPC32TargetMachine(const Module &M, const std::string &FS);
+
+  static unsigned getJITMatchQuality();
+  static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+  PPC64TargetMachine(const Module &M, const std::string &FS);
+
+  static unsigned getJITMatchQuality();
+  static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
new file mode 100644
index 000000000000..688fb3090803
--- /dev/null
+++ b/lib/Target/PowerPC/README.txt
@@ -0,0 +1,799 @@
+//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
+
+TODO:
+* gpr0 allocation
+* implement do-loop -> bdnz transform
+* lmw/stmw pass a la arm load store optimizer for prolog/epilog
+
+===-------------------------------------------------------------------------===
+
+Support 'update' load/store instructions.  These are cracked on the G5, but are
+still a codesize win.
+
+With preinc enabled, this:
+
+long *%test4(long *%X, long *%dest) {
+        %Y = getelementptr long* %X, int 4
+        %A = load long* %Y
+        store long %A, long* %dest
+        ret long* %Y
+}
+
+compiles to:
+
+_test4:
+        mr r2, r3
+        lwzu r5, 32(r2)
+        lwz r3, 36(r3)
+        stw r5, 0(r4)
+        stw r3, 4(r4)
+        mr r3, r2
+        blr
+
+with -sched=list-burr, I get:
+
+_test4:
+        lwz r2, 36(r3)
+        lwzu r5, 32(r3)
+        stw r2, 4(r4)
+        stw r5, 0(r4)
+        blr
+
+===-------------------------------------------------------------------------===
+
+We compile the hottest inner loop of viterbi to:
+
+        li r6, 0
+        b LBB1_84       ;bb432.i
+LBB1_83:        ;bb420.i
+        lbzx r8, r5, r7
+        addi r6, r7, 1
+        stbx r8, r4, r7
+LBB1_84:        ;bb432.i
+        mr r7, r6
+        cmplwi cr0, r7, 143
+        bne cr0, LBB1_83        ;bb420.i
+
+The CBE manages to produce:
+
+        li r0, 143
+        mtctr r0
+loop:
+        lbzx r2, r2, r11
+        stbx r0, r2, r9
+        addi r2, r2, 1
+        bdz later
+        b loop
+
+This could be much better (bdnz instead of bdz) but it still beats us.  If we
+produced this with bdnz, the loop would be a single dispatch group.
+
+===-------------------------------------------------------------------------===
+
+Compile:
+
+void foo(int *P) {
+  if (P) *P = 0;
+}
+
+into:
+
+_foo:
+        cmpwi cr0,r3,0
+        beqlr cr0
+        li r0,0
+        stw r0,0(r3)
+        blr
+
+This is effectively a simple form of predication.
+
+===-------------------------------------------------------------------------===
+
+Lump the constant pool for each function into ONE pic object, and reference
+pieces of it as offsets from the start.  For functions like this (contrived
+to have lots of constants obviously):
+
+double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
+
+We generate:
+
+_X:
+        lis r2, ha16(.CPI_X_0)
+        lfd f0, lo16(.CPI_X_0)(r2)
+        lis r2, ha16(.CPI_X_1)
+        lfd f2, lo16(.CPI_X_1)(r2)
+        fmadd f0, f1, f0, f2
+        lis r2, ha16(.CPI_X_2)
+        lfd f1, lo16(.CPI_X_2)(r2)
+        lis r2, ha16(.CPI_X_3)
+        lfd f2, lo16(.CPI_X_3)(r2)
+        fmadd f1, f0, f1, f2
+        blr
+
+It would be better to materialize .CPI_X into a register, then use immediates
+off of the register to avoid the lis's.  This is even more important in PIC
+mode.
+
+Note that this (and the static variable version) is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+Here's another example (the sgn function):
+double testf(double a) {
+  return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+it produces a BB like this:
+LBB1_1: ; cond_true
+        lis r2, ha16(LCPI1_0)
+        lfs f0, lo16(LCPI1_0)(r2)
+        lis r2, ha16(LCPI1_1)
+        lis r3, ha16(LCPI1_2)
+        lfs f2, lo16(LCPI1_2)(r3)
+        lfs f3, lo16(LCPI1_1)(r2)
+        fsub f0, f0, f1
+        fsel f1, f0, f2, f3
+        blr
+
+===-------------------------------------------------------------------------===
+
+PIC Code Gen IPO optimization:
+
+Squish small scalar globals together into a single global struct, allowing the
+address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
+of the GOT on targets with one).
+
+Note that this is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+===-------------------------------------------------------------------------===
+
+Implement Newton-Raphson method for improving estimate instructions to the
+correct accuracy, and implementing divide as multiply by reciprocal when it has
+more than one use.  Itanium will want this too.
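+
+For reference, a sketch of the iteration this note has in mind (illustrative
+C only, not what the backend must emit; the seed r0 would come from the
+hardware estimate, e.g. fres):
+
+/* One Newton-Raphson step for r ~= 1/d is r' = r * (2 - d*r).  Each step
+   roughly doubles the number of correct bits, so a ~5-bit fres estimate
+   needs two steps for float precision. */
+float refine_recip(float d, float r0) {
+  float r1 = r0 * (2.0f - d * r0);   /* step 1 */
+  float r2 = r1 * (2.0f - d * r1);   /* step 2 */
+  return r2;
+}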
+ +===-------------------------------------------------------------------------=== + +Compile offsets from allocas: + +int *%test() { + %X = alloca { int, int } + %Y = getelementptr {int,int}* %X, int 0, uint 1 + ret int* %Y +} + +into a single add, not two: + +_test: + addi r2, r1, -8 + addi r3, r2, 4 + blr + +--> important for C++. + +===-------------------------------------------------------------------------=== + +No loads or stores of the constants should be needed: + +struct foo { double X, Y; }; +void xxx(struct foo F); +void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); } + +===-------------------------------------------------------------------------=== + +Darwin Stub LICM optimization: + +Loops like this: + + for (...) bar(); + +Have to go through an indirect stub if bar is external or linkonce. It would +be better to compile it as: + + fp = &bar; + for (...) fp(); + +which only computes the address of bar once (instead of each time through the +stub). This is Darwin specific and would have to be done in the code generator. +Probably not a win on x86. + +===-------------------------------------------------------------------------=== + +Simple IPO for argument passing, change: + void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y) + +the Darwin ABI specifies that any integer arguments in the first 32 bytes worth +of arguments get assigned to r3 through r10. That is, if you have a function +foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the +argument bytes for r4 and r5. The trick then would be to shuffle the argument +order for functions we can internalize so that the maximum number of +integers/pointers get passed in regs before you see any of the fp arguments. + +Instead of implementing this, it would actually probably be easier to just +implement a PPC fastcc, where we could do whatever we wanted to the CC, +including having this work sanely. + +===-------------------------------------------------------------------------=== + +Fix Darwin FP-In-Integer Registers ABI + +Darwin passes doubles in structures in integer registers, which is very very +bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation +that percolates these things out of functions. + +Check out how horrible this is: +http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html + +This is an extension of "interprocedural CC unmunging" that can't be done with +just fastcc. + +===-------------------------------------------------------------------------=== + +Compile this: + +int foo(int a) { + int b = (a < 8); + if (b) { + return b * 3; // ignore the fact that this is always 3. + } else { + return 2; + } +} + +into something not this: + +_foo: +1) cmpwi cr7, r3, 8 + mfcr r2, 1 + rlwinm r2, r2, 29, 31, 31 +1) cmpwi cr0, r3, 7 + bgt cr0, LBB1_2 ; UnifiedReturnBlock +LBB1_1: ; then + rlwinm r2, r2, 0, 31, 31 + mulli r3, r2, 3 + blr +LBB1_2: ; UnifiedReturnBlock + li r3, 2 + blr + +In particular, the two compares (marked 1) could be shared by reversing one. +This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the +same operands (but backwards) exists. In this case, this wouldn't save us +anything though, because the compares still wouldn't be shared. + +===-------------------------------------------------------------------------=== + +We should custom expand setcc instead of pretending that we have it. That +would allow us to expose the access of the crbit after the mfcr, allowing +that access to be trivially folded into other ops. 
A simple example: + +int foo(int a, int b) { return (a < b) << 4; } + +compiles into: + +_foo: + cmpw cr7, r3, r4 + mfcr r2, 1 + rlwinm r2, r2, 29, 31, 31 + slwi r3, r2, 4 + blr + +===-------------------------------------------------------------------------=== + +Fold add and sub with constant into non-extern, non-weak addresses so this: + +static int a; +void bar(int b) { a = b; } +void foo(unsigned char *c) { + *c = a; +} + +So that + +_foo: + lis r2, ha16(_a) + la r2, lo16(_a)(r2) + lbz r2, 3(r2) + stb r2, 0(r3) + blr + +Becomes + +_foo: + lis r2, ha16(_a+3) + lbz r2, lo16(_a+3)(r2) + stb r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +We generate really bad code for this: + +int f(signed char *a, _Bool b, _Bool c) { + signed char t = 0; + if (b) t = *a; + if (c) *a = t; +} + +===-------------------------------------------------------------------------=== + +This: +int test(unsigned *P) { return *P >> 24; } + +Should compile to: + +_test: + lbz r3,0(r3) + blr + +not: + +_test: + lwz r2, 0(r3) + srwi r3, r2, 24 + blr + +===-------------------------------------------------------------------------=== + +On the G5, logical CR operations are more expensive in their three +address form: ops that read/write the same register are half as expensive as +those that read from two registers that are different from their destination. + +We should model this with two separate instructions. The isel should generate +the "two address" form of the instructions. When the register allocator +detects that it needs to insert a copy due to the two-addresness of the CR +logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point +we can convert to the "three address" instruction, to save code space. + +This only matters when we start generating cr logical ops. + +===-------------------------------------------------------------------------=== + +We should compile these two functions to the same thing: + +#include +void f(int a, int b, int *P) { + *P = (a-b)>=0?(a-b):(b-a); +} +void g(int a, int b, int *P) { + *P = abs(a-b); +} + +Further, they should compile to something better than: + +_g: + subf r2, r4, r3 + subfic r3, r2, 0 + cmpwi cr0, r2, -1 + bgt cr0, LBB2_2 ; entry +LBB2_1: ; entry + mr r2, r3 +LBB2_2: ; entry + stw r2, 0(r5) + blr + +GCC produces: + +_g: + subf r4,r4,r3 + srawi r2,r4,31 + xor r0,r2,r4 + subf r0,r2,r0 + stw r0,0(r5) + blr + +... which is much nicer. + +This theoretically may help improve twolf slightly (used in dimbox.c:142?). + +===-------------------------------------------------------------------------=== + +int foo(int N, int ***W, int **TK, int X) { + int t, i; + + for (t = 0; t < N; ++t) + for (i = 0; i < 4; ++i) + W[t / X][i][t % X] = TK[i][t]; + + return 5; +} + +We generate relatively atrocious code for this loop compared to gcc. + +We could also strength reduce the rem and the div: +http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf + +===-------------------------------------------------------------------------=== + +float foo(float X) { return (int)(X); } + +Currently produces: + +_foo: + fctiwz f0, f1 + stfd f0, -8(r1) + lwz r2, -4(r1) + extsw r2, r2 + std r2, -16(r1) + lfd f0, -16(r1) + fcfid f0, f0 + frsp f1, f0 + blr + +We could use a target dag combine to turn the lwz/extsw into an lwa when the +lwz has a single use. Since LWA is cracked anyway, this would be a codesize +win only. 
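+
+(For clarity, lwz+extsw and lwa compute the same value; the combine just
+pattern-matches the pair into one sign-extending load.  In C terms, assuming
+a 64-bit 'long' as on ppc64:
+
+long load_then_extend(int *p) {      /* lwz, then extsw */
+  unsigned w = *(unsigned *)p;
+  return (long)(int)w;
+}
+long load_extending(int *p) {        /* single lwa */
+  return (long)*p;
+}
+
+The two functions agree for every input, which is what makes the rewrite
+safe.)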
+ +===-------------------------------------------------------------------------=== + +We generate ugly code for this: + +void func(unsigned int *ret, float dx, float dy, float dz, float dw) { + unsigned code = 0; + if(dx < -dw) code |= 1; + if(dx > dw) code |= 2; + if(dy < -dw) code |= 4; + if(dy > dw) code |= 8; + if(dz < -dw) code |= 16; + if(dz > dw) code |= 32; + *ret = code; +} + +===-------------------------------------------------------------------------=== + +Complete the signed i32 to FP conversion code using 64-bit registers +transformation, good for PI. See PPCISelLowering.cpp, this comment: + + // FIXME: disable this lowered code. This generates 64-bit register values, + // and we don't model the fact that the top part is clobbered by calls. We + // need to flag these together so that the value isn't live across a call. + //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + +Also, if the registers are spilled to the stack, we have to ensure that all +64-bits of them are save/restored, otherwise we will miscompile the code. It +sounds like we need to get the 64-bit register classes going. + +===-------------------------------------------------------------------------=== + +%struct.B = type { i8, [3 x i8] } + +define void @bar(%struct.B* %b) { +entry: + %tmp = bitcast %struct.B* %b to i32* ; [#uses=1] + %tmp = load i32* %tmp ; [#uses=1] + %tmp3 = bitcast %struct.B* %b to i32* ; [#uses=1] + %tmp4 = load i32* %tmp3 ; [#uses=1] + %tmp8 = bitcast %struct.B* %b to i32* ; [#uses=2] + %tmp9 = load i32* %tmp8 ; [#uses=1] + %tmp4.mask17 = shl i32 %tmp4, i8 1 ; [#uses=1] + %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; [#uses=1] + %tmp.masked = and i32 %tmp, 2147483648 ; [#uses=1] + %tmp11 = or i32 %tmp1415, %tmp.masked ; [#uses=1] + %tmp12 = and i32 %tmp9, 2147483647 ; [#uses=1] + %tmp13 = or i32 %tmp12, %tmp11 ; [#uses=1] + store i32 %tmp13, i32* %tmp8 + ret void +} + +We emit: + +_foo: + lwz r2, 0(r3) + slwi r4, r2, 1 + or r4, r4, r2 + rlwimi r2, r4, 0, 0, 0 + stw r2, 0(r3) + blr + +We could collapse a bunch of those ORs and ANDs and generate the following +equivalent code: + +_foo: + lwz r2, 0(r3) + rlwinm r4, r2, 1, 0, 0 + or r2, r2, r4 + stw r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +We compile: + +unsigned test6(unsigned x) { + return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16); +} + +into: + +_test6: + lis r2, 255 + rlwinm r3, r3, 16, 0, 31 + ori r2, r2, 255 + and r3, r3, r2 + blr + +GCC gets it down to: + +_test6: + rlwinm r0,r3,16,8,15 + rlwinm r3,r3,16,24,31 + or r3,r3,r0 + blr + + +===-------------------------------------------------------------------------=== + +Consider a function like this: + +float foo(float X) { return X + 1234.4123f; } + +The FP constant ends up in the constant pool, so we need to get the LR register. + This ends up producing code like this: + +_foo: +.LBB_foo_0: ; entry + mflr r11 +*** stw r11, 8(r1) + bl "L00000$pb" +"L00000$pb": + mflr r2 + addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") + lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) + fadds f1, f1, f0 +*** lwz r11, 8(r1) + mtlr r11 + blr + +This is functional, but there is no reason to spill the LR register all the way +to the stack (the two marked instrs): spilling it to a GPR is quite enough. + +Implementing this will require some codegen improvements. Nate writes: + +"So basically what we need to support the "no stack frame save and restore" is a +generalization of the LR optimization to "callee-save regs". 
+ +Currently, we have LR marked as a callee-save reg. The register allocator sees +that it's callee save, and spills it directly to the stack. + +Ideally, something like this would happen: + +LR would be in a separate register class from the GPRs. The class of LR would be +marked "unspillable". When the register allocator came across an unspillable +reg, it would ask "what is the best class to copy this into that I *can* spill" +If it gets a class back, which it will in this case (the gprs), it grabs a free +register of that class. If it is then later necessary to spill that reg, so be +it. + +===-------------------------------------------------------------------------=== + +We compile this: +int test(_Bool X) { + return X ? 524288 : 0; +} + +to: +_test: + cmplwi cr0, r3, 0 + lis r2, 8 + li r3, 0 + beq cr0, LBB1_2 ;entry +LBB1_1: ;entry + mr r3, r2 +LBB1_2: ;entry + blr + +instead of: +_test: + addic r2,r3,-1 + subfe r0,r2,r3 + slwi r3,r0,19 + blr + +This sort of thing occurs a lot due to globalopt. + +===-------------------------------------------------------------------------=== + +We currently compile 32-bit bswap: + +declare i32 @llvm.bswap.i32(i32 %A) +define i32 @test(i32 %A) { + %B = call i32 @llvm.bswap.i32(i32 %A) + ret i32 %B +} + +to: + +_test: + rlwinm r2, r3, 24, 16, 23 + slwi r4, r3, 24 + rlwimi r2, r3, 8, 24, 31 + rlwimi r4, r3, 8, 8, 15 + rlwimi r4, r2, 0, 16, 31 + mr r3, r4 + blr + +it would be more efficient to produce: + +_foo: mr r0,r3 + rlwinm r3,r3,8,0xffffffff + rlwimi r3,r0,24,0,7 + rlwimi r3,r0,24,16,23 + blr + +===-------------------------------------------------------------------------=== + +test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to: + +__ZNK4llvm5APInt17countLeadingZerosEv: + ld r2, 0(r3) + cntlzd r2, r2 + or r2, r2, r2 <<-- silly. + addi r3, r2, -64 + blr + +The dead or is a 'truncate' from 64- to 32-bits. + +===-------------------------------------------------------------------------=== + +We generate horrible ppc code for this: + +#define N 2000000 +double a[N],c[N]; +void simpleloop() { + int j; + for (j=0; j + inline std::pair full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + +__Z11no_overflowjj: + add r4,r3,r4 + subfc r3,r3,r4 + li r3,0 + adde r3,r3,r3 + blr + +(or better) not: + +__Z11no_overflowjj: + add r2, r4, r3 + cmplw cr7, r2, r3 + mfcr r2 + rlwinm r2, r2, 29, 31, 31 + xori r3, r2, 1 + blr + +//===---------------------------------------------------------------------===// + +We compile some FP comparisons into an mfcr with two rlwinms and an or. For +example: +#include +int test(double x, double y) { return islessequal(x, y);} +int test2(double x, double y) { return islessgreater(x, y);} +int test3(double x, double y) { return !islessequal(x, y);} + +Compiles into (all three are similar, but the bits differ): + +_test: + fcmpu cr7, f1, f2 + mfcr r2 + rlwinm r3, r2, 29, 31, 31 + rlwinm r2, r2, 31, 31, 31 + or r3, r2, r3 + blr + +GCC compiles this into: + + _test: + fcmpu cr7,f1,f2 + cror 30,28,30 + mfcr r3 + rlwinm r3,r3,31,1 + blr + +which is more efficient and can use mfocr. See PR642 for some more context. 
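+
+(The cror trick works because fcmpu leaves four bits in the CR field -- lt,
+gt, eq, un -- and "less or equal" is just lt|eq.  A scalar model in C, with
+the field names made up for readability:
+
+struct cr { int lt, gt, eq, un; };   /* one fcmpu result */
+
+int is_less_equal(struct cr c) {
+  return c.lt | c.eq;   /* cror folds eq into lt; unordered stays 0 */
+}
+
+One OR on the CR bits replaces the two rlwinm extractions plus the or.)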
+ +//===---------------------------------------------------------------------===// + +void foo(float *data, float d) { + long i; + for (i = 0; i < 8000; i++) + data[i] = d; +} +void foo2(float *data, float d) { + long i; + data--; + for (i = 0; i < 8000; i++) { + data[1] = d; + data++; + } +} + +These compile to: + +_foo: + li r2, 0 +LBB1_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB1_1 ; bb + blr +_foo2: + li r2, 0 +LBB2_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB2_1 ; bb + blr + +The 'mr' could be eliminated to folding the add into the cmp better. + +//===---------------------------------------------------------------------===// +Codegen for the following (low-probability) case deteriorated considerably +when the correctness fixes for unordered comparisons went in (PR 642, 58871). +It should be possible to recover the code quality described in the comments. + +; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 +; This should produce one 'or' or 'cror' instruction per function. + +; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 +; PR2964 + +define i32 @test(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ole double %x, %y ; [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp345 +} + +define i32 @test2(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp one double %x, %y ; [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp345 +} + +define i32 @test3(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ugt double %x, %y ; [#uses=1] + %tmp34 = zext i1 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp34 +} +//===----------------------------------------------------------------------===// +; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg + +; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and +; should not be generated except with -enable-finite-only-fp-math or the like). +; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to +; recognize a more elaborate tree than a simple SETxx. + +define double @test_FNEG_sel(double %A, double %B, double %C) { + %D = sub double -0.000000e+00, %A ; [#uses=1] + %Cond = fcmp ugt double %D, -0.000000e+00 ; [#uses=1] + %E = select i1 %Cond, double %B, double %C ; [#uses=1] + ret double %E +} + diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt new file mode 100644 index 000000000000..1e4c6fb98440 --- /dev/null +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -0,0 +1,211 @@ +//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// + +Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector +registers, to generate better spill code. + +//===----------------------------------------------------------------------===// + +The first should be a single lvx from the constant pool, the second should be +a xor/stvx: + +void foo(void) { + int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; + bar (x); +} + +#include +void foo(void) { + int x[8] __attribute__((aligned(128))); + memset (x, 0, sizeof (x)); + bar (x); +} + +//===----------------------------------------------------------------------===// + +Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 + +When -ffast-math is on, we can use 0.0. 
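+
+(The -0.0 requirement is plain signed-zero arithmetic: if the product is
+-0.0, adding +0.0 flips its sign while adding -0.0 preserves it.  A quick
+host-side check in C:
+
+#include <stdio.h>
+int main(void) {
+  float p = -0.0f * 1.0f;               /* product is -0.0 */
+  printf("%g\n", (double)(p +  0.0f));  /* prints 0: sign lost */
+  printf("%g\n", (double)(p + -0.0f));  /* prints -0: sign kept */
+  return 0;
+}
+
+so only the -0.0 addend makes the FMADD expansion of a vector multiply
+bit-exact.)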
+ +//===----------------------------------------------------------------------===// + + Consider this: + v4f32 Vector; + v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; + +Since we know that "Vector" is 16-byte aligned and we know the element offset +of ".X", we should change the load into a lve*x instruction, instead of doing +a load/store/lve*x sequence. + +//===----------------------------------------------------------------------===// + +For functions that use altivec AND have calls, we are VRSAVE'ing all call +clobbered regs. + +//===----------------------------------------------------------------------===// + +Implement passing vectors by value into calls and receiving them as arguments. + +//===----------------------------------------------------------------------===// + +GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load +of C1/C2/C3, then a load and vperm of Variable. + +//===----------------------------------------------------------------------===// + +We need a way to teach tblgen that some operands of an intrinsic are required to +be constants. The verifier should enforce this constraint. + +//===----------------------------------------------------------------------===// + +We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte +aligned stack slot, followed by a load/vperm. We should probably just store it +to a scalar stack slot, then use lvsl/vperm to load it. If the value is already +in memory this is a big win. + +//===----------------------------------------------------------------------===// + +extract_vector_elt of an arbitrary constant vector can be done with the +following instructions: + +vTemp = vec_splat(v0,2); // 2 is the element the src is in. +vec_ste(&destloc,0,vTemp); + +We can do an arbitrary non-constant value by using lvsr/perm/ste. + +//===----------------------------------------------------------------------===// + +If we want to tie instruction selection into the scheduler, we can do some +constant formation with different instructions. For example, we can generate +"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with +"vsplti 0" or "vxor", each of which use different execution units, thus could +help scheduling. + +This is probably only reasonable for a post-pass scheduler. + +//===----------------------------------------------------------------------===// + +For this function: + +void test(vector float *A, vector float *B) { + vector float C = (vector float)vec_cmpeq(*A, *B); + if (!vec_any_eq(*A, *B)) + *B = (vector float){0,0,0,0}; + *A = C; +} + +we get the following basic block: + + ... + lvx v2, 0, r4 + lvx v3, 0, r3 + vcmpeqfp v4, v3, v2 + vcmpeqfp. v2, v3, v2 + bne cr6, LBB1_2 ; cond_next + +The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the +vcmpeqfp. result is used by a branch. This can be improved. 
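+
+(Conceptually the fix is to compute the element mask once and derive the CR6
+"any element equal" predicate from it, instead of comparing twice.  A scalar
+C model of what one merged compare produces:
+
+void cmp_once(float *a, float *b, unsigned *mask, int *any) {
+  int i;
+  *any = 0;
+  for (i = 0; i != 4; ++i) {
+    mask[i] = (a[i] == b[i]) ? ~0u : 0;  /* vcmpeqfp result element */
+    *any |= (mask[i] != 0);              /* the dot form's CR6 bit  */
+  }
+}
+
+The record form already produces both outputs in one instruction; the
+selector just has to use them.)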
+
+//===----------------------------------------------------------------------===//
+
+The code generated for this is truly awful:
+
+vector float test(float a, float b) {
+ return (vector float){ 0.0, a, 0.0, 0.0};
+}
+
+LCPI1_0:                                        ;  float
+        .space  4
+        .text
+        .globl  _test
+        .align  4
+_test:
+        mfspr r2, 256
+        oris r3, r2, 4096
+        mtspr 256, r3
+        lis r3, ha16(LCPI1_0)
+        addi r4, r1, -32
+        stfs f1, -16(r1)
+        addi r5, r1, -16
+        lfs f0, lo16(LCPI1_0)(r3)
+        stfs f0, -32(r1)
+        lvx v2, 0, r4
+        lvx v3, 0, r5
+        vmrghw v3, v3, v2
+        vspltw v2, v2, 0
+        vmrghw v2, v2, v3
+        mtspr 256, r2
+        blr
+
+//===----------------------------------------------------------------------===//
+
+int foo(vector float *x, vector float *y) {
+  if (vec_all_eq(*x,*y)) return 3245;
+  else return 12;
+}
+
+A predicate compare being used in a select_cc should have the same peephole
+applied to it as a predicate compare used by a br_cc.  There should be no
+mfcr here:
+
+_foo:
+        mfspr r2, 256
+        oris r5, r2, 12288
+        mtspr 256, r5
+        li r5, 12
+        li r6, 3245
+        lvx v2, 0, r4
+        lvx v3, 0, r3
+        vcmpeqfp. v2, v3, v2
+        mfcr r3, 2
+        rlwinm r3, r3, 25, 31, 31
+        cmpwi cr0, r3, 0
+        bne cr0, LBB1_2 ; entry
+LBB1_1: ; entry
+        mr r6, r5
+LBB1_2: ; entry
+        mr r3, r6
+        mtspr 256, r2
+        blr
+
+//===----------------------------------------------------------------------===//
+
+CodeGen/PowerPC/vec_constants.ll has an and operation that should be
+codegen'd to andc.  The issue is that the 'all ones' build vector is
+SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected
+which prevents the vnot pattern from matching.
+
+
+//===----------------------------------------------------------------------===//
+
+An alternative to the store/store/load approach for illegal insert element
+lowering would be:
+
+1. store element to any ol' slot
+2. lvx the slot
+3. lvsl 0; splat index; vcmpeq to generate a select mask
+4. lvsl slot + x; vperm to rotate result into correct slot
+5. vsel result together.
+
+//===----------------------------------------------------------------------===//
+
+Should codegen branches on vec_any/vec_all to avoid mfcr.  Two examples:
+
+#include <altivec.h>
+ int f(vector float a, vector float b)
+ {
+  int aa = 0;
+  if (vec_all_ge(a, b))
+    aa |= 0x1;
+  if (vec_any_ge(a,b))
+    aa |= 0x2;
+  return aa;
+}
+
+vector float f(vector float a, vector float b) {
+  if (vec_any_eq(a, b))
+    return a;
+  else
+    return b;
+}
+
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
new file mode 100644
index 000000000000..f68cf0e40df0
--- /dev/null
+++ b/lib/Target/README.txt
@@ -0,0 +1,1679 @@
+Target Independent Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+With the recent changes to make the implicit def/use set explicit in
+machineinstrs, we should change the target descriptions for 'call' instructions
+so that the .td files don't list all the call-clobbered registers as implicit
+defs.  Instead, these should be added by the code generator (e.g. on the dag).
+
+This has a number of uses:
+
+1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
+   for their different impdef sets.
+2. Targets with multiple calling convs (e.g. x86) which have different clobber
+   sets don't need copies of call instructions.
+3. 'Interprocedural register allocation' can be done to reduce the clobber sets
+   of calls.
+
+//===---------------------------------------------------------------------===//
+
+Make the PPC branch selector target independent
+
+//===---------------------------------------------------------------------===//
+
+Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
+precision don't matter (-ffast-math).  Misc/mandel will like this. :)  This
+isn't safe in general, even on darwin.  See the libm implementation of hypot
+for examples (which special case when x/y are exactly zero to get signed zeros
+etc right).
+
+//===---------------------------------------------------------------------===//
+
+Solve this DAG isel folding deficiency:
+
+int X, Y;
+
+void fn1(void)
+{
+  X = X | (Y << 3);
+}
+
+compiles to
+
+fn1:
+        movl Y, %eax
+        shll $3, %eax
+        orl X, %eax
+        movl %eax, X
+        ret
+
+The problem is the store's chain operand is not the load X but rather
+a TokenFactor of the load X and load Y, which prevents the folding.
+
+There are two ways to fix this:
+
+1. The dag combiner can start using alias analysis to realize that y/x
+   don't alias, making the store to X not dependent on the load from Y.
+2. The generated isel could be made smarter in the case it can't
+   disambiguate the pointers.
+
+Number 1 is the preferred solution.
+
+This has been "fixed" by a TableGen hack. But that is a short term workaround
+which will be removed once the proper fix is made.
+
+//===---------------------------------------------------------------------===//
+
+On targets with expensive 64-bit multiply, we could LSR this:
+
+for (i = ...; ++i) {
+   x = 1ULL << i;
+
+into:
+ long long tmp = 1;
+ for (i = ...; ++i, tmp+=tmp)
+   x = tmp;
+
+This would be a win on ppc32, but not x86 or ppc64.
+
+//===---------------------------------------------------------------------===//
+
+Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn: X*X*X*X -> t=(X*X) (t*t) to eliminate a multiply.
+
+//===---------------------------------------------------------------------===//
+
+Interesting? testcase for add/shift/mul reassoc:
+
+int bar(int x, int y) {
+  return x*x*x+y+x*x*x*x*x*y*y*y*y;
+}
+int foo(int z, int n) {
+  return bar(z, n) + bar(2*z, 2*n);
+}
+
+Reassociate should handle the example in GCC PR16157.
+
+//===---------------------------------------------------------------------===//
+
+These two functions should generate the same code on big-endian systems:
+
+int g(int *j,int *l) {  return memcmp(j,l,4);  }
+int h(int *j, int *l) {  return *j - *l; }
+
+this could be done in SelectionDAGISel.cpp, along with other special cases,
+for 1,2,4,8 bytes.
+
+//===---------------------------------------------------------------------===//
+
+It would be nice to revert this patch:
+http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
+
+And teach the dag combiner enough to simplify the code expanded before
+legalize.  It seems plausible that this knowledge would let it simplify other
+stuff too.
+
+//===---------------------------------------------------------------------===//
+
+For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+to the type size.  It works but can be overly conservative as the alignment of
+specific vector types is target dependent.
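+
+For instance, with the GCC/Clang vector extension the preferred alignment of
+a wide vector type does not have to equal its size; a small probe (printed
+values are target- and compiler-dependent):
+
+#include <stdio.h>
+
+typedef float v4sf  __attribute__((vector_size(16)));
+typedef float v16sf __attribute__((vector_size(64)));
+
+int main(void) {
+  printf("%u %u\n", (unsigned)__alignof__(v4sf), (unsigned)__alignof__(v16sf));
+  return 0;
+}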
+
+//===---------------------------------------------------------------------===//
+
+We should produce an unaligned load from code like this:
+
+v4sf example(float *P) {
+  return (v4sf){P[0], P[1], P[2], P[3] };
+}
+
+//===---------------------------------------------------------------------===//
+
+Add support for conditional increments, and other related patterns.  Instead
+of:
+
+        movl 136(%esp), %eax
+        cmpl $0, %eax
+        je LBB16_2      #cond_next
+LBB16_1:        #cond_true
+        incl _foo
+LBB16_2:        #cond_next
+
+emit:
+        movl    _foo, %eax
+        cmpl    $1, %edi
+        sbbl    $-1, %eax
+        movl    %eax, _foo
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+
+Expand these to calls of sin/cos and stores:
+      double sincos(double x, double *sin, double *cos);
+      float sincosf(float x, float *sin, float *cos);
+      long double sincosl(long double x, long double *sin, long double *cos);
+
+Doing so could allow SROA of the destination pointers.  See also:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687
+
+This is now easily doable with MRVs.  We could even make an intrinsic for this
+if anyone cared enough about sincos.
+
+//===---------------------------------------------------------------------===//
+
+Turn this into a single byte store with no load (the other 3 bytes are
+unmodified):
+
+define void @test(i32* %P) {
+  %tmp = load i32* %P
+  %tmp14 = or i32 %tmp, 3305111552
+  %tmp15 = and i32 %tmp14, 3321888767
+  store i32 %tmp15, i32* %P
+  ret void
+}
+
+//===---------------------------------------------------------------------===//
+
+dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x.
+
+Compile:
+
+int bar(int x)
+{
+  int t = __builtin_clz(x);
+  return -(t>>5);
+}
+
+to:
+
+_bar:   addic r3,r3,-1
+        subfe r3,r3,r3
+        blr
+
+//===---------------------------------------------------------------------===//
+
+Legalize should lower cttz like this:
+  cttz(x) = popcnt((x-1) & ~x)
+
+on targets that have popcnt but not cttz.  itanium, what else?
+
+//===---------------------------------------------------------------------===//
+
+quantum_sigma_x in 462.libquantum contains the following loop:
+
+      for(i=0; i<reg->size; i++)
+	{
+	  /* Flip the target bit of each basis state */
+	  reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target);
+	}
+
+Where MAX_UNSIGNED/state is a 64-bit int.  On a 32-bit platform it would be
+just so cool to turn it into something like:
+
+   long long Res = ((MAX_UNSIGNED) 1 << target);
+   if (target < 32) {
+     for(i=0; i<reg->size; i++)
+       reg->node[i].state ^= Res & 0xFFFFFFFFULL;
+   } else {
+     for(i=0; i<reg->size; i++)
+       reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL;
+   }
+
+... which would only do one 32-bit XOR per loop iteration instead of two.
+
+It would also be nice to recognize that reg->size doesn't alias reg->node[i],
+but alas...
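+
+A self-contained sketch of that rewrite (hypothetical names; state[] stands
+in for reg->node[i].state):
+
+#include <stdint.h>
+#include <stddef.h>
+
+void flip_naive(uint64_t *state, size_t n, unsigned target) {
+  for (size_t i = 0; i < n; i++)
+    state[i] ^= (uint64_t)1 << target;   /* two 32-bit XORs on a 32-bit CPU */
+}
+
+void flip_split(uint64_t *state, size_t n, unsigned target) {
+  uint64_t res = (uint64_t)1 << target;
+  if (target < 32) {                     /* only the low word can change */
+    for (size_t i = 0; i < n; i++)
+      state[i] ^= res & 0xFFFFFFFFULL;
+  } else {                               /* only the high word can change */
+    for (size_t i = 0; i < n; i++)
+      state[i] ^= res & 0xFFFFFFFF00000000ULL;
+  }
+}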
+
+//===---------------------------------------------------------------------===//
+
+This isn't recognized as bswap by instcombine (yes, it really is bswap):
+
+unsigned long reverse(unsigned v) {
+    unsigned t;
+    t = v ^ ((v << 16) | (v >> 16));
+    t &= ~0xff0000;
+    v = (v << 24) | (v >> 8);
+    return v ^ (t >> 8);
+}
+
+//===---------------------------------------------------------------------===//
+
+These idioms should be recognized as popcount (see PR1488):
+
+unsigned countbits_slow(unsigned v) {
+  unsigned c;
+  for (c = 0; v; v >>= 1)
+    c += v & 1;
+  return c;
+}
+unsigned countbits_fast(unsigned v){
+  unsigned c;
+  for (c = 0; v; c++)
+    v &= v - 1; // clear the least significant bit set
+  return c;
+}
+
+BITBOARD = unsigned long long
+int PopCnt(register BITBOARD a) {
+  register int c=0;
+  while(a) {
+    c++;
+    a &= a - 1;
+  }
+  return c;
+}
+unsigned int popcount(unsigned int input) {
+  unsigned int count = 0;
+  for (unsigned int i = 0; i < 4 * 8; i++)
+    count += (input >> i) & 1;
+  return count;
+}
+
+//===---------------------------------------------------------------------===//
+
+These should turn into single 16-bit (unaligned?) loads on little/big endian
+processors.
+
+unsigned short read_16_le(const unsigned char *adr) {
+  return adr[0] | (adr[1] << 8);
+}
+unsigned short read_16_be(const unsigned char *adr) {
+  return (adr[0] << 8) | adr[1];
+}
+
+//===---------------------------------------------------------------------===//
+
+-instcombine should handle this transform:
+   icmp pred (sdiv X / C1 ), C2
+when X, C1, and C2 are unsigned.  Similarly for udiv and signed operands.
+
+Currently InstCombine avoids this transform but will do it when the signs of
+the operands and the sign of the divide match. See the FIXME in
+InstructionCombining.cpp in the visitSetCondInst method after the switch case
+for Instruction::UDiv (around line 4447) for more details.
+
+The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of
+this construct.
+
+//===---------------------------------------------------------------------===//
+
+viterbi speeds up *significantly* if the various "history" related copy loops
+are turned into memcpy calls at the source level.  We need a "loops to memcpy"
+pass.
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+typedef unsigned U32;
+typedef unsigned long long U64;
+int test (U32 *inst, U64 *regs) {
+    U64 effective_addr2;
+    U32 temp = *inst;
+    int r1 = (temp >> 20) & 0xf;
+    int b2 = (temp >> 16) & 0xf;
+    effective_addr2 = temp & 0xfff;
+    if (b2) effective_addr2 += regs[b2];
+    b2 = (temp >> 12) & 0xf;
+    if (b2) effective_addr2 += regs[b2];
+    effective_addr2 &= regs[4];
+    if ((effective_addr2 & 3) == 0)
+        return 1;
+    return 0;
+}
+
+Note that only the low 2 bits of effective_addr2 are used.  On 32-bit systems,
+we don't eliminate the computation of the top half of effective_addr2 because
+we don't have whole-function selection dags.  On x86, this means we use one
+extra register for the function when effective_addr2 is declared as U64 than
+when it is declared U32.
+
+//===---------------------------------------------------------------------===//
+
+Promote for i32 bswap can use i64 bswap + shr.  Useful on targets with 64-bit
+regs and bswap, like itanium.
+
+//===---------------------------------------------------------------------===//
+
+LSR should know what GPR types a target has.
This code: + +volatile short X, Y; // globals + +void foo(int N) { + int i; + for (i = 0; i < N; i++) { X = i; Y = i*4; } +} + +produces two identical IV's (after promotion) on PPC/ARM: + +LBB1_1: @bb.preheader + mov r3, #0 + mov r2, r3 + mov r1, r3 +LBB1_2: @bb + ldr r12, LCPI1_0 + ldr r12, [r12] + strh r2, [r12] + ldr r12, LCPI1_1 + ldr r12, [r12] + strh r3, [r12] + add r1, r1, #1 <- [0,+,1] + add r3, r3, #4 + add r2, r2, #1 <- [0,+,1] + cmp r1, r0 + bne LBB1_2 @bb + + +//===---------------------------------------------------------------------===// + +Tail call elim should be more aggressive, checking to see if the call is +followed by an uncond branch to an exit block. + +; This testcase is due to tail-duplication not wanting to copy the return +; instruction into the terminating blocks because there was other code +; optimized out of the function after the taildup happened. +; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call + +define i32 @t4(i32 %a) { +entry: + %tmp.1 = and i32 %a, 1 ; [#uses=1] + %tmp.2 = icmp ne i32 %tmp.1, 0 ; [#uses=1] + br i1 %tmp.2, label %then.0, label %else.0 + +then.0: ; preds = %entry + %tmp.5 = add i32 %a, -1 ; [#uses=1] + %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; [#uses=1] + br label %return + +else.0: ; preds = %entry + %tmp.7 = icmp ne i32 %a, 0 ; [#uses=1] + br i1 %tmp.7, label %then.1, label %return + +then.1: ; preds = %else.0 + %tmp.11 = add i32 %a, -2 ; [#uses=1] + %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; [#uses=1] + br label %return + +return: ; preds = %then.1, %else.0, %then.0 + %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ], + [ %tmp.9, %then.1 ] + ret i32 %result.0 +} + +//===---------------------------------------------------------------------===// + +Tail recursion elimination is not transforming this function, because it is +returning n, which fails the isDynamicConstant check in the accumulator +recursion checks. + +long long fib(const long long n) { + switch(n) { + case 0: + case 1: + return n; + default: + return fib(n-1) + fib(n-2); + } +} + +//===---------------------------------------------------------------------===// + +Tail recursion elimination should handle: + +int pow2m1(int n) { + if (n == 0) + return 0; + return 2 * pow2m1 (n - 1) + 1; +} + +Also, multiplies can be turned into SHL's, so they should be handled as if +they were associative. "return foo() << 1" can be tail recursion eliminated. + +//===---------------------------------------------------------------------===// + +Argument promotion should promote arguments for recursive functions, like +this: + +; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val + +define internal i32 @foo(i32* %x) { +entry: + %tmp = load i32* %x ; [#uses=0] + %tmp.foo = call i32 @foo( i32* %x ) ; [#uses=1] + ret i32 %tmp.foo +} + +define i32 @bar(i32* %x) { +entry: + %tmp3 = call i32 @foo( i32* %x ) ; [#uses=1] + ret i32 %tmp3 +} + +//===---------------------------------------------------------------------===// + +"basicaa" should know how to look through "or" instructions that act like add +instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and +basicaa can't analyze the array subscript, leading to duplicated loads in the +generated code: + +void test(int X, int Y, int a[]) { +int i; + for (i=2; i<1000; i+=4) { + a[i+0] = a[i-1+0]*a[i-2+0]; + a[i+1] = a[i-1+1]*a[i-2+1]; + a[i+2] = a[i-1+2]*a[i-2+2]; + a[i+3] = a[i-1+3]*a[i-2+3]; + } +} + +BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i]. 
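+
+A quick brute-force check (not from the original note) of the identity
+BasicAA would need: x*4 has its low two bits clear, so or-ing in 1 is the
+same as adding 1:
+
+#include <assert.h>
+
+int main(void) {
+  for (int x = -4096; x <= 4096; x++)
+    assert(x * 4 + 1 == ((x * 4) | 1));
+  return 0;
+}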
+
+//===---------------------------------------------------------------------===//
+
+We should investigate an instruction sinking pass.  Consider this silly
+example in pic mode:
+
+#include <assert.h>
+void foo(int x) {
+  assert(x);
+  //...
+}
+
+we compile this to:
+_foo:
+	subl	$28, %esp
+	call	"L1$pb"
+"L1$pb":
+	popl	%eax
+	cmpl	$0, 32(%esp)
+	je	LBB1_2	# cond_true
+LBB1_1:	# return
+	# ...
+	addl	$28, %esp
+	ret
+LBB1_2:	# cond_true
+...
+
+The PIC base computation (call+popl) is only used on one path through the
+code, but is currently always computed in the entry block.  It would be
+better to sink the picbase computation down into the block for the
+assertion, as it is the only one that uses it.  This happens for a lot of
+code with early outs.
+
+Another example is loads of arguments, which are usually emitted into the
+entry block on targets like x86.  If not used in all paths through a
+function, they should be sunk into the ones that do.
+
+In this case, whole-function-isel would also handle this.
+
+//===---------------------------------------------------------------------===//
+
+Investigate lowering of sparse switch statements into perfect hash tables:
+http://burtleburtle.net/bob/hash/perfect.html
+
+//===---------------------------------------------------------------------===//
+
+We should turn things like "load+fabs+store" and "load+fneg+store" into the
+corresponding integer operations.  On a yonah, this loop:
+
+double a[256];
+void foo() {
+  int i, b;
+  for (b = 0; b < 10000000; b++)
+  for (i = 0; i < 256; i++)
+    a[i] = -a[i];
+}
+
+is twice as slow as this loop:
+
+long long a[256];
+void foo() {
+  int i, b;
+  for (b = 0; b < 10000000; b++)
+  for (i = 0; i < 256; i++)
+    a[i] ^= (1ULL << 63);
+}
+
+and I suspect other processors are similar.  On X86 in particular this is a
+big win because doing this with integers allows the use of read/modify/write
+instructions.
+
+//===---------------------------------------------------------------------===//
+
+DAG Combiner should try to combine small loads into larger loads when
+profitable.  For example, we compile this C++ example:
+
+struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
+extern THotKey m_HotKey;
+THotKey GetHotKey () { return m_HotKey; }
+
+into (-O3 -fno-exceptions -static -fomit-frame-pointer):
+
+__Z9GetHotKeyv:
+	pushl	%esi
+	movl	8(%esp), %eax
+	movb	_m_HotKey+3, %cl
+	movb	_m_HotKey+4, %dl
+	movb	_m_HotKey+2, %ch
+	movw	_m_HotKey, %si
+	movw	%si, (%eax)
+	movb	%ch, 2(%eax)
+	movb	%cl, 3(%eax)
+	movb	%dl, 4(%eax)
+	popl	%esi
+	ret	$4
+
+GCC produces:
+
+__Z9GetHotKeyv:
+	movl	_m_HotKey, %edx
+	movl	4(%esp), %eax
+	movl	%edx, (%eax)
+	movzwl	_m_HotKey+4, %edx
+	movw	%dx, 4(%eax)
+	ret	$4
+
+The LLVM IR contains the needed alignment info, so we should be able to
+merge the loads and stores into 4-byte loads:
+
+	%struct.THotKey = type { i16, i8, i8, i8 }
+define void @_Z9GetHotKeyv(%struct.THotKey* sret  %agg.result) nounwind  {
+...
+	%tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
+	%tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
+	%tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
+	%tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
+
+Alternatively, we should use a small amount of base-offset alias analysis
+to make it so the scheduler doesn't need to hold all the loads in regs at
+once.
+ +//===---------------------------------------------------------------------===// + +We should add an FRINT node to the DAG to model targets that have legal +implementations of ceil/floor/rint. + +//===---------------------------------------------------------------------===// + +This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043 +contains a testcase that compiles down to: + + %struct.XMM128 = type { <4 x float> } +.. + %src = alloca %struct.XMM128 +.. + %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>* + %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0 + store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16 + %tmp66 = load <4 x float>* %tmp65, align 16 + %tmp71 = add <4 x float> %tmp66, %tmp66 + +If the mid-level optimizer turned the bitcast of pointer + store of tmp5899 +into a bitcast of the vector value and a store to the pointer, then the +store->load could be easily removed. + +//===---------------------------------------------------------------------===// + +Consider: + +int test() { + long long input[8] = {1,1,1,1,1,1,1,1}; + foo(input); +} + +We currently compile this into a memcpy from a global array since the +initializer is fairly large and not memset'able. This is good, but the memcpy +gets lowered to load/stores in the code generator. This is also ok, except +that the codegen lowering for memcpy doesn't handle the case when the source +is a constant global. This gives us atrocious code like this: + + call "L1$pb" +"L1$pb": + popl %eax + movl _C.0.1444-"L1$pb"+32(%eax), %ecx + movl %ecx, 40(%esp) + movl _C.0.1444-"L1$pb"+20(%eax), %ecx + movl %ecx, 28(%esp) + movl _C.0.1444-"L1$pb"+36(%eax), %ecx + movl %ecx, 44(%esp) + movl _C.0.1444-"L1$pb"+44(%eax), %ecx + movl %ecx, 52(%esp) + movl _C.0.1444-"L1$pb"+40(%eax), %ecx + movl %ecx, 48(%esp) + movl _C.0.1444-"L1$pb"+12(%eax), %ecx + movl %ecx, 20(%esp) + movl _C.0.1444-"L1$pb"+4(%eax), %ecx +... + +instead of: + movl $1, 16(%esp) + movl $0, 20(%esp) + movl $1, 24(%esp) + movl $0, 28(%esp) + movl $1, 32(%esp) + movl $0, 36(%esp) + ... + +//===---------------------------------------------------------------------===// + +http://llvm.org/PR717: + +The following code should compile into "ret int undef". Instead, LLVM +produces "ret int 0": + +int f() { + int x = 4; + int y; + if (x == 3) y = 0; + return y; +} + +//===---------------------------------------------------------------------===// + +The loop unroller should partially unroll loops (instead of peeling them) +when code growth isn't too bad and when an unroll count allows simplification +of some code within the loop. One trivial example is: + +#include +int main() { + int nRet = 17; + int nLoop; + for ( nLoop = 0; nLoop < 1000; nLoop++ ) { + if ( nLoop & 1 ) + nRet += 2; + else + nRet -= 1; + } + return nRet; +} + +Unrolling by 2 would eliminate the '&1' in both copies, leading to a net +reduction in code size. The resultant code would then also be suitable for +exit value computation. + +//===---------------------------------------------------------------------===// + +We miss a bunch of rotate opportunities on various targets, including ppc, x86, +etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate +matching code in dag combine doesn't look through truncates aggressively +enough. 
Here are some testcases reduced from GCC PR17886:
+
+unsigned long long f(unsigned long long x, int y) {
+  return (x << y) | (x >> 64-y);
+}
+unsigned f2(unsigned x, int y){
+  return (x << y) | (x >> 32-y);
+}
+unsigned long long f3(unsigned long long x){
+  int y = 9;
+  return (x << y) | (x >> 64-y);
+}
+unsigned f4(unsigned x){
+  int y = 10;
+  return (x << y) | (x >> 32-y);
+}
+unsigned long long f5(unsigned long long x, unsigned long long y) {
+  return (x << 8) | ((y >> 48) & 0xffull);
+}
+unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
+  switch(z) {
+  case 1:
+    return (x << 8) | ((y >> 48) & 0xffull);
+  case 2:
+    return (x << 16) | ((y >> 40) & 0xffffull);
+  case 3:
+    return (x << 24) | ((y >> 32) & 0xffffffull);
+  case 4:
+    return (x << 32) | ((y >> 24) & 0xffffffffull);
+  default:
+    return (x << 40) | ((y >> 16) & 0xffffffffffull);
+  }
+}
+
+On X86-64, we only handle f2/f3/f4 right.  On x86-32, a few of these
+generate truly horrible code, instead of using shld and friends.  On
+ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
+badness.  PPC64 misses f, f5 and f6.  CellSPU aborts in isel.
+
+//===---------------------------------------------------------------------===//
+
+We do a number of simplifications in simplify libcalls to strength reduce
+standard library functions, but we don't currently merge them together.  For
+example, it is useful to merge memcpy(a,b,strlen(b)) -> strcpy.  This can only
+be done safely if "b" isn't modified between the strlen and memcpy of course.
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn things like:
+
+int factorial(int X) {
+ return X*X*X*X*X*X*X*X;
+}
+
+into llvm.powi calls, allowing the code generator to produce balanced
+multiplication trees.
+
+//===---------------------------------------------------------------------===//
+
+We generate a horrible libcall for llvm.powi.  For example, we compile:
+
+#include <cmath>
+double f(double a) { return std::pow(a, 4); }
+
+into:
+
+__Z1fd:
+	subl	$12, %esp
+	movsd	16(%esp), %xmm0
+	movsd	%xmm0, (%esp)
+	movl	$4, 8(%esp)
+	call	L___powidf2$stub
+	addl	$12, %esp
+	ret
+
+GCC produces:
+
+__Z1fd:
+	subl	$12, %esp
+	movsd	16(%esp), %xmm0
+	mulsd	%xmm0, %xmm0
+	mulsd	%xmm0, %xmm0
+	movsd	%xmm0, (%esp)
+	fldl	(%esp)
+	addl	$12, %esp
+	ret
+
+//===---------------------------------------------------------------------===//
+
+We compile this program: (from GCC PR11680)
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487
+
+Into code that runs the same speed in fast/slow modes, but both modes run 2x
+slower than when compiled with GCC (either 4.0 or 4.2):
+
+$ llvm-g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+1.821u 0.003s 0:01.82 100.0%	0+0k 0+0io 0pf+0w
+
+$ g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+0.821u 0.001s 0:00.82 100.0%	0+0k 0+0io 0pf+0w
+
+It looks like we are making the same inlining decisions, so this may be raw
+codegen badness or something else (haven't investigated).
+
+//===---------------------------------------------------------------------===//
+
+We miss some instcombines for stuff like this:
+void bar (void);
+void foo (unsigned int a) {
+  /* This one is equivalent to a >= (3 << 2).  */
+  if ((a >> 2) >= 3)
+    bar ();
+}
+
+A few other related ones are in GCC PR14753.
+
+//===---------------------------------------------------------------------===//
+
+Divisibility by constant can be simplified (according to GCC PR12849) from
+being a mulhi to being a mul lo (cheaper).
Testcase:
+
+void bar(unsigned n) {
+  if (n % 3 == 0)
+    true();
+}
+
+I think this basically amounts to a dag combine to simplify comparisons against
+multiply hi's into a comparison against the mullo.
+
+//===---------------------------------------------------------------------===//
+
+Better mod/ref analysis for scanf would allow us to eliminate the vtable and a
+bunch of other stuff from this example (see PR1604):
+
+#include <cstdio>
+struct test {
+    int val;
+    virtual ~test() {}
+};
+
+int main() {
+    test t;
+    std::scanf("%d", &t.val);
+    std::printf("%d\n", t.val);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing
+(x - 10) u< 10, but only when the comparisons have matching sign.
+
+This could be converted with a similar technique. (PR1941)
+
+define i1 @test(i8 %x) {
+  %A = icmp uge i8 %x, 5
+  %B = icmp slt i8 %x, 20
+  %C = and i1 %A, %B
+  ret i1 %C
+}
+
+//===---------------------------------------------------------------------===//
+
+These functions perform the same computation, but produce different assembly.
+
+define i8 @select(i8 %x) readnone nounwind {
+  %A = icmp ult i8 %x, 250
+  %B = select i1 %A, i8 0, i8 1
+  ret i8 %B
+}
+
+define i8 @addshr(i8 %x) readnone nounwind {
+  %A = zext i8 %x to i9
+  %B = add i9 %A, 6       ;; 256 - 250 == 6
+  %C = lshr i9 %B, 8
+  %D = trunc i9 %C to i8
+  ret i8 %D
+}
+
+//===---------------------------------------------------------------------===//
+
+From gcc bug 24696:
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+  return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0);
+}
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+  return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
+}
+Both should combine to ((a|b) & (c-1)) != 0.  Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 20192:
+#define PMD_MASK    (~((1UL << 23) - 1))
+void clear_pmd_range(unsigned long start, unsigned long end)
+{
+   if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK))
+       f();
+}
+The expression should optimize to something like
+"!((start|end)&~PMD_MASK)".  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 15241:
+unsigned int
+foo (unsigned int a, unsigned int b)
+{
+ if (a <= 7 && b <= 7)
+   baz ();
+}
+Should combine to "(a|b) <= 7".  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 3756:
+int
+pn (int n)
+{
+ return (n >= 0 ? 1 : -1);
+}
+Should combine to (n >> 31) | 1.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 28685:
+int test(int a, int b)
+{
+ int lt = a < b;
+ int eq = a == b;
+
+ return (lt || eq);
+}
+Should combine to "a <= b".  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+void a(int variable)
+{
+ if (variable == 4 || variable == 6)
+   bar();
+}
+This should optimize to "if ((variable | 2) == 6)".  Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc".
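+
+The rewrite is sound because 4 and 6 differ only in bit 1; a brute-force
+check of the equivalence:
+
+#include <assert.h>
+
+int main(void) {
+  for (int v = -100000; v <= 100000; v++)
+    assert((v == 4 || v == 6) == ((v | 2) == 6));
+  return 0;
+}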
+ +//===---------------------------------------------------------------------===// + +unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return +i;} +unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} +These should combine to the same thing. Currently, the first function +produces better code on X86. + +//===---------------------------------------------------------------------===// + +From GCC Bug 15784: +#define abs(x) x>0?x:-x +int f(int x, int y) +{ + return (abs(x)) >= 0; +} +This should optimize to x == INT_MIN. (With -fwrapv.) Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +From GCC Bug 14753: +void +rotate_cst (unsigned int a) +{ + a = (a << 10) | (a >> 22); + if (a == 123) + bar (); +} +void +minus_cst (unsigned int a) +{ + unsigned int tem; + + tem = 20 - a; + if (tem == 5) + bar (); +} +void +mask_gt (unsigned int a) +{ + /* This is equivalent to a > 15. */ + if ((a & ~7) > 8) + bar (); +} +void +rshift_gt (unsigned int a) +{ + /* This is equivalent to a > 23. */ + if ((a >> 2) > 5) + bar (); +} +All should simplify to a single comparison. All of these are +currently not optimized with "clang -emit-llvm-bc | opt +-std-compile-opts". + +//===---------------------------------------------------------------------===// + +From GCC Bug 32605: +int c(int* x) {return (char*)x+2 == (char*)x;} +Should combine to 0. Currently not optimized with "clang +-emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it). + +//===---------------------------------------------------------------------===// + +int a(unsigned char* b) {return *b > 99;} +There's an unnecessary zext in the generated code with "clang +-emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} +Should be combined to "((b >> 1) | b) & 1". Currently not optimized +with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);} +Should combine to "x | (y & 3)". Currently not optimized with "clang +-emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);} +Should combine to "a | 1". Currently not optimized with "clang +-emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} +Should fold to "(~a & c) | (a & b)". Currently not optimized with +"clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int a(int a,int b) {return (~(a|b))|a;} +Should fold to "a|~b". Currently not optimized with "clang +-emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int a(int a, int b) {return (a&&b) || (a&&!b);} +Should fold to "a". Currently not optimized with "clang -emit-llvm-bc +| opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int a(int a, int b, int c) {return (a&&b) || (!a&&c);} +Should fold to "a ? b : c", or at least something sane. 
Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
+Should fold to a && (b || c).  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x | ((x & 8) ^ 8);}
+Should combine to x | 8.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x ^ ((x & 8) ^ 8);}
+Should also combine to x | 8.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -1 : -9;}
+Should combine to (x | -9) ^ 8.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -9 : -1;}
+Should combine to x | -9.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return ((x | -9) ^ 8) & x;}
+Should combine to x & -9.  Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
+Should combine to "a * 0x88888888 >> 31".  Currently not optimized
+with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(char* x) {if ((*x & 32) == 0) return b();}
+There's an unnecessary zext in the generated code with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned long long x) {return 40 * (x >> 1);}
+Should combine to "20 * (((unsigned)x) & -2)".  Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+We would like to do the following transform in the instcombiner:
+
+  -X/C -> X/-C
+
+However, this isn't valid if (-X) overflows.  We can implement this when we
+have the concept of a "C signed subtraction" operator that is undefined on
+overflow.
+
+//===---------------------------------------------------------------------===//
+
+This was noticed in the entryblock for grokdeclarator in 403.gcc:
+
+        %tmp = icmp eq i32 %decl_context, 4
+        %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context
+        %tmp1 = icmp eq i32 %decl_context_addr.0, 1
+        %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0
+
+tmp1 should be simplified to something like:
+  (!tmp && decl_context == 1)
+
+which, since tmp is (decl_context == 4), is just (decl_context == 1).
+
+This allows recursive simplifications, tmp1 is used all over the place in
+the function, e.g. by:
+
+        %tmp23 = icmp eq i32 %decl_context_addr.1, 0            ; [#uses=1]
+        %tmp24 = xor i1 %tmp1, true             ; [#uses=1]
+        %or.cond8 = and i1 %tmp23, %tmp24               ; [#uses=1]
+
+later.
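+
+A brute-force check of the simplification claimed above (the select chain
+written out in C):
+
+#include <assert.h>
+
+int main(void) {
+  for (int decl_context = -100; decl_context <= 100; decl_context++) {
+    int tmp = (decl_context == 4);          /* %tmp */
+    int addr0 = tmp ? 3 : decl_context;     /* %decl_context_addr.0 */
+    int tmp1 = (addr0 == 1);                /* %tmp1 */
+    assert(tmp1 == (decl_context == 1));
+  }
+  return 0;
+}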
+
+//===---------------------------------------------------------------------===//
+
+Store sinking: This code:
+
+void f (int n, int *cond, int *res) {
+    int i;
+    *res = 0;
+    for (i = 0; i < n; i++)
+        if (*cond)
+            *res ^= 234; /* (*) */
+}
+
+On this function GVN hoists the fully redundant value of *res, but nothing
+moves the store out.  This gives us this code:
+
+bb:		; preds = %bb2, %entry
+	%.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ]
+	%i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ]
+	%1 = load i32* %cond, align 4
+	%2 = icmp eq i32 %1, 0
+	br i1 %2, label %bb2, label %bb1
+
+bb1:		; preds = %bb
+	%3 = xor i32 %.rle, 234
+	store i32 %3, i32* %res, align 4
+	br label %bb2
+
+bb2:		; preds = %bb, %bb1
+	%.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ]
+	%indvar.next = add i32 %i.05, 1
+	%exitcond = icmp eq i32 %indvar.next, %n
+	br i1 %exitcond, label %return, label %bb
+
+DSE should sink partially dead stores to get the store out of the loop.
+
+Here's another partial dead case:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395
+
+//===---------------------------------------------------------------------===//
+
+Scalar PRE hoists the mul in the common block up to the else:
+
+int test (int a, int b, int c, int g) {
+  int d, e;
+  if (a)
+    d = b * c;
+  else
+    d = b - c;
+  e = b * c + g;
+  return d + e;
+}
+
+It would be better to do the mul once to reduce codesize above the if.
+This is GCC PR38204.
+
+//===---------------------------------------------------------------------===//
+
+GCC PR37810 is an interesting case where we should sink load/store reload
+into the if block and outside the loop, so we don't reload/store it on the
+non-call path.
+
+for () {
+  *P += 1;
+  if ()
+    call();
+  else
+    ...
+->
+tmp = *P
+for () {
+  tmp += 1;
+  if () {
+    *P = tmp;
+    call();
+    tmp = *P;
+  } else ...
+}
+*P = tmp;
+
+We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but
+we don't sink the store.  We need partially dead store sinking.
+
+//===---------------------------------------------------------------------===//
+
+[PHI TRANSLATE GEPs]
+
+GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack
+leading to excess stack traffic. This could be handled by GVN with some crazy
+symbolic phi translation.  The code we get looks like (g is on the stack):
+
+bb2:		; preds = %bb1
+..
+	%9 = getelementptr %struct.f* %g, i32 0, i32 0
+	store i32 %8, i32* %9, align 4
+	br label %bb3
+
+bb3:		; preds = %bb1, %bb2, %bb
+	%c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ]
+	%b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ]
+	%10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0
+	%11 = load i32* %10, align 4
+
+%11 is fully redundant, and in BB2 it should have the value %8.
+
+GCC PR33344 is a similar case.
+
+//===---------------------------------------------------------------------===//
+
+There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
+GCC testsuite.  There are many pre testcases as ssa-pre-*.c
+
+//===---------------------------------------------------------------------===//
+
+There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
+GCC testsuite.
For example, predcom-1.c is: + + for (i = 2; i < 1000; i++) + fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff; + +which compiles into: + +bb1: ; preds = %bb1, %bb1.thread + %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ] + %i.0.reg2mem.0 = add i32 %indvar, 2 + %0 = add i32 %indvar, 1 ; [#uses=3] + %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0 + %2 = load i32* %1, align 4 ; [#uses=1] + %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar + %4 = load i32* %3, align 4 ; [#uses=1] + %5 = add i32 %4, %2 ; [#uses=1] + %6 = and i32 %5, 65535 ; [#uses=1] + %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0 + store i32 %6, i32* %7, align 4 + %exitcond = icmp eq i32 %0, 998 ; [#uses=1] + br i1 %exitcond, label %return, label %bb1 + +This is basically: + LOAD fib[i+1] + LOAD fib[i] + STORE fib[i+2] + +instead of handling this as a loop or other xform, all we'd need to do is teach +load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) = +(i'+2) (where i' is the previous iteration of i). This would find the store +which feeds it. + +predcom-2.c is apparently the same as predcom-1.c +predcom-3.c is very similar but needs loads feeding each other instead of +store->load. +predcom-4.c seems the same as the rest. + + +//===---------------------------------------------------------------------===// + +Other simple load PRE cases: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting] + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge) + llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis + +//===---------------------------------------------------------------------===// + +Type based alias analysis: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 + +//===---------------------------------------------------------------------===// + +When GVN/PRE finds a store of float* to a must aliases pointer when expecting +an int*, it should turn it into a bitcast. This is a nice generalization of +the SROA hack that would apply to other cases, e.g.: + +int foo(int C, int *P, float X) { + if (C) { + bar(); + *P = 42; + } else + *(float*)P = X; + + return *P; +} + + +One example (that requires crazy phi translation) is: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] + +//===---------------------------------------------------------------------===// + +A/B get pinned to the stack because we turn an if/then into a select instead +of PRE'ing the load/store. 
This may be fixable in instcombine: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892 + + + +Interesting missed case because of control flow flattening (should be 2 loads): +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 +With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | + opt -mem2reg -gvn -instcombine | llvm-dis +we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT +VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS + +//===---------------------------------------------------------------------===// + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633 +We could eliminate the branch condition here, loading from null is undefined: + +struct S { int w, x, y, z; }; +struct T { int r; struct S s; }; +void bar (struct S, int); +void foo (int a, struct T b) +{ + struct S *c = 0; + if (a) + c = &b.s; + bar (*c, a); +} + +//===---------------------------------------------------------------------===// + +simplifylibcalls should do several optimizations for strspn/strcspn: + +strcspn(x, "") -> strlen(x) +strcspn("", x) -> 0 +strspn("", x) -> 0 +strspn(x, "") -> strlen(x) +strspn(x, "a") -> strchr(x, 'a')-x + +strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn): + +size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2, + int __reject3) { + register size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 && + __s[__result] != __reject2 && __s[__result] != __reject3) + ++__result; + return __result; +} + +This should turn into a switch on the character. See PR3253 for some notes on +codegen. + +456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn. + +//===---------------------------------------------------------------------===// + +"gas" uses this idiom: + else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) +.. + else if (strchr ("<>", *intel_parser.op_string) + +Those should be turned into a switch. + +//===---------------------------------------------------------------------===// + +252.eon contains this interesting code: + + %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0 + %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind + %strlen = call i32 @strlen(i8* %3072) ; uses = 1 + %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen + call void @llvm.memcpy.i32(i8* %endptr, + i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1) + %3074 = call i32 @strlen(i8* %endptr) nounwind readonly + +This is interesting for a couple reasons. First, in this: + + %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind + %strlen = call i32 @strlen(i8* %3072) + +The strlen could be replaced with: %strlen = sub %3072, %3073, because the +strcpy call returns a pointer to the end of the string. Based on that, the +endptr GEP just becomes equal to 3073, which eliminates a strlen call and GEP. + +Second, the memcpy+strlen strlen can be replaced with: + + %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly + +Because the destination was just copied into the specified memory buffer. This, +in turn, can be constant folded to "4". + +In other code, it contains: + + %endptr6978 = bitcast i8* %endptr69 to i32* + store i32 7107374, i32* %endptr6978, align 1 + %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly + +Which could also be constant folded. Whatever is producing this should probably +be fixed to leave this as a memcpy from a string. 
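+
+Why that last strlen is constant-foldable: 7107374 == 0x006C732E, whose
+little-endian bytes are '.', 's', 'l', '\0', so the 32-bit store writes the
+C string ".sl" and the strlen must fold to 3.  A host demonstration (assumes
+a little-endian machine, matching the x86 context here):
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+int main(void) {
+  char buf[8] = {0};
+  uint32_t v = 7107374;
+  memcpy(buf, &v, sizeof v);
+  printf("%zu %s\n", strlen(buf), buf);   /* prints: 3 .sl */
+  return 0;
+}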
+
+Further, eon also has an interesting partially redundant strlen call:
+
+bb8:		; preds = %_ZN18eonImageCalculatorC1Ev.exit
+	%682 = getelementptr i8** %argv, i32 6		; [#uses=2]
+	%683 = load i8** %682, align 4		; [#uses=4]
+	%684 = load i8* %683, align 1		; [#uses=1]
+	%685 = icmp eq i8 %684, 0		; [#uses=1]
+	br i1 %685, label %bb10, label %bb9
+
+bb9:		; preds = %bb8
+	%686 = call i32 @strlen(i8* %683) nounwind readonly
+	%687 = icmp ugt i32 %686, 254		; [#uses=1]
+	br i1 %687, label %bb10, label %bb11
+
+bb10:		; preds = %bb9, %bb8
+	%688 = call i32 @strlen(i8* %683) nounwind readonly
+
+This could be eliminated by doing the strlen once in bb8, saving code size and
+improving perf on the bb8->9->10 path.
+
+//===---------------------------------------------------------------------===//
+
+I see an interesting fully redundant call to strlen left in 186.crafty:InputMove
+which looks like:
+	%movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0
+
+
+bb62:		; preds = %bb55, %bb53
+	%promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ]
+	%171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+	%172 = add i32 %171, -1		; [#uses=1]
+	%173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172
+
+...  no stores ...
+	br i1 %or.cond, label %bb65, label %bb72
+
+bb65:		; preds = %bb62
+	store i8 0, i8* %173, align 1
+	br label %bb72
+
+bb72:		; preds = %bb65, %bb62
+	%trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ]
+	%177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+
+Note that on the bb62->bb72 path, the %177 strlen call is partially
+redundant with the %171 call.  At worst, we could shove the %177 strlen call
+up into the bb65 block moving it out of the bb62->bb72 path.  However, note
+that bb65 stores to the string, zeroing out the last byte.  This means that on
+that path the value of %177 is actually just %171-1.  A sub is cheaper than a
+strlen!
+
+This pattern repeats several times, basically doing:
+
+  A = strlen(P);
+  P[A-1] = 0;
+  B = strlen(P);
+  where it is "obvious" that B = A-1.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty contains this interesting pattern:
+
+%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0),
+                       i8* %30)
+%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0)
+br i1 %phitmp648, label %bb70, label %bb76
+
+bb70:		; preds = %OptionMatch.exit91, %bb69
+	%78 = call i32 @strlen(i8* %30) nounwind readonly align 1		; [#uses=1]
+
+This is basically:
+  cststr = "abcdef";
+  if (strstr(cststr, P) == cststr) {
+     x = strlen(P);
+     ...
+
+The strstr call would be significantly cheaper written as:
+
+cststr = "abcdef";
+if (!memcmp(P, cststr, strlen(P)))
+  x = strlen(P);
+
+This is memcmp+strlen instead of strstr.  This also makes the strlen fully
+redundant.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty also contains this code:
+
+%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906
+%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1
+%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909
+
+The last strlen need not re-walk the whole string: since the strcpy appends
+%1905 at offset %1906, %1909 is just %1906 + strlen(%1905), which also makes
+%1910 equal to %1908 + strlen(%1905).
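+
+The same redundancy at the source level (hypothetical buffers, for
+illustration only):
+
+#include <string.h>
+#include <assert.h>
+
+int main(void) {
+  char pgn_event[32] = "start";
+  const char *suffix = "-tag";
+  size_t a = strlen(pgn_event);        /* %1906 */
+  strcpy(pgn_event + a, suffix);       /* append at the old end (%1907) */
+  size_t b = strlen(pgn_event);        /* %1909: recomputed from scratch */
+  assert(b == a + strlen(suffix));     /* but it is derivable directly */
+  return 0;
+}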
+ +//===---------------------------------------------------------------------===// + +186.crafty has this interesting pattern with the "out.4543" variable: + +call void @llvm.memcpy.i32( + i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0), + i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1) +%101 = call@printf(i8* ... @out.4543, i32 0, i32 0)) nounwind + +It is basically doing: + + memcpy(globalarray, "string"); + printf(..., globalarray); + +Anyway, by knowing that printf just reads the memory and forward substituting +the string directly into the printf, this eliminates reads from globalarray. +Since this pattern occurs frequently in crafty (due to the "DisplayTime" and +other similar functions) there are many stores to "out". Once all the printfs +stop using "out", all that is left is the memcpy's into it. This should allow +globalopt to remove the "stored only" global. + +//===---------------------------------------------------------------------===// + +This code: + +define inreg i32 @foo(i8* inreg %p) nounwind { + %tmp0 = load i8* %p + %tmp1 = ashr i8 %tmp0, 5 + %tmp2 = sext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +could be dagcombine'd to a sign-extending load with a shift. +For example, on x86 this currently gets this: + + movb (%eax), %al + sarb $5, %al + movsbl %al, %eax + +while it could get this: + + movsbl (%eax), %eax + sarl $5, %eax + +//===---------------------------------------------------------------------===// + +GCC PR31029: + +int test(int x) { return 1-x == x; } // --> return false +int test2(int x) { return 2-x == x; } // --> return x == 1 ? + +Always foldable for odd constants, what is the rule for even? + +//===---------------------------------------------------------------------===// + +PR 3381: GEP to field of size 0 inside a struct could be turned into GEP +for next field in struct (which is at same address). + +For example: store of float into { {{}}, float } could be turned into a store to +the float directly. + +//===---------------------------------------------------------------------===// + +#include +double foo(double a) { return sin(a); } + +This compiles into this on x86-64 Linux: +foo: + subq $8, %rsp + call sin + addq $8, %rsp + ret +vs: + +foo: + jmp sin + +//===---------------------------------------------------------------------===// + +The arg promotion pass should make use of nocapture to make its alias analysis +stuff much more precise. + +//===---------------------------------------------------------------------===// + +The following functions should be optimized to use a select instead of a +branch (from gcc PR40072): + +char char_int(int m) {if(m>7) return 0; return m;} +int int_char(char m) {if(m>7) return 0; return m;} + +//===---------------------------------------------------------------------===// + +Instcombine should replace the load with a constant in: + + static const char x[4] = {'a', 'b', 'c', 'd'}; + + unsigned int y(void) { + return *(unsigned int *)x; + } + +It currently only does this transformation when the size of the constant +is the same as the size of the integer (so, try x[5]) and the last byte +is a null (making it a C string). There's no need for these restrictions. + +//===---------------------------------------------------------------------===// + +InstCombine's "turn load from constant into constant" optimization should be +more aggressive in the presence of bitcasts. 
For example, because of unions, +this code: + +union vec2d { + double e[2]; + double v __attribute__((vector_size(16))); +}; +typedef union vec2d vec2d; + +static vec2d a={{1,2}}, b={{3,4}}; + +vec2d foo () { + return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v }; +} + +Compiles into: + +@a = internal constant %0 { [2 x double] + [double 1.000000e+00, double 2.000000e+00] }, align 16 +@b = internal constant %0 { [2 x double] + [double 3.000000e+00, double 4.000000e+00] }, align 16 +... +define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind { +entry: + %0 = load <2 x double>* getelementptr (%struct.vec2d* + bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16 + %1 = load <2 x double>* getelementptr (%struct.vec2d* + bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16 + + +Instcombine should be able to optimize away the loads (and thus the globals). + + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..394b4cd40e76 --- /dev/null +++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMSparcAsmPrinter + SparcAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMSparcCodeGen n) + +add_dependencies(LLVMSparcAsmPrinter ${n}) diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile new file mode 100644 index 000000000000..f12a6ac39891 --- /dev/null +++ b/lib/Target/Sparc/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMSparcAsmPrinter + +# Hack: we need to include 'main' Sparc target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp new file mode 100644 index 000000000000..61707f5556fb --- /dev/null +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -0,0 +1,355 @@ +//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format SPARC assembly language. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+#include <cstring>
+#include <map>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+  class VISIBILITY_HIDDEN SparcAsmPrinter : public AsmPrinter {
+    /// We name each basic block in a Function with a unique number, so
+    /// that we can consistently refer to them later. This is cleared
+    /// at the beginning of each call to runOnMachineFunction().
+    ///
+    typedef std::map<const Value *, unsigned> ValueMapTy;
+    ValueMapTy NumberForBB;
+  public:
+    explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
+                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
+                             bool V)
+      : AsmPrinter(O, TM, T, OL, V) {}
+
+    virtual const char *getPassName() const {
+      return "Sparc Assembly Printer";
+    }
+
+    void printModuleLevelGV(const GlobalVariable* GVar);
+    void printOperand(const MachineInstr *MI, int opNum);
+    void printMemOperand(const MachineInstr *MI, int opNum,
+                         const char *Modifier = 0);
+    void printCCOperand(const MachineInstr *MI, int opNum);
+
+    bool printInstruction(const MachineInstr *MI);  // autogenerated.
+    bool runOnMachineFunction(MachineFunction &F);
+    bool doInitialization(Module &M);
+    bool doFinalization(Module &M);
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode);
+  };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+/// createSparcCodePrinterPass - Returns a pass that prints the SPARC
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description.  This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
+                                               TargetMachine &tm,
+                                               CodeGenOpt::Level OptLevel,
+                                               bool verbose) {
+  return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+
+  SetupMachineFunction(MF);
+
+  // Print out constants referenced by the function
+  EmitConstantPool(MF.getConstantPool());
+
+  // BBNumber is used here so that a given Printer will never give two
+  // BBs the same name. (If you have a better way, please let me know!)
+  static unsigned BBNumber = 0;
+
+  O << "\n\n";
+
+  // Print out the label for the function.
+ const Function *F = MF.getFunction(); + SwitchToSection(TAI->SectionForGlobal(F)); + EmitAlignment(4, F); + O << "\t.globl\t" << CurrentFnName << '\n'; + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ", #function\n"; + O << CurrentFnName << ":\n"; + + // Number each basic block so that we can consistently refer to them + // in PC-relative references. + // FIXME: Why not use the MBB numbers? + NumberForBB.clear(); + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + NumberForBB[I->getBasicBlock()] = BBNumber++; + } + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true, true); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printInstruction(II); + ++EmittedInsts; + } + } + + // We didn't modify anything. + return false; +} + +void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand (opNum); + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + bool CloseParen = false; + if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { + O << "%hi("; + CloseParen = true; + } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && + !MO.isReg() && !MO.isImm()) { + O << "%lo("; + CloseParen = true; + } + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << "%" << LowercaseString (RI.get(MO.getReg()).AsmName); + else + O << "%reg" << MO.getReg(); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_GlobalAddress: + { + const GlobalValue *GV = MO.getGlobal(); + O << Mang->getValueName(GV); + } + break; + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + break; + default: + O << ""; abort (); break; + } + if (CloseParen) O << ")"; +} + +void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { + printOperand(MI, opNum); + + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + O << ", "; + printOperand(MI, opNum+1); + return; + } + + if (MI->getOperand(opNum+1).isReg() && + MI->getOperand(opNum+1).getReg() == SP::G0) + return; // don't print "+%g0" + if (MI->getOperand(opNum+1).isImm() && + MI->getOperand(opNum+1).getImm() == 0) + return; // don't print "+0" + + O << "+"; + if (MI->getOperand(opNum+1).isGlobal() || + MI->getOperand(opNum+1).isCPI()) { + O << "%lo("; + printOperand(MI, opNum+1); + O << ")"; + } else { + printOperand(MI, opNum+1); + } +} + +void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << SPARCCondCodeToString((SPCC::CondCodes)CC); +} + +bool SparcAsmPrinter::doInitialization(Module &M) { + Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix()); + return false; // success +} + +bool SparcAsmPrinter::doFinalization(Module &M) { + // Print out module-level global variables here. 
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + O << '\n'; + + return AsmPrinter::doFinalization(M); +} + +void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + O << "\n\n"; + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = TD->getPreferredAlignment(GVar); + + printVisibility(name, GVar->getVisibility()); + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && !GVar->hasSection()) { + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) + O << "\t.local " << name << '\n'; + + O << TAI->getCOMMDirective() << name << ',' << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (1 << Align); + + O << '\n'; + return; + } + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: // FIXME: Verify correct for weak. + case GlobalValue::WeakODRLinkage: // FIXME: Verify correct for weak. + // Nonnull linkonce -> weak + O << "\t.weak " << name << '\n'; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << TAI->getGlobalDirective() << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + case GlobalValue::GhostLinkage: + cerr << "Should not have any unmaterialized functions!\n"; + abort(); + case GlobalValue::DLLImportLinkage: + cerr << "DLLImport linkage is not supported by this target!\n"; + abort(); + case GlobalValue::DLLExportLinkage: + cerr << "DLLExport linkage is not supported by this target!\n"; + abort(); + default: + assert(0 && "Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + + if (TAI->hasDotTypeDotSizeDirective()) { + O << "\t.type " << name << ",#object\n"; + O << "\t.size " << name << ',' << Size << '\n'; + } + + O << name << ":\n"; + EmitGlobalConstant(C); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. 
+ case 'r': + break; + } + } + + printOperand(MI, OpNo); + + return false; +} + +bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printMemOperand(MI, OpNo); + O << ']'; + + return false; +} diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt new file mode 100644 index 000000000000..eefa7e8f4d9a --- /dev/null +++ b/lib/Target/Sparc/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS Sparc.td) + +tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(SparcGenRegisterNames.inc -gen-register-enums) +tablegen(SparcGenRegisterInfo.inc -gen-register-desc) +tablegen(SparcGenInstrNames.inc -gen-instr-enums) +tablegen(SparcGenInstrInfo.inc -gen-instr-desc) +tablegen(SparcGenAsmWriter.inc -gen-asm-writer) +tablegen(SparcGenDAGISel.inc -gen-dag-isel) +tablegen(SparcGenSubtarget.inc -gen-subtarget) +tablegen(SparcGenCallingConv.inc -gen-callingconv) + +add_llvm_target(SparcCodeGen + DelaySlotFiller.cpp + FPMover.cpp + SparcInstrInfo.cpp + SparcISelDAGToDAG.cpp + SparcISelLowering.cpp + SparcRegisterInfo.cpp + SparcSubtarget.cpp + SparcTargetAsmInfo.cpp + SparcTargetMachine.cpp + ) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp new file mode 100644 index 000000000000..15b26c29872f --- /dev/null +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -0,0 +1,76 @@ +//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a simple local pass that fills delay slots with NOPs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "delayslotfiller" +#include "Sparc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(FilledSlots, "Number of delay slots filled"); + +namespace { + struct Filler : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. + /// + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + Filler(TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "SPARC Delay Slot Filler"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) { + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; + } + + }; + char Filler::ID = 0; +} // end of anonymous namespace + +/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay +/// slots in Sparc MachineFunctions +/// +FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { + return new Filler(tm); +} + +/// runOnMachineBasicBlock - Fill in delay slots for the given basic block. +/// Currently, we fill delay slots with NOPs. We assume there is only one +/// delay slot per delayed instruction. 
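+/// For example, after this pass a call reads:
+///
+///     call  foo
+///     nop          ! filled delay slot
+///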
+/// +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator J = I; + ++J; + BuildMI(MBB, J, DebugLoc::getUnknownLoc(), TII->get(SP::NOP)); + ++FilledSlots; + Changed = true; + } + return Changed; +} diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp new file mode 100644 index 000000000000..f72a4c4645c1 --- /dev/null +++ b/lib/Target/Sparc/FPMover.cpp @@ -0,0 +1,139 @@ +//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "fpmover" +#include "Sparc.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +STATISTIC(NumFpDs , "Number of instructions translated"); +STATISTIC(NoopFpDs, "Number of noop instructions removed"); + +namespace { + struct FPMover : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. + /// + TargetMachine &TM; + + static char ID; + explicit FPMover(TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm) { } + + virtual const char *getPassName() const { + return "Sparc Double-FP Move Fixer"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F); + }; + char FPMover::ID = 0; +} // end of anonymous namespace + +/// createSparcFPMoverPass - Returns a pass that turns FpMOVD +/// instructions into FMOVS instructions +/// +FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) { + return new FPMover(tm); +} + +/// getDoubleRegPair - Given a DFP register, return the even and odd FP +/// registers that correspond to it. +static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, + unsigned &OddReg) { + static const unsigned EvenHalvesOfPairs[] = { + SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14, + SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30 + }; + static const unsigned OddHalvesOfPairs[] = { + SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15, + SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31 + }; + static const unsigned DoubleRegsInOrder[] = { + SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8, + SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15 + }; + for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i) + if (DoubleRegsInOrder[i] == DoubleReg) { + EvenReg = EvenHalvesOfPairs[i]; + OddReg = OddHalvesOfPairs[i]; + return; + } + assert(0 && "Can't find reg"); +} + +/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB. 
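+/// A double-precision pseudo is rewritten as two single-precision moves of
+/// the even/odd halves of the register pair; e.g. FpMOVD from %D0 to %D1
+/// becomes:
+///
+///     fmovs %f0, %f2
+///     fmovs %f1, %f3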
+/// +bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + MachineInstr *MI = I++; + DebugLoc dl = MI->getDebugLoc(); + if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD || + MI->getOpcode() == SP::FpNEGD) { + Changed = true; + unsigned DestDReg = MI->getOperand(0).getReg(); + unsigned SrcDReg = MI->getOperand(1).getReg(); + if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) { + MBB.erase(MI); // Eliminate the noop copy. + ++NoopFpDs; + continue; + } + + unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0; + getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg); + getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg); + + const TargetInstrInfo *TII = TM.getInstrInfo(); + if (MI->getOpcode() == SP::FpMOVD) + MI->setDesc(TII->get(SP::FMOVS)); + else if (MI->getOpcode() == SP::FpNEGD) + MI->setDesc(TII->get(SP::FNEGS)); + else if (MI->getOpcode() == SP::FpABSD) + MI->setDesc(TII->get(SP::FABSS)); + else + assert(0 && "Unknown opcode!"); + + MI->getOperand(0).setReg(EvenDestReg); + MI->getOperand(1).setReg(EvenSrcReg); + DOUT << "FPMover: the modified instr is: " << *MI; + // Insert copy for the other half of the double. + if (DestDReg != SrcDReg) { + MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg) + .addReg(OddSrcReg); + DOUT << "FPMover: the inserted instr is: " << *MI; + } + ++NumFpDs; + } + } + return Changed; +} + +bool FPMover::runOnMachineFunction(MachineFunction &F) { + // If the target has V9 instructions, the fp-mover pseudos will never be + // emitted. Avoid a scan of the instructions to improve compile time. + if (TM.getSubtarget().isV9()) + return false; + + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; +} diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile new file mode 100644 index 000000000000..fdf6afaee076 --- /dev/null +++ b/lib/Target/Sparc/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSparcCodeGen +TARGET = Sparc + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ + SparcGenRegisterInfo.inc SparcGenInstrNames.inc \ + SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ + SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt new file mode 100644 index 000000000000..cc24abf7286f --- /dev/null +++ b/lib/Target/Sparc/README.txt @@ -0,0 +1,58 @@ + +To-do +----- + +* Keep the address of the constant pool in a register instead of forming its + address all of the time. +* We can fold small constant offsets into the %hi/%lo references to constant + pool addresses as well. +* When in V9 mode, register allocate %icc[0-3]. +* Add support for isel'ing UMUL_LOHI instead of marking it as Expand. +* Emit the 'Branch on Integer Register with Prediction' instructions. 
It's + not clear how to write a pattern for this though: + +float %t1(int %a, int* %p) { + %C = seteq int %a, 0 + br bool %C, label %T, label %F +T: + store int 123, int* %p + br label %F +F: + ret float undef +} + +codegens to this: + +t1: + save -96, %o6, %o6 +1) subcc %i0, 0, %l0 +1) bne .LBBt1_2 ! F + nop +.LBBt1_1: ! T + or %g0, 123, %l0 + st %l0, [%i1] +.LBBt1_2: ! F + restore %g0, %g0, %g0 + retl + nop + +1) should be replaced with a brz in V9 mode. + +* Same as above, but emit conditional move on register zero (p192) in V9 + mode. Testcase: + +int %t1(int %a, int %b) { + %C = seteq int %a, 0 + %D = select bool %C, int %a, int %b + ret int %D +} + +* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling + with the Y register, if they are faster. + +* Codegen bswap(load)/store(bswap) -> load/store ASI + +* Implement frame pointer elimination, e.g. eliminate save/restore for + leaf fns. +* Fill delay slots + diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h new file mode 100644 index 000000000000..bb03f30f2dd6 --- /dev/null +++ b/lib/Target/Sparc/Sparc.h @@ -0,0 +1,119 @@ +//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Sparc back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_SPARC_H +#define TARGET_SPARC_H + +#include "llvm/Target/TargetMachine.h" +#include + +namespace llvm { + class FunctionPass; + class SparcTargetMachine; + class raw_ostream; + + FunctionPass *createSparcISelDag(SparcTargetMachine &TM); + FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); + FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createSparcFPMoverPass(TargetMachine &TM); +} // end namespace llvm; + +// Defines symbolic names for Sparc registers. This defines a mapping from +// register name to register number. +// +#include "SparcGenRegisterNames.inc" + +// Defines symbolic names for the Sparc instructions. +// +#include "SparcGenInstrNames.inc" + + +namespace llvm { + // Enums corresponding to Sparc condition codes, both icc's and fcc's. These + // values must be kept in sync with the ones in the .td file. 
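+  // Note the encoding: each FCC_* value is the corresponding condition's
+  // field value plus 16, so one enum covers both condition-code registers;
+  // SPARCCondCodeToString below maps e.g. ICC_NE and FCC_NE both to "ne".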
+ namespace SPCC { + enum CondCodes { + //ICC_A = 8 , // Always + //ICC_N = 0 , // Never + ICC_NE = 9 , // Not Equal + ICC_E = 1 , // Equal + ICC_G = 10 , // Greater + ICC_LE = 2 , // Less or Equal + ICC_GE = 11 , // Greater or Equal + ICC_L = 3 , // Less + ICC_GU = 12 , // Greater Unsigned + ICC_LEU = 4 , // Less or Equal Unsigned + ICC_CC = 13 , // Carry Clear/Great or Equal Unsigned + ICC_CS = 5 , // Carry Set/Less Unsigned + ICC_POS = 14 , // Positive + ICC_NEG = 6 , // Negative + ICC_VC = 15 , // Overflow Clear + ICC_VS = 7 , // Overflow Set + + //FCC_A = 8+16, // Always + //FCC_N = 0+16, // Never + FCC_U = 7+16, // Unordered + FCC_G = 6+16, // Greater + FCC_UG = 5+16, // Unordered or Greater + FCC_L = 4+16, // Less + FCC_UL = 3+16, // Unordered or Less + FCC_LG = 2+16, // Less or Greater + FCC_NE = 1+16, // Not Equal + FCC_E = 9+16, // Equal + FCC_UE = 10+16, // Unordered or Equal + FCC_GE = 11+16, // Greater or Equal + FCC_UGE = 12+16, // Unordered or Greater or Equal + FCC_LE = 13+16, // Less or Equal + FCC_ULE = 14+16, // Unordered or Less or Equal + FCC_O = 15+16 // Ordered + }; + } + + inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case SPCC::ICC_NE: return "ne"; + case SPCC::ICC_E: return "e"; + case SPCC::ICC_G: return "g"; + case SPCC::ICC_LE: return "le"; + case SPCC::ICC_GE: return "ge"; + case SPCC::ICC_L: return "l"; + case SPCC::ICC_GU: return "gu"; + case SPCC::ICC_LEU: return "leu"; + case SPCC::ICC_CC: return "cc"; + case SPCC::ICC_CS: return "cs"; + case SPCC::ICC_POS: return "pos"; + case SPCC::ICC_NEG: return "neg"; + case SPCC::ICC_VC: return "vc"; + case SPCC::ICC_VS: return "vs"; + case SPCC::FCC_U: return "u"; + case SPCC::FCC_G: return "g"; + case SPCC::FCC_UG: return "ug"; + case SPCC::FCC_L: return "l"; + case SPCC::FCC_UL: return "ul"; + case SPCC::FCC_LG: return "lg"; + case SPCC::FCC_NE: return "ne"; + case SPCC::FCC_E: return "e"; + case SPCC::FCC_UE: return "ue"; + case SPCC::FCC_GE: return "ge"; + case SPCC::FCC_UGE: return "uge"; + case SPCC::FCC_LE: return "le"; + case SPCC::FCC_ULE: return "ule"; + case SPCC::FCC_O: return "o"; + } + } +} // end namespace llvm +#endif diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td new file mode 100644 index 000000000000..53ea8f4a35f0 --- /dev/null +++ b/lib/Target/Sparc/Sparc.td @@ -0,0 +1,76 @@ +//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// SPARC Subtarget features. 
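+// Each feature can be toggled from the llc command line, e.g.:
+//   llc -march=sparc -mattr=+v9,+vis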
+//
+
+def FeatureV9
+  : SubtargetFeature<"v9", "IsV9", "true",
+                     "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+  : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+                     "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+  : SubtargetFeature<"vis", "IsVIS", "true",
+                     "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo {
+  // Define how we want to layout our target-specific information field.
+  let TSFlagsFields = [];
+  let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic",         []>;
+def : Proc<"v8",              []>;
+def : Proc<"supersparc",      []>;
+def : Proc<"sparclite",       []>;
+def : Proc<"f934",            []>;
+def : Proc<"hypersparc",      []>;
+def : Proc<"sparclite86x",    []>;
+def : Proc<"sparclet",        []>;
+def : Proc<"tsc701",          []>;
+def : Proc<"v9",              [FeatureV9]>;
+def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+  // Pull in Instruction Info:
+  let InstructionSet = SparcInstrInfo;
+}
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 000000000000..33ecfdf5f750
--- /dev/null
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,32 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+  // All arguments get passed in integer registers if there is space.
+  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+
+  // Alternatively, they are assigned to the stack in 4-byte aligned units.
+  CCAssignToStack<4, 4>
+]>;
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 000000000000..c9bd62d0e20d
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,215 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+  /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const SparcSubtarget &Subtarget;
+public:
+  explicit SparcDAGToDAGISel(SparcTargetMachine &TM)
+    : SelectionDAGISel(TM),
+      Subtarget(TM.getSubtarget<SparcSubtarget>()) {
+  }
+
+  SDNode *Select(SDValue Op);
+
+  // Complex Pattern Selectors.
+  bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base,
+                    SDValue &Offset);
+
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps);
+
+  /// InstructionSelect - This callback is invoked by
+  /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+  virtual void InstructionSelect();
+
+  virtual const char *getPassName() const {
+    return "SPARC DAG->DAG Pattern Instruction Selection";
+  }
+
+  // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void SparcDAGToDAGISel::InstructionSelect() {
+  DEBUG(BB->dump());
+
+  // Select target instructions for the DAG.
+  SelectRoot(*CurDAG);
+  CurDAG->RemoveDeadNodes();
+}
+
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
+                                     SDValue &Base, SDValue &Offset) {
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // direct calls.
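+
+  // Next, fold "reg+imm" addresses, e.g. "ld [%i0+8], %o0"; the immediate
+  // must fit the 13-bit signed field checked by Predicate_simm13 below.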
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+      if (Predicate_simm13(CN)) {
+        if (FrameIndexSDNode *FIN =
+                dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+          // Constant offset from frame ref.
+          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+        } else {
+          Base = Addr.getOperand(0);
+        }
+        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+        return true;
+      }
+    }
+    if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+      Base = Addr.getOperand(1);
+      Offset = Addr.getOperand(0).getOperand(0);
+      return true;
+    }
+    if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+      Base = Addr.getOperand(0);
+      Offset = Addr.getOperand(1).getOperand(0);
+      return true;
+    }
+  }
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr,
+                                     SDValue &R1, SDValue &R2) {
+  if (Addr.getOpcode() == ISD::FrameIndex) return false;
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // direct calls.
+
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
+        Predicate_simm13(Addr.getOperand(1).getNode()))
+      return false;  // Let the reg+imm pattern catch this!
+    if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+        Addr.getOperand(1).getOpcode() == SPISD::Lo)
+      return false;  // Let the reg+imm pattern catch this!
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+  }
+
+  R1 = Addr;
+  R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+  return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
+  SDNode *N = Op.getNode();
+  DebugLoc dl = N->getDebugLoc();
+  if (N->isMachineOpcode())
+    return NULL;   // Already selected.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::SDIV:
+  case ISD::UDIV: {
+    // FIXME: should use a custom expander to expose the SRA to the dag.
+    SDValue DivLHS = N->getOperand(0);
+    SDValue DivRHS = N->getOperand(1);
+
+    // Set the Y register to the high-part.
+    SDValue TopPart;
+    if (N->getOpcode() == ISD::SDIV) {
+      TopPart = SDValue(CurDAG->getTargetNode(SP::SRAri, dl, MVT::i32, DivLHS,
+                                  CurDAG->getTargetConstant(31, MVT::i32)), 0);
+    } else {
+      TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+    }
+    TopPart = SDValue(CurDAG->getTargetNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+                                CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+    // FIXME: Handle div by immediate.
+    unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+    return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+                                TopPart);
+  }
+  case ISD::MULHU:
+  case ISD::MULHS: {
+    // FIXME: Handle mul by immediate.
+    SDValue MulLHS = N->getOperand(0);
+    SDValue MulRHS = N->getOperand(1);
+    unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+    SDNode *Mul = CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::Flag,
+                                        MulLHS, MulRHS);
+    // The high part is in the Y register.
+    return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+  }
+  }
+
+  return SelectCode(Op);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
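+/// For a memory operand such as the "m" constraint in:
+///
+///     asm volatile("ld %0, %%g1" : : "m"(*p));
+///
+/// the address is matched with SelectADDRrr/SelectADDRri above and later
+/// printed by the asm printer as "[base+offset]".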
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                                char ConstraintCode,
+                                                std::vector<SDValue> &OutOps) {
+  SDValue Op0, Op1;
+  switch (ConstraintCode) {
+  default: return true;
+  case 'm':   // memory
+    if (!SelectADDRrr(Op, Op, Op0, Op1))
+      SelectADDRri(Op, Op, Op0, Op1);
+    break;
+  }
+
+  OutOps.push_back(Op0);
+  OutOps.push_back(Op1);
+  return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+  return new SparcDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 000000000000..3ec7e06f0985
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1049 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SparcGenCallingConv.inc"
+
+static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CC, isVarArg, DAG.getTarget(), RVLocs);
+
+  // Analyze return values of ISD::RET
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Sparc32);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (regnum1,val1), ...
+    // So i*2+1 indexes only the regnums.
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
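+    // (Each CopyToReg consumes the flag produced by the previous copy, so
+    // the scheduler cannot reorder or separate them.)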
+ Flag = Chain.getValue(1); + } + + if (Flag.getNode()) + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain); +} + +/// LowerArguments - V8 uses a very simple ABI, where all values are passed in +/// either one or two GPRs, including FP values. TODO: we should pass FP values +/// in FP registers for fastcc functions. +void +SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, + SmallVectorImpl &ArgValues, + DebugLoc dl) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + static const unsigned ArgRegs[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + + const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6; + unsigned ArgOffset = 68; + + SDValue Root = DAG.getRoot(); + std::vector OutChains; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + MVT ObjectVT = getValueType(I->getType()); + + switch (ObjectVT.getSimpleVT()) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + if (I->use_empty()) { // Argument is dead. + if (CurArgReg < ArgRegEnd) ++CurArgReg; + ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR + unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); + SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + if (ObjectVT != MVT::i32) { + unsigned AssertOp = ISD::AssertSext; + Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg, + DAG.getValueType(ObjectVT)); + Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg); + } + ArgValues.push_back(Arg); + } else { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + SDValue Load; + if (ObjectVT == MVT::i32) { + Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + } else { + ISD::LoadExtType LoadOp = ISD::SEXTLOAD; + + // Sparc is big endian, so add an offset based on the ObjectVT. + unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); + FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, + DAG.getConstant(Offset, MVT::i32)); + Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr, + NULL, 0, ObjectVT); + Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); + } + ArgValues.push_back(Load); + } + + ArgOffset += 4; + break; + case MVT::f32: + if (I->use_empty()) { // Argument is dead. + if (CurArgReg < ArgRegEnd) ++CurArgReg; + ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR + // FP value is passed in an integer register. + unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); + SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); + ArgValues.push_back(Arg); + } else { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0); + ArgValues.push_back(Load); + } + ArgOffset += 4; + break; + + case MVT::i64: + case MVT::f64: + if (I->use_empty()) { // Argument is dead. 
+ if (CurArgReg < ArgRegEnd) ++CurArgReg; + if (CurArgReg < ArgRegEnd) ++CurArgReg; + ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + } else { + SDValue HiVal; + if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR + unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); + HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32); + } else { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + } + + SDValue LoVal; + if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR + unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); + LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32); + } else { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + } + + // Compose the two halves together into an i64 unit. + SDValue WholeValue = + DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); + + // If we want a double, do a bit convert. + if (ObjectVT == MVT::f64) + WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue); + + ArgValues.push_back(WholeValue); + } + ArgOffset += 8; + break; + } + } + + // Store remaining ArgRegs to the stack if this is a varargs function. + if (F.isVarArg()) { + // Remember the vararg offset for the va_start implementation. + VarArgsFrameOffset = ArgOffset; + + for (; CurArgReg != ArgRegEnd; ++CurArgReg) { + unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(*CurArgReg, VReg); + SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); + + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + ArgOffset += 4; + } + } + + if (!OutChains.empty()) + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size())); +} + +static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { + CallSDNode *TheCall = cast(Op.getNode()); + unsigned CallingConv = TheCall->getCallingConv(); + SDValue Chain = TheCall->getChain(); + SDValue Callee = TheCall->getCallee(); + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = TheCall->getDebugLoc(); + +#if 0 + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallingConv, isVarArg, DAG.getTarget(), ArgLocs); + CCInfo.AnalyzeCallOperands(Op.getNode(), CC_Sparc32); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + // FIXME: We can't use this until f64 is known to take two GPRs. +#else + (void)CC_Sparc32; + + // Count the size of the outgoing arguments. + unsigned ArgsSize = 0; + for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { + switch (TheCall->getArg(i).getValueType().getSimpleVT()) { + default: assert(0 && "Unknown value type!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::f32: + ArgsSize += 4; + break; + case MVT::i64: + case MVT::f64: + ArgsSize += 8; + break; + } + } + if (ArgsSize > 4*6) + ArgsSize -= 4*6; // Space for first 6 arguments is prereserved. 
+ else + ArgsSize = 0; +#endif + + // Keep stack frames 8-byte aligned. + ArgsSize = (ArgsSize+7) & ~7; + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; + +#if 0 + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments start after the 5 first operands of ISD::CALL + SDValue Arg = TheCall->getArg(i); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); + break; + } + + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + // FIXME: VERIFY THAT 68 IS RIGHT. + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); + PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + } + +#else + static const unsigned ArgRegs[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + unsigned ArgOffset = 68; + + for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { + SDValue Val = TheCall->getArg(i); + MVT ObjectVT = Val.getValueType(); + SDValue ValToStore(0, 0); + unsigned ObjSize; + switch (ObjectVT.getSimpleVT()) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i32: + ObjSize = 4; + + if (RegsToPass.size() >= 6) { + ValToStore = Val; + } else { + RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val)); + } + break; + case MVT::f32: + ObjSize = 4; + if (RegsToPass.size() >= 6) { + ValToStore = Val; + } else { + // Convert this to a FP value in an int reg. + Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val); + RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val)); + } + break; + case MVT::f64: { + ObjSize = 8; + if (RegsToPass.size() >= 6) { + ValToStore = Val; // Whole thing is passed in memory. + break; + } + + // Break into top and bottom parts by storing to the stack and loading + // out the parts as integers. Top part goes in a reg. + SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, + Val, StackPtr, NULL, 0); + // Sparc is big-endian, so the high part comes first. + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + // Increment the pointer to the other half. + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(4)); + // Load the low part. + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + + RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); + + if (RegsToPass.size() >= 6) { + ValToStore = Lo; + ArgOffset += 4; + ObjSize = 4; + } else { + RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo)); + } + break; + } + case MVT::i64: { + ObjSize = 8; + if (RegsToPass.size() >= 6) { + ValToStore = Val; // Whole thing is passed in memory. 
+        break;
+      }
+
+      // Split the value into top and bottom part. Top part goes in a reg.
+      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+                               DAG.getConstant(1, MVT::i32));
+      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+                               DAG.getConstant(0, MVT::i32));
+      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
+
+      if (RegsToPass.size() >= 6) {
+        ValToStore = Lo;
+        ArgOffset += 4;
+        ObjSize = 4;
+      } else {
+        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+      }
+      break;
+    }
+    }
+
+    if (ValToStore.getNode()) {
+      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+      SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
+      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+      MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore,
+                                         PtrOff, NULL, 0));
+    }
+    ArgOffset += ObjSize;
+  }
+#endif
+
+  // Emit all stores, make sure they occur before any copies into physregs.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    unsigned Reg = RegsToPass[i].first;
+    // Remap I0->I7 -> O0->O7.
+    if (Reg >= SP::I0 && Reg <= SP::I7)
+      Reg = Reg-SP::I0+SP::O0;
+
+    Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+  std::vector<MVT> NodeTys;
+  NodeTys.push_back(MVT::Other);   // Returns a chain
+  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+  SDValue Ops[] = { Chain, Callee, InFlag };
+  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CallingConv, isVarArg, DAG.getTarget(), RVLocs);
+
+  RVInfo.AnalyzeCallResult(TheCall, RetCC_Sparc32);
+  SmallVector<SDValue, 8> ResultVals;
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    unsigned Reg = RVLocs[i].getLocReg();
+
+    // Remap I0->I7 -> O0->O7.
+    if (Reg >= SP::I0 && Reg <= SP::I7)
+      Reg = Reg-SP::I0+SP::O0;
+
+    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+                               RVLocs[i].getValVT(), InFlag).getValue(1);
+    InFlag = Chain.getValue(2);
+    ResultVals.push_back(Chain.getValue(0));
+  }
+
+  ResultVals.push_back(Chain);
+
+  // Merge everything together with a MERGE_VALUES node.
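+  // MERGE_VALUES has the same value list the original call node had, so all
+  // uses of the call's results (and its chain) can simply be redirected to it.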
+ return DAG.getNode(ISD::MERGE_VALUES, dl, + TheCall->getVTList(), &ResultVals[0], + ResultVals.size()); +} + + + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC +/// condition. +static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown integer condition code!"); + case ISD::SETEQ: return SPCC::ICC_E; + case ISD::SETNE: return SPCC::ICC_NE; + case ISD::SETLT: return SPCC::ICC_L; + case ISD::SETGT: return SPCC::ICC_G; + case ISD::SETLE: return SPCC::ICC_LE; + case ISD::SETGE: return SPCC::ICC_GE; + case ISD::SETULT: return SPCC::ICC_CS; + case ISD::SETULE: return SPCC::ICC_LEU; + case ISD::SETUGT: return SPCC::ICC_GU; + case ISD::SETUGE: return SPCC::ICC_CC; + } +} + +/// FPCondCCodeToFCC - Convert a DAG floatingp oint condition code to a SPARC +/// FCC condition. +static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown fp condition code!"); + case ISD::SETEQ: + case ISD::SETOEQ: return SPCC::FCC_E; + case ISD::SETNE: + case ISD::SETUNE: return SPCC::FCC_NE; + case ISD::SETLT: + case ISD::SETOLT: return SPCC::FCC_L; + case ISD::SETGT: + case ISD::SETOGT: return SPCC::FCC_G; + case ISD::SETLE: + case ISD::SETOLE: return SPCC::FCC_LE; + case ISD::SETGE: + case ISD::SETOGE: return SPCC::FCC_GE; + case ISD::SETULT: return SPCC::FCC_UL; + case ISD::SETULE: return SPCC::FCC_ULE; + case ISD::SETUGT: return SPCC::FCC_UG; + case ISD::SETUGE: return SPCC::FCC_UGE; + case ISD::SETUO: return SPCC::FCC_U; + case ISD::SETO: return SPCC::FCC_O; + case ISD::SETONE: return SPCC::FCC_LG; + case ISD::SETUEQ: return SPCC::FCC_UE; + } +} + + +SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) + : TargetLowering(TM) { + + // Set up the register classes. + addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); + addRegisterClass(MVT::f32, SP::FPRegsRegisterClass); + addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); + + // Turn FP extload into load/fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // Sparc doesn't have i1 sign extending load + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // Custom legalize GlobalAddress nodes into LO/HI parts. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool , MVT::i32, Custom); + + // Sparc doesn't have sext_inreg, replace them with shl/sra + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + + // Sparc has no REM or DIVREM operations. 
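+  // Marking them Expand lets the legalizer rewrite them in terms of the
+  // operations that do exist; e.g. "srem %a, %b" becomes, roughly:
+  //   %q = sdiv %a, %b
+  //   %r = sub %a, (mul %q, %b)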
+ setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // Custom expand fp<->sint + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + + // Expand fp<->uint + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + + // Sparc has no select or setcc: expand to SELECT_CC. + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + + // Sparc doesn't have BRCOND either, it has BR_CC. + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + // SPARC has no intrinsics for these particular operations. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::ROTL , MVT::i32, Expand); + setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FPOW , MVT::f32, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + + // FIXME: Sparc provides these multiplies, but we don't have them yet. + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + + // We don't have line number support yet. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // RET must be custom lowered, to meet ABI requirements + setOperationAction(ISD::RET , MVT::Other, Custom); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART , MVT::Other, Custom); + // VAARG needs to be lowered to not do unaligned accesses for doubles. + setOperationAction(ISD::VAARG , MVT::Other, Custom); + + // Use the default implementation. 
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + + // No debug info support yet. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + setOperationAction(ISD::DECLARE, MVT::Other, Expand); + + setStackPointerRegisterToSaveRestore(SP::O6); + + if (TM.getSubtarget().isV9()) + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + + computeRegisterProperties(); +} + +const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case SPISD::CMPICC: return "SPISD::CMPICC"; + case SPISD::CMPFCC: return "SPISD::CMPFCC"; + case SPISD::BRICC: return "SPISD::BRICC"; + case SPISD::BRFCC: return "SPISD::BRFCC"; + case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC"; + case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC"; + case SPISD::Hi: return "SPISD::Hi"; + case SPISD::Lo: return "SPISD::Lo"; + case SPISD::FTOI: return "SPISD::FTOI"; + case SPISD::ITOF: return "SPISD::ITOF"; + case SPISD::CALL: return "SPISD::CALL"; + case SPISD::RET_FLAG: return "SPISD::RET_FLAG"; + } +} + +/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to +/// be zero. Op is expected to be a target specific node. Used by DAG +/// combiner. +void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + APInt KnownZero2, KnownOne2; + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + + switch (Op.getOpcode()) { + default: break; + case SPISD::SELECT_ICC: + case SPISD::SELECT_FCC: + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, + Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + } +} + +// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so +// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition. 
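+// The shape matched below is e.g. (brcond (setne (select_cc LHS, RHS, 1, 0,
+// cc), 0)): legalization can turn a setcc feeding a branch into a
+// SELECT_ICC/SELECT_FCC of 1 and 0 compared against zero; digging out the
+// original LHS/RHS lets us emit a single compare-and-branch instead of
+// materializing the boolean first.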
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+                             ISD::CondCode CC, unsigned &SPCC) {
+  if (isa<ConstantSDNode>(RHS) &&
+      cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+      CC == ISD::SETNE &&
+      ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+        LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+       (LHS.getOpcode() == SPISD::SELECT_FCC &&
+        LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+      isa<ConstantSDNode>(LHS.getOperand(0)) &&
+      isa<ConstantSDNode>(LHS.getOperand(1)) &&
+      cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+      cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+    SDValue CMPCC = LHS.getOperand(3);
+    SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+    LHS = CMPCC.getOperand(0);
+    RHS = CMPCC.getOperand(1);
+  }
+}
+
+static SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) {
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+  return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
+
+static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) {
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  Constant *C = N->getConstVal();
+  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+  return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Convert the fp value to integer in an FP register.
+  assert(Op.getValueType() == MVT::i32);
+  Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  assert(Op.getOperand(0).getValueType() == MVT::i32);
+  SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+  // Convert the int value to FP in an FP register.
+  return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
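+
+// A sketch of what the two hooks above produce (illustrative only; the node
+// names are real, the values t0..t2 are made up): for IR like
+//   %i = fptosi float %x to i32
+// the custom lowering builds
+//   t1: f32 = SPISD::FTOI t0    ; convert, result stays in an FP register
+//   t2: i32 = BIT_CONVERT t1    ; move the bit pattern to an integer register
+// and instruction selection later matches SPISD::FTOI/ITOF to the FSTOI,
+// FDTOI, FITOS and FITOD instructions in SparcInstrInfo.td further down in
+// this patch.  LowerSINT_TO_FP is the mirror image through SPISD::ITOF.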
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Opc, SPCC = ~0U;
+
+  // If this is a br_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+  LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+  // Get the condition flag.
+  SDValue CompareFlag;
+  if (LHS.getValueType() == MVT::i32) {
+    std::vector<MVT> VTs;
+    VTs.push_back(MVT::i32);
+    VTs.push_back(MVT::Flag);
+    SDValue Ops[2] = { LHS, RHS };
+    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+    if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+    Opc = SPISD::BRICC;
+  } else {
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+    Opc = SPISD::BRFCC;
+  }
+  return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+                     DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue TrueVal = Op.getOperand(2);
+  SDValue FalseVal = Op.getOperand(3);
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Opc, SPCC = ~0U;
+
+  // If this is a select_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+  LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+  SDValue CompareFlag;
+  if (LHS.getValueType() == MVT::i32) {
+    std::vector<MVT> VTs;
+    VTs.push_back(LHS.getValueType());   // subcc returns a value
+    VTs.push_back(MVT::Flag);
+    SDValue Ops[2] = { LHS, RHS };
+    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+    Opc = SPISD::SELECT_ICC;
+    if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+  } else {
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    Opc = SPISD::SELECT_FCC;
+    if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+  }
+  return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+                     DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+                            SparcTargetLowering &TLI) {
+  // vastart just stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                               DAG.getRegister(SP::I6, MVT::i32),
+                               DAG.getConstant(TLI.getVarArgsFrameOffset(),
+                                               MVT::i32));
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+  SDNode *Node = Op.getNode();
+  MVT VT = Node->getValueType(0);
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
+  // Increment the pointer, VAList, to the next vaarg
+  SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+                                DAG.getConstant(VT.getSizeInBits()/8,
+                                                MVT::i32));
+  // Store the incremented VAList to the legalized pointer
+  InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+                         VAListPtr, SV, 0);
+  // Load the actual argument out of the pointer VAList, unless this is an
+  // f64 load.
+  if (VT != MVT::f64)
+    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);
+
+  // Otherwise, load it as i64, then do a bitconvert.
+  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);
+
+  // Bit-Convert the value to f64.
+  SDValue Ops[2] = {
+    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+    V.getValue(1)
+  };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
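+
+// Why the f64 case above takes the i64 detour (per the constructor comment,
+// "VAARG needs to be lowered to not do unaligned accesses for doubles"):
+// va_arg slots are only guaranteed word alignment, and since i64 is not a
+// legal type on 32-bit SPARC the i64 load is presumably split by type
+// legalization into two 4-byte loads before the bits are reassembled as an
+// f64.  So, for example,
+//   double d = va_arg(ap, double);
+// never issues an 8-byte ldd against a slot that may only be 4-byte aligned.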
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);  // Legalize the chain.
+  SDValue Size  = Op.getOperand(1);  // Legalize the size.
+  DebugLoc dl = Op.getDebugLoc();
+
+  unsigned SPReg = SP::O6;
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+  SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size);    // Value
+  Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP);  // Output chain
+
+  // The resultant pointer is actually 16 words from the bottom of the stack,
+  // to provide a register spill area.
+  SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+                               DAG.getConstant(96, MVT::i32));
+  SDValue Ops[2] = { NewVal, Chain };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Should not custom lower this!");
+  // Frame & Return address.  Currently unimplemented
+  case ISD::RETURNADDR:    return SDValue();
+  case ISD::FRAMEADDR:     return SDValue();
+  case ISD::GlobalTLSAddress:
+    assert(0 && "TLS not implemented for Sparc.");
+  case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+  case ISD::ConstantPool:  return LowerCONSTANTPOOL(Op, DAG);
+  case ISD::FP_TO_SINT:    return LowerFP_TO_SINT(Op, DAG);
+  case ISD::SINT_TO_FP:    return LowerSINT_TO_FP(Op, DAG);
+  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
+  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
+  case ISD::VASTART:       return LowerVASTART(Op, DAG, *this);
+  case ISD::VAARG:         return LowerVAARG(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::CALL:          return LowerCALL(Op, DAG);
+  case ISD::RET:           return LowerRET(Op, DAG);
+  }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                 MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  unsigned BROpcode;
+  unsigned CC;
+  DebugLoc dl = MI->getDebugLoc();
+  // Figure out the conditional branch opcode to use for this select_cc.
+  switch (MI->getOpcode()) {
+  default: assert(0 && "Unknown SELECT_CC!");
+  case SP::SELECT_CC_Int_ICC:
+  case SP::SELECT_CC_FP_ICC:
+  case SP::SELECT_CC_DFP_ICC:
+    BROpcode = SP::BCOND;
+    break;
+  case SP::SELECT_CC_Int_FCC:
+  case SP::SELECT_CC_FP_FCC:
+  case SP::SELECT_CC_DFP_FCC:
+    BROpcode = SP::FBCOND;
+    break;
+  }
+
+  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern.  The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  //  thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   [f]bCC copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the new block which will contain the Phi node for the select.
+  sinkMBB->transferSuccessors(BB);
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
+
+  //  copy0MBB:
+  //   %FalseValue = ...
+  //   # fallthrough to sinkMBB
+  BB = copy0MBB;
+
+  // Update machine-CFG edges
+  BB->addSuccessor(sinkMBB);
+
+  //  sinkMBB:
+  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  //  ...
+  BB = sinkMBB;
+  BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  return BB;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default:  break;
+    case 'r': return C_RegisterClass;
+    }
+  }
+
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                  MVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return std::make_pair(0U, SP::IntRegsRegisterClass);
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> SparcTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT VT) const {
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+
+  switch (Constraint[0]) {
+  default:  break;
+  case 'r':
+    return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3,
+                                 SP::L4, SP::L5, SP::L6, SP::L7,
+                                 SP::I0, SP::I1, SP::I2, SP::I3,
+                                 SP::I4, SP::I5,
+                                 SP::O0, SP::O1, SP::O2, SP::O3,
+                                 SP::O4, SP::O5, SP::O7, 0);
+  }
+
+  return std::vector<unsigned>();
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The Sparc target isn't yet aware of offsets.
+  return false;
+}
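+
+// Example of what these constraint hooks service (illustrative IR, not part
+// of this file): in
+//   %sum = call i32 asm "add $1, $2, $0", "=r,r,r"(i32 %a, i32 %b)
+// each "r" is resolved via getConstraintType/getRegForInlineAsmConstraint to
+// SP::IntRegsRegisterClass, i.e. the %l, %i and %o registers listed above.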
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 000000000000..fe6811f8c370
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,79 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+  namespace SPISD {
+    enum {
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+      CMPICC,      // Compare two GPR operands, set icc.
+      CMPFCC,      // Compare two FP operands, set fcc.
+      BRICC,       // Branch to dest on icc condition
+      BRFCC,       // Branch to dest on fcc condition
+      SELECT_ICC,  // Select between two values using the current ICC flags.
+      SELECT_FCC,  // Select between two values using the current FCC flags.
+
+      Hi, Lo,      // Hi/Lo operations, typically on a global address.
+
+      FTOI,        // FP to Int within a FP register.
+      ITOF,        // Int to FP within a FP register.
+
+      CALL,        // A call instruction.
+      RET_FLAG     // Return with a flag operand.
+    };
+  }
+
+  class SparcTargetLowering : public TargetLowering {
+    int VarArgsFrameOffset;   // Frame offset to start of varargs area.
+  public:
+    SparcTargetLowering(TargetMachine &TM);
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+
+    /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+    /// in Mask are known to be either zero or one and return them in the
+    /// KnownZero/KnownOne bitsets.
+    virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+                                                const APInt &Mask,
+                                                APInt &KnownZero,
+                                                APInt &KnownOne,
+                                                const SelectionDAG &DAG,
+                                                unsigned Depth = 0) const;
+
+    virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+                                SmallVectorImpl<SDValue> &ArgValues,
+                                DebugLoc dl);
+    virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                  MachineBasicBlock *MBB) const;
+
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    ConstraintType getConstraintType(const std::string &Constraint) const;
+    std::pair<unsigned, const TargetRegisterClass*>
+    getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+    std::vector<unsigned>
+    getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                      MVT VT) const;
+
+    virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+  };
+} // end namespace llvm
+
+#endif    // SPARC_ISELLOWERING_H
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 000000000000..6535259e16ff
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+  field bits<32> Inst;
+
+  let Namespace = "SP";
+
+  bits<2> op;
+  let Inst{31-30} = op;               // Top two bits are the 'op' field
+
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+   : InstSP<outs, ins, asmstr, pattern> {
+  bits<3>  op2;
+  bits<22> imm22;
+  let op = 0;    // op = 0
+  let Inst{24-22} = op2;
+  let Inst{21-0}  = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+   : F2<outs, ins, asmstr, pattern> {
+  bits<5> rd;
+
+  let op2 = op2Val;
+
+  let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+           list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+  bits<4> cond;
+  bit     annul = 0;    // currently unused
+
+  let cond = condVal;
+  let op2  = op2Val;
+
+  let Inst{29}    = annul;
+  let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstSP<outs, ins, asmstr, pattern> {
+  bits<5> rd;
+  bits<6> op3;
+  bits<5> rs1;
+  let op{1} = 1;   // Op = 2 or 3
+  let Inst{29-25} = rd;
+  let Inst{24-19} = op3;
+  let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bits<8> asi = 0; // asi not currently used
+  bits<5> rs2;
+
+  let op  = opVal;
+  let op3 = op3val;
+
+  let Inst{13}   = 0;     // i field = 0
+  let Inst{12-5} = asi;   // address space identifier
+  let Inst{4-0}  = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bits<13> simm13;
+
+  let op  = opVal;
+  let op3 = op3val;
+
+  let Inst{13}   = 1;     // i field = 1
+  let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bits<5> rs2;
+
+  let op  = opVal;
+  let op3 = op3val;
+
+  let Inst{13-5} = opfval;   // fp opcode
+  let Inst{4-0}  = rs2;
+}
+
+
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 000000000000..d2f6b9bdcb3d
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,277 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "SparcInstrInfo.h" +#include "SparcSubtarget.h" +#include "Sparc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "SparcGenInstrInfo.inc" +using namespace llvm; + +SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) + : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)), + RI(ST, *this), Subtarget(ST) { +} + +static bool isZeroImm(const MachineOperand &op) { + return op.isImm() && op.getImm() == 0; +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSR, unsigned &DstSR) const { + SrcSR = DstSR = 0; // No sub-registers. + + // We look for 3 kinds of patterns here: + // or with G0 or 0 + // add with G0 or 0 + // fmovs or FpMOVD (pseudo double move). + if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) { + if (MI.getOperand(1).getReg() == SP::G0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + return true; + } else if (MI.getOperand(2).getReg() == SP::G0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) && + isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD || + MI.getOpcode() == SP::FMOVD) { + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (MI->getOpcode() == SP::LDri || + MI->getOpcode() == SP::LDFri || + MI->getOpcode() == SP::LDDFri) { + if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. 
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+  if (MI->getOpcode() == SP::STri ||
+      MI->getOpcode() == SP::STFri ||
+      MI->getOpcode() == SP::STDFri) {
+    if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+        MI->getOperand(1).getImm() == 0) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+  }
+  return 0;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME this should probably take a DebugLoc argument
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Can only insert uncond branches so far.
+  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+  BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB);
+  return 1;
+}
+
+bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  const TargetRegisterClass *DestRC,
+                                  const TargetRegisterClass *SrcRC) const {
+  if (DestRC != SrcRC) {
+    // Not yet supported!
+    return false;
+  }
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (DestRC == SP::IntRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
+  else if (DestRC == SP::FPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg);
+  else if (DestRC == SP::DFPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+      .addReg(SrcReg);
+  else
+    // Can't copy this register
+    return false;
+
+  return true;
+}
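+
+// For reference (a sketch of the emitted assembly, not found verbatim in this
+// file): the integer case above produces the usual SPARC register-move idiom
+// of OR-ing with the hardwired zero register, e.g.
+//   or %g0, %o1, %l0    ! %l0 = %o1
+// which is exactly the form isMoveInstr recognizes when it decomposes moves.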
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+  if (RC == SP::IntRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill));
+  else if (RC == SP::FPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill));
+  else if (RC == SP::DFPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill));
+  else
+    assert(0 && "Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                    bool isKill,
+                                    SmallVectorImpl<MachineOperand> &Addr,
+                                    const TargetRegisterClass *RC,
+                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  unsigned Opc = 0;
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (RC == SP::IntRegsRegisterClass)
+    Opc = SP::STri;
+  else if (RC == SP::FPRegsRegisterClass)
+    Opc = SP::STFri;
+  else if (RC == SP::DFPRegsRegisterClass)
+    Opc = SP::STDFri;
+  else
+    assert(0 && "Can't load this register");
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  MIB.addReg(SrcReg, getKillRegState(isKill));
+  NewMIs.push_back(MIB);
+  return;
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (RC == SP::IntRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+  else if (RC == SP::FPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+  else if (RC == SP::DFPRegsRegisterClass)
+    BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+  else
+    assert(0 && "Can't load this register from stack slot");
+}
+
+void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                     SmallVectorImpl<MachineOperand> &Addr,
+                                     const TargetRegisterClass *RC,
+                                     SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  unsigned Opc = 0;
+  if (RC == SP::IntRegsRegisterClass)
+    Opc = SP::LDri;
+  else if (RC == SP::FPRegsRegisterClass)
+    Opc = SP::LDFri;
+  else if (RC == SP::DFPRegsRegisterClass)
+    Opc = SP::LDDFri;
+  else
+    assert(0 && "Can't load this register");
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
+
+MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                        MachineInstr* MI,
+                                        const SmallVectorImpl<unsigned> &Ops,
+                                        int FI) const {
+  if (Ops.size() != 1) return NULL;
+
+  unsigned OpNum = Ops[0];
+  bool isFloat = false;
+  MachineInstr *NewMI = NULL;
+  switch (MI->getOpcode()) {
+  case SP::ORrr:
+    if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0 &&
+        MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) {
+      if (OpNum == 0)    // COPY -> STORE
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri))
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addReg(MI->getOperand(2).getReg());
+      else               // COPY -> LOAD
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri),
+                        MI->getOperand(0).getReg())
+          .addFrameIndex(FI)
+          .addImm(0);
+    }
+    break;
+  case SP::FMOVS:
+    isFloat = true;
+    // FALLTHROUGH
+  case SP::FMOVD:
+    if (OpNum == 0) { // COPY -> STORE
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      bool isKill = MI->getOperand(1).isKill();
+      NewMI = BuildMI(MF, MI->getDebugLoc(),
get(isFloat ? SP::STFri : SP::STDFri)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)); + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), + get(isFloat ? SP::LDFri : SP::LDDFri)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI) + .addImm(0); + } + break; + } + + return NewMI; +} diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h new file mode 100644 index 000000000000..ab661b991d74 --- /dev/null +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -0,0 +1,114 @@ +//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sparc implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCINSTRUCTIONINFO_H +#define SPARCINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "SparcRegisterInfo.h" + +namespace llvm { + +/// SPII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace SPII { + enum { + Pseudo = (1<<0), + Load = (1<<1), + Store = (1<<2), + DelaySlot = (1<<3) + }; +} + +class SparcInstrInfo : public TargetInstrInfoImpl { + const SparcRegisterInfo RI; + const SparcSubtarget& Subtarget; +public: + explicit SparcInstrInfo(SparcSubtarget &ST); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0;
+  }
+};
+
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 000000000000..2d6c9209e6ae
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,769 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions.  Note that the machine may be running in 32-bit mode.
+def HasV9   : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions.  Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate.  In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11  : PatLeaf<(imm), [{
+  // simm11 predicate - True if the imm fits in a 11-bit sign extended field.
+  return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
+         (int)N->getZExtValue();
+}]>;
+
+def simm13  : PatLeaf<(imm), [{
+  // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
+  return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
+         (int)N->getZExtValue();
+}]>;
+
+def LO10 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+                                   MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+  return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+         (unsigned)N->getZExtValue();
+}], HI22>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+  def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+
+def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo    : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi  : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof  : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
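+
+// Worked example (illustrative, verifiable by hand): to materialize the
+// constant 0x12345678, HI22 produces 0x48D15 (the constant shifted right by
+// 10) and LO10 produces 0x278 (the low 10 bits), so the "arbitrary
+// immediates" pattern near the end of this file emits roughly
+//   sethi 0x48d15, %reg
+//   or    %reg, 0x278, %reg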
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                        SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+                           [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_SPCall    : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call          : SDNode<"SPISD::CALL", SDT_SPCall,
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag       : SDNode<"SPISD::RET_FLAG", SDTNone,
+                           [SDNPHasChain, SDNPOptInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE  : ICC_VAL< 9>;  // Not Equal
+def ICC_E   : ICC_VAL< 1>;  // Equal
+def ICC_G   : ICC_VAL<10>;  // Greater
+def ICC_LE  : ICC_VAL< 2>;  // Less or Equal
+def ICC_GE  : ICC_VAL<11>;  // Greater or Equal
+def ICC_L   : ICC_VAL< 3>;  // Less
+def ICC_GU  : ICC_VAL<12>;  // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>;  // Less or Equal Unsigned
+def ICC_CC  : ICC_VAL<13>;  // Carry Clear/Great or Equal Unsigned
+def ICC_CS  : ICC_VAL< 5>;  // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>;  // Positive
+def ICC_NEG : ICC_VAL< 6>;  // Negative
+def ICC_VC  : ICC_VAL<15>;  // Overflow Clear
+def ICC_VS  : ICC_VAL< 7>;  // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U   : FCC_VAL<23>;  // Unordered
+def FCC_G   : FCC_VAL<22>;  // Greater
+def FCC_UG  : FCC_VAL<21>;  // Unordered or Greater
+def FCC_L   : FCC_VAL<20>;  // Less
+def FCC_UL  : FCC_VAL<19>;  // Unordered or Less
+def FCC_LG  : FCC_VAL<18>;  // Less or Greater
+def FCC_NE  : FCC_VAL<17>;  // Not Equal
+def FCC_E   : FCC_VAL<25>;  // Equal
+def FCC_UE  : FCC_VAL<24>;  // Unordered or Equal
+def FCC_GE  : FCC_VAL<25>;  // Greater or Equal
+def FCC_UGE : FCC_VAL<26>;  // Unordered or Greater or Equal
+def FCC_LE  : FCC_VAL<27>;  // Less or Equal
+def FCC_ULE : FCC_VAL<28>;  // Unordered or Less or Equal
+def FCC_O   : FCC_VAL<29>;  // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+  def rr  : F3_1<2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+  def ri  : F3_2<2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+  def rr  : F3_1<2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"), []>;
+  def ri  : F3_2<2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
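+
+// For illustration: a later line such as
+//   defm ADD : F3_12<"add", 0b000000, add>;
+// expands through F3_12 to two records, ADDrr and ADDri, matching
+// "add $b, $c, $dst" with either a register or a simm13 immediate as the
+// second source operand.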
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+   : InstSP<outs, ins, asmstr, pattern>;
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+                               "!ADJCALLSTACKDOWN $amt",
+                               [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                            "!ADJCALLSTACKUP $amt1",
+                            [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
+  def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+                      "!FpMOVD $src, $dst", []>;
+  def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+                      "!FpNEGD $src, $dst",
+                      [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+  def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+                      "!FpABSD $src, $dst",
+                      [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded by
+// the scheduler into a branch sequence.  This has to handle all permutations
+// of selection between i32/f32/f64 on ICC and FCC.
+let usesCustomDAGSchedInserter = 1 in {   // Expanded by the scheduler.
+  def SELECT_CC_Int_ICC
+   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_Int_ICC PSEUDO!",
+            [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+                                             imm:$Cond))]>;
+  def SELECT_CC_Int_FCC
+   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_Int_FCC PSEUDO!",
+            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+                                             imm:$Cond))]>;
+  def SELECT_CC_FP_ICC
+   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_FP_ICC PSEUDO!",
+            [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+                                            imm:$Cond))]>;
+  def SELECT_CC_FP_FCC
+   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_FP_FCC PSEUDO!",
+            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+                                            imm:$Cond))]>;
+  def SELECT_CC_DFP_ICC
+   : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_DFP_ICC PSEUDO!",
+            [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+                                             imm:$Cond))]>;
+  def SELECT_CC_DFP_FCC
+   : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_DFP_FCC PSEUDO!",
+            [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+                                             imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+  let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
+    def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+}
+
+// Section B.1 - Load Integer Instructions, p.
90 +def LDSBrr : F3_1<3, 0b001001, + (outs IntRegs:$dst), (ins MEMrr:$addr), + "ldsb [$addr], $dst", + [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>; +def LDSBri : F3_2<3, 0b001001, + (outs IntRegs:$dst), (ins MEMri:$addr), + "ldsb [$addr], $dst", + [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>; +def LDSHrr : F3_1<3, 0b001010, + (outs IntRegs:$dst), (ins MEMrr:$addr), + "ldsh [$addr], $dst", + [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>; +def LDSHri : F3_2<3, 0b001010, + (outs IntRegs:$dst), (ins MEMri:$addr), + "ldsh [$addr], $dst", + [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>; +def LDUBrr : F3_1<3, 0b000001, + (outs IntRegs:$dst), (ins MEMrr:$addr), + "ldub [$addr], $dst", + [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>; +def LDUBri : F3_2<3, 0b000001, + (outs IntRegs:$dst), (ins MEMri:$addr), + "ldub [$addr], $dst", + [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>; +def LDUHrr : F3_1<3, 0b000010, + (outs IntRegs:$dst), (ins MEMrr:$addr), + "lduh [$addr], $dst", + [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>; +def LDUHri : F3_2<3, 0b000010, + (outs IntRegs:$dst), (ins MEMri:$addr), + "lduh [$addr], $dst", + [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>; +def LDrr : F3_1<3, 0b000000, + (outs IntRegs:$dst), (ins MEMrr:$addr), + "ld [$addr], $dst", + [(set IntRegs:$dst, (load ADDRrr:$addr))]>; +def LDri : F3_2<3, 0b000000, + (outs IntRegs:$dst), (ins MEMri:$addr), + "ld [$addr], $dst", + [(set IntRegs:$dst, (load ADDRri:$addr))]>; + +// Section B.2 - Load Floating-point Instructions, p. 92 +def LDFrr : F3_1<3, 0b100000, + (outs FPRegs:$dst), (ins MEMrr:$addr), + "ld [$addr], $dst", + [(set FPRegs:$dst, (load ADDRrr:$addr))]>; +def LDFri : F3_2<3, 0b100000, + (outs FPRegs:$dst), (ins MEMri:$addr), + "ld [$addr], $dst", + [(set FPRegs:$dst, (load ADDRri:$addr))]>; +def LDDFrr : F3_1<3, 0b100011, + (outs DFPRegs:$dst), (ins MEMrr:$addr), + "ldd [$addr], $dst", + [(set DFPRegs:$dst, (load ADDRrr:$addr))]>; +def LDDFri : F3_2<3, 0b100011, + (outs DFPRegs:$dst), (ins MEMri:$addr), + "ldd [$addr], $dst", + [(set DFPRegs:$dst, (load ADDRri:$addr))]>; + +// Section B.4 - Store Integer Instructions, p. 95 +def STBrr : F3_1<3, 0b000101, + (outs), (ins MEMrr:$addr, IntRegs:$src), + "stb $src, [$addr]", + [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>; +def STBri : F3_2<3, 0b000101, + (outs), (ins MEMri:$addr, IntRegs:$src), + "stb $src, [$addr]", + [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>; +def STHrr : F3_1<3, 0b000110, + (outs), (ins MEMrr:$addr, IntRegs:$src), + "sth $src, [$addr]", + [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>; +def STHri : F3_2<3, 0b000110, + (outs), (ins MEMri:$addr, IntRegs:$src), + "sth $src, [$addr]", + [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>; +def STrr : F3_1<3, 0b000100, + (outs), (ins MEMrr:$addr, IntRegs:$src), + "st $src, [$addr]", + [(store IntRegs:$src, ADDRrr:$addr)]>; +def STri : F3_2<3, 0b000100, + (outs), (ins MEMri:$addr, IntRegs:$src), + "st $src, [$addr]", + [(store IntRegs:$src, ADDRri:$addr)]>; + +// Section B.5 - Store Floating-point Instructions, p. 
97 +def STFrr : F3_1<3, 0b100100, + (outs), (ins MEMrr:$addr, FPRegs:$src), + "st $src, [$addr]", + [(store FPRegs:$src, ADDRrr:$addr)]>; +def STFri : F3_2<3, 0b100100, + (outs), (ins MEMri:$addr, FPRegs:$src), + "st $src, [$addr]", + [(store FPRegs:$src, ADDRri:$addr)]>; +def STDFrr : F3_1<3, 0b100111, + (outs), (ins MEMrr:$addr, DFPRegs:$src), + "std $src, [$addr]", + [(store DFPRegs:$src, ADDRrr:$addr)]>; +def STDFri : F3_2<3, 0b100111, + (outs), (ins MEMri:$addr, DFPRegs:$src), + "std $src, [$addr]", + [(store DFPRegs:$src, ADDRri:$addr)]>; + +// Section B.9 - SETHI Instruction, p. 104 +def SETHIi: F2_1<0b100, + (outs IntRegs:$dst), (ins i32imm:$src), + "sethi $src, $dst", + [(set IntRegs:$dst, SETHIimm:$src)]>; + +// Section B.10 - NOP Instruction, p. 105 +// (It's a special case of SETHI) +let rd = 0, imm22 = 0 in + def NOP : F2_1<0b100, (outs), (ins), "nop", []>; + +// Section B.11 - Logical Instructions, p. 106 +defm AND : F3_12<"and", 0b000001, and>; + +def ANDNrr : F3_1<2, 0b000101, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "andn $b, $c, $dst", + [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>; +def ANDNri : F3_2<2, 0b000101, + (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), + "andn $b, $c, $dst", []>; + +defm OR : F3_12<"or", 0b000010, or>; + +def ORNrr : F3_1<2, 0b000110, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "orn $b, $c, $dst", + [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>; +def ORNri : F3_2<2, 0b000110, + (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), + "orn $b, $c, $dst", []>; +defm XOR : F3_12<"xor", 0b000011, xor>; + +def XNORrr : F3_1<2, 0b000111, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "xnor $b, $c, $dst", + [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>; +def XNORri : F3_2<2, 0b000111, + (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), + "xnor $b, $c, $dst", []>; + +// Section B.12 - Shift Instructions, p. 107 +defm SLL : F3_12<"sll", 0b100101, shl>; +defm SRL : F3_12<"srl", 0b100110, srl>; +defm SRA : F3_12<"sra", 0b100111, sra>; + +// Section B.13 - Add Instructions, p. 108 +defm ADD : F3_12<"add", 0b000000, add>; + +// "LEA" forms of add (patterns to make tblgen happy) +def LEA_ADDri : F3_2<2, 0b000000, + (outs IntRegs:$dst), (ins MEMri:$addr), + "add ${addr:arith}, $dst", + [(set IntRegs:$dst, ADDRri:$addr)]>; + +defm ADDCC : F3_12<"addcc", 0b010000, addc>; +defm ADDX : F3_12<"addx", 0b001000, adde>; + +// Section B.15 - Subtract Instructions, p. 110 +defm SUB : F3_12 <"sub" , 0b000100, sub>; +defm SUBX : F3_12 <"subx" , 0b001100, sube>; +defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; + +def SUBXCCrr: F3_1<2, 0b011100, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "subxcc $b, $c, $dst", []>; + +// Section B.18 - Multiply Instructions, p. 113 +defm UMUL : F3_12np<"umul", 0b001010>; +defm SMUL : F3_12 <"smul", 0b001011, mul>; + + +// Section B.19 - Divide Instructions, p. 115 +defm UDIV : F3_12np<"udiv", 0b001110>; +defm SDIV : F3_12np<"sdiv", 0b001111>; + +// Section B.20 - SAVE and RESTORE, p. 117 +defm SAVE : F3_12np<"save" , 0b111100>; +defm RESTORE : F3_12np<"restore", 0b111101>; + +// Section B.21 - Branch on Integer Condition Codes Instructions, p. 
119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+  let isBranch = 1;
+  let isTerminator = 1;
+  let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+  def BA   : BranchSP<0b1000, (ins brtarget:$dst),
+                      "ba $dst",
+                      [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+                     "b$cc $dst",
+                     [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+  let isBranch = 1;
+  let isTerminator = 1;
+  let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def FBCOND  : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+                         "fb$cc $dst",
+                         [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O0, O1, O2, O3, O4, O5],
+    hasDelaySlot = 1, isCall = 1,
+    Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+            D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in {
+  def CALL : InstSP<(outs), (ins calltarget:$dst),
+                    "call $dst", []> {
+    bits<30> disp;
+    let op = 1;
+    let Inst{29-0} = disp;
+  }
+
+  // indirect calls
+  def JMPLrr : F3_1<2, 0b111000,
+                    (outs), (ins MEMrr:$ptr),
+                    "call $ptr",
+                    [(call ADDRrr:$ptr)]>;
+  def JMPLri : F3_2<2, 0b111000,
+                    (outs), (ins MEMri:$ptr),
+                    "call $ptr",
+                    [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+def RDY : F3_1<2, 0b101000,
+               (outs IntRegs:$dst), (ins),
+               "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+def WRYrr : F3_1<2, 0b110000,
+                 (outs), (ins IntRegs:$b, IntRegs:$c),
+                 "wr $b, $c, %y", []>;
+def WRYri : F3_2<2, 0b110000,
+                 (outs), (ins IntRegs:$b, i32imm:$c),
+                 "wr $b, $c, %y", []>;
+
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+                 (outs FPRegs:$dst), (ins FPRegs:$src),
+                 "fitos $src, $dst",
+                 [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+                 (outs DFPRegs:$dst), (ins FPRegs:$src),
+                 "fitod $src, $dst",
+                 [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+                 (outs FPRegs:$dst), (ins FPRegs:$src),
+                 "fstoi $src, $dst",
+                 [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+                 (outs FPRegs:$dst), (ins DFPRegs:$src),
+                 "fdtoi $src, $dst",
+                 [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+                 (outs DFPRegs:$dst), (ins FPRegs:$src),
+                 "fstod $src, $dst",
+                 [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+                 (outs FPRegs:$dst), (ins DFPRegs:$src),
+                 "fdtos $src, $dst",
+                 [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p.
144 +def FMOVS : F3_3<2, 0b110100, 0b000000001, + (outs FPRegs:$dst), (ins FPRegs:$src), + "fmovs $src, $dst", []>; +def FNEGS : F3_3<2, 0b110100, 0b000000101, + (outs FPRegs:$dst), (ins FPRegs:$src), + "fnegs $src, $dst", + [(set FPRegs:$dst, (fneg FPRegs:$src))]>; +def FABSS : F3_3<2, 0b110100, 0b000001001, + (outs FPRegs:$dst), (ins FPRegs:$src), + "fabss $src, $dst", + [(set FPRegs:$dst, (fabs FPRegs:$src))]>; + + +// Floating-point Square Root Instructions, p.145 +def FSQRTS : F3_3<2, 0b110100, 0b000101001, + (outs FPRegs:$dst), (ins FPRegs:$src), + "fsqrts $src, $dst", + [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>; +def FSQRTD : F3_3<2, 0b110100, 0b000101010, + (outs DFPRegs:$dst), (ins DFPRegs:$src), + "fsqrtd $src, $dst", + [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>; + + + +// Floating-point Add and Subtract Instructions, p. 146 +def FADDS : F3_3<2, 0b110100, 0b001000001, + (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), + "fadds $src1, $src2, $dst", + [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>; +def FADDD : F3_3<2, 0b110100, 0b001000010, + (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), + "faddd $src1, $src2, $dst", + [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>; +def FSUBS : F3_3<2, 0b110100, 0b001000101, + (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), + "fsubs $src1, $src2, $dst", + [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>; +def FSUBD : F3_3<2, 0b110100, 0b001000110, + (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), + "fsubd $src1, $src2, $dst", + [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>; + +// Floating-point Multiply and Divide Instructions, p. 147 +def FMULS : F3_3<2, 0b110100, 0b001001001, + (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), + "fmuls $src1, $src2, $dst", + [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>; +def FMULD : F3_3<2, 0b110100, 0b001001010, + (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), + "fmuld $src1, $src2, $dst", + [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>; +def FSMULD : F3_3<2, 0b110100, 0b001101001, + (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), + "fsmuld $src1, $src2, $dst", + [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1), + (fextend FPRegs:$src2)))]>; +def FDIVS : F3_3<2, 0b110100, 0b001001101, + (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), + "fdivs $src1, $src2, $dst", + [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>; +def FDIVD : F3_3<2, 0b110100, 0b001001110, + (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), + "fdivd $src1, $src2, $dst", + [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>; + +// Floating-point Compare Instructions, p. 148 +// Note: the 2nd template arg is different for these guys. +// Note 2: the result of a FCMP is not available until the 2nd cycle +// after the instr is retired, but there is no interlock. This behavior +// is modelled with a forced noop after the instruction. +def FCMPS : F3_3<2, 0b110101, 0b001010001, + (outs), (ins FPRegs:$src1, FPRegs:$src2), + "fcmps $src1, $src2\n\tnop", + [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; +def FCMPD : F3_3<2, 0b110101, 0b001010010, + (outs), (ins DFPRegs:$src1, DFPRegs:$src2), + "fcmpd $src1, $src2\n\tnop", + [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; + + +//===----------------------------------------------------------------------===// +// V9 Instructions +//===----------------------------------------------------------------------===// + +// V9 Conditional Moves. 
+let Predicates = [HasV9], isTwoAddress = 1 in { + // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. + // FIXME: Add instruction encodings for the JIT some day. + def MOVICCrr + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), + "mov$cc %icc, $F, $dst", + [(set IntRegs:$dst, + (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + def MOVICCri + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), + "mov$cc %icc, $F, $dst", + [(set IntRegs:$dst, + (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; + + def MOVFCCrr + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), + "mov$cc %fcc0, $F, $dst", + [(set IntRegs:$dst, + (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + def MOVFCCri + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), + "mov$cc %fcc0, $F, $dst", + [(set IntRegs:$dst, + (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; + + def FMOVS_ICC + : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), + "fmovs$cc %icc, $F, $dst", + [(set FPRegs:$dst, + (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + def FMOVD_ICC + : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), + "fmovd$cc %icc, $F, $dst", + [(set DFPRegs:$dst, + (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + def FMOVS_FCC + : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), + "fmovs$cc %fcc0, $F, $dst", + [(set FPRegs:$dst, + (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + def FMOVD_FCC + : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), + "fmovd$cc %fcc0, $F, $dst", + [(set DFPRegs:$dst, + (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + +} + +// Floating-Point Move Instructions, p. 164 of the V9 manual. +let Predicates = [HasV9] in { + def FMOVD : F3_3<2, 0b110100, 0b000000010, + (outs DFPRegs:$dst), (ins DFPRegs:$src), + "fmovd $src, $dst", []>; + def FNEGD : F3_3<2, 0b110100, 0b000000110, + (outs DFPRegs:$dst), (ins DFPRegs:$src), + "fnegd $src, $dst", + [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; + def FABSD : F3_3<2, 0b110100, 0b000001010, + (outs DFPRegs:$dst), (ins DFPRegs:$src), + "fabsd $src, $dst", + [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; +} + +// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear +// the top 32-bits before using it. To do this clearing, we use a SLLri X,0. +def POPCrr : F3_1<2, 0b101110, + (outs IntRegs:$dst), (ins IntRegs:$src), + "popc $src, $dst", []>, Requires<[HasV9]>; +def : Pat<(ctpop IntRegs:$src), + (POPCrr (SLLri IntRegs:$src, 0))>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// Small immediates. +def : Pat<(i32 simm13:$val), + (ORri G0, imm:$val)>; +// Arbitrary immediates. +def : Pat<(i32 imm:$val), + (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; + +// subc +def : Pat<(subc IntRegs:$b, IntRegs:$c), + (SUBCCrr IntRegs:$b, IntRegs:$c)>; +def : Pat<(subc IntRegs:$b, simm13:$val), + (SUBCCri IntRegs:$b, imm:$val)>; + +// Global addresses, constant pool entries +def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; +def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>; +def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; +def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>; + +// Add reg, lo. This is used when taking the addr of a global/constpool entry. 
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)), + (ADDri IntRegs:$r, tglobaladdr:$in)>; +def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)), + (ADDri IntRegs:$r, tconstpool:$in)>; + +// Calls: +def : Pat<(call tglobaladdr:$dst), + (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), + (CALL texternalsym:$dst)>; + +def : Pat<(ret), (RETL)>; + +// Map integer extload's to zextloads. +def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; +def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; +def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; +def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>; +def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>; +def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>; + +// zextload bool -> zextload byte +def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; +def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp new file mode 100644 index 000000000000..59efb19ab9c5 --- /dev/null +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -0,0 +1,196 @@ +//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SPARC implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Sparc.h" +#include "SparcRegisterInfo.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, + const TargetInstrInfo &tii) + : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), + Subtarget(st), TII(tii) { +} + +const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) + const { + static const unsigned CalleeSavedRegs[] = { 0 }; + return CalleeSavedRegs; +} + +BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(SP::G2); + Reserved.set(SP::G3); + Reserved.set(SP::G4); + Reserved.set(SP::O6); + Reserved.set(SP::I6); + Reserved.set(SP::I7); + Reserved.set(SP::G0); + Reserved.set(SP::G5); + Reserved.set(SP::G6); + Reserved.set(SP::G7); + return Reserved; +} + + +const TargetRegisterClass* const* +SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + return CalleeSavedRegClasses; +} + +bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const { + return false; +} + +void SparcRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + DebugLoc dl = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + if (Size) + BuildMI(MBB, I, dl, TII.get(SP::ADDri), 
SP::O6).addReg(SP::O6).addImm(Size); + MBB.erase(I); +} + +void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + DebugLoc dl = MI.getDebugLoc(); + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + // Addressable stack objects are accessed using neg. offsets from %fp + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(i+1).getImm(); + + // Replace frame index with a frame pointer reference. + if (Offset >= -4096 && Offset <= 4095) { + // If the offset is small enough to fit in the immediate field, directly + // encode it. + MI.getOperand(i).ChangeToRegister(SP::I6, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + } else { + // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to + // scavenge a register here instead of reserving G1 all of the time. + unsigned OffHi = (unsigned)Offset >> 10U; + BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) + .addReg(SP::I6); + // Insert: G1+%lo(offset) into the user. + MI.getOperand(i).ChangeToRegister(SP::G1, false); + MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); + } +} + +void SparcRegisterInfo:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} + +void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Get the number of bytes to allocate from the FrameInfo + int NumBytes = (int) MFI->getStackSize(); + + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + NumBytes += 92; + + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. + NumBytes = (NumBytes + 7) & ~7; + NumBytes = -NumBytes; + + if (NumBytes >= -4096) { + BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) + .addReg(SP::O6).addImm(NumBytes); + } else { + // Emit this the hard way. This clobbers G1 which we always know is + // available here. 
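+ // Worked example (illustrative): for NumBytes = -4104 (0xFFFFEFF8), + // OffHi = 0xFFFFEFF8 >> 10 = 0x3FFFFB, so SETHI leaves 0xFFFFEC00 in G1; + // OR-ing in the low 10 bits (-4104 & 1023 = 0x3F8) restores 0xFFFFEFF8, + // which SAVErr then adds to the stack pointer.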
+ unsigned OffHi = (unsigned)NumBytes >> 10U; + BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 | %lo(NumBytes) + BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) + .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); + BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) + .addReg(SP::O6).addReg(SP::G1); + } +} + +void SparcRegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + DebugLoc dl = MBBI->getDebugLoc(); + assert(MBBI->getOpcode() == SP::RETL && + "Can only put epilog before 'retl' instruction!"); + BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) + .addReg(SP::G0); +} + +unsigned SparcRegisterInfo::getRARegister() const { + assert(0 && "What is the return address register"); + return 0; +} + +unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const { + assert(0 && "What is the frame register"); + return SP::G1; +} + +unsigned SparcRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned SparcRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + assert(0 && "What is the dwarf register number"); + return -1; +} + +#include "SparcGenRegisterInfo.inc" + diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h new file mode 100644 index 000000000000..fc863f3b28f0 --- /dev/null +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -0,0 +1,67 @@ +//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sparc implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCREGISTERINFO_H +#define SPARCREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "SparcGenRegisterInfo.h.inc" + +namespace llvm { + +class SparcSubtarget; +class TargetInstrInfo; +class Type; + +struct SparcRegisterInfo : public SparcGenRegisterInfo { + SparcSubtarget &Subtarget; + const TargetInstrInfo &TII; + + SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries.
+ unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td new file mode 100644 index 000000000000..e3a50ca42bbb --- /dev/null +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -0,0 +1,158 @@ +//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Sparc register file +//===----------------------------------------------------------------------===// + +class SparcReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "SP"; +} + +// Registers are identified with 5-bit ID numbers. +// Ri - 32-bit integer registers +class Ri<bits<5> num, string n> : SparcReg<n> { + let Num = num; +} +// Rf - 32-bit floating-point registers +class Rf<bits<5> num, string n> : SparcReg<n> { + let Num = num; +} +// Rd - Slots in the FP register file for 64-bit floating-point values. +class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { + let Num = num; + let SubRegs = subregs; +} + +// Integer registers +def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; +def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; +def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; +def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>; +def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>; +def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; +def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>; +def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>; +def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>; +def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>; +def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; +def O3 : Ri<11, "O3">, DwarfRegNum<[11]>; +def O4 : Ri<12, "O4">, DwarfRegNum<[12]>; +def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; +def O6 : Ri<14, "O6">, DwarfRegNum<[14]>; +def O7 : Ri<15, "O7">, DwarfRegNum<[15]>; +def L0 : Ri<16, "L0">, DwarfRegNum<[16]>; +def L1 : Ri<17, "L1">, DwarfRegNum<[17]>; +def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; +def L3 : Ri<19, "L3">, DwarfRegNum<[19]>; +def L4 : Ri<20, "L4">, DwarfRegNum<[20]>; +def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; +def L6 : Ri<22, "L6">, DwarfRegNum<[22]>; +def L7 : Ri<23, "L7">, DwarfRegNum<[23]>; +def I0 : Ri<24, "I0">, DwarfRegNum<[24]>; +def I1 : Ri<25, "I1">, DwarfRegNum<[25]>; +def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; +def I3 : Ri<27, "I3">, DwarfRegNum<[27]>; +def I4 : Ri<28, "I4">, DwarfRegNum<[28]>; +def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; +def I6 : Ri<30, "I6">, DwarfRegNum<[30]>; +def I7 : Ri<31, "I7">, DwarfRegNum<[31]>; + +// Floating-point registers +def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; +def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; +def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>; +def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>; +def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>; +def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>; +def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>; +def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>; +def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>; +def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>; +def F10 : Rf<10, "F10">, DwarfRegNum<[42]>; +def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; +def F12 : Rf<12, "F12">, DwarfRegNum<[44]>; +def F13 : Rf<13, "F13">, DwarfRegNum<[45]>; +def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; +def F15 : Rf<15, "F15">,
DwarfRegNum<[47]>; +def F16 : Rf<16, "F16">, DwarfRegNum<[48]>; +def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; +def F18 : Rf<18, "F18">, DwarfRegNum<[50]>; +def F19 : Rf<19, "F19">, DwarfRegNum<[51]>; +def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; +def F21 : Rf<21, "F21">, DwarfRegNum<[53]>; +def F22 : Rf<22, "F22">, DwarfRegNum<[54]>; +def F23 : Rf<23, "F23">, DwarfRegNum<[55]>; +def F24 : Rf<24, "F24">, DwarfRegNum<[56]>; +def F25 : Rf<25, "F25">, DwarfRegNum<[57]>; +def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; +def F27 : Rf<27, "F27">, DwarfRegNum<[59]>; +def F28 : Rf<28, "F28">, DwarfRegNum<[60]>; +def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; +def F30 : Rf<30, "F30">, DwarfRegNum<[62]>; +def F31 : Rf<31, "F31">, DwarfRegNum<[63]>; + +// Aliases of the F* registers used to hold 64-bit fp values (doubles) +def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; +def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>; +def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>; +def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>; +def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>; +def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[42]>; +def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[44]>; +def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[46]>; +def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[48]>; +def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[50]>; +def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[52]>; +def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[54]>; +def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[56]>; +def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[58]>; +def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[60]>; +def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[62]>; + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... +// +def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7, + I0, I1, I2, I3, I4, I5, + O0, O1, O2, O3, O4, O5, O7, + + // FIXME: G1 reserved for now for large imm generation by frame code. + G1, + // Non-allocatable regs: + G2, G3, G4, // FIXME: OK for use only in + // applications, not libraries. + O6, // stack ptr + I6, // frame ptr + I7, // return address + G0, // constant zero + G5, G6, G7 // reserved for kernel + ]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + IntRegsClass::iterator + IntRegsClass::allocation_order_end(const MachineFunction &MF) const { + // FIXME: These special regs should be taken out of the regclass! + return end()-10 // Don't allocate special registers + -1; // FIXME: G1 reserved for large imm generation by frame code. + } + }]; +} + +def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8, + F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, + F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + +def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15]>; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp new file mode 100644 index 000000000000..aaddbff073ad --- /dev/null +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -0,0 +1,43 @@ +//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SPARC specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "SparcSubtarget.h" +#include "SparcGenSubtarget.inc" +using namespace llvm; + +// FIXME: temporary. +#include "llvm/Support/CommandLine.h" +namespace { + cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden, + cl::desc("Enable V9 instructions in the V8 target")); +} + +SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) { + // Set the default features. + IsV9 = false; + V8DeprecatedInsts = false; + IsVIS = false; + + // Determine default and user specified characteristics + std::string CPU = "generic"; + + // FIXME: autodetect host here! + CPU = "v9"; // What is a good way to detect V9? + + // Parse features string. + ParseSubtargetFeatures(FS, CPU); + + // Unless explicitly enabled, disable the V9 instructions. + if (!EnableV9) + IsV9 = false; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h new file mode 100644 index 000000000000..e5a5ba47f106 --- /dev/null +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -0,0 +1,43 @@ +//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SPARC specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARC_SUBTARGET_H +#define SPARC_SUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include <string> + +namespace llvm { + class Module; + +class SparcSubtarget : public TargetSubtarget { + bool IsV9; + bool V8DeprecatedInsts; + bool IsVIS; +public: + SparcSubtarget(const Module &M, const std::string &FS); + + bool isV9() const { return IsV9; } + bool isVIS() const { return IsVIS; } + bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; } + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.cpp b/lib/Target/Sparc/SparcTargetAsmInfo.cpp new file mode 100644 index 000000000000..c13d45ceec7c --- /dev/null +++ b/lib/Target/Sparc/SparcTargetAsmInfo.cpp @@ -0,0 +1,50 @@ +//===-- SparcTargetAsmInfo.cpp - Sparc asm properties -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SparcTargetAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "SparcTargetAsmInfo.h" + +using namespace llvm; + +SparcELFTargetAsmInfo::SparcELFTargetAsmInfo(const TargetMachine &TM): + ELFTargetAsmInfo(TM) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9.
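+ // Illustration: with the directives above a 16-bit constant 7 is printed + // as "\t.half\t7" and a 32-bit constant as "\t.word\t7"; since + // Data64bitsDirective is null, 64-bit data is presumably split into two + // 32-bit words by the generic printer.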
+ ZeroDirective = "\t.skip\t"; + CommentString = "!"; + ConstantPoolSection = "\t.section \".rodata\",#alloc\n"; + COMMDirectiveTakesAlignment = true; + CStringSection=".rodata.str"; + + // Sparc normally uses named section for BSS. + BSSSection_ = getNamedSection("\t.bss", + SectionFlags::Writeable | SectionFlags::BSS, + /* Override */ true); +} + +std::string SparcELFTargetAsmInfo::printSectionFlags(unsigned flags) const { + if (flags & SectionFlags::Mergeable) + return ELFTargetAsmInfo::printSectionFlags(flags); + + std::string Flags; + if (!(flags & SectionFlags::Debug)) + Flags += ",#alloc"; + if (flags & SectionFlags::Code) + Flags += ",#execinstr"; + if (flags & SectionFlags::Writeable) + Flags += ",#write"; + if (flags & SectionFlags::TLS) + Flags += ",#tls"; + + return Flags; +} diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.h b/lib/Target/Sparc/SparcTargetAsmInfo.h new file mode 100644 index 000000000000..1af5d80b5503 --- /dev/null +++ b/lib/Target/Sparc/SparcTargetAsmInfo.h @@ -0,0 +1,33 @@ +//=====-- SparcTargetAsmInfo.h - Sparc asm properties ---------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SparcTargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCTARGETASMINFO_H +#define SPARCTARGETASMINFO_H + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" + +namespace llvm { + + // Forward declaration. + class TargetMachine; + + struct SparcELFTargetAsmInfo : public ELFTargetAsmInfo { + explicit SparcELFTargetAsmInfo(const TargetMachine &TM); + + std::string printSectionFlags(unsigned flags) const; + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp new file mode 100644 index 000000000000..eda030924100 --- /dev/null +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -0,0 +1,94 @@ +//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "SparcTargetAsmInfo.h" +#include "SparcTargetMachine.h" +#include "Sparc.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachineRegistry.h" +using namespace llvm; + +/// SparcTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int SparcTargetMachineModule; +int SparcTargetMachineModule = 0; + +// Register the target. 
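+// Constructing this static object at load time is what makes the backend +// selectable, e.g. "llc -march=sparc" resolves to this target through the +// "sparc" key below.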
+static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC"); + +const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const { + // FIXME: Handle Solaris subtarget someday :) + return new SparcELFTargetAsmInfo(*this); +} + +/// SparcTargetMachine ctor - Create an ILP32 architecture model +/// +SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS) + : DataLayout("E-p:32:32-f128:128:128"), + Subtarget(M, FS), TLInfo(*this), InstrInfo(Subtarget), + FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { +} + +unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-") + return 20; + + // If the target triple is something non-sparc, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::BigEndian && + M.getPointerSize() == Module::Pointer32) +#ifdef __sparc__ + return 20; // BE/32 ==> Prefer sparc on sparc +#else + return 5; // BE/32 ==> Prefer ppc elsewhere +#endif + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + +#if defined(__sparc__) + return 10; +#else + return 0; +#endif +} + +bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createSparcISelDag(*this)); + return false; +} + +/// addPreEmitPass - This pass may be implemented by targets that want to run +/// passes immediately before machine code is emitted. This should return +/// true if -print-machineinstrs should print out the code after the passes. +bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createSparcFPMoverPass(*this)); + PM.add(createSparcDelaySlotFillerPass(*this)); + return true; +} + +bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + // Output assembly language. + PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose)); + return false; +} diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h new file mode 100644 index 000000000000..40b44f2fb34d --- /dev/null +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -0,0 +1,63 @@ +//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Sparc specific subclass of TargetMachine.
+// +//===----------------------------------------------------------------------===// + +#ifndef SPARCTARGETMACHINE_H +#define SPARCTARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "SparcInstrInfo.h" +#include "SparcSubtarget.h" +#include "SparcISelLowering.h" + +namespace llvm { + +class Module; + +class SparcTargetMachine : public LLVMTargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + SparcSubtarget Subtarget; + SparcTargetLowering TLInfo; + SparcInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + SparcTargetMachine(const Module &M, const std::string &FS); + + virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const SparcSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const SparcRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual SparcTargetLowering* getTargetLowering() const { + return const_cast<SparcTargetLowering*>(&TLInfo); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp new file mode 100644 index 000000000000..f9370256c602 --- /dev/null +++ b/lib/Target/SubtargetFeature.cpp @@ -0,0 +1,364 @@ +//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SubtargetFeature interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/SubtargetFeature.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Streams.h" +#include <algorithm> +#include <cassert> +#include <cstring> +#include <cstdlib> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Static Helper Functions +//===----------------------------------------------------------------------===// + +/// hasFlag - Determine if a feature has a flag; '+' or '-' +/// +static inline bool hasFlag(const std::string &Feature) { + assert(!Feature.empty() && "Empty string"); + // Get first character + char Ch = Feature[0]; + // Check if first character is '+' or '-' flag + return Ch == '+' || Ch == '-'; +} + +/// StripFlag - Return string stripped of flag. +/// +static inline std::string StripFlag(const std::string &Feature) { + return hasFlag(Feature) ? Feature.substr(1) : Feature; +} + +/// isEnabled - Return true if enable flag; '+'.
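+/// For example: isEnabled("+v9") is true, isEnabled("-v9") is false, and a +/// bare "v9" (no flag character) is also false.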
+/// +static inline bool isEnabled(const std::string &Feature) { + assert(!Feature.empty() && "Empty string"); + // Get first character + char Ch = Feature[0]; + // Check if first character is '+' for enabled + return Ch == '+'; +} + +/// PrependFlag - Return a string with a prepended flag; '+' or '-'. +/// +static inline std::string PrependFlag(const std::string &Feature, + bool IsEnabled) { + assert(!Feature.empty() && "Empty string"); + if (hasFlag(Feature)) return Feature; + return std::string(IsEnabled ? "+" : "-") + Feature; +} + +/// Split - Splits a string of comma separated items into a vector of strings. +/// +static void Split(std::vector<std::string> &V, const std::string &S) { + // Start at beginning of string. + size_t Pos = 0; + while (true) { + // Find the next comma + size_t Comma = S.find(',', Pos); + // If no comma found then the rest of the string is used + if (Comma == std::string::npos) { + // Add string to vector + V.push_back(S.substr(Pos)); + break; + } + // Otherwise add substring to vector + V.push_back(S.substr(Pos, Comma - Pos)); + // Advance to next item + Pos = Comma + 1; + } +} + +/// Join a vector of strings to a string with a comma separating each element. +/// +static std::string Join(const std::vector<std::string> &V) { + // Start with empty string. + std::string Result; + // If the vector is not empty + if (!V.empty()) { + // Start with the CPU feature + Result = V[0]; + // For each successive feature + for (size_t i = 1; i < V.size(); i++) { + // Add a comma + Result += ","; + // Add the feature + Result += V[i]; + } + } + // Return the features string + return Result; +} + +/// Adding features. +void SubtargetFeatures::AddFeature(const std::string &String, + bool IsEnabled) { + // Don't add empty features + if (!String.empty()) { + // Convert to lowercase, prepend flag and add to vector + Features.push_back(PrependFlag(LowercaseString(String), IsEnabled)); + } +} + +/// Find KV in array using binary search. +template<typename T> const T *Find(const std::string &S, const T *A, size_t L) { + // Make the lower bound element we're looking for + T KV; + KV.Key = S.c_str(); + // Determine the end of the array + const T *Hi = A + L; + // Binary search the array + const T *F = std::lower_bound(A, Hi, KV); + // If not found then return NULL + if (F == Hi || std::string(F->Key) != S) return NULL; + // Return the found array item + return F; +} + +/// getLongestEntryLength - Return the length of the longest entry in the table. +/// +static size_t getLongestEntryLength(const SubtargetFeatureKV *Table, + size_t Size) { + size_t MaxLen = 0; + for (size_t i = 0; i < Size; i++) + MaxLen = std::max(MaxLen, std::strlen(Table[i].Key)); + return MaxLen; +} + +/// Display help for feature choices. +/// +static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, + const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) { + // Determine the length of the longest CPU and Feature entries. + unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize); + unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize); + + // Print the CPU table. + cerr << "Available CPUs for this target:\n\n"; + for (size_t i = 0; i != CPUTableSize; i++) + cerr << " " << CPUTable[i].Key + << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ') + << " - " << CPUTable[i].Desc << ".\n"; + cerr << "\n"; + + // Print the Feature table.
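+ // Each row is printed as " <key> - <desc>.", e.g. (hypothetical entry): + // v9 - Enable SPARC-V9 instructions.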
+ cerr << "Available features for this target:\n\n"; + for (size_t i = 0; i != FeatTableSize; i++) + cerr << " " << FeatTable[i].Key + << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ') + << " - " << FeatTable[i].Desc << ".\n"; + cerr << "\n"; + + cerr << "Use +feature to enable a feature, or -feature to disable it.\n" + << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n"; + exit(1); +} + +//===----------------------------------------------------------------------===// +// SubtargetFeatures Implementation +//===----------------------------------------------------------------------===// + +SubtargetFeatures::SubtargetFeatures(const std::string &Initial) { + // Break up string into separate features + Split(Features, Initial); +} + + +std::string SubtargetFeatures::getString() const { + return Join(Features); +} +void SubtargetFeatures::setString(const std::string &Initial) { + // Throw out old features + Features.clear(); + // Break up string into separate features + Split(Features, LowercaseString(Initial)); +} + + +/// setCPU - Set the CPU string. Replaces previous setting. Setting to "" +/// clears CPU. +void SubtargetFeatures::setCPU(const std::string &String) { + Features[0] = LowercaseString(String); +} + + +/// setCPUIfNone - Setting CPU string only if no string is set. +/// +void SubtargetFeatures::setCPUIfNone(const std::string &String) { + if (Features[0].empty()) setCPU(String); +} + +/// getCPU - Returns current CPU. +/// +const std::string & SubtargetFeatures::getCPU() const { + return Features[0]; +} + + +/// SetImpliedBits - For each feature that is (transitively) implied by this +/// feature, set it. +/// +static +void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, + const SubtargetFeatureKV *FeatureTable, + size_t FeatureTableSize) { + for (size_t i = 0; i < FeatureTableSize; ++i) { + const SubtargetFeatureKV &FE = FeatureTable[i]; + + if (FeatureEntry->Value == FE.Value) continue; + + if (FeatureEntry->Implies & FE.Value) { + Bits |= FE.Value; + SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); + } + } +} + +/// ClearImpliedBits - For each feature that (transitively) implies this +/// feature, clear it. +/// +static +void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, + const SubtargetFeatureKV *FeatureTable, + size_t FeatureTableSize) { + for (size_t i = 0; i < FeatureTableSize; ++i) { + const SubtargetFeatureKV &FE = FeatureTable[i]; + + if (FeatureEntry->Value == FE.Value) continue; + + if (FE.Implies & FeatureEntry->Value) { + Bits &= ~FE.Value; + ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); + } + } +} + +/// getBits - Get feature bits. 
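+/// For example (hypothetical tables): with Features = ["v9", "+vis"], Bits +/// starts from the "v9" CPU entry's mask, and "+vis" then ORs in the VIS +/// feature bit together with any feature bits it transitively implies.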
+/// +uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, + size_t CPUTableSize, + const SubtargetFeatureKV *FeatureTable, + size_t FeatureTableSize) { + assert(CPUTable && "missing CPU table"); + assert(FeatureTable && "missing features table"); +#ifndef NDEBUG + for (size_t i = 1; i < CPUTableSize; i++) { + assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 && + "CPU table is not sorted"); + } + for (size_t i = 1; i < FeatureTableSize; i++) { + assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 && + "CPU features table is not sorted"); + } +#endif + uint32_t Bits = 0; // Resulting bits + + // Check if help is needed + if (Features[0] == "help") + Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize); + + // Find CPU entry + const SubtargetFeatureKV *CPUEntry = + Find(Features[0], CPUTable, CPUTableSize); + // If there is a match + if (CPUEntry) { + // Set base feature bits + Bits = CPUEntry->Value; + + // Set the feature implied by this CPU feature, if any. + for (size_t i = 0; i < FeatureTableSize; ++i) { + const SubtargetFeatureKV &FE = FeatureTable[i]; + if (CPUEntry->Value & FE.Value) + SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); + } + } else { + cerr << "'" << Features[0] + << "' is not a recognized processor for this target" + << " (ignoring processor)" + << "\n"; + } + // Iterate through each feature + for (size_t i = 1; i < Features.size(); i++) { + const std::string &Feature = Features[i]; + + // Check for help + if (Feature == "+help") + Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize); + + // Find feature in table. + const SubtargetFeatureKV *FeatureEntry = + Find(StripFlag(Feature), FeatureTable, FeatureTableSize); + // If there is a match + if (FeatureEntry) { + // Enable/disable feature in bits + if (isEnabled(Feature)) { + Bits |= FeatureEntry->Value; + + // For each feature that this implies, set it. + SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize); + } else { + Bits &= ~FeatureEntry->Value; + + // For each feature that implies this, clear it. + ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize); + } + } else { + cerr << "'" << Feature + << "' is not a recognized feature for this target" + << " (ignoring feature)" + << "\n"; + } + } + + return Bits; +} + +/// Get info pointer +void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, + size_t TableSize) { + assert(Table && "missing table"); +#ifndef NDEBUG + for (size_t i = 1; i < TableSize; i++) { + assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted"); + } +#endif + + // Find entry + const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize); + + if (Entry) { + return Entry->Value; + } else { + cerr << "'" << Features[0] + << "' is not a recognized processor for this target" + << " (ignoring processor)" + << "\n"; + return NULL; + } +} + +/// print - Print feature string. +/// +void SubtargetFeatures::print(std::ostream &OS) const { + for (size_t i = 0; i < Features.size(); i++) { + OS << Features[i] << " "; + } + OS << "\n"; +} + +/// dump - Dump feature info. 
+/// +void SubtargetFeatures::dump() const { + print(*cerr.stream()); +} diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp new file mode 100644 index 000000000000..ed544b73eaed --- /dev/null +++ b/lib/Target/Target.cpp @@ -0,0 +1,94 @@ +//===-- Target.cpp --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the C bindings for libLLVMTarget.a, which implements +// target information. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Target.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetData.h" +#include <cstring> + +using namespace llvm; + +LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) { + return wrap(new TargetData(StringRep)); +} + +void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) { + unwrap(PM)->add(new TargetData(*unwrap(TD))); +} + +char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) { + std::string StringRep = unwrap(TD)->getStringRepresentation(); + return strdup(StringRep.c_str()); +} + +LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) { + return unwrap(TD)->isLittleEndian(); +} + +unsigned LLVMPointerSize(LLVMTargetDataRef TD) { + return unwrap(TD)->getPointerSize(); +} + +LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { + return wrap(unwrap(TD)->getIntPtrType()); +} + +unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getTypeSizeInBits(unwrap(Ty)); +} + +unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getTypeStoreSize(unwrap(Ty)); +} + +unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getTypeAllocSize(unwrap(Ty)); +} + +unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getABITypeAlignment(unwrap(Ty)); +} + +unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty)); +} + +unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { + return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty)); +} + +unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD, + LLVMValueRef GlobalVar) { + return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar)); +} + +unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy, + unsigned long long Offset) { + const StructType *STy = unwrap<StructType>(StructTy); + return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset); +} + +unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy, + unsigned Element) { + const StructType *STy = unwrap<StructType>(StructTy); + return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element); +} + +void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) { + unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy)); +} + +void LLVMDisposeTargetData(LLVMTargetDataRef TD) { + delete unwrap(TD); +} diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp new file mode 100644 index 000000000000..6a2de6f582a6 --- /dev/null +++ b/lib/Target/TargetAsmInfo.cpp @@ -0,0 +1,461 @@ +//===-- TargetAsmInfo.cpp - Asm Info ---------------------------------------==// +// +// The LLVM
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related to what form asm statements +// should take. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include <cctype> +#include <cstring> + +using namespace llvm; + +void TargetAsmInfo::fillDefaultValues() { + BSSSection = "\t.bss"; + BSSSection_ = 0; + ReadOnlySection = 0; + SmallDataSection = 0; + SmallBSSSection = 0; + SmallRODataSection = 0; + TLSDataSection = 0; + TLSBSSSection = 0; + ZeroFillDirective = 0; + NonexecutableStackDirective = 0; + NeedsSet = false; + MaxInstLength = 4; + PCSymbol = "$"; + SeparatorChar = ';'; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = "."; + LessPrivateGlobalPrefix = ""; + JumpTableSpecialLabelPrefix = 0; + GlobalVarAddrPrefix = ""; + GlobalVarAddrSuffix = ""; + FunctionAddrPrefix = ""; + FunctionAddrSuffix = ""; + PersonalityPrefix = ""; + PersonalitySuffix = ""; + NeedsIndirectEncoding = false; + InlineAsmStart = "#APP"; + InlineAsmEnd = "#NO_APP"; + AssemblerDialect = 0; + StringConstantPrefix = ".str"; + ZeroDirective = "\t.zero\t"; + ZeroDirectiveSuffix = 0; + AsciiDirective = "\t.ascii\t"; + AscizDirective = "\t.asciz\t"; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = "\t.quad\t"; + AlignDirective = "\t.align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + SwitchToSectionDirective = "\t.section\t"; + TextSectionStartSuffix = ""; + DataSectionStartSuffix = ""; + SectionEndDirectiveSuffix = 0; + ConstantPoolSection = "\t.section .rodata"; + JumpTableDataSection = "\t.section .rodata"; + JumpTableDirective = 0; + CStringSection = 0; + CStringSection_ = 0; + // FIXME: Flags are ELFish - replace with normal section stuff. + StaticCtorsSection = "\t.section .ctors,\"aw\",@progbits"; + StaticDtorsSection = "\t.section .dtors,\"aw\",@progbits"; + GlobalDirective = "\t.globl\t"; + SetDirective = 0; + LCOMMDirective = 0; + COMMDirective = "\t.comm\t"; + COMMDirectiveTakesAlignment = true; + HasDotTypeDotSizeDirective = true; + HasSingleParameterDotFile = true; + UsedDirective = 0; + WeakRefDirective = 0; + WeakDefDirective = 0; + // FIXME: These are ELFish - move to ELFTAI.
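+ // For example, with HiddenDirective below a hidden symbol foo is printed + // as "\t.hidden\tfoo" in the emitted assembly.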
+ HiddenDirective = "\t.hidden\t"; + ProtectedDirective = "\t.protected\t"; + AbsoluteDebugSectionOffsets = false; + AbsoluteEHSectionOffsets = false; + HasLEB128 = false; + HasDotLocAndDotFile = false; + SupportsDebugInformation = false; + SupportsExceptionHandling = false; + DwarfRequiresFrameSection = true; + DwarfUsesInlineInfoSection = false; + SupportsMacInfoSection = true; + NonLocalEHFrameLabel = false; + GlobalEHDirective = 0; + SupportsWeakOmittedEHFrame = true; + DwarfSectionOffsetDirective = 0; + DwarfAbbrevSection = ".debug_abbrev"; + DwarfInfoSection = ".debug_info"; + DwarfLineSection = ".debug_line"; + DwarfFrameSection = ".debug_frame"; + DwarfPubNamesSection = ".debug_pubnames"; + DwarfPubTypesSection = ".debug_pubtypes"; + DwarfDebugInlineSection = ".debug_inlined"; + DwarfStrSection = ".debug_str"; + DwarfLocSection = ".debug_loc"; + DwarfARangesSection = ".debug_aranges"; + DwarfRangesSection = ".debug_ranges"; + DwarfMacInfoSection = ".debug_macinfo"; + DwarfEHFrameSection = ".eh_frame"; + DwarfExceptionSection = ".gcc_except_table"; + AsmTransCBE = 0; + TextSection = getUnnamedSection("\t.text", SectionFlags::Code); + DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable); +} + +TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm) + : TM(tm) { + fillDefaultValues(); +} + +TargetAsmInfo::~TargetAsmInfo() { +} + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorChar or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorChar or newlines. +/// Variable-length instructions are not handled here; this function +/// may be overridden in the target code to do that. +unsigned TargetAsmInfo::getInlineAsmLength(const char *Str) const { + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || *Str == SeparatorChar) + atInsnStart = true; + if (atInsnStart && !isspace(*Str)) { + Length += MaxInstLength; + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, CommentString, strlen(CommentString))==0) + atInsnStart = false; + } + + return Length; +} + +unsigned TargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const { + return dwarf::DW_EH_PE_absptr; +} + +static bool isSuitableForBSS(const GlobalVariable *GV) { + if (!GV->hasInitializer()) + return true; + + // Leave constant zeros in readonly constant sections, so they can be shared + Constant *C = GV->getInitializer(); + return (C->isNullValue() && !GV->isConstant() && !NoZerosInBSS); +} + +static bool isConstantString(const Constant *C) { + // First check: do we have a constant array of i8 terminated with zero? + const ConstantArray *CVA = dyn_cast<ConstantArray>(C); + // Check if the initializer is a null-terminated string + if (CVA && CVA->isCString()) + return true; + + // Another possibility: [1 x i8] zeroinitializer + if (isa<ConstantAggregateZero>(C)) { + if (const ArrayType *Ty = dyn_cast<ArrayType>(C->getType())) { + return (Ty->getElementType() == Type::Int8Ty && + Ty->getNumElements() == 1); + } + } + + return false; +} + +unsigned TargetAsmInfo::RelocBehaviour() const { + // By default - all relocations in PIC mode would force symbol to be + // placed in r/w section. + return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::None); +} + +SectionKind::Kind +TargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const { + // Early exit - functions should be always in text sections. + if (isa<Function>(GV)) + return SectionKind::Text; + + const GlobalVariable* GVar = dyn_cast<GlobalVariable>(GV); + assert(GVar && "Invalid global value for section selection"); + bool isThreadLocal = GVar->isThreadLocal(); + + if (isSuitableForBSS(GVar)) { + // The variable can easily be put into the BSS section. + return (isThreadLocal ? SectionKind::ThreadBSS : SectionKind::BSS); + } else if (GVar->isConstant() && !isThreadLocal) { + // Now we know that the variable has an initializer and that it is + // constant. We need to check its initializer to decide which section to + // output it into. Also note, there is no thread-local r/o section. + Constant *C = GVar->getInitializer(); + if (C->ContainsRelocations(Reloc::LocalOrGlobal)) { + // Decide whether it is still possible to put the symbol into an r/o + // section. + unsigned Reloc = RelocBehaviour(); + + // We already did a query for 'all' relocs, thus - early exits. + if (Reloc == Reloc::LocalOrGlobal) + return SectionKind::Data; + else if (Reloc == Reloc::None) + return SectionKind::ROData; + else { + // Ok, target wants something funny. Honour it. + return (C->ContainsRelocations(Reloc) ? + SectionKind::Data : SectionKind::ROData); + } + } else { + // Check if the initializer is a null-terminated string + if (isConstantString(C)) + return SectionKind::RODataMergeStr; + else + return SectionKind::RODataMergeConst; + } + } + + // The variable is either not constant or is thread-local - output it to the + // data section. + return (isThreadLocal ? SectionKind::ThreadData : SectionKind::Data); +} + +unsigned +TargetAsmInfo::SectionFlagsForGlobal(const GlobalValue *GV, + const char* Name) const { + unsigned Flags = SectionFlags::None; + + // Decode flags from global itself. + if (GV) { + SectionKind::Kind Kind = SectionKindForGlobal(GV); + switch (Kind) { + case SectionKind::Text: + Flags |= SectionFlags::Code; + break; + case SectionKind::ThreadData: + case SectionKind::ThreadBSS: + Flags |= SectionFlags::TLS; + // FALLS THROUGH + case SectionKind::Data: + case SectionKind::DataRel: + case SectionKind::DataRelLocal: + case SectionKind::DataRelRO: + case SectionKind::DataRelROLocal: + case SectionKind::BSS: + Flags |= SectionFlags::Writeable; + break; + case SectionKind::ROData: + case SectionKind::RODataMergeStr: + case SectionKind::RODataMergeConst: + // No additional flags here + break; + case SectionKind::SmallData: + case SectionKind::SmallBSS: + Flags |= SectionFlags::Writeable; + // FALLS THROUGH + case SectionKind::SmallROData: + Flags |= SectionFlags::Small; + break; + default: + assert(0 && "Unexpected section kind!"); + } + + if (GV->isWeakForLinker()) + Flags |= SectionFlags::Linkonce; + } + + // Add flags from sections, if any. + if (Name && *Name) { + Flags |= SectionFlags::Named; + + // Some lame default implementation based on some magic section names.
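+ // For example, a global explicitly placed in ".gnu.linkonce.b.foo" picks + // up the BSS flag from its name alone, and one in ".tbss.foo" gets both + // BSS and TLS, as matched below.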
+ if (strncmp(Name, ".gnu.linkonce.b.", 16) == 0 || + strncmp(Name, ".llvm.linkonce.b.", 17) == 0 || + strncmp(Name, ".gnu.linkonce.sb.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.sb.", 18) == 0) + Flags |= SectionFlags::BSS; + else if (strcmp(Name, ".tdata") == 0 || + strncmp(Name, ".tdata.", 7) == 0 || + strncmp(Name, ".gnu.linkonce.td.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.td.", 18) == 0) + Flags |= SectionFlags::TLS; + else if (strcmp(Name, ".tbss") == 0 || + strncmp(Name, ".tbss.", 6) == 0 || + strncmp(Name, ".gnu.linkonce.tb.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.tb.", 18) == 0) + Flags |= SectionFlags::BSS | SectionFlags::TLS; + } + + return Flags; +} + +const Section* +TargetAsmInfo::SectionForGlobal(const GlobalValue *GV) const { + const Section* S; + // Select section name + if (GV->hasSection()) { + // Honour section already set, if any + unsigned Flags = SectionFlagsForGlobal(GV, + GV->getSection().c_str()); + S = getNamedSection(GV->getSection().c_str(), Flags); + } else { + // Use default section depending on the 'type' of global + S = SelectSectionForGlobal(GV); + } + + return S; +} + +// Lame default implementation. Calculate the section name for global. +const Section* +TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const { + SectionKind::Kind Kind = SectionKindForGlobal(GV); + + if (GV->isWeakForLinker()) { + std::string Name = UniqueSectionForGlobal(GV, Kind); + unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str()); + return getNamedSection(Name.c_str(), Flags); + } else { + if (Kind == SectionKind::Text) + return getTextSection(); + else if (isBSS(Kind) && getBSSSection_()) + return getBSSSection_(); + else if (getReadOnlySection() && SectionKind::isReadOnly(Kind)) + return getReadOnlySection(); + } + + return getDataSection(); +} + +// Lame default implementation. Calculate the section name for machine const. +const Section* +TargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const { + // FIXME: Support data.rel stuff someday + return getDataSection(); +} + +std::string +TargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV, + SectionKind::Kind Kind) const { + switch (Kind) { + case SectionKind::Text: + return ".gnu.linkonce.t." + GV->getName(); + case SectionKind::Data: + return ".gnu.linkonce.d." + GV->getName(); + case SectionKind::DataRel: + return ".gnu.linkonce.d.rel" + GV->getName(); + case SectionKind::DataRelLocal: + return ".gnu.linkonce.d.rel.local" + GV->getName(); + case SectionKind::DataRelRO: + return ".gnu.linkonce.d.rel.ro" + GV->getName(); + case SectionKind::DataRelROLocal: + return ".gnu.linkonce.d.rel.ro.local" + GV->getName(); + case SectionKind::SmallData: + return ".gnu.linkonce.s." + GV->getName(); + case SectionKind::BSS: + return ".gnu.linkonce.b." + GV->getName(); + case SectionKind::SmallBSS: + return ".gnu.linkonce.sb." + GV->getName(); + case SectionKind::ROData: + case SectionKind::RODataMergeConst: + case SectionKind::RODataMergeStr: + return ".gnu.linkonce.r." + GV->getName(); + case SectionKind::SmallROData: + return ".gnu.linkonce.s2." + GV->getName(); + case SectionKind::ThreadData: + return ".gnu.linkonce.td." + GV->getName(); + case SectionKind::ThreadBSS: + return ".gnu.linkonce.tb." + GV->getName(); + default: + assert(0 && "Unknown section kind"); + } + return NULL; +} + +const Section* +TargetAsmInfo::getNamedSection(const char *Name, unsigned Flags, + bool Override) const { + Section& S = Sections[Name]; + + // This is newly-created section, set it up properly. 
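+ // e.g. the first getNamedSection(".rodata", Flags) call creates and caches + // the entry in Sections; later lookups under the same name return the + // cached Section and, unless Override is set, keep its original flags.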
+ if (S.Flags == SectionFlags::Invalid || Override) { + S.Flags = Flags | SectionFlags::Named; + S.Name = Name; + } + + return &S; +} + +const Section* +TargetAsmInfo::getUnnamedSection(const char *Directive, unsigned Flags, + bool Override) const { + Section& S = Sections[Directive]; + + // This is newly-created section, set it up properly. + if (S.Flags == SectionFlags::Invalid || Override) { + S.Flags = Flags & ~SectionFlags::Named; + S.Name = Directive; + } + + return &S; +} + +const std::string& +TargetAsmInfo::getSectionFlags(unsigned Flags) const { + SectionFlags::FlagsStringsMapType::iterator I = FlagsStrings.find(Flags); + + // We didn't print these flags yet, print and save them to map. This reduces + // amount of heap thrashing due to std::string construction / concatenation. + if (I == FlagsStrings.end()) + I = FlagsStrings.insert(std::make_pair(Flags, + printSectionFlags(Flags))).first; + + return I->second; +} + +unsigned TargetAsmInfo::getULEB128Size(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +unsigned TargetAsmInfo::getSLEB128Size(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp new file mode 100644 index 000000000000..67fefbb70b6a --- /dev/null +++ b/lib/Target/TargetData.cpp @@ -0,0 +1,603 @@ +//===-- TargetData.cpp - Data size & alignment routines --------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target properties related to datatype size/offset/alignment +// information. +// +// This structure should be created once, filled in if the defaults are not +// correct and then passed around by const&. None of the member functions +// require modification to the object. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetData.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cstdlib> +using namespace llvm; + +// Handle the Pass registration stuff necessary to use TargetData's. + +// Register the default TargetData implementation... +static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false, + true); +char TargetData::ID = 0; + +//===----------------------------------------------------------------------===// +// Support for StructLayout +//===----------------------------------------------------------------------===// + +StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { + StructAlignment = 0; + StructSize = 0; + NumElements = ST->getNumElements(); + + // Loop over each of the elements, placing them in memory. + for (unsigned i = 0, e = NumElements; i != e; ++i) { + const Type *Ty = ST->getElementType(i); + unsigned TyAlign = ST->isPacked() ?
1 : TD.getABITypeAlignment(Ty); + + // Add padding if necessary to align the data element properly. + if ((StructSize & (TyAlign-1)) != 0) + StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign); + + // Keep track of maximum alignment constraint. + StructAlignment = std::max(TyAlign, StructAlignment); + + MemberOffsets[i] = StructSize; + StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item + } + + // Empty structures have alignment of 1 byte. + if (StructAlignment == 0) StructAlignment = 1; + + // Add padding to the end of the struct so that it could be put in an array + // and all array elements would be aligned correctly. + if ((StructSize & (StructAlignment-1)) != 0) + StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment); +} + + +/// getElementContainingOffset - Given a valid offset into the structure, +/// return the structure index that contains it. +unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { + const uint64_t *SI = + std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset); + assert(SI != &MemberOffsets[0] && "Offset not in structure type!"); + --SI; + assert(*SI <= Offset && "upper_bound didn't work"); + assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) && + (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) && + "Upper bound didn't work!"); + + // Multiple fields can have the same offset if any of them are zero sized. + // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop + // at the i32 element, because it is the last element at that offset. This is + // the right one to return, because anything after it will have a higher + // offset, implying that this element is non-empty. + return SI-&MemberOffsets[0]; +} + +//===----------------------------------------------------------------------===// +// TargetAlignElem, TargetAlign support +//===----------------------------------------------------------------------===// + +TargetAlignElem +TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align, + unsigned char pref_align, uint32_t bit_width) { + assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); + TargetAlignElem retval; + retval.AlignType = align_type; + retval.ABIAlign = abi_align; + retval.PrefAlign = pref_align; + retval.TypeBitWidth = bit_width; + return retval; +} + +bool +TargetAlignElem::operator==(const TargetAlignElem &rhs) const { + return (AlignType == rhs.AlignType + && ABIAlign == rhs.ABIAlign + && PrefAlign == rhs.PrefAlign + && TypeBitWidth == rhs.TypeBitWidth); +} + +std::ostream & +TargetAlignElem::dump(std::ostream &os) const { + return os << AlignType + << TypeBitWidth + << ":" << (int) (ABIAlign * 8) + << ":" << (int) (PrefAlign * 8); +} + +const TargetAlignElem TargetData::InvalidAlignmentElem = + TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0); + +//===----------------------------------------------------------------------===// +// TargetData Class Implementation +//===----------------------------------------------------------------------===// + +/*! + A TargetDescription string consists of a sequence of hyphen-delimited + specifiers for target endianness, pointer size and alignments, and various + primitive type sizes and alignments. A typical string looks something like: +

+ "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64" +

+ (note: this string is not fully specified and is only an example.) + \p + Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align, + below) dictates how a type will be aligned within an aggregate and when used + as an argument. Preferred alignment (pref_align, below) determines a type's + alignment when emitted as a global. + \p + Specifier string details: +

+ [E|e]: Endianness. "E" specifies a big-endian target data model, "e" + specifies a little-endian target data model. +

+ p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim: Pointer size,
+ ABI and preferred alignment.

+ @verbatim<type><size>:<abi_align>:<pref_align>@endverbatim: Numeric type
+ alignment. Type is
+ one of i|f|v|a, corresponding to integer, floating point, vector (aka
+ packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits.
+ \p
+ The default string, fully specified is:

+ "E-p:64:64:64-a0:0:0-f32:32:32-f64:0:64" + "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:0:64" + "-v64:64:64-v128:128:128" +

+ Note that in the case of aggregates, 0 is the default ABI and preferred + alignment. This is a special case, where the aggregate's computed worst-case + alignment will be used. + */ +void TargetData::init(const std::string &TargetDescription) { + std::string temp = TargetDescription; + + LittleEndian = false; + PointerMemSize = 8; + PointerABIAlign = 8; + PointerPrefAlign = PointerABIAlign; + + // Default alignments + setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1 + setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8 + setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16 + setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32 + setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64 + setAlignment(FLOAT_ALIGN, 4, 4, 32); // float + setAlignment(FLOAT_ALIGN, 8, 8, 64); // double + setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32 + setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... + setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ... + + while (!temp.empty()) { + std::string token = getToken(temp, "-"); + std::string arg0 = getToken(token, ":"); + const char *p = arg0.c_str(); + switch(*p) { + case 'E': + LittleEndian = false; + break; + case 'e': + LittleEndian = true; + break; + case 'p': + PointerMemSize = atoi(getToken(token,":").c_str()) / 8; + PointerABIAlign = atoi(getToken(token,":").c_str()) / 8; + PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8; + if (PointerPrefAlign == 0) + PointerPrefAlign = PointerABIAlign; + break; + case 'i': + case 'v': + case 'f': + case 'a': + case 's': { + AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning + switch(*p) { + case 'i': align_type = INTEGER_ALIGN; break; + case 'v': align_type = VECTOR_ALIGN; break; + case 'f': align_type = FLOAT_ALIGN; break; + case 'a': align_type = AGGREGATE_ALIGN; break; + case 's': align_type = STACK_ALIGN; break; + } + uint32_t size = (uint32_t) atoi(++p); + unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8; + unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8; + if (pref_align == 0) + pref_align = abi_align; + setAlignment(align_type, abi_align, pref_align, size); + break; + } + default: + break; + } + } +} + +TargetData::TargetData(const Module *M) + : ImmutablePass(&ID) { + init(M->getDataLayout()); +} + +void +TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align, + unsigned char pref_align, uint32_t bit_width) { + assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { + if (Alignments[i].AlignType == align_type && + Alignments[i].TypeBitWidth == bit_width) { + // Update the abi, preferred alignments. + Alignments[i].ABIAlign = abi_align; + Alignments[i].PrefAlign = pref_align; + return; + } + } + + Alignments.push_back(TargetAlignElem::get(align_type, abi_align, + pref_align, bit_width)); +} + +/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or +/// preferred if ABIInfo = false) the target wants for the specified datatype. +unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, + uint32_t BitWidth, bool ABIInfo, + const Type *Ty) const { + // Check to see if we have an exact match and remember the best match we see. + int BestMatchIdx = -1; + int LargestInt = -1; + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { + if (Alignments[i].AlignType == AlignType && + Alignments[i].TypeBitWidth == BitWidth) + return ABIInfo ? 
Alignments[i].ABIAlign : Alignments[i].PrefAlign; + + // The best match so far depends on what we're looking for. + if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) { + // If this is a specification for a smaller vector type, we will fall back + // to it. This happens because <128 x double> can be implemented in terms + // of 64 <2 x double>. + if (Alignments[i].TypeBitWidth < BitWidth) { + // Verify that we pick the biggest of the fallbacks. + if (BestMatchIdx == -1 || + Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth) + BestMatchIdx = i; + } + } else if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { + // The "best match" for integers is the smallest size that is larger than + // the BitWidth requested. + if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || + Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth)) + BestMatchIdx = i; + // However, if there isn't one that's larger, then we must use the + // largest one we have (see below) + if (LargestInt == -1 || + Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth) + LargestInt = i; + } + } + + // Okay, we didn't find an exact solution. Fall back here depending on what + // is being looked for. + if (BestMatchIdx == -1) { + // If we didn't find an integer alignment, fall back on most conservative. + if (AlignType == INTEGER_ALIGN) { + BestMatchIdx = LargestInt; + } else { + assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!"); + + // If we didn't find a vector size that is smaller or equal to this type, + // then we will end up scalarizing this to its element type. Just return + // the alignment of the element. + return getAlignment(cast(Ty)->getElementType(), ABIInfo); + } + } + + // Since we got a "best match" index, just return it. + return ABIInfo ? Alignments[BestMatchIdx].ABIAlign + : Alignments[BestMatchIdx].PrefAlign; +} + +namespace { + +/// LayoutInfo - The lazy cache of structure layout information maintained by +/// TargetData. Note that the struct types must have been free'd before +/// llvm_shutdown is called (and thus this is deallocated) because all the +/// targets with cached elements should have been destroyed. +/// +typedef std::pair LayoutKey; + +struct DenseMapLayoutKeyInfo { + static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); } + static inline LayoutKey getTombstoneKey() { + return LayoutKey((TargetData*)(intptr_t)-1, 0); + } + static unsigned getHashValue(const LayoutKey &Val) { + return DenseMapInfo::getHashValue(Val.first) ^ + DenseMapInfo::getHashValue(Val.second); + } + static bool isEqual(const LayoutKey &LHS, const LayoutKey &RHS) { + return LHS == RHS; + } + + static bool isPod() { return true; } +}; + +typedef DenseMap LayoutInfoTy; + +} + +static ManagedStatic LayoutInfo; + +TargetData::~TargetData() { + if (!LayoutInfo.isConstructed()) + return; + + // Remove any layouts for this TD. + LayoutInfoTy &TheMap = *LayoutInfo; + for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) { + if (I->first.first == this) { + I->second->~StructLayout(); + free(I->second); + TheMap.erase(I++); + } else { + ++I; + } + } +} + +const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { + LayoutInfoTy &TheMap = *LayoutInfo; + + StructLayout *&SL = TheMap[LayoutKey(this, Ty)]; + if (SL) return SL; + + // Otherwise, create the struct layout. Because it is variable length, we + // malloc it, then use placement new. 
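// Sketch of the over-allocate-and-placement-new pattern used just below,
// with a hypothetical stand-in type (StructLayout itself ends in what is
// effectively a one-element offset array that the extra bytes extend):
//
//   struct VarLen { unsigned N; uint64_t Tail[1]; };  // hypothetical
//   VarLen *V = (VarLen*)malloc(sizeof(VarLen) +
//                               (NumElts - 1) * sizeof(uint64_t));
//   new (V) VarLen();       // placement new: construct into the raw buffer
//   // ... use V ...
//   V->~VarLen();           // destroy explicitly, then release the storage,
//   free(V);                // mirroring the cleanup in ~TargetData()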
+ int NumElts = Ty->getNumElements(); + StructLayout *L = + (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t)); + + // Set SL before calling StructLayout's ctor. The ctor could cause other + // entries to be added to TheMap, invalidating our reference. + SL = L; + + new (L) StructLayout(Ty, *this); + return L; +} + +/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout +/// objects. If a TargetData object is alive when types are being refined and +/// removed, this method must be called whenever a StructType is removed to +/// avoid a dangling pointer in this cache. +void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { + if (!LayoutInfo.isConstructed()) return; // No cache. + + LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty)); + if (I == LayoutInfo->end()) return; + + I->second->~StructLayout(); + free(I->second); + LayoutInfo->erase(I); +} + + +std::string TargetData::getStringRepresentation() const { + std::string repr; + repr.append(LittleEndian ? "e" : "E"); + repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))). + append(":").append(itostr((int64_t) (PointerABIAlign * 8))). + append(":").append(itostr((int64_t) (PointerPrefAlign * 8))); + for (align_const_iterator I = Alignments.begin(); + I != Alignments.end(); + ++I) { + repr.append("-").append(1, (char) I->AlignType). + append(utostr((int64_t) I->TypeBitWidth)). + append(":").append(utostr((uint64_t) (I->ABIAlign * 8))). + append(":").append(utostr((uint64_t) (I->PrefAlign * 8))); + } + return repr; +} + + +uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { + assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); + switch (Ty->getTypeID()) { + case Type::LabelTyID: + case Type::PointerTyID: + return getPointerSizeInBits(); + case Type::ArrayTyID: { + const ArrayType *ATy = cast(Ty); + return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); + } + case Type::StructTyID: + // Get the layout annotation... which is lazily created on demand. + return getStructLayout(cast(Ty))->getSizeInBits(); + case Type::IntegerTyID: + return cast(Ty)->getBitWidth(); + case Type::VoidTyID: + return 8; + case Type::FloatTyID: + return 32; + case Type::DoubleTyID: + return 64; + case Type::PPC_FP128TyID: + case Type::FP128TyID: + return 128; + // In memory objects this is always aligned to a higher boundary, but + // only 80 bits contain information. + case Type::X86_FP80TyID: + return 80; + case Type::VectorTyID: + return cast(Ty)->getBitWidth(); + default: + assert(0 && "TargetData::getTypeSizeInBits(): Unsupported type"); + break; + } + return 0; +} + +/*! + \param abi_or_pref Flag that determines which alignment is returned. true + returns the ABI alignment, false returns the preferred alignment. + \param Ty The underlying type for which alignment is determined. + + Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref + == false) for the requested type \a Ty. + */ +unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { + int AlignType = -1; + + assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); + switch (Ty->getTypeID()) { + // Early escape for the non-numeric types. + case Type::LabelTyID: + case Type::PointerTyID: + return (abi_or_pref + ? 
getPointerABIAlignment() + : getPointerPrefAlignment()); + case Type::ArrayTyID: + return getAlignment(cast(Ty)->getElementType(), abi_or_pref); + + case Type::StructTyID: { + // Packed structure types always have an ABI alignment of one. + if (cast(Ty)->isPacked() && abi_or_pref) + return 1; + + // Get the layout annotation... which is lazily created on demand. + const StructLayout *Layout = getStructLayout(cast(Ty)); + unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); + return std::max(Align, (unsigned)Layout->getAlignment()); + } + case Type::IntegerTyID: + case Type::VoidTyID: + AlignType = INTEGER_ALIGN; + break; + case Type::FloatTyID: + case Type::DoubleTyID: + // PPC_FP128TyID and FP128TyID have different data contents, but the + // same size and alignment, so they look the same here. + case Type::PPC_FP128TyID: + case Type::FP128TyID: + case Type::X86_FP80TyID: + AlignType = FLOAT_ALIGN; + break; + case Type::VectorTyID: + AlignType = VECTOR_ALIGN; + break; + default: + assert(0 && "Bad type for getAlignment!!!"); + break; + } + + return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty), + abi_or_pref, Ty); +} + +unsigned char TargetData::getABITypeAlignment(const Type *Ty) const { + return getAlignment(Ty, true); +} + +unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) + if (Alignments[i].AlignType == STACK_ALIGN) + return Alignments[i].ABIAlign; + + return getABITypeAlignment(Ty); +} + +unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const { + return getAlignment(Ty, false); +} + +unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { + unsigned Align = (unsigned) getPrefTypeAlignment(Ty); + assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); + return Log2_32(Align); +} + +/// getIntPtrType - Return an unsigned integer type that is the same size or +/// greater to the host pointer size. +const IntegerType *TargetData::getIntPtrType() const { + return IntegerType::get(getPointerSizeInBits()); +} + + +uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, + unsigned NumIndices) const { + const Type *Ty = ptrTy; + assert(isa(Ty) && "Illegal argument for getIndexedOffset()"); + uint64_t Result = 0; + + generic_gep_type_iterator + TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices); + for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) { + if (const StructType *STy = dyn_cast(*TI)) { + assert(Indices[CurIDX]->getType() == Type::Int32Ty && + "Illegal struct idx"); + unsigned FieldNo = cast(Indices[CurIDX])->getZExtValue(); + + // Get structure layout information... + const StructLayout *Layout = getStructLayout(STy); + + // Add in the offset, as calculated by the structure layout info... + Result += Layout->getElementOffset(FieldNo); + + // Update Ty to refer to current element + Ty = STy->getElementType(FieldNo); + } else { + // Update Ty to refer to current element + Ty = cast(Ty)->getElementType(); + + // Get the array index and the size of each array element. + int64_t arrayIdx = cast(Indices[CurIDX])->getSExtValue(); + Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + } + } + + return Result; +} + +/// getPreferredAlignment - Return the preferred alignment of the specified +/// global. This includes an explicitly requested alignment (if the global +/// has one). 
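// Usage sketch with hypothetical names (module M, layout TD): the logic
// below can raise the result above the type's preferred alignment, so a
// large initialized global may come back 16-byte aligned even when its
// element type asks for less.
//
//   GlobalVariable *GV = M.getGlobalVariable("big_table");  // hypothetical
//   unsigned Align = TD.getPreferredAlignment(GV);
//   // Align is at least GV->getAlignment(), and is bumped to 16 when the
//   // initialized global is wider than 128 bits and nothing demands more.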
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const { + const Type *ElemType = GV->getType()->getElementType(); + unsigned Alignment = getPrefTypeAlignment(ElemType); + if (GV->getAlignment() > Alignment) + Alignment = GV->getAlignment(); + + if (GV->hasInitializer()) { + if (Alignment < 16) { + // If the global is not external, see if it is large. If so, give it a + // larger alignment. + if (getTypeSizeInBits(ElemType) > 128) + Alignment = 16; // 16-byte alignment. + } + } + return Alignment; +} + +/// getPreferredAlignmentLog - Return the preferred alignment of the +/// specified global, returned in log form. This includes an explicitly +/// requested alignment (if the global has one). +unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const { + return Log2_32(getPreferredAlignment(GV)); +} diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp new file mode 100644 index 000000000000..873d60a1b5ff --- /dev/null +++ b/lib/Target/TargetFrameInfo.cpp @@ -0,0 +1,19 @@ +//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetFrameInfo.h" +#include +using namespace llvm; + +TargetFrameInfo::~TargetFrameInfo() { +} diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp new file mode 100644 index 000000000000..ceaea0c2027c --- /dev/null +++ b/lib/Target/TargetInstrInfo.cpp @@ -0,0 +1,50 @@ +//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +using namespace llvm; + +TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, + unsigned numOpcodes) + : Descriptors(Desc), NumOpcodes(numOpcodes) { +} + +TargetInstrInfo::~TargetInstrInfo() { +} + +bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isTerminator()) return false; + + // Conditional branch is a special case. + if (TID.isBranch() && !TID.isBarrier()) + return true; + if (!TID.isPredicable()) + return true; + return !isPredicated(MI); +} + +/// getInstrOperandRegClass - Return register class of the operand of an +/// instruction of the specified TargetInstrDesc. 
+const TargetRegisterClass* +llvm::getInstrOperandRegClass(const TargetRegisterInfo *TRI, + const TargetInstrDesc &II, unsigned Op) { + if (Op >= II.getNumOperands()) + return NULL; + if (II.OpInfo[Op].isLookupPtrRegClass()) + return TRI->getPointerRegClass(); + return TRI->getRegClass(II.OpInfo[Op].RegClass); +} diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp new file mode 100644 index 000000000000..d8da08e4f1d3 --- /dev/null +++ b/lib/Target/TargetIntrinsicInfo.cpp @@ -0,0 +1,22 @@ +//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetIntrinsicInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetIntrinsicInfo.h" +using namespace llvm; + +TargetIntrinsicInfo::TargetIntrinsicInfo(const char **desc, unsigned count) + : Intrinsics(desc), NumIntrinsics(count) { +} + +TargetIntrinsicInfo::~TargetIntrinsicInfo() { +} diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp new file mode 100644 index 000000000000..d608119817d6 --- /dev/null +++ b/lib/Target/TargetMachOWriterInfo.cpp @@ -0,0 +1,25 @@ +//===-- llvm/Target/TargetMachOWriterInfo.h - MachO Writer Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TargetMachOWriterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachOWriterInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +using namespace llvm; + +TargetMachOWriterInfo::~TargetMachOWriterInfo() {} + +MachineRelocation +TargetMachOWriterInfo::GetJTRelocation(unsigned Offset, + MachineBasicBlock *MBB) const { + // FIXME: do something about PIC + return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB); +} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp new file mode 100644 index 000000000000..1b042ddef9b8 --- /dev/null +++ b/lib/Target/TargetMachine.cpp @@ -0,0 +1,229 @@ +//===-- TargetMachine.cpp - General Target Information ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the general parts of a Target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +//--------------------------------------------------------------------------- +// Command-line options that tend to be useful on more than one back-end. 
+// + +namespace llvm { + bool LessPreciseFPMADOption; + bool PrintMachineCode; + bool NoFramePointerElim; + bool NoExcessFPPrecision; + bool UnsafeFPMath; + bool FiniteOnlyFPMathOption; + bool HonorSignDependentRoundingFPMathOption; + bool UseSoftFloat; + bool NoImplicitFloat; + bool NoZerosInBSS; + bool ExceptionHandling; + bool UnwindTablesMandatory; + Reloc::Model RelocationModel; + CodeModel::Model CMModel; + bool PerformTailCallOpt; + unsigned StackAlignment; + bool RealignStack; + bool DisableJumpTables; + bool StrongPHIElim; + bool DisableRedZone; + bool AsmVerbosityDefault(false); +} + +static cl::opt +PrintCode("print-machineinstrs", + cl::desc("Print generated machine code"), + cl::location(PrintMachineCode), cl::init(false)); +static cl::opt +DisableFPElim("disable-fp-elim", + cl::desc("Disable frame pointer elimination optimization"), + cl::location(NoFramePointerElim), + cl::init(false)); +static cl::opt +DisableExcessPrecision("disable-excess-fp-precision", + cl::desc("Disable optimizations that may increase FP precision"), + cl::location(NoExcessFPPrecision), + cl::init(false)); +static cl::opt +EnableFPMAD("enable-fp-mad", + cl::desc("Enable less precise MAD instructions to be generated"), + cl::location(LessPreciseFPMADOption), + cl::init(false)); +static cl::opt +EnableUnsafeFPMath("enable-unsafe-fp-math", + cl::desc("Enable optimizations that may decrease FP precision"), + cl::location(UnsafeFPMath), + cl::init(false)); +static cl::opt +EnableFiniteOnlyFPMath("enable-finite-only-fp-math", + cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"), + cl::location(FiniteOnlyFPMathOption), + cl::init(false)); +static cl::opt +EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", + cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::location(HonorSignDependentRoundingFPMathOption), + cl::init(false)); +static cl::opt +GenerateSoftFloatCalls("soft-float", + cl::desc("Generate software floating point library calls"), + cl::location(UseSoftFloat), + cl::init(false)); +static cl::opt +GenerateNoImplicitFloats("no-implicit-float", + cl::desc("Don't generate implicit floating point instructions (x86-only)"), + cl::location(NoImplicitFloat), + cl::init(false)); +static cl::opt +DontPlaceZerosInBSS("nozero-initialized-in-bss", + cl::desc("Don't place zero-initialized symbols into bss section"), + cl::location(NoZerosInBSS), + cl::init(false)); +static cl::opt +EnableExceptionHandling("enable-eh", + cl::desc("Emit DWARF exception handling (default if target supports)"), + cl::location(ExceptionHandling), + cl::init(false)); +static cl::opt +EnableUnwindTables("unwind-tables", + cl::desc("Generate unwinding tables for all functions"), + cl::location(UnwindTablesMandatory), + cl::init(false)); + +static cl::opt +DefRelocationModel("relocation-model", + cl::desc("Choose relocation model"), + cl::location(RelocationModel), + cl::init(Reloc::Default), + cl::values( + clEnumValN(Reloc::Default, "default", + "Target default relocation model"), + clEnumValN(Reloc::Static, "static", + "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValEnd)); +static cl::opt +DefCodeModel("code-model", + cl::desc("Choose code model"), + cl::location(CMModel), + cl::init(CodeModel::Default), + cl::values( + clEnumValN(CodeModel::Default, "default", + 
"Target default code model"), + clEnumValN(CodeModel::Small, "small", + "Small code model"), + clEnumValN(CodeModel::Kernel, "kernel", + "Kernel code model"), + clEnumValN(CodeModel::Medium, "medium", + "Medium code model"), + clEnumValN(CodeModel::Large, "large", + "Large code model"), + clEnumValEnd)); +static cl::opt +EnablePerformTailCallOpt("tailcallopt", + cl::desc("Turn on tail call optimization."), + cl::location(PerformTailCallOpt), + cl::init(false)); +static cl::opt +OverrideStackAlignment("stack-alignment", + cl::desc("Override default stack alignment"), + cl::location(StackAlignment), + cl::init(0)); +static cl::opt +EnableRealignStack("realign-stack", + cl::desc("Realign stack if needed"), + cl::location(RealignStack), + cl::init(true)); +static cl::opt +DisableSwitchTables(cl::Hidden, "disable-jump-tables", + cl::desc("Do not generate jump tables."), + cl::location(DisableJumpTables), + cl::init(false)); +static cl::opt +EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", + cl::desc("Use strong PHI elimination."), + cl::location(StrongPHIElim), + cl::init(false)); +static cl::opt +DisableRedZoneOption("disable-red-zone", + cl::desc("Do not emit code that uses the red zone."), + cl::location(DisableRedZone), + cl::init(false)); + +//--------------------------------------------------------------------------- +// TargetMachine Class +// + +TargetMachine::~TargetMachine() { + delete AsmInfo; +} + +/// getRelocationModel - Returns the code generation relocation model. The +/// choices are static, PIC, and dynamic-no-pic, and target default. +Reloc::Model TargetMachine::getRelocationModel() { + return RelocationModel; +} + +/// setRelocationModel - Sets the code generation relocation model. +void TargetMachine::setRelocationModel(Reloc::Model Model) { + RelocationModel = Model; +} + +/// getCodeModel - Returns the code model. The choices are small, kernel, +/// medium, large, and target default. +CodeModel::Model TargetMachine::getCodeModel() { + return CMModel; +} + +/// setCodeModel - Sets the code model. +void TargetMachine::setCodeModel(CodeModel::Model Model) { + CMModel = Model; +} + +bool TargetMachine::getAsmVerbosityDefault() { + return AsmVerbosityDefault; +} + +void TargetMachine::setAsmVerbosityDefault(bool V) { + AsmVerbosityDefault = V; +} + +namespace llvm { + /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option + /// is specified on the command line. When this flag is off(default), the + /// code generator is not allowed to generate mad (multiply add) if the + /// result is "less precise" than doing those operations individually. + bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; } + + /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math + /// option is specified on the command line. If this returns false (default), + /// the code generator is not allowed to assume that FP arithmetic arguments + /// and results are never NaNs or +-Infs. + bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; } + + /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume + /// that the rounding mode of the FPU can change from its default. 
+ bool HonorSignDependentRoundingFPMath() { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; + } +} + diff --git a/lib/Target/TargetMachineRegistry.cpp b/lib/Target/TargetMachineRegistry.cpp new file mode 100644 index 000000000000..c1a4777c6314 --- /dev/null +++ b/lib/Target/TargetMachineRegistry.cpp @@ -0,0 +1,78 @@ +//===-- TargetMachineRegistry.cpp - Target Auto Registration Impl ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes the RegisterTarget class, which TargetMachine +// implementations should use to register themselves with the system. This file +// also exposes the TargetMachineRegistry class, which allows tools to inspect +// all of registered targets. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachineRegistry.h" +#include +using namespace llvm; + +/// getClosestStaticTargetForModule - Given an LLVM module, pick the best target +/// that is compatible with the module. If no close target can be found, this +/// returns null and sets the Error string to a reason. +const TargetMachineRegistry::entry * +TargetMachineRegistry::getClosestStaticTargetForModule(const Module &M, + std::string &Error) { + std::vector > UsableTargets; + for (Registry::iterator I = begin(), E = end(); I != E; ++I) + if (unsigned Qual = I->ModuleMatchQualityFn(M)) + UsableTargets.push_back(std::make_pair(Qual, &*I)); + + if (UsableTargets.empty()) { + Error = "No available targets are compatible with this module"; + return 0; + } else if (UsableTargets.size() == 1) + return UsableTargets.back().second; + + // Otherwise, take the best target, but make sure we don't have two equally + // good best targets. + std::sort(UsableTargets.begin(), UsableTargets.end()); + if (UsableTargets.back().first ==UsableTargets[UsableTargets.size()-2].first){ + Error = "Cannot choose between targets \"" + + std::string(UsableTargets.back().second->Name) + "\" and \"" + + std::string(UsableTargets[UsableTargets.size()-2].second->Name) + "\""; + return 0; + } + return UsableTargets.back().second; +} + +/// getClosestTargetForJIT - Pick the best target that is compatible with +/// the current host. If no close target can be found, this returns null +/// and sets the Error string to a reason. +const TargetMachineRegistry::entry * +TargetMachineRegistry::getClosestTargetForJIT(std::string &Error) { + std::vector > UsableTargets; + for (Registry::iterator I = begin(), E = end(); I != E; ++I) + if (unsigned Qual = I->JITMatchQualityFn()) + UsableTargets.push_back(std::make_pair(Qual, &*I)); + + if (UsableTargets.empty()) { + Error = "No JIT is available for this host"; + return 0; + } else if (UsableTargets.size() == 1) + return UsableTargets.back().second; + + // Otherwise, take the best target. If there is a tie, just pick one. 
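// Equivalent formulation, shown only as a sketch: std::pair orders by its
// .first member before .second, so std::max_element over the
// (quality, entry*) pairs makes the same choice; among equal qualities the
// winner is arbitrary, which is all "just pick one" promises.
//
//   const entry *Best = std::max_element(UsableTargets.begin(),
//                                        UsableTargets.end())->second;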
+ unsigned MaxQual = UsableTargets.front().first; + const entry *MaxQualTarget = UsableTargets.front().second; + + for (unsigned i = 1, e = UsableTargets.size(); i != e; ++i) + if (UsableTargets[i].first > MaxQual) { + MaxQual = UsableTargets[i].first; + MaxQualTarget = UsableTargets[i].second; + } + + return MaxQualTarget; +} + diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp new file mode 100644 index 000000000000..a84fdaa4a802 --- /dev/null +++ b/lib/Target/TargetRegisterInfo.cpp @@ -0,0 +1,144 @@ +//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetRegisterInfo interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/ADT/BitVector.h" + +using namespace llvm; + +TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, + regclass_iterator RCB, regclass_iterator RCE, + int CFSO, int CFDO, + const unsigned* subregs, const unsigned subregsize, + const unsigned* superregs, const unsigned superregsize, + const unsigned* aliases, const unsigned aliasessize) + : SubregHash(subregs), SubregHashSize(subregsize), + SuperregHash(superregs), SuperregHashSize(superregsize), + AliasesHash(aliases), AliasesHashSize(aliasessize), + Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) { + assert(NumRegs < FirstVirtualRegister && + "Target has too many physical registers!"); + + CallFrameSetupOpcode = CFSO; + CallFrameDestroyOpcode = CFDO; +} + +TargetRegisterInfo::~TargetRegisterInfo() {} + +/// getPhysicalRegisterRegClass - Returns the Register Class of a physical +/// register of the given type. If type is MVT::Other, then just return any +/// register class the register belongs to. +const TargetRegisterClass * +TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const { + assert(isPhysicalRegister(reg) && "reg must be a physical register"); + + // Pick the most super register class of the right type that contains + // this physreg. + const TargetRegisterClass* BestRC = 0; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ + const TargetRegisterClass* RC = *I; + if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && + (!BestRC || BestRC->hasSuperClass(RC))) + BestRC = RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + +/// getAllocatableSetForRC - Toggle the bits that represent allocatable +/// registers for the specific register class. 
+static void getAllocatableSetForRC(MachineFunction &MF, + const TargetRegisterClass *RC, BitVector &R){ + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + R.set(*I); +} + +BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF, + const TargetRegisterClass *RC) const { + BitVector Allocatable(NumRegs); + if (RC) { + getAllocatableSetForRC(MF, RC, Allocatable); + return Allocatable; + } + + for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), + E = regclass_end(); I != E; ++I) + getAllocatableSetForRC(MF, *I, Allocatable); + return Allocatable; +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. This is the default implementation +/// which is likely incorrect for the target. +int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +/// getInitialFrameState - Returns a list of machine moves that are assumed +/// on entry to a function. +void +TargetRegisterInfo::getInitialFrameState(std::vector &Moves) const { + // Default is to do nothing. +} + +const TargetRegisterClass * +llvm::getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B) { + // First take care of the trivial cases + if (A == B) + return A; + if (!A || !B) + return 0; + + // If B is a subclass of A, it will be handled in the loop below + if (B->hasSubClass(A)) + return A; + + const TargetRegisterClass *Best = 0; + for (TargetRegisterClass::sc_iterator I = A->subclasses_begin(); + const TargetRegisterClass *X = *I; ++I) { + if (X == B) + return B; // B is a subclass of A + + // X must be a common subclass of A and B + if (!B->hasSubClass(X)) + continue; + + // A superclass is definitely better. + if (!Best || Best->hasSuperClass(X)) { + Best = X; + continue; + } + + // A subclass is definitely worse + if (Best->hasSubClass(X)) + continue; + + // Best and *I have no super/sub class relation - pick the larger class, or + // the smaller spill size. + int nb = std::distance(Best->begin(), Best->end()); + int ni = std::distance(X->begin(), X->end()); + if (ni>nb || (ni==nb && X->getSize() < Best->getSize())) + Best = X; + } + return Best; +} diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp new file mode 100644 index 000000000000..95c92cabaf70 --- /dev/null +++ b/lib/Target/TargetSubtarget.cpp @@ -0,0 +1,22 @@ +//===-- TargetSubtarget.cpp - General Target Information -------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the general parts of a Subtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSubtarget.h" +using namespace llvm; + +//--------------------------------------------------------------------------- +// TargetSubtarget Class +// +TargetSubtarget::TargetSubtarget() {} + +TargetSubtarget::~TargetSubtarget() {} diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt new file mode 100644 index 000000000000..dbd03d8b9633 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -0,0 +1,11 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMX86AsmPrinter + X86ATTAsmPrinter.cpp + X86AsmPrinter.cpp + X86IntelAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMX86CodeGen n) + +add_dependencies(LLVMX86AsmPrinter ${n}) diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile new file mode 100644 index 000000000000..ba89ac69bf68 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86AsmPrinter + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp new file mode 100644 index 000000000000..8afe2ea9e10b --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -0,0 +1,1075 @@ +//===-- X86ATTAsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to AT&T format assembly +// language. This printer is the output mechanism used by `llc'. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "X86ATTAsmPrinter.h" +#include "X86.h" +#include "X86COFF.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" +#include "X86TargetAsmInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +static std::string getPICLabelString(unsigned FnNum, + const TargetAsmInfo *TAI, + const X86Subtarget* Subtarget) { + std::string label; + if (Subtarget->isTargetDarwin()) + label = "\"L" + utostr_32(FnNum) + "$pb\""; + else if (Subtarget->isTargetELF()) + label = ".Lllvm$" + utostr_32(FnNum) + "." 
"$piclabel"; + else + assert(0 && "Don't know how to print PIC label!\n"); + + return label; +} + +static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, + const TargetData *TD) { + X86MachineFunctionInfo Info; + uint64_t Size = 0; + + switch (F->getCallingConv()) { + case CallingConv::X86_StdCall: + Info.setDecorationStyle(StdCall); + break; + case CallingConv::X86_FastCall: + Info.setDecorationStyle(FastCall); + break; + default: + return Info; + } + + unsigned argNum = 1; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI, ++argNum) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (F->paramHasAttr(argNum, Attribute::ByVal)) + Ty = cast(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4; + } + + // We're not supporting tooooo huge arguments :) + Info.setBytesToPopOnReturn((unsigned int)Size); + return Info; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. +static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; +} + +/// decorateName - Query FunctionInfoMap and use this information for various +/// name decoration. +void X86ATTAsmPrinter::decorateName(std::string &Name, + const GlobalValue *GV) { + const Function *F = dyn_cast(GV); + if (!F) return; + + // We don't want to decorate non-stdcall or non-fastcall functions right now + unsigned CC = F->getCallingConv(); + if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) + return; + + // Decorate names only when we're targeting Cygwin/Mingw32 targets + if (!Subtarget->isTargetCygMing()) + return; + + FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); + + const X86MachineFunctionInfo *Info; + if (info_item == FunctionInfoMap.end()) { + // Calculate apropriate function info and populate map + FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData()); + Info = &FunctionInfoMap[F]; + } else { + Info = &info_item->second; + } + + const FunctionType *FT = F->getFunctionType(); + switch (Info->getDecorationStyle()) { + case None: + break; + case StdCall: + // "Pure" variadic functions do not receive @0 suffix. + if (!FT->isVarArg() || (FT->getNumParams() == 0) || + (FT->getNumParams() == 1 && F->hasStructRetAttr())) + Name += '@' + utostr_32(Info->getBytesToPopOnReturn()); + break; + case FastCall: + // "Pure" variadic functions do not receive @0 suffix. + if (!FT->isVarArg() || (FT->getNumParams() == 0) || + (FT->getNumParams() == 1 && F->hasStructRetAttr())) + Name += '@' + utostr_32(Info->getBytesToPopOnReturn()); + + if (Name[0] == '_') { + Name[0] = '@'; + } else { + Name = '@' + Name; + } + break; + default: + assert(0 && "Unsupported DecorationStyle"); + } +} + +void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + const Function *F = MF.getFunction(); + + decorateName(CurrentFnName, F); + + SwitchToSection(TAI->SectionForGlobal(F)); + + unsigned FnAlign = 4; + if (F->hasFnAttr(Attribute::OptimizeForSize)) + FnAlign = 1; + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
+ case Function::PrivateLinkage: + EmitAlignment(FnAlign, F); + break; + case Function::DLLExportLinkage: + case Function::ExternalLinkage: + EmitAlignment(FnAlign, F); + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + EmitAlignment(FnAlign, F); + if (Subtarget->isTargetDarwin()) { + O << "\t.globl\t" << CurrentFnName << '\n'; + O << TAI->getWeakDefDirective() << CurrentFnName << '\n'; + } else if (Subtarget->isTargetCygMing()) { + O << "\t.globl\t" << CurrentFnName << "\n" + "\t.linkonce discard\n"; + } else { + O << "\t.weak\t" << CurrentFnName << '\n'; + } + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type\t" << CurrentFnName << ",@function\n"; + else if (Subtarget->isTargetCygMing()) { + O << "\t.def\t " << CurrentFnName + << ";\t.scl\t" << + (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; + } + + O << CurrentFnName << ":\n"; + // Add some workaround for linkonce linkage on Cygwin\MinGW + if (Subtarget->isTargetCygMing() && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) + O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n"; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + const Function *F = MF.getFunction(); + this->MF = &MF; + unsigned CC = F->getCallingConv(); + + SetupMachineFunction(MF); + O << "\n\n"; + + // Populate function information map. Actually, We don't want to populate + // non-stdcall or non-fastcall functions' information right now. + if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) + FunctionInfoMap[F] = *MF.getInfo(); + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + if (F->hasDLLExportLinkage()) + DLLExportedFns.insert(Mang->makeNameProper(F->getName(), "")); + + // Print the 'header' of function + emitFunctionHeader(MF); + + // Emit pre-function debug and/or EH information. + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->BeginFunction(&MF); + + // Print out code for the function. + bool hasAnyRealCode = false; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { + // This is an entry block or a block that's only reachable via a + // fallthrough edge. In non-VerboseAsm mode, don't print the label. + } else { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + // Print the assembly for the instruction. + if (!II->isLabel()) + hasAnyRealCode = true; + printMachineInstruction(II); + } + } + + if (Subtarget->isTargetDarwin() && !hasAnyRealCode) { + // If the function is empty, then we need to emit *something*. Otherwise, + // the function's label might be associated with something that it wasn't + // meant to be associated with. We emit a noop in this situation. + // We are assuming inline asms are code. 
+ O << "\tnop\n"; + } + + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; + + // Emit post-function debug information. + if (TAI->doesSupportDebugInformation()) + DW->EndFunction(&MF); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + O.flush(); + + // We didn't modify anything. + return false; +} + +static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) { + return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_; +} + +static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) { + return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ && + (ST->isPICStyleRIPRel() || ST->isPICStyleGOT()); +} + +static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) { + return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static; +} + +void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier, bool NotRIPRel) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + case MachineOperand::MO_Register: { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should not make it this far!"); + O << '%'; + unsigned Reg = MO.getReg(); + if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { + MVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); + Reg = getX86SubSuperRegister(Reg, VT); + } + O << TRI->getAsmName(Reg); + return; + } + + case MachineOperand::MO_Immediate: + if (!Modifier || (strcmp(Modifier, "debug") && + strcmp(Modifier, "mem") && + strcmp(Modifier, "call"))) + O << '$'; + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm); + return; + case MachineOperand::MO_JumpTableIndex: { + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + if (!isMemOp) O << '$'; + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + + if (TM.getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) + O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << "$pb\""; + else if (Subtarget->isPICStyleGOT()) + O << "@GOTOFF"; + } + + if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel) + O << "(%rip)"; + return; + } + case MachineOperand::MO_ConstantPoolIndex: { + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + if (!isMemOp) O << '$'; + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); + + if (TM.getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) + O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << "$pb\""; + else if (Subtarget->isPICStyleGOT()) + O << "@GOTOFF"; + } + + printOffset(MO.getOffset()); + + if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel) + O << "(%rip)"; + return; + } + case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + bool needCloseParen = false; + + const GlobalValue *GV = MO.getGlobal(); + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining + // thread-localness. 
+ if (const GlobalAlias *GA = dyn_cast(GV)) + GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); + } + + bool isThreadLocal = GVar && GVar->isThreadLocal(); + + std::string Name = Mang->getValueName(GV); + decorateName(Name, GV); + + if (!isMemOp && !isCallOp) + O << '$'; + else if (Name[0] == '$') { + // The name begins with a dollar-sign. In order to avoid having it look + // like an integer immediate to the assembler, enclose it in parens. + O << '('; + needCloseParen = true; + } + + if (shouldPrintStub(TM, Subtarget)) { + // Link-once, declaration, or Weakly-linked global variables need + // non-lazily-resolved stubs + if (GV->isDeclaration() || GV->isWeakForLinker()) { + // Dynamically-resolved functions need a stub for the function. + if (isCallOp && isa(GV)) { + // Function stubs are no longer needed for Mac OS X 10.5 and up. + if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) { + O << Name; + } else { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } + } else if (GV->hasHiddenVisibility()) { + if (!GV->isDeclaration() && !GV->hasCommonLinkage()) + // Definition is not definitely in the current translation unit. + O << Name; + else { + HiddenGVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + if (GV->hasDLLImportLinkage()) + O << "__imp_"; + O << Name; + } + + if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) + O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget); + } else { + if (GV->hasDLLImportLinkage()) { + O << "__imp_"; + } + O << Name; + + if (isCallOp) { + if (shouldPrintPLT(TM, Subtarget)) { + // Assemble call via PLT for externally visible symbols + if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && + !GV->hasLocalLinkage()) + O << "@PLT"; + } + if (Subtarget->isTargetCygMing() && GV->isDeclaration()) + // Save function name for later type emission + FnStubs.insert(Name); + } + } + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + printOffset(MO.getOffset()); + + if (isThreadLocal) { + TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel()); + switch (model) { + case TLSModel::GeneralDynamic: + O << "@TLSGD"; + break; + case TLSModel::LocalDynamic: + // O << "@TLSLD"; // local dynamic not implemented + O << "@TLSGD"; + break; + case TLSModel::InitialExec: + if (Subtarget->is64Bit()) { + assert (!NotRIPRel); + O << "@GOTTPOFF(%rip)"; + } else { + O << "@INDNTPOFF"; + } + break; + case TLSModel::LocalExec: + if (Subtarget->is64Bit()) + O << "@TPOFF"; + else + O << "@NTPOFF"; + break; + default: + assert (0 && "Unknown TLS model"); + } + } else if (isMemOp) { + if (shouldPrintGOT(TM, Subtarget)) { + if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) + O << "@GOT"; + else + O << "@GOTOFF"; + } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) { + if (TM.getRelocationModel() != Reloc::Static) { + if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) + O << "@GOTPCREL"; + + if (needCloseParen) { + needCloseParen = false; + O << ')'; + } + } + + // Use rip when possible to reduce code size, except when + // index or base register are also part of the address. e.g. 
+        // foo(%rip)(%rcx,%rax,4) is not legal
+        O << "(%rip)";
+      }
+    }
+
+    if (needCloseParen)
+      O << ')';
+
+    return;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    bool isCallOp = Modifier && !strcmp(Modifier, "call");
+    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
+    bool needCloseParen = false;
+    std::string Name(TAI->getGlobalPrefix());
+    Name += MO.getSymbolName();
+    // Print function stub suffix unless it's Mac OS X 10.5 and up.
+    if (isCallOp && shouldPrintStub(TM, Subtarget) &&
+        !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+      FnStubs.insert(Name);
+      printSuffixedName(Name, "$stub");
+      return;
+    }
+    if (!isMemOp && !isCallOp)
+      O << '$';
+    else if (Name[0] == '$') {
+      // The name begins with a dollar-sign. In order to avoid having it look
+      // like an integer immediate to the assembler, enclose it in parens.
+      O << '(';
+      needCloseParen = true;
+    }
+
+    O << Name;
+
+    if (shouldPrintPLT(TM, Subtarget)) {
+      std::string GOTName(TAI->getGlobalPrefix());
+      GOTName+="_GLOBAL_OFFSET_TABLE_";
+      if (Name == GOTName)
+        // HACK! Emit extra offset to PC during printing GOT offset to
+        // compensate for the size of popl instruction. The resulting code
+        // should look like:
+        //     call .piclabel
+        //   piclabel:
+        //     popl %some_register
+        //     addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
+        O << " + [.-"
+          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+
+      if (isCallOp)
+        O << "@PLT";
+    }
+
+    if (needCloseParen)
+      O << ')';
+
+    if (!isCallOp && Subtarget->isPICStyleRIPRel())
+      O << "(%rip)";
+
+    return;
+  }
+  default:
+    O << "<unknown operand type>"; return;
+  }
+}
+
+void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+  unsigned char value = MI->getOperand(Op).getImm();
+  assert(value <= 7 && "Invalid ssecc argument!");
+  switch (value) {
+  case 0: O << "eq"; break;
+  case 1: O << "lt"; break;
+  case 2: O << "le"; break;
+  case 3: O << "unord"; break;
+  case 4: O << "neq"; break;
+  case 5: O << "nlt"; break;
+  case 6: O << "nle"; break;
+  case 7: O << "ord"; break;
+  }
+}
+
+void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+                                            const char *Modifier,
+                                            bool NotRIPRel) {
+  MachineOperand BaseReg  = MI->getOperand(Op);
+  MachineOperand IndexReg = MI->getOperand(Op+2);
+  const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+  if (DispSpec.isGlobal() ||
+      DispSpec.isCPI() ||
+      DispSpec.isJTI() ||
+      DispSpec.isSymbol()) {
+    printOperand(MI, Op+3, "mem", NotRIPRel);
+  } else {
+    int DispVal = DispSpec.getImm();
+    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+      O << DispVal;
+  }
+
+  if (IndexReg.getReg() || BaseReg.getReg()) {
+    unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+    unsigned BaseRegOperand = 0, IndexRegOperand = 2;
+
+    // There are cases where we can end up with ESP/RSP in the indexreg slot.
+    // If this happens, swap the base/index register to support assemblers that
+    // don't work when the index is *SP.
+ if (IndexReg.getReg() == X86::ESP || IndexReg.getReg() == X86::RSP) { + assert(ScaleVal == 1 && "Scale not supported for stack pointer!"); + std::swap(BaseReg, IndexReg); + std::swap(BaseRegOperand, IndexRegOperand); + } + + O << '('; + if (BaseReg.getReg()) + printOperand(MI, Op+BaseRegOperand, Modifier); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+IndexRegOperand, Modifier); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} + +void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier, bool NotRIPRel){ + assert(isMem(MI, Op) && "Invalid memory reference!"); + MachineOperand Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4, Modifier); + O << ':'; + } + printLeaMemReference(MI, Op, Modifier, NotRIPRel); +} + +void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + // We don't need .set machinery if we have GOT-style relocations + if (Subtarget->isPICStyleGOT()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false, false); + if (Subtarget->isPICStyleRIPRel()) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '\n'; + else + O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n'; +} + +void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { + std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget); + O << label << '\n' << label << ':'; +} + + +void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const +{ + const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? + TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + + O << JTEntryDirective << ' '; + + if (TM.getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStub()) { + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); + } else if (Subtarget->isPICStyleGOT()) { + printBasicBlockLabel(MBB, false, false, false); + O << "@GOTOFF"; + } else + assert(0 && "Don't know how to print MBB label for this PIC mode"); + } else + printBasicBlockLabel(MBB, false, false, false); +} + +bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, + const char Mode) { + unsigned Reg = MO.getReg(); + switch (Mode) { + default: return true; // Unknown mode. + case 'b': // Print QImode register + Reg = getX86SubSuperRegister(Reg, MVT::i8); + break; + case 'h': // Print QImode high register + Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + break; + case 'w': // Print HImode register + Reg = getX86SubSuperRegister(Reg, MVT::i16); + break; + case 'k': // Print SImode register + Reg = getX86SubSuperRegister(Reg, MVT::i32); + break; + case 'q': // Print DImode register + Reg = getX86SubSuperRegister(Reg, MVT::i64); + break; + } + + O << '%'<< TRI->getAsmName(Reg); + return false; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? 
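+  // (Illustrative note, not from the original source: with GCC-style inline
+  // asm such as
+  //    asm("movb %b0, %h0" : "+r"(x));
+  // ExtraCode would be "b" or "h" here, selecting the 8-bit low or high
+  // subregister of the register holding x.)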
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'c': // Don't print "$" before a global var name or constant.
+      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      return false;
+    case 'b': // Print QImode register
+    case 'h': // Print QImode high register
+    case 'w': // Print HImode register
+    case 'k': // Print SImode register
+    case 'q': // Print DImode register
+      if (MI->getOperand(OpNo).isReg())
+        return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+      printOperand(MI, OpNo);
+      return false;
+
+    case 'P': // Don't print @PLT, but do print as memory.
+      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      return false;
+    }
+  }
+
+  printOperand(MI, OpNo);
+  return false;
+}
+
+bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                             unsigned OpNo,
+                                             unsigned AsmVariant,
+                                             const char *ExtraCode) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'b': // Print QImode register
+    case 'h': // Print QImode high register
+    case 'w': // Print HImode register
+    case 'k': // Print SImode register
+    case 'q': // Print DImode register
+      // These only apply to registers, ignore on mem.
+      break;
+    case 'P': // Don't print @PLT, but do print as memory.
+      printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      return false;
+    }
+  }
+  printMemReference(MI, OpNo);
+  return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
+/// AT&T syntax to the current output stream.
+///
+void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+  ++EmittedInsts;
+
+  // Call the autogenerated instruction printer routines.
+  printInstruction(MI);
+}
+
+/// doInitialization
+bool X86ATTAsmPrinter::doInitialization(Module &M) {
+
+  bool Result = AsmPrinter::doInitialization(M);
+
+  if (TAI->doesSupportDebugInformation()) {
+    // Let PassManager know we need debug information and relay
+    // the MachineModuleInfo address on to DwarfWriter.
+    // AsmPrinter::doInitialization did this analysis.
+    MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+    DW = getAnalysisIfAvailable<DwarfWriter>();
+    DW->BeginModule(&M, MMI, O, this, TAI);
+  }
+
+  // Darwin wants symbols to be quoted if they have complex names.
+  if (Subtarget->isTargetDarwin())
+    Mang->setUseQuotes(true);
+
+  return Result;
+}
+
+
+void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+  const TargetData *TD = TM.getTargetData();
+
+  if (!GVar->hasInitializer())
+    return;   // External globals require no code
+
+  // Check to see if this is a special global used by LLVM, if so, emit it.
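+  // ("Special" here means globals such as llvm.global_ctors and
+  // llvm.global_dtors, which are given dedicated handling just below.)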
+ if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type\t" << name << ",@object\n"; + + SwitchToSection(TAI->SectionForGlobal(GVar)); + + if (C->isNullValue() && !GVar->hasSection() && + !(Subtarget->isTargetDarwin() && + TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) { + // FIXME: This seems to be pretty darwin-specific + if (GVar->hasExternalLinkage()) { + if (const char *Directive = TAI->getZeroFillDirective()) { + O << "\t.globl " << name << '\n'; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << '\n'; + return; + } + } + + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (TAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << ',' << Size; + if (Subtarget->isTargetDarwin()) + O << ',' << Align; + } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) { + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } else { + O << TAI->getCOMMDirective() << name << ',' << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + if (!Subtarget->isTargetCygMing()) { + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + } + O << TAI->getCOMMDirective() << name << ',' << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + return; + } + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (Subtarget->isTargetDarwin()) { + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + } else if (Subtarget->isTargetCygMing()) { + O << "\t.globl\t" << name << "\n" + "\t.linkonce same_size\n"; + } else { + O << "\t.weak\t" << name << '\n'; + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm){ + O << "\t\t\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; + + EmitGlobalConstant(C); +} + +/// printGVStub - Print stub for a global value. +/// +void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) { + printSuffixedName(GV, "$non_lazy_ptr", Prefix); + O << ":\n\t.indirect_symbol "; + if (Prefix) O << Prefix; + O << GV << "\n\t.long\t0\n"; +} + +/// printHiddenGVStub - Print stub for a hidden global value. +/// +void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) { + EmitAlignment(2); + printSuffixedName(GV, "$non_lazy_ptr", Prefix); + if (Prefix) O << Prefix; + O << ":\n" << TAI->getData32bitsDirective() << GV << '\n'; +} + + +bool X86ATTAsmPrinter::doFinalization(Module &M) { + // Print out module-level global variables here. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + printModuleLevelGV(I); + + if (I->hasDLLExportLinkage()) + DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),"")); + + // If the global is a extern weak symbol, remember to emit the weak + // reference! + // FIXME: This is rather hacky, since we'll emit references to ALL weak + // stuff, not used. But currently it's the only way to deal with extern weak + // initializers hidden deep inside constant expressions. + if (I->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(I); + } + + for (Module::const_iterator I = M.begin(), E = M.end(); + I != E; ++I) { + // If the global is a extern weak symbol, remember to emit the weak + // reference! + // FIXME: This is rather hacky, since we'll emit references to ALL weak + // stuff, not used. But currently it's the only way to deal with extern weak + // initializers hidden deep inside constant expressions. + if (I->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(I); + } + + // Output linker support code for dllexported globals + if (!DLLExportedGVs.empty()) + SwitchToDataSection(".section .drectve"); + + for (StringSet<>::iterator i = DLLExportedGVs.begin(), + e = DLLExportedGVs.end(); + i != e; ++i) + O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n"; + + if (!DLLExportedFns.empty()) { + SwitchToDataSection(".section .drectve"); + } + + for (StringSet<>::iterator i = DLLExportedFns.begin(), + e = DLLExportedFns.end(); + i != e; ++i) + O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n"; + + if (Subtarget->isTargetDarwin()) { + SwitchToDataSection(""); + + // Output stubs for dynamically-linked functions + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs," + "self_modifying_code+pure_instructions,5", 0); + const char *p = i->getKeyData(); + printSuffixedName(p, "$stub"); + O << ":\n" + "\t.indirect_symbol " << p << "\n" + "\thlt ; hlt ; hlt ; hlt ; hlt\n"; + } + + O << '\n'; + + // Print global value stubs. 
+    bool InStubSection = false;
+    if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) {
+      // Add the (possibly multiple) personalities to the set of global values.
+      // Only referenced functions get into the Personalities list.
+      const std::vector<Function *> &Personalities = MMI->getPersonalities();
+      for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+             E = Personalities.end(); I != E; ++I) {
+        if (!*I)
+          continue;
+        if (!InStubSection) {
+          SwitchToDataSection(
+                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+          InStubSection = true;
+        }
+        printGVStub((*I)->getNameStart(), "_");
+      }
+    }
+
+    // Output stubs for external and common global variables.
+    if (!InStubSection && !GVStubs.empty())
+      SwitchToDataSection(
+                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+    for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
+         i != e; ++i)
+      printGVStub(i->getKeyData());
+
+    if (!HiddenGVStubs.empty()) {
+      SwitchToSection(TAI->getDataSection());
+      for (StringSet<>::iterator i = HiddenGVStubs.begin(),
+           e = HiddenGVStubs.end(); i != e; ++i)
+        printHiddenGVStub(i->getKeyData());
+    }
+
+    // Emit final debug information.
+    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+    DW->EndModule();
+
+    // Funny Darwin hack: This flag tells the linker that no global symbols
+    // contain code that falls through to other global symbols (e.g. the obvious
+    // implementation of multiple entry points). If this doesn't occur, the
+    // linker can safely perform dead code stripping. Since LLVM never
+    // generates code that does this, it is always safe to set.
+    O << "\t.subsections_via_symbols\n";
+  } else if (Subtarget->isTargetCygMing()) {
+    // Emit type information for external functions
+    for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+         i != e; ++i) {
+      O << "\t.def\t " << i->getKeyData()
+        << ";\t.scl\t" << COFF::C_EXT
+        << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+        << ";\t.endef\n";
+    }
+
+    // Emit final debug information.
+    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+    DW->EndModule();
+  } else if (Subtarget->isTargetELF()) {
+    // Emit final debug information.
+    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+    DW->EndModule();
+  }
+
+  return AsmPrinter::doFinalization(M);
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
new file mode 100644
index 000000000000..5b40e73bcb66
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -0,0 +1,164 @@
+//===-- X86ATTAsmPrinter.h - Convert X86 LLVM code to AT&T assembly -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ATTASMPRINTER_H
+#define X86ATTASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+
+class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
+  DwarfWriter *DW;
+  MachineModuleInfo *MMI;
+  const X86Subtarget *Subtarget;
+ public:
+  explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+                            const TargetAsmInfo *T, CodeGenOpt::Level OL,
+                            bool V)
+    : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+    Subtarget = &TM.getSubtarget<X86Subtarget>();
+  }
+
+  virtual const char *getPassName() const {
+    return "X86 AT&T-Style Assembly Printer";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    if (Subtarget->isTargetDarwin() ||
+        Subtarget->isTargetELF() ||
+        Subtarget->isTargetCygMing()) {
+      AU.addRequired<MachineModuleInfo>();
+    }
+    AU.addRequired<DwarfWriter>();
+    AsmPrinter::getAnalysisUsage(AU);
+  }
+
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+
+  /// printInstruction - This method is automatically generated by tablegen
+  /// from the instruction set description. This method returns true if the
+  /// machine instruction was sufficiently described to print it, otherwise it
+  /// returns false.
+  bool printInstruction(const MachineInstr *MI);
+
+  // These methods are used by the tablegen'erated instruction printer.
+  void printOperand(const MachineInstr *MI, unsigned OpNo,
+                    const char *Modifier = 0, bool NotRIPRel = false);
+  void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo, "subreg64");
+  }
+
+  bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       unsigned AsmVariant, const char *ExtraCode);
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             unsigned AsmVariant, const char *ExtraCode);
+
+  void printMachineInstruction(const MachineInstr *MI);
+  void printSSECC(const MachineInstr *MI, unsigned Op);
+  void printMemReference(const MachineInstr *MI, unsigned Op,
+                         const char *Modifier=NULL, bool NotRIPRel = false);
+  void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+                            const char *Modifier=NULL, bool NotRIPRel = false);
+  void printPICJumpTableSetLabel(unsigned uid,
+                                 const MachineBasicBlock *MBB) const;
+  void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+                                 const MachineBasicBlock *MBB) const {
+    AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+  }
+  void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                              const MachineBasicBlock *MBB,
+                              unsigned uid) const;
+
+  void printPICLabel(const MachineInstr *MI, unsigned Op);
+  void printModuleLevelGV(const GlobalVariable* GVar);
+
+  void printGVStub(const char *GV, const char *Prefix = NULL);
+  void printHiddenGVStub(const char *GV, const char *Prefix = NULL);
+
+  bool runOnMachineFunction(MachineFunction &F);
+
+  void emitFunctionHeader(const MachineFunction &MF);
+
+  // Necessary for Darwin to print out the appropriate types of linker stubs
+  StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+
+  // Necessary for dllexport support
+  StringSet<> DLLExportedFns, DLLExportedGVs;
+
+  // We have to propagate some information about MachineFunction to
+  // AsmPrinter. It's ok, when we're printing the function, since we have
+  // access to MachineFunction and can get the appropriate MachineFunctionInfo.
+  // Unfortunately, this is not possible when we're printing reference to
+  // Function (e.g. calling it and so on). Even more, there is no way to get the
+  // corresponding MachineFunctions: it can even be not created at all. That's
+  // why we should use additional structure, when we're collecting all necessary
+  // information.
+  //
+  // This structure is using e.g. for name decoration for stdcall & fastcall'ed
+  // function, since we have to use arguments' size for decoration.
+  typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+  FMFInfoMap FunctionInfoMap;
+
+  void decorateName(std::string& Name, const GlobalValue* GV);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
new file mode 100644
index 000000000000..c874849dc18e
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -0,0 +1,50 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the shared super class printer that converts from our
+// internal representation of machine-dependent LLVM code to Intel and AT&T
+// format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ATTAsmPrinter.h"
+#include "X86IntelAsmPrinter.h"
+#include "X86Subtarget.h"
+using namespace llvm;
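+
+// Illustrative usage (an assumption for exposition, not part of this import):
+// a driver would typically construct the printer through the factory below,
+// e.g.
+//   FunctionPass *P = createX86CodePrinterPass(outs(), TM,
+//                                              CodeGenOpt::Default, true);
+//   PM.add(P);
+// which picks the Intel- or AT&T-flavored printer based on the subtarget.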
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
+/// for a MachineFunction to the given output stream, using the given target
+/// machine description.
+///
+FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
+                                             X86TargetMachine &tm,
+                                             CodeGenOpt::Level OptLevel,
+                                             bool verbose) {
+  const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+
+  if (Subtarget->isFlavorIntel()) {
+    return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+                                  OptLevel, verbose);
+  } else {
+    return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+                                OptLevel, verbose);
+  }
+}
+
+namespace {
+  static struct Register {
+    Register() {
+      X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass);
+    }
+  } Registrator;
+}
+
+extern "C" int X86AsmPrinterForceLink;
+int X86AsmPrinterForceLink = 0;
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
new file mode 100644
index 000000000000..659934930d46
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -0,0 +1,609 @@
+//===-- X86IntelAsmPrinter.cpp - Convert X86 LLVM code to Intel assembly --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Intel format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelAsmPrinter.h"
+#include "X86InstrInfo.h"
+#include "X86TargetAsmInfo.h"
+#include "X86.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+                                                    const TargetData *TD) {
+  X86MachineFunctionInfo Info;
+  uint64_t Size = 0;
+
+  switch (F->getCallingConv()) {
+  case CallingConv::X86_StdCall:
+    Info.setDecorationStyle(StdCall);
+    break;
+  case CallingConv::X86_FastCall:
+    Info.setDecorationStyle(FastCall);
+    break;
+  default:
+    return Info;
+  }
+
+  unsigned argNum = 1;
+  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+       AI != AE; ++AI, ++argNum) {
+    const Type* Ty = AI->getType();
+
+    // 'Dereference' type in case of byval parameter attribute
+    if (F->paramHasAttr(argNum, Attribute::ByVal))
+      Ty = cast<PointerType>(Ty)->getElementType();
+
+    // Size should be aligned to DWORD boundary
+    Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4;
+  }
+
+  // We're not supporting tooooo huge arguments :)
+  Info.setBytesToPopOnReturn((unsigned int)Size);
+  return Info;
+}
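+
+// Worked example (hypothetical function, for illustration only): for
+//   void f(int a, long long b)  with the X86_StdCall convention,
+// the loop above rounds each argument up to a DWORD multiple, giving
+// Size = 4 + 8 = 12; assuming the usual underscore global prefix on Windows
+// targets, decorateName() below then yields "_f@12", and under X86_FastCall
+// the same function would be decorated as "@f@12".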
+
+/// decorateName - Query FunctionInfoMap and use this information for various
+/// name decoration.
+void X86IntelAsmPrinter::decorateName(std::string &Name,
+                                      const GlobalValue *GV) {
+  const Function *F = dyn_cast<Function>(GV);
+  if (!F) return;
+
+  // We don't want to decorate non-stdcall or non-fastcall functions right now
+  unsigned CC = F->getCallingConv();
+  if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+    return;
+
+  FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+  const X86MachineFunctionInfo *Info;
+  if (info_item == FunctionInfoMap.end()) {
+    // Calculate appropriate function info and populate map
+    FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+    Info = &FunctionInfoMap[F];
+  } else {
+    Info = &info_item->second;
+  }
+
+  const FunctionType *FT = F->getFunctionType();
+  switch (Info->getDecorationStyle()) {
+  case None:
+    break;
+  case StdCall:
+    // "Pure" variadic functions do not receive @0 suffix.
+    if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+        (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+      Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+    break;
+  case FastCall:
+    // "Pure" variadic functions do not receive @0 suffix.
+    if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+        (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+      Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+    if (Name[0] == '_')
+      Name[0] = '@';
+    else
+      Name = '@' + Name;
+
+    break;
+  default:
+    assert(0 && "Unsupported DecorationStyle");
+  }
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+  SetupMachineFunction(MF);
+  O << "\n\n";
+
+  // Print out constants referenced by the function
+  EmitConstantPool(MF.getConstantPool());
+
+  // Print out labels for the function.
+  const Function *F = MF.getFunction();
+  unsigned CC = F->getCallingConv();
+
+  // Populate function information map. Actually, we don't want to populate
+  // non-stdcall or non-fastcall functions' information right now.
+  if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+    FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+  decorateName(CurrentFnName, F);
+
+  SwitchToTextSection("_text", F);
+
+  unsigned FnAlign = 4;
+  if (F->hasFnAttr(Attribute::OptimizeForSize))
+    FnAlign = 1;
+  switch (F->getLinkage()) {
+  default: assert(0 && "Unsupported linkage type!");
+  case Function::PrivateLinkage:
+  case Function::InternalLinkage:
+    EmitAlignment(FnAlign);
+    break;
+  case Function::DLLExportLinkage:
+    DLLExportedFns.insert(CurrentFnName);
+    //FALLS THROUGH
+  case Function::ExternalLinkage:
+    O << "\tpublic " << CurrentFnName << "\n";
+    EmitAlignment(FnAlign);
+    break;
+  }
+
+  O << CurrentFnName << "\tproc near\n";
+
+  // Print out code for the function.
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    // Print a label for the basic block if there are any predecessors.
+    if (!I->pred_empty()) {
+      printBasicBlockLabel(I, true, true);
+      O << '\n';
+    }
+    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+         II != E; ++II) {
+      // Print the assembly for the instruction.
+      printMachineInstruction(II);
+    }
+  }
+
+  // Print out jump tables referenced by the function.
+  EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+  O << CurrentFnName << "\tendp\n";
+
+  O.flush();
+
+  // We didn't modify anything.
+  return false;
+}
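+
+// For reference (illustrative sketch, not output of this patch): for an
+// externally visible function _main, the code above emits roughly
+//
+//   	public _main
+//   _main	proc near
+//   	...instructions and jump tables...
+//   _main	endp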
+
+void X86IntelAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+  unsigned char value = MI->getOperand(Op).getImm();
+  assert(value <= 7 && "Invalid ssecc argument!");
+  switch (value) {
+  case 0: O << "eq"; break;
+  case 1: O << "lt"; break;
+  case 2: O << "le"; break;
+  case 3: O << "unord"; break;
+  case 4: O << "neq"; break;
+  case 5: O << "nlt"; break;
+  case 6: O << "nle"; break;
+  case 7: O << "ord"; break;
+  }
+}
+
+void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
+                                 const char *Modifier) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register: {
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+      unsigned Reg = MO.getReg();
+      if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+        MVT VT = (strcmp(Modifier,"subreg64") == 0) ?
+          MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
+                      ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
+        Reg = getX86SubSuperRegister(Reg, VT);
+      }
+      O << TRI->getName(Reg);
+    } else
+      O << "reg" << MO.getReg();
+    return;
+  }
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    printBasicBlockLabel(MO.getMBB());
+    return;
+  case MachineOperand::MO_JumpTableIndex: {
+    bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+    if (!isMemOp) O << "OFFSET ";
+    O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << "_" << MO.getIndex();
+    return;
+  }
+  case MachineOperand::MO_ConstantPoolIndex: {
+    bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+    if (!isMemOp) O << "OFFSET ";
+    O << "[" << TAI->getPrivateGlobalPrefix() << "CPI"
+      << getFunctionNumber() << "_" << MO.getIndex();
+    printOffset(MO.getOffset());
+    O << "]";
+    return;
+  }
+  case MachineOperand::MO_GlobalAddress: {
+    bool isCallOp = Modifier && !strcmp(Modifier, "call");
+    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
+    GlobalValue *GV = MO.getGlobal();
+    std::string Name = Mang->getValueName(GV);
+
+    decorateName(Name, GV);
+
+    if (!isMemOp && !isCallOp) O << "OFFSET ";
+    if (GV->hasDLLImportLinkage()) {
+      // FIXME: This should be fixed with full support of stdcall & fastcall
+      // CC's
+      O << "__imp_";
+    }
+    O << Name;
+    printOffset(MO.getOffset());
+    return;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    bool isCallOp = Modifier && !strcmp(Modifier, "call");
+    if (!isCallOp) O << "OFFSET ";
+    O << TAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+  default:
+    O << "<unknown operand type>"; return;
+  }
+}
+
+void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
+                                              unsigned Op,
+                                              const char *Modifier) {
+  const MachineOperand &BaseReg  = MI->getOperand(Op);
+  int ScaleVal                   = MI->getOperand(Op+1).getImm();
+  const MachineOperand &IndexReg = MI->getOperand(Op+2);
+  const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+  O << "[";
+  bool NeedPlus = false;
+  if (BaseReg.getReg()) {
+    printOp(BaseReg, Modifier);
+    NeedPlus = true;
+  }
+
+  if (IndexReg.getReg()) {
+    if (NeedPlus) O << " + ";
+    if (ScaleVal != 1)
+      O << ScaleVal << "*";
+    printOp(IndexReg, Modifier);
+    NeedPlus = true;
+  }
+
+  if (DispSpec.isGlobal() || DispSpec.isCPI() ||
+      DispSpec.isJTI()) {
+    if (NeedPlus)
+      O << " + ";
+    printOp(DispSpec, "mem");
+  } else {
+    int DispVal = DispSpec.getImm();
+    if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) {
+      if (NeedPlus) {
+        if (DispVal > 0)
+          O << " + ";
+        else {
+          O << " - ";
+          DispVal = -DispVal;
+        }
+      }
+      O << DispVal;
+    }
+  }
+  O << "]";
+}
+
+void X86IntelAsmPrinter::printMemReference(const
MachineInstr *MI, unsigned Op, + const char *Modifier) { + assert(isMem(MI, Op) && "Invalid memory reference!"); + MachineOperand Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4, Modifier); + O << ':'; + } + printLeaMemReference(MI, Op, Modifier); +} + +void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false, false); + O << '-' << "\"L" << getFunctionNumber() << "$pb\"'\n"; +} + +void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { + O << "\"L" << getFunctionNumber() << "$pb\"\n"; + O << "\"L" << getFunctionNumber() << "$pb\":"; +} + +bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO, + const char Mode) { + unsigned Reg = MO.getReg(); + switch (Mode) { + default: return true; // Unknown mode. + case 'b': // Print QImode register + Reg = getX86SubSuperRegister(Reg, MVT::i8); + break; + case 'h': // Print QImode high register + Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + break; + case 'w': // Print HImode register + Reg = getX86SubSuperRegister(Reg, MVT::i16); + break; + case 'k': // Print SImode register + Reg = getX86SubSuperRegister(Reg, MVT::i32); + break; + } + + O << '%' << TRI->getName(Reg); + return false; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool X86IntelAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'b': // Print QImode register + case 'h': // Print QImode high register + case 'w': // Print HImode register + case 'k': // Print SImode register + return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]); + } + } + + printOperand(MI, OpNo); + return false; +} + +bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printMemReference(MI, OpNo); + return false; +} + +/// printMachineInstruction -- Print out a single X86 LLVM instruction +/// MI in Intel syntax to the current output stream. +/// +void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + // Call the autogenerated instruction printer routines. + printInstruction(MI); +} + +bool X86IntelAsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + + Mang->markCharUnacceptable('.'); + + O << "\t.686\n\t.model flat\n\n"; + + // Emit declarations for external functions. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->isDeclaration()) { + std::string Name = Mang->getValueName(I); + decorateName(Name, I); + + O << "\textern " ; + if (I->hasDLLImportLinkage()) { + O << "__imp_"; + } + O << Name << ":near\n"; + } + + // Emit declarations for external globals. Note that VC++ always declares + // external globals to have type byte, and if that's good enough for VC++... 
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->isDeclaration()) { + std::string Name = Mang->getValueName(I); + + O << "\textern " ; + if (I->hasDLLImportLinkage()) { + O << "__imp_"; + } + O << Name << ":byte\n"; + } + } + + return Result; +} + +bool X86IntelAsmPrinter::doFinalization(Module &M) { + const TargetData *TD = TM.getTargetData(); + + // Print out module-level global variables here. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->isDeclaration()) continue; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(I)) + continue; + + std::string name = Mang->getValueName(I); + Constant *C = I->getInitializer(); + unsigned Align = TD->getPreferredAlignmentLog(I); + bool bCustomSegment = false; + + switch (I->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + SwitchToDataSection(""); + O << name << "?\tsegment common 'COMMON'\n"; + bCustomSegment = true; + // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256 + // are also available. + break; + case GlobalValue::AppendingLinkage: + SwitchToDataSection(""); + O << name << "?\tsegment public 'DATA'\n"; + bCustomSegment = true; + // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256 + // are also available. + break; + case GlobalValue::DLLExportLinkage: + DLLExportedGVs.insert(name); + // FALL THROUGH + case GlobalValue::ExternalLinkage: + O << "\tpublic " << name << "\n"; + // FALL THROUGH + case GlobalValue::InternalLinkage: + SwitchToSection(TAI->getDataSection()); + break; + default: + assert(0 && "Unknown linkage type!"); + } + + if (!bCustomSegment) + EmitAlignment(Align, I); + + O << name << ":"; + if (VerboseAsm) + O << "\t\t\t\t" << TAI->getCommentString() + << " " << I->getName(); + O << '\n'; + + EmitGlobalConstant(C); + + if (bCustomSegment) + O << name << "?\tends\n"; + } + + // Output linker support code for dllexported globals + if (!DLLExportedGVs.empty() || !DLLExportedFns.empty()) { + SwitchToDataSection(""); + O << "; WARNING: The following code is valid only with MASM v8.x" + << "and (possible) higher\n" + << "; This version of MASM is usually shipped with Microsoft " + << "Visual Studio 2005\n" + << "; or (possible) further versions. Unfortunately, there is no " + << "way to support\n" + << "; dllexported symbols in the earlier versions of MASM in fully " + << "automatic way\n\n"; + O << "_drectve\t segment info alias('.drectve')\n"; + } + + for (StringSet<>::iterator i = DLLExportedGVs.begin(), + e = DLLExportedGVs.end(); + i != e; ++i) + O << "\t db ' /EXPORT:" << i->getKeyData() << ",data'\n"; + + for (StringSet<>::iterator i = DLLExportedFns.begin(), + e = DLLExportedFns.end(); + i != e; ++i) + O << "\t db ' /EXPORT:" << i->getKeyData() << "'\n"; + + if (!DLLExportedGVs.empty() || !DLLExportedFns.empty()) + O << "_drectve\t ends\n"; + + // Bypass X86SharedAsmPrinter::doFinalization(). + bool Result = AsmPrinter::doFinalization(M); + SwitchToDataSection(""); + O << "\tend\n"; + return Result; +} + +void X86IntelAsmPrinter::EmitString(const ConstantArray *CVA) const { + unsigned NumElts = CVA->getNumOperands(); + if (NumElts) { + // ML does not have escape sequences except '' for '. It also has a maximum + // string length of 255. 
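+    // Illustrative example (not in the original source): the bytes "ab'c\n"
+    // come out as
+    //   	db 'ab''c',10
+    // printable runs are quoted, an embedded quote is doubled, and other
+    // bytes are emitted as decimal values outside the quotes.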
+    unsigned len = 0;
+    bool inString = false;
+    for (unsigned i = 0; i < NumElts; i++) {
+      int n = cast<ConstantInt>(CVA->getOperand(i))->getZExtValue() & 255;
+      if (len == 0)
+        O << "\tdb ";
+
+      if (n >= 32 && n <= 127) {
+        if (!inString) {
+          if (len > 0) {
+            O << ",'";
+            len += 2;
+          } else {
+            O << "'";
+            len++;
+          }
+          inString = true;
+        }
+        if (n == '\'') {
+          O << "'";
+          len++;
+        }
+        O << char(n);
+      } else {
+        if (inString) {
+          O << "'";
+          len++;
+          inString = false;
+        }
+        if (len > 0) {
+          O << ",";
+          len++;
+        }
+        O << n;
+        len += 1 + (n > 9) + (n > 99);
+      }
+
+      if (len > 60) {
+        if (inString) {
+          O << "'";
+          inString = false;
+        }
+        O << "\n";
+        len = 0;
+      }
+    }
+
+    if (len > 0) {
+      if (inString)
+        O << "'";
+      O << "\n";
+    }
+  }
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter1.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
new file mode 100644
index 000000000000..9520d982f692
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -0,0 +1,152 @@
+//===-- X86IntelAsmPrinter.h - Convert X86 LLVM code to Intel assembly ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Intel assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INTELASMPRINTER_H
+#define X86INTELASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
+  explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+                              const TargetAsmInfo *T, CodeGenOpt::Level OL,
+                              bool V)
+    : AsmPrinter(O, TM, T, OL, V) {}
+
+  virtual const char *getPassName() const {
+    return "X86 Intel-Style Assembly Printer";
+  }
+
+  /// printInstruction - This method is automatically generated by tablegen
+  /// from the instruction set description. This method returns true if the
+  /// machine instruction was sufficiently described to print it, otherwise it
+  /// returns false.
+  bool printInstruction(const MachineInstr *MI);
+
+  // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier = 0) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Not physreg??"); + O << TM.getRegisterInfo()->get(MO.getReg()).Name; // Capitalized names + } else { + printOp(MO, Modifier); + } + } + + void printi8mem(const MachineInstr *MI, unsigned OpNo) { + O << "BYTE PTR "; + printMemReference(MI, OpNo); + } + void printi16mem(const MachineInstr *MI, unsigned OpNo) { + O << "WORD PTR "; + printMemReference(MI, OpNo); + } + void printi32mem(const MachineInstr *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printi64mem(const MachineInstr *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printi128mem(const MachineInstr *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printf32mem(const MachineInstr *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printf64mem(const MachineInstr *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printf80mem(const MachineInstr *MI, unsigned OpNo) { + O << "XWORD PTR "; + printMemReference(MI, OpNo); + } + void printf128mem(const MachineInstr *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printlea32mem(const MachineInstr *MI, unsigned OpNo) { + O << "DWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MachineInstr *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo, "subreg64"); + } + + bool printAsmMRegister(const MachineOperand &MO, const char Mode); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, const char *Modifier = 0); + void printSSECC(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); + void printLeaMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); + void printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const; + void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB); + } + void printPICLabel(const MachineInstr *MI, unsigned Op); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + // We have to propagate some information about MachineFunction to + // AsmPrinter. It's ok, when we're printing the function, since we have + // access to MachineFunction and can get the appropriate MachineFunctionInfo. + // Unfortunately, this is not possible when we're printing reference to + // Function (e.g. calling it and so on). Even more, there is no way to get the + // corresponding MachineFunctions: it can even be not created at all. That's + // why we should use additional structure, when we're collecting all necessary + // information. + // + // This structure is using e.g. 
for name decoration for stdcall & fastcall'ed
+  // function, since we have to use arguments' size for decoration.
+  typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+  FMFInfoMap FunctionInfoMap;
+
+  void decorateName(std::string& Name, const GlobalValue* GV);
+
+  virtual void EmitString(const ConstantArray *CVA) const;
+
+  // Necessary for dllexport support
+  StringSet<> DLLExportedFns, DLLExportedGVs;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
new file mode 100644
index 000000000000..d98299049a2d
--- /dev/null
+++ b/lib/Target/X86/CMakeLists.txt
@@ -0,0 +1,29 @@
+set(LLVM_TARGET_DEFINITIONS X86.td)
+
+tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(X86GenRegisterNames.inc -gen-register-enums)
+tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenInstrNames.inc -gen-instr-enums)
+tablegen(X86GenInstrInfo.inc -gen-instr-desc)
+tablegen(X86GenAsmWriter.inc -gen-asm-writer)
+tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(X86GenDAGISel.inc -gen-dag-isel)
+tablegen(X86GenFastISel.inc -gen-fast-isel)
+tablegen(X86GenCallingConv.inc -gen-callingconv)
+tablegen(X86GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(X86CodeGen
+  X86CodeEmitter.cpp
+  X86ELFWriterInfo.cpp
+  X86FloatingPoint.cpp
+  X86FloatingPointRegKill.cpp
+  X86ISelDAGToDAG.cpp
+  X86ISelLowering.cpp
+  X86InstrInfo.cpp
+  X86JITInfo.cpp
+  X86RegisterInfo.cpp
+  X86Subtarget.cpp
+  X86TargetAsmInfo.cpp
+  X86TargetMachine.cpp
+  X86FastISel.cpp
+  )
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
new file mode 100644
index 000000000000..44f1c5d5a509
--- /dev/null
+++ b/lib/Target/X86/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMX86CodeGen
+TARGET = X86
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
+                X86GenRegisterInfo.inc X86GenInstrNames.inc \
+                X86GenInstrInfo.inc X86GenAsmWriter.inc \
+                X86GenAsmWriter1.inc X86GenDAGISel.inc \
+                X86GenFastISel.inc \
+                X86GenCallingConv.inc X86GenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
new file mode 100644
index 000000000000..be28e8b394a4
--- /dev/null
+++ b/lib/Target/X86/README-FPStack.txt
@@ -0,0 +1,85 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: FP stack related stuff
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+Some targets (e.g. athlons) prefer freep to fstp ST(0):
+http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
+
+//===---------------------------------------------------------------------===//
+
+This should use fiadd on chips where it is profitable:
+double foo(double P, int *I) { return P+*I; }
+
+We have fiadd patterns now but the following have the same cost and
+complexity. We need a way to specify the latter is more profitable.
+
+def FpADD32m  : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1,
+                                     (extloadf64f32 addr:$src2)))]>;
+                // ST(0) = ST(0) + [mem32]
+
+def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+                    [(set RFP:$dst, (fadd RFP:$src1,
+                                     (X86fild addr:$src2, i32)))]>;
+                // ST(0) = ST(0) + [mem32int]
+
+//===---------------------------------------------------------------------===//
+
+The FP stackifier needs to be global. Also, it should handle simple
+permutations to reduce the number of shuffle instructions, e.g. turning:
+
+fld P	->		fld Q
+fld Q			fld P
+fxch
+
+or:
+
+fxch	->		fucomi
+fucomi			jl X
+jg X
+
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
+//===---------------------------------------------------------------------===//
+
+Add a target specific hook to DAG combiner to handle SINT_TO_FP and
+FP_TO_SINT when the source operand is already in memory.
+
+//===---------------------------------------------------------------------===//
+
+Open code rint,floor,ceil,trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+Opencode the sincos[f] libcall.
+
+//===---------------------------------------------------------------------===//
+
+None of the FPStack instructions are handled in
+X86RegisterInfo::foldMemoryOperand, which prevents the spiller from
+folding spill code into the instructions.
+
+//===---------------------------------------------------------------------===//
+
+Currently the x86 codegen isn't very good at mixing SSE and FPStack
+code:
+
+unsigned int foo(double x) { return x; }
+
+foo:
+	subl $20, %esp
+	movsd 24(%esp), %xmm0
+	movsd %xmm0, 8(%esp)
+	fldl 8(%esp)
+	fisttpll (%esp)
+	movl (%esp), %eax
+	addl $20, %esp
+	ret
+
+This just requires being smarter when custom expanding fptoui.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt
new file mode 100644
index 000000000000..a6c8616b6d2c
--- /dev/null
+++ b/lib/Target/X86/README-MMX.txt
@@ -0,0 +1,71 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: MMX-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+#include <mmintrin.h>
+
+__v2si qux(int A) {
+  return (__v2si){ 0, A };
+}
+
+is compiled into:
+
+_qux:
+	subl $28, %esp
+	movl 32(%esp), %eax
+	movd %eax, %mm0
+	movq %mm0, (%esp)
+	movl (%esp), %eax
+	movl %eax, 20(%esp)
+	movq %mm0, 8(%esp)
+	movl 12(%esp), %eax
+	movl %eax, 16(%esp)
+	movq 16(%esp), %mm0
+	addl $28, %esp
+	ret
+
+Yuck!
+
+GCC gives us:
+
+_qux:
+	subl	$12, %esp
+	movl	16(%esp), %eax
+	movl	20(%esp), %edx
+	movl	$0, (%eax)
+	movl	%edx, 4(%eax)
+	addl	$12, %esp
+	ret	$4
+
+//===---------------------------------------------------------------------===//
+
+We generate crappy code for this:
+
+__m64 t() {
+  return _mm_cvtsi32_si64(1);
+}
+
+_t:
+	subl	$12, %esp
+	movl	$1, %eax
+	movd	%eax, %mm0
+	movq	%mm0, (%esp)
+	movl	(%esp), %eax
+	movl	4(%esp), %edx
+	addl	$12, %esp
+	ret
+
+The extra stack traffic is covered in the previous entry. But the other reason
+is we are not smart about materializing constants in MMX registers.
With -m64 + + movl $1, %eax + movd %eax, %mm0 + movd %mm0, %rax + ret + +We should be using a constantpool load instead: + movq LC0(%rip), %rax diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt new file mode 100644 index 000000000000..71ad51c7984e --- /dev/null +++ b/lib/Target/X86/README-SSE.txt @@ -0,0 +1,918 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the X86 backend: SSE-specific stuff. +//===---------------------------------------------------------------------===// + +- Consider eliminating the unaligned SSE load intrinsics, replacing them with + unaligned LLVM load instructions. + +//===---------------------------------------------------------------------===// + +Expand libm rounding functions inline: Significant speedups possible. +http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html + +//===---------------------------------------------------------------------===// + +When compiled with unsafemath enabled, "main" should enable SSE DAZ mode and +other fast SSE modes. + +//===---------------------------------------------------------------------===// + +Think about doing i64 math in SSE regs on x86-32. + +//===---------------------------------------------------------------------===// + +This testcase should have no SSE instructions in it, and only one load from +a constant pool: + +double %test3(bool %B) { + %C = select bool %B, double 123.412, double 523.01123123 + ret double %C +} + +Currently, the select is being lowered, which prevents the dag combiner from +turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)' + +The pattern isel got this one right. + +//===---------------------------------------------------------------------===// + +SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction +like this: + + X += y + +and the register allocator decides to spill X, it is cheaper to emit this as: + +Y += [xslot] +store Y -> [xslot] + +than as: + +tmp = [xslot] +tmp += y +store tmp -> [xslot] + +..and this uses one fewer register (so this should be done at load folding +time, not at spiller time). *Note* however that this can only be done +if Y is dead. Here's a testcase: + +@.str_3 = external global [15 x i8] +declare void @printf(i32, ...) +define void @main() { +build_tree.exit: + br label %no_exit.i7 + +no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit + %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], + [ %tmp.34.i18, %no_exit.i7 ] + %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], + [ %tmp.28.i16, %no_exit.i7 ] + %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 + %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 + br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 + +Compute_Tree.exit23: ; preds = %no_exit.i7 + tail call void (i32, ...)* @printf( i32 0 ) + store double %tmp.34.i18, double* null + ret void +} + +We currently emit: + +.BBmain_1: + xorpd %XMM1, %XMM1 + addsd %XMM0, %XMM1 +*** movsd %XMM2, QWORD PTR [%ESP + 8] +*** addsd %XMM2, %XMM1 +*** movsd QWORD PTR [%ESP + 8], %XMM2 + jmp .BBmain_1 # no_exit.i7 + +This is a bugpoint reduced testcase, which is why the testcase doesn't make +much sense (e.g. its an infinite loop). 
:) + +//===---------------------------------------------------------------------===// + +SSE should implement 'select_cc' using 'emulated conditional moves' that use +pcmp/pand/pandn/por to do a selection instead of a conditional branch: + +double %X(double %Y, double %Z, double %A, double %B) { + %C = setlt double %A, %B + %z = add double %Z, 0.0 ;; select operand is not a load + %D = select bool %C, double %Y, double %z + ret double %D +} + +We currently emit: + +_X: + subl $12, %esp + xorpd %xmm0, %xmm0 + addsd 24(%esp), %xmm0 + movsd 32(%esp), %xmm1 + movsd 16(%esp), %xmm2 + ucomisd 40(%esp), %xmm1 + jb LBB_X_2 +LBB_X_1: + movsd %xmm0, %xmm2 +LBB_X_2: + movsd %xmm2, (%esp) + fldl (%esp) + addl $12, %esp + ret + +//===---------------------------------------------------------------------===// + +It's not clear whether we should use pxor or xorps / xorpd to clear XMM +registers. The choice may depend on subtarget information. We should do some +more experiments on different x86 machines. + +//===---------------------------------------------------------------------===// + +Lower memcpy / memset to a series of SSE 128 bit move instructions when it's +feasible. + +//===---------------------------------------------------------------------===// + +Codegen: + if (copysign(1.0, x) == copysign(1.0, y)) +into: + if (x^y & mask) +when using SSE. + +//===---------------------------------------------------------------------===// + +Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half +of a v4sf value. + +//===---------------------------------------------------------------------===// + +Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0}. +Perhaps use pxor / xorp* to clear a XMM register first? + +//===---------------------------------------------------------------------===// + +How to decide when to use the "floating point version" of logical ops? Here are +some code fragments: + + movaps LCPI5_5, %xmm2 + divps %xmm1, %xmm2 + mulps %xmm2, %xmm3 + mulps 8656(%ecx), %xmm3 + addps 8672(%ecx), %xmm3 + andps LCPI5_6, %xmm2 + andps LCPI5_1, %xmm3 + por %xmm2, %xmm3 + movdqa %xmm3, (%edi) + + movaps LCPI5_5, %xmm1 + divps %xmm0, %xmm1 + mulps %xmm1, %xmm3 + mulps 8656(%ecx), %xmm3 + addps 8672(%ecx), %xmm3 + andps LCPI5_6, %xmm1 + andps LCPI5_1, %xmm3 + orps %xmm1, %xmm3 + movaps %xmm3, 112(%esp) + movaps %xmm3, (%ebx) + +Due to some minor source change, the later case ended up using orps and movaps +instead of por and movdqa. Does it matter? + +//===---------------------------------------------------------------------===// + +X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible +to choose between movaps, movapd, and movdqa based on types of source and +destination? + +How about andps, andpd, and pand? Do we really care about the type of the packed +elements? If not, why not always use the "ps" variants which are likely to be +shorter. + +//===---------------------------------------------------------------------===// + +External test Nurbs exposed some problems. Look for +__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. 
This is what icc +emits: + + movaps (%edx), %xmm2 #59.21 + movaps (%edx), %xmm5 #60.21 + movaps (%edx), %xmm4 #61.21 + movaps (%edx), %xmm3 #62.21 + movl 40(%ecx), %ebp #69.49 + shufps $0, %xmm2, %xmm5 #60.21 + movl 100(%esp), %ebx #69.20 + movl (%ebx), %edi #69.20 + imull %ebp, %edi #69.49 + addl (%eax), %edi #70.33 + shufps $85, %xmm2, %xmm4 #61.21 + shufps $170, %xmm2, %xmm3 #62.21 + shufps $255, %xmm2, %xmm2 #63.21 + lea (%ebp,%ebp,2), %ebx #69.49 + negl %ebx #69.49 + lea -3(%edi,%ebx), %ebx #70.33 + shll $4, %ebx #68.37 + addl 32(%ecx), %ebx #68.37 + testb $15, %bl #91.13 + jne L_B1.24 # Prob 5% #91.13 + +This is the llvm code after instruction scheduling: + +cond_next140 (0xa910740, LLVM BB @0xa90beb0): + %reg1078 = MOV32ri -3 + %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0 + %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40 + %reg1080 = IMUL32rr %reg1079, %reg1037 + %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0 + %reg1038 = LEA32r %reg1081, 1, %reg1080, -3 + %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32 + %reg1082 = SHL32ri %reg1038, 4 + %reg1039 = ADD32rr %reg1036, %reg1082 + %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0 + %reg1034 = SHUFPSrr %reg1083, %reg1083, 170 + %reg1032 = SHUFPSrr %reg1083, %reg1083, 0 + %reg1035 = SHUFPSrr %reg1083, %reg1083, 255 + %reg1033 = SHUFPSrr %reg1083, %reg1083, 85 + %reg1040 = MOV32rr %reg1039 + %reg1084 = AND32ri8 %reg1039, 15 + CMP32ri8 %reg1084, 0 + JE mbb + +Still ok. After register allocation: + +cond_next140 (0xa910740, LLVM BB @0xa90beb0): + %EAX = MOV32ri -3 + %EDX = MOV32rm , 1, %NOREG, 0 + ADD32rm %EAX, %EDX, 1, %NOREG, 0 + %EDX = MOV32rm , 1, %NOREG, 0 + %EDX = MOV32rm %EDX, 1, %NOREG, 40 + IMUL32rr %EAX, %EDX + %ESI = MOV32rm , 1, %NOREG, 0 + %ESI = MOV32rm %ESI, 1, %NOREG, 0 + MOV32mr , 1, %NOREG, 0, %ESI + %EAX = LEA32r %ESI, 1, %EAX, -3 + %ESI = MOV32rm , 1, %NOREG, 0 + %ESI = MOV32rm %ESI, 1, %NOREG, 32 + %EDI = MOV32rr %EAX + SHL32ri %EDI, 4 + ADD32rr %EDI, %ESI + %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0 + %XMM1 = MOVAPSrr %XMM0 + SHUFPSrr %XMM1, %XMM1, 170 + %XMM2 = MOVAPSrr %XMM0 + SHUFPSrr %XMM2, %XMM2, 0 + %XMM3 = MOVAPSrr %XMM0 + SHUFPSrr %XMM3, %XMM3, 255 + SHUFPSrr %XMM0, %XMM0, 85 + %EBX = MOV32rr %EDI + AND32ri8 %EBX, 15 + CMP32ri8 %EBX, 0 + JE mbb + +This looks really bad. The problem is shufps is a destructive opcode. Since it +appears as operand two in more than one shufps ops. It resulted in a number of +copies. Note icc also suffers from the same problem. Either the instruction +selector should select pshufd or The register allocator can made the two-address +to three-address transformation. + +It also exposes some other problems. See MOV32ri -3 and the spills. + +//===---------------------------------------------------------------------===// + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500 + +LLVM is producing bad code. + +LBB_main_4: # cond_true44 + addps %xmm1, %xmm2 + subps %xmm3, %xmm2 + movaps (%ecx), %xmm4 + movaps %xmm2, %xmm1 + addps %xmm4, %xmm1 + addl $16, %ecx + incl %edx + cmpl $262144, %edx + movaps %xmm3, %xmm2 + movaps %xmm4, %xmm3 + jne LBB_main_4 # cond_true44 + +There are two problems. 1) No need to two loop induction variables. We can +compare against 262144 * 16. 2) Known register coalescer issue. We should +be able eliminate one of the movaps: + + addps %xmm2, %xmm1 <=== Commute! + subps %xmm3, %xmm1 + movaps (%ecx), %xmm4 + movaps %xmm1, %xmm1 <=== Eliminate! 
+ addps %xmm4, %xmm1 + addl $16, %ecx + incl %edx + cmpl $262144, %edx + movaps %xmm3, %xmm2 + movaps %xmm4, %xmm3 + jne LBB_main_4 # cond_true44 + +//===---------------------------------------------------------------------===// + +Consider: + +__m128 test(float a) { + return _mm_set_ps(0.0, 0.0, 0.0, a*a); +} + +This compiles into: + +movss 4(%esp), %xmm1 +mulss %xmm1, %xmm1 +xorps %xmm0, %xmm0 +movss %xmm1, %xmm0 +ret + +Because mulss doesn't modify the top 3 elements, the top elements of +xmm1 are already zero'd. We could compile this to: + +movss 4(%esp), %xmm0 +mulss %xmm0, %xmm0 +ret + +//===---------------------------------------------------------------------===// + +Here's a sick and twisted idea. Consider code like this: + +__m128 test(__m128 a) { + float b = *(float*)&A; + ... + return _mm_set_ps(0.0, 0.0, 0.0, b); +} + +This might compile to this code: + +movaps c(%esp), %xmm1 +xorps %xmm0, %xmm0 +movss %xmm1, %xmm0 +ret + +Now consider if the ... code caused xmm1 to get spilled. This might produce +this code: + +movaps c(%esp), %xmm1 +movaps %xmm1, c2(%esp) +... + +xorps %xmm0, %xmm0 +movaps c2(%esp), %xmm1 +movss %xmm1, %xmm0 +ret + +However, since the reload is only used by these instructions, we could +"fold" it into the uses, producing something like this: + +movaps c(%esp), %xmm1 +movaps %xmm1, c2(%esp) +... + +movss c2(%esp), %xmm0 +ret + +... saving two instructions. + +The basic idea is that a reload from a spill slot, can, if only one 4-byte +chunk is used, bring in 3 zeros the the one element instead of 4 elements. +This can be used to simplify a variety of shuffle operations, where the +elements are fixed zeros. + +//===---------------------------------------------------------------------===// + +__m128d test1( __m128d A, __m128d B) { + return _mm_shuffle_pd(A, B, 0x3); +} + +compiles to + +shufpd $3, %xmm1, %xmm0 + +Perhaps it's better to use unpckhpd instead? + +unpckhpd %xmm1, %xmm0 + +Don't know if unpckhpd is faster. But it is shorter. + +//===---------------------------------------------------------------------===// + +This code generates ugly code, probably due to costs being off or something: + +define void @test(float* %P, <4 x float>* %P2 ) { + %xFloat0.688 = load float* %P + %tmp = load <4 x float>* %P2 + %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3 + store <4 x float> %inFloat3.713, <4 x float>* %P2 + ret void +} + +Generates: + +_test: + movl 8(%esp), %eax + movaps (%eax), %xmm0 + pxor %xmm1, %xmm1 + movaps %xmm0, %xmm2 + shufps $50, %xmm1, %xmm2 + shufps $132, %xmm2, %xmm0 + movaps %xmm0, (%eax) + ret + +Would it be better to generate: + +_test: + movl 8(%esp), %ecx + movaps (%ecx), %xmm0 + xor %eax, %eax + pinsrw $6, %eax, %xmm0 + pinsrw $7, %eax, %xmm0 + movaps %xmm0, (%ecx) + ret + +? + +//===---------------------------------------------------------------------===// + +Some useful information in the Apple Altivec / SSE Migration Guide: + +http://developer.apple.com/documentation/Performance/Conceptual/ +Accelerate_sse_migration/index.html + +e.g. SSE select using and, andnot, or. Various SSE compare translations. + +//===---------------------------------------------------------------------===// + +Add hooks to commute some CMPP operations. + +//===---------------------------------------------------------------------===// + +Apply the same transformation that merged four float into a single 128-bit load +to loads from constant pool. 
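+
+For reference, a made-up illustration of the constant-pool case (not from the
+original testcase): this builds a vector from four scalar float constants,
+which should ideally become a single aligned 16-byte constant-pool entry
+loaded with one movaps, rather than four scalar loads merged late or not at
+all:
+
+#include <xmmintrin.h>
+__m128 four_consts(void) {
+  /* All four lanes are compile-time constants, so the whole vector can
+     live in the constant pool as one 16-byte object. */
+  return _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
+}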
+
+//===---------------------------------------------------------------------===//
+
+Floating point max / min are commutable when -enable-unsafe-fp-path is
+specified.  We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other
+nodes which are selected to max / min instructions that are marked commutable.
+
+//===---------------------------------------------------------------------===//
+
+We should materialize vector constants like "all ones" and "signbit" with
+code like:
+
+     cmpeqps xmm1, xmm1   ; xmm1 = all-ones
+
+and:
+     cmpeqps xmm1, xmm1   ; xmm1 = all-ones
+     pslld   xmm1, 31     ; xmm1 = all 100000000000...
+
+instead of using a load from the constant pool.  The latter is important for
+ABS/NEG/copysign etc.
+
+//===---------------------------------------------------------------------===//
+
+These functions:
+
+#include <emmintrin.h>
+__m128i a;
+void x(unsigned short n) {
+  a = _mm_slli_epi32 (a, n);
+}
+void y(unsigned n) {
+  a = _mm_slli_epi32 (a, n);
+}
+
+compile to (-O3 -static -fomit-frame-pointer):
+_x:
+        movzwl  4(%esp), %eax
+        movd    %eax, %xmm0
+        movaps  _a, %xmm1
+        pslld   %xmm0, %xmm1
+        movaps  %xmm1, _a
+        ret
+_y:
+        movd    4(%esp), %xmm0
+        movaps  _a, %xmm1
+        pslld   %xmm0, %xmm1
+        movaps  %xmm1, _a
+        ret
+
+"y" looks good, but "x" does silly movzwl shuffling through a GPR.  It seems
+like movd would be sufficient in both cases, as the value is already zero
+extended in the 32-bit stack slot IIRC.  For signed short, it should also be
+safe, as a really-signed value would be undefined for pslld.
+
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+int t1(double d) { return signbit(d); }
+
+This currently compiles to:
+        subl    $12, %esp
+        movsd   16(%esp), %xmm0
+        movsd   %xmm0, (%esp)
+        movl    4(%esp), %eax
+        shrl    $31, %eax
+        addl    $12, %esp
+        ret
+
+We should use movmskp{s|d} instead.
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single
+(aligned) vector load.  This functionality has a couple of problems.
+
+1. The code to infer alignment from loads of globals is in the X86 backend,
+   not the dag combiner.  This is because dagcombine2 needs to be able to see
+   through the X86ISD::Wrapper node, which DAGCombine can't really do.
+2. The code for turning 4 x load into a single vector load is target
+   independent and should be moved to the dag combiner.
+3. The code for turning 4 x load into a vector load can only handle a direct
+   load from a global or a direct load from the stack.  It should be
+   generalized to handle any load from P, P+4, P+8, P+12, where P can be
+   anything.
+4. The alignment inference code cannot handle loads from globals in non-static
+   mode because it doesn't look through the extra dyld stub load.  If you try
+   vec_align.ll without -relocation-model=static, you'll see what I mean.
+
+//===---------------------------------------------------------------------===//
+
+We should lower store(fneg(load p), q) into an integer load+xor+store, which
+eliminates a constant pool load.
For example, consider: + +define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { +entry: + %tmp6 = sub float -0.000000e+00, %z.1 ; [#uses=1] + %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly + ret i64 %tmp20 +} + +This currently compiles to: + +LCPI1_0: # <4 x float> + .long 2147483648 # float -0 + .long 2147483648 # float -0 + .long 2147483648 # float -0 + .long 2147483648 # float -0 +_ccosf: + subl $12, %esp + movss 16(%esp), %xmm0 + movss %xmm0, 4(%esp) + movss 20(%esp), %xmm0 + xorps LCPI1_0, %xmm0 + movss %xmm0, (%esp) + call L_ccoshf$stub + addl $12, %esp + ret + +Note the load into xmm0, then xor (to negate), then store. In PIC mode, +this code computes the pic base and does two loads to do the constant pool +load, so the improvement is much bigger. + +The tricky part about this xform is that the argument load/store isn't exposed +until post-legalize, and at that point, the fneg has been custom expanded into +an X86 fxor. This means that we need to handle this case in the x86 backend +instead of in target independent code. + +//===---------------------------------------------------------------------===// + +Non-SSE4 insert into 16 x i8 is atrociously bad. + +//===---------------------------------------------------------------------===// + +<2 x i64> extract is substantially worse than <2 x f64>, even if the destination +is memory. + +//===---------------------------------------------------------------------===// + +SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext +sitting between the truncate and the extract. + +//===---------------------------------------------------------------------===// + +INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert +any number of 0.0 simultaneously. Currently we only use it for simple +insertions. + +See comments in LowerINSERT_VECTOR_ELT_SSE4. + +//===---------------------------------------------------------------------===// + +On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not +Custom. All combinations of insert/extract reg-reg, reg-mem, and mem-reg are +legal, it'll just take a few extra patterns written in the .td file. + +Note: this is not a code quality issue; the custom lowered code happens to be +right, but we shouldn't have to custom lower anything. This is probably related +to <2 x i64> ops being so bad. + +//===---------------------------------------------------------------------===// + +'select' on vectors and scalars could be a whole lot better. We currently +lower them to conditional branches. 
On x86-64, for example, we compile this:
+
+double test(double a, double b, double c, double d) { return a<b ? c : d; }
+
+to a compare and a branch rather than a conditional move.
+
+//===---------------------------------------------------------------------===//
+
+typedef short vSInt16 __attribute__ ((__vector_size__ (16)));
+
+static const vSInt16 a = {- 22725, - 12873, - 22725, - 12873, - 22725, - 12873,
+- 22725, - 12873};
+
+vSInt16 madd(vSInt16 b)
+{
+    return _mm_madd_epi16(a, b);
+}
+
+Generated code (x86-32, linux):
+madd:
+        pushl   %ebp
+        movl    %esp, %ebp
+        andl    $-16, %esp
+        movaps  .LCPI1_0, %xmm1
+        pmaddwd %xmm1, %xmm0
+        movl    %ebp, %esp
+        popl    %ebp
+        ret
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+#include <xmmintrin.h>
+__m128 foo2 (float x) {
+ return _mm_set_ps (0, 0, x, 0);
+}
+
+In x86-32 mode, we generate this spiffy code:
+
+_foo2:
+        movss   4(%esp), %xmm0
+        pshufd  $81, %xmm0, %xmm0
+        ret
+
+in x86-64 mode, we generate this code, which could be better:
+
+_foo2:
+        xorps   %xmm1, %xmm1
+        movss   %xmm0, %xmm1
+        pshufd  $81, %xmm1, %xmm0
+        ret
+
+In SSE4 mode, we could use insertps to make both better.
+
+Here's another testcase that could use insertps [mem]:
+
+#include <xmmintrin.h>
+extern float x2, x3;
+__m128 foo1 (float x1, float x4) {
+ return _mm_set_ps (x2, x1, x3, x4);
+}
+
+gcc mainline compiles it to:
+
+foo1:
+       insertps        $0x10, x2(%rip), %xmm0
+       insertps        $0x10, x3(%rip), %xmm1
+       movaps  %xmm1, %xmm2
+       movlhps %xmm0, %xmm2
+       movaps  %xmm2, %xmm0
+       ret
+
+//===---------------------------------------------------------------------===//
+
+We compile vector multiply-by-constant into poor code:
+
+define <4 x i32> @f(<4 x i32> %i) nounwind {
+        %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 >
+        ret <4 x i32> %A
+}
+
+On targets without SSE4.1, this compiles into:
+
+LCPI1_0:                                        ## <4 x i32>
+        .long   10
+        .long   10
+        .long   10
+        .long   10
+        .text
+        .align  4,0x90
+        .globl  _f
+_f:
+        pshufd  $3, %xmm0, %xmm1
+        movd    %xmm1, %eax
+        imull   LCPI1_0+12, %eax
+        movd    %eax, %xmm1
+        pshufd  $1, %xmm0, %xmm2
+        movd    %xmm2, %eax
+        imull   LCPI1_0+4, %eax
+        movd    %eax, %xmm2
+        punpckldq       %xmm1, %xmm2
+        movd    %xmm0, %eax
+        imull   LCPI1_0, %eax
+        movd    %eax, %xmm1
+        movhlps %xmm0, %xmm0
+        movd    %xmm0, %eax
+        imull   LCPI1_0+8, %eax
+        movd    %eax, %xmm0
+        punpckldq       %xmm0, %xmm1
+        movaps  %xmm1, %xmm0
+        punpckldq       %xmm2, %xmm0
+        ret
+
+It would be better to synthesize integer vector multiplication by constants
+using shifts and adds, pslld and paddd here.  And even on targets with SSE4.1,
+simple cases such as multiplication by powers of two would be better as
+vector shifts than as multiplications.
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+__m128i
+foo2 (char x)
+{
+  return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0);
+}
+
+into:
+        movl    $1, %eax
+        xorps   %xmm0, %xmm0
+        pinsrw  $2, %eax, %xmm0
+        movzbl  4(%esp), %eax
+        pinsrw  $3, %eax, %xmm0
+        movl    $256, %eax
+        pinsrw  $7, %eax, %xmm0
+        ret
+
+
+gcc-4.2:
+        subl    $12, %esp
+        movzbl  16(%esp), %eax
+        movdqa  LC0, %xmm0
+        pinsrw  $3, %eax, %xmm0
+        addl    $12, %esp
+        ret
+        .const
+        .align 4
+LC0:
+        .word   0
+        .word   0
+        .word   1
+        .word   0
+        .word   0
+        .word   0
+        .word   0
+        .word   256
+
+With SSE4, it should be
+      movdqa  .LC0(%rip), %xmm0
+      pinsrb  $6, %edi, %xmm0
+
+//===---------------------------------------------------------------------===//
+
+We should transform a shuffle of two vectors of constants into a single vector
+of constants.  Also, insertelement of a constant into a vector of constants
+should also result in a vector of constants.  e.g. 2008-06-25-VecISelBug.ll.
+ +We compiled it to something horrible: + + .align 4 +LCPI1_1: ## float + .long 1065353216 ## float 1 + .const + + .align 4 +LCPI1_0: ## <4 x float> + .space 4 + .long 1065353216 ## float 1 + .space 4 + .long 1065353216 ## float 1 + .text + .align 4,0x90 + .globl _t +_t: + xorps %xmm0, %xmm0 + movhps LCPI1_0, %xmm0 + movss LCPI1_1, %xmm1 + movaps %xmm0, %xmm2 + shufps $2, %xmm1, %xmm2 + shufps $132, %xmm2, %xmm0 + movaps %xmm0, 0 + +//===---------------------------------------------------------------------===// +rdar://5907648 + +This function: + +float foo(unsigned char x) { + return x; +} + +compiles to (x86-32): + +define float @foo(i8 zeroext %x) nounwind { + %tmp12 = uitofp i8 %x to float ; [#uses=1] + ret float %tmp12 +} + +compiles to: + +_foo: + subl $4, %esp + movzbl 8(%esp), %eax + cvtsi2ss %eax, %xmm0 + movss %xmm0, (%esp) + flds (%esp) + addl $4, %esp + ret + +We should be able to use: + cvtsi2ss 8($esp), %xmm0 +since we know the stack slot is already zext'd. + +//===---------------------------------------------------------------------===// + +Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64)) +when code size is critical. movlps is slower than movsd on core2 but it's one +byte shorter. + +//===---------------------------------------------------------------------===// + +We should use a dynamic programming based approach to tell when using FPStack +operations is cheaper than SSE. SciMark montecarlo contains code like this +for example: + +double MonteCarlo_num_flops(int Num_samples) { + return ((double) Num_samples)* 4.0; +} + +In fpstack mode, this compiles into: + +LCPI1_0: + .long 1082130432 ## float 4.000000e+00 +_MonteCarlo_num_flops: + subl $4, %esp + movl 8(%esp), %eax + movl %eax, (%esp) + fildl (%esp) + fmuls LCPI1_0 + addl $4, %esp + ret + +in SSE mode, it compiles into significantly slower code: + +_MonteCarlo_num_flops: + subl $12, %esp + cvtsi2sd 16(%esp), %xmm0 + mulsd LCPI1_0, %xmm0 + movsd %xmm0, (%esp) + fldl (%esp) + addl $12, %esp + ret + +There are also other cases in scimark where using fpstack is better, it is +cheaper to do fld1 than load from a constant pool for example, so +"load, add 1.0, store" is better done in the fp stack, etc. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt new file mode 100644 index 000000000000..69dc8ee1af7c --- /dev/null +++ b/lib/Target/X86/README-UNIMPLEMENTED.txt @@ -0,0 +1,14 @@ +//===---------------------------------------------------------------------===// +// Testcases that crash the X86 backend because they aren't implemented +//===---------------------------------------------------------------------===// + +These are cases we know the X86 backend doesn't handle. Patches are welcome +and appreciated, because no one has signed up to implemented these yet. +Implementing these would allow elimination of the corresponding intrinsics, +which would be great. + +1) vector shifts +2) vector comparisons +3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688 +4) bitcasts from vectors to scalars: PR2804 + diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt new file mode 100644 index 000000000000..ad12137c8913 --- /dev/null +++ b/lib/Target/X86/README-X86-64.txt @@ -0,0 +1,251 @@ +//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// + +Implement different PIC models? 
Right now we only support Mac OS X with the small PIC code model.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern void xx(void);
+void bar(void) {
+  xx();
+}
+
+gcc compiles to:
+
+.globl _bar
+_bar:
+        jmp     _xx
+
+We need to do the tailcall optimization as well.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 Optimization Manual 8.2 has some nice information about optimizing
+integer multiplication by a constant.  How much of it applies to Intel's
+X86-64 implementation?  There are definite trade-offs to consider: latency
+vs. register pressure vs. code size.
+
+//===---------------------------------------------------------------------===//
+
+Are we better off using branches instead of cmove to implement FP to
+unsigned i64?
+
+_conv:
+        ucomiss LC0(%rip), %xmm0
+        cvttss2siq      %xmm0, %rdx
+        jb      L3
+        subss   LC0(%rip), %xmm0
+        movabsq $-9223372036854775808, %rax
+        cvttss2siq      %xmm0, %rdx
+        xorq    %rax, %rdx
+L3:
+        movq    %rdx, %rax
+        ret
+
+instead of
+
+_conv:
+        movss   LCPI1_0(%rip), %xmm1
+        cvttss2siq      %xmm0, %rcx
+        movaps  %xmm0, %xmm2
+        subss   %xmm1, %xmm2
+        cvttss2siq      %xmm2, %rax
+        movabsq $-9223372036854775808, %rdx
+        xorq    %rdx, %rax
+        ucomiss %xmm1, %xmm0
+        cmovb   %rcx, %rax
+        ret
+
+Seems like the jb branch has a high likelihood of being taken.  It would have
+saved a few instructions.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen:
+
+int X[2];
+int b;
+void test(void) {
+  memset(X, b, 2*sizeof(X[0]));
+}
+
+llc:
+        movq    _b@GOTPCREL(%rip), %rax
+        movzbq  (%rax), %rax
+        movq    %rax, %rcx
+        shlq    $8, %rcx
+        orq     %rax, %rcx
+        movq    %rcx, %rax
+        shlq    $16, %rax
+        orq     %rcx, %rax
+        movq    %rax, %rcx
+        shlq    $32, %rcx
+        movq    _X@GOTPCREL(%rip), %rdx
+        orq     %rax, %rcx
+        movq    %rcx, (%rdx)
+        ret
+
+gcc:
+        movq    _b@GOTPCREL(%rip), %rax
+        movabsq $72340172838076673, %rdx
+        movzbq  (%rax), %rax
+        imulq   %rdx, %rax
+        movq    _X@GOTPCREL(%rip), %rdx
+        movq    %rax, (%rdx)
+        ret
+
+//===---------------------------------------------------------------------===//
+
+Vararg function prologue can be further optimized.  Currently all XMM
+registers are stored into the register save area.  Most of them can be
+eliminated since the upper bound of the number of XMM registers used is
+passed in %al.  gcc produces something like the following:
+
+        movzbl  %al, %edx
+        leaq    0(,%rdx,4), %rax
+        leaq    4+L2(%rip), %rdx
+        leaq    239(%rsp), %rax
+        jmp     *%rdx
+        movaps  %xmm7, -15(%rax)
+        movaps  %xmm6, -31(%rax)
+        movaps  %xmm5, -47(%rax)
+        movaps  %xmm4, -63(%rax)
+        movaps  %xmm3, -79(%rax)
+        movaps  %xmm2, -95(%rax)
+        movaps  %xmm1, -111(%rax)
+        movaps  %xmm0, -127(%rax)
+L2:
+
+It jumps over the movaps that do not need to be stored.  It is hard to see
+this being significant, as it adds 5 instructions (including an indirect
+branch) to avoid executing 0 to 8 stores in the function prologue.
+
+Perhaps we can optimize for the common case where no XMM registers are used
+for parameter passing, i.e. if %al == 0, jump over all stores.  Or in the
+case of a leaf function where we can determine that no XMM input parameter
+is needed, avoid emitting the stores at all.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 has a complex calling convention for aggregate passing by value:
+
+1. If the size of an object is larger than two eightbytes, or in C++, is a
+   non-POD structure or union type, or contains unaligned fields, it has
+   class MEMORY.
+2. Both eightbytes get initialized to class NO_CLASS.
+3. Each field of an object is classified recursively so that two fields are
+   always considered at a time.  The resulting class is calculated according
+   to the classes of the fields in the eightbyte:
+   (a) If both classes are equal, this is the resulting class.
+   (b) If one of the classes is NO_CLASS, the resulting class is the other
+       class.
+   (c) If one of the classes is MEMORY, the result is the MEMORY class.
+   (d) If one of the classes is INTEGER, the result is INTEGER.
+   (e) If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY is used
+       as the class.
+   (f) Otherwise class SSE is used.
+4. Then a post merger cleanup is done:
+   (a) If one of the classes is MEMORY, the whole argument is passed in
+       memory.
+   (b) If SSEUP is not preceded by SSE, it is converted to SSE.
+
+Currently the llvm frontend does not handle this correctly.
+
+Problem 1:
+    typedef struct { int i; double d; } QuadWordS;
+It is currently passed in two i64 integer registers.  However, a
+gcc-compiled callee expects the second element 'd' to be passed in XMM0.
+
+Problem 2:
+    typedef struct { int32_t i; float j; double d; } QuadWordS;
+The size of the first two fields == i64, so they will be combined and passed
+in an integer register, RDI.  The third field is still passed in XMM0.
+
+Problem 3:
+    typedef struct { int64_t i; int8_t j; int64_t d; } S;
+    void test(S s)
+The size of this aggregate is greater than two i64, so it should be passed in
+memory.  Currently llvm breaks this down and passes it in three integer
+registers.
+
+Problem 4:
+Taking problem 3 one step further: a function expects an aggregate value
+in memory, followed by more parameter(s) passed in register(s).
+    void test(S s, int b)
+
+LLVM IR does not allow parameter passing by aggregates, therefore it must
+break the aggregate value (in problems 3 and 4) into a number of scalar
+values:
+    void %test(long %s.i, byte %s.j, long %s.d);
+
+However, if the backend were to lower this code literally it would pass the 3
+values in integer registers.  To force them to be passed in memory, the
+frontend should change the function signature to:
+    void %test(long %undef1, long %undef2, long %undef3, long %undef4,
+               long %undef5, long %undef6,
+               long %s.i, byte %s.j, long %s.d);
+And the caller would look something like this:
+    call void %test( undef, undef, undef, undef, undef, undef,
+                     %tmp.s.i, %tmp.s.j, %tmp.s.d );
+The first 6 undef parameters would exhaust the 6 integer registers used for
+parameter passing.  The following three integer values would then be forced
+into memory.
+
+For problem 4, the parameter 'b' would be moved to the front of the
+parameter list so it will be passed in a register:
+    void %test(int %b,
+               long %undef1, long %undef2, long %undef3, long %undef4,
+               long %undef5, long %undef6,
+               long %s.i, byte %s.j, long %s.d);
+
+//===---------------------------------------------------------------------===//
+
+Right now the asm printer assumes GlobalAddress are accessed via RIP relative
+addressing.  Therefore, it is not possible to generate this:
+        movabsq $__ZTV10polynomialIdE+16, %rax
+
+That is ok for now since we currently only support the small code model.
+So the above is selected as
+        leaq __ZTV10polynomialIdE+16(%rip), %rax
+
+This is probably slightly slower but is much shorter than movabsq.  However,
+if we were to support medium or larger code models, we would need to use the
+movabs instruction.  We should probably introduce something like
+AbsoluteAddress to distinguish it from GlobalAddress so the asm printer and
+JIT code emitter can do the right thing.
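+
+For illustration (a made-up example; the symbol and function names are not
+from the original note), here is a trivial global-address materialization
+where the code model decides the instruction:
+
+extern int table[64];
+int *addr(void) {
+  /* Small code model: leaq table+128(%rip), %rax.
+     Medium/large code models would need movabsq $table+128, %rax. */
+  return &table[32];
+}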
+ +//===---------------------------------------------------------------------===// + +It's not possible to reference AH, BH, CH, and DH registers in an instruction +requiring REX prefix. However, divb and mulb both produce results in AH. If isel +emits a CopyFromReg which gets turned into a movb and that can be allocated a +r8b - r15b. + +To get around this, isel emits a CopyFromReg from AX and then right shift it +down by 8 and truncate it. It's not pretty but it works. We need some register +allocation magic to make the hack go away (e.g. putting additional constraints +on the result of the movb). + +//===---------------------------------------------------------------------===// + +The x86-64 ABI for hidden-argument struct returns requires that the +incoming value of %rdi be copied into %rax by the callee upon return. + +The idea is that it saves callers from having to remember this value, +which would often require a callee-saved register. Callees usually +need to keep this value live for most of their body anyway, so it +doesn't add a significant burden on them. + +We currently implement this in codegen, however this is suboptimal +because it means that it would be quite awkward to implement the +optimization for callers. + +A better implementation would be to relax the LLVM IR rules for sret +arguments to allow a function with an sret argument to have a non-void +return type, and to have the front-end to set up the sret argument value +as the return value of the function. The front-end could more easily +emit uses of the returned struct value to be in terms of the function's +lowered return value, and it would free non-C frontends from a +complication only required by a C-based ABI. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt new file mode 100644 index 000000000000..710bd0357433 --- /dev/null +++ b/lib/Target/X86/README.txt @@ -0,0 +1,1899 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the X86 backend. +//===---------------------------------------------------------------------===// + +We should add support for the "movbe" instruction, which does a byte-swapping +copy (3-addr bswap + memory support?) This is available on Atom processors. + +//===---------------------------------------------------------------------===// + +CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86 +backend knows how to three-addressify this shift, but it appears the register +allocator isn't even asking it to do so in this case. We should investigate +why this isn't happening, it could have significant impact on other important +cases for X86 as well. + +//===---------------------------------------------------------------------===// + +This should be one DIV/IDIV instruction, not a libcall: + +unsigned test(unsigned long long X, unsigned Y) { + return X/Y; +} + +This can be done trivially with a custom legalizer. What about overflow +though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224 + +//===---------------------------------------------------------------------===// + +Improvements to the multiply -> shift/add algorithm: +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html + +//===---------------------------------------------------------------------===// + +Improve code like this (occurs fairly frequently, e.g. 
in LLVM):
+long long foo(int x) { return 1LL << x; }
+
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html
+
+Another useful one would be ~0ULL >> X and ~0ULL << X.
+
+One better solution for 1LL << x is:
+        xorl    %eax, %eax
+        xorl    %edx, %edx
+        testb   $32, %cl
+        sete    %al
+        setne   %dl
+        sall    %cl, %eax
+        sall    %cl, %edx
+
+But that requires good 8-bit subreg support.
+
+Also, this might be better.  It's an extra shift, but it's one instruction
+shorter, and doesn't stress 8-bit subreg support.
+(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html,
+but without the unnecessary and.)
+        movl    %ecx, %eax
+        shrl    $5, %eax
+        movl    %eax, %edx
+        xorl    $1, %edx
+        sall    %cl, %eax
+        sall    %cl, %edx
+
+64-bit shifts (in general) expand to really bad code.  Instead of using
+cmovs, we should expand to a conditional branch like GCC produces.
+
+//===---------------------------------------------------------------------===//
+
+Compile this:
+_Bool f(_Bool a) { return a!=1; }
+
+into:
+        movzbl  %dil, %eax
+        xorl    $1, %eax
+        ret
+
+(Although note that this isn't a legal way to express the code that
+llvm-gcc currently generates for that function.)
+
+//===---------------------------------------------------------------------===//
+
+Some isel ideas:
+
+1. Dynamic programming based approach when compile time is not an
+   issue.
+2. Code duplication (addressing mode) during isel.
+3. Other ideas from "Register-Sensitive Selection, Duplication, and
+   Sequencing of Instructions".
+4. Scheduling for reduced register pressure.  E.g. "Minimum Register
+   Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs"
+   and other related papers.
+   http://citeseer.ist.psu.edu/govindarajan01minimum.html
+
+//===---------------------------------------------------------------------===//
+
+Should we promote i16 to i32 to avoid partial register update stalls?
+
+//===---------------------------------------------------------------------===//
+
+Leave any_extend as a pseudo instruction and hint to the register
+allocator.  Delay codegen until post register allocation.
+Note: any_extend is now turned into an INSERT_SUBREG.  We still need to teach
+the coalescer how to deal with it though.
+
+//===---------------------------------------------------------------------===//
+
+It appears icc uses push for parameter passing.  Need to investigate.
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor.  They are slower on the P4 due to only updating some processor
+flags.
+
+//===---------------------------------------------------------------------===//
+
+The instruction selector sometimes misses folding a load into a compare.  The
+pattern is written as (cmp reg, (load p)).  Because the compare isn't
+commutative, it is not matched with the load on both sides.  The dag combiner
+should be made smart enough to canonicalize the load into the RHS of a
+compare when it can invert the result of the compare for free.
+
+//===---------------------------------------------------------------------===//
+
+How about intrinsics? An example is:
+  *res = _mm_mulhi_epu16(*A, _mm_mul_epu32(*B, *C));
+
+compiles to
+        pmuludq (%eax), %xmm0
+        movl    8(%esp), %eax
+        movdqa  (%eax), %xmm1
+        pmulhuw %xmm0, %xmm1
+
+The transformation probably requires an X86-specific pass or a
+target-specific DAG combiner hook.
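+
+For experimentation, a self-contained version of the above (a sketch; the
+function name and the SSE2 header are our additions, not part of the
+original note):
+
+#include <emmintrin.h>
+void mul_fold(__m128i *res, __m128i *A, __m128i *B, __m128i *C) {
+  /* The loads of *B and *A are the ones that should fold into the memory
+     operands of pmuludq and pmulhuw respectively. */
+  *res = _mm_mulhi_epu16(*A, _mm_mul_epu32(*B, *C));
+}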
+ +//===---------------------------------------------------------------------===// + +In many cases, LLVM generates code like this: + +_test: + movl 8(%esp), %eax + cmpl %eax, 4(%esp) + setl %al + movzbl %al, %eax + ret + +on some processors (which ones?), it is more efficient to do this: + +_test: + movl 8(%esp), %ebx + xor %eax, %eax + cmpl %ebx, 4(%esp) + setl %al + ret + +Doing this correctly is tricky though, as the xor clobbers the flags. + +//===---------------------------------------------------------------------===// + +We should generate bts/btr/etc instructions on targets where they are cheap or +when codesize is important. e.g., for: + +void setbit(int *target, int bit) { + *target |= (1 << bit); +} +void clearbit(int *target, int bit) { + *target &= ~(1 << bit); +} + +//===---------------------------------------------------------------------===// + +Instead of the following for memset char*, 1, 10: + + movl $16843009, 4(%edx) + movl $16843009, (%edx) + movw $257, 8(%edx) + +It might be better to generate + + movl $16843009, %eax + movl %eax, 4(%edx) + movl %eax, (%edx) + movw al, 8(%edx) + +when we can spare a register. It reduces code size. + +//===---------------------------------------------------------------------===// + +Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently +get this: + +define i32 @test1(i32 %X) { + %Y = sdiv i32 %X, 8 + ret i32 %Y +} + +_test1: + movl 4(%esp), %eax + movl %eax, %ecx + sarl $31, %ecx + shrl $29, %ecx + addl %ecx, %eax + sarl $3, %eax + ret + +GCC knows several different ways to codegen it, one of which is this: + +_test1: + movl 4(%esp), %eax + cmpl $-1, %eax + leal 7(%eax), %ecx + cmovle %ecx, %eax + sarl $3, %eax + ret + +which is probably slower, but it's interesting at least :) + +//===---------------------------------------------------------------------===// + +We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl +We should leave these as libcalls for everything over a much lower threshold, +since libc is hand tuned for medium and large mem ops (avoiding RFO for large +stores, TLB preheating, etc) + +//===---------------------------------------------------------------------===// + +Optimize this into something reasonable: + x * copysign(1.0, y) * copysign(1.0, z) + +//===---------------------------------------------------------------------===// + +Optimize copysign(x, *y) to use an integer load from y. + +//===---------------------------------------------------------------------===// + +The following tests perform worse with LSR: + +lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. + +//===---------------------------------------------------------------------===// + +Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 / +FR64 to VR128. + +//===---------------------------------------------------------------------===// + +Adding to the list of cmp / test poor codegen issues: + +int test(__m128 *A, __m128 *B) { + if (_mm_comige_ss(*A, *B)) + return 3; + else + return 4; +} + +_test: + movl 8(%esp), %eax + movaps (%eax), %xmm0 + movl 4(%esp), %eax + movaps (%eax), %xmm1 + comiss %xmm0, %xmm1 + setae %al + movzbl %al, %ecx + movl $3, %eax + movl $4, %edx + cmpl $0, %ecx + cmove %edx, %eax + ret + +Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There +are a number of issues. 1) We are introducing a setcc between the result of the +intrisic call and select. 
2) The intrinsic is expected to produce a i32 value +so a any extend (which becomes a zero extend) is added. + +We probably need some kind of target DAG combine hook to fix this. + +//===---------------------------------------------------------------------===// + +We generate significantly worse code for this than GCC: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150 +http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701 + +There is also one case we do worse on PPC. + +//===---------------------------------------------------------------------===// + +For this: + +int test(int a) +{ + return a * 3; +} + +We currently emits + imull $3, 4(%esp), %eax + +Perhaps this is what we really should generate is? Is imull three or four +cycles? Note: ICC generates this: + movl 4(%esp), %eax + leal (%eax,%eax,2), %eax + +The current instruction priority is based on pattern complexity. The former is +more "complex" because it folds a load so the latter will not be emitted. + +Perhaps we should use AddedComplexity to give LEA32r a higher priority? We +should always try to match LEA first since the LEA matching code does some +estimate to determine whether the match is profitable. + +However, if we care more about code size, then imull is better. It's two bytes +shorter than movl + leal. + +On a Pentium M, both variants have the same characteristics with regard +to throughput; however, the multiplication has a latency of four cycles, as +opposed to two cycles for the movl+lea variant. + +//===---------------------------------------------------------------------===// + +__builtin_ffs codegen is messy. + +int ffs_(unsigned X) { return __builtin_ffs(X); } + +llvm produces: +ffs_: + movl 4(%esp), %ecx + bsfl %ecx, %eax + movl $32, %edx + cmove %edx, %eax + incl %eax + xorl %edx, %edx + testl %ecx, %ecx + cmove %edx, %eax + ret + +vs gcc: + +_ffs_: + movl $-1, %edx + bsfl 4(%esp), %eax + cmove %edx, %eax + addl $1, %eax + ret + +Another example of __builtin_ffs (use predsimplify to eliminate a select): + +int foo (unsigned long j) { + if (j) + return __builtin_ffs (j) - 1; + else + return 0; +} + +//===---------------------------------------------------------------------===// + +It appears gcc place string data with linkonce linkage in +.section __TEXT,__const_coal,coalesced instead of +.section __DATA,__const_coal,coalesced. +Take a look at darwin.h, there are other Darwin assembler directives that we +do not make use of. 
+ +//===---------------------------------------------------------------------===// + +define i32 @foo(i32* %a, i32 %t) { +entry: + br label %cond_true + +cond_true: ; preds = %cond_true, %entry + %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; [#uses=3] + %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; [#uses=1] + %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; [#uses=1] + %tmp3 = load i32* %tmp2 ; [#uses=1] + %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; [#uses=1] + %tmp7 = add i32 %tmp5, %tmp3 ; [#uses=2] + %tmp9 = add i32 %x.0.0, 1 ; [#uses=2] + %tmp = icmp sgt i32 %tmp9, 39 ; [#uses=1] + br i1 %tmp, label %bb12, label %cond_true + +bb12: ; preds = %cond_true + ret i32 %tmp7 +} +is pessimized by -loop-reduce and -indvars + +//===---------------------------------------------------------------------===// + +u32 to float conversion improvement: + +float uint32_2_float( unsigned u ) { + float fl = (int) (u & 0xffff); + float fh = (int) (u >> 16); + fh *= 0x1.0p16f; + return fh + fl; +} + +00000000 subl $0x04,%esp +00000003 movl 0x08(%esp,1),%eax +00000007 movl %eax,%ecx +00000009 shrl $0x10,%ecx +0000000c cvtsi2ss %ecx,%xmm0 +00000010 andl $0x0000ffff,%eax +00000015 cvtsi2ss %eax,%xmm1 +00000019 mulss 0x00000078,%xmm0 +00000021 addss %xmm1,%xmm0 +00000025 movss %xmm0,(%esp,1) +0000002a flds (%esp,1) +0000002d addl $0x04,%esp +00000030 ret + +//===---------------------------------------------------------------------===// + +When using fastcc abi, align stack slot of argument of type double on 8 byte +boundary to improve performance. + +//===---------------------------------------------------------------------===// + +Codegen: + +int f(int a, int b) { + if (a == 4 || a == 6) + b++; + return b; +} + + +as: + +or eax, 2 +cmp eax, 6 +jz label + +//===---------------------------------------------------------------------===// + +GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting +simplifications for integer "x cmp y ? a : b". For example, instead of: + +int G; +void f(int X, int Y) { + G = X < 0 ? 14 : 13; +} + +compiling to: + +_f: + movl $14, %eax + movl $13, %ecx + movl 4(%esp), %edx + testl %edx, %edx + cmovl %eax, %ecx + movl %ecx, _G + ret + +it could be: +_f: + movl 4(%esp), %eax + sarl $31, %eax + notl %eax + addl $14, %eax + movl %eax, _G + ret + +etc. + +Another is: +int usesbb(unsigned int a, unsigned int b) { + return (a < b ? -1 : 0); +} +to: +_usesbb: + movl 8(%esp), %eax + cmpl %eax, 4(%esp) + sbbl %eax, %eax + ret + +instead of: +_usesbb: + xorl %eax, %eax + movl 8(%esp), %ecx + cmpl %ecx, 4(%esp) + movl $4294967295, %ecx + cmovb %ecx, %eax + ret + +//===---------------------------------------------------------------------===// + +Currently we don't have elimination of redundant stack manipulations. Consider +the code: + +int %main() { +entry: + call fastcc void %test1( ) + call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) ) + ret int 0 +} + +declare fastcc void %test1() + +declare fastcc void %test2(sbyte*) + + +This currently compiles to: + + subl $16, %esp + call _test5 + addl $12, %esp + subl $16, %esp + movl $_test5, (%esp) + call _test6 + addl $12, %esp + +The add\sub pair is really unneeded here. + +//===---------------------------------------------------------------------===// + +Consider the expansion of: + +define i32 @test3(i32 %X) { + %tmp1 = urem i32 %X, 255 + ret i32 %tmp1 +} + +Currently it compiles to: + +... + movl $2155905153, %ecx + movl 8(%esp), %esi + movl %esi, %eax + mull %ecx +... 
+ +This could be "reassociated" into: + + movl $2155905153, %eax + movl 8(%esp), %ecx + mull %ecx + +to avoid the copy. In fact, the existing two-address stuff would do this +except that mul isn't a commutative 2-addr instruction. I guess this has +to be done at isel time based on the #uses to mul? + +//===---------------------------------------------------------------------===// + +Make sure the instruction which starts a loop does not cross a cacheline +boundary. This requires knowning the exact length of each machine instruction. +That is somewhat complicated, but doable. Example 256.bzip2: + +In the new trace, the hot loop has an instruction which crosses a cacheline +boundary. In addition to potential cache misses, this can't help decoding as I +imagine there has to be some kind of complicated decoder reset and realignment +to grab the bytes from the next cacheline. + +532 532 0x3cfc movb (1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines +942 942 0x3d03 movl %dh, (1809(%esp, %esi) +937 937 0x3d0a incl %esi +3 3 0x3d0b cmpb %bl, %dl +27 27 0x3d0d jnz 0x000062db + +//===---------------------------------------------------------------------===// + +In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE. + +//===---------------------------------------------------------------------===// + +This could be a single 16-bit load. + +int f(char *p) { + if ((p[0] == 1) & (p[1] == 2)) return 1; + return 0; +} + +//===---------------------------------------------------------------------===// + +We should inline lrintf and probably other libc functions. + +//===---------------------------------------------------------------------===// + +Start using the flags more. For example, compile: + +int add_zf(int *x, int y, int a, int b) { + if ((*x += y) == 0) + return a; + else + return b; +} + +to: + addl %esi, (%rdi) + movl %edx, %eax + cmovne %ecx, %eax + ret +instead of: + +_add_zf: + addl (%rdi), %esi + movl %esi, (%rdi) + testl %esi, %esi + cmove %edx, %ecx + movl %ecx, %eax + ret + +and: + +int add_zf(int *x, int y, int a, int b) { + if ((*x + y) < 0) + return a; + else + return b; +} + +to: + +add_zf: + addl (%rdi), %esi + movl %edx, %eax + cmovns %ecx, %eax + ret + +instead of: + +_add_zf: + addl (%rdi), %esi + testl %esi, %esi + cmovs %edx, %ecx + movl %ecx, %eax + ret + +//===---------------------------------------------------------------------===// + +These two functions have identical effects: + +unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;} +unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} + +We currently compile them to: + +_f: + movl 4(%esp), %eax + movl %eax, %ecx + incl %ecx + movl 8(%esp), %edx + cmpl %edx, %ecx + jne LBB1_2 #UnifiedReturnBlock +LBB1_1: #cond_true + addl $2, %eax + ret +LBB1_2: #UnifiedReturnBlock + movl %ecx, %eax + ret +_f2: + movl 4(%esp), %eax + movl %eax, %ecx + incl %ecx + cmpl 8(%esp), %ecx + sete %cl + movzbl %cl, %ecx + leal 1(%ecx,%eax), %eax + ret + +both of which are inferior to GCC's: + +_f: + movl 4(%esp), %edx + leal 1(%edx), %eax + addl $2, %edx + cmpl 8(%esp), %eax + cmove %edx, %eax + ret +_f2: + movl 4(%esp), %eax + addl $1, %eax + xorl %edx, %edx + cmpl 8(%esp), %eax + sete %dl + addl %edx, %eax + ret + +//===---------------------------------------------------------------------===// + +This code: + +void test(int X) { + if (X) abort(); +} + +is currently compiled to: + +_test: + subl $12, %esp + cmpl $0, 16(%esp) + jne LBB1_1 + addl $12, %esp + ret +LBB1_1: + call 
L_abort$stub + +It would be better to produce: + +_test: + subl $12, %esp + cmpl $0, 16(%esp) + jne L_abort$stub + addl $12, %esp + ret + +This can be applied to any no-return function call that takes no arguments etc. +Alternatively, the stack save/restore logic could be shrink-wrapped, producing +something like this: + +_test: + cmpl $0, 4(%esp) + jne LBB1_1 + ret +LBB1_1: + subl $12, %esp + call L_abort$stub + +Both are useful in different situations. Finally, it could be shrink-wrapped +and tail called, like this: + +_test: + cmpl $0, 4(%esp) + jne LBB1_1 + ret +LBB1_1: + pop %eax # realign stack. + call L_abort$stub + +Though this probably isn't worth it. + +//===---------------------------------------------------------------------===// + +We need to teach the codegen to convert two-address INC instructions to LEA +when the flags are dead (likewise dec). For example, on X86-64, compile: + +int foo(int A, int B) { + return A+1; +} + +to: + +_foo: + leal 1(%edi), %eax + ret + +instead of: + +_foo: + incl %edi + movl %edi, %eax + ret + +Another example is: + +;; X's live range extends beyond the shift, so the register allocator +;; cannot coalesce it with Y. Because of this, a copy needs to be +;; emitted before the shift to save the register value before it is +;; clobbered. However, this copy is not needed if the register +;; allocator turns the shift into an LEA. This also occurs for ADD. + +; Check that the shift gets turned into an LEA. +; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \ +; RUN: not grep {mov E.X, E.X} + +@G = external global i32 ; [#uses=3] + +define i32 @test1(i32 %X, i32 %Y) { + %Z = add i32 %X, %Y ; [#uses=1] + volatile store i32 %Y, i32* @G + volatile store i32 %Z, i32* @G + ret i32 %X +} + +define i32 @test2(i32 %X) { + %Z = add i32 %X, 1 ; [#uses=1] + volatile store i32 %Z, i32* @G + ret i32 %X +} + +//===---------------------------------------------------------------------===// + +Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with +a neg instead of a sub instruction. Consider: + +int test(char X) { return 7-X; } + +we currently produce: +_test: + movl $7, %eax + movsbl 4(%esp), %ecx + subl %ecx, %eax + ret + +We would use one fewer register if codegen'd as: + + movsbl 4(%esp), %eax + neg %eax + add $7, %eax + ret + +Note that this isn't beneficial if the load can be folded into the sub. In +this case, we want a sub: + +int test(int X) { return 7-X; } +_test: + movl $7, %eax + subl 4(%esp), %eax + ret + +//===---------------------------------------------------------------------===// + +Leaf functions that require one 4-byte spill slot have a prolog like this: + +_foo: + pushl %esi + subl $4, %esp +... +and an epilog like this: + addl $4, %esp + popl %esi + ret + +It would be smaller, and potentially faster, to push eax on entry and to +pop into a dummy register instead of using addl/subl of esp. Just don't pop +into any return registers :) + +//===---------------------------------------------------------------------===// + +The X86 backend should fold (branch (or (setcc, setcc))) into multiple +branches. We generate really poor code for: + +double testf(double a) { + return a == 0.0 ? 0.0 : (a > 0.0 ? 
1.0 : -1.0); }
+
+For example, the entry BB is:
+
+_testf:
+        subl    $20, %esp
+        pxor    %xmm0, %xmm0
+        movsd   24(%esp), %xmm1
+        ucomisd %xmm0, %xmm1
+        setnp   %al
+        sete    %cl
+        testb   %cl, %al
+        jne     LBB1_5  # UnifiedReturnBlock
+LBB1_1: # cond_true
+
+
+it would be better to replace the last four instructions with:
+
+        jp LBB1_1
+        je LBB1_5
+LBB1_1:
+
+We also codegen the inner ?: into a diamond:
+
+        cvtss2sd        LCPI1_0(%rip), %xmm2
+        cvtss2sd        LCPI1_1(%rip), %xmm3
+        ucomisd %xmm1, %xmm0
+        ja      LBB1_3  # cond_true
+LBB1_2: # cond_true
+        movapd  %xmm3, %xmm2
+LBB1_3: # cond_true
+        movapd  %xmm2, %xmm0
+        ret
+
+We should sink the load into xmm3 into the LBB1_2 block.  This should
+be pretty easy, and will nuke all the copies.
+
+//===---------------------------------------------------------------------===//
+
+This:
+        #include <utility>
+        inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+        { return std::make_pair(a + b, a + b < a); }
+        bool no_overflow(unsigned a, unsigned b)
+        { return !full_add(a, b).second; }
+
+Should compile to:
+
+
+        _Z11no_overflowjj:
+                addl    %edi, %esi
+                setae   %al
+                ret
+
+FIXME: That code looks wrong; bool return is normally defined as zext.
+
+on x86-64, not:
+
+__Z11no_overflowjj:
+        addl    %edi, %esi
+        cmpl    %edi, %esi
+        setae   %al
+        movzbl  %al, %eax
+        ret
+
+
+//===---------------------------------------------------------------------===//
+
+Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the
+condition register is dead.  xor reg reg is shorter than mov reg, #0.
+
+//===---------------------------------------------------------------------===//
+
+We aren't matching RMW instructions aggressively
+enough.  Here's a reduced testcase (more in PR1160):
+
+define void @test(i32* %huge_ptr, i32* %target_ptr) {
+        %A = load i32* %huge_ptr               ; [#uses=1]
+        %B = load i32* %target_ptr             ; [#uses=1]
+        %C = or i32 %A, %B                     ; [#uses=1]
+        store i32 %C, i32* %target_ptr
+        ret void
+}
+
+$ llvm-as < t.ll | llc -march=x86-64
+
+_test:
+        movl (%rdi), %eax
+        orl (%rsi), %eax
+        movl %eax, (%rsi)
+        ret
+
+That should be something like:
+
+_test:
+        movl (%rdi), %eax
+        orl %eax, (%rsi)
+        ret
+
+//===---------------------------------------------------------------------===//
+
+The following code:
+
+bb114.preheader:                ; preds = %cond_next94
+        %tmp231232 = sext i16 %tmp62 to i32            ; [#uses=1]
+        %tmp233 = sub i32 32, %tmp231232               ; [#uses=1]
+        %tmp245246 = sext i16 %tmp65 to i32            ; [#uses=1]
+        %tmp252253 = sext i16 %tmp68 to i32            ; [#uses=1]
+        %tmp254 = sub i32 32, %tmp252253               ; [#uses=1]
+        %tmp553554 = bitcast i16* %tmp37 to i8*        ; [#uses=2]
+        %tmp583584 = sext i16 %tmp98 to i32            ; [#uses=1]
+        %tmp585 = sub i32 32, %tmp583584               ; [#uses=1]
+        %tmp614615 = sext i16 %tmp101 to i32           ; [#uses=1]
+        %tmp621622 = sext i16 %tmp104 to i32           ; [#uses=1]
+        %tmp623 = sub i32 32, %tmp621622               ; [#uses=1]
+        br label %bb114
+
+produces:
+
+LBB3_5: # bb114.preheader
+        movswl  -68(%ebp), %eax
+        movl    $32, %ecx
+        movl    %ecx, -80(%ebp)
+        subl    %eax, -80(%ebp)
+        movswl  -52(%ebp), %eax
+        movl    %ecx, -84(%ebp)
+        subl    %eax, -84(%ebp)
+        movswl  -70(%ebp), %eax
+        movl    %ecx, -88(%ebp)
+        subl    %eax, -88(%ebp)
+        movswl  -50(%ebp), %eax
+        subl    %eax, %ecx
+        movl    %ecx, -76(%ebp)
+        movswl  -42(%ebp), %eax
+        movl    %eax, -92(%ebp)
+        movswl  -66(%ebp), %eax
+        movl    %eax, -96(%ebp)
+        movw    $0, -98(%ebp)
+
+This appears to be bad because the RA is not folding the store to the stack
+slot into the movl.  The above instructions could be:
+        movl    $32, -80(%ebp)
+...
+        movl    $32, -84(%ebp)
+...
+This seems like a cross between remat and spill folding.
+ +This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't +change, so we could simply subtract %eax from %ecx first and then use %ecx (or +vice-versa). + +//===---------------------------------------------------------------------===// + +This code: + + %tmp659 = icmp slt i16 %tmp654, 0 ; [#uses=1] + br i1 %tmp659, label %cond_true662, label %cond_next715 + +produces this: + + testw %cx, %cx + movswl %cx, %esi + jns LBB4_109 # cond_next715 + +Shark tells us that using %cx in the testw instruction is sub-optimal. It +suggests using the 32-bit register (which is what ICC uses). + +//===---------------------------------------------------------------------===// + +We compile this: + +void compare (long long foo) { + if (foo < 4294967297LL) + abort(); +} + +to: + +compare: + subl $4, %esp + cmpl $0, 8(%esp) + setne %al + movzbw %al, %ax + cmpl $1, 12(%esp) + setg %cl + movzbw %cl, %cx + cmove %ax, %cx + testb $1, %cl + jne .LBB1_2 # UnifiedReturnBlock +.LBB1_1: # ifthen + call abort +.LBB1_2: # UnifiedReturnBlock + addl $4, %esp + ret + +(also really horrible code on ppc). This is due to the expand code for 64-bit +compares. GCC produces multiple branches, which is much nicer: + +compare: + subl $12, %esp + movl 20(%esp), %edx + movl 16(%esp), %eax + decl %edx + jle .L7 +.L5: + addl $12, %esp + ret + .p2align 4,,7 +.L7: + jl .L4 + cmpl $0, %eax + .p2align 4,,8 + ja .L5 +.L4: + .p2align 4,,9 + call abort + +//===---------------------------------------------------------------------===// + +Tail call optimization improvements: Tail call optimization currently +pushes all arguments on the top of the stack (their normal place for +non-tail call optimized calls) that source from the callers arguments +or that source from a virtual register (also possibly sourcing from +callers arguments). +This is done to prevent overwriting of parameters (see example +below) that might be used later. + +example: + +int callee(int32, int64); +int caller(int32 arg1, int32 arg2) { + int64 local = arg2 * 2; + return callee(arg2, (int64)local); +} + +[arg1] [!arg2 no longer valid since we moved local onto it] +[arg2] -> [(int64) +[RETADDR] local ] + +Moving arg1 onto the stack slot of callee function would overwrite +arg2 of the caller. + +Possible optimizations: + + + - Analyse the actual parameters of the callee to see which would + overwrite a caller parameter which is used by the callee and only + push them onto the top of the stack. + + int callee (int32 arg1, int32 arg2); + int caller (int32 arg1, int32 arg2) { + return callee(arg1,arg2); + } + + Here we don't need to write any variables to the top of the stack + since they don't overwrite each other. + + int callee (int32 arg1, int32 arg2); + int caller (int32 arg1, int32 arg2) { + return callee(arg2,arg1); + } + + Here we need to push the arguments because they overwrite each + other. 
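+
+  The same pair expressed as compilable C, for testing (a sketch; the
+  function names are ours):
+
+  int callee(int a, int b);
+  /* No copies needed: each incoming argument already sits in the slot the
+     callee expects. */
+  int caller_same(int a, int b)    { return callee(a, b); }
+  /* The arguments overwrite each other, so they must be staged on the top
+     of the stack first. */
+  int caller_swapped(int a, int b) { return callee(b, a); }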
+
+//===---------------------------------------------------------------------===//
+
+main ()
+{
+  int i = 0;
+  unsigned long int z = 0;
+
+  do {
+    z -= 0x00004000;
+    i++;
+    if (i > 0x00040000)
+      abort ();
+  } while (z > 0);
+  exit (0);
+}
+
+gcc compiles this to:
+
+_main:
+        subl    $28, %esp
+        xorl    %eax, %eax
+        jmp     L2
+L3:
+        cmpl    $262144, %eax
+        je      L10
+L2:
+        addl    $1, %eax
+        cmpl    $262145, %eax
+        jne     L3
+        call    L_abort$stub
+L10:
+        movl    $0, (%esp)
+        call    L_exit$stub
+
+llvm:
+
+_main:
+        subl    $12, %esp
+        movl    $1, %eax
+        movl    $16384, %ecx
+LBB1_1: # bb
+        cmpl    $262145, %eax
+        jge     LBB1_4  # cond_true
+LBB1_2: # cond_next
+        incl    %eax
+        addl    $4294950912, %ecx
+        cmpl    $16384, %ecx
+        jne     LBB1_1  # bb
+LBB1_3: # bb11
+        xorl    %eax, %eax
+        addl    $12, %esp
+        ret
+LBB1_4: # cond_true
+        call    L_abort$stub
+
+1. LSR should rewrite the first cmp with induction variable %ecx.
+2. DAG combiner should fold
+        leal    1(%eax), %edx
+        cmpl    $262145, %edx
+   =>
+        cmpl    $262144, %eax
+
+//===---------------------------------------------------------------------===//
+
+define i64 @test(double %X) {
+        %Y = fptosi double %X to i64
+        ret i64 %Y
+}
+
+compiles to:
+
+_test:
+        subl    $20, %esp
+        movsd   24(%esp), %xmm0
+        movsd   %xmm0, 8(%esp)
+        fldl    8(%esp)
+        fisttpll        (%esp)
+        movl    4(%esp), %edx
+        movl    (%esp), %eax
+        addl    $20, %esp
+        #FP_REG_KILL
+        ret
+
+This should just fldl directly from the input stack slot.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+int foo (int x) { return (x & 65535) | 255; }
+
+Should compile into:
+
+_foo:
+        movzwl  4(%esp), %eax
+        orl     $255, %eax
+        ret
+
+instead of:
+_foo:
+        movl    $255, %eax
+        orl     4(%esp), %eax
+        andl    $65535, %eax
+        ret
+
+//===---------------------------------------------------------------------===//
+
+We're codegen'ing multiply of long longs inefficiently:
+
+unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) {
+  return arg1 * arg2;
+}
+
+We compile to (-fomit-frame-pointer):
+
+_LLM:
+        pushl   %esi
+        movl    8(%esp), %ecx
+        movl    16(%esp), %esi
+        movl    %esi, %eax
+        mull    %ecx
+        imull   12(%esp), %esi
+        addl    %edx, %esi
+        imull   20(%esp), %ecx
+        movl    %esi, %edx
+        addl    %ecx, %edx
+        popl    %esi
+        ret
+
+This looks like a scheduling deficiency and lack of remat of the load from
+the argument area.  ICC apparently produces:
+
+        movl      8(%esp), %ecx
+        imull     12(%esp), %ecx
+        movl      16(%esp), %eax
+        imull     4(%esp), %eax
+        addl      %eax, %ecx
+        movl      4(%esp), %eax
+        mull      12(%esp)
+        addl      %ecx, %edx
+        ret
+
+Note that it remat'd loads from 4(esp) and 12(esp).  See this GCC PR:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236
+
+//===---------------------------------------------------------------------===//
+
+We can fold a store into "zeroing a reg".  Instead of:
+
+xorl %eax, %eax
+movl %eax, 124(%esp)
+
+we should get:
+
+movl $0, 124(%esp)
+
+if the flags of the xor are dead.
+
+Likewise, we isel "x<<1" into "add reg,reg".  If reg is spilled, this should
+be folded into: shl [mem], 1
+
+//===---------------------------------------------------------------------===//
+
+This testcase misses a read/modify/write opportunity (from PR1425):
+
+void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
+    int i;
+    for(i=0; i<width; i++)
+        b1[i] += (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We compile it down to:
+
+LBB1_2: # bb
+        movl    (%esi,%edi,4), %ebx
+        addl    (%ecx,%edi,4), %ebx
+        addl    (%edx,%edi,4), %ebx
+        movl    %ebx, (%ecx,%edi,4)
+        incl    %edi
+        cmpl    %eax, %edi
+        jne     LBB1_2  # bb
+
+the inner loop should add to the memory location (%ecx,%edi,4), saving
+a mov.
Something like:
+
+        movl    (%esi,%edi,4), %ebx
+        addl    (%edx,%edi,4), %ebx
+        addl    %ebx, (%ecx,%edi,4)
+
+Here is another interesting example:
+
+void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
+    int i;
+    for(i=0; i<width; i++)
+        b1[i] -= (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]:
+
+LBB9_2: # bb
+        movl    (%ecx,%edi,4), %ebx
+        subl    (%esi,%edi,4), %ebx
+        subl    (%edx,%edi,4), %ebx
+        movl    %ebx, (%ecx,%edi,4)
+        incl    %edi
+        cmpl    %eax, %edi
+        jne     LBB9_2  # bb
+
+Additionally, LSR should rewrite the exit condition of these loops to use
+a stride-4 IV, which would allow all the scales in the loop to go away.
+This would result in smaller code and more efficient microops.
+
+//===---------------------------------------------------------------------===//
+
+In SSE mode, we turn abs and neg into a load from the constant pool plus a xor
+or and instruction, for example:
+
+        xorpd   LCPI1_0, %xmm2
+
+However, if xmm2 gets spilled, we end up with really ugly code like this:
+
+        movsd   (%esp), %xmm0
+        xorpd   LCPI1_0, %xmm0
+        movsd   %xmm0, (%esp)
+
+Since we 'know' that this is a 'neg', we can actually "fold" the spill into
+the neg/abs instruction, turning it into an *integer* operation, like this:
+
+        xorl 2147483648, [mem+4]     ## 2147483648 = (1 << 31)
+
+you could also use xorb, but xorl is less likely to lead to a partial register
+stall.  Here is a contrived testcase:
+
+double a, b, c;
+void test(double *P) {
+  double X = *P;
+  a = X;
+  bar();
+  X = -X;
+  b = X;
+  bar();
+  c = X;
+}
+
+//===---------------------------------------------------------------------===//
+
+handling llvm.memory.barrier on pre SSE2 cpus
+
+should generate:
+lock ; mov %esp, %esp
+
+//===---------------------------------------------------------------------===//
+
+The generated code on x86 for checking for signed overflow on a multiply the
+obvious way is much longer than it needs to be.
+
+int x(int a, int b) {
+  long long prod = (long long)a*b;
+  return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1);
+}
+
+See PR2053 for more details.
+
+//===---------------------------------------------------------------------===//
+
+We should investigate using cdq/cltd (effect: edx = sar eax, 31)
+more aggressively; it should cost the same as a move+shift on any modern
+processor, but it's a lot shorter. Downside is that it puts more
+pressure on register allocation because it has fixed operands.
+
+Example:
+int abs(int x) {return x < 0 ? -x : x;}
+
+gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.:
+abs:
+        movl    4(%esp), %eax
+        cltd
+        xorl    %edx, %eax
+        subl    %edx, %eax
+        ret
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+int test(unsigned long a, unsigned long b) { return -(a < b); }
+
+We currently compile this to:
+
+define i32 @test(i32 %a, i32 %b) nounwind {
+        %tmp3 = icmp ult i32 %a, %b             ; <i1> [#uses=1]
+        %tmp34 = zext i1 %tmp3 to i32           ; <i32> [#uses=1]
+        %tmp5 = sub i32 0, %tmp34               ; <i32> [#uses=1]
+        ret i32 %tmp5
+}
+
+and
+
+_test:
+        movl    8(%esp), %eax
+        cmpl    %eax, 4(%esp)
+        setb    %al
+        movzbl  %al, %eax
+        negl    %eax
+        ret
+
+Several deficiencies here.
First, we should instcombine zext+neg into sext: + +define i32 @test2(i32 %a, i32 %b) nounwind { + %tmp3 = icmp ult i32 %a, %b ; [#uses=1] + %tmp34 = sext i1 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp34 +} + +However, before we can do that, we have to fix the bad codegen that we get for +sext from bool: + +_test2: + movl 8(%esp), %eax + cmpl %eax, 4(%esp) + setb %al + movzbl %al, %eax + shll $31, %eax + sarl $31, %eax + ret + +This code should be at least as good as the code above. Once this is fixed, we +can optimize this specific case even more to: + + movl 8(%esp), %eax + xorl %ecx, %ecx + cmpl %eax, 4(%esp) + sbbl %ecx, %ecx + +//===---------------------------------------------------------------------===// + +Take the following code (from +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): + +extern unsigned char first_one[65536]; +int FirstOnet(unsigned long long arg1) +{ + if (arg1 >> 48) + return (first_one[arg1 >> 48]); + return 0; +} + + +The following code is currently generated: +FirstOnet: + movl 8(%esp), %eax + cmpl $65536, %eax + movl 4(%esp), %ecx + jb .LBB1_2 # UnifiedReturnBlock +.LBB1_1: # ifthen + shrl $16, %eax + movzbl first_one(%eax), %eax + ret +.LBB1_2: # UnifiedReturnBlock + xorl %eax, %eax + ret + +There are a few possible improvements here: +1. We should be able to eliminate the dead load into %ecx +2. We could change the "movl 8(%esp), %eax" into + "movzwl 10(%esp), %eax"; this lets us change the cmpl + into a testl, which is shorter, and eliminate the shift. + +We could also in theory eliminate the branch by using a conditional +for the address of the load, but that seems unlikely to be worthwhile +in general. + +//===---------------------------------------------------------------------===// + +We compile this function: + +define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind { +entry: + %tmp2 = icmp eq i8 %d, 0 ; [#uses=1] + br i1 %tmp2, label %bb7, label %bb + +bb: ; preds = %entry + %tmp6 = add i32 %b, %a ; [#uses=1] + ret i32 %tmp6 + +bb7: ; preds = %entry + %tmp10 = sub i32 %a, %c ; [#uses=1] + ret i32 %tmp10 +} + +to: + +_foo: + cmpb $0, 16(%esp) + movl 12(%esp), %ecx + movl 8(%esp), %eax + movl 4(%esp), %edx + je LBB1_2 # bb7 +LBB1_1: # bb + addl %edx, %eax + ret +LBB1_2: # bb7 + movl %edx, %eax + subl %ecx, %eax + ret + +The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 +if it commuted the addl in LBB1_1. + +//===---------------------------------------------------------------------===// + +See rdar://4653682. + +From flops: + +LBB1_15: # bb310 + cvtss2sd LCPI1_0, %xmm1 + addsd %xmm1, %xmm0 + movsd 176(%esp), %xmm2 + mulsd %xmm0, %xmm2 + movapd %xmm2, %xmm3 + mulsd %xmm3, %xmm3 + movapd %xmm3, %xmm4 + mulsd LCPI1_23, %xmm4 + addsd LCPI1_24, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_25, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_26, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_27, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_28, %xmm4 + mulsd %xmm3, %xmm4 + addsd %xmm1, %xmm4 + mulsd %xmm2, %xmm4 + movsd 152(%esp), %xmm1 + addsd %xmm4, %xmm1 + movsd %xmm1, 152(%esp) + incl %eax + cmpl %eax, %esi + jge LBB1_15 # bb310 +LBB1_16: # bb358.loopexit + movsd 152(%esp), %xmm0 + addsd %xmm0, %xmm0 + addsd LCPI1_22, %xmm0 + movsd %xmm0, 152(%esp) + +Rather than spilling the result of the last addsd in the loop, we should have +insert a copy to split the interval (one for the duration of the loop, one +extending to the fall through). The register pressure in the loop isn't high +enough to warrant the spill. 
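+
+A rough sketch of the decision being asked for, in C++ (illustrative
+only; the names here are invented, this is not the LiveIntervals API):
+
+  // Splitting beats spilling when the region (here: the loop) still
+  // has a free register: one copy at the loop boundary replaces a
+  // reload that would otherwise execute on every iteration.
+  static bool preferSplitOverSpill(unsigned MaxPressureInLoop,
+                                   unsigned NumAllocatableRegs) {
+    return MaxPressureInLoop < NumAllocatableRegs;
+  }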
+ +Also check why xmm7 is not used at all in the function. + +//===---------------------------------------------------------------------===// + +Legalize loses track of the fact that bools are always zero extended when in +memory. This causes us to compile abort_gzip (from 164.gzip) from: + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" +@in_exit.4870.b = internal global i1 false ; [#uses=2] +define fastcc void @abort_gzip() noreturn nounwind { +entry: + %tmp.b.i = load i1* @in_exit.4870.b ; [#uses=1] + br i1 %tmp.b.i, label %bb.i, label %bb4.i +bb.i: ; preds = %entry + tail call void @exit( i32 1 ) noreturn nounwind + unreachable +bb4.i: ; preds = %entry + store i1 true, i1* @in_exit.4870.b + tail call void @exit( i32 1 ) noreturn nounwind + unreachable +} +declare void @exit(i32) noreturn nounwind + +into: + +_abort_gzip: + subl $12, %esp + movb _in_exit.4870.b, %al + notb %al + testb $1, %al + jne LBB1_2 ## bb4.i +LBB1_1: ## bb.i + ... + +//===---------------------------------------------------------------------===// + +We compile: + +int test(int x, int y) { + return x-y-1; +} + +into (-m64): + +_test: + decl %edi + movl %edi, %eax + subl %esi, %eax + ret + +it would be better to codegen as: x+~y (notl+addl) + +//===---------------------------------------------------------------------===// + +This code: + +int foo(const char *str,...) +{ + __builtin_va_list a; int x; + __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a); + return x; +} + +gets compiled into this on x86-64: + subq $200, %rsp + movaps %xmm7, 160(%rsp) + movaps %xmm6, 144(%rsp) + movaps %xmm5, 128(%rsp) + movaps %xmm4, 112(%rsp) + movaps %xmm3, 96(%rsp) + movaps %xmm2, 80(%rsp) + movaps %xmm1, 64(%rsp) + movaps %xmm0, 48(%rsp) + movq %r9, 40(%rsp) + movq %r8, 32(%rsp) + movq %rcx, 24(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 8(%rsp) + leaq (%rsp), %rax + movq %rax, 192(%rsp) + leaq 208(%rsp), %rax + movq %rax, 184(%rsp) + movl $48, 180(%rsp) + movl $8, 176(%rsp) + movl 176(%rsp), %eax + cmpl $47, %eax + jbe .LBB1_3 # bb +.LBB1_1: # bb3 + movq 184(%rsp), %rcx + leaq 8(%rcx), %rax + movq %rax, 184(%rsp) +.LBB1_2: # bb4 + movl (%rcx), %eax + addq $200, %rsp + ret +.LBB1_3: # bb + movl %eax, %ecx + addl $8, %eax + addq 192(%rsp), %rcx + movl %eax, 176(%rsp) + jmp .LBB1_2 # bb4 + +gcc 4.3 generates: + subq $96, %rsp +.LCFI0: + leaq 104(%rsp), %rax + movq %rsi, -80(%rsp) + movl $8, -120(%rsp) + movq %rax, -112(%rsp) + leaq -88(%rsp), %rax + movq %rax, -104(%rsp) + movl $8, %eax + cmpl $48, %eax + jb .L6 + movq -112(%rsp), %rdx + movl (%rdx), %eax + addq $96, %rsp + ret + .p2align 4,,10 + .p2align 3 +.L6: + mov %eax, %edx + addq -104(%rsp), %rdx + addl $8, %eax + movl %eax, -120(%rsp) + movl (%rdx), %eax + addq $96, %rsp + ret + +and it gets compiled into this on x86: + pushl %ebp + movl %esp, %ebp + subl $4, %esp + leal 12(%ebp), %eax + movl %eax, -4(%ebp) + leal 16(%ebp), %eax + movl %eax, -4(%ebp) + movl 12(%ebp), %eax + addl $4, %esp + popl %ebp + ret + +gcc 4.3 generates: + pushl %ebp + movl %esp, %ebp + movl 12(%ebp), %eax + popl %ebp + ret + +//===---------------------------------------------------------------------===// + +Teach tblgen not to check bitconvert source type in some cases. 
This allows us
+to consolidate the following patterns in X86InstrMMX.td:
+
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                                  (iPTR 0))))),
+          (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                                  (iPTR 0))))),
+          (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                                 (iPTR 0))))),
+          (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+There are other cases in various td files.
+
+//===---------------------------------------------------------------------===//
+
+Take something like the following on x86-32:
+unsigned a(unsigned long long x, unsigned y) {return x % y;}
+
+We currently generate a libcall, but we really shouldn't: the expansion is
+shorter and likely faster than the libcall.  The expected code is something
+like the following:
+
+        movl    12(%ebp), %eax
+        movl    16(%ebp), %ecx
+        xorl    %edx, %edx
+        divl    %ecx
+        movl    8(%ebp), %eax
+        divl    %ecx
+        movl    %edx, %eax
+        ret
+
+A similar code sequence works for division.
+
+//===---------------------------------------------------------------------===//
+
+These should compile to the same code, but the latter codegens to useless
+instructions on X86. This may be a trivial dag combine (GCC PR7061):
+
+struct s1 { unsigned char a, b; };
+unsigned long f1(struct s1 x) {
+    return x.a + x.b;
+}
+struct s2 { unsigned a: 8, b: 8; };
+unsigned long f2(struct s2 x) {
+    return x.a + x.b;
+}
+
+//===---------------------------------------------------------------------===//
+
+We currently compile this:
+
+define i32 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+normal:
+  ret i32 %sum
+overflow:
+  call void @llvm.trap()
+  unreachable
+}
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare void @llvm.trap()
+
+to:
+
+_func1:
+        movl 4(%esp), %eax
+        addl 8(%esp), %eax
+        jo LBB1_2       ## overflow
+LBB1_1: ## normal
+        ret
+LBB1_2: ## overflow
+        ud2
+
+it would be nice to produce "into" someday.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+int i;
+for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+        movl    %ebx, %eax
+        imull   (%edi,%ecx,4)
+        shrdl   $31, %edx, %eax
+        addl    %eax, (%esi,%ecx,4)
+        incl    %ecx
+        cmpl    $149, %ecx
+        jle     .L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1: ## bb1
+        movl    24(%esp), %eax
+        movl    (%eax,%edi,4), %ebx
+        movl    %ebx, %ebp
+        imull   %esi, %ebp
+        movl    %ebx, %eax
+        mull    %ecx
+        addl    %ebp, %edx
+        sarl    $31, %ebx
+        imull   %ecx, %ebx
+        addl    %edx, %ebx
+        shldl   $1, %eax, %ebx
+        movl    20(%esp), %eax
+        addl    %ebx, (%eax,%edi,4)
+        incl    %edi
+        cmpl    $150, %edi
+        jne     LBB1_1  ## bb1
+
+//===---------------------------------------------------------------------===//
+
+Test instructions can be eliminated by using EFLAGS values from arithmetic
+instructions. This is currently not done for mul, and, or, xor, neg, shl,
+sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
+for read-modify-write instructions. It is also currently not done if the
+OF or CF flags are needed.
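+
+As a concrete illustration (a made-up example, not from the test suite),
+the andl below already sets ZF according to its result, so the testl
+currently emitted before the branch is redundant:
+
+  int and_sets_flags(int a, int b, int *out) {
+    int m = a & b; // andl sets ZF exactly when m == 0
+    *out = m;      // the value itself is still live
+    return m == 0; // today this costs an extra testl
+  }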
+
+The shift operators have the complication that when the shift count is
+zero, EFLAGS is not set, so they can only subsume a test instruction if
+the shift count is known to be non-zero. Also, using the EFLAGS value
+from a shift is apparently very slow on some x86 implementations.
+
+In read-modify-write instructions, the root node in the isel match is
+the store, and isel has no way for the use of the EFLAGS result of the
+arithmetic to be remapped to the new node.
+
+Add and subtract instructions set OF on signed overflow and CF on unsigned
+overflow, while test instructions always clear OF and CF. In order to
+replace a test with an add or subtract in a situation where OF or CF is
+needed, codegen must be able to prove that the operation cannot see
+signed or unsigned overflow, respectively.
+
+//===---------------------------------------------------------------------===//
+
+memcpy/memmove do not lower to SSE copies when possible. A silly example is:
+define <16 x float> @foo(<16 x float> %A) nounwind {
+  %tmp = alloca <16 x float>, align 16
+  %tmp2 = alloca <16 x float>, align 16
+  store <16 x float> %A, <16 x float>* %tmp
+  %s = bitcast <16 x float>* %tmp to i8*
+  %s2 = bitcast <16 x float>* %tmp2 to i8*
+  call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16)
+  %R = load <16 x float>* %tmp2
+  ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+which compiles to:
+
+_foo:
+        subl    $140, %esp
+        movaps  %xmm3, 112(%esp)
+        movaps  %xmm2, 96(%esp)
+        movaps  %xmm1, 80(%esp)
+        movaps  %xmm0, 64(%esp)
+        movl    60(%esp), %eax
+        movl    %eax, 124(%esp)
+        movl    56(%esp), %eax
+        movl    %eax, 120(%esp)
+        movl    52(%esp), %eax
+
+        movaps  (%esp), %xmm0
+        movaps  16(%esp), %xmm1
+        movaps  32(%esp), %xmm2
+        movaps  48(%esp), %xmm3
+        addl    $140, %esp
+        ret
+
+On Nehalem, it may even be cheaper to just use movups when unaligned than to
+fall back to lower-granularity chunks.
+
+//===---------------------------------------------------------------------===//
+
+Implement processor-specific optimizations for parity with GCC on these
+processors.  GCC does two optimizations:
+
+1. ix86_pad_returns inserts a noop before ret instructions if immediately
+   preceded by a conditional branch or is the target of a jump.
+2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
+   code contains more than 3 branches.
+
+The first one is done for all AMDs, Core2, and "Generic"
+The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
+  Core 2, and "Generic"
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
new file mode 100644
index 000000000000..fd13b02e1367
--- /dev/null
+++ b/lib/Target/X86/X86.h
@@ -0,0 +1,84 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_X86_H +#define TARGET_X86_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class X86TargetMachine; +class FunctionPass; +class MachineCodeEmitter; +class JITCodeEmitter; +class raw_ostream; + +/// createX86ISelDag - This pass converts a legalized DAG into a +/// X86-specific DAG, ready for instruction scheduling. +/// +FunctionPass *createX86ISelDag(X86TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +/// createX86FloatingPointStackifierPass - This function returns a pass which +/// converts floating point register references and pseudo instructions into +/// floating point stack references and physical instructions. +/// +FunctionPass *createX86FloatingPointStackifierPass(); + +/// createX87FPRegKillInserterPass - This function returns a pass which +/// inserts FP_REG_KILL instructions where needed. +/// +FunctionPass *createX87FPRegKillInserterPass(); + +/// createX86CodePrinterPass - Returns a pass that prints the X86 +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. +/// +FunctionPass *createX86CodePrinterPass(raw_ostream &o, + X86TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool Verbose); + +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified MCE object. + +FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, + MachineCodeEmitter &MCE); +FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE); + +/// createX86EmitCodeToMemory - Returns a pass that converts a register +/// allocated function into raw machine code in a dynamically +/// allocated chunk of memory. +/// +FunctionPass *createEmitX86CodeToMemory(); + +/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass +/// which calculates maximal stack alignment required for function +/// +FunctionPass *createX86MaxStackAlignmentCalculatorPass(); + +} // End llvm namespace + +// Defines symbolic names for X86 registers. This defines a mapping from +// register name to register number. +// +#include "X86GenRegisterNames.inc" + +// Defines symbolic names for the X86 instructions. +// +#include "X86GenInstrNames.inc" + +#endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td new file mode 100644 index 000000000000..8df138d8d7a7 --- /dev/null +++ b/lib/Target/X86/X86.td @@ -0,0 +1,184 @@ +//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a target description file for the Intel i386 architecture, refered to +// here as the "X86" architecture. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing... +// +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// X86 Subtarget features. 
+//===----------------------------------------------------------------------===// + +def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", + "Enable MMX instructions">; +def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", + "Enable SSE instructions", + [FeatureMMX]>; +def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", + "Enable SSE2 instructions", + [FeatureSSE1]>; +def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", + "Enable SSE3 instructions", + [FeatureSSE2]>; +def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", + "Enable SSSE3 instructions", + [FeatureSSE3]>; +def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", + "Enable SSE 4.1 instructions", + [FeatureSSSE3]>; +def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", + "Enable SSE 4.2 instructions", + [FeatureSSE41]>; +def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", + "Enable 3DNow! instructions">; +def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", + "Enable 3DNow! Athlon instructions", + [Feature3DNow]>; +// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied +// feature, because SSE2 can be disabled (e.g. for compiling OS kernels) +// without disabling 64-bit mode. +def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", + "Support 64-bit instructions">; +def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", + "Bit testing of memory is slow">; +def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", + "Support SSE 4a instructions">; + +//===----------------------------------------------------------------------===// +// X86 processors supported. +//===----------------------------------------------------------------------===// + +class Proc Features> + : Processor; + +def : Proc<"generic", []>; +def : Proc<"i386", []>; +def : Proc<"i486", []>; +def : Proc<"i586", []>; +def : Proc<"pentium", []>; +def : Proc<"pentium-mmx", [FeatureMMX]>; +def : Proc<"i686", []>; +def : Proc<"pentiumpro", []>; +def : Proc<"pentium2", [FeatureMMX]>; +def : Proc<"pentium3", [FeatureSSE1]>; +def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; +def : Proc<"pentium4", [FeatureSSE2]>; +def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; + +def : Proc<"k6", [FeatureMMX]>; +def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; +def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>; +def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, + 
FeatureSlowBTMem]>; +def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, + FeatureSlowBTMem]>; +def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, + Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, + Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; + +def : Proc<"winchip-c6", [FeatureMMX]>; +def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; +def : Proc<"c3", [FeatureMMX, Feature3DNow]>; +def : Proc<"c3-2", [FeatureSSE1]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "X86RegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "X86InstrInfo.td" + +def X86InstrInfo : InstrInfo { + + // Define how we want to layout our TargetSpecific information field... This + // should be kept up-to-date with the fields in the X86InstrInfo.h file. + let TSFlagsFields = ["FormBits", + "hasOpSizePrefix", + "hasAdSizePrefix", + "Prefix", + "hasREX_WPrefix", + "ImmTypeBits", + "FPFormBits", + "hasLockPrefix", + "SegOvrBits", + "Opcode"]; + let TSFlagsShifts = [0, + 6, + 7, + 8, + 12, + 13, + 16, + 19, + 20, + 24]; +} + +//===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "X86CallingConv.td" + + +//===----------------------------------------------------------------------===// +// Assembly Printers +//===----------------------------------------------------------------------===// + +// The X86 target supports two different syntaxes for emitting machine code. +// This is controlled by the -x86-asm-syntax={att|intel} +def ATTAsmWriter : AsmWriter { + string AsmWriterClassName = "ATTAsmPrinter"; + int Variant = 0; +} +def IntelAsmWriter : AsmWriter { + string AsmWriterClassName = "IntelAsmPrinter"; + int Variant = 1; +} + + +def X86 : Target { + // Information about the instructions... + let InstructionSet = X86InstrInfo; + + let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; +} diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h new file mode 100644 index 000000000000..0a8e4e6ac6db --- /dev/null +++ b/lib/Target/X86/X86COFF.h @@ -0,0 +1,95 @@ +//===--- X86COFF.h - Some definitions from COFF documentations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file just defines some symbols found in COFF documentation. They are +// used to emit function type information for COFF targets (Cygwin/Mingw32). 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86COFF_H +#define X86COFF_H + +namespace COFF +{ +/// Storage class tells where and what the symbol represents +enum StorageClass { + C_EFCN = -1, ///< Physical end of function + C_NULL = 0, ///< No symbol + C_AUTO = 1, ///< External definition + C_EXT = 2, ///< External symbol + C_STAT = 3, ///< Static + C_REG = 4, ///< Register variable + C_EXTDEF = 5, ///< External definition + C_LABEL = 6, ///< Label + C_ULABEL = 7, ///< Undefined label + C_MOS = 8, ///< Member of structure + C_ARG = 9, ///< Function argument + C_STRTAG = 10, ///< Structure tag + C_MOU = 11, ///< Member of union + C_UNTAG = 12, ///< Union tag + C_TPDEF = 13, ///< Type definition + C_USTATIC = 14, ///< Undefined static + C_ENTAG = 15, ///< Enumeration tag + C_MOE = 16, ///< Member of enumeration + C_REGPARM = 17, ///< Register parameter + C_FIELD = 18, ///< Bit field + + C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block + C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function + C_EOS = 102, ///< End of structure + C_FILE = 103, ///< File name + C_LINE = 104, ///< Line number, reformatted as symbol + C_ALIAS = 105, ///< Duplicate tag + C_HIDDEN = 106 ///< External symbol in dmert public lib +}; + +/// The type of the symbol. This is made up of a base type and a derived type. +/// For example, pointer to int is "pointer to T" and "int" +enum SymbolType { + T_NULL = 0, ///< No type info + T_ARG = 1, ///< Void function argument (only used by compiler) + T_VOID = 1, ///< The same as above. Just named differently in some specs. + T_CHAR = 2, ///< Character + T_SHORT = 3, ///< Short integer + T_INT = 4, ///< Integer + T_LONG = 5, ///< Long integer + T_FLOAT = 6, ///< Floating point + T_DOUBLE = 7, ///< Double word + T_STRUCT = 8, ///< Structure + T_UNION = 9, ///< Union + T_ENUM = 10, ///< Enumeration + T_MOE = 11, ///< Member of enumeration + T_UCHAR = 12, ///< Unsigned character + T_USHORT = 13, ///< Unsigned short + T_UINT = 14, ///< Unsigned integer + T_ULONG = 15 ///< Unsigned long +}; + +/// Derived type of symbol +enum SymbolDerivedType { + DT_NON = 0, ///< No derived type + DT_PTR = 1, ///< Pointer to T + DT_FCN = 2, ///< Function returning T + DT_ARY = 3 ///< Array of T +}; + +/// Masks for extracting parts of type +enum SymbolTypeMasks { + N_BTMASK = 017, ///< Mask for base type + N_TMASK = 060 ///< Mask for derived type +}; + +/// Offsets of parts of type +enum Shifts { + N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT) +}; + +} + +#endif // X86COFF_H diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td new file mode 100644 index 000000000000..7f99203a8333 --- /dev/null +++ b/lib/Target/X86/X86CallingConv.td @@ -0,0 +1,360 @@ +//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the X86-32 and X86-64 +// architectures. +// +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. 
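+/// The string F is pasted into generated C++ and evaluated against the
+/// X86 subtarget, e.g. "is64Bit()" or "hasSSE2()" in the uses below.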
+class CCIfSubtarget + : CCIf().", F), A>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Return-value conventions common to all X86 CC's. +def RetCC_X86Common : CallingConv<[ + // Scalar values are returned in AX first, then DX. For i8, the ABI + // requires the values to be in AL and AH, however this code uses AL and DL + // instead. This is because using AH for the second register conflicts with + // the way LLVM does multiple return values -- a return of {i16,i8} would end + // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI + // for functions that return two i8 values are currently expected to pack the + // values into an i16 (which uses AX, and thus AL:AH). + CCIfType<[i8] , CCAssignToReg<[AL, DL]>>, + CCIfType<[i16], CCAssignToReg<[AX, DX]>>, + CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>, + CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>, + + // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3 + // can only be used by ABI non-compliant code. If the target doesn't have XMM + // registers, it won't have vector types. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>, + + // MMX vector types are always returned in MM0. If the target doesn't have + // MM0, it doesn't support these vector types. + CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>, + + // Long double types are always returned in ST0 (even with SSE). + CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> +]>; + +// X86-32 C return-value convention. +def RetCC_X86_32_C : CallingConv<[ + // The X86-32 calling convention returns FP values in ST0, unless marked + // with "inreg" (used here to distinguish one kind of reg from another, + // weirdly; this is really the sse-regparm calling convention) in which + // case they use XMM0, otherwise it is the same as the common X86 calling + // conv. + CCIfInReg>>>, + CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>, + CCDelegateTo +]>; + +// X86-32 FastCC return-value convention. +def RetCC_X86_32_Fast : CallingConv<[ + // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has + // SSE2, otherwise it is the the C calling conventions. + // This can happen when a float, 2 x float, or 3 x float vector is split by + // target lowering, and is returned in 1-3 sse regs. + CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCDelegateTo +]>; + +// X86-64 C return-value convention. +def RetCC_X86_64_C : CallingConv<[ + // The X86-64 calling convention always returns FP values in XMM0. + CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, + + // MMX vector types are always returned in XMM0 except for v1i64 which is + // returned in RAX. This disagrees with ABI documentation but is bug + // compatible with gcc. + CCIfType<[v1i64], CCAssignToReg<[RAX]>>, + CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>, + CCDelegateTo +]>; + +// X86-Win64 C return-value convention. +def RetCC_X86_Win64_C : CallingConv<[ + // The X86-Win64 calling convention always returns __m64 values in RAX. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>, + + // And FP in XMM0 only. 
+ CCIfType<[f32], CCAssignToReg<[XMM0]>>, + CCIfType<[f64], CCAssignToReg<[XMM0]>>, + + // Otherwise, everything is the same as 'normal' X86-64 C CC. + CCDelegateTo +]>; + + +// This is the root return-value convention for the X86-32 backend. +def RetCC_X86_32 : CallingConv<[ + // If FastCC, use RetCC_X86_32_Fast. + CCIfCC<"CallingConv::Fast", CCDelegateTo>, + // Otherwise, use RetCC_X86_32_C. + CCDelegateTo +]>; + +// This is the root return-value convention for the X86-64 backend. +def RetCC_X86_64 : CallingConv<[ + // Mingw64 and native Win64 use Win64 CC + CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, + + // Otherwise, drop to normal X86-64 CC + CCDelegateTo +]>; + +// This is the return-value convention used for the entire X86 backend. +def RetCC_X86 : CallingConv<[ + CCIfSubtarget<"is64Bit()", CCDelegateTo>, + CCDelegateTo +]>; + +//===----------------------------------------------------------------------===// +// X86-64 Argument Calling Conventions +//===----------------------------------------------------------------------===// + +def CC_X86_64_C : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in R10. + CCIfNest>, + + // The first 6 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, + + // The first 8 FP/Vector arguments are passed in XMM registers. + CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfSubtarget<"hasSSE1()", + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, + + // The first 8 MMX (except for v1i64) vector arguments are passed in XMM + // registers on Darwin. + CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfSubtarget<"isTargetDarwin()", + CCIfSubtarget<"hasSSE2()", + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>, + + // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. + CCIfType<[v1i64], + CCIfSubtarget<"isTargetDarwin()", + CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Long doubles get stack slots whose size and alignment depends on the + // subtarget. + CCIfType<[f80], CCAssignToStack<0, 0>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. + CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>> +]>; + +// Calling convention used on Win64 +def CC_X86_Win64_C : CallingConv<[ + // FIXME: Handle byval stuff. + // FIXME: Handle varargs. + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in R10. + CCIfNest>, + + // The first 4 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], + [XMM0, XMM1, XMM2, XMM3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ], + [XMM0, XMM1, XMM2, XMM3]>>, + + // The first 4 FP/Vector arguments are passed in XMM registers. 
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3], + [RCX , RDX , R8 , R9 ]>>, + + // The first 4 MMX vector arguments are passed in GPRs. + CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], + CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ], + [XMM0, XMM1, XMM2, XMM3]>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 16-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>, + + // Long doubles get stack slots whose size and alignment depends on the + // subtarget. + CCIfType<[f80], CCAssignToStack<0, 0>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + + // __m64 vectors get 8-byte stack slots that are 16-byte aligned. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> +]>; + +// Tail call convention (fast): One register is reserved for target address, +// namely R9 +def CC_X86_64_TailCall : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in R10. + CCIfNest>, + + // The first 6 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>, + + // The first 8 FP/Vector arguments are passed in XMM registers. + CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfSubtarget<"hasSSE1()", + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, + + // The first 8 MMX (except for v1i64) vector arguments are passed in XMM + // registers on Darwin. + CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfSubtarget<"isTargetDarwin()", + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, + + // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. + CCIfType<[v1i64], + CCIfSubtarget<"isTargetDarwin()", + CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> +]>; + + +//===----------------------------------------------------------------------===// +// X86 C Calling Convention +//===----------------------------------------------------------------------===// + +/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP +/// values are spilled on the stack, and the first 4 vector values go in XMM +/// regs. +def CC_X86_32_Common : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // The first 3 float or double arguments, if marked 'inreg' and if the call + // is not a vararg call and if SSE2 is available, are passed in SSE registers. + CCIfNotVarArg>>>>, + + // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx + // registers if the call is not a vararg call. + CCIfNotVarArg>>, + + // Integer/Float values get stored in stack slots that are 4 bytes in + // size and 4-byte aligned. 
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Doubles get 8-byte slots that are 4-byte aligned. + CCIfType<[f64], CCAssignToStack<8, 4>>, + + // Long doubles get slots whose size depends on the subtarget. + CCIfType<[f80], CCAssignToStack<0, 4>>, + + // The first 4 SSE vector arguments are passed in XMM registers. + CCIfNotVarArg>>, + + // Other SSE vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + + // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are + // passed in the parameter area. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; + +def CC_X86_32_C : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in ECX. + CCIfNest>, + + // The first 3 integer arguments, if marked 'inreg' and if the call is not + // a vararg call, are passed in integer registers. + CCIfNotVarArg>>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + +def CC_X86_32_FastCall : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest>, + + // The first 2 integer arguments are passed in ECX/EDX + CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + +def CC_X86_32_FastCC : CallingConv<[ + // Handles byval parameters. Note that we can't rely on the delegation + // to CC_X86_32_Common for this because that happens after code that + // puts arguments in registers. + CCIfByVal>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest>, + + // The first 2 integer arguments are passed in ECX/EDX + CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>, + + // The first 3 float or double arguments, if the call is not a vararg + // call and if SSE2 is available, are passed in SSE registers. + CCIfNotVarArg>>>, + + // Doubles get 8-byte slots that are 8-byte aligned. + CCIfType<[f64], CCAssignToStack<8, 8>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp new file mode 100644 index 000000000000..e988a5ca9d04 --- /dev/null +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -0,0 +1,811 @@ +//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the X86 machine instructions into +// relocatable machine code. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-emitter" +#include "X86InstrInfo.h" +#include "X86JITInfo.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "X86Relocations.h" +#include "X86.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { +template + class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass { + const X86InstrInfo *II; + const TargetData *TD; + X86TargetMachine &TM; + CodeEmitter &MCE; + intptr_t PICBaseOffset; + bool Is64BitMode; + bool IsPIC; + public: + static char ID; + explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(&ID), II(0), TD(0), TM(tm), + MCE(mce), PICBaseOffset(0), Is64BitMode(false), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + Emitter(X86TargetMachine &tm, CodeEmitter &mce, + const X86InstrInfo &ii, const TargetData &td, bool is64) + : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm), + MCE(mce), PICBaseOffset(0), Is64BitMode(is64), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "X86 Machine Code Emitter"; + } + + void emitInstruction(const MachineInstr &MI, + const TargetInstrDesc *Desc); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); + void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + intptr_t Disp = 0, intptr_t PCAdj = 0, + bool NeedStub = false, bool Indirect = false); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, + intptr_t PCAdj = 0); + void emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj = 0); + + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + intptr_t PCAdj = 0); + + void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); + void emitRegModRMByte(unsigned RegOpcodeField); + void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); + void emitConstant(uint64_t Val, unsigned Size); + + void emitMemModRMByte(const MachineInstr &MI, + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj = 0); + + unsigned getX86RegNum(unsigned RegNo) const; + + bool gvNeedsNonLazyPtr(const GlobalValue *GV); + }; + +template + char Emitter::ID = 0; +} + +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified templated MachineCodeEmitter object. 
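+/// The same templated Emitter is instantiated for both the
+/// MachineCodeEmitter-based JIT path and the newer JITCodeEmitter path.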
+ +namespace llvm { + +FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, + MachineCodeEmitter &MCE) { + return new Emitter(TM, MCE); +} +FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter(TM, JCE); +} + +} // end namespace llvm + +template +bool Emitter::runOnMachineFunction(MachineFunction &MF) { + + MCE.setModuleInfo(&getAnalysis()); + + II = TM.getInstrInfo(); + TD = TM.getTargetData(); + Is64BitMode = TM.getSubtarget().is64Bit(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + + do { + DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const TargetInstrDesc &Desc = I->getDesc(); + emitInstruction(*I, &Desc); + // MOVPC32r is basically a call plus a pop instruction. + if (Desc.getOpcode() == X86::MOVPC32r) + emitInstruction(*I, &II->get(X86::POP32r)); + NumEmitted++; // Keep track of the # of mi's emitted + } + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// emitPCRelativeBlockAddress - This method keeps track of the information +/// necessary to resolve the address of this block later and emits a dummy +/// value. +/// +template +void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { + // Remember where this reference was and where it is to so we can + // deal with it later. + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, MBB)); + MCE.emitWordLE(0); +} + +/// emitGlobalAddress - Emit the specified address to the code stream assuming +/// this is part of a "take the address of a global" instruction. +/// +template +void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */, + bool NeedStub /* = false */, + bool Indirect /* = false */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MachineRelocation MR = Indirect + ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, + GV, RelocCST, NeedStub) + : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + GV, RelocCST, NeedStub); + MCE.addRelocation(MR); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, RelocCST)); + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. 
+template +void Emitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +template +unsigned Emitter::getX86RegNum(unsigned RegNo) const { + return II->getRegisterInfo().getX86RegNum(RegNo); +} + +inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); +} + +template +void Emitter::emitRegModRMByte(unsigned ModRMReg, + unsigned RegOpcodeFld){ + MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); +} + +template +void Emitter::emitRegModRMByte(unsigned RegOpcodeFld) { + MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0)); +} + +template +void Emitter::emitSIBByte(unsigned SS, + unsigned Index, + unsigned Base) { + // SIB byte is in the same format as the ModRMByte... + MCE.emitByte(ModRMByte(SS, Index, Base)); +} + +template +void Emitter::emitConstant(uint64_t Val, unsigned Size) { + // Output the constant in little endian byte order... + for (unsigned i = 0; i != Size; ++i) { + MCE.emitByte(Val & 255); + Val >>= 8; + } +} + +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +template +bool Emitter::gvNeedsNonLazyPtr(const GlobalValue *GV) { + // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer + // mechanism as 32-bit mode. + return (!Is64BitMode || TM.getSubtarget().isTargetDarwin()) && + TM.getSubtarget().GVRequiresExtraLoad(GV, TM, false); +} + +template +void Emitter::emitDisplacementField(const MachineOperand *RelocOp, + int DispVal, intptr_t PCAdj) { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (!RelocOp) { + emitConstant(DispVal, 4); + return; + } + + // Otherwise, this is something that requires a relocation. Emit it as such + // now. + if (RelocOp->isGlobal()) { + // In 64-bit static small code model, we could potentially emit absolute. + // But it's probably not beneficial. + // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative + // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? 
X86::reloc_picrel_word : X86::reloc_absolute_word); + bool NeedStub = isa(RelocOp->getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal()); + emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(), + PCAdj, NeedStub, Indirect); + } else if (RelocOp->isCPI()) { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word; + emitConstPoolAddress(RelocOp->getIndex(), rt, + RelocOp->getOffset(), PCAdj); + } else if (RelocOp->isJTI()) { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word; + emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj); + } else { + assert(0 && "Unknown value to relocate!"); + } +} + +template +void Emitter::emitMemModRMByte(const MachineInstr &MI, + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj) { + const MachineOperand &Op3 = MI.getOperand(Op+3); + int DispVal = 0; + const MachineOperand *DispForReloc = 0; + + // Figure out what sort of displacement we have to handle here. + if (Op3.isGlobal()) { + DispForReloc = &Op3; + } else if (Op3.isCPI()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex()); + DispVal += Op3.getOffset(); + } + } else if (Op3.isJTI()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex()); + } + } else { + DispVal = Op3.getImm(); + } + + const MachineOperand &Base = MI.getOperand(Op); + const MachineOperand &Scale = MI.getOperand(Op+1); + const MachineOperand &IndexReg = MI.getOperand(Op+2); + + unsigned BaseReg = Base.getReg(); + + // Is a SIB byte needed? + if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + IndexReg.getReg() == 0 && + (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) { + if (BaseReg == 0) { // Just a displacement? + // Emit special case [disp32] encoding + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + + emitDisplacementField(DispForReloc, DispVal, PCAdj); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + // Emit simple indirect register encoding... [EAX] f.e. + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + } else if (!DispForReloc && isDisp8(DispVal)) { + // Emit the disp8 encoding... [REG+disp8] + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); + emitConstant(DispVal, 1); + } else { + // Emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj); + } + } + + } else { // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... 
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base. The + // displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*+4] + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement? + if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj); + } + } +} + +template +void Emitter::emitInstruction( + const MachineInstr &MI, + const TargetInstrDesc *Desc) { + DOUT << MI; + + unsigned Opcode = Desc->Opcode; + + // Emit the lock opcode prefix as needed. + if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0); + + // Emit segment override opcode prefix as needed. + switch (Desc->TSFlags & X86II::SegOvrMask) { + case X86II::FS: + MCE.emitByte(0x64); + break; + case X86II::GS: + MCE.emitByte(0x65); + break; + default: assert(0 && "Invalid segment!"); + case 0: break; // No segment override! + } + + // Emit the repeat opcode prefix as needed. + if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3); + + // Emit the operand size opcode prefix as needed. + if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66); + + // Emit the address size opcode prefix as needed. + if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67); + + bool Need0FPrefix = false; + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::REP: break; // already handled. + case X86II::XS: // F3 0F + MCE.emitByte(0xF3); + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + MCE.emitByte(0xF2); + Need0FPrefix = true; + break; + case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: + case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: + MCE.emitByte(0xD8+ + (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8) + >> X86II::Op0Shift)); + break; // Two-byte opcode prefix + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + } + + if (Is64BitMode) { + // REX prefix + unsigned REX = X86InstrInfo::determineREX(MI); + if (REX) + MCE.emitByte(0x40 | REX); + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + MCE.emitByte(0x0F); + + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::T8: // 0F 38 + MCE.emitByte(0x38); + break; + case X86II::TA: // 0F 3A + MCE.emitByte(0x3A); + break; + } + + // If this is a two-address instruction, skip one of the register operands. 
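+ // E.g. ADD32rr is "%dst = ADD32rr %src1, %src2" at the MI level with
+ // %dst tied to %src1; the hardware encoding only has two operand
+ // fields, so the tied duplicate must not be encoded twice.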
+ unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + ++CurOp; + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); + switch (Desc->TSFlags & X86II::FormMask) { + default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + case X86II::Pseudo: + // Remember the current PC offset, this is the PIC relocation + // base address. + switch (Opcode) { + default: + assert(0 && "psuedo instructions should be removed before code emission"); + break; + case TargetInstrInfo::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + assert(0 && "JIT does not support inline asm!\n"); + abort(); + } + break; + } + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getImm()); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case X86::DWARF_LOC: + case X86::FP_REG_KILL: + break; + case X86::MOVPC32r: { + // This emits the "call" portion of this pseudo instruction. + MCE.emitByte(BaseOpcode); + emitConstant(0, X86InstrInfo::sizeOfImm(Desc)); + // Remember PIC base. + PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); + X86JITInfo *JTI = TM.getJITInfo(); + JTI->setPICBase(MCE.getCurrentPCValue()); + break; + } + } + CurOp = NumOps; + break; + case X86II::RawFrm: + MCE.emitByte(BaseOpcode); + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + + DOUT << "RawFrm CurOp " << CurOp << "\n"; + DOUT << "isMBB " << MO.isMBB() << "\n"; + DOUT << "isGlobal " << MO.isGlobal() << "\n"; + DOUT << "isSymbol " << MO.isSymbol() << "\n"; + DOUT << "isImm " << MO.isImm() << "\n"; + + if (MO.isMBB()) { + emitPCRelativeBlockAddress(MO.getMBB()); + } else if (MO.isGlobal()) { + // Assume undefined functions may be outside the Small codespace. + bool NeedStub = + (Is64BitMode && + (TM.getCodeModel() == CodeModel::Large || + TM.getSubtarget().isTargetDarwin())) || + Opcode == X86::TAILJMPd; + emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, + MO.getOffset(), 0, NeedStub); + } else if (MO.isSymbol()) { + emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); + } else if (MO.isImm()) { + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + // Fix up immediate operand for pc relative calls. + intptr_t Imm = (intptr_t)MO.getImm(); + Imm = Imm - MCE.getCurrentPCValue() - 4; + emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + } else + emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); + } else { + assert(0 && "Unknown RawFrm operand!"); + } + } + break; + + case X86II::AddRegFrm: + MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) + emitConstant(MO1.getImm(), Size); + else { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + // This should not occur on Darwin for relocatable objects. + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 
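+ // (MOV64ri carries a full 64-bit immediate, so the address is patched
+ // with an 8-byte absolute fixup instead of the usual 4-byte one.)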
+ if (MO1.isGlobal()) { + bool NeedStub = isa(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + } + } + break; + + case X86II::MRMDestReg: { + MCE.emitByte(BaseOpcode); + emitRegModRMByte(MI.getOperand(CurOp).getReg(), + getX86RegNum(MI.getOperand(CurOp+1).getReg())); + CurOp += 2; + if (CurOp != NumOps) + emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + break; + } + case X86II::MRMDestMem: { + MCE.emitByte(BaseOpcode); + emitMemModRMByte(MI, CurOp, + getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands) + .getReg())); + CurOp += X86AddrNumOperands + 1; + if (CurOp != NumOps) + emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + break; + } + + case X86II::MRMSrcReg: + MCE.emitByte(BaseOpcode); + emitRegModRMByte(MI.getOperand(CurOp+1).getReg(), + getX86RegNum(MI.getOperand(CurOp).getReg())); + CurOp += 2; + if (CurOp != NumOps) + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); + break; + + case X86II::MRMSrcMem: { + // FIXME: Maybe lea should have its own form? + int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; + + intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? + X86InstrInfo::sizeOfImm(Desc) : 0; + + MCE.emitByte(BaseOpcode); + emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), + PCAdj); + CurOp += AddrOperands + 1; + if (CurOp != NumOps) + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: { + MCE.emitByte(BaseOpcode); + + // Special handling of lfence, mfence, monitor, and mwait. + if (Desc->getOpcode() == X86::LFENCE || + Desc->getOpcode() == X86::MFENCE || + Desc->getOpcode() == X86::MONITOR || + Desc->getOpcode() == X86::MWAIT) { + emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); + + switch (Desc->getOpcode()) { + default: break; + case X86::MONITOR: + MCE.emitByte(0xC8); + break; + case X86::MWAIT: + MCE.emitByte(0xC9); + break; + } + } else { + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); + } + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) + emitConstant(MO1.getImm(), Size); + else { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri32) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? 
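+ // (MOV64ri32 sign-extends its 32-bit immediate to 64 bits, so a plain
+ // 4-byte absolute fixup is enough here.)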
+ if (MO1.isGlobal()) { + bool NeedStub = isa(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + } + } + break; + } + + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? + (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? + X86InstrInfo::sizeOfImm(Desc) : 4) : 0; + + MCE.emitByte(BaseOpcode); + emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, + PCAdj); + CurOp += X86AddrNumOperands; + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO.isImm()) + emitConstant(MO.getImm(), Size); + else { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64mi32) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? + if (MO.isGlobal()) { + bool NeedStub = isa(MO.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal()); + emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, + NeedStub, Indirect); + } else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), rt); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), rt); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), rt); + } + } + break; + } + + case X86II::MRMInitReg: + MCE.emitByte(BaseOpcode); + // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). + emitRegModRMByte(MI.getOperand(CurOp).getReg(), + getX86RegNum(MI.getOperand(CurOp).getReg())); + ++CurOp; + break; + } + + if (!Desc->isVariadic() && CurOp != NumOps) { + cerr << "Cannot encode: "; + MI.dump(); + cerr << '\n'; + abort(); + } +} + diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm new file mode 100644 index 000000000000..8002f98765f0 --- /dev/null +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -0,0 +1,67 @@ +;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---=== +;; +;; The LLVM Compiler Infrastructure +;; +;; This file is distributed under the University of Illinois Open Source +;; License. See LICENSE.TXT for details. +;; +;;===----------------------------------------------------------------------=== +;; +;; This file implements the JIT interfaces for the X86 target. +;; +;;===----------------------------------------------------------------------=== + +extrn X86CompilationCallback2: PROC + +.code +X86CompilationCallback proc + push rbp + + ; Save RSP + mov rbp, rsp + + ; Save all int arg registers + push rcx + push rdx + push r8 + push r9 + + ; Align stack on 16-byte boundary. 
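+ ; (-16 has the low four bits clear, so this rounds RSP down to a
+ ; 16-byte boundary; the movaps stores below fault on unaligned
+ ; addresses.)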
+ and rsp, -16 + + ; Save all XMM arg registers + sub rsp, 64 + movaps [rsp], xmm0 + movaps [rsp+16], xmm1 + movaps [rsp+32], xmm2 + movaps [rsp+48], xmm3 + + ; JIT callee + + ; Pass prev frame and return address + mov rcx, rbp + mov rdx, qword ptr [rbp+8] + call X86CompilationCallback2 + + ; Restore all XMM arg registers + movaps xmm3, [rsp+48] + movaps xmm2, [rsp+32] + movaps xmm1, [rsp+16] + movaps xmm0, [rsp] + + ; Restore RSP + mov rsp, rbp + + ; Restore all int arg registers + sub rsp, 32 + pop r9 + pop r8 + pop rdx + pop rcx + + ; Restore RBP + pop rbp + ret +X86CompilationCallback endp + +End diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp new file mode 100644 index 000000000000..4c3cc828f46e --- /dev/null +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -0,0 +1,18 @@ +//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the X86 backend. +// +//===----------------------------------------------------------------------===// + +#include "X86ELFWriterInfo.h" +using namespace llvm; + +X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {} +X86ELFWriterInfo::~X86ELFWriterInfo() {} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h new file mode 100644 index 000000000000..06e051a34ac9 --- /dev/null +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -0,0 +1,29 @@ +//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the X86 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_ELF_WRITER_INFO_H +#define X86_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class X86ELFWriterInfo : public TargetELFWriterInfo { + public: + X86ELFWriterInfo(); + virtual ~X86ELFWriterInfo(); + }; + +} // end llvm namespace + +#endif // X86_ELF_WRITER_INFO_H diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp new file mode 100644 index 000000000000..b3667be0d035 --- /dev/null +++ b/lib/Target/X86/X86FastISel.cpp @@ -0,0 +1,1549 @@ +//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86-specific support for the FastISel class. Much +// of the target-specific code is generated by tablegen in the file +// X86GenFastISel.inc, which is #included here. 
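+// FastISel trades code quality for compile time: it selects one IR
+// instruction at a time using simple patterns, and falls back to the
+// SelectionDAG path for anything it does not handle.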
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86ISelLowering.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +namespace { + +class X86FastISel : public FastISel { + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. + const X86Subtarget *Subtarget; + + /// StackPtr - Register used as the stack pointer. + /// + unsigned StackPtr; + + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + /// floating point ops. + /// When SSE is available, use it for f32 operations. + /// When SSE2 is available, use it for f64 operations. + bool X86ScalarSSEf64; + bool X86ScalarSSEf32; + +public: + explicit X86FastISel(MachineFunction &mf, + MachineModuleInfo *mmi, + DwarfWriter *dw, + DenseMap &vm, + DenseMap &bm, + DenseMap &am +#ifndef NDEBUG + , SmallSet &cil +#endif + ) + : FastISel(mf, mmi, dw, vm, bm, am +#ifndef NDEBUG + , cil +#endif + ) { + Subtarget = &TM.getSubtarget(); + StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + X86ScalarSSEf64 = Subtarget->hasSSE2(); + X86ScalarSSEf32 = Subtarget->hasSSE1(); + } + + virtual bool TargetSelectInstruction(Instruction *I); + +#include "X86GenFastISel.inc" + +private: + bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT); + + bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR); + + bool X86FastEmitStore(MVT VT, Value *Val, + const X86AddressMode &AM); + bool X86FastEmitStore(MVT VT, unsigned Val, + const X86AddressMode &AM); + + bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT, + unsigned &ResultReg); + + bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall); + + bool X86SelectLoad(Instruction *I); + + bool X86SelectStore(Instruction *I); + + bool X86SelectCmp(Instruction *I); + + bool X86SelectZExt(Instruction *I); + + bool X86SelectBranch(Instruction *I); + + bool X86SelectShift(Instruction *I); + + bool X86SelectSelect(Instruction *I); + + bool X86SelectTrunc(Instruction *I); + + bool X86SelectFPExt(Instruction *I); + bool X86SelectFPTrunc(Instruction *I); + + bool X86SelectExtractValue(Instruction *I); + + bool X86VisitIntrinsicCall(IntrinsicInst &I); + bool X86SelectCall(Instruction *I); + + CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false); + + const X86InstrInfo *getInstrInfo() const { + return getTargetMachine()->getInstrInfo(); + } + const X86TargetMachine *getTargetMachine() const { + return static_cast(&TM); + } + + unsigned TargetMaterializeConstant(Constant *C); + + unsigned TargetMaterializeAlloca(AllocaInst *C); + + /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is + /// computed in an SSE register, not on the X87 floating point stack. 
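+ /// This matters when copying a call result out of ST(0): the value has
+ /// to be spilled to memory and reloaded to reach an XMM register.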
+ bool isScalarFPTypeInSSEReg(MVT VT) const { + return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 + (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 + } + + bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); +}; + +} // end anonymous namespace. + +bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { + VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // For now, require SSE/SSE2 for performing floating-point operations, + // since x87 requires additional work. + if (VT == MVT::f64 && !X86ScalarSSEf64) + return false; + if (VT == MVT::f32 && !X86ScalarSSEf32) + return false; + // Similarly, no f80 support yet. + if (VT == MVT::f80) + return false; + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT); +} + +#include "X86GenCallingConv.inc" + +/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling +/// convention. +CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetWin64()) + return CC_X86_Win64_C; + else if (CC == CallingConv::Fast && isTaillCall) + return CC_X86_64_TailCall; + else + return CC_X86_64_C; + } + + if (CC == CallingConv::X86_FastCall) + return CC_X86_32_FastCall; + else if (CC == CallingConv::Fast) + return CC_X86_32_FastCC; + else + return CC_X86_32_C; +} + +/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. +/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. +/// Return true and the result register by reference if it is possible. +bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM, + unsigned &ResultReg) { + // Get opcode and regclass of the output for the given load instruction. + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i8: + Opc = X86::MOV8rm; + RC = X86::GR8RegisterClass; + break; + case MVT::i16: + Opc = X86::MOV16rm; + RC = X86::GR16RegisterClass; + break; + case MVT::i32: + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; + break; + case MVT::i64: + // Must be in x86-64 mode. + Opc = X86::MOV64rm; + RC = X86::GR64RegisterClass; + break; + case MVT::f32: + if (Subtarget->hasSSE1()) { + Opc = X86::MOVSSrm; + RC = X86::FR32RegisterClass; + } else { + Opc = X86::LD_Fp32m; + RC = X86::RFP32RegisterClass; + } + break; + case MVT::f64: + if (Subtarget->hasSSE2()) { + Opc = X86::MOVSDrm; + RC = X86::FR64RegisterClass; + } else { + Opc = X86::LD_Fp64m; + RC = X86::RFP64RegisterClass; + } + break; + case MVT::f80: + // No f80 support yet. + return false; + } + + ResultReg = createResultReg(RC); + addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + return true; +} + +/// X86FastEmitStore - Emit a machine instruction to store a value Val of +/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr +/// and a displacement offset, or a GlobalAddress, +/// i.e. V. Return true if it is possible. +bool +X86FastISel::X86FastEmitStore(MVT VT, unsigned Val, + const X86AddressMode &AM) { + // Get opcode and regclass of the output for the given store instruction. 
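+ // (Opcode suffixes follow the usual X86 tablegen naming: "mr" stores a
+ // register to memory, "mi" stores an immediate to memory.)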
+ unsigned Opc = 0; + switch (VT.getSimpleVT()) { + case MVT::f80: // No f80 support yet. + default: return false; + case MVT::i8: Opc = X86::MOV8mr; break; + case MVT::i16: Opc = X86::MOV16mr; break; + case MVT::i32: Opc = X86::MOV32mr; break; + case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. + case MVT::f32: + Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m; + break; + case MVT::f64: + Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m; + break; + } + + addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val); + return true; +} + +bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, + const X86AddressMode &AM) { + // Handle 'null' like i32/i64 0. + if (isa(Val)) + Val = Constant::getNullValue(TD.getIntPtrType()); + + // If this is a store of a simple constant, fold the constant into the store. + if (ConstantInt *CI = dyn_cast(Val)) { + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: break; + case MVT::i8: Opc = X86::MOV8mi; break; + case MVT::i16: Opc = X86::MOV16mi; break; + case MVT::i32: Opc = X86::MOV32mi; break; + case MVT::i64: + // Must be a 32-bit sign extended value. + if ((int)CI->getSExtValue() == CI->getSExtValue()) + Opc = X86::MOV64mi32; + break; + } + + if (Opc) { + addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) + .addImm(CI->getSExtValue()); + return true; + } + } + + unsigned ValReg = getRegForValue(Val); + if (ValReg == 0) + return false; + + return X86FastEmitStore(VT, ValReg, AM); +} + +/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of +/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. +/// ISD::SIGN_EXTEND). +bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, + unsigned Src, MVT SrcVT, + unsigned &ResultReg) { + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); + + if (RR != 0) { + ResultReg = RR; + return true; + } else + return false; +} + +/// X86SelectAddress - Attempt to fill in an address from the given value. +/// +bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { + User *U; + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast(V)) { + Opcode = I->getOpcode(); + U = I; + } else if (ConstantExpr *C = dyn_cast(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: break; + case Instruction::BitCast: + // Look past bitcasts. + return X86SelectAddress(U->getOperand(0), AM, isCall); + + case Instruction::IntToPtr: + // Look past no-op inttoptrs. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return X86SelectAddress(U->getOperand(0), AM, isCall); + break; + + case Instruction::PtrToInt: + // Look past no-op ptrtoints. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return X86SelectAddress(U->getOperand(0), AM, isCall); + break; + + case Instruction::Alloca: { + if (isCall) break; + // Do static allocas. + const AllocaInst *A = cast(V); + DenseMap::iterator SI = StaticAllocaMap.find(A); + if (SI != StaticAllocaMap.end()) { + AM.BaseType = X86AddressMode::FrameIndexBase; + AM.Base.FrameIndex = SI->second; + return true; + } + break; + } + + case Instruction::Add: { + if (isCall) break; + // Adds of constants are common and easy enough. + if (ConstantInt *CI = dyn_cast(U->getOperand(1))) { + uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); + // They have to fit in the 32-bit signed displacement field though. 
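+ // E.g. an "add i64 %p, 24" feeding an address folds into AM.Disp
+ // instead of costing a separate ADD, as long as the running total
+ // still fits the field.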
+ if (isInt32(Disp)) { + AM.Disp = (uint32_t)Disp; + return X86SelectAddress(U->getOperand(0), AM, isCall); + } + } + break; + } + + case Instruction::GetElementPtr: { + if (isCall) break; + // Pattern-match simple GEPs. + uint64_t Disp = (int32_t)AM.Disp; + unsigned IndexReg = AM.IndexReg; + unsigned Scale = AM.Scale; + gep_type_iterator GTI = gep_type_begin(U); + // Iterate through the indices, folding what we can. Constants can be + // folded, and one dynamic index can be handled, if the scale is supported. + for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); + i != e; ++i, ++GTI) { + Value *Op = *i; + if (const StructType *STy = dyn_cast(*GTI)) { + const StructLayout *SL = TD.getStructLayout(STy); + unsigned Idx = cast(Op)->getZExtValue(); + Disp += SL->getElementOffset(Idx); + } else { + uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast(Op)) { + // Constant-offset addressing. + Disp += CI->getSExtValue() * S; + } else if (IndexReg == 0 && + (!AM.GV || + !getTargetMachine()->symbolicAddressesAreRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op); + if (IndexReg == 0) + return false; + } else + // Unsupported. + goto unsupported_gep; + } + } + // Check for displacement overflow. + if (!isInt32(Disp)) + break; + // Ok, the GEP indices were covered by constant-offset and scaled-index + // addressing. Update the address state and move on to examining the base. + AM.IndexReg = IndexReg; + AM.Scale = Scale; + AM.Disp = (uint32_t)Disp; + return X86SelectAddress(U->getOperand(0), AM, isCall); + unsupported_gep: + // Ok, the GEP indices weren't all covered. + break; + } + } + + // Handle constant address. + if (GlobalValue *GV = dyn_cast(V)) { + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Default && + TM.getCodeModel() != CodeModel::Small) + return false; + + // RIP-relative addresses can't have additional register operands. + if (getTargetMachine()->symbolicAddressesAreRIPRel() && + (AM.Base.Reg != 0 || AM.IndexReg != 0)) + return false; + + // Can't handle TLS yet. + if (GlobalVariable *GVar = dyn_cast(GV)) + if (GVar->isThreadLocal()) + return false; + + // Set up the basic address. + AM.GV = GV; + if (!isCall && + TM.getRelocationModel() == Reloc::PIC_ && + !Subtarget->is64Bit()) + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + + // Emit an extra load if the ABI requires it. + if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { + // Check to see if we've already materialized this + // value in a register in this block. + if (unsigned Reg = LocalValueMap[V]) { + AM.Base.Reg = Reg; + AM.GV = 0; + return true; + } + // Issue load from stub if necessary. + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + if (TLI.getPointerTy() == MVT::i32) { + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; + } else { + Opc = X86::MOV64rm; + RC = X86::GR64RegisterClass; + } + + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = AM.GV; + unsigned ResultReg = createResultReg(RC); + addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); + + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = ResultReg; + AM.GV = 0; + + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = AM.Base.Reg; + } + return true; + } + + // If all else fails, try to materialize the value in a register. 
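+ // The base register slot is tried first; a second value can still be
+ // placed in the index register with an implicit scale of 1.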
+ if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) { + if (AM.Base.Reg == 0) { + AM.Base.Reg = getRegForValue(V); + return AM.Base.Reg != 0; + } + if (AM.IndexReg == 0) { + assert(AM.Scale == 1 && "Scale with no index!"); + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0; + } + } + + return false; +} + +/// X86SelectStore - Select and emit code to implement store instructions. +bool X86FastISel::X86SelectStore(Instruction* I) { + MVT VT; + if (!isTypeLegal(I->getOperand(0)->getType(), VT)) + return false; + + X86AddressMode AM; + if (!X86SelectAddress(I->getOperand(1), AM, false)) + return false; + + return X86FastEmitStore(VT, I->getOperand(0), AM); +} + +/// X86SelectLoad - Select and emit code to implement load instructions. +/// +bool X86FastISel::X86SelectLoad(Instruction *I) { + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + X86AddressMode AM; + if (!X86SelectAddress(I->getOperand(0), AM, false)) + return false; + + unsigned ResultReg = 0; + if (X86FastEmitLoad(VT, AM, ResultReg)) { + UpdateValueMap(I, ResultReg); + return true; + } + return false; +} + +static unsigned X86ChooseCmpOpcode(MVT VT) { + switch (VT.getSimpleVT()) { + default: return 0; + case MVT::i8: return X86::CMP8rr; + case MVT::i16: return X86::CMP16rr; + case MVT::i32: return X86::CMP32rr; + case MVT::i64: return X86::CMP64rr; + case MVT::f32: return X86::UCOMISSrr; + case MVT::f64: return X86::UCOMISDrr; + } +} + +/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS +/// of the comparison, return an opcode that works for the compare (e.g. +/// CMP32ri) otherwise return 0. +static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) { + switch (VT.getSimpleVT()) { + // Otherwise, we can't fold the immediate into this comparison. + default: return 0; + case MVT::i8: return X86::CMP8ri; + case MVT::i16: return X86::CMP16ri; + case MVT::i32: return X86::CMP32ri; + case MVT::i64: + // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext + // field. + if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) + return X86::CMP64ri32; + return 0; + } +} + +bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) { + unsigned Op0Reg = getRegForValue(Op0); + if (Op0Reg == 0) return false; + + // Handle 'null' like i32/i64 0. + if (isa(Op1)) + Op1 = Constant::getNullValue(TD.getIntPtrType()); + + // We have two options: compare with register or immediate. If the RHS of + // the compare is an immediate that we can fold into this compare, use + // CMPri, otherwise use CMPrr. + if (ConstantInt *Op1C = dyn_cast(Op1)) { + if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { + BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) + .addImm(Op1C->getSExtValue()); + return true; + } + } + + unsigned CompareOpc = X86ChooseCmpOpcode(VT); + if (CompareOpc == 0) return false; + + unsigned Op1Reg = getRegForValue(Op1); + if (Op1Reg == 0) return false; + BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg); + + return true; +} + +bool X86FastISel::X86SelectCmp(Instruction *I) { + CmpInst *CI = cast(I); + + MVT VT; + if (!isTypeLegal(I->getOperand(0)->getType(), VT)) + return false; + + unsigned ResultReg = createResultReg(&X86::GR8RegClass); + unsigned SetCCOpc; + bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. 
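+ // The swaps matter for the ordered FP predicates: UCOMISS/UCOMISD set
+ // CF/ZF like an unsigned compare and raise PF (and CF) on unordered
+ // inputs, so e.g. FCMP_OLT becomes SETA on swapped operands rather
+ // than SETB, which an unordered result would wrongly satisfy.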
+ switch (CI->getPredicate()) { + case CmpInst::FCMP_OEQ: { + if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + return false; + + unsigned EReg = createResultReg(&X86::GR8RegClass); + unsigned NPReg = createResultReg(&X86::GR8RegClass); + BuildMI(MBB, DL, TII.get(X86::SETEr), EReg); + BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg); + BuildMI(MBB, DL, + TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); + UpdateValueMap(I, ResultReg); + return true; + } + case CmpInst::FCMP_UNE: { + if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + return false; + + unsigned NEReg = createResultReg(&X86::GR8RegClass); + unsigned PReg = createResultReg(&X86::GR8RegClass); + BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg); + BuildMI(MBB, DL, TII.get(X86::SETPr), PReg); + BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg); + UpdateValueMap(I, ResultReg); + return true; + } + case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; + case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; + case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break; + case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break; + case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; + case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break; + case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; + case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break; + case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; + case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; + case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; + + case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; + case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; + case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; + case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; + case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; + case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; + case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break; + case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break; + case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break; + case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break; + default: + return false; + } + + Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + if (SwapArgs) + std::swap(Op0, Op1); + + // Emit a compare of Op0/Op1. + if (!X86FastEmitCompare(Op0, Op1, VT)) + return false; + + BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg); + UpdateValueMap(I, ResultReg); + return true; +} + +bool X86FastISel::X86SelectZExt(Instruction *I) { + // Handle zero-extension from i1 to i8, which is common. + if (I->getType() == Type::Int8Ty && + I->getOperand(0)->getType() == Type::Int1Ty) { + unsigned ResultReg = getRegForValue(I->getOperand(0)); + if (ResultReg == 0) return false; + // Set the high bits to zero. + ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg); + if (ResultReg == 0) return false; + UpdateValueMap(I, ResultReg); + return true; + } + + return false; +} + + +bool X86FastISel::X86SelectBranch(Instruction *I) { + // Unconditional branches are selected by tablegen-generated code. + // Handle a conditional branch. 
+ BranchInst *BI = cast(I); + MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; + + // Fold the common case of a conditional branch with a comparison. + if (CmpInst *CI = dyn_cast(BI->getCondition())) { + if (CI->hasOneUse()) { + MVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + + // Try to take advantage of fallthrough opportunities. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (MBB->isLayoutSuccessor(TrueMBB)) { + std::swap(TrueMBB, FalseMBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. + unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA" + + switch (Predicate) { + case CmpInst::FCMP_OEQ: + std::swap(TrueMBB, FalseMBB); + Predicate = CmpInst::FCMP_UNE; + // FALL THROUGH + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + default: + return false; + } + + Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + if (SwapArgs) + std::swap(Op0, Op1); + + // Emit a compare of the LHS and RHS, setting the flags. + if (!X86FastEmitCompare(Op0, Op1, VT)) + return false; + + BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB); + + if (Predicate == CmpInst::FCMP_UNE) { + // X86 requires a second branch to handle UNE (and OEQ, + // which is mapped to UNE above). + BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + } + + FastEmitBranch(FalseMBB); + MBB->addSuccessor(TrueMBB); + return true; + } + } else if (ExtractValueInst *EI = + dyn_cast(BI->getCondition())) { + // Check to see if the branch instruction is from an "arithmetic with + // overflow" intrinsic. 
The main way these intrinsics are used is: + // + // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) + // %sum = extractvalue { i32, i1 } %t, 0 + // %obit = extractvalue { i32, i1 } %t, 1 + // br i1 %obit, label %overflow, label %normal + // + // The %sum and %obit are converted in an ADD and a SETO/SETB before + // reaching the branch. Therefore, we search backwards through the MBB + // looking for the SETO/SETB instruction. If an instruction modifies the + // EFLAGS register before we reach the SETO/SETB instruction, then we can't + // convert the branch into a JO/JB instruction. + if (IntrinsicInst *CI = dyn_cast(EI->getAggregateOperand())){ + if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || + CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { + const MachineInstr *SetMI = 0; + unsigned Reg = lookUpRegForValue(EI); + + for (MachineBasicBlock::const_reverse_iterator + RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { + const MachineInstr &MI = *RI; + + if (MI.modifiesRegister(Reg)) { + unsigned Src, Dst, SrcSR, DstSR; + + if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { + Reg = Src; + continue; + } + + SetMI = &MI; + break; + } + + const TargetInstrDesc &TID = MI.getDesc(); + if (TID.hasUnmodeledSideEffects() || + TID.hasImplicitDefOfPhysReg(X86::EFLAGS)) + break; + } + + if (SetMI) { + unsigned OpCode = SetMI->getOpcode(); + + if (OpCode == X86::SETOr || OpCode == X86::SETBr) { + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB); + MBB->addSuccessor(TrueMBB); + return true; + } + } + } + } + } + + // Otherwise do a clumsy setcc and re-test it. + unsigned OpReg = getRegForValue(BI->getCondition()); + if (OpReg == 0) return false; + + BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); + BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + FastEmitBranch(FalseMBB); + MBB->addSuccessor(TrueMBB); + return true; +} + +bool X86FastISel::X86SelectShift(Instruction *I) { + unsigned CReg = 0, OpReg = 0, OpImm = 0; + const TargetRegisterClass *RC = NULL; + if (I->getType() == Type::Int8Ty) { + CReg = X86::CL; + RC = &X86::GR8RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break; + case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break; + case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; + default: return false; + } + } else if (I->getType() == Type::Int16Ty) { + CReg = X86::CX; + RC = &X86::GR16RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break; + case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break; + case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; + default: return false; + } + } else if (I->getType() == Type::Int32Ty) { + CReg = X86::ECX; + RC = &X86::GR32RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break; + case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break; + case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; + default: return false; + } + } else if (I->getType() == Type::Int64Ty) { + CReg = X86::RCX; + RC = &X86::GR64RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break; + case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break; + case Instruction::Shl: 
OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break; + default: return false; + } + } else { + return false; + } + + MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + return false; + + unsigned Op0Reg = getRegForValue(I->getOperand(0)); + if (Op0Reg == 0) return false; + + // Fold immediate in shl(x,3). + if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { + unsigned ResultReg = createResultReg(RC); + BuildMI(MBB, DL, TII.get(OpImm), + ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) return false; + TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC); + + // The shift instruction uses X86::CL. If we defined a super-register + // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what + // we're doing here. + if (CReg != X86::CL) + BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL) + .addReg(CReg).addImm(X86::SUBREG_8BIT); + + unsigned ResultReg = createResultReg(RC); + BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); + UpdateValueMap(I, ResultReg); + return true; +} + +bool X86FastISel::X86SelectSelect(Instruction *I) { + MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + return false; + + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + if (VT.getSimpleVT() == MVT::i16) { + Opc = X86::CMOVE16rr; + RC = &X86::GR16RegClass; + } else if (VT.getSimpleVT() == MVT::i32) { + Opc = X86::CMOVE32rr; + RC = &X86::GR32RegClass; + } else if (VT.getSimpleVT() == MVT::i64) { + Opc = X86::CMOVE64rr; + RC = &X86::GR64RegClass; + } else { + return false; + } + + unsigned Op0Reg = getRegForValue(I->getOperand(0)); + if (Op0Reg == 0) return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) return false; + unsigned Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + + BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg); + unsigned ResultReg = createResultReg(RC); + BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg); + UpdateValueMap(I, ResultReg); + return true; +} + +bool X86FastISel::X86SelectFPExt(Instruction *I) { + // fpext from float to double. + if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) { + Value *V = I->getOperand(0); + if (V->getType() == Type::FloatTy) { + unsigned OpReg = getRegForValue(V); + if (OpReg == 0) return false; + unsigned ResultReg = createResultReg(X86::FR64RegisterClass); + BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg); + UpdateValueMap(I, ResultReg); + return true; + } + } + + return false; +} + +bool X86FastISel::X86SelectFPTrunc(Instruction *I) { + if (Subtarget->hasSSE2()) { + if (I->getType() == Type::FloatTy) { + Value *V = I->getOperand(0); + if (V->getType() == Type::DoubleTy) { + unsigned OpReg = getRegForValue(V); + if (OpReg == 0) return false; + unsigned ResultReg = createResultReg(X86::FR32RegisterClass); + BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg); + UpdateValueMap(I, ResultReg); + return true; + } + } + } + + return false; +} + +bool X86FastISel::X86SelectTrunc(Instruction *I) { + if (Subtarget->is64Bit()) + // All other cases should be handled by the tblgen generated code. 
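+ // (With a REX prefix every GPR has an addressable low byte in 64-bit
+ // mode, so no copy through the ABCD register classes is needed and the
+ // generated patterns cover it.)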
+ return false; + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + // This code only handles truncation to byte right now. + if (DstVT != MVT::i8 && DstVT != MVT::i1) + // All other cases should be handled by the tblgen generated code. + return false; + if (SrcVT != MVT::i16 && SrcVT != MVT::i32) + // All other cases should be handled by the tblgen generated code. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // First issue a copy to GR16_ABCD or GR32_ABCD. + unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; + const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) + ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; + unsigned CopyReg = createResultReg(CopyRC); + BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg); + + // Then issue an extract_subreg. + unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, + CopyReg, X86::SUBREG_8BIT); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool X86FastISel::X86SelectExtractValue(Instruction *I) { + ExtractValueInst *EI = cast(I); + Value *Agg = EI->getAggregateOperand(); + + if (IntrinsicInst *CI = dyn_cast(Agg)) { + switch (CI->getIntrinsicID()) { + default: break; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + // Cheat a little. We know that the registers for "add" and "seto" are + // allocated sequentially. However, we only keep track of the register + // for "add" in the value map. Use extractvalue's index to get the + // correct register for "seto". + UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin()); + return true; + } + } + + return false; +} + +bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { + // FIXME: Handle more intrinsics. + switch (I.getIntrinsicID()) { + default: return false; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: { + // Replace "add with overflow" intrinsics with an "add" instruction followed + // by a seto/setc instruction. Later on, when the "extractvalue" + // instructions are encountered, we use the fact that two registers were + // created sequentially to get the correct registers for the "sum" and the + // "overflow bit". + const Function *Callee = I.getCalledFunction(); + const Type *RetTy = + cast(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + Value *Op1 = I.getOperand(1); + Value *Op2 = I.getOperand(2); + unsigned Reg1 = getRegForValue(Op1); + unsigned Reg2 = getRegForValue(Op2); + + if (Reg1 == 0 || Reg2 == 0) + // FIXME: Handle values *not* in registers. + return false; + + unsigned OpC = 0; + if (VT == MVT::i32) + OpC = X86::ADD32rr; + else if (VT == MVT::i64) + OpC = X86::ADD64rr; + else + return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2); + unsigned DestReg1 = UpdateValueMap(&I, ResultReg); + + // If the add with overflow is an intra-block value then we just want to + // create temporaries for it like normal. If it is a cross-block value then + // UpdateValueMap will return the cross-block register used. Since we + // *really* want the value to be live in the register pair known by + // UpdateValueMap, we have to use DestReg1+1 as the destination register in + // the cross block case. 
In the non-cross-block case, we should just make + // another register for the value. + if (DestReg1 != ResultReg) + ResultReg = DestReg1+1; + else + ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); + + unsigned Opc = X86::SETBr; + if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) + Opc = X86::SETOr; + BuildMI(MBB, DL, TII.get(Opc), ResultReg); + return true; + } + } +} + +bool X86FastISel::X86SelectCall(Instruction *I) { + CallInst *CI = cast(I); + Value *Callee = I->getOperand(0); + + // Can't handle inline asm yet. + if (isa(Callee)) + return false; + + // Handle intrinsic calls. + if (IntrinsicInst *II = dyn_cast(CI)) + return X86VisitIntrinsicCall(*II); + + // Handle only C and fastcc calling conventions for now. + CallSite CS(CI); + unsigned CC = CS.getCallingConv(); + if (CC != CallingConv::C && + CC != CallingConv::Fast && + CC != CallingConv::X86_FastCall) + return false; + + // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't + // handle this for now. + if (CC == CallingConv::Fast && PerformTailCallOpt) + return false; + + // Let SDISel handle vararg functions. + const PointerType *PT = cast(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast(PT->getElementType()); + if (FTy->isVarArg()) + return false; + + // Handle *simple* calls for now. + const Type *RetTy = CS.getType(); + MVT RetVT; + if (RetTy == Type::VoidTy) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT, true)) + return false; + + // Materialize callee address in a register. FIXME: GV address can be + // handled with a CALLpcrel32 instead. + X86AddressMode CalleeAM; + if (!X86SelectAddress(Callee, CalleeAM, true)) + return false; + unsigned CalleeOp = 0; + GlobalValue *GV = 0; + if (CalleeAM.Base.Reg != 0) { + assert(CalleeAM.GV == 0); + CalleeOp = CalleeAM.Base.Reg; + } else if (CalleeAM.GV != 0) { + assert(CalleeAM.GV != 0); + GV = CalleeAM.GV; + } else + return false; + + // Allow calls which produce i1 results. + bool AndToI1 = false; + if (RetVT == MVT::i1) { + RetVT = MVT::i8; + AndToI1 = true; + } + + // Deal with call operands first. + SmallVector ArgVals; + SmallVector Args; + SmallVector ArgVTs; + SmallVector ArgFlags; + Args.reserve(CS.arg_size()); + ArgVals.reserve(CS.arg_size()); + ArgVTs.reserve(CS.arg_size()); + ArgFlags.reserve(CS.arg_size()); + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + unsigned Arg = getRegForValue(*i); + if (Arg == 0) + return false; + ISD::ArgFlagsTy Flags; + unsigned AttrInd = i - CS.arg_begin() + 1; + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + Flags.setZExt(); + + // FIXME: Only handle *easy* calls for now. + if (CS.paramHasAttr(AttrInd, Attribute::InReg) || + CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::Nest) || + CS.paramHasAttr(AttrInd, Attribute::ByVal)) + return false; + + const Type *ArgTy = (*i)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT)) + return false; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(Arg); + ArgVals.push_back(*i); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, false, TM, ArgLocs); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); + + // Get a count of how many bytes are to be pushed on the stack. 
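+ // (AnalyzeCallOperands has just assigned every argument to a register
+ // or a stack slot, so the next free stack offset equals the size of
+ // the outgoing argument area.)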
+ unsigned NumBytes = CCInfo.getNextStackOffset(); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes); + + // Process argument: walk the register/memloc assignments, inserting + // copies / loads. + SmallVector RegArgs; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + unsigned Arg = Args[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: { + bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted; + Emitted = true; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::ZExt: { + bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted; + Emitted = true; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::AExt: { + bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + + assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted; + ArgVT = VA.getLocVT(); + break; + } + } + + if (VA.isRegLoc()) { + TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); + bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), + Arg, RC, RC); + assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; + Emitted = true; + RegArgs.push_back(VA.getLocReg()); + } else { + unsigned LocMemOffset = VA.getLocMemOffset(); + X86AddressMode AM; + AM.Base.Reg = StackPtr; + AM.Disp = LocMemOffset; + Value *ArgVal = ArgVals[VA.getValNo()]; + + // If this is a really simple value, emit this with the Value* version of + // X86FastEmitStore. If it isn't simple, we don't want to do this, as it + // can cause us to reevaluate the argument. + if (isa(ArgVal) || isa(ArgVal)) + X86FastEmitStore(ArgVT, ArgVal, AM); + else + X86FastEmitStore(ArgVT, Arg, AM); + } + } + + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. + if (!Subtarget->is64Bit() && + TM.getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) { + TargetRegisterClass *RC = X86::GR32RegisterClass; + unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); + bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); + assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; + Emitted = true; + } + + // Issue the call. + unsigned CallOpc = CalleeOp + ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r) + : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32); + MachineInstrBuilder MIB = CalleeOp + ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp) + : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV); + + // Add an implicit use GOT pointer in EBX. + if (!Subtarget->is64Bit() && + TM.getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) + MIB.addReg(X86::EBX); + + // Add implicit physical register uses to the call. 
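+ // Without these implicit uses the copies into the argument registers
+ // would look dead and could be deleted before the call is reached.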
+  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+    MIB.addReg(RegArgs[i]);
+
+  // Issue CALLSEQ_END
+  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
+
+  // Now handle call return value (if any).
+  if (RetVT.getSimpleVT() != MVT::isVoid) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CC, false, TM, RVLocs);
+    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
+
+    // Copy all of the result registers out of their specified physreg.
+    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
+    MVT CopyVT = RVLocs[0].getValVT();
+    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+    TargetRegisterClass *SrcRC = DstRC;
+
+    // If this is a call to a function that returns an fp value on the x87 fp
+    // stack, but where we prefer to use the value in xmm registers, copy it
+    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+    if ((RVLocs[0].getLocReg() == X86::ST0 ||
+         RVLocs[0].getLocReg() == X86::ST1) &&
+        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
+      CopyVT = MVT::f80;
+      SrcRC = X86::RSTRegisterClass;
+      DstRC = X86::RFP80RegisterClass;
+    }
+
+    unsigned ResultReg = createResultReg(DstRC);
+    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
+    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+    Emitted = true;
+    if (CopyVT != RVLocs[0].getValVT()) {
+      // Round the F80 to the right size, which also moves it to the
+      // appropriate xmm register. This is accomplished by storing the F80
+      // value in memory and then loading it back. Ewww...
+      MVT ResVT = RVLocs[0].getValVT();
+      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+      unsigned MemSize = ResVT.getSizeInBits()/8;
+      int FI = MFI.CreateStackObject(MemSize, MemSize);
+      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
+      DstRC = ResVT == MVT::f32
+        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
+      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+      ResultReg = createResultReg(DstRC);
+      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
+    }
+
+    if (AndToI1) {
+      // Mask out all but the lowest bit for a call which produces an i1.
+      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+      BuildMI(MBB, DL,
+              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
+      ResultReg = AndResult;
+    }
+
+    UpdateValueMap(I, ResultReg);
+  }
+
+  return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(Instruction *I) {
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::Load:
+    return X86SelectLoad(I);
+  case Instruction::Store:
+    return X86SelectStore(I);
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+    return X86SelectCmp(I);
+  case Instruction::ZExt:
+    return X86SelectZExt(I);
+  case Instruction::Br:
+    return X86SelectBranch(I);
+  case Instruction::Call:
+    return X86SelectCall(I);
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::Shl:
+    return X86SelectShift(I);
+  case Instruction::Select:
+    return X86SelectSelect(I);
+  case Instruction::Trunc:
+    return X86SelectTrunc(I);
+  case Instruction::FPExt:
+    return X86SelectFPExt(I);
+  case Instruction::FPTrunc:
+    return X86SelectFPTrunc(I);
+  case Instruction::ExtractValue:
+    return X86SelectExtractValue(I);
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    MVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return X86SelectZExt(I);
+    if (DstVT.bitsLT(SrcVT))
+      return X86SelectTrunc(I);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+  }
+
+  return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
+  MVT VT;
+  if (!isTypeLegal(C->getType(), VT))
+    return 0;
+
+  // Get opcode and regclass of the output for the given load instruction.
+  unsigned Opc = 0;
+  const TargetRegisterClass *RC = NULL;
+  switch (VT.getSimpleVT()) {
+  default: return 0;
+  case MVT::i8:
+    Opc = X86::MOV8rm;
+    RC  = X86::GR8RegisterClass;
+    break;
+  case MVT::i16:
+    Opc = X86::MOV16rm;
+    RC  = X86::GR16RegisterClass;
+    break;
+  case MVT::i32:
+    Opc = X86::MOV32rm;
+    RC  = X86::GR32RegisterClass;
+    break;
+  case MVT::i64:
+    // Must be in x86-64 mode.
+    Opc = X86::MOV64rm;
+    RC  = X86::GR64RegisterClass;
+    break;
+  case MVT::f32:
+    if (Subtarget->hasSSE1()) {
+      Opc = X86::MOVSSrm;
+      RC  = X86::FR32RegisterClass;
+    } else {
+      Opc = X86::LD_Fp32m;
+      RC  = X86::RFP32RegisterClass;
+    }
+    break;
+  case MVT::f64:
+    if (Subtarget->hasSSE2()) {
+      Opc = X86::MOVSDrm;
+      RC  = X86::FR64RegisterClass;
+    } else {
+      Opc = X86::LD_Fp64m;
+      RC  = X86::RFP64RegisterClass;
+    }
+    break;
+  case MVT::f80:
+    // No f80 support yet.
+    return 0;
+  }
+
+  // Materialize addresses with LEA instructions.
+  if (isa<GlobalValue>(C)) {
+    X86AddressMode AM;
+    if (X86SelectAddress(C, AM, false)) {
+      if (TLI.getPointerTy() == MVT::i32)
+        Opc = X86::LEA32r;
+      else
+        Opc = X86::LEA64r;
+      unsigned ResultReg = createResultReg(RC);
+      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+      return ResultReg;
+    }
+    return 0;
+  }
+
+  // MachineConstantPool wants an explicit alignment.
+  unsigned Align = TD.getPrefTypeAlignment(C->getType());
+  if (Align == 0) {
+    // Alignment of vector types. FIXME!
+    Align = TD.getTypeAllocSize(C->getType());
+  }
+
+  // x86-32 PIC requires a PIC base register for constant pools.
+  unsigned PICBase = 0;
+  if (TM.getRelocationModel() == Reloc::PIC_ &&
+      !Subtarget->is64Bit())
+    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+
+  // Create the load from the constant pool.
+  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+  unsigned ResultReg = createResultReg(RC);
+  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset,
+                           PICBase);
+
+  return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
+  // Fail on dynamic allocas. At this point, getRegForValue has already
+  // checked its CSE maps, so if we're here trying to handle a dynamic
+  // alloca, we're not going to succeed. X86SelectAddress has a
+  // check for dynamic allocas, because it's called directly from
+  // various places, but TargetMaterializeAlloca also needs a check
+  // in order to avoid recursion between getRegForValue,
+  // X86SelectAddress, and TargetMaterializeAlloca.
+  if (!StaticAllocaMap.count(C))
+    return 0;
+
+  X86AddressMode AM;
+  if (!X86SelectAddress(C, AM, false))
+    return 0;
+  unsigned Opc = Subtarget->is64Bit() ?
+    X86::LEA64r : X86::LEA32r;
+  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+  unsigned ResultReg = createResultReg(RC);
+  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+  return ResultReg;
+}
+
+namespace llvm {
+  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
+                        MachineModuleInfo *mmi,
+                        DwarfWriter *dw,
+                        DenseMap<const Value*, unsigned> &vm,
+                        DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
+                        DenseMap<const AllocaInst*, int> &am
+#ifndef NDEBUG
+                        , SmallSet<Instruction*, 8> &cil
+#endif
+                        ) {
+    return new X86FastISel(mf, mmi, dw, vm, bm, am
+#ifndef NDEBUG
+                           , cil
+#endif
+                           );
+  }
+}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 000000000000..0f2fbcc98694
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1187 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// virtual registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// This pass is hampered by the lack of decent CFG manipulation routines for
+// machine code. In particular, this wants to be able to split critical edges
+// as necessary, traverse the machine basic block CFG in depth-first order, and
+// allow there to be multiple machine basic blocks for each LLVM basicblock
+// (needed for critical edge splitting).
+//
+// In particular, this pass currently barfs on critical edges. Because of this,
+// it requires the instruction selector to insert FP_REG_KILL instructions on
+// the exits of any basic block that has critical edges going from it, or which
+// branch to a critical basic block.
+//
+// FIXME: this is not implemented yet. The stackifier pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP  , "Number of floating point instructions");
+
+namespace {
+  struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
+    static char ID;
+    FPS() : MachineFunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+  private:
+    const TargetInstrInfo *TII; // Machine instruction info.
+    MachineBasicBlock *MBB;     // Current basic block
+    unsigned Stack[8];          // FP Registers in each stack slot...
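+    //
+    // Illustrative example of the bookkeeping maintained here: after
+    // pushReg(3) followed by pushReg(1), the state is
+    //
+    //   StackTop  = 2
+    //   Stack     = { 3, 1 }        // FP3 pushed first, FP1 on top
+    //   RegMap[3] = 0, RegMap[1] = 1
+    //
+    // so FP1 lives in ST(0) and FP3 in ST(1); getSTReg(3) computes
+    // StackTop - 1 - RegMap[3] + X86::ST0 == X86::ST1.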
+ unsigned RegMap[8]; // Track which stack slot contains each register + unsigned StackTop; // The current top of the FP stack. + + void dumpStack() const { + cerr << "Stack contents:"; + for (unsigned i = 0; i != StackTop; ++i) { + cerr << " FP" << Stack[i]; + assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); + } + cerr << "\n"; + } + private: + /// isStackEmpty - Return true if the FP stack is empty. + bool isStackEmpty() const { + return StackTop == 0; + } + + // getSlot - Return the stack slot number a particular register number is + // in. + unsigned getSlot(unsigned RegNo) const { + assert(RegNo < 8 && "Regno out of range!"); + return RegMap[RegNo]; + } + + // getStackEntry - Return the X86::FP register in register ST(i). + unsigned getStackEntry(unsigned STi) const { + assert(STi < StackTop && "Access past stack top!"); + return Stack[StackTop-1-STi]; + } + + // getSTReg - Return the X86::ST(i) register which contains the specified + // FP register. + unsigned getSTReg(unsigned RegNo) const { + return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; + } + + // pushReg - Push the specified FP register onto the stack. + void pushReg(unsigned Reg) { + assert(Reg < 8 && "Register number out of range!"); + assert(StackTop < 8 && "Stack overflow!"); + Stack[StackTop] = Reg; + RegMap[Reg] = StackTop++; + } + + bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } + void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + if (isAtTop(RegNo)) return; + + unsigned STReg = getSTReg(RegNo); + unsigned RegOnTop = getStackEntry(0); + + // Swap the slots the regs are in. + std::swap(RegMap[RegNo], RegMap[RegOnTop]); + + // Swap stack slot contents. + assert(RegMap[RegOnTop] < StackTop); + std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); + + // Emit an fxch to update the runtime processors version of the state. + BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); + NumFXCH++; + } + + void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { + DebugLoc dl = I->getDebugLoc(); + unsigned STReg = getSTReg(RegNo); + pushReg(AsReg); // New register on top of stack + + BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); + } + + // popStackAfter - Pop the current value off of the top of the FP stack + // after the specified instruction. + void popStackAfter(MachineBasicBlock::iterator &I); + + // freeStackSlotAfter - Free the specified register from the register stack, + // so that it is no longer in a register. If the register is currently at + // the top of the stack, we just pop the current instruction, otherwise we + // store the current top-of-stack into the specified slot, then pop the top + // of stack. + void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); + + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + void handleZeroArgFP(MachineBasicBlock::iterator &I); + void handleOneArgFP(MachineBasicBlock::iterator &I); + void handleOneArgFPRW(MachineBasicBlock::iterator &I); + void handleTwoArgFP(MachineBasicBlock::iterator &I); + void handleCompareFP(MachineBasicBlock::iterator &I); + void handleCondMovFP(MachineBasicBlock::iterator &I); + void handleSpecialFP(MachineBasicBlock::iterator &I); + }; + char FPS::ID = 0; +} + +FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } + +/// getFPReg - Return the X86::FPx register number for the specified operand. 
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+  assert(MO.isReg() && "Expected an FP register!");
+  unsigned Reg = MO.getReg();
+  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+  return Reg - X86::FP0;
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+  // We only need to run this pass if there are any FP registers used in this
+  // function. If it is all integer, there is nothing for us to do!
+  bool FPIsUsed = false;
+
+  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+  for (unsigned i = 0; i <= 6; ++i)
+    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+      FPIsUsed = true;
+      break;
+    }
+
+  // Early exit.
+  if (!FPIsUsed) return false;
+
+  TII = MF.getTarget().getInstrInfo();
+  StackTop = 0;
+
+  // Process the function in depth first order so that we process at least one
+  // of the predecessors for every reachable block in the function.
+  SmallPtrSet<MachineBasicBlock*, 8> Processed;
+  MachineBasicBlock *Entry = MF.begin();
+
+  bool Changed = false;
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+         I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+       I != E; ++I)
+    Changed |= processBasicBlock(MF, **I);
+
+  return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+  bool Changed = false;
+  MBB = &BB;
+
+  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+    MachineInstr *MI = I;
+    unsigned Flags = MI->getDesc().TSFlags;
+
+    unsigned FPInstClass = Flags & X86II::FPTypeMask;
+    if (MI->getOpcode() == TargetInstrInfo::INLINEASM)
+      FPInstClass = X86II::SpecialFP;
+
+    if (FPInstClass == X86II::NotFP)
+      continue;  // Efficiently ignore non-fp insts!
+
+    MachineInstr *PrevMI = 0;
+    if (I != BB.begin())
+      PrevMI = prior(I);
+
+    ++NumFP;  // Keep track of # of pseudo instrs
+    DOUT << "\nFPInst:\t" << *MI;
+
+    // Get dead variables list now because the MI pointer may be deleted as part
+    // of processing!
+    SmallVector<unsigned, 8> DeadRegs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDead())
+        DeadRegs.push_back(MO.getReg());
+    }
+
+    switch (FPInstClass) {
+    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
+    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
+    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
+    case X86II::CompareFP:  handleCompareFP(I); break;
+    case X86II::CondMovFP:  handleCondMovFP(I); break;
+    case X86II::SpecialFP:  handleSpecialFP(I); break;
+    default: assert(0 && "Unknown FP Type!");
+    }
+
+    // Check to see if any of the values defined by this instruction are dead
+    // after definition. If so, pop them.
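+    // For instance (sketch): if this instruction defines FP2 but FP2 is
+    // marked <dead>, freeStackSlotAfter() either pops it directly (when it
+    // sits in ST(0)) or first overwrites its slot with the current top of
+    // stack and then pops, keeping Stack[] and RegMap[] consistent.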
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { + unsigned Reg = DeadRegs[i]; + if (Reg >= X86::FP0 && Reg <= X86::FP6) { + DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n"; + freeStackSlotAfter(I, Reg-X86::FP0); + } + } + + // Print out all of the instructions expanded to if -debug + DEBUG( + MachineBasicBlock::iterator PrevI(PrevMI); + if (I == PrevI) { + cerr << "Just deleted pseudo instruction\n"; + } else { + MachineBasicBlock::iterator Start = I; + // Rewind to first instruction newly inserted. + while (Start != BB.begin() && prior(Start) != PrevI) --Start; + cerr << "Inserted instructions:\n\t"; + Start->print(*cerr.stream(), &MF.getTarget()); + while (++Start != next(I)) {} + } + dumpStack(); + ); + + Changed = true; + } + + assert(isStackEmpty() && "Stack not empty at end of basic block?"); + return Changed; +} + +//===----------------------------------------------------------------------===// +// Efficient Lookup Table Support +//===----------------------------------------------------------------------===// + +namespace { + struct TableEntry { + unsigned from; + unsigned to; + bool operator<(const TableEntry &TE) const { return from < TE.from; } + friend bool operator<(const TableEntry &TE, unsigned V) { + return TE.from < V; + } + friend bool operator<(unsigned V, const TableEntry &TE) { + return V < TE.from; + } + }; +} + +#ifndef NDEBUG +static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { + for (unsigned i = 0; i != NumEntries-1; ++i) + if (!(Table[i] < Table[i+1])) return false; + return true; +} +#endif + +static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { + const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); + if (I != Table+N && I->from == Opcode) + return I->to; + return -1; +} + +#ifdef NDEBUG +#define ASSERT_SORTED(TABLE) +#else +#define ASSERT_SORTED(TABLE) \ + { static bool TABLE##Checked = false; \ + if (!TABLE##Checked) { \ + assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ + "All lookup tables must be sorted for efficient access!"); \ + TABLE##Checked = true; \ + } \ + } +#endif + +//===----------------------------------------------------------------------===// +// Register File -> Register Stack Mapping Methods +//===----------------------------------------------------------------------===// + +// OpcodeTable - Sorted map of register instructions to their stack version. +// The first element is an register file pseudo instruction, the second is the +// concrete X86 instruction which uses the register stack. 
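+// For example, Lookup(OpcodeTable, array_lengthof(OpcodeTable),
+// X86::ADD_Fp32m) binary-searches the table and yields X86::ADD_F32m, the
+// concrete stack form. This only works while the table stays sorted by the
+// 'from' field, which ASSERT_SORTED verifies once in debug builds.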
+// +static const TableEntry OpcodeTable[] = { + { X86::ABS_Fp32 , X86::ABS_F }, + { X86::ABS_Fp64 , X86::ABS_F }, + { X86::ABS_Fp80 , X86::ABS_F }, + { X86::ADD_Fp32m , X86::ADD_F32m }, + { X86::ADD_Fp64m , X86::ADD_F64m }, + { X86::ADD_Fp64m32 , X86::ADD_F32m }, + { X86::ADD_Fp80m32 , X86::ADD_F32m }, + { X86::ADD_Fp80m64 , X86::ADD_F64m }, + { X86::ADD_FpI16m32 , X86::ADD_FI16m }, + { X86::ADD_FpI16m64 , X86::ADD_FI16m }, + { X86::ADD_FpI16m80 , X86::ADD_FI16m }, + { X86::ADD_FpI32m32 , X86::ADD_FI32m }, + { X86::ADD_FpI32m64 , X86::ADD_FI32m }, + { X86::ADD_FpI32m80 , X86::ADD_FI32m }, + { X86::CHS_Fp32 , X86::CHS_F }, + { X86::CHS_Fp64 , X86::CHS_F }, + { X86::CHS_Fp80 , X86::CHS_F }, + { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, + { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, + { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, + { X86::CMOVB_Fp32 , X86::CMOVB_F }, + { X86::CMOVB_Fp64 , X86::CMOVB_F }, + { X86::CMOVB_Fp80 , X86::CMOVB_F }, + { X86::CMOVE_Fp32 , X86::CMOVE_F }, + { X86::CMOVE_Fp64 , X86::CMOVE_F }, + { X86::CMOVE_Fp80 , X86::CMOVE_F }, + { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, + { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, + { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, + { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, + { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, + { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, + { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, + { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, + { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, + { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, + { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, + { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, + { X86::CMOVP_Fp32 , X86::CMOVP_F }, + { X86::CMOVP_Fp64 , X86::CMOVP_F }, + { X86::CMOVP_Fp80 , X86::CMOVP_F }, + { X86::COS_Fp32 , X86::COS_F }, + { X86::COS_Fp64 , X86::COS_F }, + { X86::COS_Fp80 , X86::COS_F }, + { X86::DIVR_Fp32m , X86::DIVR_F32m }, + { X86::DIVR_Fp64m , X86::DIVR_F64m }, + { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, + { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, + { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, + { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, + { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, + { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, + { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, + { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, + { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, + { X86::DIV_Fp32m , X86::DIV_F32m }, + { X86::DIV_Fp64m , X86::DIV_F64m }, + { X86::DIV_Fp64m32 , X86::DIV_F32m }, + { X86::DIV_Fp80m32 , X86::DIV_F32m }, + { X86::DIV_Fp80m64 , X86::DIV_F64m }, + { X86::DIV_FpI16m32 , X86::DIV_FI16m }, + { X86::DIV_FpI16m64 , X86::DIV_FI16m }, + { X86::DIV_FpI16m80 , X86::DIV_FI16m }, + { X86::DIV_FpI32m32 , X86::DIV_FI32m }, + { X86::DIV_FpI32m64 , X86::DIV_FI32m }, + { X86::DIV_FpI32m80 , X86::DIV_FI32m }, + { X86::ILD_Fp16m32 , X86::ILD_F16m }, + { X86::ILD_Fp16m64 , X86::ILD_F16m }, + { X86::ILD_Fp16m80 , X86::ILD_F16m }, + { X86::ILD_Fp32m32 , X86::ILD_F32m }, + { X86::ILD_Fp32m64 , X86::ILD_F32m }, + { X86::ILD_Fp32m80 , X86::ILD_F32m }, + { X86::ILD_Fp64m32 , X86::ILD_F64m }, + { X86::ILD_Fp64m64 , X86::ILD_F64m }, + { X86::ILD_Fp64m80 , X86::ILD_F64m }, + { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, + { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, + { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, + { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, + { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, + { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, + { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, + { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, + { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, + { X86::IST_Fp16m32 , X86::IST_F16m }, + { X86::IST_Fp16m64 , X86::IST_F16m }, + { X86::IST_Fp16m80 , X86::IST_F16m }, + { X86::IST_Fp32m32 , X86::IST_F32m }, + 
{ X86::IST_Fp32m64 , X86::IST_F32m }, + { X86::IST_Fp32m80 , X86::IST_F32m }, + { X86::IST_Fp64m32 , X86::IST_FP64m }, + { X86::IST_Fp64m64 , X86::IST_FP64m }, + { X86::IST_Fp64m80 , X86::IST_FP64m }, + { X86::LD_Fp032 , X86::LD_F0 }, + { X86::LD_Fp064 , X86::LD_F0 }, + { X86::LD_Fp080 , X86::LD_F0 }, + { X86::LD_Fp132 , X86::LD_F1 }, + { X86::LD_Fp164 , X86::LD_F1 }, + { X86::LD_Fp180 , X86::LD_F1 }, + { X86::LD_Fp32m , X86::LD_F32m }, + { X86::LD_Fp32m64 , X86::LD_F32m }, + { X86::LD_Fp32m80 , X86::LD_F32m }, + { X86::LD_Fp64m , X86::LD_F64m }, + { X86::LD_Fp64m80 , X86::LD_F64m }, + { X86::LD_Fp80m , X86::LD_F80m }, + { X86::MUL_Fp32m , X86::MUL_F32m }, + { X86::MUL_Fp64m , X86::MUL_F64m }, + { X86::MUL_Fp64m32 , X86::MUL_F32m }, + { X86::MUL_Fp80m32 , X86::MUL_F32m }, + { X86::MUL_Fp80m64 , X86::MUL_F64m }, + { X86::MUL_FpI16m32 , X86::MUL_FI16m }, + { X86::MUL_FpI16m64 , X86::MUL_FI16m }, + { X86::MUL_FpI16m80 , X86::MUL_FI16m }, + { X86::MUL_FpI32m32 , X86::MUL_FI32m }, + { X86::MUL_FpI32m64 , X86::MUL_FI32m }, + { X86::MUL_FpI32m80 , X86::MUL_FI32m }, + { X86::SIN_Fp32 , X86::SIN_F }, + { X86::SIN_Fp64 , X86::SIN_F }, + { X86::SIN_Fp80 , X86::SIN_F }, + { X86::SQRT_Fp32 , X86::SQRT_F }, + { X86::SQRT_Fp64 , X86::SQRT_F }, + { X86::SQRT_Fp80 , X86::SQRT_F }, + { X86::ST_Fp32m , X86::ST_F32m }, + { X86::ST_Fp64m , X86::ST_F64m }, + { X86::ST_Fp64m32 , X86::ST_F32m }, + { X86::ST_Fp80m32 , X86::ST_F32m }, + { X86::ST_Fp80m64 , X86::ST_F64m }, + { X86::ST_FpP80m , X86::ST_FP80m }, + { X86::SUBR_Fp32m , X86::SUBR_F32m }, + { X86::SUBR_Fp64m , X86::SUBR_F64m }, + { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, + { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, + { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, + { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, + { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, + { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, + { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, + { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, + { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, + { X86::SUB_Fp32m , X86::SUB_F32m }, + { X86::SUB_Fp64m , X86::SUB_F64m }, + { X86::SUB_Fp64m32 , X86::SUB_F32m }, + { X86::SUB_Fp80m32 , X86::SUB_F32m }, + { X86::SUB_Fp80m64 , X86::SUB_F64m }, + { X86::SUB_FpI16m32 , X86::SUB_FI16m }, + { X86::SUB_FpI16m64 , X86::SUB_FI16m }, + { X86::SUB_FpI16m80 , X86::SUB_FI16m }, + { X86::SUB_FpI32m32 , X86::SUB_FI32m }, + { X86::SUB_FpI32m64 , X86::SUB_FI32m }, + { X86::SUB_FpI32m80 , X86::SUB_FI32m }, + { X86::TST_Fp32 , X86::TST_F }, + { X86::TST_Fp64 , X86::TST_F }, + { X86::TST_Fp80 , X86::TST_F }, + { X86::UCOM_FpIr32 , X86::UCOM_FIr }, + { X86::UCOM_FpIr64 , X86::UCOM_FIr }, + { X86::UCOM_FpIr80 , X86::UCOM_FIr }, + { X86::UCOM_Fpr32 , X86::UCOM_Fr }, + { X86::UCOM_Fpr64 , X86::UCOM_Fr }, + { X86::UCOM_Fpr80 , X86::UCOM_Fr }, +}; + +static unsigned getConcreteOpcode(unsigned Opcode) { + ASSERT_SORTED(OpcodeTable); + int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); + assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); + return Opc; +} + +//===----------------------------------------------------------------------===// +// Helper Methods +//===----------------------------------------------------------------------===// + +// PopTable - Sorted map of instructions to their popping version. The first +// element is an instruction, the second is the version which pops. 
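+// For example, an X86::ST_F64m (plain fst) followed by a pop of ST(0) can
+// be combined into a single X86::ST_FP64m (fstp); popStackAfter() consults
+// this table before falling back to an explicit ST_FPrr pop.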
+// +static const TableEntry PopTable[] = { + { X86::ADD_FrST0 , X86::ADD_FPrST0 }, + + { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, + { X86::DIV_FrST0 , X86::DIV_FPrST0 }, + + { X86::IST_F16m , X86::IST_FP16m }, + { X86::IST_F32m , X86::IST_FP32m }, + + { X86::MUL_FrST0 , X86::MUL_FPrST0 }, + + { X86::ST_F32m , X86::ST_FP32m }, + { X86::ST_F64m , X86::ST_FP64m }, + { X86::ST_Frr , X86::ST_FPrr }, + + { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, + { X86::SUB_FrST0 , X86::SUB_FPrST0 }, + + { X86::UCOM_FIr , X86::UCOM_FIPr }, + + { X86::UCOM_FPr , X86::UCOM_FPPr }, + { X86::UCOM_Fr , X86::UCOM_FPr }, +}; + +/// popStackAfter - Pop the current value off of the top of the FP stack after +/// the specified instruction. This attempts to be sneaky and combine the pop +/// into the instruction itself if possible. The iterator is left pointing to +/// the last instruction, be it a new pop instruction inserted, or the old +/// instruction if it was modified in place. +/// +void FPS::popStackAfter(MachineBasicBlock::iterator &I) { + MachineInstr* MI = I; + DebugLoc dl = MI->getDebugLoc(); + ASSERT_SORTED(PopTable); + assert(StackTop > 0 && "Cannot pop empty stack!"); + RegMap[Stack[--StackTop]] = ~0; // Update state + + // Check to see if there is a popping version of this instruction... + int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); + if (Opcode != -1) { + I->setDesc(TII->get(Opcode)); + if (Opcode == X86::UCOM_FPPr) + I->RemoveOperand(0); + } else { // Insert an explicit pop + I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); + } +} + +/// freeStackSlotAfter - Free the specified register from the register stack, so +/// that it is no longer in a register. If the register is currently at the top +/// of the stack, we just pop the current instruction, otherwise we store the +/// current top-of-stack into the specified slot, then pop the top of stack. +void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { + if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. + popStackAfter(I); + return; + } + + // Otherwise, store the top of stack into the dead slot, killing the operand + // without having to add in an explicit xchg then pop. + // + unsigned STReg = getSTReg(FPRegNo); + unsigned OldSlot = getSlot(FPRegNo); + unsigned TopReg = Stack[StackTop-1]; + Stack[OldSlot] = TopReg; + RegMap[TopReg] = OldSlot; + RegMap[FPRegNo] = ~0; + Stack[--StackTop] = ~0; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg); +} + + +//===----------------------------------------------------------------------===// +// Instruction transformation implementation +//===----------------------------------------------------------------------===// + +/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds +/// +void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = I; + unsigned DestReg = getFPReg(MI->getOperand(0)); + + // Change from the pseudo instruction to the concrete instruction. + MI->RemoveOperand(0); // Remove the explicit ST(0) operand + MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); + + // Result gets pushed on the stack. 
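+  // E.g. the pseudo 'FP2 = LD_Fp032' has just become the concrete 'LD_F0'
+  // (fldz), and pushReg(2) below records that FP2 now occupies the new
+  // top-of-stack slot.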
+ pushReg(DestReg); +} + +/// handleOneArgFP - fst , ST(0) +/// +void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = I; + unsigned NumOps = MI->getDesc().getNumOperands(); + assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) && + "Can only handle fst* & ftst instructions!"); + + // Is this the last use of the source register? + unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); + bool KillsSrc = MI->killsRegister(X86::FP0+Reg); + + // FISTP64m is strange because there isn't a non-popping versions. + // If we have one _and_ we don't want to pop the operand, duplicate the value + // on the stack instead of moving it. This ensure that popping the value is + // always ok. + // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. + // + if (!KillsSrc && + (MI->getOpcode() == X86::IST_Fp64m32 || + MI->getOpcode() == X86::ISTT_Fp16m32 || + MI->getOpcode() == X86::ISTT_Fp32m32 || + MI->getOpcode() == X86::ISTT_Fp64m32 || + MI->getOpcode() == X86::IST_Fp64m64 || + MI->getOpcode() == X86::ISTT_Fp16m64 || + MI->getOpcode() == X86::ISTT_Fp32m64 || + MI->getOpcode() == X86::ISTT_Fp64m64 || + MI->getOpcode() == X86::IST_Fp64m80 || + MI->getOpcode() == X86::ISTT_Fp16m80 || + MI->getOpcode() == X86::ISTT_Fp32m80 || + MI->getOpcode() == X86::ISTT_Fp64m80 || + MI->getOpcode() == X86::ST_FpP80m)) { + duplicateToTop(Reg, 7 /*temp register*/, I); + } else { + moveToTop(Reg, I); // Move to the top of the stack... + } + + // Convert from the pseudo instruction to the concrete instruction. + MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand + MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); + + if (MI->getOpcode() == X86::IST_FP64m || + MI->getOpcode() == X86::ISTT_FP16m || + MI->getOpcode() == X86::ISTT_FP32m || + MI->getOpcode() == X86::ISTT_FP64m || + MI->getOpcode() == X86::ST_FP80m) { + assert(StackTop > 0 && "Stack empty??"); + --StackTop; + } else if (KillsSrc) { // Last use of operand? + popStackAfter(I); + } +} + + +/// handleOneArgFPRW: Handle instructions that read from the top of stack and +/// replace the value with a newly computed value. These instructions may have +/// non-fp operands after their FP operands. +/// +/// Examples: +/// R1 = fchs R2 +/// R1 = fadd R2, [mem] +/// +void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { + MachineInstr *MI = I; +#ifndef NDEBUG + unsigned NumOps = MI->getDesc().getNumOperands(); + assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); +#endif + + // Is this the last use of the source register? + unsigned Reg = getFPReg(MI->getOperand(1)); + bool KillsSrc = MI->killsRegister(X86::FP0+Reg); + + if (KillsSrc) { + // If this is the last use of the source register, just make sure it's on + // the top of the stack. + moveToTop(Reg, I); + assert(StackTop > 0 && "Stack cannot be empty!"); + --StackTop; + pushReg(getFPReg(MI->getOperand(0))); + } else { + // If this is not the last use of the source register, _copy_ it to the top + // of the stack. + duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); + } + + // Change from the pseudo instruction to the concrete instruction. + MI->RemoveOperand(1); // Drop the source operand. + MI->RemoveOperand(0); // Drop the destination operand. 
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); +} + + +//===----------------------------------------------------------------------===// +// Define tables of various ways to map pseudo instructions +// + +// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) +static const TableEntry ForwardST0Table[] = { + { X86::ADD_Fp32 , X86::ADD_FST0r }, + { X86::ADD_Fp64 , X86::ADD_FST0r }, + { X86::ADD_Fp80 , X86::ADD_FST0r }, + { X86::DIV_Fp32 , X86::DIV_FST0r }, + { X86::DIV_Fp64 , X86::DIV_FST0r }, + { X86::DIV_Fp80 , X86::DIV_FST0r }, + { X86::MUL_Fp32 , X86::MUL_FST0r }, + { X86::MUL_Fp64 , X86::MUL_FST0r }, + { X86::MUL_Fp80 , X86::MUL_FST0r }, + { X86::SUB_Fp32 , X86::SUB_FST0r }, + { X86::SUB_Fp64 , X86::SUB_FST0r }, + { X86::SUB_Fp80 , X86::SUB_FST0r }, +}; + +// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) +static const TableEntry ReverseST0Table[] = { + { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative + { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative + { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative + { X86::DIV_Fp32 , X86::DIVR_FST0r }, + { X86::DIV_Fp64 , X86::DIVR_FST0r }, + { X86::DIV_Fp80 , X86::DIVR_FST0r }, + { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative + { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative + { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative + { X86::SUB_Fp32 , X86::SUBR_FST0r }, + { X86::SUB_Fp64 , X86::SUBR_FST0r }, + { X86::SUB_Fp80 , X86::SUBR_FST0r }, +}; + +// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) +static const TableEntry ForwardSTiTable[] = { + { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative + { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative + { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative + { X86::DIV_Fp32 , X86::DIVR_FrST0 }, + { X86::DIV_Fp64 , X86::DIVR_FrST0 }, + { X86::DIV_Fp80 , X86::DIVR_FrST0 }, + { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative + { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative + { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative + { X86::SUB_Fp32 , X86::SUBR_FrST0 }, + { X86::SUB_Fp64 , X86::SUBR_FrST0 }, + { X86::SUB_Fp80 , X86::SUBR_FrST0 }, +}; + +// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) +static const TableEntry ReverseSTiTable[] = { + { X86::ADD_Fp32 , X86::ADD_FrST0 }, + { X86::ADD_Fp64 , X86::ADD_FrST0 }, + { X86::ADD_Fp80 , X86::ADD_FrST0 }, + { X86::DIV_Fp32 , X86::DIV_FrST0 }, + { X86::DIV_Fp64 , X86::DIV_FrST0 }, + { X86::DIV_Fp80 , X86::DIV_FrST0 }, + { X86::MUL_Fp32 , X86::MUL_FrST0 }, + { X86::MUL_Fp64 , X86::MUL_FrST0 }, + { X86::MUL_Fp80 , X86::MUL_FrST0 }, + { X86::SUB_Fp32 , X86::SUB_FrST0 }, + { X86::SUB_Fp64 , X86::SUB_FrST0 }, + { X86::SUB_Fp80 , X86::SUB_FrST0 }, +}; + + +/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual +/// instructions which need to be simplified and possibly transformed. 
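+///
+/// A worked example (sketch): for 'FP3 = SUB_Fp80 FP1, FP2' with FP1 on
+/// top of the stack and killed, the forward ST(0) form applies: the tables
+/// above rewrite it to 'SUB_FST0r ST(i)', where ST(i) currently holds FP2,
+/// and FP1's stack slot is simply relabeled as FP3.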
+/// +/// Result: ST(0) = fsub ST(0), ST(i) +/// ST(i) = fsub ST(0), ST(i) +/// ST(0) = fsubr ST(0), ST(i) +/// ST(i) = fsubr ST(0), ST(i) +/// +void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { + ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); + ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); + MachineInstr *MI = I; + + unsigned NumOperands = MI->getDesc().getNumOperands(); + assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); + unsigned Dest = getFPReg(MI->getOperand(0)); + unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); + unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); + bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); + bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); + DebugLoc dl = MI->getDebugLoc(); + + unsigned TOS = getStackEntry(0); + + // One of our operands must be on the top of the stack. If neither is yet, we + // need to move one. + if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? + // We can choose to move either operand to the top of the stack. If one of + // the operands is killed by this instruction, we want that one so that we + // can update right on top of the old version. + if (KillsOp0) { + moveToTop(Op0, I); // Move dead operand to TOS. + TOS = Op0; + } else if (KillsOp1) { + moveToTop(Op1, I); + TOS = Op1; + } else { + // All of the operands are live after this instruction executes, so we + // cannot update on top of any operand. Because of this, we must + // duplicate one of the stack elements to the top. It doesn't matter + // which one we pick. + // + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + } else if (!KillsOp0 && !KillsOp1) { + // If we DO have one of our operands at the top of the stack, but we don't + // have a dead operand, we must duplicate one of the operands to a new slot + // on the stack. + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + + // Now we know that one of our operands is on the top of the stack, and at + // least one of our operands is killed by this instruction. + assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && + "Stack conditions not set up right!"); + + // We decide which form to use based on what is on the top of the stack, and + // which operand is killed by this instruction. + const TableEntry *InstTable; + bool isForward = TOS == Op0; + bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); + if (updateST0) { + if (isForward) + InstTable = ForwardST0Table; + else + InstTable = ReverseST0Table; + } else { + if (isForward) + InstTable = ForwardSTiTable; + else + InstTable = ReverseSTiTable; + } + + int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), + MI->getOpcode()); + assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); + + // NotTOS - The register which is not on the top of stack... + unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; + + // Replace the old instruction with a new instruction + MBB->remove(I++); + I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); + + // If both operands are killed, pop one off of the stack in addition to + // overwriting the other one. + if (KillsOp0 && KillsOp1 && Op0 != Op1) { + assert(!updateST0 && "Should have updated other operand!"); + popStackAfter(I); // Pop the top of stack + } + + // Update stack information so that we know the destination register is now on + // the stack. + unsigned UpdatedSlot = getSlot(updateST0 ? 
TOS : NotTOS); + assert(UpdatedSlot < StackTop && Dest < 7); + Stack[UpdatedSlot] = Dest; + RegMap[Dest] = UpdatedSlot; + MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction +} + +/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP +/// register arguments and no explicit destinations. +/// +void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { + ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); + ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); + MachineInstr *MI = I; + + unsigned NumOperands = MI->getDesc().getNumOperands(); + assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); + unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); + unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); + bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); + bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); + + // Make sure the first operand is on the top of stack, the other one can be + // anywhere. + moveToTop(Op0, I); + + // Change from the pseudo instruction to the concrete instruction. + MI->getOperand(0).setReg(getSTReg(Op1)); + MI->RemoveOperand(1); + MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); + + // If any of the operands are killed by this instruction, free them. + if (KillsOp0) freeStackSlotAfter(I, Op0); + if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); +} + +/// handleCondMovFP - Handle two address conditional move instructions. These +/// instructions move a st(i) register to st(0) iff a condition is true. These +/// instructions require that the first operand is at the top of the stack, but +/// otherwise don't modify the stack at all. +void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = I; + + unsigned Op0 = getFPReg(MI->getOperand(0)); + unsigned Op1 = getFPReg(MI->getOperand(2)); + bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); + + // The first operand *must* be on the top of the stack. + moveToTop(Op0, I); + + // Change the second operand to the stack register that the operand is in. + // Change from the pseudo instruction to the concrete instruction. + MI->RemoveOperand(0); + MI->RemoveOperand(1); + MI->getOperand(0).setReg(getSTReg(Op1)); + MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); + + // If we kill the second operand, make sure to pop it from the stack. + if (Op0 != Op1 && KillsOp1) { + // Get this value off of the register stack. + freeStackSlotAfter(I, Op1); + } +} + + +/// handleSpecialFP - Handle special instructions which behave unlike other +/// floating point instructions. This is primarily intended for use by pseudo +/// instructions. +/// +void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + switch (MI->getOpcode()) { + default: assert(0 && "Unknown SpecialFP instruction!"); + case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! + case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! + case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! + assert(StackTop == 0 && "Stack should be empty after a call!"); + pushReg(getFPReg(MI->getOperand(0))); + break; + case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! + case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! + case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! + // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 
+ // The pattern we expect is: + // CALL + // FP1 = FpGET_ST0 + // FP4 = FpGET_ST1 + // + // At this point, we've pushed FP1 on the top of stack, so it should be + // present if it isn't dead. If it was dead, we already emitted a pop to + // remove it from the stack and StackTop = 0. + + // Push FP4 as top of stack next. + pushReg(getFPReg(MI->getOperand(0))); + + // If StackTop was 0 before we pushed our operand, then ST(0) must have been + // dead. In this case, the ST(1) value is the only thing that is live, so + // it should be on the TOS (after the pop that was emitted) and is. Just + // continue in this case. + if (StackTop == 1) + break; + + // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top + // elements so that our accounting is correct. + unsigned RegOnTop = getStackEntry(0); + unsigned RegNo = getStackEntry(1); + + // Swap the slots the regs are in. + std::swap(RegMap[RegNo], RegMap[RegOnTop]); + + // Swap stack slot contents. + assert(RegMap[RegOnTop] < StackTop); + std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); + break; + } + case X86::FpSET_ST0_32: + case X86::FpSET_ST0_64: + case X86::FpSET_ST0_80: + assert((StackTop == 1 || StackTop == 2) + && "Stack should have one or two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + case X86::FpSET_ST1_32: + case X86::FpSET_ST1_64: + case X86::FpSET_ST1_80: + // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. + if (StackTop == 1) { + BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); + NumFXCH++; + StackTop = 0; + break; + } + assert(StackTop == 2 && "Stack should have two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + case X86::MOV_Fp3232: + case X86::MOV_Fp3264: + case X86::MOV_Fp6432: + case X86::MOV_Fp6464: + case X86::MOV_Fp3280: + case X86::MOV_Fp6480: + case X86::MOV_Fp8032: + case X86::MOV_Fp8064: + case X86::MOV_Fp8080: { + const MachineOperand &MO1 = MI->getOperand(1); + unsigned SrcReg = getFPReg(MO1); + + const MachineOperand &MO0 = MI->getOperand(0); + // These can be created due to inline asm. Two address pass can introduce + // copies from RFP registers to virtual registers. + if (MO0.getReg() == X86::ST0 && SrcReg == 0) { + assert(MO1.isKill()); + // Treat %ST0 = MOV_Fp8080 %FP0 + // like FpSET_ST0_80 %FP0, %ST0 + assert((StackTop == 1 || StackTop == 2) + && "Stack should have one or two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) { + assert(MO1.isKill()); + // Treat %ST1 = MOV_Fp8080 %FP1 + // like FpSET_ST1_80 %FP0, %ST1 + // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. + if (StackTop == 1) { + BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); + NumFXCH++; + StackTop = 0; + break; + } + assert(StackTop == 2 && "Stack should have two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + } + + unsigned DestReg = getFPReg(MO0); + if (MI->killsRegister(X86::FP0+SrcReg)) { + // If the input operand is killed, we can just change the owner of the + // incoming stack slot into the result. + unsigned Slot = getSlot(SrcReg); + assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); + Stack[Slot] = DestReg; + RegMap[DestReg] = Slot; + + } else { + // For FMOV we just duplicate the specified value to a new stack slot. 
+ // This could be made better, but would require substantial changes. + duplicateToTop(SrcReg, DestReg, I); + } + } + break; + case TargetInstrInfo::INLINEASM: { + // The inline asm MachineInstr currently only *uses* FP registers for the + // 'f' constraint. These should be turned into the current ST(x) register + // in the machine instr. Also, any kills should be explicitly popped after + // the inline asm. + unsigned Kills[7]; + unsigned NumKills = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) + continue; + assert(Op.isUse() && "Only handle inline asm uses right now"); + + unsigned FPReg = getFPReg(Op); + Op.setReg(getSTReg(FPReg)); + + // If we kill this operand, make sure to pop it from the stack after the + // asm. We just remember it for now, and pop them all off at the end in + // a batch. + if (Op.isKill()) + Kills[NumKills++] = FPReg; + } + + // If this asm kills any FP registers (is the last use of them) we must + // explicitly emit pop instructions for them. Do this now after the asm has + // executed so that the ST(x) numbers are not off (which would happen if we + // did this inline with operand rewriting). + // + // Note: this might be a non-optimal pop sequence. We might be able to do + // better by trying to pop in stack order or something. + MachineBasicBlock::iterator InsertPt = MI; + while (NumKills) + freeStackSlotAfter(InsertPt, Kills[--NumKills]); + + // Don't delete the inline asm! + return; + } + + case X86::RET: + case X86::RETI: + // If RET has an FP register use operand, pass the first one in ST(0) and + // the second one in ST(1). + if (isStackEmpty()) return; // Quick check to see if any are possible. + + // Find the register operands. + unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) + continue; + // FP Register uses must be kills unless there are two uses of the same + // register, in which case only one will be a kill. + assert(Op.isUse() && + (Op.isKill() || // Marked kill. + getFPReg(Op) == FirstFPRegOp || // Second instance. + MI->killsRegister(Op.getReg())) && // Later use is marked kill. + "Ret only defs operands, and values aren't live beyond it"); + + if (FirstFPRegOp == ~0U) + FirstFPRegOp = getFPReg(Op); + else { + assert(SecondFPRegOp == ~0U && "More than two fp operands!"); + SecondFPRegOp = getFPReg(Op); + } + + // Remove the operand so that later passes don't see it. + MI->RemoveOperand(i); + --i, --e; + } + + // There are only four possibilities here: + // 1) we are returning a single FP value. In this case, it has to be in + // ST(0) already, so just declare success by removing the value from the + // FP Stack. + if (SecondFPRegOp == ~0U) { + // Assert that the top of stack contains the right FP register. + assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && + "Top of stack not the right register for RET!"); + + // Ok, everything is good, mark the value as not being on the stack + // anymore so that our assertion about the stack being empty at end of + // block doesn't fire. + StackTop = 0; + return; + } + + // Otherwise, we are returning two values: + // 2) If returning the same value for both, we only have one thing in the FP + // stack. 
Consider: RET FP1, FP1 + if (StackTop == 1) { + assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& + "Stack misconfiguration for RET!"); + + // Duplicate the TOS so that we return it twice. Just pick some other FPx + // register to hold it. + unsigned NewReg = (FirstFPRegOp+1)%7; + duplicateToTop(FirstFPRegOp, NewReg, MI); + FirstFPRegOp = NewReg; + } + + /// Okay we know we have two different FPx operands now: + assert(StackTop == 2 && "Must have two values live!"); + + /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently + /// in ST(1). In this case, emit an fxch. + if (getStackEntry(0) == SecondFPRegOp) { + assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); + moveToTop(FirstFPRegOp, MI); + } + + /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in + /// ST(1). Just remove both from our understanding of the stack and return. + assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); + assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); + StackTop = 0; + return; + } + + I = MBB->erase(I); // Remove the pseudo instruction + --I; +} diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp new file mode 100644 index 000000000000..009846e2e0b5 --- /dev/null +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -0,0 +1,139 @@ +//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which inserts FP_REG_KILL instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-codegen" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added"); + +namespace { + struct VISIBILITY_HIDDEN FPRegKiller : public MachineFunctionPass { + static char ID; + FPRegKiller() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; } + }; + char FPRegKiller::ID = 0; +} + +FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); } + +bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { + // If we are emitting FP stack code, scan the basic block to determine if this + // block defines any FP values. If so, put an FP_REG_KILL instruction before + // the terminator of the block. + + // Note that FP stack instructions are used in all modes for long double, + // so we always need to do this check. 
+  // Also note that it's possible for an FP stack register to be live across
+  // an instruction that produces multiple basic blocks (SSE CMOV) so we
+  // must check all the generated basic blocks.
+
+  // Scan all of the machine instructions in these MBBs, checking for FP
+  // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
+
+  // Fast-path: If nothing is using the x87 registers, we don't need to do
+  // any scanning.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
+      MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
+      MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
+    return false;
+
+  bool Changed = false;
+  const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
+  MachineFunction::iterator MBBI = MF.begin();
+  MachineFunction::iterator EndMBB = MF.end();
+  for (; MBBI != EndMBB; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+
+    // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
+    // before the return.
+    if (!MBB->empty()) {
+      MachineBasicBlock::iterator EndI = MBB->end();
+      --EndI;
+      if (EndI->getDesc().isReturn())
+        continue;
+    }
+
+    bool ContainsFPCode = false;
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         !ContainsFPCode && I != E; ++I) {
+      if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
+        const TargetRegisterClass *clas;
+        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+          if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
+              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+              ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
+                 X86::RFP32RegisterClass ||
+               clas == X86::RFP64RegisterClass ||
+               clas == X86::RFP80RegisterClass)) {
+            ContainsFPCode = true;
+            break;
+          }
+        }
+      }
+    }
+    // Check PHI nodes in successor blocks. These PHI's will be lowered to have
+    // a copy of the input value in this block. In SSE mode, we only care about
+    // 80-bit values.
+    if (!ContainsFPCode) {
+      // Final check, check LLVM BB's that are successors to the LLVM BB
+      // corresponding to BB for FP PHI nodes.
+      const BasicBlock *LLVMBB = MBB->getBasicBlock();
+      const PHINode *PN;
+      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
+           !ContainsFPCode && SI != E; ++SI) {
+        for (BasicBlock::const_iterator II = SI->begin();
+             (PN = dyn_cast<PHINode>(II)); ++II) {
+          if (PN->getType()==Type::X86_FP80Ty ||
+              (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
+              (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) {
+            ContainsFPCode = true;
+            break;
+          }
+        }
+      }
+    }
+    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
+    if (ContainsFPCode) {
+      BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc::getUnknownLoc(),
+              MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
+      ++NumFPKill;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..bd1fea71a6d0
--- /dev/null
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,1716 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized dag to a X86 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+//                      Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+  /// SDValue's instead of register numbers for the leaves of the matched
+  /// tree.
+  struct X86ISelAddressMode {
+    enum {
+      RegBase,
+      FrameIndexBase
+    } BaseType;
+
+    struct {            // This is really a union, discriminated by BaseType!
+      SDValue Reg;
+      int FrameIndex;
+    } Base;
+
+    bool isRIPRel;      // RIP as base?
+    unsigned Scale;
+    SDValue IndexReg;
+    int32_t Disp;
+    SDValue Segment;
+    GlobalValue *GV;
+    Constant *CP;
+    const char *ES;
+    int JT;
+    unsigned Align;     // CP alignment.
+
+    X86ISelAddressMode()
+      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
+    }
+
+    bool hasSymbolicDisplacement() const {
+      return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+    }
+
+    void dump() {
+      cerr << "X86ISelAddressMode " << this << "\n";
+      cerr << "Base.Reg ";
+      if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
+      else cerr << "nul";
+      cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
+      cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
+      cerr << "IndexReg ";
+      if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+      else cerr << "nul";
+      cerr << " Disp " << Disp << "\n";
+      cerr << "GV "; if (GV) GV->dump();
+      else cerr << "nul";
+      cerr << " CP "; if (CP) CP->dump();
+      else cerr << "nul";
+      cerr << "\n";
+      cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
+      cerr << " JT" << JT << " Align" << Align << "\n";
+    }
+  };
+}
+
+namespace {
+  //===--------------------------------------------------------------------===//
+  /// ISel - X86 specific code to select X86 machine instructions for
+  /// SelectionDAG operations.
+  ///
+  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
+    /// TM - Keep a reference to X86TargetMachine.
+    ///
+    X86TargetMachine &TM;
+
+    /// X86Lowering - This object fully describes how to lower LLVM code to an
+    /// X86-specific SelectionDAG.
+    X86TargetLowering &X86Lowering;
+
+    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const X86Subtarget *Subtarget;
+
+    /// CurBB - Current BB being isel'd.
+    ///
+    MachineBasicBlock *CurBB;
+
+    /// OptForSize - If true, selector should try to optimize for code size
+    /// instead of performance.
+    bool OptForSize;
+
+  public:
+    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(tm, OptLevel),
+        TM(tm), X86Lowering(*TM.getTargetLowering()),
+        Subtarget(&TM.getSubtarget<X86Subtarget>()),
+        OptForSize(false) {}
+
+    virtual const char *getPassName() const {
+      return "X86 DAG->DAG Instruction Selection";
+    }
+
+    /// InstructionSelect - This callback is invoked by
+    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+    virtual void InstructionSelect();
+
+    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+
+    virtual
+    bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+  private:
+    SDNode *Select(SDValue N);
+    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
+    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
+    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+    bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
+                      unsigned Depth = 0);
+    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+                    SDValue &Scale, SDValue &Index, SDValue &Disp,
+                    SDValue &Segment);
+    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
+                       SDValue &Scale, SDValue &Index, SDValue &Disp);
+    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
+                             SDValue N, SDValue &Base, SDValue &Scale,
+                             SDValue &Index, SDValue &Disp,
+                             SDValue &Segment,
+                             SDValue &InChain, SDValue &OutChain);
+    bool TryFoldLoad(SDValue P, SDValue N,
+                     SDValue &Base, SDValue &Scale,
+                     SDValue &Index, SDValue &Disp,
+                     SDValue &Segment);
+    void PreprocessForRMW();
+    void PreprocessForFPConvert();
+
+    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+    /// inline asm expressions.
+    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                              char ConstraintCode,
+                                              std::vector<SDValue> &OutOps);
+
+    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+                                   SDValue &Scale, SDValue &Index,
+                                   SDValue &Disp, SDValue &Segment) {
+      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+        AM.Base.Reg;
+      Scale = getI8Imm(AM.Scale);
+      Index = AM.IndexReg;
+      // These are 32-bit even in 64-bit mode since RIP relative offset
+      // is 32-bit.
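+      // For example, a load of "12(%eax,%ecx,4)" decomposes into the five
+      // operands produced here: Base = %eax, Scale = 4, Index = %ecx,
+      // Disp = 12, Segment = noreg; i.e. the matched form is
+      // Segment:[Base + Scale*Index + Disp].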
+      if (AM.GV)
+        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+      else if (AM.CP)
+        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+                                             AM.Align, AM.Disp);
+      else if (AM.ES)
+        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+      else if (AM.JT != -1)
+        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+      else
+        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+      if (AM.Segment.getNode())
+        Segment = AM.Segment;
+      else
+        Segment = CurDAG->getRegister(0, MVT::i32);
+    }
+
+    /// getI8Imm - Return a target constant with the specified value, of type
+    /// i8.
+    inline SDValue getI8Imm(unsigned Imm) {
+      return CurDAG->getTargetConstant(Imm, MVT::i8);
+    }
+
+    /// getI16Imm - Return a target constant with the specified value, of type
+    /// i16.
+    inline SDValue getI16Imm(unsigned Imm) {
+      return CurDAG->getTargetConstant(Imm, MVT::i16);
+    }
+
+    /// getI32Imm - Return a target constant with the specified value, of type
+    /// i32.
+    inline SDValue getI32Imm(unsigned Imm) {
+      return CurDAG->getTargetConstant(Imm, MVT::i32);
+    }
+
+    /// getGlobalBaseReg - Return an SDNode that returns the value of
+    /// the global base register. Output instructions required to
+    /// initialize the global base register, if necessary.
+    ///
+    SDNode *getGlobalBaseReg();
+
+#ifndef NDEBUG
+    unsigned Indent;
+#endif
+  };
+}
+
+
+bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+                                                 SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  if (U == Root)
+    switch (U->getOpcode()) {
+    default: break;
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR: {
+      SDValue Op1 = U->getOperand(1);
+
+      // If the other operand is an 8-bit immediate we should fold the
+      // immediate instead. This reduces code size.
+      // e.g.
+      // movl 4(%esp), %eax
+      // addl $4, %eax
+      // vs.
+      // movl $4, %eax
+      // addl 4(%esp), %eax
+      // The former is 2 bytes shorter. In case where the increment is 1, then
+      // the saving can be 4 bytes (by using incl %eax).
+      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+        if (Imm->getAPIntValue().isSignedIntN(8))
+          return false;
+
+      // If the other operand is a TLS address, we should fold it instead.
+      // This produces
+      // movl %gs:0, %eax
+      // leal i@NTPOFF(%eax), %eax
+      // instead of
+      // movl $i@NTPOFF, %eax
+      // addl %gs:0, %eax
+      // if the block also has an access to a second TLS address this will save
+      // a load.
+      // FIXME: This is probably also true for non TLS addresses.
+      if (Op1.getOpcode() == X86ISD::Wrapper) {
+        SDValue Val = Op1.getOperand(0);
+        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+          return false;
+      }
+    }
+    }
+
+  // Proceed to 'generic' cycle finder code
+  return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
+}
+
+/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
+/// and move load below the TokenFactor. Replace store's chain operand with
+/// load's chain result.
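+/// Schematically, where "ch:" names a node's chain operand (a sketch of the
+/// rewiring below, not additional cases):
+///   before: TF(..., Load, ...);  Load(ch: LoadChain);  Store(ch: TF)
+///   after:  TF(..., LoadChain, ...);  Load(ch: TF);  Store(ch: Load.1)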
+static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
+                                 SDValue Store, SDValue TF) {
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
+    if (Load.getNode() == TF.getOperand(i).getNode())
+      Ops.push_back(Load.getOperand(0));
+    else
+      Ops.push_back(TF.getOperand(i));
+  CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+  CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
+  CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+                             Store.getOperand(2), Store.getOperand(3));
+}
+
+/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
+///
+static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
+                      SDValue &Load) {
+  if (N.getOpcode() == ISD::BIT_CONVERT)
+    N = N.getOperand(0);
+
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+  if (!LD || LD->isVolatile())
+    return false;
+  if (LD->getAddressingMode() != ISD::UNINDEXED)
+    return false;
+
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
+    return false;
+
+  if (N.hasOneUse() &&
+      N.getOperand(1) == Address &&
+      N.getNode()->isOperandOf(Chain.getNode())) {
+    Load = N;
+    return true;
+  }
+  return false;
+}
+
+/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
+/// operand and move load below the call's chain operand.
+static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
+                                  SDValue Call, SDValue CallSeqStart) {
+  SmallVector<SDValue, 8> Ops;
+  SDValue Chain = CallSeqStart.getOperand(0);
+  if (Chain.getNode() == Load.getNode())
+    Ops.push_back(Load.getOperand(0));
+  else {
+    assert(Chain.getOpcode() == ISD::TokenFactor &&
+           "Unexpected CallSeqStart chain operand");
+    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+      if (Chain.getOperand(i).getNode() == Load.getNode())
+        Ops.push_back(Load.getOperand(0));
+      else
+        Ops.push_back(Chain.getOperand(i));
+    SDValue NewChain =
+      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+                      MVT::Other, &Ops[0], Ops.size());
+    Ops.clear();
+    Ops.push_back(NewChain);
+  }
+  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
+    Ops.push_back(CallSeqStart.getOperand(i));
+  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+                             Load.getOperand(1), Load.getOperand(2));
+  Ops.clear();
+  Ops.push_back(SDValue(Load.getNode(), 1));
+  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+    Ops.push_back(Call.getOperand(i));
+  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+    return false;
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+  if (!LD ||
+      LD->isVolatile() ||
+      LD->getAddressingMode() != ISD::UNINDEXED ||
+      LD->getExtensionType() != ISD::NON_EXTLOAD)
+    return false;
+
+  // Now let's find the callseq_start.
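+  // Walk up the chain one single-use link at a time; e.g. for
+  // CALL <- CALLSEQ_START <- ... <- Load, each intermediate node must have
+  // exactly one use, or hoisting the load past it would not be safe.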
+  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+    if (!Chain.hasOneUse())
+      return false;
+    Chain = Chain.getOperand(0);
+  }
+
+  if (Chain.getOperand(0).getNode() == Callee.getNode())
+    return true;
+  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
+    return true;
+  return false;
+}
+
+
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -O0 mode.
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
+///
+///     [Load chain]
+///         ^
+///         |
+///       [Load]
+///       ^    ^
+///       |    |
+///      /      \-
+///     /        |
+/// [TokenFactor] [Op]
+///     ^          ^
+///     |          |
+///      \        /
+///       \      /
+///       [Store]
+///
+/// The fact the store's chain operand != load's chain will prevent the
+/// (store (op (load))) instruction from being selected. We can transform it to:
+///
+///     [Load chain]
+///         ^
+///         |
+///    [TokenFactor]
+///         ^
+///         |
+///       [Load]
+///       ^    ^
+///       |    |
+///       |     \-
+///       |      |
+///       |     [Op]
+///       |      ^
+///       |      |
+///       \     /
+///        \   /
+///       [Store]
+void X86DAGToDAGISel::PreprocessForRMW() {
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+         E = CurDAG->allnodes_end(); I != E; ++I) {
+    if (I->getOpcode() == X86ISD::CALL) {
+      /// Also try moving call address load from outside callseq_start to just
+      /// before the call to allow it to be folded.
+      ///
+      ///     [Load chain]
+      ///         ^
+      ///         |
+      ///       [Load]
+      ///       ^    ^
+      ///       |    |
+      ///      /      \--
+      ///     /          |
+      ///[CALLSEQ_START] |
+      ///        ^       |
+      ///        |       |
+      /// [LOAD/C2Reg]   |
+      ///        |       |
+      ///         \     /
+      ///          \   /
+      ///         [CALL]
+      SDValue Chain = I->getOperand(0);
+      SDValue Load  = I->getOperand(1);
+      if (!isCalleeLoad(Load, Chain))
+        continue;
+      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+      ++NumLoadMoved;
+      continue;
+    }
+
+    if (!ISD::isNON_TRUNCStore(I))
+      continue;
+    SDValue Chain = I->getOperand(0);
+
+    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
+      continue;
+
+    SDValue N1 = I->getOperand(1);
+    SDValue N2 = I->getOperand(2);
+    if ((N1.getValueType().isFloatingPoint() &&
+         !N1.getValueType().isVector()) ||
+        !N1.hasOneUse())
+      continue;
+
+    bool RModW = false;
+    SDValue Load;
+    unsigned Opcode = N1.getNode()->getOpcode();
+    switch (Opcode) {
+    case ISD::ADD:
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::VECTOR_SHUFFLE: {
+      SDValue N10 = N1.getOperand(0);
+      SDValue N11 = N1.getOperand(1);
+      RModW = isRMWLoad(N10, Chain, N2, Load);
+      if (!RModW)
+        RModW = isRMWLoad(N11, Chain, N2, Load);
+      break;
+    }
+    case ISD::SUB:
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL:
+    case ISD::ROTL:
+    case ISD::ROTR:
+    case ISD::SUBC:
+    case ISD::SUBE:
+    case X86ISD::SHLD:
+    case X86ISD::SHRD: {
+      SDValue N10 = N1.getOperand(0);
+      RModW = isRMWLoad(N10, Chain, N2, Load);
+      break;
+    }
+    }
+
+    if (RModW) {
+      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
+      ++NumLoadMoved;
+    }
+  }
+}
+
+
+/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
+/// nodes that target the FP stack to be store and load to the stack. This is a
+/// gross hack. We would like to simply mark these as being illegal, but when
+/// we do that, legalize produces these when it expands calls, then expands
+/// these in the same legalize pass. We would like dag combine to be able to
+/// hack on these between the call expansion and the node legalization. As such
+/// this pass basically does "really late" legalization of these inline with the
+/// X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert() {
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ) {
+    SDNode *N = I++;  // Advance the iterator now to avoid invalidation issues.
+    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+      continue;
+
+    // If the source and destination are SSE registers, then this is a legal
+    // conversion that should not be lowered.
+    MVT SrcVT = N->getOperand(0).getValueType();
+    MVT DstVT = N->getValueType(0);
+    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+    if (SrcIsSSE && DstIsSSE)
+      continue;
+
+    if (!SrcIsSSE && !DstIsSSE) {
+      // If this is an FPStack extension, it is a noop.
+      if (N->getOpcode() == ISD::FP_EXTEND)
+        continue;
+      // If this is a value-preserving FPStack truncation, it is a noop.
+      if (N->getConstantOperandVal(1))
+        continue;
+    }
+
+    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+    // FPStack has extload and truncstore. SSE can fold direct loads into other
+    // operations. Based on this, decide what we want to do.
+    MVT MemVT;
+    if (N->getOpcode() == ISD::FP_ROUND)
+      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+    else
+      MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+    DebugLoc dl = N->getDebugLoc();
+
+    // FIXME: optimize the case where the src/dest is a load or store?
+    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+                                          N->getOperand(0),
+                                          MemTmp, NULL, 0, MemVT);
+    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                        NULL, 0, MemVT);
+
+    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+    // extload we created. This will cause general havoc on the dag because
+    // anything below the conversion could be folded into other existing nodes.
+    // To avoid invalidating 'I', back it up to the convert node.
+    --I;
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+    // Now that we did that, the node is dead.  Increment the iterator to the
+    // next node to process, then delete N.
+    ++I;
+    CurDAG->DeleteNode(N);
+  }
+}
+
+/// InstructionSelect - This callback is invoked by SelectionDAGISel
+/// when it has created a SelectionDAG for us to codegen.
+void X86DAGToDAGISel::InstructionSelect() {
+  CurBB = BB;  // BB can change as result of isel.
+  const Function *F = CurDAG->getMachineFunction().getFunction();
+  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
+
+  DEBUG(BB->dump());
+  if (OptLevel != CodeGenOpt::None)
+    PreprocessForRMW();
+
+  // FIXME: This should only happen when not compiled with -O0.
+  PreprocessForFPConvert();
+
+  // Codegen the basic block.
+#ifndef NDEBUG
+  DOUT << "===== Instruction selection begins:\n";
+  Indent = 0;
+#endif
+  SelectRoot(*CurDAG);
+#ifndef NDEBUG
+  DOUT << "===== Instruction selection ends:\n";
+#endif
+
+  CurDAG->RemoveDeadNodes();
+}
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
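+/// For example, on Cygwin/MinGW targets this emits a "call __main" so the
+/// GNU runtime can run static constructors before the user's code in main.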
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+                                             MachineFrameInfo *MFI) {
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  if (Subtarget->isTargetCygMing())
+    BuildMI(BB, DebugLoc::getUnknownLoc(),
+            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+  // If this is main, emit special code for main.
+  MachineBasicBlock *BB = MF.begin();
+  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
+    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+}
+
+
+bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
+                                              X86ISelAddressMode &AM) {
+  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
+  SDValue Segment = N.getOperand(0);
+
+  if (AM.Segment.getNode() == 0) {
+    AM.Segment = Segment;
+    return false;
+  }
+
+  return true;
+}
+
+bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+  // This optimization is valid because the GNU TLS model defines that
+  // gs:0 (or fs:0 on X86-64) contains its own address.
+  // For more information see http://people.redhat.com/drepper/tls.pdf
+
+  SDValue Address = N.getOperand(1);
+  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
+      !MatchSegmentBaseAddress(Address, AM))
+    return false;
+
+  return true;
+}
+
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+  bool is64Bit = Subtarget->is64Bit();
+  DOUT << "Wrapper: 64bit " << is64Bit;
+  DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
+
+  // Under X86-64 non-small code model, GV (and friends) are 64-bits.
+  if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
+    return true;
+
+  // Base and index reg must be 0 in order to use rip as base.
+  bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
+  if (is64Bit && !canUsePICRel && TM.symbolicAddressesAreRIPRel())
+    return true;
+
+  if (AM.hasSymbolicDisplacement())
+    return true;
+  // If the value is available in a register and both base and index components
+  // have already been picked, we can't fit the register into the addressing
+  // mode. Duplicate the GlobalAddress or ConstantPool as a displacement
+  // instead.
+
+  SDValue N0 = N.getOperand(0);
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+    uint64_t Offset = G->getOffset();
+    if (!is64Bit || isInt32(AM.Disp + Offset)) {
+      GlobalValue *GV = G->getGlobal();
+      bool isRIPRel = TM.symbolicAddressesAreRIPRel();
+      if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
+        TLSModel::Model model =
+          getTLSModel(GV, TM.getRelocationModel());
+        if (is64Bit && model == TLSModel::InitialExec)
+          isRIPRel = true;
+      }
+      AM.GV = GV;
+      AM.Disp += Offset;
+      AM.isRIPRel = isRIPRel;
+      return false;
+    }
+  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+    uint64_t Offset = CP->getOffset();
+    if (!is64Bit || isInt32(AM.Disp + Offset)) {
+      AM.CP = CP->getConstVal();
+      AM.Align = CP->getAlignment();
+      AM.Disp += Offset;
+      AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+      return false;
+    }
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+    AM.ES = S->getSymbol();
+    AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+    return false;
+  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+    AM.JT = J->getIndex();
+    AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+    return false;
+  }
+
+  return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
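+/// Note the return convention used throughout the matcher: false means the
+/// node was absorbed into AM, true means it could not be. For example,
+/// matching (add %x, (shl %y, 2)) leaves AM with Base.Reg = %x,
+/// IndexReg = %y, and Scale = 4, and returns false.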
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
+                                   unsigned Depth) {
+  bool is64Bit = Subtarget->is64Bit();
+  DebugLoc dl = N.getDebugLoc();
+  DOUT << "MatchAddress: "; DEBUG(AM.dump());
+  // Limit recursion.
+  if (Depth > 5)
+    return MatchAddressBase(N, AM);
+
+  // RIP relative addressing: %rip + 32-bit displacement!
+  if (AM.isRIPRel) {
+    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+      uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+      if (!is64Bit || isInt32(AM.Disp + Val)) {
+        AM.Disp += Val;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  switch (N.getOpcode()) {
+  default: break;
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    if (!is64Bit || isInt32(AM.Disp + Val)) {
+      AM.Disp += Val;
+      return false;
+    }
+    break;
+  }
+
+  case X86ISD::SegmentBaseAddress:
+    if (!MatchSegmentBaseAddress(N, AM))
+      return false;
+    break;
+
+  case X86ISD::Wrapper:
+    if (!MatchWrapper(N, AM))
+      return false;
+    break;
+
+  case ISD::LOAD:
+    if (!MatchLoad(N, AM))
+      return false;
+    break;
+
+  case ISD::FrameIndex:
+    if (AM.BaseType == X86ISelAddressMode::RegBase
+        && AM.Base.Reg.getNode() == 0) {
+      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+      return false;
+    }
+    break;
+
+  case ISD::SHL:
+    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
+      break;
+
+    if (ConstantSDNode
+          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+      unsigned Val = CN->getZExtValue();
+      if (Val == 1 || Val == 2 || Val == 3) {
+        AM.Scale = 1 << Val;
+        SDValue ShVal = N.getNode()->getOperand(0);
+
+        // Okay, we know that we have a scale by now.  However, if the scaled
+        // value is an add of something and a constant, we can fold the
+        // constant into the disp field here.
+        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+          AM.IndexReg = ShVal.getNode()->getOperand(0);
+          ConstantSDNode *AddVal =
+            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
+          if (!is64Bit || isInt32(Disp))
+            AM.Disp = Disp;
+          else
+            AM.IndexReg = ShVal;
+        } else {
+          AM.IndexReg = ShVal;
+        }
+        return false;
+      }
+      break;
+    }
+
+  case ISD::SMUL_LOHI:
+  case ISD::UMUL_LOHI:
+    // A mul_lohi where we need the low part can be folded as a plain multiply.
+    if (N.getResNo() != 0) break;
+    // FALL THROUGH
+  case ISD::MUL:
+  case X86ISD::MUL_IMM:
+    // X*[3,5,9] -> X+X*[2,4,8]
+    if (AM.BaseType == X86ISelAddressMode::RegBase &&
+        AM.Base.Reg.getNode() == 0 &&
+        AM.IndexReg.getNode() == 0 &&
+        !AM.isRIPRel) {
+      if (ConstantSDNode
+            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+            CN->getZExtValue() == 9) {
+          AM.Scale = unsigned(CN->getZExtValue())-1;
+
+          SDValue MulVal = N.getNode()->getOperand(0);
+          SDValue Reg;
+
+          // Okay, we know that we have a scale by now.  However, if the scaled
+          // value is an add of something and a constant, we can fold the
+          // constant into the disp field here.
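+          // For example, (mul (add %x, 4), 9) can be matched as the address
+          // %x + 8*%x + 36: Reg becomes %x, Scale is 8, and the constant
+          // folds into Disp as 4*9 = 36.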
+          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+            Reg = MulVal.getNode()->getOperand(0);
+            ConstantSDNode *AddVal =
+              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
+                                      CN->getZExtValue();
+            if (!is64Bit || isInt32(Disp))
+              AM.Disp = Disp;
+            else
+              Reg = N.getNode()->getOperand(0);
+          } else {
+            Reg = N.getNode()->getOperand(0);
+          }
+
+          AM.IndexReg = AM.Base.Reg = Reg;
+          return false;
+        }
+    }
+    break;
+
+  case ISD::SUB: {
+    // Given A-B, if A can be completely folded into the address, leaving the
+    // index field unused, use -B as the index. This is a win if A has
+    // multiple parts that can be folded into the address. Also, this saves
+    // a mov if the base register has other uses, since it avoids a
+    // two-address sub instruction; however, it costs an additional mov if
+    // the index register has other uses.
+
+    // Test if the LHS of the sub can be folded.
+    X86ISelAddressMode Backup = AM;
+    if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+      AM = Backup;
+      break;
+    }
+    // Test if the index field is free for use.
+    if (AM.IndexReg.getNode() || AM.isRIPRel) {
+      AM = Backup;
+      break;
+    }
+    int Cost = 0;
+    SDValue RHS = N.getNode()->getOperand(1);
+    // If the RHS involves a register with multiple uses, this
+    // transformation incurs an extra mov, due to the neg instruction
+    // clobbering its operand.
+    if (!RHS.getNode()->hasOneUse() ||
+        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+      ++Cost;
+    // If the base is a register with multiple uses, this
+    // transformation may save a mov.
+    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+         AM.Base.Reg.getNode() &&
+         !AM.Base.Reg.getNode()->hasOneUse()) ||
+        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+      --Cost;
+    // If the folded LHS was interesting, this transformation saves
+    // address arithmetic.
+    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+        ((AM.Disp != 0) && (Backup.Disp == 0)) +
+        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+      --Cost;
+    // If it doesn't look like it may be an overall win, don't do it.
+    if (Cost >= 0) {
+      AM = Backup;
+      break;
+    }
+
+    // Ok, the transformation is legal and appears profitable. Go for it.
+    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+    AM.IndexReg = Neg;
+    AM.Scale = 1;
+
+    // Insert the new nodes into the topological ordering.
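+    // The selection machinery visits nodes in topological order and uses the
+    // NodeId fields to track that order; nodes built after the ids were
+    // assigned start out with NodeId == -1, so they are repositioned before N
+    // in the node list and given N's id to keep the ordering consistent.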
+    if (Zero.getNode()->getNodeId() == -1 ||
+        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+    }
+    if (Neg.getNode()->getNodeId() == -1 ||
+        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+    }
+    return false;
+  }
+
+  case ISD::ADD: {
+    X86ISelAddressMode Backup = AM;
+    if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
+        !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+      return false;
+    AM = Backup;
+    if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
+        !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+      return false;
+    AM = Backup;
+
+    // If we couldn't fold both operands into the address at the same time,
+    // see if we can just put each operand into a register and fold at least
+    // the add.
+    if (AM.BaseType == X86ISelAddressMode::RegBase &&
+        !AM.Base.Reg.getNode() &&
+        !AM.IndexReg.getNode() &&
+        !AM.isRIPRel) {
+      AM.Base.Reg = N.getNode()->getOperand(0);
+      AM.IndexReg = N.getNode()->getOperand(1);
+      AM.Scale = 1;
+      return false;
+    }
+    break;
+  }
+
+  case ISD::OR:
+    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      X86ISelAddressMode Backup = AM;
+      uint64_t Offset = CN->getSExtValue();
+      // Start with the LHS as an addr mode.
+      if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
+          // Address could not have picked a GV address for the displacement.
+          AM.GV == NULL &&
+          // On x86-64, the resultant disp must fit in 32-bits.
+          (!is64Bit || isInt32(AM.Disp + Offset)) &&
+          // Check to see if the LHS & C is zero.
+          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+        AM.Disp += Offset;
+        return false;
+      }
+      AM = Backup;
+    }
+    break;
+
+  case ISD::AND: {
+    // Perform some heroic transforms on an and of a constant-count shift
+    // with a constant to enable use of the scaled offset field.
+
+    SDValue Shift = N.getOperand(0);
+    if (Shift.getNumOperands() != 2) break;
+
+    // Scale must not be used already.
+    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+    // Not when RIP is used as the base.
+    if (AM.isRIPRel) break;
+
+    SDValue X = Shift.getOperand(0);
+    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+    if (!C1 || !C2) break;
+
+    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff" if safe. This
+    // allows us to convert the shift and and into an h-register extract and
+    // a scaled index.
+    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+      unsigned ScaleLog = 8 - C1->getZExtValue();
+      if (ScaleLog > 0 && ScaleLog < 4 &&
+          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+                                      X, Eight);
+        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+                                      Srl, Mask);
+        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+                                      And, ShlCount);
+
+        // Insert the new nodes into the topological ordering.
+        if (Eight.getNode()->getNodeId() == -1 ||
+            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+        }
+        if (Mask.getNode()->getNodeId() == -1 ||
+            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+        }
+        if (Srl.getNode()->getNodeId() == -1 ||
+            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+        }
+        if (And.getNode()->getNodeId() == -1 ||
+            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(N.getNode(), And.getNode());
+          And.getNode()->setNodeId(N.getNode()->getNodeId());
+        }
+        if (ShlCount.getNode()->getNodeId() == -1 ||
+            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+        }
+        if (Shl.getNode()->getNodeId() == -1 ||
+            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+        }
+        CurDAG->ReplaceAllUsesWith(N, Shl);
+        AM.IndexReg = And;
+        AM.Scale = (1 << ScaleLog);
+        return false;
+      }
+    }
+
+    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+    // allows us to fold the shift into this addressing mode.
+    if (Shift.getOpcode() != ISD::SHL) break;
+
+    // Not likely to be profitable if either the AND or SHIFT node has more
+    // than one use (unless all uses are for address computation). Besides,
+    // isel mechanism requires their node ids to be reused.
+    if (!N.hasOneUse() || !Shift.hasOneUse())
+      break;
+
+    // Verify that the shift amount is something we can fold.
+    unsigned ShiftCst = C1->getZExtValue();
+    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+      break;
+
+    // Get the new AND mask, this folds to a constant.
+    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+                                         SDValue(C2, 0), SDValue(C1, 0));
+    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+                                     NewANDMask);
+    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+                                       NewAND, SDValue(C1, 0));
+
+    // Insert the new nodes into the topological ordering.
+    if (C1->getNodeId() > X.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(X.getNode(), C1);
+      C1->setNodeId(X.getNode()->getNodeId());
+    }
+    if (NewANDMask.getNode()->getNodeId() == -1 ||
+        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+    }
+    if (NewAND.getNode()->getNodeId() == -1 ||
+        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+    }
+    if (NewSHIFT.getNode()->getNodeId() == -1 ||
+        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+    }
+
+    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+    AM.Scale = 1 << ShiftCst;
+    AM.IndexReg = NewAND;
+    return false;
+  }
+  }
+
+  return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+  // Is the base register already occupied?
+  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+    // If so, check to see if the scale index register is set.
+    if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
+      AM.IndexReg = N;
+      AM.Scale = 1;
+      return false;
+    }
+
+    // Otherwise, we cannot select it.
+    return true;
+  }
+
+  // Default, generate it as a register.
+  AM.BaseType = X86ISelAddressMode::RegBase;
+  AM.Base.Reg = N;
+  return false;
+}
+
+/// SelectAddr - returns true if it is able to pattern match an addressing
+/// mode. It returns the operands which make up the maximal addressing mode it
+/// can match by reference.
+bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+                                 SDValue &Scale, SDValue &Index,
+                                 SDValue &Disp, SDValue &Segment) {
+  X86ISelAddressMode AM;
+  bool Done = false;
+  if (AvoidDupAddrCompute && !N.hasOneUse()) {
+    unsigned Opcode = N.getOpcode();
+    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
+        Opcode != X86ISD::Wrapper) {
+      // If we are able to fold N into addressing mode, then we'll allow it even
+      // if N has multiple uses. In general, addressing computation is used as
+      // addresses by all of its uses. But watch out for CopyToReg uses, that
+      // means the address computation is liveout. It will be computed by a LEA
+      // so we want to avoid computing the address twice.
+      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
+             UE = N.getNode()->use_end(); UI != UE; ++UI) {
+        if (UI->getOpcode() == ISD::CopyToReg) {
+          MatchAddressBase(N, AM);
+          Done = true;
+          break;
+        }
+      }
+    }
+  }
+
+  if (!Done && MatchAddress(N, AM))
+    return false;
+
+  MVT VT = N.getValueType();
+  if (AM.BaseType == X86ISelAddressMode::RegBase) {
+    if (!AM.Base.Reg.getNode())
+      AM.Base.Reg = CurDAG->getRegister(0, VT);
+  }
+
+  if (!AM.IndexReg.getNode())
+    AM.IndexReg = CurDAG->getRegister(0, VT);
+
+  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+  return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
+                                          SDValue N, SDValue &Base,
+                                          SDValue &Scale, SDValue &Index,
+                                          SDValue &Disp, SDValue &Segment,
+                                          SDValue &InChain,
+                                          SDValue &OutChain) {
+  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    InChain = N.getOperand(0).getValue(1);
+    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
+        InChain.getValue(0).hasOneUse() &&
+        N.hasOneUse() &&
+        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
+      LoadSDNode *LD = cast<LoadSDNode>(InChain);
+      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+        return false;
+      OutChain = LD->getChain();
+      return true;
+    }
+  }
+
+  // Also handle the case where we explicitly require zeros in the top
+  // elements.  This is a vector shuffle from the zero vector.
+  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+      // Check to see if the top elements are all zeros (or bitcast of zeros).
+      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      N.getOperand(0).getNode()->hasOneUse() &&
+      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+      N.getOperand(0).getOperand(0).hasOneUse()) {
+    // Okay, this is a zero extending load.  Fold it.
+    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+      return false;
+    OutChain = LD->getChain();
+    InChain = SDValue(LD, 1);
+    return true;
+  }
+  return false;
+}
+
+
+/// SelectLEAAddr - It calls SelectAddr and determines if the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LEA
+/// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
+                                    SDValue &Base, SDValue &Scale,
+                                    SDValue &Index, SDValue &Disp) {
+  X86ISelAddressMode AM;
+
+  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+  // segments.
+  SDValue Copy = AM.Segment;
+  SDValue T = CurDAG->getRegister(0, MVT::i32);
+  AM.Segment = T;
+  if (MatchAddress(N, AM))
+    return false;
+  assert(T == AM.Segment);
+  AM.Segment = Copy;
+
+  MVT VT = N.getValueType();
+  unsigned Complexity = 0;
+  if (AM.BaseType == X86ISelAddressMode::RegBase)
+    if (AM.Base.Reg.getNode())
+      Complexity = 1;
+    else
+      AM.Base.Reg = CurDAG->getRegister(0, VT);
+  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+    Complexity = 4;
+
+  if (AM.IndexReg.getNode())
+    Complexity++;
+  else
+    AM.IndexReg = CurDAG->getRegister(0, VT);
+
+  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
+  // a simple shift.
+  if (AM.Scale > 1)
+    Complexity++;
+
+  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+  // to a LEA. This is determined with some experimentation but is by no means
+  // optimal (especially for code size consideration). LEA is nice because of
+  // its three-address nature. Tweak the cost function again when we can run
+  // convertToThreeAddress() at register allocation time.
+  if (AM.hasSymbolicDisplacement()) {
+    // For X86-64, we should always use lea to materialize RIP relative
+    // addresses.
+    if (Subtarget->is64Bit())
+      Complexity = 4;
+    else
+      Complexity += 2;
+  }
+
+  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+    Complexity++;
+
+  if (Complexity > 2) {
+    SDValue Segment;
+    getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+    return true;
+  }
+  return false;
+}
+
+bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+                                  SDValue &Base, SDValue &Scale,
+                                  SDValue &Index, SDValue &Disp,
+                                  SDValue &Segment) {
+  if (ISD::isNON_EXTLoad(N.getNode()) &&
+      N.hasOneUse() &&
+      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+  return false;
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
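+/// On 32-bit PIC targets this is the register that holds the address the
+/// code was loaded at (materialized with the usual "call next; next: pop"
+/// idiom); the set-up itself is emitted lazily by the target's
+/// getGlobalBaseReg(MF), which the body below calls.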
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+  MachineFunction *MF = CurBB->getParent();
+  unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+static SDNode *FindCallStartFromCall(SDNode *Node) {
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCall(Node->getOperand(0).getNode());
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+  SDValue Chain = Node->getOperand(0);
+  SDValue In1 = Node->getOperand(1);
+  SDValue In2L = Node->getOperand(2);
+  SDValue In2H = Node->getOperand(3);
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+    return NULL;
+  SDValue LSI = Node->getOperand(4);    // MemOperand
+  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
+  return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
+                               MVT::i32, MVT::i32, MVT::Other, Ops,
+                               array_lengthof(Ops));
+}
+
+SDNode *X86DAGToDAGISel::Select(SDValue N) {
+  SDNode *Node = N.getNode();
+  MVT NVT = Node->getValueType(0);
+  unsigned Opc, MOpc;
+  unsigned Opcode = Node->getOpcode();
+  DebugLoc dl = Node->getDebugLoc();
+
+#ifndef NDEBUG
+  DOUT << std::string(Indent, ' ') << "Selecting: ";
+  DEBUG(Node->dump(CurDAG));
+  DOUT << "\n";
+  Indent += 2;
+#endif
+
+  if (Node->isMachineOpcode()) {
+#ifndef NDEBUG
+    DOUT << std::string(Indent-2, ' ') << "== ";
+    DEBUG(Node->dump(CurDAG));
+    DOUT << "\n";
+    Indent -= 2;
+#endif
+    return NULL;   // Already selected.
+  }
+
+  switch (Opcode) {
+  default: break;
+  case X86ISD::GlobalBaseReg:
+    return getGlobalBaseReg();
+
+  case X86ISD::ATOMOR64_DAG:
+    return SelectAtomic64(Node, X86::ATOMOR6432);
+  case X86ISD::ATOMXOR64_DAG:
+    return SelectAtomic64(Node, X86::ATOMXOR6432);
+  case X86ISD::ATOMADD64_DAG:
+    return SelectAtomic64(Node, X86::ATOMADD6432);
+  case X86ISD::ATOMSUB64_DAG:
+    return SelectAtomic64(Node, X86::ATOMSUB6432);
+  case X86ISD::ATOMNAND64_DAG:
+    return SelectAtomic64(Node, X86::ATOMNAND6432);
+  case X86ISD::ATOMAND64_DAG:
+    return SelectAtomic64(Node, X86::ATOMAND6432);
+  case X86ISD::ATOMSWAP64_DAG:
+    return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+  case ISD::SMUL_LOHI:
+  case ISD::UMUL_LOHI: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    bool isSigned = Opcode == ISD::SMUL_LOHI;
+    if (!isSigned)
+      switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
+      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+      }
+    else
+      switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
+      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+      }
+
+    unsigned LoReg, HiReg;
+    switch (NVT.getSimpleVT()) {
+    default: assert(0 && "Unsupported VT!");
+    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
+    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
+    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+    }
+
+    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+    // Multiply is commutative.
+    if (!foldedLoad) {
+      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+      if (foldedLoad)
+        std::swap(N0, N1);
+    }
+
+    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+                                          N0, SDValue()).getValue(1);
+
+    if (foldedLoad) {
+      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+                        InFlag };
+      SDNode *CNode =
+        CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+                              array_lengthof(Ops));
+      InFlag = SDValue(CNode, 1);
+      // Update the chain.
+      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+    } else {
+      InFlag =
+        SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+    }
+
+    // Copy the low half of the result, if it is needed.
+    if (!N.getValue(0).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              LoReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
+      ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result.getNode()->dump(CurDAG));
+      DOUT << "\n";
+#endif
+    }
+    // Copy the high half of the result, if it is needed.
+    if (!N.getValue(1).use_empty()) {
+      SDValue Result;
+      if (HiReg == X86::AH && Subtarget->is64Bit()) {
+        // Prevent use of AH in a REX instruction by referencing AX instead.
+        // Shift it down 8 bits.
+        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                        X86::AX, MVT::i16, InFlag);
+        InFlag = Result.getValue(2);
+        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+                                               Result,
+                                               CurDAG->getTargetConstant(8, MVT::i8)), 0);
+        // Then truncate it down to i8.
+        SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+                                               MVT::i8, Result, SRIdx), 0);
+      } else {
+        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                        HiReg, NVT, InFlag);
+        InFlag = Result.getValue(2);
+      }
+      ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result.getNode()->dump(CurDAG));
+      DOUT << "\n";
+#endif
+    }
+
+#ifndef NDEBUG
+    Indent -= 2;
+#endif
+
+    return NULL;
+  }
+
+  case ISD::SDIVREM:
+  case ISD::UDIVREM: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    bool isSigned = Opcode == ISD::SDIVREM;
+    if (!isSigned)
+      switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
+      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+      }
+    else
+      switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
+      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+      }
+
+    unsigned LoReg, HiReg;
+    unsigned ClrOpcode, SExtOpcode;
+    switch (NVT.getSimpleVT()) {
+    default: assert(0 && "Unsupported VT!");
+    case MVT::i8:
+      LoReg = X86::AL;  HiReg = X86::AH;
+      ClrOpcode  = 0;
+      SExtOpcode = X86::CBW;
+      break;
+    case MVT::i16:
+      LoReg = X86::AX;  HiReg = X86::DX;
+      ClrOpcode  = X86::MOV16r0;
+      SExtOpcode = X86::CWD;
+      break;
+    case MVT::i32:
+      LoReg = X86::EAX; HiReg = X86::EDX;
+      ClrOpcode  = X86::MOV32r0;
+      SExtOpcode = X86::CDQ;
+      break;
+    case MVT::i64:
+      LoReg = X86::RAX; HiReg = X86::RDX;
+      ClrOpcode  = X86::MOV64r0;
+      SExtOpcode = X86::CQO;
+      break;
+    }
+
+    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+    SDValue InFlag;
+    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+      // Special case for div8, just use a move with zero extension to AX to
+      // clear the upper 8 bits (AH).
+      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+        Move =
+          SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
+                                        MVT::Other, Ops,
+                                        array_lengthof(Ops)), 0);
+        Chain = Move.getValue(1);
+        ReplaceUses(N0.getValue(1), Chain);
+      } else {
+        Move =
+          SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+        Chain = CurDAG->getEntryNode();
+      }
+      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+      InFlag = Chain.getValue(1);
+    } else {
+      InFlag =
+        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+                             LoReg, N0, SDValue()).getValue(1);
+      if (isSigned && !signBitIsZero) {
+        // Sign extend the low part into the high part.
+        InFlag =
+          SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+      } else {
+        // Zero out the high part, effectively zero extending the input.
+        SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
+                                  0);
+        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
+                                      ClrNode, InFlag).getValue(1);
+      }
+    }
+
+    if (foldedLoad) {
+      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+                        InFlag };
+      SDNode *CNode =
+        CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+                              array_lengthof(Ops));
+      InFlag = SDValue(CNode, 1);
+      // Update the chain.
+      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+    } else {
+      InFlag =
+        SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+    }
+
+    // Copy the division (low) result, if it is needed.
+    if (!N.getValue(0).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              LoReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
+      ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result.getNode()->dump(CurDAG));
+      DOUT << "\n";
+#endif
+    }
+    // Copy the remainder (high) result, if it is needed.
+    if (!N.getValue(1).use_empty()) {
+      SDValue Result;
+      if (HiReg == X86::AH && Subtarget->is64Bit()) {
+        // Prevent use of AH in a REX instruction by referencing AX instead.
+        // Shift it down 8 bits.
+        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                        X86::AX, MVT::i16, InFlag);
+        InFlag = Result.getValue(2);
+        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+                                               Result,
+                                               CurDAG->getTargetConstant(8, MVT::i8)),
+                         0);
+        // Then truncate it down to i8.
+        SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+                                               MVT::i8, Result, SRIdx), 0);
+      } else {
+        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                        HiReg, NVT, InFlag);
+        InFlag = Result.getValue(2);
+      }
+      ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result.getNode()->dump(CurDAG));
+      DOUT << "\n";
+#endif
+    }
+
+#ifndef NDEBUG
+    Indent -= 2;
+#endif
+
+    return NULL;
+  }
+
+  case ISD::DECLARE: {
+    // Handle DECLARE nodes here because the second operand may have been
+    // wrapped in X86ISD::Wrapper.
+    SDValue Chain = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    SDValue N2 = Node->getOperand(2);
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+    // FIXME: We need to handle this for VLAs.
+    if (!FINode) {
+      ReplaceUses(N.getValue(0), Chain);
+      return NULL;
+    }
+
+    if (N2.getOpcode() == ISD::ADD &&
+        N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
+      N2 = N2.getOperand(1);
+
+    // If N2 is not Wrapper(descriptor) then the llvm.declare is mangled
+    // somehow, just ignore it.
+    if (N2.getOpcode() != X86ISD::Wrapper) {
+      ReplaceUses(N.getValue(0), Chain);
+      return NULL;
+    }
+    GlobalAddressSDNode *GVNode =
+      dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
+    if (GVNode == 0) {
+      ReplaceUses(N.getValue(0), Chain);
+      return NULL;
+    }
+    SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
+                                               TLI.getPointerTy());
+    SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
+                                                  TLI.getPointerTy());
+    SDValue Ops[] = { Tmp1, Tmp2, Chain };
+    return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
+                                 MVT::Other, Ops,
+                                 array_lengthof(Ops));
+  }
+  }
+
+  SDNode *ResNode = SelectCode(N);
+
+#ifndef NDEBUG
+  DOUT << std::string(Indent-2, ' ') << "=> ";
+  if (ResNode == NULL || ResNode == N.getNode())
+    DEBUG(N.getNode()->dump(CurDAG));
+  else
+    DEBUG(ResNode->dump(CurDAG));
+  DOUT << "\n";
+  Indent -= 2;
+#endif
+
+  return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  SDValue Op0, Op1, Op2, Op3, Op4;
+  switch (ConstraintCode) {
+  case 'o':   // offsetable        ??
+  case 'v':   // not offsetable    ??
+  default: return true;
+  case 'm':   // memory
+    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
+      return true;
+    break;
+  }
+
+  OutOps.push_back(Op0);
+  OutOps.push_back(Op1);
+  OutOps.push_back(Op2);
+  OutOps.push_back(Op3);
+  OutOps.push_back(Op4);
+  return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+                                     llvm::CodeGenOpt::Level OptLevel) {
+  return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 000000000000..882ee3a01f1e
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,8794 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+                       SDValue V2);
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+  : TargetLowering(TM) {
+  Subtarget = &TM.getSubtarget<X86Subtarget>();
+  X86ScalarSSEf64 = Subtarget->hasSSE2();
+  X86ScalarSSEf32 = Subtarget->hasSSE1();
+  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+  RegInfo = TM.getRegisterInfo();
+  TD = getTargetData();
+
+  // Set up the TargetLowering object.
+
+  // X86 is weird, it always uses i8 for shift amounts and setcc results.
+  setShiftAmountType(MVT::i8);
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setSchedulingPreference(SchedulingForRegPressure);
+  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
+  setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+  if (Subtarget->isTargetDarwin()) {
+    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+    setUseUnderscoreSetJmp(false);
+    setUseUnderscoreLongJmp(false);
+  } else if (Subtarget->isTargetMingw()) {
+    // MS runtime is weird: it exports _setjmp, but longjmp!
+    setUseUnderscoreSetJmp(true);
+    setUseUnderscoreLongJmp(false);
+  } else {
+    setUseUnderscoreSetJmp(true);
+    setUseUnderscoreLongJmp(true);
+  }
+
+  // Set up the register classes.
+  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+  if (Subtarget->is64Bit())
+    addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+  // We don't accept any truncstore of integer registers.
+  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
+
+  // SETOEQ and SETUNE require checking two conditions.
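+  // (ucomiss/ucomisd set ZF both for "equal" and for "unordered", with PF
+  // distinguishing the two: OEQ needs ZF && !PF and UNE needs !ZF || PF,
+  // and no single jcc can test both flags at once.)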
+  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+  // operation.
+  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
+  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
+  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
+
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
+    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
+  } else if (!UseSoftFloat) {
+    if (X86ScalarSSEf64) {
+      // We have an impenetrably clever algorithm for ui64->double only.
+      setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Custom);
+    }
+    // We have an algorithm for SSE2, and we turn this into a 64-bit
+    // FILD for other targets.
+    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
+  }
+
+  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+  // this operation.
+  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
+  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
+
+  if (!UseSoftFloat && !NoImplicitFloat) {
+    // SSE has no i16 to fp conversion, only i32
+    if (X86ScalarSSEf32) {
+      setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Promote);
+      // f32 and f64 cases are Legal, f80 case is not
+      setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
+    } else {
+      setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Custom);
+      setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
+    }
+  } else {
+    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
+    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
+  }
+
+  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
+  // are Legal, f80 is custom lowered.
+  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
+  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
+
+  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
+  // this operation.
+  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
+  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
+
+  if (X86ScalarSSEf32) {
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
+    // f32 and f64 cases are Legal, f80 case is not
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
+  } else {
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
+  }
+
+  // Handle FP_TO_UINT by promoting the destination to a larger signed
+  // conversion.
+  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
+  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
+  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
+
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
+    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
+  } else if (!UseSoftFloat) {
+    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+      // Expand FP_TO_UINT into a select.
+      // FIXME: We would like to use a Custom expander here eventually to do
+      // the optimal thing for SSE vs. the default expansion in the legalizer.
+      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
+    else
+      // With SSE3 we can use fisttpll to convert to a signed i64; without
+      // SSE, we're stuck with a fistpll.
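+      // (Going through a signed i64 conversion covers FP_TO_UINT of i32,
+      // since every u32 value is exactly representable as a signed i64; the
+      // lowering can then simply take the low 32 bits.)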
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + } + + // TODO: when we have SSE, these could be more efficient, by using movd/movq. + if (!X86ScalarSSEf64) { + setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); + setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + } + + // Scalar integer divide and remainder are lowered to use operations that + // produce two results, to match the available instructions. This exposes + // the two-result form to trivial CSE, which is able to combine x/y and x%y + // into a single instruction. + // + // Scalar integer multiply-high is also lowered to use two-result + // operations, to match the available instructions. However, plain multiply + // (low) operations are left as Legal, as there are single-result + // instructions for this in x86. Using the two-result multiply instructions + // when both high and low results are needed must be arranged by dagcombine. + setOperationAction(ISD::MULHS , MVT::i8 , Expand); + setOperationAction(ISD::MULHU , MVT::i8 , Expand); + setOperationAction(ISD::SDIV , MVT::i8 , Expand); + setOperationAction(ISD::UDIV , MVT::i8 , Expand); + setOperationAction(ISD::SREM , MVT::i8 , Expand); + setOperationAction(ISD::UREM , MVT::i8 , Expand); + setOperationAction(ISD::MULHS , MVT::i16 , Expand); + setOperationAction(ISD::MULHU , MVT::i16 , Expand); + setOperationAction(ISD::SDIV , MVT::i16 , Expand); + setOperationAction(ISD::UDIV , MVT::i16 , Expand); + setOperationAction(ISD::SREM , MVT::i16 , Expand); + setOperationAction(ISD::UREM , MVT::i16 , Expand); + setOperationAction(ISD::MULHS , MVT::i32 , Expand); + setOperationAction(ISD::MULHU , MVT::i32 , Expand); + setOperationAction(ISD::SDIV , MVT::i32 , Expand); + setOperationAction(ISD::UDIV , MVT::i32 , Expand); + setOperationAction(ISD::SREM , MVT::i32 , Expand); + setOperationAction(ISD::UREM , MVT::i32 , Expand); + setOperationAction(ISD::MULHS , MVT::i64 , Expand); + setOperationAction(ISD::MULHU , MVT::i64 , Expand); + setOperationAction(ISD::SDIV , MVT::i64 , Expand); + setOperationAction(ISD::UDIV , MVT::i64 , Expand); + setOperationAction(ISD::SREM , MVT::i64 , Expand); + setOperationAction(ISD::UREM , MVT::i64 , Expand); + + setOperationAction(ISD::BR_JT , MVT::Other, Expand); + setOperationAction(ISD::BRCOND , MVT::Other, Custom); + setOperationAction(ISD::BR_CC , MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); + setOperationAction(ISD::FREM , MVT::f32 , Expand); + setOperationAction(ISD::FREM , MVT::f64 , Expand); + setOperationAction(ISD::FREM , MVT::f80 , Expand); + setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); + setOperationAction(ISD::CTTZ , MVT::i8 , Custom); + setOperationAction(ISD::CTLZ , MVT::i8 , Custom); + setOperationAction(ISD::CTPOP , MVT::i16 , Expand); + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ , MVT::i32 , Custom); + setOperationAction(ISD::CTLZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::CTPOP , MVT::i64 , 
Expand);
+    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
+    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
+  }
+
+  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
+  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
+
+  // These should be promoted to a larger select which is supported.
+  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
+  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
+  // X86 wants to expand cmov itself.
+  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
+  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
+  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
+  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
+  setOperationAction(ISD::SELECT           , MVT::f80  , Custom);
+  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
+  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
+  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
+  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
+  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
+  setOperationAction(ISD::SETCC            , MVT::f80  , Custom);
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
+    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
+  }
+  // X86 ret instruction may pop stack.
+  setOperationAction(ISD::RET              , MVT::Other, Custom);
+  setOperationAction(ISD::EH_RETURN        , MVT::Other, Custom);
+
+  // Darwin ABI issue.
+  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
+  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
+  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
+  setOperationAction(ISD::GlobalTLSAddress , MVT::i32  , Custom);
+  if (Subtarget->is64Bit())
+    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
+    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
+    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
+    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
+  }
+  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
+  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
+  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::SHL_PARTS      , MVT::i64  , Custom);
+    setOperationAction(ISD::SRA_PARTS      , MVT::i64  , Custom);
+    setOperationAction(ISD::SRL_PARTS      , MVT::i64  , Custom);
+  }
+
+  if (Subtarget->hasSSE1())
+    setOperationAction(ISD::PREFETCH       , MVT::Other, Legal);
+
+  if (!Subtarget->hasSSE2())
+    setOperationAction(ISD::MEMBARRIER     , MVT::Other, Expand);
+
+  // Expand certain atomics
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+
+  if (!Subtarget->is64Bit()) {
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + } + + // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + // FIXME - use subtarget debug flags + if (!Subtarget->isTargetDarwin() && + !Subtarget->isTargetELF() && + !Subtarget->isTargetCygMing()) { + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + } + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + if (Subtarget->is64Bit()) { + setExceptionPointerRegister(X86::RAX); + setExceptionSelectorRegister(X86::RDX); + } else { + setExceptionPointerRegister(X86::EAX); + setExceptionSelectorRegister(X86::EDX); + } + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); + + setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VASTART , MVT::Other, Custom); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::VAARG , MVT::Other, Custom); + setOperationAction(ISD::VACOPY , MVT::Other, Custom); + } else { + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + } + + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + if (Subtarget->isTargetCygMing()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + + if (!UseSoftFloat && X86ScalarSSEf64) { + // f32 and f64 use SSE. + // Set up the FP register classes. + addRegisterClass(MVT::f32, X86::FR32RegisterClass); + addRegisterClass(MVT::f64, X86::FR64RegisterClass); + + // Use ANDPD to simulate FABS. + setOperationAction(ISD::FABS , MVT::f64, Custom); + setOperationAction(ISD::FABS , MVT::f32, Custom); + + // Use XORP to simulate FNEG. + setOperationAction(ISD::FNEG , MVT::f64, Custom); + setOperationAction(ISD::FNEG , MVT::f32, Custom); + + // Use ANDPD and ORPD to simulate FCOPYSIGN. + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + + // We don't support sin/cos/fmod + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + + // Expand FP immediates into loads from the stack, except for the special + // cases we handle. + addLegalFPImmediate(APFloat(+0.0)); // xorpd + addLegalFPImmediate(APFloat(+0.0f)); // xorps + } else if (!UseSoftFloat && X86ScalarSSEf32) { + // Use SSE for f32, x87 for f64. + // Set up the FP register classes. + addRegisterClass(MVT::f32, X86::FR32RegisterClass); + addRegisterClass(MVT::f64, X86::RFP64RegisterClass); + + // Use ANDPS to simulate FABS. + setOperationAction(ISD::FABS , MVT::f32, Custom); + + // Use XORP to simulate FNEG. 
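+    // Both are pure sign-bit tricks: FABS becomes an ANDPS with a constant
+    // that clears the sign bit (0x7fffffff for f32) and FNEG an XORPS with
+    // a constant that flips it (0x80000000), so neither can trap or raise
+    // FP exceptions.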
+ setOperationAction(ISD::FNEG , MVT::f32, Custom); + + setOperationAction(ISD::UNDEF, MVT::f64, Expand); + + // Use ANDPS and ORPS to simulate FCOPYSIGN. + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + + // We don't support sin/cos/fmod + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + + // Special cases we handle for FP constants. + addLegalFPImmediate(APFloat(+0.0f)); // xorps + addLegalFPImmediate(APFloat(+0.0)); // FLD0 + addLegalFPImmediate(APFloat(+1.0)); // FLD1 + addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS + addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS + + if (!UnsafeFPMath) { + setOperationAction(ISD::FSIN , MVT::f64 , Expand); + setOperationAction(ISD::FCOS , MVT::f64 , Expand); + } + } else if (!UseSoftFloat) { + // f32 and f64 in x87. + // Set up the FP register classes. + addRegisterClass(MVT::f64, X86::RFP64RegisterClass); + addRegisterClass(MVT::f32, X86::RFP32RegisterClass); + + setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::UNDEF, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + + if (!UnsafeFPMath) { + setOperationAction(ISD::FSIN , MVT::f64 , Expand); + setOperationAction(ISD::FCOS , MVT::f64 , Expand); + } + addLegalFPImmediate(APFloat(+0.0)); // FLD0 + addLegalFPImmediate(APFloat(+1.0)); // FLD1 + addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS + addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS + addLegalFPImmediate(APFloat(+0.0f)); // FLD0 + addLegalFPImmediate(APFloat(+1.0f)); // FLD1 + addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS + addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS + } + + // Long double always uses X87. + if (!UseSoftFloat) { + addRegisterClass(MVT::f80, X86::RFP80RegisterClass); + setOperationAction(ISD::UNDEF, MVT::f80, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); + { + bool ignored; + APFloat TmpFlt(+0.0); + TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, + &ignored); + addLegalFPImmediate(TmpFlt); // FLD0 + TmpFlt.changeSign(); + addLegalFPImmediate(TmpFlt); // FLD0/FCHS + APFloat TmpFlt2(+1.0); + TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, + &ignored); + addLegalFPImmediate(TmpFlt2); // FLD1 + TmpFlt2.changeSign(); + addLegalFPImmediate(TmpFlt2); // FLD1/FCHS + } + + if (!UnsafeFPMath) { + setOperationAction(ISD::FSIN , MVT::f80 , Expand); + setOperationAction(ISD::FCOS , MVT::f80 , Expand); + } + } + + // Always use a library call for pow. + setOperationAction(ISD::FPOW , MVT::f32 , Expand); + setOperationAction(ISD::FPOW , MVT::f64 , Expand); + setOperationAction(ISD::FPOW , MVT::f80 , Expand); + + setOperationAction(ISD::FLOG, MVT::f80, Expand); + setOperationAction(ISD::FLOG2, MVT::f80, Expand); + setOperationAction(ISD::FLOG10, MVT::f80, Expand); + setOperationAction(ISD::FEXP, MVT::f80, Expand); + setOperationAction(ISD::FEXP2, MVT::f80, Expand); + + // First set operation action for all vector types to either promote + // (for widening) or expand (for scalarization). Then we will selectively + // turn on ones that can be effectively codegen'd. 
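+  // Starting from "everything Expand" means a vector operation that is not
+  // explicitly re-enabled below gets scalarized by the legalizer instead of
+  // reaching instruction selection unsupported.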
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand); + } + + // FIXME: In order to prevent SSE instructions being expanded to MMX ones + // with -msoft-float, disable use of MMX as well. 
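+  // (Enabling MMX implicitly is also risky on its own: the MM registers
+  // alias the x87 floating-point stack, so code that touches them must run
+  // EMMS before any x87 arithmetic executes again.)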
+ if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { + addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); + addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); + addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); + addRegisterClass(MVT::v2f32, X86::VR64RegisterClass); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); + + setOperationAction(ISD::ADD, MVT::v8i8, Legal); + setOperationAction(ISD::ADD, MVT::v4i16, Legal); + setOperationAction(ISD::ADD, MVT::v2i32, Legal); + setOperationAction(ISD::ADD, MVT::v1i64, Legal); + + setOperationAction(ISD::SUB, MVT::v8i8, Legal); + setOperationAction(ISD::SUB, MVT::v4i16, Legal); + setOperationAction(ISD::SUB, MVT::v2i32, Legal); + setOperationAction(ISD::SUB, MVT::v1i64, Legal); + + setOperationAction(ISD::MULHS, MVT::v4i16, Legal); + setOperationAction(ISD::MUL, MVT::v4i16, Legal); + + setOperationAction(ISD::AND, MVT::v8i8, Promote); + AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v4i16, Promote); + AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v2i32, Promote); + AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v1i64, Legal); + + setOperationAction(ISD::OR, MVT::v8i8, Promote); + AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v4i16, Promote); + AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v2i32, Promote); + AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v1i64, Legal); + + setOperationAction(ISD::XOR, MVT::v8i8, Promote); + AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v4i16, Promote); + AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v2i32, Promote); + AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v1i64, Legal); + + setOperationAction(ISD::LOAD, MVT::v8i8, Promote); + AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v4i16, Promote); + AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v2i32, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v2f32, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v1i64, Legal); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); + + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); + + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand); + setOperationAction(ISD::SELECT, MVT::v8i8, Promote); + 
setOperationAction(ISD::SELECT, MVT::v4i16, Promote); + setOperationAction(ISD::SELECT, MVT::v2i32, Promote); + setOperationAction(ISD::SELECT, MVT::v1i64, Custom); + } + + if (!UseSoftFloat && Subtarget->hasSSE1()) { + addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); + + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::SELECT, MVT::v4f32, Custom); + setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); + } + + if (!UseSoftFloat && Subtarget->hasSSE2()) { + addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); + + // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM + // registers cannot be used even for integer operations. + addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); + addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); + addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); + addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); + + setOperationAction(ISD::ADD, MVT::v16i8, Legal); + setOperationAction(ISD::ADD, MVT::v8i16, Legal); + setOperationAction(ISD::ADD, MVT::v4i32, Legal); + setOperationAction(ISD::ADD, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::SUB, MVT::v16i8, Legal); + setOperationAction(ISD::SUB, MVT::v8i16, Legal); + setOperationAction(ISD::SUB, MVT::v4i32, Legal); + setOperationAction(ISD::SUB, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v8i16, Legal); + setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + + setOperationAction(ISD::VSETCC, MVT::v2f64, Custom); + setOperationAction(ISD::VSETCC, MVT::v16i8, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v4i32, Custom); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 
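+    // The loop walks the integer vector types from v16i8 up to, but not
+    // including, v2i64; v2i64 and v2f64 are given their own entries right
+    // after it.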
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+      MVT VT = (MVT::SimpleValueType)i;
+      // Do not attempt to custom lower non-power-of-2 vectors
+      if (!isPowerOf2_32(VT.getVectorNumElements()))
+        continue;
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    }
+
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+    if (Subtarget->is64Bit()) {
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+    }
+
+    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+    }
+
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+    // Custom lower v2i64 and v2f64 selects.
+    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
+    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
+    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
+
+  }
+
+  if (Subtarget->hasSSE41()) {
+    // FIXME: Do we need to handle scalar-to-vector here?
+    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
+
+    // i8 and i16 vectors are custom, because the source register and source
+    // memory operand types are not the same width.  f32 vectors are custom
+    // since the immediate controlling the insert encodes additional
+    // information.
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
+
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+    if (Subtarget->is64Bit()) {
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+    }
+  }
+
+  if (Subtarget->hasSSE42()) {
+    setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+  }
+
+  // We want to custom lower some of our intrinsics.
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+  // Add/Sub/Mul with overflow operations are custom lowered.
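+  // Custom lowering maps these directly onto EFLAGS: for example, a call to
+  // llvm.sadd.with.overflow.i32 becomes one flag-producing add whose
+  // overflow bit is read with SETO or a conditional branch, rather than a
+  // separate comparison that recomputes the overflow condition.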
+  setOperationAction(ISD::SADDO, MVT::i32, Custom);
+  setOperationAction(ISD::SADDO, MVT::i64, Custom);
+  setOperationAction(ISD::UADDO, MVT::i32, Custom);
+  setOperationAction(ISD::UADDO, MVT::i64, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+  setOperationAction(ISD::USUBO, MVT::i32, Custom);
+  setOperationAction(ISD::USUBO, MVT::i64, Custom);
+  setOperationAction(ISD::SMULO, MVT::i32, Custom);
+  setOperationAction(ISD::SMULO, MVT::i64, Custom);
+  setOperationAction(ISD::UMULO, MVT::i32, Custom);
+  setOperationAction(ISD::UMULO, MVT::i64, Custom);
+
+  if (!Subtarget->is64Bit()) {
+    // These libcalls are not available in 32-bit.
+    setLibcallName(RTLIB::SHL_I128, 0);
+    setLibcallName(RTLIB::SRL_I128, 0);
+    setLibcallName(RTLIB::SRA_I128, 0);
+  }
+
+  // We have target-specific dag combine patterns for the following nodes:
+  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+  setTargetDAGCombine(ISD::BUILD_VECTOR);
+  setTargetDAGCombine(ISD::SELECT);
+  setTargetDAGCombine(ISD::SHL);
+  setTargetDAGCombine(ISD::SRA);
+  setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::STORE);
+  if (Subtarget->is64Bit())
+    setTargetDAGCombine(ISD::MUL);
+
+  computeRegisterProperties();
+
+  // FIXME: These should be based on subtarget info.  Plus, the values should
+  // be smaller when we are in optimizing for size mode.
+  maxStoresPerMemset = 16;  // For @llvm.memset -> sequence of stores
+  maxStoresPerMemcpy = 16;  // For @llvm.memcpy -> sequence of stores
+  maxStoresPerMemmove = 3;  // For @llvm.memmove -> sequence of stores
+  allowUnalignedMemoryAccesses = true; // x86 supports it!
+  setPrefLoopAlignment(16);
+  benefitFromCodePlacementOpt = true;
+}
+
+
+MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
+  return MVT::i8;
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+  if (MaxAlign == 16)
+    return;
+  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (VTy->getBitWidth() == 128)
+      MaxAlign = 16;
+  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    unsigned EltAlign = 0;
+    getMaxByValAlign(ATy->getElementType(), EltAlign);
+    if (EltAlign > MaxAlign)
+      MaxAlign = EltAlign;
+  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      unsigned EltAlign = 0;
+      getMaxByValAlign(STy->getElementType(i), EltAlign);
+      if (EltAlign > MaxAlign)
+        MaxAlign = EltAlign;
+      if (MaxAlign == 16)
+        break;
+    }
+  }
+  return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.  For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+  if (Subtarget->is64Bit()) {
+    // Max of 8 and alignment of type.
+    unsigned TyAlign = TD->getABITypeAlignment(Ty);
+    if (TyAlign > 8)
+      return TyAlign;
+    return 8;
+  }
+
+  unsigned Align = 4;
+  if (Subtarget->hasSSE1())
+    getMaxByValAlign(Ty, Align);
+  return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering.  It returns MVT::iAny if SelectionDAG should be responsible for
+/// determining it.
+MVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+                                       bool isSrcConst, bool isSrcStr) const {
+  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+  // linux.  This is because the stack realignment code can't handle certain
+  // cases like PR2962.  This should be removed when PR2962 is fixed.
+  if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+      return MVT::v4i32;
+    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+      return MVT::v4f32;
+  }
+  if (Subtarget->is64Bit() && Size >= 8)
+    return MVT::i64;
+  return MVT::i32;
+}
+
+/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                    SelectionDAG &DAG) const {
+  if (usesGlobalOffsetTable())
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+  if (!Subtarget->isPICStyleRIPRel())
+    // This doesn't have DebugLoc associated with it, but is not really the
+    // same as a Register.
+    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
+                       getPointerTy());
+  return Table;
+}
+
+//===----------------------------------------------------------------------===//
+//               Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+/// LowerRET - Lower an ISD::RET node.
+SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+  SDValue Chain = Op.getOperand(0);
+
+  // Handle tail call return.
+  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
+  if (Chain.getOpcode() == X86ISD::TAILCALL) {
+    SDValue TailCall = Chain;
+    SDValue TargetAddress = TailCall.getOperand(1);
+    SDValue StackAdjustment = TailCall.getOperand(2);
+    assert(((TargetAddress.getOpcode() == ISD::Register &&
+             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
+              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
+           "Expecting a global address, external symbol, or register");
+    assert(StackAdjustment.getOpcode() == ISD::Constant &&
+           "Expecting a const value");
+
+    SmallVector<SDValue, 8> Operands;
+    Operands.push_back(Chain.getOperand(0));
+    Operands.push_back(TargetAddress);
+    Operands.push_back(StackAdjustment);
+    // Copy registers used by the call.  Last operand is a flag so it is not
+    // copied.
+    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
+      Operands.push_back(Chain.getOperand(i));
+    }
+    return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+                       Operands.size());
+  }
+
+  // Regular return.
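+  // Build the operand list for X86ISD::RET_FLAG: the chain, the number of
+  // bytes the callee pops as an i16 constant, any values returned directly
+  // on the FP stack, and finally the glue that orders the ret after the
+  // CopyToReg nodes for register returns.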
+  SDValue Flag;
+
+  SmallVector<SDValue, 6> RetOps;
+  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+  // Operand #1 = Bytes To Pop
+  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDValue ValToCopy = Op.getOperand(i*2+1);
+
+    // Returns in ST0/ST1 are handled specially: these are pushed as operands
+    // to the RET instruction and handled by the FP Stackifier.
+    if (VA.getLocReg() == X86::ST0 ||
+        VA.getLocReg() == X86::ST1) {
+      // If this is a copy from an xmm register to ST(0), use an FPExtend to
+      // change the value to the FP stack register class.
+      if (isScalarFPTypeInSSEReg(VA.getValVT()))
+        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+      RetOps.push_back(ValToCopy);
+      // Don't emit a copytoreg.
+      continue;
+    }
+
+    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+    // which is returned in RAX / RDX.
+    if (Subtarget->is64Bit()) {
+      MVT ValVT = ValToCopy.getValueType();
+      if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
+        ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
+          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+                                  ValToCopy);
+      }
+    }
+
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  // The x86-64 ABI for returning structs by value requires that we copy
+  // the sret argument into %rax for the return.  We saved the argument into
+  // a virtual register in the entry block, so now we copy the value out
+  // and into %rax.
+  if (Subtarget->is64Bit() &&
+      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+    unsigned Reg = FuncInfo->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+      FuncInfo->setSRetReturnReg(Reg);
+    }
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+    Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  RetOps[0] = Chain;  // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(X86ISD::RET_FLAG, dl,
+                     MVT::Other, &RetOps[0], RetOps.size());
+}
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers.  This assumes
+/// that Chain/InFlag are the input chain/flag to use, and that TheCall is
+/// the call being lowered.  It returns an SDNode with the same number of
+/// values as the ISD::CALL.
+SDNode *X86TargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+                unsigned CallingConv, SelectionDAG &DAG) {
+
+  DebugLoc dl = TheCall->getDebugLoc();
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  bool isVarArg = TheCall->isVarArg();
+  bool Is64Bit = Subtarget->is64Bit();
+  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
+
+  SmallVector<SDValue, 8> ResultVals;
+
+  // Copy all of the result registers out of their specified physreg.
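+  // Values that come back in ST(0)/ST(1) but are wanted in an SSE register
+  // are copied out of the FP stack as f80 and then FP_ROUNDed to the final
+  // type below; the round is also what moves them into an XMM register.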
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    MVT CopyVT = VA.getValVT();
+
+    // If this is x86-64, and we disabled SSE, we can't return FP values
+    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
+      cerr << "SSE register return with SSE disabled\n";
+      exit(1);
+    }
+
+    // If this is a call to a function that returns an fp value on the floating
+    // point stack, but where we prefer to use the value in xmm registers, copy
+    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+    if ((VA.getLocReg() == X86::ST0 ||
+         VA.getLocReg() == X86::ST1) &&
+        isScalarFPTypeInSSEReg(VA.getValVT())) {
+      CopyVT = MVT::f80;
+    }
+
+    SDValue Val;
+    if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+      // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
+      if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                   MVT::v2i64, InFlag).getValue(1);
+        Val = Chain.getValue(0);
+        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+                          Val, DAG.getConstant(0, MVT::i64));
+      } else {
+        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                   MVT::i64, InFlag).getValue(1);
+        Val = Chain.getValue(0);
+      }
+      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+    } else {
+      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                 CopyVT, InFlag).getValue(1);
+      Val = Chain.getValue(0);
+    }
+    InFlag = Chain.getValue(2);
+
+    if (CopyVT != VA.getValVT()) {
+      // Round the F80 the right size, which also moves to the appropriate xmm
+      // register.
+      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+                        // This truncation won't change the value.
+                        DAG.getIntPtrConstant(1));
+    }
+
+    ResultVals.push_back(Val);
+  }
+
+  // Merge everything together with a MERGE_VALUES node.
+  ResultVals.push_back(Chain);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+                     &ResultVals[0], ResultVals.size()).getNode();
+}
+
+
+//===----------------------------------------------------------------------===//
+//                C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+//  StdCall calling convention seems to be standard for many Windows' API
+//  routines and around.  It differs from C calling convention just a little:
+//  callee should clean up the stack, not caller.  Symbols should also be
+//  decorated in some fancy way :)  It doesn't support any vector arguments.
+//  For info on fast calling convention see Fast Calling Convention (tail call)
+//  implementation LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// semantics.
+static bool CallIsStructReturn(CallSDNode *TheCall) {
+  unsigned NumOps = TheCall->getNumArgs();
+  if (!NumOps)
+    return false;
+
+  return TheCall->getArgFlags(0).isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses
+/// struct return semantics.
+static bool ArgsAreStructReturn(SDValue Op) {
+  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
+  if (!NumArgs)
+    return false;
+
+  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+}
+
+/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
+/// the callee to pop its own arguments.  Callee pop is necessary to support
+/// tail calls.
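+/// With a callee-cleanup convention the stack is already balanced when the
+/// callee returns, which is what makes guaranteed tail calls workable: the
+/// caller's frame can be reused and nobody has to pop the callee's
+/// arguments afterwards.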
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+  if (IsVarArg)
+    return false;
+
+  switch (CallingConv) {
+  default:
+    return false;
+  case CallingConv::X86_StdCall:
+    return !Subtarget->is64Bit();
+  case CallingConv::X86_FastCall:
+    return !Subtarget->is64Bit();
+  case CallingConv::Fast:
+    return PerformTailCallOpt;
+  }
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention value.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+  if (Subtarget->is64Bit()) {
+    if (Subtarget->isTargetWin64())
+      return CC_X86_Win64_C;
+    else if (CC == CallingConv::Fast && PerformTailCallOpt)
+      return CC_X86_64_TailCall;
+    else
+      return CC_X86_64_C;
+  }
+
+  if (CC == CallingConv::X86_FastCall)
+    return CC_X86_32_FastCall;
+  else if (CC == CallingConv::Fast)
+    return CC_X86_32_FastCC;
+  else
+    return CC_X86_32_C;
+}
+
+/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
+  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  if (CC == CallingConv::X86_FastCall)
+    return FastCall;
+  else if (CC == CallingConv::X86_StdCall)
+    return StdCall;
+  return None;
+}
+
+
+/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
+/// in a register before calling.
+bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
+  return !IsTailCall && !Is64Bit &&
+         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+         Subtarget->isPICStyleGOT();
+}
+
+/// CallRequiresFnAddressInReg - Check whether the call requires the function
+/// address to be loaded in a register.
+bool
+X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
+  return !Is64Bit && IsTailCall &&
+         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+         Subtarget->isPICStyleGOT();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address
+/// specified by "Src" to address "Dst" with size and alignment information
+/// specified by the specific parameter attribute.  The copy will be passed
+/// as a byval function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+                          DebugLoc dl) {
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
+                                            const CCValAssign &VA,
+                                            MachineFrameInfo *MFI,
+                                            unsigned CC,
+                                            SDValue Root, unsigned i) {
+  // Create the nodes corresponding to a load from this parameter slot.
+  ISD::ArgFlagsTy Flags =
+    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
+  bool AlwaysUseMutable = (CC == CallingConv::Fast) && PerformTailCallOpt;
+  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+  // FIXME: For now, all byval parameter objects are marked mutable.  This can
+  // be changed with more analysis.  In case of tail call optimization, mark
+  // all arguments mutable, since they could be overwritten by lowering of
+  // arguments in case of a tail call.
+  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                  VA.getLocMemOffset(), isImmutable);
+  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+  if (Flags.isByVal())
+    return FIN;
+  return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+                     PseudoSourceValue::getFixedStack(FI), 0);
+}
+
+SDValue
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+  DebugLoc dl = Op.getDebugLoc();
+
+  const Function* Fn = MF.getFunction();
+  if (Fn->hasExternalLinkage() &&
+      Subtarget->isTargetCygMing() &&
+      Fn->getName() == "main")
+    FuncInfo->setForceFramePointer(true);
+
+  // Decorate the function name.
+  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  unsigned CC = MF.getFunction()->getCallingConv();
+  bool Is64Bit = Subtarget->is64Bit();
+  bool IsWin64 = Subtarget->isTargetWin64();
+
+  assert(!(isVarArg && CC == CallingConv::Fast) &&
+         "Var args not supported with calling convention fastcc");
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+
+  SmallVector<SDValue, 8> ArgValues;
+  unsigned LastVal = ~0U;
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+    // places.
+    assert(VA.getValNo() != LastVal &&
+           "Don't support value assigned to multiple locs yet");
+    LastVal = VA.getValNo();
+
+    if (VA.isRegLoc()) {
+      MVT RegVT = VA.getLocVT();
+      TargetRegisterClass *RC = NULL;
+      if (RegVT == MVT::i32)
+        RC = X86::GR32RegisterClass;
+      else if (Is64Bit && RegVT == MVT::i64)
+        RC = X86::GR64RegisterClass;
+      else if (RegVT == MVT::f32)
+        RC = X86::FR32RegisterClass;
+      else if (RegVT == MVT::f64)
+        RC = X86::FR64RegisterClass;
+      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+        RC = X86::VR128RegisterClass;
+      else if (RegVT.isVector()) {
+        assert(RegVT.getSizeInBits() == 64);
+        if (!Is64Bit)
+          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
+        else {
+          // Darwin calling convention passes MMX values in either GPRs or
+          // XMMs in x86-64.  Other targets pass them in memory.
+          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
+            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
+            RegVT = MVT::v2i64;
+          } else {
+            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
+            RegVT = MVT::i64;
+          }
+        }
+      } else {
+        assert(0 && "Unknown argument type!");
+      }
+
+      unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+      // If this is an 8 or 16-bit value, it is really passed promoted to 32
+      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
+      // right size.
+      if (VA.getLocInfo() == CCValAssign::SExt)
+        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+      else if (VA.getLocInfo() == CCValAssign::ZExt)
+        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+
+      if (VA.getLocInfo() != CCValAssign::Full)
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+      // Handle MMX values passed in GPRs.
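+      // An MMX value mapped to a GR64 arrives as a plain i64 and is simply
+      // bitconverted back; one mapped to an XMM register arrives as v2i64,
+      // so element 0 is extracted first and then bitconverted.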
+      if (Is64Bit && RegVT != VA.getLocVT()) {
+        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
+          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(),
+                                 ArgValue);
+        else if (RC == X86::VR128RegisterClass) {
+          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+                                 ArgValue, DAG.getConstant(0, MVT::i64));
+          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(),
+                                 ArgValue);
+        }
+      }
+
+      ArgValues.push_back(ArgValue);
+    } else {
+      assert(VA.isMemLoc());
+      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+    }
+  }
+
+  // The x86-64 ABI for returning structs by value requires that we copy
+  // the sret argument into %rax for the return.  Save the argument into
+  // a virtual register so that we can access it from the return points.
+  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+    unsigned Reg = FuncInfo->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+      FuncInfo->setSRetReturnReg(Reg);
+    }
+    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+  }
+
+  unsigned StackSize = CCInfo.getNextStackOffset();
+  // Align stack specially for tail calls.
+  if (PerformTailCallOpt && CC == CallingConv::Fast)
+    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+  // If the function takes variable number of arguments, make a frame index for
+  // the start of the first vararg value... for expansion of llvm.va_start.
+  if (isVarArg) {
+    if (Is64Bit || CC != CallingConv::X86_FastCall) {
+      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+    }
+    if (Is64Bit) {
+      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+      // FIXME: We should really autogenerate these arrays
+      static const unsigned GPR64ArgRegsWin64[] = {
+        X86::RCX, X86::RDX, X86::R8,  X86::R9
+      };
+      static const unsigned XMMArgRegsWin64[] = {
+        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
+      };
+      static const unsigned GPR64ArgRegs64Bit[] = {
+        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+      };
+      static const unsigned XMMArgRegs64Bit[] = {
+        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+      };
+      const unsigned *GPR64ArgRegs, *XMMArgRegs;
+
+      if (IsWin64) {
+        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+        GPR64ArgRegs = GPR64ArgRegsWin64;
+        XMMArgRegs = XMMArgRegsWin64;
+      } else {
+        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+        GPR64ArgRegs = GPR64ArgRegs64Bit;
+        XMMArgRegs = XMMArgRegs64Bit;
+      }
+      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+                                                       TotalNumIntRegs);
+      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
+                                                       TotalNumXMMRegs);
+
+      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+             "SSE register cannot be used when SSE is disabled!");
+      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+             "SSE register cannot be used when SSE is disabled!");
+      if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+        // Kernel mode asks for SSE to be disabled, so don't push them
+        // on the stack.
+        TotalNumXMMRegs = 0;
+
+      // For X86-64, if there are vararg parameters that are passed via
+      // registers, then we must store them to their spots on the stack so
+      // they may be loaded by dereferencing the result of va_next.
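+      // On the SysV side this is the standard AMD64 register save area: six
+      // GPRs at 8 bytes each followed by eight XMM registers at 16 bytes
+      // each, with the va_list's gp_offset/fp_offset pointing at the first
+      // unused slot of each block.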
+      VarArgsGPOffset = NumIntRegs * 8;
+      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
+      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
+                                                 TotalNumXMMRegs * 16, 16);
+
+      // Store the integer parameter registers.
+      SmallVector<SDValue, 8> MemOps;
+      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+                                DAG.getIntPtrConstant(VarArgsGPOffset));
+      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+        unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+                                     X86::GR64RegisterClass);
+        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+        SDValue Store =
+          DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                          DAG.getIntPtrConstant(8));
+      }
+
+      // Now store the XMM (fp + vector) parameter registers.
+      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+                        DAG.getIntPtrConstant(VarArgsFPOffset));
+      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+        unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+                                     X86::VR128RegisterClass);
+        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
+        SDValue Store =
+          DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                          DAG.getIntPtrConstant(16));
+      }
+      if (!MemOps.empty())
+        Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                           &MemOps[0], MemOps.size());
+    }
+  }
+
+  ArgValues.push_back(Root);
+
+  // Some CCs need callee pop.
+  if (IsCalleePop(isVarArg, CC)) {
+    BytesToPopOnReturn  = StackSize; // Callee pops everything.
+    BytesCallerReserves = 0;
+  } else {
+    BytesToPopOnReturn  = 0; // Callee pops nothing.
+    // If this is an sret function, the return should pop the hidden pointer.
+    if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+      BytesToPopOnReturn = 4;
+    BytesCallerReserves = StackSize;
+  }
+
+  if (!Is64Bit) {
+    RegSaveFrameIndex = 0xAAAAAAA;  // RegSaveFrameIndex is X86-64 only.
+    if (CC == CallingConv::X86_FastCall)
+      VarArgsFrameIndex = 0xAAAAAAA;  // fastcc functions can't have varargs.
+  }
+
+  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+                                    const SDValue &StackPtr,
+                                    const CCValAssign &VA,
+                                    SDValue Chain,
+                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
+  DebugLoc dl = TheCall->getDebugLoc();
+  unsigned LocMemOffset = VA.getLocMemOffset();
+  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+  if (Flags.isByVal()) {
+    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+  }
+  return DAG.getStore(Chain, dl, Arg, PtrOff,
+                      PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+                                           SDValue &OutRetAddr,
+                                           SDValue Chain,
+                                           bool IsTailCall,
+                                           bool Is64Bit,
+                                           int FPDiff,
+                                           DebugLoc dl) {
+  if (!IsTailCall || FPDiff == 0) return Chain;
+
+  // Adjust the Return address stack slot.
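+  // A non-zero FPDiff means the caller pushed a different number of argument
+  // bytes than this tail call needs, so the return address is not where the
+  // callee will expect it: load the old value here and store it into its new
+  // slot once the outgoing arguments are in place.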
+  MVT VT = getPointerTy();
+  OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+  // Load the "old" Return address.
+  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
+  return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+                         SDValue Chain, SDValue RetAddrFrIdx,
+                         bool Is64Bit, int FPDiff, DebugLoc dl) {
+  // Store the return address to the appropriate stack slot.
+  if (!FPDiff) return Chain;
+  // Calculate the new stack slot for the return address.
+  int SlotSize = Is64Bit ? 8 : 4;
+  int NewReturnAddrFI =
+    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
+  return Chain;
+}
+
+SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain       = TheCall->getChain();
+  unsigned CC         = TheCall->getCallingConv();
+  bool isVarArg       = TheCall->isVarArg();
+  bool IsTailCall     = TheCall->isTailCall() &&
+                        CC == CallingConv::Fast && PerformTailCallOpt;
+  SDValue Callee      = TheCall->getCallee();
+  bool Is64Bit        = Subtarget->is64Bit();
+  bool IsStructRet    = CallIsStructReturn(TheCall);
+  DebugLoc dl         = TheCall->getDebugLoc();
+
+  assert(!(isVarArg && CC == CallingConv::Fast) &&
+         "Var args not supported with calling convention fastcc");
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+  if (PerformTailCallOpt && CC == CallingConv::Fast)
+    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+  int FPDiff = 0;
+  if (IsTailCall) {
+    // Lower arguments at fp - stackoffset + fpdiff.
+    unsigned NumBytesCallerPushed =
+      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+    FPDiff = NumBytesCallerPushed - NumBytes;
+
+    // Set the delta of movement of the return address stack slot.
+    // But only set if delta is greater than previous delta.
+    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+  }
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SDValue RetAddrFrIdx;
+  // Load return address for tail calls.
+  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall,
+                                  Is64Bit, FPDiff, dl);
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+
+  // Walk the register/memloc assignments, inserting copies/loads.  In the
+  // case of tail call optimization arguments are handled later.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = TheCall->getArg(i);
+    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    bool isByVal = Flags.isByVal();
+
+    // Promote the value if needed.
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + if (Is64Bit) { + MVT RegVT = VA.getLocVT(); + if (RegVT.isVector() && RegVT.getSizeInBits() == 64) + switch (VA.getLocReg()) { + default: + break; + case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX: + case X86::R8: { + // Special case: passing MMX values in GPR registers. + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); + break; + } + case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3: + case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: { + // Special case: passing MMX values in XMM registers. + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); + Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); + Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); + break; + } + } + } + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + if (!IsTailCall || (IsTailCall && isByVal)) { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDValue InFlag; + // Tail call byval lowering might overwrite argument registers so in case of + // tail call optimization the copies to registers are lowered later. + if (!IsTailCall) + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. + if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) { + Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), + getPointerTy()), + InFlag); + InFlag = Chain.getValue(1); + } + // If we are tail calling and generating PIC/GOT style code load the address + // of the callee into ecx. The value in ecx is used as target of the tail + // jump. This is done to circumvent the ebx/callee-saved problem for tail + // calls on PIC/GOT architectures. Normally we would just put the address of + // GOT into ebx and then call target@PLT. But for tail callss ebx would be + // restored (since ebx is callee saved) before jumping to the target@PLT. + if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { + // Note: The actual moving to ecx is done further down. 
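+    // Sketch of the problem being avoided (register and symbol names are
+    // illustrative): a normal PIC call is "call f@PLT" with the GOT address
+    // in %ebx, but a tail-call epilogue restores the callee-saved %ebx
+    // before the jump; materializing f's address in %ecx first allows a
+    // plain "jmp *%ecx" that does not depend on %ebx.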
+    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+    if (G && !G->getGlobal()->hasHiddenVisibility() &&
+        !G->getGlobal()->hasProtectedVisibility())
+      Callee = LowerGlobalAddress(Callee, DAG);
+    else if (isa<ExternalSymbolSDNode>(Callee))
+      Callee = LowerExternalSymbol(Callee, DAG);
+  }
+
+  if (Is64Bit && isVarArg) {
+    // From the AMD64 ABI document:
+    // For calls that may call functions that use varargs or stdargs
+    // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration) %al is used as hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of SSE
+    // registers used and is in the range 0 - 8 inclusive.
+
+    // FIXME: Verify this on Win64
+    // Count the number of XMM registers allocated.
+    static const unsigned XMMArgRegs[] = {
+      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+    };
+    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+           && "SSE registers cannot be used when SSE is disabled");
+
+    Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+
+  // For tail calls lower the arguments to the 'real' stack slot.
+  if (IsTailCall) {
+    SmallVector<SDValue, 8> MemOpChains2;
+    SDValue FIN;
+    int FI = 0;
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+    InFlag = SDValue();
+    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+      CCValAssign &VA = ArgLocs[i];
+      if (!VA.isRegLoc()) {
+        assert(VA.isMemLoc());
+        SDValue Arg = TheCall->getArg(i);
+        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+        // Create frame index.
+        int32_t Offset = VA.getLocMemOffset()+FPDiff;
+        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+        FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+        if (Flags.isByVal()) {
+          // Copy relative to framepointer.
+          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+          if (StackPtr.getNode() == 0)
+            StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+                                          getPointerTy());
+          Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+                                                           Flags, DAG, dl));
+        } else {
+          // Store relative to framepointer.
+          MemOpChains2.push_back(
+            DAG.getStore(Chain, dl, Arg, FIN,
+                         PseudoSourceValue::getFixedStack(FI), 0));
+        }
+      }
+    }
+
+    if (!MemOpChains2.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                          &MemOpChains2[0], MemOpChains2.size());
+
+    // Copy arguments to their registers.
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+    InFlag = SDValue();
+
+    // Store the return address to the appropriate stack slot.
+    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+                                     FPDiff, dl);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    // We should use an extra load for direct calls to dllimported functions
+    // in non-JIT mode.
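+    // (A dllimported function is reached through its import-table slot, e.g.
+    // __imp_foo on Windows targets, so the call needs one more load than a
+    // plain direct call; "foo" here is only a placeholder name.)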
+    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+                                        getTargetMachine(), true))
+      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
+                                          G->getOffset());
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+  } else if (IsTailCall) {
+    unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+
+    Chain = DAG.getCopyToReg(Chain, dl,
+                             DAG.getRegister(Opc, getPointerTy()),
+                             Callee, InFlag);
+    Callee = DAG.getRegister(Opc, getPointerTy());
+    // Add register as live out.
+    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+
+  if (IsTailCall) {
+    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               DAG.getIntPtrConstant(0, true), InFlag);
+    InFlag = Chain.getValue(1);
+
+    // Returns a chain & a flag for retval copy to use.
+    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+    Ops.clear();
+  }
+
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  if (IsTailCall)
+    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Add an implicit use of the GOT pointer in EBX.
+  if (!IsTailCall && !Is64Bit &&
+      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+      Subtarget->isPICStyleGOT())
+    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+  // Add an implicit use of AL for x86 vararg functions.
+  if (Is64Bit && isVarArg)
+    Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  if (IsTailCall) {
+    assert(InFlag.getNode() &&
+           "Flag must be set. Depend on flag being set in LowerRET");
+    Chain = DAG.getNode(X86ISD::TAILCALL, dl,
+                        TheCall->getVTList(), &Ops[0], Ops.size());
+
+    return SDValue(Chain.getNode(), Op.getResNo());
+  }
+
+  Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  unsigned NumBytesForCalleeToPush;
+  if (IsCalleePop(isVarArg, CC))
+    NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
+  else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+    // If this is a call to a struct-return function, the callee
+    // pops the hidden struct pointer, so we have to push it back.
+    // This is common for Darwin/X86, Linux & Mingw32 targets.
+    NumBytesForCalleeToPush = 4;
+  else
+    NumBytesForCalleeToPush = 0;  // Callee pops nothing.
+
+  // Returns a flag for retval copy to use.
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+                                                   true),
+                             InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+                 Op.getResNo());
+}
+
+
+//===----------------------------------------------------------------------===//
+//                Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+//  Like stdcall, the callee cleans up the arguments, except that ECX is
+//  reserved for storing the address of the tail-called function. Only 2
+//  registers are free for argument passing (inreg).
+//  Tail call optimization is performed provided:
+//  * tailcallopt is enabled
+//  * caller/callee are fastcc
+//  On X86_64 architecture with GOT-style position independent code only local
+//  (within module) calls are supported at the moment.
+//  To keep the stack aligned according to the platform ABI the function
+//  GetAlignedArgumentStackSize ensures that the argument delta is always a
+//  multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
+//  for example)
+//  If a tail called function callee has more arguments than the caller the
+//  caller needs to make sure that there is room to move the RETADDR to. This
+//  is achieved by reserving an area the size of the argument delta right after
+//  the original RETADDR, but before the saved framepointer or the spilled
+//  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
+//  stack layout:
+//    arg1
+//    arg2
+//    RETADDR
+//    [ new RETADDR
+//      move area ]
+//    (possible EBP)
+//    ESI
+//    EDI
+//    local1 ..
+
+/// GetAlignedArgumentStackSize - Round the stack size up so that it leaves
+/// room for the return-address slot while staying aligned, e.g. to 16n + 12
+/// for a 16 byte alignment requirement with a 4 byte slot. For example, a
+/// StackSize of 20 is rounded up to 28 under those parameters.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+                                                        SelectionDAG& DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetMachine &TM = MF.getTarget();
+  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  unsigned StackAlignment = TFI.getStackAlignment();
+  uint64_t AlignMask = StackAlignment - 1;
+  int64_t Offset = StackSize;
+  uint64_t SlotSize = TD->getPointerSize();
+  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+    // Number smaller than 12 so just add the difference.
+    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+  } else {
+    // Mask out lower bits, add stack alignment once plus the 12 bytes.
+    Offset = ((~AlignMask) & Offset) + StackAlignment +
+      (StackAlignment-SlotSize);
+  }
+  return Offset;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return. A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+                                                          SDValue Ret,
+                                                          SelectionDAG& DAG) const {
+  if (!PerformTailCallOpt)
+    return false;
+
+  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    unsigned CallerCC = MF.getFunction()->getCallingConv();
+    unsigned CalleeCC = TheCall->getCallingConv();
+    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+      SDValue Callee = TheCall->getCallee();
+      // On x86/32-bit, PIC/GOT tail calls are supported.
+      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
+        return true;
+
+      // Can only do local tail calls (in same module, hidden or protected) on
+      // x86_64 PIC/GOT at the moment.
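+      // E.g. (illustrative): a callee with hidden or protected visibility
+      // cannot be preempted at dynamic-link time, so its address is known to
+      // be module-local and a direct tail jump is safe; a default-visibility
+      // global could resolve to a definition in another module.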
+      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+        return G->getGlobal()->hasHiddenVisibility()
+            || G->getGlobal()->hasProtectedVisibility();
+    }
+  }
+
+  return false;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(MachineFunction &mf,
+                                  MachineModuleInfo *mmo,
+                                  DwarfWriter *dw,
+                                  DenseMap<const Value *, unsigned> &vm,
+                                  DenseMap<const BasicBlock *,
+                                           MachineBasicBlock *> &bm,
+                                  DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+                                  , SmallSet<Instruction *, 8> &cil
+#endif
+                                  ) {
+  return X86::createFastISel(mf, mmo, dw, vm, bm, am
+#ifndef NDEBUG
+                             , cil
+#endif
+                             );
+}
+
+
+//===----------------------------------------------------------------------===//
+//                           Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+  int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+  if (ReturnAddrIndex == 0) {
+    // Set up a frame object for the return address.
+    uint64_t SlotSize = TD->getPointerSize();
+    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+    FuncInfo->setRAIndex(ReturnAddrIndex);
+  }
+
+  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
+/// X86 specific condition code, returning the condition code and the LHS/RHS
+/// of the comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+                               SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+  if (!isFP) {
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+        // X > -1   -> X == 0, jump !sign.
+        RHS = DAG.getConstant(0, RHS.getValueType());
+        return X86::COND_NS;
+      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+        // X < 0   -> X == 0, jump on sign.
+        return X86::COND_S;
+      } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+        // X < 1   -> X <= 0
+        RHS = DAG.getConstant(0, RHS.getValueType());
+        return X86::COND_LE;
+      }
+    }
+
+    switch (SetCCOpcode) {
+    default: assert(0 && "Invalid integer condition!");
+    case ISD::SETEQ:  return X86::COND_E;
+    case ISD::SETGT:  return X86::COND_G;
+    case ISD::SETGE:  return X86::COND_GE;
+    case ISD::SETLT:  return X86::COND_L;
+    case ISD::SETLE:  return X86::COND_LE;
+    case ISD::SETNE:  return X86::COND_NE;
+    case ISD::SETULT: return X86::COND_B;
+    case ISD::SETUGT: return X86::COND_A;
+    case ISD::SETULE: return X86::COND_BE;
+    case ISD::SETUGE: return X86::COND_AE;
+    }
+  }
+
+  // First determine if it is required or is profitable to flip the operands.
+
+  // If LHS is a foldable load, but RHS is not, flip the condition.
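+  // E.g. (illustrative): "setolt (load %p), %x" is swapped to
+  // "setogt %x, (load %p)" so the load ends up as the second operand of the
+  // compare, where it can be folded into the memory form of the compare
+  // instruction (e.g. ucomiss for floats).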
+  if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
+      !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+    std::swap(LHS, RHS);
+  }
+
+  switch (SetCCOpcode) {
+  default: break;
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    std::swap(LHS, RHS);
+    break;
+  }
+
+  // On a floating point condition, the flags are set as follows:
+  //  ZF  PF  CF   op
+  //   0 | 0 | 0 | X > Y
+  //   0 | 0 | 1 | X < Y
+  //   1 | 0 | 0 | X == Y
+  //   1 | 1 | 1 | unordered
+  switch (SetCCOpcode) {
+  default: assert(0 && "Condcode should be pre-legalized away");
+  case ISD::SETUEQ:
+  case ISD::SETEQ:   return X86::COND_E;
+  case ISD::SETOLT:              // flipped
+  case ISD::SETOGT:
+  case ISD::SETGT:   return X86::COND_A;
+  case ISD::SETOLE:              // flipped
+  case ISD::SETOGE:
+  case ISD::SETGE:   return X86::COND_AE;
+  case ISD::SETUGT:              // flipped
+  case ISD::SETULT:
+  case ISD::SETLT:   return X86::COND_B;
+  case ISD::SETUGE:              // flipped
+  case ISD::SETULE:
+  case ISD::SETLE:   return X86::COND_BE;
+  case ISD::SETONE:
+  case ISD::SETNE:   return X86::COND_NE;
+  case ISD::SETUO:   return X86::COND_P;
+  case ISD::SETO:    return X86::COND_NP;
+  }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. Current x86 isa includes the following FP cmov instructions:
+/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+  switch (X86CC) {
+  default:
+    return false;
+  case X86::COND_B:
+  case X86::COND_BE:
+  case X86::COND_E:
+  case X86::COND_P:
+  case X86::COND_A:
+  case X86::COND_AE:
+  case X86::COND_NE:
+  case X86::COND_NP:
+    return true;
+  }
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified range [Low, Hi).
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+  return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+  if (Val < 0 || Val == CmpVal)
+    return true;
+  return false;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+  if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+    return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+  if (VT == MVT::v2f64 || VT == MVT::v2i64)
+    return (Mask[0] < 2 && Mask[1] < 2);
+  return false;
+}
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+  SmallVector<int, 8> M;
+  N->getMask(M);
+  return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements
+/// that is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+  if (VT != MVT::v8i16)
+    return false;
+
+  // Lower quadword copied in order or undef.
+  for (int i = 0; i != 4; ++i)
+    if (Mask[i] >= 0 && Mask[i] != i)
+      return false;
+
+  // Upper quadword shuffled.
+  for (int i = 4; i != 8; ++i)
+    if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+      return false;
+
+  return true;
+}
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+  SmallVector<int, 8> M;
+  N->getMask(M);
+  return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements
+/// that is suitable for input to PSHUFLW.
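+/// E.g. the v8i16 mask <2,0,3,1,4,5,6,7> qualifies (an illustrative mask):
+/// the low four elements are an arbitrary permutation of 0-3 while elements
+/// 4-7 stay in place.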
+static bool isPSHUFLWMask(const SmallVectorImpl &Mask, MVT VT) { + if (VT != MVT::v8i16) + return false; + + // Upper quadword copied in order. + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && Mask[i] != i) + return false; + + // Lower quadword shuffled. + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 4) + return false; + + return true; +} + +bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFLWMask(M, N->getValueType(0)); +} + +/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to SHUFP*. +static bool isSHUFPMask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) + return false; + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + return false; + + return true; +} + +bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isSHUFPMask(M, N->getValueType(0)); +} + +/// isCommutedSHUFP - Returns true if the shuffle mask is exactly +/// the reverse of what x86 shuffles want. x86 shuffles requires the lower +/// half elements to come from vector 1 (which would equal the dest.) and +/// the upper half to come from vector 2. +static bool isCommutedSHUFPMask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + return false; + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) + return false; + return true; +} + +static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return isCommutedSHUFPMask(M, N->getValueType(0)); +} + +/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVHLPS. +bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) + return false; + + // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 + return isUndefOrEqual(N->getMaskElt(0), 6) && + isUndefOrEqual(N->getMaskElt(1), 7) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + +/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. +bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems)) + return false; + + for (unsigned i = NumElems/2; i < NumElems; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i)) + return false; + + return true; +} + +/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} +/// and MOVLHPS. 
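+/// E.g. for v4f32 the mask <0,1,4,5> qualifies (an illustrative mask): the
+/// low half is kept from V1 and the high half is the low half of V2, which
+/// is what movlhps produces.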
+bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i)) + return false; + + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems)) + return false; + + return true; +} + +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + +/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to UNPCKL. +static bool isUNPCKLMask(const SmallVectorImpl &Mask, MVT VT, + bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); + if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + return false; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) + return false; + if (V2IsSplat) { + if (!isUndefOrEqual(BitI1, NumElts)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElts)) + return false; + } + } + return true; +} + +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); +} + +/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to UNPCKH. +static bool isUNPCKHMask(const SmallVectorImpl &Mask, MVT VT, + bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); + if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + return false; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j + NumElts/2)) + return false; + if (V2IsSplat) { + if (isUndefOrEqual(BitI1, NumElts)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) + return false; + } + } + return true; +} + +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); +} + +/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form +/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, +/// <0, 0, 1, 1> +static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } + return true; +} + +bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0)); +} + +/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form +/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. 
vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+  int NumElems = VT.getVectorNumElements();
+  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+    return false;
+
+  for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+    int BitI  = Mask[i];
+    int BitI1 = Mask[i+1];
+    if (!isUndefOrEqual(BitI, j))
+      return false;
+    if (!isUndefOrEqual(BitI1, j))
+      return false;
+  }
+  return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+  SmallVector<int, 8> M;
+  N->getMask(M);
+  return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+  int NumElts = VT.getVectorNumElements();
+  if (NumElts != 2 && NumElts != 4)
+    return false;
+
+  if (!isUndefOrEqual(Mask[0], NumElts))
+    return false;
+
+  for (int i = 1; i < NumElts; ++i)
+    if (!isUndefOrEqual(Mask[i], i))
+      return false;
+
+  return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+  SmallVector<int, 8> M;
+  N->getMask(M);
+  return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
+/// of what x86 movss wants: the lowest element must be the lowest element of
+/// vector 2, and the other elements must come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+                               bool V2IsSplat = false, bool V2IsUndef = false) {
+  int NumOps = VT.getVectorNumElements();
+  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+    return false;
+
+  if (!isUndefOrEqual(Mask[0], 0))
+    return false;
+
+  for (int i = 1; i < NumOps; ++i)
+    if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+          (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+          (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+      return false;
+
+  return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+                           bool V2IsUndef = false) {
+  SmallVector<int, 8> M;
+  N->getMask(M);
+  return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
+  if (N->getValueType(0).getVectorNumElements() != 4)
+    return false;
+
+  // Expect 1, 1, 3, 3
+  for (unsigned i = 0; i < 2; ++i) {
+    int Elt = N->getMaskElt(i);
+    if (Elt >= 0 && Elt != 1)
+      return false;
+  }
+
+  bool HasHi = false;
+  for (unsigned i = 2; i < 4; ++i) {
+    int Elt = N->getMaskElt(i);
+    if (Elt >= 0 && Elt != 3)
+      return false;
+    if (Elt == 3)
+      HasHi = true;
+  }
+  // Don't use movshdup if it can be done with a shufps.
+  // FIXME: verify that matching u, u, 3, 3 is what we want.
+  return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
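+/// E.g. the canonical v4f32 movsldup mask is <0,0,2,2>: each even-indexed
+/// source element is duplicated into the adjacent odd lane.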
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) + return false; + + // Expect 0, 0, 2, 2 + for (unsigned i = 0; i < 2; ++i) + if (N->getMaskElt(i) > 0) + return false; + + bool HasHi = false; + for (unsigned i = 2; i < 4; ++i) { + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 2) + return false; + if (Elt == 2) + HasHi = true; + } + // Don't use movsldup if it can be done with a shufps. + return HasHi; +} + +/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVDDUP. +bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { + int e = N->getValueType(0).getVectorNumElements() / 2; + + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i)) + return false; + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(e+i), i)) + return false; + return true; +} + +/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* +/// instructions. +unsigned X86::getShuffleSHUFImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); + int NumOperands = SVOp->getValueType(0).getVectorNumElements(); + + unsigned Shift = (NumOperands == 4) ? 2 : 1; + unsigned Mask = 0; + for (int i = 0; i < NumOperands; ++i) { + int Val = SVOp->getMaskElt(NumOperands-i-1); + if (Val < 0) Val = 0; + if (Val >= NumOperands) Val -= NumOperands; + Mask |= Val; + if (i != NumOperands - 1) + Mask <<= Shift; + } + return Mask; +} + +/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW +/// instructions. +unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); + unsigned Mask = 0; + // 8 nodes, but we only care about the last 4. + for (unsigned i = 7; i >= 4; --i) { + int Val = SVOp->getMaskElt(i); + if (Val >= 0) + Mask |= (Val - 4); + if (i != 4) + Mask <<= 2; + } + return Mask; +} + +/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle +/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW +/// instructions. +unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); + unsigned Mask = 0; + // 8 nodes, but we only care about the first 4. + for (int i = 3; i >= 0; --i) { + int Val = SVOp->getMaskElt(i); + if (Val >= 0) + Mask |= Val; + if (i != 0) + Mask <<= 2; + } + return Mask; +} + +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in +/// their permute mask. +static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; + + for (unsigned i = 0; i != NumElems; ++i) { + int idx = SVOp->getMaskElt(i); + if (idx < 0) + MaskVec.push_back(idx); + else if (idx < (int)NumElems) + MaskVec.push_back(idx + NumElems); + else + MaskVec.push_back(idx - NumElems); + } + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), + SVOp->getOperand(0), &MaskVec[0]); +} + +/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming +/// the two vector operands have swapped position. 
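+/// E.g. for v4i32 (illustrative): the mask <0,5,2,7> becomes <4,1,6,3>,
+/// since indices below NumElems move to the second operand and vice versa.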
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+  unsigned NumElems = VT.getVectorNumElements();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int idx = Mask[i];
+    if (idx < 0)
+      continue;
+    else if (idx < (int)NumElems)
+      Mask[i] = idx + NumElems;
+    else
+      Mask[i] = idx - NumElems;
+  }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+  if (Op->getValueType(0).getVectorNumElements() != 4)
+    return false;
+  for (unsigned i = 0, e = 2; i != e; ++i)
+    if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+      return false;
+  for (unsigned i = 2; i != 4; ++i)
+    if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+      return false;
+  return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+    return false;
+  N = N->getOperand(0).getNode();
+  if (!ISD::isNON_EXTLoad(N))
+    return false;
+  if (LD)
+    *LD = cast<LoadSDNode>(N);
+  return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+                               ShuffleVectorSDNode *Op) {
+  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+    return false;
+  // If V2 is a vector load, don't do this transformation. We will try to use
+  // a load-folding shufps instead.
+  if (ISD::isNON_EXTLoad(V2))
+    return false;
+
+  unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+
+  if (NumElems != 2 && NumElems != 4)
+    return false;
+  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+    if (!isUndefOrEqual(Op->getMaskElt(i), i))
+      return false;
+  for (unsigned i = NumElems/2; i != NumElems; ++i)
+    if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+      return false;
+  return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  SDValue SplatValue = N->getOperand(0);
+  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i) != SplatValue)
+      return false;
+  return true;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDValue Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
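+/// E.g. (illustrative) a v4i32 shuffle of V1 with an all-zeros build_vector
+/// using mask <4,5,6,7> selects only zero elements, so it folds to zero.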
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode +static bool isZeroShuffle(ShuffleVectorSDNode *N) { + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + for (unsigned i = 0; i != NumElems; ++i) { + int Idx = N->getMaskElt(i); + if (Idx >= (int)NumElems) { + unsigned Opc = V2.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + continue; + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) + return false; + } else if (Idx >= 0) { + unsigned Opc = V1.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + continue; + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) + return false; + } + } + return true; +} + +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG, + DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest + // type. This ensures they get CSE'd. + SDValue Vec; + if (VT.getSizeInBits() == 64) { // MMX + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + } else if (HasSSE2) { // SSE2 + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + } else { // SSE1 + SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + +/// getOnesVector - Returns a vector of specified type with all bits set. +/// +static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest + // type. This ensures they get CSE'd. + SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + SDValue Vec; + if (VT.getSizeInBits() == 64) // MMX + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + else // SSE + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + + +/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements +/// that point to V2 points to its first element. +static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + bool Changed = false; + SmallVector MaskVec; + SVOp->getMask(MaskVec); + + for (unsigned i = 0; i != NumElems; ++i) { + if (MaskVec[i] > (int)NumElems) { + MaskVec[i] = NumElems; + Changed = true; + } + } + if (Changed) + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), + SVOp->getOperand(1), &MaskVec[0]); + return SDValue(SVOp, 0); +} + +/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd +/// operation of specified width. +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + Mask.push_back(NumElems); + for (unsigned i = 1; i != NumElems; ++i) + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. 
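+/// E.g. for v4f32 the generated mask is <0, 4, 1, 5>, interleaving the low
+/// halves of V1 and V2 exactly as unpcklps does.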
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + for (unsigned i = 0, e = NumElems/2; i != e; ++i) { + Mask.push_back(i); + Mask.push_back(i + NumElems); + } + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + unsigned Half = NumElems/2; + SmallVector Mask; + for (unsigned i = 0; i != Half; ++i) { + Mask.push_back(i + Half); + Mask.push_back(i + NumElems + Half); + } + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE2) { + if (SV->getValueType(0).getVectorNumElements() <= 4) + return SDValue(SV, 0); + + MVT PVT = MVT::v4f32; + MVT VT = SV->getValueType(0); + DebugLoc dl = SV->getDebugLoc(); + SDValue V1 = SV->getOperand(0); + int NumElems = VT.getVectorNumElements(); + int EltNo = SV->getSplatIndex(); + + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + V1 = getUnpackl(DAG, dl, VT, V1, V1); + } else { + V1 = getUnpackh(DAG, dl, VT, V1, V1); + EltNo -= NumElems/2; + } + NumElems >>= 1; + } + + // Perform the splat. + int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); +} + +/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified +/// vector of zero or undef vector. This produces a shuffle where the low +/// element of V2 is swizzled into the zero/undef vector, landing at element +/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). +static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, + bool isZero, bool HasSSE2, + SelectionDAG &DAG) { + MVT VT = V2.getValueType(); + SDValue V1 = isZero + ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; + for (unsigned i = 0; i != NumElems; ++i) + // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(i == Idx ? NumElems : i); + return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); +} + +/// getNumOfConsecutiveZeros - Return the number of elements in a result of +/// a shuffle that is zero. +static +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, + bool Low, SelectionDAG &DAG) { + unsigned NumZeros = 0; + for (int i = 0; i < NumElems; ++i) { + unsigned Index = Low ? i : NumElems-i-1; + int Idx = SVOp->getMaskElt(Index); + if (Idx < 0) { + ++NumZeros; + continue; + } + SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); + if (Elt.getNode() && isZeroNode(Elt)) + ++NumZeros; + else + break; + } + return NumZeros; +} + +/// isVectorShift - Returns true if the shuffle can be implemented as a +/// logical left or right shift of a vector. +/// FIXME: split into pslldqi, psrldqi, palignr variants. 
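+/// E.g. (illustrative): with V2 known to be all zeros, the v4i32 mask
+/// <4,5,6,0> leaves three zero elements below V1's element 0, i.e. a logical
+/// left shift of V1 by three elements (ShVal = V1, ShAmt = 3, isLeft).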
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + int NumElems = SVOp->getValueType(0).getVectorNumElements(); + + isLeft = true; + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); + if (!NumZeros) { + isLeft = false; + NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG); + if (!NumZeros) + return false; + } + bool SeenV1 = false; + bool SeenV2 = false; + for (int i = NumZeros; i < NumElems; ++i) { + int Val = isLeft ? (i - NumZeros) : i; + int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx < 0) + continue; + if (Idx < NumElems) + SeenV1 = true; + else { + Idx -= NumElems; + SeenV2 = true; + } + if (Idx != Val) + return false; + } + if (SeenV1 && SeenV2) + return false; + + ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); + ShAmt = NumZeros; + return true; +} + + +/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. +/// +static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, TargetLowering &TLI) { + if (NumNonZero > 8) + return SDValue(); + + DebugLoc dl = Op.getDebugLoc(); + SDValue V(0, 0); + bool First = true; + for (unsigned i = 0; i < 16; ++i) { + bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; + if (ThisIsNonZero && First) { + if (NumZero) + V = getZeroVector(MVT::v8i16, true, DAG, dl); + else + V = DAG.getUNDEF(MVT::v8i16); + First = false; + } + + if ((i & 1) != 0) { + SDValue ThisElt(0, 0), LastElt(0, 0); + bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; + if (LastIsNonZero) { + LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i16, Op.getOperand(i-1)); + } + if (ThisIsNonZero) { + ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i)); + ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, + ThisElt, DAG.getConstant(8, MVT::i8)); + if (LastIsNonZero) + ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt); + } else + ThisElt = LastElt; + + if (ThisElt.getNode()) + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, + DAG.getIntPtrConstant(i/2)); + } + } + + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V); +} + +/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. +/// +static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, TargetLowering &TLI) { + if (NumNonZero > 4) + return SDValue(); + + DebugLoc dl = Op.getDebugLoc(); + SDValue V(0, 0); + bool First = true; + for (unsigned i = 0; i < 8; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v8i16, true, DAG, dl); + else + V = DAG.getUNDEF(MVT::v8i16); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + MVT::v8i16, V, Op.getOperand(i), + DAG.getIntPtrConstant(i)); + } + } + + return V; +} + +/// getVShift - Return a vector logical shift node. +/// +static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { + bool isMMX = VT.getSizeInBits() == 64; + MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + unsigned Opc = isLeft ? 
X86ISD::VSHL : X86ISD::VSRL; + SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getNode(Opc, dl, ShVT, SrcOp, + DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); +} + +SDValue +X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // All zero's are handled with pxor, all one's are handled with pcmpeqd. + if (ISD::isBuildVectorAllZeros(Op.getNode()) + || ISD::isBuildVectorAllOnes(Op.getNode())) { + // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to + // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are + // eliminated on x86-32 hosts. + if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) + return Op; + + if (ISD::isBuildVectorAllOnes(Op.getNode())) + return getOnesVector(Op.getValueType(), DAG, dl); + return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); + } + + MVT VT = Op.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned EVTBits = EVT.getSizeInBits(); + + unsigned NumElems = Op.getNumOperands(); + unsigned NumZero = 0; + unsigned NumNonZero = 0; + unsigned NonZeros = 0; + bool IsAllConstants = true; + SmallSet Values; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue Elt = Op.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) + continue; + Values.insert(Elt); + if (Elt.getOpcode() != ISD::Constant && + Elt.getOpcode() != ISD::ConstantFP) + IsAllConstants = false; + if (isZeroNode(Elt)) + NumZero++; + else { + NonZeros |= (1 << i); + NumNonZero++; + } + } + + if (NumNonZero == 0) { + // All undef vector. Return an UNDEF. All zero vectors were handled above. + return DAG.getUNDEF(VT); + } + + // Special case for single non-zero, non-undef, element. + if (NumNonZero == 1 && NumElems <= 4) { + unsigned Idx = CountTrailingZeros_32(NonZeros); + SDValue Item = Op.getOperand(Idx); + + // If this is an insertion of an i64 value on x86-32, and if the top bits of + // the value are obviously zero, truncate the value to i32 and do the + // insertion that way. Only do this if the value is non-constant or if the + // value is a constant being inserted into element 0. It is cheaper to do + // a constant pool load than it is to do a movd + shuffle. + if (EVT == MVT::i64 && !Subtarget->is64Bit() && + (!IsAllConstants || Idx == 0)) { + if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { + // Handle MMX and SSE both. + MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; + unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; + + // Truncate the value (which may itself be a constant) to i32, and + // convert it to a vector with movd (S2V+shuffle to zero extend). + Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, + Subtarget->hasSSE2(), DAG); + + // Now we have our 32-bit value zero extended in the low element of + // a vector. If Idx != 0, swizzle it into place. + if (Idx != 0) { + SmallVector Mask; + Mask.push_back(Idx); + for (unsigned i = 1; i != VecElts; ++i) + Mask.push_back(i); + Item = DAG.getVectorShuffle(VecVT, dl, Item, + DAG.getUNDEF(Item.getValueType()), + &Mask[0]); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); + } + } + + // If we have a constant or non-constant insertion into the low element of + // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into + // the rest of the elements. 
This will be matched as movd/movq/movss/movsd + // depending on what the source datatype is. Because we can only get here + // when NumElems <= 4, this only needs to handle i32/f32/i64/f64. + if (Idx == 0 && + // Don't do this for i64 values on x86-32. + (EVT != MVT::i64 || Subtarget->is64Bit())) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. + return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, + Subtarget->hasSSE2(), DAG); + } + + // Is it a vector logical left shift? + if (NumElems == 2 && Idx == 1 && + isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) { + unsigned NumBits = VT.getSizeInBits(); + return getVShift(true, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + VT, Op.getOperand(1)), + NumBits/2, DAG, *this, dl); + } + + if (IsAllConstants) // Otherwise, it's better to do a constpool load. + return SDValue(); + + // Otherwise, if this is a vector with i32 or f32 elements, and the element + // is a non-constant being inserted into an element other than the low one, + // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka + // movd/movss) to move this into the low element, then shuffle it into + // place. + if (EVTBits == 32) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + + // Turn it into a shuffle of zero and zero-extended scalar to vector. + Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, + Subtarget->hasSSE2(), DAG); + SmallVector MaskVec; + for (unsigned i = 0; i < NumElems; i++) + MaskVec.push_back(i == Idx ? 0 : 1); + return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); + } + } + + // Splat is obviously ok. Let legalizer expand it to a shuffle. + if (Values.size() == 1) + return SDValue(); + + // A vector full of immediates; various special cases are already + // handled, so this is best done with a single constant-pool load. + if (IsAllConstants) + return SDValue(); + + // Let legalizer expand 2-wide build_vectors. + if (EVTBits == 64) { + if (NumNonZero == 1) { + // One half is zero or undef. + unsigned Idx = CountTrailingZeros_32(NonZeros); + SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, + Op.getOperand(Idx)); + return getShuffleVectorZeroOrUndef(V2, Idx, true, + Subtarget->hasSSE2(), DAG); + } + return SDValue(); + } + + // If element VT is < 32 bits, convert it to inserts into a zero vector. + if (EVTBits == 8 && NumElems == 16) { + SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, + *this); + if (V.getNode()) return V; + } + + if (EVTBits == 16 && NumElems == 8) { + SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, + *this); + if (V.getNode()) return V; + } + + // If element VT is == 32 bits, turn it into a number of shuffles. + SmallVector V; + V.resize(NumElems); + if (NumElems == 4 && NumZero > 0) { + for (unsigned i = 0; i < 4; ++i) { + bool isZero = !(NonZeros & (1 << i)); + if (isZero) + V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + else + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); + } + + for (unsigned i = 0; i < 2; ++i) { + switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { + default: break; + case 0: + V[i] = V[i*2]; // Must be a zero vector. 
+ break; + case 1: + V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); + break; + case 2: + V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); + break; + case 3: + V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); + break; + } + } + + SmallVector MaskVec; + bool Reverse = (NonZeros & 0x3) == 2; + for (unsigned i = 0; i < 2; ++i) + MaskVec.push_back(Reverse ? 1-i : i); + Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; + for (unsigned i = 0; i < 2; ++i) + MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); + return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); + } + + if (Values.size() > 2) { + // If we have SSE 4.1, Expand into a number of inserts unless the number of + // values to be inserted is equal to the number of elements, in which case + // use the unpack code below in the hopes of matching the consecutive elts + // load merge pattern for shuffles. + // FIXME: We could probably just check that here directly. + if (Values.size() < NumElems && VT.getSizeInBits() == 128 && + getSubtarget()->hasSSE41()) { + V[0] = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NumElems; ++i) + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + Op.getOperand(i), DAG.getIntPtrConstant(i)); + return V[0]; + } + // Expand into a number of unpckl*. + // e.g. for v4f32 + // Step 1: unpcklps 0, 2 ==> X: + // : unpcklps 1, 3 ==> Y: + // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); + NumElems >>= 1; + while (NumElems != 0) { + for (unsigned i = 0; i < NumElems; ++i) + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); + NumElems >>= 1; + } + return V[0]; + } + + return SDValue(); +} + +// v8i16 shuffles - Prefer shuffles in the following order: +// 1. [all] pshuflw, pshufhw, optional move +// 2. [ssse3] 1 x pshufb +// 3. [ssse3] 2 x pshufb + 1 x por +// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) +static +SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + SmallVector MaskVals; + + // Determine if more than 1 of the words in each of the low and high quadwords + // of the result come from the same quadword of one of the two inputs. Undef + // mask values count as coming from any quadword, for better codegen. + SmallVector LoQuad(4); + SmallVector HiQuad(4); + BitVector InputQuads(4); + for (unsigned i = 0; i < 8; ++i) { + SmallVectorImpl &Quad = i < 4 ? LoQuad : HiQuad; + int EltIdx = SVOp->getMaskElt(i); + MaskVals.push_back(EltIdx); + if (EltIdx < 0) { + ++Quad[0]; + ++Quad[1]; + ++Quad[2]; + ++Quad[3]; + continue; + } + ++Quad[EltIdx / 4]; + InputQuads.set(EltIdx / 4); + } + + int BestLoQuad = -1; + unsigned MaxQuad = 1; + for (unsigned i = 0; i < 4; ++i) { + if (LoQuad[i] > MaxQuad) { + BestLoQuad = i; + MaxQuad = LoQuad[i]; + } + } + + int BestHiQuad = -1; + MaxQuad = 1; + for (unsigned i = 0; i < 4; ++i) { + if (HiQuad[i] > MaxQuad) { + BestHiQuad = i; + MaxQuad = HiQuad[i]; + } + } + + // For SSSE3, If all 8 words of the result come from only 1 quadword of each + // of the two input vectors, shuffle them into one input vector so only a + // single pshufb instruction is necessary. If There are more than 2 input + // quads, disable the next transformation since it does not help SSSE3. 
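+  // (Recall how pshufb works: each destination byte selects any source byte
+  // by index, and an index with the high bit set (0x80) yields zero. So one
+  // pshufb can realize any single-input byte permutation, while two inputs
+  // need two pshufbs whose results are OR'd together, as done below.)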
+ bool V1Used = InputQuads[0] || InputQuads[1]; + bool V2Used = InputQuads[2] || InputQuads[3]; + if (TLI.getSubtarget()->hasSSSE3()) { + if (InputQuads.count() == 2 && V1Used && V2Used) { + BestLoQuad = InputQuads.find_first(); + BestHiQuad = InputQuads.find_next(BestLoQuad); + } + if (InputQuads.count() > 2) { + BestLoQuad = -1; + BestHiQuad = -1; + } + } + + // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update + // the shuffle mask. If a quad is scored as -1, that means that it contains + // words from all 4 input quadwords. + SDValue NewV; + if (BestLoQuad >= 0 || BestHiQuad >= 0) { + SmallVector MaskV; + MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); + MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); + NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); + + // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the + // source words for the shuffle, to aid later transformations. + bool AllWordsInNewV = true; + bool InOrder[2] = { true, true }; + for (unsigned i = 0; i != 8; ++i) { + int idx = MaskVals[i]; + if (idx != (int)i) + InOrder[i/4] = false; + if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad) + continue; + AllWordsInNewV = false; + break; + } + + bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV; + if (AllWordsInNewV) { + for (int i = 0; i != 8; ++i) { + int idx = MaskVals[i]; + if (idx < 0) + continue; + idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4; + if ((idx != i) && idx < 4) + pshufhw = false; + if ((idx != i) && idx > 3) + pshuflw = false; + } + V1 = NewV; + V2Used = false; + BestLoQuad = 0; + BestHiQuad = 1; + } + + // If we've eliminated the use of V2, and the new mask is a pshuflw or + // pshufhw, that's as cheap as it gets. Return the new shuffle. + if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); + } + } + + // If we have SSSE3, and all words of the result are from 1 input vector, + // case 2 is generated, otherwise case 3 is generated. If no SSSE3 + // is present, fall back to case 4. + if (TLI.getSubtarget()->hasSSSE3()) { + SmallVector pshufbMask; + + // If we have elements from both input vectors, set the high bit of the + // shuffle mask element to zero out elements that come from V2 in the V1 + // mask, and elements that come from V1 in the V2 mask, so that the two + // results can be OR'd together. + bool TwoInputs = V1Used && V2Used; + for (unsigned i = 0; i != 8; ++i) { + int EltIdx = MaskVals[i] * 2; + if (TwoInputs && (EltIdx >= 16)) { + pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + continue; + } + pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); + } + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1); + V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v16i8, &pshufbMask[0], 16)); + if (!TwoInputs) + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); + + // Calculate the shuffle mask for the second input, shuffle it, and + // OR it with the first shuffled input. 
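+    // E.g. (illustrative): if result word i comes from V2, the V1 mask holds
+    // 0x80,0x80 for its two bytes (producing zeros) while the V2 mask holds
+    // the real byte indices, so the final OR merges the two partial results.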
+    pshufbMask.clear();
+    for (unsigned i = 0; i != 8; ++i) {
+      int EltIdx = MaskVals[i] * 2;
+      if (EltIdx < 16) {
+        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+        continue;
+      }
+      pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+      pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+    }
+    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, dl,
+                                 MVT::v16i8, &pshufbMask[0], 16));
+    V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+  }
+
+  // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+  // and update MaskVals with new element order.
+  BitVector InOrder(8);
+  if (BestLoQuad >= 0) {
+    SmallVector<int, 8> MaskV;
+    for (int i = 0; i != 4; ++i) {
+      int idx = MaskVals[i];
+      if (idx < 0) {
+        MaskV.push_back(-1);
+        InOrder.set(i);
+      } else if ((idx / 4) == BestLoQuad) {
+        MaskV.push_back(idx & 3);
+        InOrder.set(i);
+      } else {
+        MaskV.push_back(-1);
+      }
+    }
+    for (unsigned i = 4; i != 8; ++i)
+      MaskV.push_back(i);
+    NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+                                &MaskV[0]);
+  }
+
+  // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
+  // and update MaskVals with the new element order.
+  if (BestHiQuad >= 0) {
+    SmallVector<int, 8> MaskV;
+    for (unsigned i = 0; i != 4; ++i)
+      MaskV.push_back(i);
+    for (unsigned i = 4; i != 8; ++i) {
+      int idx = MaskVals[i];
+      if (idx < 0) {
+        MaskV.push_back(-1);
+        InOrder.set(i);
+      } else if ((idx / 4) == BestHiQuad) {
+        MaskV.push_back((idx & 3) + 4);
+        InOrder.set(i);
+      } else {
+        MaskV.push_back(-1);
+      }
+    }
+    NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+                                &MaskV[0]);
+  }
+
+  // In case BestHi & BestLo were both -1, which means each quadword has a word
+  // from each of the four input quadwords, calculate the InOrder bitvector now
+  // before falling through to the insert/extract cleanup.
+  if (BestLoQuad == -1 && BestHiQuad == -1) {
+    NewV = V1;
+    for (int i = 0; i != 8; ++i)
+      if (MaskVals[i] < 0 || MaskVals[i] == i)
+        InOrder.set(i);
+  }
+
+  // The other elements are put in the right place using pextrw and pinsrw.
+  for (unsigned i = 0; i != 8; ++i) {
+    if (InOrder[i])
+      continue;
+    int EltIdx = MaskVals[i];
+    if (EltIdx < 0)
+      continue;
+    SDValue ExtOp = (EltIdx < 8)
+      ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+                    DAG.getIntPtrConstant(EltIdx))
+      : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+                    DAG.getIntPtrConstant(EltIdx - 8));
+    NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+                       DAG.getIntPtrConstant(i));
+  }
+  return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all]   v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+                                 SelectionDAG &DAG, X86TargetLowering &TLI) {
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  SmallVector<int, 16> MaskVals;
+  SVOp->getMask(MaskVals);
+
+  // If we have SSSE3, case 1 is generated when all result bytes come from
+  // one of the inputs.  Otherwise, case 2 is generated.  If no SSSE3 is
+  // present, fall back to case 3.
+  // FIXME: kill V2Only once shuffles are canonicalized by getNode.
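+  // Sketch of the two-input case (illustrative): with a mask like
+  // <0,17,2,19,...>, the first pshufb keeps V1's bytes and writes 0x80 (zero)
+  // where V2's bytes belong; the second pshufb keeps V2's bytes with indices
+  // rebased by -16; a por of the two partial results assembles the final
+  // vector.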
+  bool V1Only = true;
+  bool V2Only = true;
+  for (unsigned i = 0; i < 16; ++i) {
+    int EltIdx = MaskVals[i];
+    if (EltIdx < 0)
+      continue;
+    if (EltIdx < 16)
+      V2Only = false;
+    else
+      V1Only = false;
+  }
+
+  // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+  if (TLI.getSubtarget()->hasSSSE3()) {
+    SmallVector<SDValue, 16> pshufbMask;
+
+    // If all result elements are from one input vector, then only translate
+    // undef mask values to 0x80 (zero out result) in the pshufb mask.
+    //
+    // Otherwise, we have elements from both input vectors, and must zero out
+    // elements that come from V2 in the first mask, and V1 in the second mask
+    // so that we can OR them together.
+    bool TwoInputs = !(V1Only || V2Only);
+    for (unsigned i = 0; i != 16; ++i) {
+      int EltIdx = MaskVals[i];
+      if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+        continue;
+      }
+      pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+    }
+    // If all the elements are from V2, assign it to V1 and return after
+    // building the first pshufb.
+    if (V2Only)
+      V1 = V2;
+    V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+                     DAG.getNode(ISD::BUILD_VECTOR, dl,
+                                 MVT::v16i8, &pshufbMask[0], 16));
+    if (!TwoInputs)
+      return V1;
+
+    // Calculate the shuffle mask for the second input, shuffle it, and
+    // OR it with the first shuffled input.
+    pshufbMask.clear();
+    for (unsigned i = 0; i != 16; ++i) {
+      int EltIdx = MaskVals[i];
+      if (EltIdx < 16) {
+        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+        continue;
+      }
+      pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+    }
+    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, dl,
+                                 MVT::v16i8, &pshufbMask[0], 16));
+    return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+  }
+
+  // No SSSE3 - Calculate in place words and then fix all out of place words
+  // with 0-16 extracts & inserts.  Worst case is 16 bytes out of order from
+  // the 16 different words that comprise the two double quadword input vectors.
+  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+  SDValue NewV = V2Only ? V2 : V1;
+  for (int i = 0; i != 8; ++i) {
+    int Elt0 = MaskVals[i*2];
+    int Elt1 = MaskVals[i*2+1];
+
+    // This word of the result is all undef, skip it.
+    if (Elt0 < 0 && Elt1 < 0)
+      continue;
+
+    // This word of the result is already in the correct place, skip it.
+    if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+      continue;
+    if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+      continue;
+
+    SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+    SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+    SDValue InsElt;
+
+    // If Elt0 and Elt1 are defined, are consecutive, and can be loaded
+    // together using a single extract, load it and store it.
+    if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+                           DAG.getIntPtrConstant(Elt1 / 2));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+                         DAG.getIntPtrConstant(i));
+      continue;
+    }
+
+    // If Elt1 is defined, extract it from the appropriate source.  If the
+    // source byte is not also odd, shift the extracted word left 8 bits,
+    // otherwise clear the bottom 8 bits if we need to do an or.
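+    // For example: Elt1 = 4 names the low byte of source word 2, so the
+    // extracted word is shifted left 8 bits to move that byte into the high
+    // half; Elt1 = 5 already sits in the high byte, so only the low 8 bits
+    // are masked off when a following OR will merge in Elt0's byte.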
+    if (Elt1 >= 0) {
+      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+                           DAG.getIntPtrConstant(Elt1 / 2));
+      if ((Elt1 & 1) == 0)
+        InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+                             DAG.getConstant(8, TLI.getShiftAmountTy()));
+      else if (Elt0 >= 0)
+        InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+                             DAG.getConstant(0xFF00, MVT::i16));
+    }
+    // If Elt0 is defined, extract it from the appropriate source.  If the
+    // source byte is not also even, shift the extracted word right 8 bits. If
+    // Elt1 was also defined, OR the extracted values together before
+    // inserting them in the result.
+    if (Elt0 >= 0) {
+      SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+                                    Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+      if ((Elt0 & 1) != 0)
+        InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+                              DAG.getConstant(8, TLI.getShiftAmountTy()));
+      else if (Elt1 >= 0)
+        InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+                              DAG.getConstant(0x00FF, MVT::i16));
+      InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+                         : InsElt0;
+    }
+    NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+                       DAG.getIntPtrConstant(i));
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+                                 SelectionDAG &DAG,
+                                 TargetLowering &TLI, DebugLoc dl) {
+  MVT VT = SVOp->getValueType(0);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+  MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+  MVT MaskEltVT = MaskVT.getVectorElementType();
+  MVT NewVT = MaskVT;
+  switch (VT.getSimpleVT()) {
+  default: assert(false && "Unexpected!");
+  case MVT::v4f32: NewVT = MVT::v2f64; break;
+  case MVT::v4i32: NewVT = MVT::v2i64; break;
+  case MVT::v8i16: NewVT = MVT::v4i32; break;
+  case MVT::v16i8: NewVT = MVT::v4i32; break;
+  }
+
+  if (NewWidth == 2) {
+    if (VT.isInteger())
+      NewVT = MVT::v2i64;
+    else
+      NewVT = MVT::v2f64;
+  }
+  int Scale = NumElems / NewWidth;
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i < NumElems; i += Scale) {
+    int StartIdx = -1;
+    for (int j = 0; j < Scale; ++j) {
+      int EltIdx = SVOp->getMaskElt(i+j);
+      if (EltIdx < 0)
+        continue;
+      if (StartIdx == -1)
+        StartIdx = EltIdx - (EltIdx % Scale);
+      if (EltIdx != StartIdx + j)
+        return SDValue();
+    }
+    if (StartIdx == -1)
+      MaskVec.push_back(-1);
+    else
+      MaskVec.push_back(StartIdx / Scale);
+  }
+
+  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
+  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+  return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
+                            SDValue SrcOp, SelectionDAG &DAG,
+                            const X86Subtarget *Subtarget, DebugLoc dl) {
+  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+    LoadSDNode *LD = NULL;
+    if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+      LD = dyn_cast<LoadSDNode>(SrcOp);
+    if (!LD) {
+      // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+      // instead.
+      MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+      if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+          SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+        // PR2108
+        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                           DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                                   OpVT,
+                                                   SrcOp.getOperand(0)
+                                                     .getOperand(0))));
+      }
+    }
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                     DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+                                 DAG.getNode(ISD::BIT_CONVERT, dl,
+                                             OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
+/// shuffles.
+static SDValue
+LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  MVT VT = SVOp->getValueType(0);
+
+  SmallVector<std::pair<int, int>, 8> Locs;
+  Locs.resize(4);
+  SmallVector<int, 8> Mask1(4U, -1);
+  SmallVector<int, 8> PermMask;
+  SVOp->getMask(PermMask);
+
+  unsigned NumHi = 0;
+  unsigned NumLo = 0;
+  for (unsigned i = 0; i != 4; ++i) {
+    int Idx = PermMask[i];
+    if (Idx < 0) {
+      Locs[i] = std::make_pair(-1, -1);
+    } else {
+      assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+      if (Idx < 4) {
+        Locs[i] = std::make_pair(0, NumLo);
+        Mask1[NumLo] = Idx;
+        NumLo++;
+      } else {
+        Locs[i] = std::make_pair(1, NumHi);
+        if (2+NumHi < 4)
+          Mask1[2+NumHi] = Idx;
+        NumHi++;
+      }
+    }
+  }
+
+  if (NumLo <= 2 && NumHi <= 2) {
+    // If no more than two elements come from either vector, the shuffle can
+    // be implemented with two shuffles.  The first gathers the elements;
+    // the second, which takes the first shuffle as both of its vector
+    // operands, puts the elements into the right order.
+    V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+    SmallVector<int, 8> Mask2(4U, -1);
+
+    for (unsigned i = 0; i != 4; ++i) {
+      if (Locs[i].first == -1)
+        continue;
+      else {
+        unsigned Idx = (i < 2) ? 0 : 4;
+        Idx += Locs[i].first * 2 + Locs[i].second;
+        Mask2[i] = Idx;
+      }
+    }
+
+    return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+  } else if (NumLo == 3 || NumHi == 3) {
+    // Otherwise, we must have three elements from one vector, call it X, and
+    // one element from the other, call it Y.  First, use a shufps to build an
+    // intermediate vector with the one element from Y and the element from X
+    // that will be in the same half in the final destination (the indexes
+    // don't matter).  Then, use a shufps to build the final vector, taking
+    // the half containing the element from Y from the intermediate, and the
+    // other half from X.
+    if (NumHi == 3) {
+      // Normalize it so the 3 elements come from V1.
+      CommuteVectorShuffleMask(PermMask, VT);
+      std::swap(V1, V2);
+    }
+
+    // Find the element from V2.
+    unsigned HiIndex;
+    for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+      int Val = PermMask[HiIndex];
+      if (Val < 0)
+        continue;
+      if (Val >= 4)
+        break;
+    }
+
+    Mask1[0] = PermMask[HiIndex];
+    Mask1[1] = -1;
+    Mask1[2] = PermMask[HiIndex^1];
+    Mask1[3] = -1;
+    V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+    if (HiIndex >= 2) {
+      Mask1[0] = PermMask[0];
+      Mask1[1] = PermMask[1];
+      Mask1[2] = HiIndex & 1 ? 6 : 4;
+      Mask1[3] = HiIndex & 1 ? 4 : 6;
+      return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+    } else {
+      Mask1[0] = HiIndex & 1 ? 2 : 0;
+      Mask1[1] = HiIndex & 1 ? 0 : 2;
+      Mask1[2] = PermMask[2];
+      Mask1[3] = PermMask[3];
+      if (Mask1[2] >= 0)
+        Mask1[2] += 4;
+      if (Mask1[3] >= 0)
+        Mask1[3] += 4;
+      return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+    }
+  }
+
+  // Break it into (shuffle shuffle_hi, shuffle_lo).
+  Locs.clear();
+  SmallVector<int, 8> LoMask(4U, -1);
+  SmallVector<int, 8> HiMask(4U, -1);
+
+  SmallVector<int, 8> *MaskPtr = &LoMask;
+  unsigned MaskIdx = 0;
+  unsigned LoIdx = 0;
+  unsigned HiIdx = 2;
+  for (unsigned i = 0; i != 4; ++i) {
+    if (i == 2) {
+      MaskPtr = &HiMask;
+      MaskIdx = 1;
+      LoIdx = 0;
+      HiIdx = 2;
+    }
+    int Idx = PermMask[i];
+    if (Idx < 0) {
+      Locs[i] = std::make_pair(-1, -1);
+    } else if (Idx < 4) {
+      Locs[i] = std::make_pair(MaskIdx, LoIdx);
+      (*MaskPtr)[LoIdx] = Idx;
+      LoIdx++;
+    } else {
+      Locs[i] = std::make_pair(MaskIdx, HiIdx);
+      (*MaskPtr)[HiIdx] = Idx;
+      HiIdx++;
+    }
+  }
+
+  SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+  SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+  SmallVector<int, 8> MaskOps;
+  for (unsigned i = 0; i != 4; ++i) {
+    if (Locs[i].first == -1) {
+      MaskOps.push_back(-1);
+    } else {
+      unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+      MaskOps.push_back(Idx);
+    }
+  }
+  return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned NumElems = VT.getVectorNumElements();
+  bool isMMX = VT.getSizeInBits() == 64;
+  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsSplat = false;
+  bool V2IsSplat = false;
+
+  if (isZeroShuffle(SVOp))
+    return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+
+  // Promote splats to v4f32.
+  if (SVOp->isSplat()) {
+    if (isMMX || NumElems < 4)
+      return Op;
+    return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+  }
+
+  // If the shuffle can be profitably rewritten as a narrower shuffle, then
+  // do it!
+  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+    if (NewOp.getNode())
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                         LowerVECTOR_SHUFFLE(NewOp, DAG));
+  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+    // FIXME: Figure out a cleaner way to do this.
+    // Try to make use of movq to zero out the top part.
+    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      if (NewOp.getNode()) {
+        if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+          return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+                              DAG, Subtarget, dl);
+      }
+    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+        return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+                            DAG, Subtarget, dl);
+    }
+  }
+
+  if (X86::isPSHUFDMask(SVOp))
+    return Op;
+
+  // Check if this can be converted into a logical shift.
+  bool isLeft = false;
+  unsigned ShAmt = 0;
+  SDValue ShVal;
+  bool isShift = getSubtarget()->hasSSE2() &&
+    isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+  if (isShift && ShVal.hasOneUse()) {
+    // If the shifted value has multiple uses, it may be cheaper to use
+    // v_set0 + movlhps or movhlps, etc.
+    MVT EVT = VT.getVectorElementType();
+    ShAmt *= EVT.getSizeInBits();
+    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+  }
+
+  if (X86::isMOVLMask(SVOp)) {
+    if (V1IsUndef)
+      return V2;
+    if (ISD::isBuildVectorAllZeros(V1.getNode()))
+      return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+    if (!isMMX)
+      return Op;
+  }
+
+  // FIXME: fold these into legal mask.
+  if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
+                 X86::isMOVSLDUPMask(SVOp) ||
+                 X86::isMOVHLPSMask(SVOp) ||
+                 X86::isMOVHPMask(SVOp) ||
+                 X86::isMOVLPMask(SVOp)))
+    return Op;
+
+  if (ShouldXformToMOVHLPS(SVOp) ||
+      ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+    return CommuteVectorShuffle(SVOp, DAG);
+
+  if (isShift) {
+    // No better options.  Use a vshl / vsrl.
+    MVT EVT = VT.getVectorElementType();
+    ShAmt *= EVT.getSizeInBits();
+    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+  }
+
+  bool Commuted = false;
+  // FIXME: This should also accept a bitcast of a splat?  Be careful, not
+  // 1,1,1,1 -> v8i16 though.
+  V1IsSplat = isSplatVector(V1.getNode());
+  V2IsSplat = isSplatVector(V2.getNode());
+
+  // Canonicalize the splat or undef, if present, to be on the RHS.
+  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+    Op = CommuteVectorShuffle(SVOp, DAG);
+    SVOp = cast<ShuffleVectorSDNode>(Op);
+    V1 = SVOp->getOperand(0);
+    V2 = SVOp->getOperand(1);
+    std::swap(V1IsSplat, V2IsSplat);
+    std::swap(V1IsUndef, V2IsUndef);
+    Commuted = true;
+  }
+
+  if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+    // Shuffling low element of v1 into undef, just return v1.
+    if (V2IsUndef)
+      return V1;
+    // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+    // the instruction selector will not match, so get a canonical MOVL with
+    // swapped operands to undo the commute.
+    return getMOVL(DAG, dl, VT, V2, V1);
+  }
+
+  if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
+      X86::isUNPCKH_v_undef_Mask(SVOp) ||
+      X86::isUNPCKLMask(SVOp) ||
+      X86::isUNPCKHMask(SVOp))
+    return Op;
+
+  if (V2IsSplat) {
+    // Normalize the mask so all entries that point to V2 point to its first
+    // element, then try to match unpck{h|l} again.  If a match is found,
+    // return a new vector_shuffle with the corrected mask.
+    SDValue NewMask = NormalizeMask(SVOp, DAG);
+    ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+    if (NSVOp != SVOp) {
+      if (X86::isUNPCKLMask(NSVOp, true)) {
+        return NewMask;
+      } else if (X86::isUNPCKHMask(NSVOp, true)) {
+        return NewMask;
+      }
+    }
+  }
+
+  if (Commuted) {
+    // Commute it back and try unpck* again.
+    // FIXME: this seems wrong.
+    SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+    ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+    if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
+        X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
+        X86::isUNPCKLMask(NewSVOp) ||
+        X86::isUNPCKHMask(NewSVOp))
+      return NewOp;
+  }
+
+  // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
+
+  // Normalize the node to match x86 shuffle ops if needed
+  if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+    return CommuteVectorShuffle(SVOp, DAG);
+
+  // Check for legal shuffle and return?
+  SmallVector<int, 16> PermMask;
+  SVOp->getMask(PermMask);
+  if (isShuffleMaskLegal(PermMask, VT))
+    return Op;
+
+  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+  if (VT == MVT::v8i16) {
+    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+    if (NewOp.getNode())
+      return NewOp;
+  }
+
+  if (VT == MVT::v16i8) {
+    SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+    if (NewOp.getNode())
+      return NewOp;
+  }
+
+  // Handle all 4 wide cases with a number of shuffles except for MMX.
+  if (NumElems == 4 && !isMMX)
+    return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+                                                SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  if (VT.getSizeInBits() == 8) {
+    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+                                  Op.getOperand(0), Op.getOperand(1));
+    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+                                 DAG.getValueType(VT));
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+  } else if (VT.getSizeInBits() == 16) {
+    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+    if (Idx == 0)
+      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+                                     DAG.getNode(ISD::BIT_CONVERT, dl,
+                                                 MVT::v4i32,
+                                                 Op.getOperand(0)),
+                                     Op.getOperand(1)));
+    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+                                  Op.getOperand(0), Op.getOperand(1));
+    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+                                 DAG.getValueType(VT));
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+  } else if (VT == MVT::f32) {
+    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+    // the result back to FR32 register.  It's only worth matching if the
+    // result has a single use which is a store or a bitcast to i32.  And in
+    // the case of a store, it's not worth it if the index is a constant 0,
+    // because a MOVSSmr can be used instead, which is smaller and faster.
+    if (!Op.hasOneUse())
+      return SDValue();
+    SDNode *User = *Op.getNode()->use_begin();
+    if ((User->getOpcode() != ISD::STORE ||
+         (isa<ConstantSDNode>(Op.getOperand(1)) &&
+          cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+        (User->getOpcode() != ISD::BIT_CONVERT ||
+         User->getValueType(0) != MVT::i32))
+      return SDValue();
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+                                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+                                              Op.getOperand(0)),
+                                  Op.getOperand(1));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+  } else if (VT == MVT::i32) {
+    // ExtractPS works with constant index.
+    if (isa<ConstantSDNode>(Op.getOperand(1)))
+      return Op;
+  }
+  return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  if (!isa<ConstantSDNode>(Op.getOperand(1)))
+    return SDValue();
+
+  if (Subtarget->hasSSE41()) {
+    SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+    if (Res.getNode())
+      return Res;
+  }
+
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  // TODO: handle v16i8.
+  if (VT.getSizeInBits() == 16) {
+    SDValue Vec = Op.getOperand(0);
+    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    if (Idx == 0)
+      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+                                     DAG.getNode(ISD::BIT_CONVERT, dl,
+                                                 MVT::v4i32, Vec),
+                                     Op.getOperand(1)));
+    // Transform it so it matches pextrw, which produces a 32-bit result.
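+    // Note: the (VT.getSimpleVT()+1) trick below assumes the MVT enumeration
+    // lists each integer type directly before the next wider one, so that
+    // i16 + 1 == i32 for the 16-bit types reaching this path.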
+    MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
+    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
+                                  Op.getOperand(0), Op.getOperand(1));
+    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
+                                 DAG.getValueType(VT));
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+  } else if (VT.getSizeInBits() == 32) {
+    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    if (Idx == 0)
+      return Op;
+
+    // SHUFPS the element to the lowest double word, then movss.
+    int Mask[4] = { Idx, -1, -1, -1 };
+    MVT VVT = Op.getOperand(0).getValueType();
+    SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+                                       DAG.getUNDEF(VVT), Mask);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+                       DAG.getIntPtrConstant(0));
+  } else if (VT.getSizeInBits() == 64) {
+    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+    // to match extract_elt for f64.
+    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    if (Idx == 0)
+      return Op;
+
+    // UNPCKHPD the element to the lowest double word, then movsd.
+    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
+    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+    int Mask[2] = { 1, -1 };
+    MVT VVT = Op.getOperand(0).getValueType();
+    SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+                                       DAG.getUNDEF(VVT), Mask);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+                       DAG.getIntPtrConstant(0));
+  }
+
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT EVT = VT.getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDValue N0 = Op.getOperand(0);
+  SDValue N1 = Op.getOperand(1);
+  SDValue N2 = Op.getOperand(2);
+
+  if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
+      isa<ConstantSDNode>(N2)) {
+    unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
+                                              : X86ISD::PINSRW;
+    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
+    // second argument.
+    if (N1.getValueType() != MVT::i32)
+      N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+    if (N2.getValueType() != MVT::i32)
+      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+    return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+  } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+    // Bits [7:6] of the constant are the source select.  This will always be
+    // zero here.  The DAG Combiner may combine an extract_elt index into these
+    // bits.  For example (insert (extract, 3), 2) could be matched by putting
+    // the '3' into bits [7:6] of X86ISD::INSERTPS.
+    // Bits [5:4] of the constant are the destination select.  This is the
+    // value of the incoming immediate.
+    // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
+    // combine either bitwise AND or insert of float 0.0 to set these bits.
+    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+  } else if (EVT == MVT::i32) {
+    // InsertPS works with constant index.
+    if (isa<ConstantSDNode>(N2))
+      return Op;
+  }
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT EVT = VT.getVectorElementType();
+
+  if (Subtarget->hasSSE41())
+    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+  if (EVT == MVT::i8)
+    return SDValue();
+
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue N0 = Op.getOperand(0);
+  SDValue N1 = Op.getOperand(1);
+  SDValue N2 = Op.getOperand(2);
+
+  if (EVT.getSizeInBits() == 16) {
+    // Transform it so it matches pinsrw, which expects a 16-bit value in a
+    // GR32 as its second argument.
+    if (N1.getValueType() != MVT::i32)
+      N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+    if (N2.getValueType() != MVT::i32)
+      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+    return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+  }
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  if (Op.getValueType() == MVT::v2f32)
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
+                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
+                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+                                               Op.getOperand(0))));
+
+  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+  MVT VT = MVT::v2i32;
+  switch (Op.getValueType().getSimpleVT()) {
+  default: break;
+  case MVT::v16i8:
+  case MVT::v8i16:
+    VT = MVT::v4i32;
+    break;
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
+                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the X86ISD::Wrapper node.  Suppose N is
+// one of the above mentioned nodes.  It has to be wrapped because otherwise
+// Select(N) returns N.  So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode.  These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  // FIXME there isn't really any debug info here, should come from the parent
+  DebugLoc dl = CP->getDebugLoc();
+  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+                                             CP->getAlignment());
+  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+      !Subtarget->isPICStyleRIPRel()) {
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(),
+                                     getPointerTy()),
+                         Result);
+  }
+
+  return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+                                      int64_t Offset,
+                                      SelectionDAG &DAG) const {
+  bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+  bool ExtraLoadRequired =
+    Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+  // Create the TargetGlobalAddress node, folding in the constant
+  // offset if it is legal.
+  SDValue Result;
+  if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+    Offset = 0;
+  } else
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
+  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+  // With PIC, the address is actually $g + Offset.
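+  // Roughly, for i386 ELF PIC (illustrative, assuming %ebx holds the GOT
+  // base): a local symbol becomes  leal sym@GOTOFF(%ebx), %eax  while a
+  // symbol needing the extra load goes through the GOT as
+  // movl sym@GOT(%ebx), %eax.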
+  if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+                         Result);
+  }
+
+  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
+  // to load the value at address GV, not the value of GV itself.  This means
+  // that the GlobalAddress must be in the base or index register of the
+  // address, not the GV offset field.  The platform check is inside the
+  // GVRequiresExtraLoad() call.  The same applies to external symbols during
+  // PIC codegen.
+  if (ExtraLoadRequired)
+    Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+                         PseudoSourceValue::getGOT(), 0);
+
+  // If there was a non-zero offset that we didn't fold, create an explicit
+  // addition for it.
+  if (Offset != 0)
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+                         DAG.getConstant(Offset, getPointerTy()));
+
+  return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+  return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+           SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) {
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  DebugLoc dl = GA->getDebugLoc();
+  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                           GA->getValueType(0),
+                                           GA->getOffset());
+  if (InFlag) {
+    SDValue Ops[] = { Chain, TGA, *InFlag };
+    Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+  } else {
+    SDValue Ops[] = { Chain, TGA };
+    Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+  }
+  SDValue Flag = Chain.getValue(1);
+  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                const MVT PtrVT) {
+  SDValue InFlag;
+  DebugLoc dl = GA->getDebugLoc();  // ? function entry point might be better
+  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+                                   DAG.getNode(X86ISD::GlobalBaseReg,
+                                               DebugLoc::getUnknownLoc(),
+                                               PtrVT), InFlag);
+  InFlag = Chain.getValue(1);
+
+  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                const MVT PtrVT) {
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                   const MVT PtrVT, TLSModel::Model model,
+                                   bool is64Bit) {
+  DebugLoc dl = GA->getDebugLoc();
+  // Get the Thread Pointer
+  SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
+                             DebugLoc::getUnknownLoc(), PtrVT,
+                             DAG.getRegister(is64Bit? X86::FS : X86::GS,
+                                             MVT::i32));
+
+  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
+                                      NULL, 0);
+
+  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
+  // (initial exec)
+  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                           GA->getValueType(0),
+                                           GA->getOffset());
+  SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
+
+  if (model == TLSModel::InitialExec)
+    Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+                         PseudoSourceValue::getGOT(), 0);
+
+  // The address of the thread local variable is the add of the thread
+  // pointer with the offset of the variable.
+  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+  // TODO: implement the "local dynamic" model
+  // TODO: implement the "initial exec" model for pic executables
+  assert(Subtarget->isTargetELF() &&
+         "TLS not implemented for non-ELF targets");
+  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  GlobalValue *GV = GA->getGlobal();
+  TLSModel::Model model =
+    getTLSModel(GV, getTargetMachine().getRelocationModel());
+  if (Subtarget->is64Bit()) {
+    switch (model) {
+    case TLSModel::GeneralDynamic:
+    case TLSModel::LocalDynamic: // not implemented
+      return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+
+    case TLSModel::InitialExec:
+    case TLSModel::LocalExec:
+      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true);
+    }
+  } else {
+    switch (model) {
+    case TLSModel::GeneralDynamic:
+    case TLSModel::LocalDynamic: // not implemented
+      return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+    case TLSModel::InitialExec:
+    case TLSModel::LocalExec:
+      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false);
+    }
+  }
+  assert(0 && "Unreachable");
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+      !Subtarget->isPICStyleRIPRel()) {
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(),
+                                     getPointerTy()),
+                         Result);
+  }
+
+  return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  DebugLoc dl = JT->getDebugLoc();
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+      !Subtarget->isPICStyleRIPRel()) {
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(),
+                                     getPointerTy()),
+                         Result);
+  }
+
+  return Result;
+}
+
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+  MVT VT = Op.getValueType();
+  unsigned VTBits = VT.getSizeInBits();
+  DebugLoc dl = Op.getDebugLoc();
+  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+  SDValue ShOpLo = Op.getOperand(0);
+  SDValue ShOpHi = Op.getOperand(1);
+  SDValue ShAmt = Op.getOperand(2);
+  SDValue Tmp1 = isSRA ?
+    DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+                DAG.getConstant(VTBits - 1, MVT::i8)) :
+    DAG.getConstant(0, VT);
+
+  SDValue Tmp2, Tmp3;
+  if (Op.getOpcode() == ISD::SHL_PARTS) {
+    Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+  } else {
+    Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+  }
+
+  SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+                                DAG.getConstant(VTBits, MVT::i8));
+  SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
+                             AndNode, DAG.getConstant(0, MVT::i8));
+
+  SDValue Hi, Lo;
+  SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+  SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+  SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+  if (Op.getOpcode() == ISD::SHL_PARTS) {
+    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+  } else {
+    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+  }
+
+  SDValue Ops[2] = { Lo, Hi };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  MVT SrcVT = Op.getOperand(0).getValueType();
+  assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+         "Unknown SINT_TO_FP to lower!");
+
+  // These are really Legal; return the operand so the caller accepts it as
+  // Legal.
+  if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+    return Op;
+  if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+      Subtarget->is64Bit()) {
+    return Op;
+  }
+
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Size = SrcVT.getSizeInBits()/8;
+  MachineFunction &MF = DAG.getMachineFunction();
+  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+                               StackSlot,
+                               PseudoSourceValue::getFixedStack(SSFI), 0);
+  return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain,
+                                     SDValue StackSlot,
+                                     SelectionDAG &DAG) {
+  // Build the FILD
+  DebugLoc dl = Op.getDebugLoc();
+  SDVTList Tys;
+  bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+  if (useSSE)
+    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+  else
+    Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(StackSlot);
+  Ops.push_back(DAG.getValueType(SrcVT));
+  SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
+                               Tys, &Ops[0], Ops.size());
+
+  if (useSSE) {
+    Chain = Result.getValue(1);
+    SDValue InFlag = Result.getValue(2);
+
+    // FIXME: Currently the FST is flagged to the FILD_FLAG.  This
+    // shouldn't be necessary except that RFP cannot be live across
+    // multiple blocks.  When stackifier is fixed, they can be uncoupled.
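+    // Roughly (illustrative): the FILD leaves its result on the x87 stack,
+    // so it is spilled with  fstpl (slot)  and reloaded with
+    // movsd (slot), %xmm0  to move the value into the SSE register file;
+    // the FST node and load built below implement that store/load pair.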
+    MachineFunction &MF = DAG.getMachineFunction();
+    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+    SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+    Tys = DAG.getVTList(MVT::Other);
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(Chain);
+    Ops.push_back(Result);
+    Ops.push_back(StackSlot);
+    Ops.push_back(DAG.getValueType(Op.getValueType()));
+    Ops.push_back(InFlag);
+    Chain = DAG.getNode(X86ISD::FST, dl, Tys, &Ops[0], Ops.size());
+    Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
+                         PseudoSourceValue::getFixedStack(SSFI), 0);
+  }
+
+  return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
+  // This algorithm is not obvious. Here it is in C code, more or less:
+  /*
+    double uint64_to_double( uint32_t hi, uint32_t lo ) {
+      static const __m128i exp = { 0x4330000045300000ULL, 0 };
+      static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+      // Copy ints to xmm registers.
+      __m128i xh = _mm_cvtsi32_si128( hi );
+      __m128i xl = _mm_cvtsi32_si128( lo );
+
+      // Combine into low half of a single xmm register.
+      __m128i x = _mm_unpacklo_epi32( xh, xl );
+      __m128d d;
+      double sd;
+
+      // Merge in appropriate exponents to give the integer bits the right
+      // magnitude.
+      x = _mm_unpacklo_epi32( x, exp );
+
+      // Subtract away the biases to deal with the IEEE-754 double precision
+      // implicit 1.
+      d = _mm_sub_pd( (__m128d) x, bias );
+
+      // All conversions up to here are exact. The correctly rounded result is
+      // calculated using the current rounding mode using the following
+      // horizontal add.
+      d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+      _mm_store_sd( &sd, d );   // Because we are returning doubles in XMM, this
+                                // store doesn't really need to be here (except
+                                // maybe to zero the other double)
+      return sd;
+    }
+  */
+
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Build some magic constants.
+  std::vector<Constant*> CV0;
+  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0)));
+  Constant *C0 = ConstantVector::get(CV0);
+  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+  std::vector<Constant*> CV1;
+  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
+  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+  Constant *C1 = ConstantVector::get(CV1);
+  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+  SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+                            DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                        Op.getOperand(0),
+                                        DAG.getIntPtrConstant(1)));
+  SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+                            DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                        Op.getOperand(0),
+                                        DAG.getIntPtrConstant(0)));
+  SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+  SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+                              PseudoSourceValue::getConstantPool(), 0,
+                              false, 16);
+  SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+  SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+  SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+                              PseudoSourceValue::getConstantPool(), 0,
+                              false, 16);
+  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+  // Add the halves; easiest way is to swap them into another reg first.
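+  // The <1,-1> shuffle below copies Sub's high double into lane 0, so the low
+  // lane of the vector FADD computes Sub[0] + Sub[1]; that lane is then
+  // extracted as the final, correctly rounded result.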
+  int ShufMask[2] = { 1, -1 };
+  SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+                                      DAG.getUNDEF(MVT::v2f64), ShufMask);
+  SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+                     DAG.getIntPtrConstant(0));
+}
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  // FP constant to bias correct the final result.
+  SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+                                   MVT::f64);
+
+  // Load the 32-bit value into an XMM register.
+  SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+                             DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                         Op.getOperand(0),
+                                         DAG.getIntPtrConstant(0)));
+
+  Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+                     DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+                     DAG.getIntPtrConstant(0));
+
+  // Or the load with the bias.
+  SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                                   MVT::v2f64, Load)),
+                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                                                   MVT::v2f64, Bias)));
+  Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+                   DAG.getIntPtrConstant(0));
+
+  // Subtract the bias.
+  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+  // Handle final rounding.
+  MVT DestVT = Op.getValueType();
+
+  if (DestVT.bitsLT(MVT::f64)) {
+    return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+                       DAG.getIntPtrConstant(0));
+  } else if (DestVT.bitsGT(MVT::f64)) {
+    return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+  }
+
+  // Handle final rounding.
+  return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  SDValue N0 = Op.getOperand(0);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Since UINT_TO_FP is legal here (it's marked custom), the dag combiner
+  // won't optimize it to a SINT_TO_FP when the sign bit is known zero.
+  // Perform the optimization here.
+  if (DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+  MVT SrcVT = N0.getValueType();
+  if (SrcVT == MVT::i64) {
+    // We only handle SSE2 f64 target here; caller can expand the rest.
+    if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+      return SDValue();
+
+    return LowerUINT_TO_FP_i64(Op, DAG);
+  } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+    return LowerUINT_TO_FP_i32(Op, DAG);
+  }
+
+  assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
+
+  // Make a 64-bit buffer, and use it to build an FILD.
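+  // Storing the u32 into the low word and zero into the high word makes the
+  // buffered i64 non-negative, so the signed FILD reads back the unsigned
+  // value exactly; e.g. 0xFFFFFFFF becomes 4294967295.0 rather than -1.0.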
+  SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+  SDValue WordOff = DAG.getConstant(4, getPointerTy());
+  SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+                                   getPointerTy(), StackSlot, WordOff);
+  SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+                                StackSlot, NULL, 0);
+  SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+                                OffsetSlot, NULL, 0);
+  return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+}
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
+  DebugLoc dl = Op.getDebugLoc();
+
+  MVT DstTy = Op.getValueType();
+
+  if (!IsSigned) {
+    assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+    DstTy = MVT::i64;
+  }
+
+  assert(DstTy.getSimpleVT() <= MVT::i64 &&
+         DstTy.getSimpleVT() >= MVT::i16 &&
+         "Unknown FP_TO_SINT to lower!");
+
+  // These are really Legal.
+  if (DstTy == MVT::i32 &&
+      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+    return std::make_pair(SDValue(), SDValue());
+  if (Subtarget->is64Bit() &&
+      DstTy == MVT::i64 &&
+      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+    return std::make_pair(SDValue(), SDValue());
+
+  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+  // stack slot.
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned MemSize = DstTy.getSizeInBits()/8;
+  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+  unsigned Opc;
+  switch (DstTy.getSimpleVT()) {
+  default: assert(0 && "Invalid FP_TO_SINT to lower!");
+  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+  }
+
+  SDValue Chain = DAG.getEntryNode();
+  SDValue Value = Op.getOperand(0);
+  if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+    assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+    Chain = DAG.getStore(Chain, dl, Value, StackSlot,
+                         PseudoSourceValue::getFixedStack(SSFI), 0);
+    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+    SDValue Ops[] = {
+      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+    };
+    Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+    Chain = Value.getValue(1);
+    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+  }
+
+  // Build the FP_TO_INT*_IN_MEM
+  SDValue Ops[] = { Chain, Value, StackSlot };
+  SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+
+  return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+  SDValue FIST = Vals.first, StackSlot = Vals.second;
+  // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+  if (FIST.getNode() == 0) return Op;
+
+  // Load the result.
+  return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+                     FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
+  std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+  SDValue FIST = Vals.first, StackSlot = Vals.second;
+  assert(FIST.getNode() && "Unexpected failure");
+
+  // Load the result.
+  return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+                     FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  MVT VT = Op.getValueType();
+  MVT EltVT = VT;
+  if (VT.isVector())
+    EltVT = VT.getVectorElementType();
+  std::vector<Constant*> CV;
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+    CV.push_back(C);
+    CV.push_back(C);
+  } else {
+    Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+  }
+  Constant *C = ConstantVector::get(CV);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                             PseudoSourceValue::getConstantPool(), 0,
+                             false, 16);
+  return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  MVT VT = Op.getValueType();
+  MVT EltVT = VT;
+  unsigned EltNum = 1;
+  if (VT.isVector()) {
+    EltVT = VT.getVectorElementType();
+    EltNum = VT.getVectorNumElements();
+  }
+  std::vector<Constant*> CV;
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+    CV.push_back(C);
+    CV.push_back(C);
+  } else {
+    Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+  }
+  Constant *C = ConstantVector::get(CV);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                             PseudoSourceValue::getConstantPool(), 0,
+                             false, 16);
+  if (VT.isVector()) {
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                                               Op.getOperand(0)),
+                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                                               Mask)));
+  } else {
+    return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+  }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  MVT VT = Op.getValueType();
+  MVT SrcVT = Op1.getValueType();
+
+  // If second operand is smaller, extend it first.
+  if (SrcVT.bitsLT(VT)) {
+    Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+    SrcVT = VT;
+  }
+  // And if it is bigger, shrink it first.
+  if (SrcVT.bitsGT(VT)) {
+    Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+    SrcVT = VT;
+  }
+
+  // At this point the operands and the result should have the same
+  // type, and that won't be f80 since that is not custom lowered.
+
+  // First get the sign bit of second operand.
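+  // In bit terms this computes copysign(x, y) = (x & ~SignMask) | (y & SignMask);
+  // the two constant-pool masks built below are applied with FAND and the
+  // pieces are merged with FOR.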
+  std::vector<Constant*> CV;
+  if (SrcVT == MVT::f64) {
+    CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+  } else {
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+  }
+  Constant *C = ConstantVector::get(CV);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+                              PseudoSourceValue::getConstantPool(), 0,
+                              false, 16);
+  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+  // Shift sign bit right or left if the two operands have different types.
+  if (SrcVT.bitsGT(VT)) {
+    // Op0 is MVT::f32, Op1 is MVT::f64.
+    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+    SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+                          DAG.getConstant(32, MVT::i32));
+    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+                          DAG.getIntPtrConstant(0));
+  }
+
+  // Clear first operand sign bit.
+  CV.clear();
+  if (VT == MVT::f64) {
+    CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+  } else {
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+  }
+  C = ConstantVector::get(CV);
+  CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                              PseudoSourceValue::getConstantPool(), 0,
+                              false, 16);
+  SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+  // Or the value with the sign bit.
+  return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+                                    SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+
+  // CF and OF aren't always set the way we want. Determine which
+  // of these we need.
+  bool NeedCF = false;
+  bool NeedOF = false;
+  switch (X86CC) {
+  case X86::COND_A: case X86::COND_AE:
+  case X86::COND_B: case X86::COND_BE:
+    NeedCF = true;
+    break;
+  case X86::COND_G: case X86::COND_GE:
+  case X86::COND_L: case X86::COND_LE:
+  case X86::COND_O: case X86::COND_NO:
+    NeedOF = true;
+    break;
+  default: break;
+  }
+
+  // See if we can use the EFLAGS value from the operand instead of
+  // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+  // we prove that the arithmetic won't overflow, we can't use OF or CF.
+  if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
+    unsigned Opcode = 0;
+    unsigned NumOperands = 0;
+    switch (Op.getNode()->getOpcode()) {
+    case ISD::ADD:
+      // Due to an isel shortcoming, be conservative if this add is likely to
+      // be selected as part of a load-modify-store instruction. When the root
+      // node in a match is a store, isel doesn't know how to remap non-chain
+      // non-flag uses of other nodes in the match, such as the ADD in this
+      // case. This leads to the ADD being left around and reselected, with
+      // the result being two adds in the output.
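+      // e.g. for (store (add (load p), c), p), which is likely selected as a
+      // read-modify-write  add [p], c : reusing the add's flag result here
+      // would cause the add to be selected a second time as well.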
+      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+           UE = Op.getNode()->use_end(); UI != UE; ++UI)
+        if (UI->getOpcode() == ISD::STORE)
+          goto default_case;
+      if (ConstantSDNode *C =
+            dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+        // An add of one will be selected as an INC.
+        if (C->getAPIntValue() == 1) {
+          Opcode = X86ISD::INC;
+          NumOperands = 1;
+          break;
+        }
+        // An add of negative one (subtract of one) will be selected as a DEC.
+        if (C->getAPIntValue().isAllOnesValue()) {
+          Opcode = X86ISD::DEC;
+          NumOperands = 1;
+          break;
+        }
+      }
+      // Otherwise use a regular EFLAGS-setting add.
+      Opcode = X86ISD::ADD;
+      NumOperands = 2;
+      break;
+    case ISD::SUB:
+      // Due to the ISEL shortcoming noted above, be conservative if this sub
+      // is likely to be selected as part of a load-modify-store instruction.
+      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+           UE = Op.getNode()->use_end(); UI != UE; ++UI)
+        if (UI->getOpcode() == ISD::STORE)
+          goto default_case;
+      // Otherwise use a regular EFLAGS-setting sub.
+      Opcode = X86ISD::SUB;
+      NumOperands = 2;
+      break;
+    case X86ISD::ADD:
+    case X86ISD::SUB:
+    case X86ISD::INC:
+    case X86ISD::DEC:
+      return SDValue(Op.getNode(), 1);
+    default:
+    default_case:
+      break;
+    }
+    if (Opcode != 0) {
+      SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+      SmallVector<SDValue, 4> Ops;
+      for (unsigned i = 0; i != NumOperands; ++i)
+        Ops.push_back(Op.getOperand(i));
+      SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+      DAG.ReplaceAllUsesWith(Op, New);
+      return SDValue(New.getNode(), 1);
+    }
+  }
+
+  // Otherwise just emit a CMP with 0, which is the TEST pattern.
+  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+                     DAG.getConstant(0, Op.getValueType()));
+}
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+                                   SelectionDAG &DAG) {
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+    if (C->getAPIntValue() == 0)
+      return EmitTest(Op0, X86CC, DAG);
+
+  DebugLoc dl = Op0.getDebugLoc();
+  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+  // Lower (X & (1 << N)) == 0 to BT(X, N).
+  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
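+  // The bt instruction copies bit N of X into CF, so SETEQ maps to COND_AE
+  // (CF == 0) and SETNE to COND_B (CF == 1) in the code below.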
+  if (Op0.getOpcode() == ISD::AND &&
+      Op0.hasOneUse() &&
+      Op1.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    SDValue LHS, RHS;
+    if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+      if (ConstantSDNode *Op010C =
+            dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+        if (Op010C->getZExtValue() == 1) {
+          LHS = Op0.getOperand(0);
+          RHS = Op0.getOperand(1).getOperand(1);
+        }
+    } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+      if (ConstantSDNode *Op000C =
+            dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+        if (Op000C->getZExtValue() == 1) {
+          LHS = Op0.getOperand(1);
+          RHS = Op0.getOperand(0).getOperand(1);
+        }
+    } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+      ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+      SDValue AndLHS = Op0.getOperand(0);
+      if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+        LHS = AndLHS.getOperand(0);
+        RHS = AndLHS.getOperand(1);
+      }
+    }
+
+    if (LHS.getNode()) {
+      // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+      // instruction. Since the shift amount is in-range-or-undefined, we know
+      // that doing a bittest on the i16 value is ok. We extend to i32 because
+      // the encoding for the i16 version is larger than the i32 version.
+      if (LHS.getValueType() == MVT::i8)
+        LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+      // If the operand types disagree, extend the shift amount to match. Since
+      // BT ignores high bits (like shifts) we can use anyextend.
+      if (LHS.getValueType() != RHS.getValueType())
+        RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+      SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+      unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+      return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                         DAG.getConstant(Cond, MVT::i8), BT);
+    }
+  }
+
+  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+  unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+
+  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                     DAG.getConstant(X86CC, MVT::i8), Cond);
+}
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Cond;
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue CC = Op.getOperand(2);
+  MVT VT = Op.getValueType();
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (isFP) {
+    unsigned SSECC = 8;
+    MVT VT0 = Op0.getValueType();
+    assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
+    unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+    bool Swap = false;
+
+    switch (SetCCOpcode) {
+    default: break;
+    case ISD::SETOEQ:
+    case ISD::SETEQ:  SSECC = 0; break;
+    case ISD::SETOGT:
+    case ISD::SETGT:  Swap = true; // Fallthrough
+    case ISD::SETLT:
+    case ISD::SETOLT: SSECC = 1; break;
+    case ISD::SETOGE:
+    case ISD::SETGE:  Swap = true; // Fallthrough
+    case ISD::SETLE:
+    case ISD::SETOLE: SSECC = 2; break;
+    case ISD::SETUO:  SSECC = 3; break;
+    case ISD::SETUNE:
+    case ISD::SETNE:  SSECC = 4; break;
+    case ISD::SETULE: Swap = true;
+    case ISD::SETUGE: SSECC = 5; break;
+    case ISD::SETULT: Swap = true;
+    case ISD::SETUGT: SSECC = 6; break;
+    case ISD::SETO:   SSECC = 7; break;
+    }
+    if (Swap)
+      std::swap(Op0, Op1);
+
+    // In the two special cases we can't handle, emit two comparisons.
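+    // For reference, the CMPPS/CMPPD immediate predicates behind SSECC are:
+    //   0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD.
+    // SETUEQ (unordered-or-equal) and SETONE (ordered-and-unequal) have no
+    // single predicate, which is why SSECC stays 8 and two compares are
+    // combined below.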
+    if (SSECC == 8) {
+      if (SetCCOpcode == ISD::SETUEQ) {
+        SDValue UNORD, EQ;
+        UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+        EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+        return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+      }
+      else if (SetCCOpcode == ISD::SETONE) {
+        SDValue ORD, NEQ;
+        ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+        NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+        return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+      }
+      assert(0 && "Illegal FP comparison");
+    }
+    // Handle all other FP comparisons here.
+    return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+  }
+
+  // We are handling one of the integer comparisons here. Since SSE only has
+  // GT and EQ comparisons for integer, swapping operands and multiple
+  // operations may be required for some comparisons.
+  unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+  bool Swap = false, Invert = false, FlipSigns = false;
+
+  switch (VT.getSimpleVT()) {
+  default: break;
+  case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+  case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+  case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+  case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+  }
+
+  switch (SetCCOpcode) {
+  default: break;
+  case ISD::SETNE:  Invert = true;
+  case ISD::SETEQ:  Opc = EQOpc; break;
+  case ISD::SETLT:  Swap = true;
+  case ISD::SETGT:  Opc = GTOpc; break;
+  case ISD::SETGE:  Swap = true;
+  case ISD::SETLE:  Opc = GTOpc; Invert = true; break;
+  case ISD::SETULT: Swap = true;
+  case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+  case ISD::SETUGE: Swap = true;
+  case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+  }
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Since SSE has no unsigned integer comparisons, we need to flip the sign
+  // bits of the inputs before performing those operations.
+  if (FlipSigns) {
+    MVT EltVT = VT.getVectorElementType();
+    SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+                                      EltVT);
+    std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+    SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+                                  SignBits.size());
+    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+  }
+
+  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+  // If the logical-not of the result is required, perform that now.
+  if (Invert)
+    Result = DAG.getNOT(dl, Result, VT);
+
+  return Result;
+}
+
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+  unsigned Opc = Op.getNode()->getOpcode();
+  if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+    return true;
+  if (Op.getResNo() == 1 &&
+      (Opc == X86ISD::ADD ||
+       Opc == X86ISD::SUB ||
+       Opc == X86ISD::SMUL ||
+       Opc == X86ISD::UMUL ||
+       Opc == X86ISD::INC ||
+       Opc == X86ISD::DEC))
+    return true;
+
+  return false;
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
+  bool addTest = true;
+  SDValue Cond  = Op.getOperand(0);
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue CC;
+
+  if (Cond.getOpcode() == ISD::SETCC)
+    Cond = LowerSETCC(Cond, DAG);
+
+  // If condition flag is set by an X86ISD::CMP, then use it as the condition
+  // setting operand in place of the X86ISD::SETCC.
+  if (Cond.getOpcode() == X86ISD::SETCC) {
+    CC = Cond.getOperand(0);
+
+    SDValue Cmp = Cond.getOperand(1);
+    unsigned Opc = Cmp.getOpcode();
+    MVT VT = Op.getValueType();
+
+    bool IllegalFPCMov = false;
+    if (VT.isFloatingPoint() && !VT.isVector() &&
+        !isScalarFPTypeInSSEReg(VT))  // FPStack?
+      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+    if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+        Opc == X86ISD::BT) { // FIXME
+      Cond = Cmp;
+      addTest = false;
+    }
+  }
+
+  if (addTest) {
+    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
+  }
+
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SmallVector<SDValue, 4> Ops;
+  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+  // condition is true.
+  Ops.push_back(Op.getOperand(2));
+  Ops.push_back(Op.getOperand(1));
+  Ops.push_back(CC);
+  Ops.push_back(Cond);
+  return DAG.getNode(X86ISD::CMOV, dl, VTs, &Ops[0], Ops.size());
+}
+
+// isAndOrOfSetCCs - Return true if node is an ISD::AND or ISD::OR of two
+// X86ISD::SETCC nodes, each of which has no other use apart from the
+// AND / OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+  Opc = Op.getOpcode();
+  if (Opc != ISD::OR && Opc != ISD::AND)
+    return false;
+  return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+          Op.getOperand(0).hasOneUse() &&
+          Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+          Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of an X86ISD::SETCC and
+// 1, where the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+  if (Op.getOpcode() != ISD::XOR)
+    return false;
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (N1C && N1C->getAPIntValue() == 1) {
+    return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+           Op.getOperand(0).hasOneUse();
+  }
+  return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
+  bool addTest = true;
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond  = Op.getOperand(1);
+  SDValue Dest  = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue CC;
+
+  if (Cond.getOpcode() == ISD::SETCC)
+    Cond = LowerSETCC(Cond, DAG);
+#if 0
+  // FIXME: LowerXALUO doesn't handle these!!
+  else if (Cond.getOpcode() == X86ISD::ADD  ||
+           Cond.getOpcode() == X86ISD::SUB  ||
+           Cond.getOpcode() == X86ISD::SMUL ||
+           Cond.getOpcode() == X86ISD::UMUL)
+    Cond = LowerXALUO(Cond, DAG);
+#endif
+
+  // If condition flag is set by an X86ISD::CMP, then use it as the condition
+  // setting operand in place of the X86ISD::SETCC.
+  if (Cond.getOpcode() == X86ISD::SETCC) {
+    CC = Cond.getOperand(0);
+
+    SDValue Cmp = Cond.getOperand(1);
+    unsigned Opc = Cmp.getOpcode();
+    // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+    if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+      Cond = Cmp;
+      addTest = false;
+    } else {
+      switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+      default: break;
+      case X86::COND_O:
+      case X86::COND_B:
+        // These can only come from an arithmetic instruction with overflow,
+        // e.g. SADDO, UADDO.
+        Cond = Cond.getNode()->getOperand(1);
+        addTest = false;
+        break;
+      }
+    }
+  } else {
+    unsigned CondOpc;
+    if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+      SDValue Cmp = Cond.getOperand(0).getOperand(1);
+      if (CondOpc == ISD::OR) {
+        // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+        // two branches instead of an explicit OR instruction with a
+        // separate test.
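+        // e.g. an FCMP_UNE typically legalizes to roughly
+        //   (or (setcc COND_NE, cmp), (setcc COND_P, cmp))
+        // with both setccs reading the same ucomiss/ucomisd, so we can branch
+        // on JNE and JP directly instead of materializing the OR.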
+        if (Cmp == Cond.getOperand(1).getOperand(1) &&
+            isX86LogicalCmp(Cmp)) {
+          CC = Cond.getOperand(0).getOperand(0);
+          Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+                              Chain, Dest, CC, Cmp);
+          CC = Cond.getOperand(1).getOperand(0);
+          Cond = Cmp;
+          addTest = false;
+        }
+      } else { // ISD::AND
+        // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+        // two branches instead of an explicit AND instruction with a
+        // separate test. However, we only do this if this block doesn't
+        // have a fall-through edge, because this requires an explicit
+        // jmp when the condition is false.
+        if (Cmp == Cond.getOperand(1).getOperand(1) &&
+            isX86LogicalCmp(Cmp) &&
+            Op.getNode()->hasOneUse()) {
+          X86::CondCode CCode =
+            (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+          CCode = X86::GetOppositeBranchCondition(CCode);
+          CC = DAG.getConstant(CCode, MVT::i8);
+          SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+          // Look for an unconditional branch following this conditional branch.
+          // We need this because we need to reverse the successors in order
+          // to implement FCMP_OEQ.
+          if (User.getOpcode() == ISD::BR) {
+            SDValue FalseBB = User.getOperand(1);
+            SDValue NewBR =
+              DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+            assert(NewBR == User);
+            Dest = FalseBB;
+
+            Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+                                Chain, Dest, CC, Cmp);
+            X86::CondCode CCode =
+              (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+            CCode = X86::GetOppositeBranchCondition(CCode);
+            CC = DAG.getConstant(CCode, MVT::i8);
+            Cond = Cmp;
+            addTest = false;
+          }
+        }
+      }
+    } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+      // Recognize "xorb (setcc), 1" patterns. The xor inverts the condition.
+      // It should be transformed by the DAG combiner except when the condition
+      // is set by an arithmetic-with-overflow node.
+      X86::CondCode CCode =
+        (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+      CCode = X86::GetOppositeBranchCondition(CCode);
+      CC = DAG.getConstant(CCode, MVT::i8);
+      Cond = Cond.getOperand(0).getOperand(1);
+      addTest = false;
+    }
+  }
+
+  if (addTest) {
+    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
+  }
+  return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+                     Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to _alloca call for Cygwin/MinGW targets.
+// Calls to _alloca are needed to probe the stack when allocating more than 4k
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                           SelectionDAG &DAG) {
+  assert(Subtarget->isTargetCygMing() &&
+         "This should be used only on Cygwin/MinGW targets");
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Get the inputs.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size  = Op.getOperand(1);
+  // FIXME: Ensure alignment here
+
+  SDValue Flag;
+
+  MVT IntPtr = getPointerTy();
+  MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+  Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+  Flag = Chain.getValue(1);
+
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Ops[] = { Chain,
+                    DAG.getTargetExternalSymbol("_alloca", IntPtr),
+                    DAG.getRegister(X86::EAX, IntPtr),
+                    DAG.getRegister(X86StackPtr, SPTy),
+                    Flag };
+  Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5);
+  Flag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getIntPtrConstant(0, true),
+                             DAG.getIntPtrConstant(0, true),
+                             Flag);
+
+  Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+  SDValue Ops1[2] = { Chain.getValue(0), Chain };
+  return DAG.getMergeValues(Ops1, 2, dl);
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                                           SDValue Chain,
+                                           SDValue Dst, SDValue Src,
+                                           SDValue Size, unsigned Align,
+                                           const Value *DstSV,
+                                           uint64_t DstSVOff) {
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If not DWORD aligned or size is more than the threshold, call the library.
+  // The libc version is likely to be faster for these cases. It can use the
+  // address value and run-time information about the CPU.
+  if ((Align & 3) != 0 ||
+      !ConstantSize ||
+      ConstantSize->getZExtValue() >
+        getSubtarget()->getMaxInlineSizeThreshold()) {
+    SDValue InFlag(0, 0);
+
+    // Check to see if there is a specialized entry-point for memory zeroing.
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+    if (const char *bzeroEntry =  V &&
+          V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+      MVT IntPtr = getPointerTy();
+      const Type *IntPtrTy = TD->getIntPtrType();
+      TargetLowering::ArgListTy Args;
+      TargetLowering::ArgListEntry Entry;
+      Entry.Node = Dst;
+      Entry.Ty = IntPtrTy;
+      Args.push_back(Entry);
+      Entry.Node = Size;
+      Args.push_back(Entry);
+      std::pair<SDValue,SDValue> CallResult =
+        LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
+                    CallingConv::C, false,
+                    DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
+      return CallResult.second;
+    }
+
+    // Otherwise have the target-independent code call memset.
+    return SDValue();
+  }
+
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  SDValue InFlag(0, 0);
+  MVT AVT;
+  SDValue Count;
+  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+  unsigned BytesLeft = 0;
+  bool TwoRepStos = false;
+  if (ValC) {
+    unsigned ValReg;
+    uint64_t Val = ValC->getZExtValue() & 255;
+
+    // If the value is a constant, then we can potentially use larger sets.
+    switch (Align & 3) {
+    case 2:   // WORD aligned
+      AVT = MVT::i16;
+      ValReg = X86::AX;
+      Val = (Val << 8) | Val;
+      break;
+    case 0:   // DWORD aligned
+      AVT = MVT::i32;
+      ValReg = X86::EAX;
+      Val = (Val << 8)  | Val;
+      Val = (Val << 16) | Val;
+      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
+        AVT = MVT::i64;
+        ValReg = X86::RAX;
+        Val = (Val << 32) | Val;
+      }
+      break;
+    default:  // Byte aligned
+      AVT = MVT::i8;
+      ValReg = X86::AL;
+      Count = DAG.getIntPtrConstant(SizeVal);
+      break;
+    }
+
+    if (AVT.bitsGT(MVT::i8)) {
+      unsigned UBytes = AVT.getSizeInBits() / 8;
+      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+      BytesLeft = SizeVal % UBytes;
+    }
+
+    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+                             InFlag);
+    InFlag = Chain.getValue(1);
+  } else {
+    AVT = MVT::i8;
+    Count  = DAG.getIntPtrConstant(SizeVal);
+    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  Chain = DAG.getCopyToReg(Chain, dl,
+                           Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+                           Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl,
+                           Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+                           Dst, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(DAG.getValueType(AVT));
+  Ops.push_back(InFlag);
+  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+
+  if (TwoRepStos) {
+    InFlag = Chain.getValue(1);
+    Count  = Size;
+    MVT CVT = Count.getValueType();
+    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+    Chain = DAG.getCopyToReg(Chain, dl,
+                             (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+                             Left, InFlag);
+    InFlag = Chain.getValue(1);
+    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    Ops.clear();
+    Ops.push_back(Chain);
+    Ops.push_back(DAG.getValueType(MVT::i8));
+    Ops.push_back(InFlag);
+    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+  } else if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    MVT AddrVT = Dst.getValueType();
+    MVT SizeVT = Size.getValueType();
+
+    Chain = DAG.getMemset(Chain, dl,
+                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+                                      DAG.getConstant(Offset, AddrVT)),
+                          Src,
+                          DAG.getConstant(BytesLeft, SizeVT),
+                          Align, DstSV, DstSVOff + Offset);
+  }
+
+  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+  return Chain;
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                           SDValue Chain, SDValue Dst,
+                                           SDValue Src, SDValue Size,
+                                           unsigned Align, bool AlwaysInline,
+                                           const Value *DstSV,
+                                           uint64_t DstSVOff,
+                                           const Value *SrcSV,
+                                           uint64_t SrcSVOff) {
+  // This requires the copy size to be a constant, preferably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+    return SDValue();
+
+  // If not DWORD aligned, call the library.
+  if ((Align & 3) != 0)
+    return SDValue();
+
+  // DWORD aligned
+  MVT AVT = MVT::i32;
+  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
+    AVT = MVT::i64;
+
+  unsigned UBytes = AVT.getSizeInBits() / 8;
+  unsigned CountVal = SizeVal / UBytes;
+  SDValue Count = DAG.getIntPtrConstant(CountVal);
+  unsigned BytesLeft = SizeVal % UBytes;
+
+  SDValue InFlag(0, 0);
+  Chain = DAG.getCopyToReg(Chain, dl,
+                           Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+                           Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl,
+                           Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+                           Dst, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl,
+                           Subtarget->is64Bit() ? X86::RSI : X86::ESI,
+                           Src, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(DAG.getValueType(AVT));
+  Ops.push_back(InFlag);
+  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, &Ops[0], Ops.size());
+
+  SmallVector<SDValue, 2> Results;
+  Results.push_back(RepMovs);
+  if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    MVT DstVT = Dst.getValueType();
+    MVT SrcVT = Src.getValueType();
+    MVT SizeVT = Size.getValueType();
+    Results.push_back(DAG.getMemcpy(Chain, dl,
+                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+                                                DAG.getConstant(Offset, DstVT)),
+                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+                                                DAG.getConstant(Offset, SrcVT)),
+                                    DAG.getConstant(BytesLeft, SizeVT),
+                                    Align, AlwaysInline,
+                                    DstSV, DstSVOff + Offset,
+                                    SrcSV, SrcSVOff + Offset));
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &Results[0], Results.size());
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (!Subtarget->is64Bit()) {
+    // vastart just stores the address of the VarArgsFrameIndex slot into the
+    // memory location argument.
+    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+  }
+
+  // __va_list_tag:
+  //   gp_offset         (0 - 6 * 8)
+  //   fp_offset         (48 - 48 + 8 * 16)
+  //   overflow_arg_area (point to parameters coming in memory).
+  //   reg_save_area
+  SmallVector<SDValue, 8> MemOps;
+  SDValue FIN = Op.getOperand(1);
+  // Store gp_offset
+  SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+                               DAG.getConstant(VarArgsGPOffset, MVT::i32),
+                               FIN, SV, 0);
+  MemOps.push_back(Store);
+
+  // Store fp_offset
+  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                    FIN, DAG.getIntPtrConstant(4));
+  Store = DAG.getStore(Op.getOperand(0), dl,
+                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
+                       FIN, SV, 0);
+  MemOps.push_back(Store);
+
+  // Store ptr to overflow_arg_area
+  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                    FIN, DAG.getIntPtrConstant(4));
+  SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+  Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0);
+  MemOps.push_back(Store);
+
+  // Store ptr to reg_save_area.
+  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                    FIN, DAG.getIntPtrConstant(8));
+  SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+  Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0);
+  MemOps.push_back(Store);
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &MemOps[0], MemOps.size());
+}
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  SDValue SrcSV = Op.getOperand(2);
+
+  assert(0 && "VAArgInst is not yet implemented for x86-64!");
+  abort();
+  return SDValue();
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
+  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+  SDValue Chain = Op.getOperand(0);
+  SDValue DstPtr = Op.getOperand(1);
+  SDValue SrcPtr = Op.getOperand(2);
+  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  DebugLoc dl = Op.getDebugLoc();
+
+  return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+                       DAG.getIntPtrConstant(24), 8, false,
+                       DstSV, 0, SrcSV, 0);
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  switch (IntNo) {
+  default: return SDValue();    // Don't custom lower most intrinsics.
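+  // The comparison intrinsics below all map onto a COMI/UCOMI node plus an
+  // X86ISD::SETCC reading EFLAGS; e.g. __builtin_ia32_comieq_ss(a, b) becomes
+  // roughly "comiss %xmm1, %xmm0; sete %al; movzbl %al, %eax".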
+ // Comparison intrinsics. + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_comieq_sd: + case Intrinsic::x86_sse2_comilt_sd: + case Intrinsic::x86_sse2_comile_sd: + case Intrinsic::x86_sse2_comigt_sd: + case Intrinsic::x86_sse2_comige_sd: + case Intrinsic::x86_sse2_comineq_sd: + case Intrinsic::x86_sse2_ucomieq_sd: + case Intrinsic::x86_sse2_ucomilt_sd: + case Intrinsic::x86_sse2_ucomile_sd: + case Intrinsic::x86_sse2_ucomigt_sd: + case Intrinsic::x86_sse2_ucomige_sd: + case Intrinsic::x86_sse2_ucomineq_sd: { + unsigned Opc = 0; + ISD::CondCode CC = ISD::SETCC_INVALID; + switch (IntNo) { + default: break; + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse2_comieq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse2_comilt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse2_comile_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse2_comigt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse2_comige_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse2_comineq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETNE; + break; + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse2_ucomieq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse2_ucomilt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse2_ucomile_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse2_ucomigt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse2_ucomige_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_ucomineq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETNE; + break; + } + + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG); + SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + + // Fix vector shift instructions where the last operand is a non-immediate + // i32 value. 
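+  // e.g. a pslli_w whose count is not a compile-time constant cannot use the
+  // immediate-count instruction form, so it is rewritten below to psll_w with
+  // the scalar count placed in a vector register via SCALAR_TO_VECTOR.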
+  case Intrinsic::x86_sse2_pslli_w:
+  case Intrinsic::x86_sse2_pslli_d:
+  case Intrinsic::x86_sse2_pslli_q:
+  case Intrinsic::x86_sse2_psrli_w:
+  case Intrinsic::x86_sse2_psrli_d:
+  case Intrinsic::x86_sse2_psrli_q:
+  case Intrinsic::x86_sse2_psrai_w:
+  case Intrinsic::x86_sse2_psrai_d:
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = Op.getOperand(2);
+    if (isa<ConstantSDNode>(ShAmt))
+      return SDValue();
+
+    unsigned NewIntNo = 0;
+    MVT ShAmtVT = MVT::v4i32;
+    switch (IntNo) {
+    case Intrinsic::x86_sse2_pslli_w:
+      NewIntNo = Intrinsic::x86_sse2_psll_w;
+      break;
+    case Intrinsic::x86_sse2_pslli_d:
+      NewIntNo = Intrinsic::x86_sse2_psll_d;
+      break;
+    case Intrinsic::x86_sse2_pslli_q:
+      NewIntNo = Intrinsic::x86_sse2_psll_q;
+      break;
+    case Intrinsic::x86_sse2_psrli_w:
+      NewIntNo = Intrinsic::x86_sse2_psrl_w;
+      break;
+    case Intrinsic::x86_sse2_psrli_d:
+      NewIntNo = Intrinsic::x86_sse2_psrl_d;
+      break;
+    case Intrinsic::x86_sse2_psrli_q:
+      NewIntNo = Intrinsic::x86_sse2_psrl_q;
+      break;
+    case Intrinsic::x86_sse2_psrai_w:
+      NewIntNo = Intrinsic::x86_sse2_psra_w;
+      break;
+    case Intrinsic::x86_sse2_psrai_d:
+      NewIntNo = Intrinsic::x86_sse2_psra_d;
+      break;
+    default: {
+      ShAmtVT = MVT::v2i32;
+      switch (IntNo) {
+      case Intrinsic::x86_mmx_pslli_w:
+        NewIntNo = Intrinsic::x86_mmx_psll_w;
+        break;
+      case Intrinsic::x86_mmx_pslli_d:
+        NewIntNo = Intrinsic::x86_mmx_psll_d;
+        break;
+      case Intrinsic::x86_mmx_pslli_q:
+        NewIntNo = Intrinsic::x86_mmx_psll_q;
+        break;
+      case Intrinsic::x86_mmx_psrli_w:
+        NewIntNo = Intrinsic::x86_mmx_psrl_w;
+        break;
+      case Intrinsic::x86_mmx_psrli_d:
+        NewIntNo = Intrinsic::x86_mmx_psrl_d;
+        break;
+      case Intrinsic::x86_mmx_psrli_q:
+        NewIntNo = Intrinsic::x86_mmx_psrl_q;
+        break;
+      case Intrinsic::x86_mmx_psrai_w:
+        NewIntNo = Intrinsic::x86_mmx_psra_w;
+        break;
+      case Intrinsic::x86_mmx_psrai_d:
+        NewIntNo = Intrinsic::x86_mmx_psra_d;
+        break;
+      default: abort();  // Can't reach here.
+      }
+      break;
+    }
+    }
+    MVT VT = Op.getValueType();
+    ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(NewIntNo, MVT::i32),
+                       Op.getOperand(1), ShAmt);
+  }
+  }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Depth > 0) {
+    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+    SDValue Offset =
+      DAG.getConstant(TD->getPointerSize(),
+                      Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                   FrameAddr, Offset),
+                       NULL, 0);
+  }
+
+  // Just load the return address.
+  SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     RetAddrFI, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+  while (Depth--)
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+  return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+                                                     SelectionDAG &DAG) {
+  return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDValue Chain     = Op.getOperand(0);
+  SDValue Offset    = Op.getOperand(1);
+  SDValue Handler   = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+                                  getPointerTy());
+  unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+  SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
+                                  DAG.getIntPtrConstant(-TD->getPointerSize()));
+  StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0);
+  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+  MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+  return DAG.getNode(X86ISD::EH_RETURN, dl,
+                     MVT::Other,
+                     Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
+                                           SelectionDAG &DAG) {
+  SDValue Root = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+  DebugLoc dl  = Op.getDebugLoc();
+
+  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+  const X86InstrInfo *TII =
+    ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+
+  if (Subtarget->is64Bit()) {
+    SDValue OutChains[6];
+
+    // Large code-model.
+
+    const unsigned char JMP64r  = TII->getBaseOpcodeFor(X86::JMP64r);
+    const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+    const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+    const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+
+    const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+    // Load the pointer to the nested function into R11.
+    unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+    SDValue Addr = Trmp;
+    OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+                                Addr, TrmpAddr, 0);
+
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                       DAG.getConstant(2, MVT::i64));
+    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2);
+
+    // Load the 'nest' parameter value into R10.
+    // R10 is specified in X86CallingConv.td
+    OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                       DAG.getConstant(10, MVT::i64));
+    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+                                Addr, TrmpAddr, 10);
+
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                       DAG.getConstant(12, MVT::i64));
+    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2);
+
+    // Jump to the nested function.
+    OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
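+    // Resulting trampoline layout, for reference (byte offsets on the left,
+    // bytes as emitted by the stores above and below):
+    //    0: 49 BB <fptr:8 bytes>   movabsq $fptr, %r11
+    //   10: 49 BA <nest:8 bytes>   movabsq $nest, %r10
+    //   20: 49 FF E3               jmpq   *%r11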
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                       DAG.getConstant(20, MVT::i64));
+    OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+                                Addr, TrmpAddr, 20);
+
+    unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                       DAG.getConstant(22, MVT::i64));
+    OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+                                TrmpAddr, 22);
+
+    SDValue Ops[] =
+      { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
+    return DAG.getMergeValues(Ops, 2, dl);
+  } else {
+    const Function *Func =
+      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+    unsigned CC = Func->getCallingConv();
+    unsigned NestReg;
+
+    switch (CC) {
+    default:
+      assert(0 && "Unsupported calling convention");
+    case CallingConv::C:
+    case CallingConv::X86_StdCall: {
+      // Pass 'nest' parameter in ECX.
+      // Must be kept in sync with X86CallingConv.td
+      NestReg = X86::ECX;
+
+      // Check that ECX wasn't needed by an 'inreg' parameter.
+      const FunctionType *FTy = Func->getFunctionType();
+      const AttrListPtr &Attrs = Func->getAttributes();
+
+      if (!Attrs.isEmpty() && !Func->isVarArg()) {
+        unsigned InRegCount = 0;
+        unsigned Idx = 1;
+
+        for (FunctionType::param_iterator I = FTy->param_begin(),
+             E = FTy->param_end(); I != E; ++I, ++Idx)
+          if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+            // FIXME: should only count parameters that are lowered to integers.
+            InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+        if (InRegCount > 2) {
+          cerr << "Nest register in use - reduce number of inreg parameters!\n";
+          abort();
+        }
+      }
+      break;
+    }
+    case CallingConv::X86_FastCall:
+    case CallingConv::Fast:
+      // Pass 'nest' parameter in EAX.
+      // Must be kept in sync with X86CallingConv.td
+      NestReg = X86::EAX;
+      break;
+    }
+
+    SDValue OutChains[4];
+    SDValue Addr, Disp;
+
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                       DAG.getConstant(10, MVT::i32));
+    Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
+    const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+    const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+    OutChains[0] = DAG.getStore(Root, dl,
+                                DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+                                Trmp, TrmpAddr, 0);
+
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                       DAG.getConstant(1, MVT::i32));
+    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1);
+
+    const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                       DAG.getConstant(5, MVT::i32));
+    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+                                TrmpAddr, 5, false, 1);
+
+    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                       DAG.getConstant(6, MVT::i32));
+    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1);
+
+    SDValue Ops[] =
+      { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+  /*
+   The rounding mode is in bits 11:10 of FPSR, and has the following
+   settings:
+     00 Round to nearest
+     01 Round to -inf
+     10 Round to +inf
+     11 Round to 0
+
+  FLT_ROUNDS, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+
+  To perform the conversion, we do:
+    (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+  */
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetMachine &TM = MF.getTarget();
+  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  unsigned StackAlignment = TFI.getStackAlignment();
+  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Save FP Control Word to stack slot
+  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+  SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
+                              DAG.getEntryNode(), StackSlot);
+
+  // Load FP Control Word from stack slot
+  SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0);
+
+  // Transform as necessary
+  SDValue CWD1 =
+    DAG.getNode(ISD::SRL, dl, MVT::i16,
+                DAG.getNode(ISD::AND, dl, MVT::i16,
+                            CWD, DAG.getConstant(0x800, MVT::i16)),
+                DAG.getConstant(11, MVT::i8));
+  SDValue CWD2 =
+    DAG.getNode(ISD::SRL, dl, MVT::i16,
+                DAG.getNode(ISD::AND, dl, MVT::i16,
+                            CWD, DAG.getConstant(0x400, MVT::i16)),
+                DAG.getConstant(9, MVT::i8));
+
+  SDValue RetVal =
+    DAG.getNode(ISD::AND, dl, MVT::i16,
+                DAG.getNode(ISD::ADD, dl, MVT::i16,
+                            DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+                            DAG.getConstant(1, MVT::i16)),
+                DAG.getConstant(3, MVT::i16));
+
+
+  return DAG.getNode((VT.getSizeInBits() < 16 ?
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT OpVT = VT;
+  unsigned NumBits = VT.getSizeInBits();
+  DebugLoc dl = Op.getDebugLoc();
+
+  Op = Op.getOperand(0);
+  if (VT == MVT::i8) {
+    // Zero extend to i32 since there is not an i8 bsr.
+    OpVT = MVT::i32;
+    Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+  }
+
+  // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+  Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+  // If src is zero (i.e. bsr sets ZF), returns NumBits.
+  SmallVector<SDValue, 4> Ops;
+  Ops.push_back(Op);
+  Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
+  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+  Ops.push_back(Op.getValue(1));
+  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
+  // Finally xor with NumBits-1.
+  Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+  if (VT == MVT::i8)
+    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+  return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT OpVT = VT;
+  unsigned NumBits = VT.getSizeInBits();
+  DebugLoc dl = Op.getDebugLoc();
+
+  Op = Op.getOperand(0);
+  if (VT == MVT::i8) {
+    OpVT = MVT::i32;
+    Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+  }
+
+  // Issue a bsf (scan bits forward) which also sets EFLAGS.
+  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+  Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+  // If src is zero (i.e. bsf sets ZF), returns NumBits.
+  SmallVector<SDValue, 4> Ops;
+  Ops.push_back(Op);
+  Ops.push_back(DAG.getConstant(NumBits, OpVT));
+  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+  Ops.push_back(Op.getValue(1));
+  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
+  if (VT == MVT::i8)
+    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+  return Op;
+}
+
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+  DebugLoc dl = Op.getDebugLoc();
+
+  //  ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+  //  ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+  //  ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+  //  ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+  //  ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+  //
+  //  AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+  //  AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+  //  return AloBlo + AloBhi + AhiBlo;
+
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+
+  SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+                       A, DAG.getConstant(32, MVT::i32));
+  SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+                       B, DAG.getConstant(32, MVT::i32));
+  SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+                       A, B);
+  SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+                       A, Bhi);
+  SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+                       Ahi, B);
+  AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+                       AloBhi, DAG.getConstant(32, MVT::i32));
+  AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+                       AhiBlo, DAG.getConstant(32, MVT::i32));
+  SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+  Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+  return Res;
+}
+
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+  // Lower the "add/sub/mul with overflow" instruction into a regular
+  // instruction plus a "setcc" instruction that checks the overflow flag.
+  // The "brcond" lowering looks for this combo and may remove the "setcc"
+  // instruction if the "setcc" has only one use.
+  SDNode *N = Op.getNode();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  unsigned BaseOp = 0;
+  unsigned Cond = 0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown ovf instruction!");
+  case ISD::SADDO:
+    // An add of one will be selected as an INC. Note that INC doesn't
+    // set CF, so we can't do this for UADDO.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+      if (C->getAPIntValue() == 1) {
+        BaseOp = X86ISD::INC;
+        Cond = X86::COND_O;
+        break;
+      }
+    BaseOp = X86ISD::ADD;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UADDO:
+    BaseOp = X86ISD::ADD;
+    Cond = X86::COND_B;
+    break;
+  case ISD::SSUBO:
+    // A subtract of one will be selected as a DEC. Note that DEC doesn't
+    // set CF, so we can't do this for USUBO.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+      if (C->getAPIntValue() == 1) {
+        BaseOp = X86ISD::DEC;
+        Cond = X86::COND_O;
+        break;
+      }
+    BaseOp = X86ISD::SUB;
+    Cond = X86::COND_O;
+    break;
+  case ISD::USUBO:
+    BaseOp = X86ISD::SUB;
+    Cond = X86::COND_B;
+    break;
+  case ISD::SMULO:
+    BaseOp = X86ISD::SMUL;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UMULO:
+    BaseOp = X86ISD::UMUL;
+    Cond = X86::COND_B;
+    break;
+  }
+
+  // Also sets EFLAGS.
+  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+  SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+
+  SDValue SetCC =
+    DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
+                DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+  return Sum;
+}
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
+  MVT T = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Reg = 0;
+  unsigned size = 0;
+  switch (T.getSimpleVT()) {
+  default:
+    assert(false && "Invalid value type!");
+  case MVT::i8:  Reg = X86::AL;  size = 1; break;
+  case MVT::i16: Reg = X86::AX;  size = 2; break;
+  case MVT::i32: Reg = X86::EAX; size = 4; break;
+  case MVT::i64:
+    assert(Subtarget->is64Bit() && "Node not type legal!");
+    Reg = X86::RAX; size = 8;
+    break;
+  }
+  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+                                  Op.getOperand(2), SDValue());
+  SDValue Ops[] = { cpIn.getValue(0),
+                    Op.getOperand(1),
+                    Op.getOperand(3),
+                    DAG.getTargetConstant(size, MVT::i8),
+                    cpIn.getValue(1) };
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+  SDValue cpOut =
+    DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+  return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+                                                 SelectionDAG &DAG) {
+  assert(Subtarget->is64Bit() && "Result not type legalized?");
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue TheChain = Op.getOperand(0);
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+  SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+  SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+                                   rax.getValue(2));
+  SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+                            DAG.getConstant(32, MVT::i8));
+  SDValue Ops[] = {
+    DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+    rdx.getValue(1)
+  };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+  SDNode *Node = Op.getNode();
+  DebugLoc dl = Node->getDebugLoc();
+  MVT T = Node->getValueType(0);
+  SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+                              DAG.getConstant(0, T), Node->getOperand(2));
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+                       cast<AtomicSDNode>(Node)->getMemoryVT(),
+                       Node->getOperand(0),
+                       Node->getOperand(1), negOp,
+                       cast<AtomicSDNode>(Node)->getSrcValue(),
+                       cast<AtomicSDNode>(Node)->getAlignment());
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Should not custom lower this!");
+  case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
+  case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
+  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
+  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
+  case ISD::SHL_PARTS:
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
+  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
+  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
+  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
+  case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG);
+  case ISD::FABS:               return LowerFABS(Op, DAG);
+  case ISD::FNEG:               return LowerFNEG(Op, DAG);
+  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
+  case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::VSETCC:             return LowerVSETCC(Op, DAG);
+  case ISD::SELECT:             return LowerSELECT(Op, DAG);
+  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
+  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
+  case ISD::CALL:               return LowerCALL(Op, DAG);
+  case ISD::RET:                return LowerRET(Op, DAG);
+  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
+  case ISD::VASTART:            return LowerVASTART(Op, DAG);
+  case ISD::VAARG:              return LowerVAARG(Op, DAG);
+  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+  case ISD::FRAME_TO_ARGS_OFFSET:
+                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
+  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
+  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
+  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
+  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
+  case ISD::MUL:                return LowerMUL_V2I64(Op, DAG);
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:              return LowerXALUO(Op, DAG);
+  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
+  }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue> &Results,
+                        SelectionDAG &DAG, unsigned NewOp) {
+  MVT T = Node->getValueType(0);
+  DebugLoc dl = Node->getDebugLoc();
+  assert(T == MVT::i64 && "Only know how to expand i64 atomics");
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue In1 = Node->getOperand(1);
+  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             Node->getOperand(2), DAG.getIntPtrConstant(0));
+  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             Node->getOperand(2), DAG.getIntPtrConstant(1));
+  // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
+  // have a MemOperand. Pass the info through as a normal operand.
+  SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+  SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+  SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5);
+  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+  Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  DebugLoc dl = N->getDebugLoc();
+  switch (N->getOpcode()) {
+  default:
+    assert(false && "Do not know how to custom type legalize this operation!");
+    return;
+  case ISD::FP_TO_SINT: {
+    std::pair<SDValue,SDValue> Vals =
+        FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+    SDValue FIST = Vals.first, StackSlot = Vals.second;
+    if (FIST.getNode() != 0) {
+      MVT VT = N->getValueType(0);
+      // Return a load from the stack slot.
+      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
+    }
+    return;
+  }
+  case ISD::READCYCLECOUNTER: {
+    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue TheChain = N->getOperand(0);
+    SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+    SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+                                     rd.getValue(1));
+    SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+                                     eax.getValue(2));
+    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+    SDValue Ops[] = { eax, edx };
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+    Results.push_back(edx.getValue(1));
+    return;
+  }
+  case ISD::ATOMIC_CMP_SWAP: {
+    MVT T = N->getValueType(0);
+    assert(T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
+    SDValue cpInL, cpInH;
+    cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+                        DAG.getConstant(0, MVT::i32));
+    cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+                        DAG.getConstant(1, MVT::i32));
+    cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
+    cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
+                             cpInL.getValue(1));
+    SDValue swapInL, swapInH;
+    swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+                          DAG.getConstant(0, MVT::i32));
+    swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+                          DAG.getConstant(1, MVT::i32));
+    swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
+                               cpInH.getValue(1));
+    swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
+                               swapInL.getValue(1));
+    SDValue Ops[] = { swapInH.getValue(0),
+                      N->getOperand(1),
+                      swapInH.getValue(1) };
+    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+    SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
+                                        MVT::i32, Result.getValue(1));
+    SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
+                                        MVT::i32, cpOutL.getValue(2));
+    SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+    Results.push_back(cpOutH.getValue(1));
+    return;
+  }
+  case ISD::ATOMIC_LOAD_ADD:
+    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_AND:
+    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+    return;
+  case 
ISD::ATOMIC_LOAD_NAND: + ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG); + return; + case ISD::ATOMIC_LOAD_OR: + ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG); + return; + case ISD::ATOMIC_LOAD_SUB: + ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG); + return; + case ISD::ATOMIC_LOAD_XOR: + ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG); + return; + case ISD::ATOMIC_SWAP: + ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); + return; + } +} + +const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return NULL; + case X86ISD::BSF: return "X86ISD::BSF"; + case X86ISD::BSR: return "X86ISD::BSR"; + case X86ISD::SHLD: return "X86ISD::SHLD"; + case X86ISD::SHRD: return "X86ISD::SHRD"; + case X86ISD::FAND: return "X86ISD::FAND"; + case X86ISD::FOR: return "X86ISD::FOR"; + case X86ISD::FXOR: return "X86ISD::FXOR"; + case X86ISD::FSRL: return "X86ISD::FSRL"; + case X86ISD::FILD: return "X86ISD::FILD"; + case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; + case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; + case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; + case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; + case X86ISD::FLD: return "X86ISD::FLD"; + case X86ISD::FST: return "X86ISD::FST"; + case X86ISD::CALL: return "X86ISD::CALL"; + case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; + case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; + case X86ISD::BT: return "X86ISD::BT"; + case X86ISD::CMP: return "X86ISD::CMP"; + case X86ISD::COMI: return "X86ISD::COMI"; + case X86ISD::UCOMI: return "X86ISD::UCOMI"; + case X86ISD::SETCC: return "X86ISD::SETCC"; + case X86ISD::CMOV: return "X86ISD::CMOV"; + case X86ISD::BRCOND: return "X86ISD::BRCOND"; + case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; + case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; + case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; + case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; + case X86ISD::Wrapper: return "X86ISD::Wrapper"; + case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; + case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; + case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; + case X86ISD::PINSRB: return "X86ISD::PINSRB"; + case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; + case X86ISD::FMAX: return "X86ISD::FMAX"; + case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; + case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; + case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; + case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; + case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; + case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; + case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; + case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; + case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; + case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; + case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; + case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; + case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; + case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; + case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VSHL: return "X86ISD::VSHL"; + case X86ISD::VSRL: return 
"X86ISD::VSRL"; + case X86ISD::CMPPD: return "X86ISD::CMPPD"; + case X86ISD::CMPPS: return "X86ISD::CMPPS"; + case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB"; + case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW"; + case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD"; + case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ"; + case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB"; + case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW"; + case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD"; + case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; + case X86ISD::ADD: return "X86ISD::ADD"; + case X86ISD::SUB: return "X86ISD::SUB"; + case X86ISD::SMUL: return "X86ISD::SMUL"; + case X86ISD::UMUL: return "X86ISD::UMUL"; + case X86ISD::INC: return "X86ISD::INC"; + case X86ISD::DEC: return "X86ISD::DEC"; + case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; + } +} + +// isLegalAddressingMode - Return true if the addressing mode represented +// by AM is legal for this target, for a load/store of the specified type. +bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // X86 supports extremely general addressing modes. + + // X86 allows a sign-extended 32-bit immediate field as a displacement. + if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) + return false; + + if (AM.BaseGV) { + // We can only fold this if we don't need an extra load. + if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) + return false; + // If BaseGV requires a register, we cannot also have a BaseReg. + if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) && + AM.HasBaseReg) + return false; + + // X86-64 only supports addr of globals in small code model. + if (Subtarget->is64Bit()) { + if (getTargetMachine().getCodeModel() != CodeModel::Small) + return false; + // If lower 4G is not available, then we must use rip-relative addressing. + if (AM.BaseOffs || AM.Scale > 1) + return false; + } + } + + switch (AM.Scale) { + case 0: + case 1: + case 2: + case 4: + case 8: + // These scales always work. + break; + case 3: + case 5: + case 9: + // These scales are formed with basereg+scalereg. Only accept if there is + // no basereg yet. + if (AM.HasBaseReg) + return false; + break; + default: // Other stuff never works. + return false; + } + + return true; +} + + +bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { + if (!Ty1->isInteger() || !Ty2->isInteger()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return Subtarget->is64Bit() || NumBits1 < 64; +} + +bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return Subtarget->is64Bit() || NumBits1 < 64; +} + +bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { + // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. + return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit(); +} + +bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { + // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. + return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); +} + +bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { + // i16 instructions are longer (0x66 prefix) and potentially slower. 
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16); +} + +/// isShuffleMaskLegal - Targets can use this to indicate that they only +/// support *some* VECTOR_SHUFFLE operations, those with specific masks. +/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values +/// are assumed to be legal. +bool +X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + MVT VT) const { + // Only do shuffles on 128-bit vector types for now. + if (VT.getSizeInBits() == 64) + return false; + + // FIXME: pshufb, blends, palignr, shifts. + return (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isMOVLMask(M, VT) || + isSHUFPMask(M, VT) || + isPSHUFDMask(M, VT) || + isPSHUFHWMask(M, VT) || + isPSHUFLWMask(M, VT) || + isUNPCKLMask(M, VT) || + isUNPCKHMask(M, VT) || + isUNPCKL_v_undef_Mask(M, VT) || + isUNPCKH_v_undef_Mask(M, VT)); +} + +bool +X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl &Mask, + MVT VT) const { + unsigned NumElts = VT.getVectorNumElements(); + // FIXME: This collection of masks seems suspect. + if (NumElts == 2) + return true; + if (NumElts == 4 && VT.getSizeInBits() == 128) { + return (isMOVLMask(Mask, VT) || + isCommutedMOVLMask(Mask, VT, true) || + isSHUFPMask(Mask, VT) || + isCommutedSHUFPMask(Mask, VT)); + } + return false; +} + +//===----------------------------------------------------------------------===// +// X86 Scheduler Hooks +//===----------------------------------------------------------------------===// + +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpc, + unsigned immOpc, + unsigned LoadOpc, + unsigned CXchgOpc, + unsigned copyOpc, + unsigned notOpc, + unsigned EAXreg, + TargetRegisterClass *RC, + bool invSrc) const { + // For the atomic bitwise operator, we generate + // thisMBB: + // newMBB: + // ld t1 = [bitinstr.addr] + // op t2 = t1, [bitinstr.val] + // mov EAX = t1 + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, newMBB); + F->insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 && + "unexpected number of operands"); + DebugLoc dl = bInstr->getDebugLoc(); + MachineOperand& destOper = bInstr->getOperand(0); + MachineOperand* argOpers[2 + X86AddrNumOperands]; + int numArgs = bInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &bInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int valArgIndx = lastAddrIndx + 1; + + unsigned t1 = F->getRegInfo().createVirtualRegister(RC); + 
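// (For reference, the loop assembled below computes the following, shown
//  with the GCC builtin that lowers to LOCK CMPXCHG; the return value is
//  the old memory value, which is what CMPXCHG leaves in EAX:
//
//    unsigned atomic_and(volatile unsigned *p, unsigned val) {
//      unsigned old, desired;
//      do {
//        old = *p;                    // ld t1 = [addr]
//        desired = old & val;         // op t2 = t1, val (AND shown)
//      } while (__sync_val_compare_and_swap(p, old, desired) != old);
//      return old;                    // mov dest = EAX
//    }
//  )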
MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + unsigned tt = F->getRegInfo().createVirtualRegister(RC); + if (invSrc) { + MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1); + } + else + tt = t1; + + unsigned t2 = F->getRegInfo().createVirtualRegister(RC); + assert((argOpers[valArgIndx]->isReg() || + argOpers[valArgIndx]->isImm()) && + "invalid operand"); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); + else + MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); + MIB.addReg(tt); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg); + MIB.addReg(t1); + + MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t2); + assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); + (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); + MIB.addReg(EAXreg); + + // insert branch + BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + + F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + return nextMBB; +} + +// private utility function: 64 bit atomics on 32 bit host. +MachineBasicBlock * +X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpcL, + unsigned regOpcH, + unsigned immOpcL, + unsigned immOpcH, + bool invSrc) const { + // For the atomic bitwise operator, we generate + // thisMBB (instructions are in pairs, except cmpxchg8b) + // ld t1,t2 = [bitinstr.addr] + // newMBB: + // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) + // op t5, t6 <- out1, out2, [bitinstr.val] + // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) + // mov ECX, EBX <- t5, t6 + // mov EAX, EDX <- t1, t2 + // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] + // mov t3, t4 <- EAX, EDX + // bz newMBB + // result in out1, out2 + // fallthrough -->nextMBB + + const TargetRegisterClass *RC = X86::GR32RegisterClass; + const unsigned LoadOpc = X86::MOV32rm; + const unsigned copyOpc = X86::MOV32rr; + const unsigned NotOpc = X86::NOT32r; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, newMBB); + F->insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + DebugLoc dl = bInstr->getDebugLoc(); + // Insert instructions into newMBB based on incoming instruction + // There are 8 "real" operands plus 9 implicit def/uses, ignored here. 
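// (C sketch of this expansion, using the __sync builtin that lowers to
//  CMPXCHG8B on 32-bit x86 -- t1:t2 is the initial load, t5/t6 go to
//  EBX/ECX, and the PHIs carry EDX:EAX back around on failure:
//
//    unsigned long long atomic_and64(volatile unsigned long long *p,
//                                    unsigned long long val) {
//      unsigned long long old = *p;                  // ld t1,t2
//      for (;;) {
//        unsigned long long desired = old & val;     // op t5,t6 (AND shown)
//        unsigned long long seen =
//            __sync_val_compare_and_swap(p, old, desired); // cmpxchg8b
//        if (seen == old) return old;                // result in out1,out2
//        old = seen;                                 // mov t3,t4 = EAX,EDX
//      }
//    }
//  )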
+ assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && + "unexpected number of operands"); + MachineOperand& dest1Oper = bInstr->getOperand(0); + MachineOperand& dest2Oper = bInstr->getOperand(1); + MachineOperand* argOpers[2 + X86AddrNumOperands]; + for (int i=0; i < 2 + X86AddrNumOperands; ++i) + argOpers[i] = &bInstr->getOperand(i+2); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + + unsigned t1 = F->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + unsigned t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2); + // add 4 to displacement. + for (int i=0; i <= lastAddrIndx-2; ++i) + (*MIB).addOperand(*argOpers[i]); + MachineOperand newOp3 = *(argOpers[3]); + if (newOp3.isImm()) + newOp3.setImm(newOp3.getImm()+4); + else + newOp3.setOffset(newOp3.getOffset()+4); + (*MIB).addOperand(newOp3); + (*MIB).addOperand(*argOpers[lastAddrIndx]); + + // t3/4 are defined later, at the bottom of the loop + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + unsigned t4 = F->getRegInfo().createVirtualRegister(RC); + BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg()) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) + .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); + + unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); + unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + if (invSrc) { + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2); + } else { + tt1 = t1; + tt2 = t2; + } + + int valArgIndx = lastAddrIndx + 1; + assert((argOpers[valArgIndx]->isReg() || + argOpers[valArgIndx]->isImm()) && + "invalid operand"); + unsigned t5 = F->getRegInfo().createVirtualRegister(RC); + unsigned t6 = F->getRegInfo().createVirtualRegister(RC); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5); + else + MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); + if (regOpcL != X86::MOV32rr) + MIB.addReg(tt1); + (*MIB).addOperand(*argOpers[valArgIndx]); + assert(argOpers[valArgIndx + 1]->isReg() == + argOpers[valArgIndx]->isReg()); + assert(argOpers[valArgIndx + 1]->isImm() == + argOpers[valArgIndx]->isImm()); + if (argOpers[valArgIndx + 1]->isReg()) + MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6); + else + MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); + if (regOpcH != X86::MOV32rr) + MIB.addReg(tt2); + (*MIB).addOperand(*argOpers[valArgIndx + 1]); + + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); + MIB.addReg(t1); + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX); + MIB.addReg(t2); + + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX); + MIB.addReg(t5); + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX); + MIB.addReg(t6); + + MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); + (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); + MIB.addReg(X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4); + MIB.addReg(X86::EDX); + + // insert branch + BuildMI(newMBB, dl, 
TII->get(X86::JNE)).addMBB(newMBB); + + F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + return nextMBB; +} + +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, + MachineBasicBlock *MBB, + unsigned cmovOpc) const { + // For the atomic min/max operator, we generate + // thisMBB: + // newMBB: + // ld t1 = [min/max.addr] + // mov t2 = [min/max.val] + // cmp t1, t2 + // cmov[cond] t2 = t1 + // mov EAX = t1 + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + // + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, newMBB); + F->insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to newMBB and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + DebugLoc dl = mInstr->getDebugLoc(); + // Insert instructions into newMBB based on incoming instruction + assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && + "unexpected number of operands"); + MachineOperand& destOper = mInstr->getOperand(0); + MachineOperand* argOpers[2 + X86AddrNumOperands]; + int numArgs = mInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &mInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int valArgIndx = lastAddrIndx + 1; + + unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + // We only support register and immediate values + assert((argOpers[valArgIndx]->isReg() || + argOpers[valArgIndx]->isImm()) && + "invalid operand"); + + unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + else + MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX); + MIB.addReg(t1); + + MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr)); + MIB.addReg(t1); + MIB.addReg(t2); + + // Generate movc + unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3); + MIB.addReg(t2); + MIB.addReg(t1); + + // Cmp and exchange if none has modified the memory location + MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t3); + assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand"); + (*MIB).addMemOperand(*F, *mInstr->memoperands_begin()); + + MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); + MIB.addReg(X86::EAX); + + // insert branch + BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + + 
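// (Sketch of the min/max loop just built, for the signed-min case; note
//  that as written both arms of the isReg()/isImm() check above emit
//  MOV32rr, so the immediate form appears to be left unhandled here:
//
//    int atomic_smin(volatile int *p, int val) {
//      int old, desired;
//      do {
//        old = *p;                              // ld t1
//        desired = old < val ? old : val;       // cmp t1,t2 ; cmovl t3
//      } while (__sync_val_compare_and_swap(p, old, desired) != old);
//      return old;                              // dest = EAX
//    }
//  )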
F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. + return nextMBB; +} + + +MachineBasicBlock * +X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case X86::CMOV_V1I64: + case X86::CMOV_FR32: + case X86::CMOV_FR64: + case X86::CMOV_V4F32: + case X86::CMOV_V2F64: + case X86::CMOV_V2I64: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + sinkMBB->transferSuccessors(BB); + + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } + + case X86::FP32_TO_INT16_IN_MEM: + case X86::FP32_TO_INT32_IN_MEM: + case X86::FP32_TO_INT64_IN_MEM: + case X86::FP64_TO_INT16_IN_MEM: + case X86::FP64_TO_INT32_IN_MEM: + case X86::FP64_TO_INT64_IN_MEM: + case X86::FP80_TO_INT16_IN_MEM: + case X86::FP80_TO_INT32_IN_MEM: + case X86::FP80_TO_INT64_IN_MEM: { + // Change the floating point control register to use "round towards zero" + // mode when truncating to an integer value. + MachineFunction *F = BB->getParent(); + int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); + addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx); + + // Load the old value of the high byte of the control word... + unsigned OldCW = + F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); + addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW), + CWFrameIdx); + + // Set the high part to be round to zero... + addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx) + .addImm(0xC7F); + + // Reload the modified control word now... 
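// (Why this dance: x87 FIST/FISTP honor the current rounding mode, which
//  defaults to round-to-nearest, while C's float-to-int conversion must
//  truncate. 0xC7F is the control word with the rounding-control field
//  (bits 10-11) forced to 11b = round toward zero; the FLDCW below
//  activates it, and the saved original word is put back afterwards.)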
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + + // Restore the memory image of control word to original value + addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx) + .addReg(OldCW); + + // Get the X86 opcode to use. + unsigned Opc; + switch (MI->getOpcode()) { + default: assert(0 && "illegal opcode!"); + case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; + case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; + case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; + case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; + case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; + case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; + case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; + case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; + case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; + } + + X86AddressMode AM; + MachineOperand &Op = MI->getOperand(0); + if (Op.isReg()) { + AM.BaseType = X86AddressMode::RegBase; + AM.Base.Reg = Op.getReg(); + } else { + AM.BaseType = X86AddressMode::FrameIndexBase; + AM.Base.FrameIndex = Op.getIndex(); + } + Op = MI->getOperand(1); + if (Op.isImm()) + AM.Scale = Op.getImm(); + Op = MI->getOperand(2); + if (Op.isImm()) + AM.IndexReg = Op.getImm(); + Op = MI->getOperand(3); + if (Op.isGlobal()) { + AM.GV = Op.getGlobal(); + } else { + AM.Disp = Op.getImm(); + } + addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM) + .addReg(MI->getOperand(X86AddrNumOperands).getReg()); + + // Reload the original control word now. + addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } + case X86::ATOMAND32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, + X86::AND32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); + case X86::ATOMOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, + X86::OR32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); + case X86::ATOMXOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, + X86::XOR32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); + case X86::ATOMNAND32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, + X86::AND32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass, true); + case X86::ATOMMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); + case X86::ATOMMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); + case X86::ATOMUMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); + case X86::ATOMUMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + + case X86::ATOMAND16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, + X86::AND16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass); + case X86::ATOMOR16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, + X86::OR16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass); + case X86::ATOMXOR16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, + X86::XOR16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + 
X86::GR16RegisterClass); + case X86::ATOMNAND16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, + X86::AND16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass, true); + case X86::ATOMMIN16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); + case X86::ATOMMAX16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); + case X86::ATOMUMIN16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); + case X86::ATOMUMAX16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); + + case X86::ATOMAND8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, + X86::AND8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMOR8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, + X86::OR8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMXOR8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, + X86::XOR8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMNAND8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, + X86::AND8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass, true); + // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + // This group is for 64-bit host. + case X86::ATOMAND64: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, + X86::AND64ri32, X86::MOV64rm, + X86::LCMPXCHG64, X86::MOV64rr, + X86::NOT64r, X86::RAX, + X86::GR64RegisterClass); + case X86::ATOMOR64: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, + X86::OR64ri32, X86::MOV64rm, + X86::LCMPXCHG64, X86::MOV64rr, + X86::NOT64r, X86::RAX, + X86::GR64RegisterClass); + case X86::ATOMXOR64: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, + X86::XOR64ri32, X86::MOV64rm, + X86::LCMPXCHG64, X86::MOV64rr, + X86::NOT64r, X86::RAX, + X86::GR64RegisterClass); + case X86::ATOMNAND64: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, + X86::AND64ri32, X86::MOV64rm, + X86::LCMPXCHG64, X86::MOV64rr, + X86::NOT64r, X86::RAX, + X86::GR64RegisterClass, true); + case X86::ATOMMIN64: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); + case X86::ATOMMAX64: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); + case X86::ATOMUMIN64: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); + case X86::ATOMUMAX64: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + + // This group does 64-bit operations on a 32-bit host. 
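// (In the group that follows, the low/high halves get paired opcodes --
//  ADD32rr/ADC32rr and SUB32rr/SBB32rr so the carry propagates -- while
//  SWAP simply moves both halves; NAND reuses AND with invSrc set, which
//  complements the loaded value first, i.e. it stores ~old & val.)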
+ case X86::ATOMAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + false); + case X86::ATOMOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::OR32rr, X86::OR32rr, + X86::OR32ri, X86::OR32ri, + false); + case X86::ATOMXOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::XOR32rr, X86::XOR32rr, + X86::XOR32ri, X86::XOR32ri, + false); + case X86::ATOMNAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + true); + case X86::ATOMADD6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::ADD32rr, X86::ADC32rr, + X86::ADD32ri, X86::ADC32ri, + false); + case X86::ATOMSUB6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::SUB32rr, X86::SBB32rr, + X86::SUB32ri, X86::SBB32ri, + false); + case X86::ATOMSWAP6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::MOV32rr, X86::MOV32rr, + X86::MOV32ri, X86::MOV32ri, + false); + } +} + +//===----------------------------------------------------------------------===// +// X86 Optimization Hooks +//===----------------------------------------------------------------------===// + +void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + unsigned Opc = Op.getOpcode(); + assert((Opc >= ISD::BUILTIN_OP_END || + Opc == ISD::INTRINSIC_WO_CHAIN || + Opc == ISD::INTRINSIC_W_CHAIN || + Opc == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + switch (Opc) { + default: break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::INC: + case X86ISD::DEC: + // These nodes' second result is a boolean. + if (Op.getResNo() == 0) + break; + // Fallthrough + case X86ISD::SETCC: + KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), + Mask.getBitWidth() - 1); + break; + } +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool X86TargetLowering::isGAPlusOffset(SDNode *N, + GlobalValue* &GA, int64_t &Offset) const{ + if (N->getOpcode() == X86ISD::Wrapper) { + if (isa(N->getOperand(0))) { + GA = cast(N->getOperand(0))->getGlobal(); + Offset = cast(N->getOperand(0))->getOffset(); + return true; + } + } + return TargetLowering::isGAPlusOffset(N, GA, Offset); +} + +static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, + const TargetLowering &TLI) { + GlobalValue *GV; + int64_t Offset = 0; + if (TLI.isGAPlusOffset(Base, GV, Offset)) + return (GV->getAlignment() >= N && (Offset % N) == 0); + // DAG combine handles the stack object case. 
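// (The reasoning above: a global with getAlignment() >= N whose constant
//  offset is a multiple of N is provably N-byte aligned; the N == 16 case
//  is what lets the shuffle combine below use an aligned 128-bit load.
//  Anything else conservatively fails here:)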
+ return false; +} + +static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, + MVT EVT, SDNode *&Base, + SelectionDAG &DAG, MachineFrameInfo *MFI, + const TargetLowering &TLI) { + Base = NULL; + for (unsigned i = 0; i < NumElems; ++i) { + if (N->getMaskElt(i) < 0) { + if (!Base) + return false; + continue; + } + + SDValue Elt = DAG.getShuffleScalarElt(N, i); + if (!Elt.getNode() || + (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) + return false; + if (!Base) { + Base = Elt.getNode(); + if (Base->getOpcode() == ISD::UNDEF) + return false; + continue; + } + if (Elt.getOpcode() == ISD::UNDEF) + continue; + + if (!TLI.isConsecutiveLoad(Elt.getNode(), Base, + EVT.getSizeInBits()/8, i, MFI)) + return false; + } + return true; +} + +/// PerformShuffleCombine - Combine a vector_shuffle that is equal to +/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load +/// if the load addresses are consecutive, non-overlapping, and in the right +/// order. In the case of v2i64, it will see if it can rewrite the +/// shuffle to be an appropriate build vector so it can take advantage of +// performBuildVectorCombine. +static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + MVT EVT = VT.getVectorElementType(); + ShuffleVectorSDNode *SVN = cast(N); + unsigned NumElems = VT.getVectorNumElements(); + + // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 + // where the upper half is 0, it is advantageous to rewrite it as a build + // vector of (0, val) so it can use movq. + if (VT == MVT::v2i64) { + SDValue In[2]; + In[0] = N->getOperand(0); + In[1] = N->getOperand(1); + int Idx0 = SVN->getMaskElt(0); + int Idx1 = SVN->getMaskElt(1); + // FIXME: can we take advantage of undef index? + if (Idx0 >= 0 && Idx1 >= 0 && + In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && + In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { + ConstantSDNode* InsertVecIdx = + dyn_cast(In[Idx0/2].getOperand(2)); + if (InsertVecIdx && + InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) && + isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) { + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, + In[Idx0/2].getOperand(1), + In[Idx1/2].getOperand(Idx1 % 2)); + } + } + } + + // Try to combine a vector_shuffle into a 128-bit load. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + SDNode *Base = NULL; + if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI)) + return SDValue(); + + LoadSDNode *LD = cast(Base); + if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI)) + return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile()); + return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); +} + +/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. +static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget, + const TargetLowering &TLI) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + + // Ignore single operand BUILD_VECTOR. 
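// (The combine below targets the x86-32 idiom "build_vector (load i64 p),
//  0" and its f64 BUILD_PAIR/bitcast variant: MOVQ/MOVSD zero-extend a
//  64-bit load into an XMM register, so the pair becomes a single
//  X86ISD::VZEXT_LOAD node; the old load's chain users are rewired to the
//  new node's chain result via CombineTo.)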
+ if (NumOps == 1) + return SDValue(); + + MVT VT = N->getValueType(0); + MVT EVT = VT.getVectorElementType(); + if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) + // We are looking for load i64 and zero extend. We want to transform + // it before legalizer has a chance to expand it. Also look for i64 + // BUILD_PAIR bit casted to f64. + return SDValue(); + // This must be an insertion into a zero vector. + SDValue HighElt = N->getOperand(1); + if (!isZeroNode(HighElt)) + return SDValue(); + + // Value must be a load. + SDNode *Base = N->getOperand(0).getNode(); + if (!isa(Base)) { + if (Base->getOpcode() != ISD::BIT_CONVERT) + return SDValue(); + Base = Base->getOperand(0).getNode(); + if (!isa(Base)) + return SDValue(); + } + + // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast(Base); + + // Load must not be an extload. + if (LD->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + // Load type should legal type so we don't have to legalize it. + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + TargetLowering::TargetLoweringOpt TLO(DAG); + TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); + DCI.CommitTargetLoweringOpt(TLO); + return ResNode; +} + +/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Cond = N->getOperand(0); + // Get the LHS/RHS of the select. + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + + // If we have SSE[12] support, try to form min/max nodes. + if (Subtarget->hasSSE2() && + (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && + Cond.getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + + unsigned Opcode = 0; + if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + switch (CC) { + default: break; + case ISD::SETOLE: // (X <= Y) ? X : Y -> min + case ISD::SETULE: + case ISD::SETLE: + if (!UnsafeFPMath) break; + // FALL THROUGH. + case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min + case ISD::SETLT: + Opcode = X86ISD::FMIN; + break; + + case ISD::SETOGT: // (X > Y) ? X : Y -> max + case ISD::SETUGT: + case ISD::SETGT: + if (!UnsafeFPMath) break; + // FALL THROUGH. + case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max + case ISD::SETGE: + Opcode = X86ISD::FMAX; + break; + } + } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + switch (CC) { + default: break; + case ISD::SETOGT: // (X > Y) ? Y : X -> min + case ISD::SETUGT: + case ISD::SETGT: + if (!UnsafeFPMath) break; + // FALL THROUGH. + case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min + case ISD::SETGE: + Opcode = X86ISD::FMIN; + break; + + case ISD::SETOLE: // (X <= Y) ? Y : X -> max + case ISD::SETULE: + case ISD::SETLE: + if (!UnsafeFPMath) break; + // FALL THROUGH. + case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max + case ISD::SETLT: + Opcode = X86ISD::FMAX; + break; + } + } + + if (Opcode) + return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); + } + + // If this is a select between two integer constants, try to do some + // optimizations. + if (ConstantSDNode *TrueC = dyn_cast(LHS)) { + if (ConstantSDNode *FalseC = dyn_cast(RHS)) + // Don't do this for crazy integer types. 
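// (Two notes on PerformSELECTCombine above. First, the UnsafeFPMath
//  guards exist because MINSS/MAXSS resolve the IEEE corner cases by
//  returning their second operand when the inputs are unordered (NaN) or
//  compare equal, so only the strict forms -- e.g. (x < y) ? x : y for
//  min -- match the instruction exactly; <= and >= agree only if NaNs and
//  signed zeros may be ignored. Second, the constant-select folds that
//  follow compute, with diff = TrueC - FalseC:
//    c ? 2^k : 0     ->  zext(c) << k
//    c ? FC+1 : FC   ->  zext(c) + FC
//    c ? TC : FC     ->  zext(c)*diff + FC   (diff in {1,2,3,4,5,8,9})
//  trading a branch or cmov for a setcc plus shift/add/LEA.)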
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) { + // If this is efficiently invertible, canonicalize the LHSC/RHSC values + // so that TrueC (the true value) is larger than FalseC. + bool NeedsCondInvert = false; + + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && + // Efficiently invertible. + (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. + (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible. + isa(Cond.getOperand(1))))) { + NeedsCondInvert = true; + std::swap(TrueC, FalseC); + } + + // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. + if (FalseC->getAPIntValue() == 0 && + TrueC->getAPIntValue().isPowerOf2()) { + if (NeedsCondInvert) // Invert the condition if needed. + Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); + + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); + return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, + DAG.getConstant(ShAmt, MVT::i8)); + } + + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. + if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { + if (NeedsCondInvert) // Invert the condition if needed. + Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, + FalseC->getValueType(0), Cond); + return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, + SDValue(FalseC, 0)); + } + + // Optimize cases that will turn into an LEA instruction. This requires + // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). + if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { + uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); + if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + + bool isFastMultiplier = false; + if (Diff < 10) { + switch ((unsigned char)Diff) { + default: break; + case 1: // result = add base, cond + case 2: // result = lea base( , cond*2) + case 3: // result = lea base(cond, cond*2) + case 4: // result = lea base( , cond*4) + case 5: // result = lea base(cond, cond*4) + case 8: // result = lea base( , cond*8) + case 9: // result = lea base(cond, cond*8) + isFastMultiplier = true; + break; + } + } + + if (isFastMultiplier) { + APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); + if (NeedsCondInvert) // Invert the condition if needed. + Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), + Cond); + // Scale the condition by the difference. + if (Diff != 1) + Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, + DAG.getConstant(Diff, Cond.getValueType())); + + // Add the base if non-zero. + if (FalseC->getAPIntValue() != 0) + Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, + SDValue(FalseC, 0)); + return Cond; + } + } + } + } + + return SDValue(); +} + +/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] +static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + DebugLoc DL = N->getDebugLoc(); + + // If the flag operand isn't dead, don't touch this CMOV. 
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) + return SDValue(); + + // If this is a select between two integer constants, try to do some + // optimizations. Note that the operands are ordered the opposite of SELECT + // operands. + if (ConstantSDNode *TrueC = dyn_cast(N->getOperand(1))) { + if (ConstantSDNode *FalseC = dyn_cast(N->getOperand(0))) { + // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is + // larger than FalseC (the false value). + X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); + + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { + CC = X86::GetOppositeBranchCondition(CC); + std::swap(TrueC, FalseC); + } + + // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. + // This is efficient for any integer data type (including i8/i16) and + // shift amount. + if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { + SDValue Cond = N->getOperand(3); + Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(CC, MVT::i8), Cond); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); + + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); + Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, + DAG.getConstant(ShAmt, MVT::i8)); + if (N->getNumValues() == 2) // Dead flag value? + return DCI.CombineTo(N, Cond, SDValue()); + return Cond; + } + + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient + // for any integer data type, including i8/i16. + if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { + SDValue Cond = N->getOperand(3); + Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(CC, MVT::i8), Cond); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, + FalseC->getValueType(0), Cond); + Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, + SDValue(FalseC, 0)); + + if (N->getNumValues() == 2) // Dead flag value? + return DCI.CombineTo(N, Cond, SDValue()); + return Cond; + } + + // Optimize cases that will turn into an LEA instruction. This requires + // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). + if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { + uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); + if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + + bool isFastMultiplier = false; + if (Diff < 10) { + switch ((unsigned char)Diff) { + default: break; + case 1: // result = add base, cond + case 2: // result = lea base( , cond*2) + case 3: // result = lea base(cond, cond*2) + case 4: // result = lea base( , cond*4) + case 5: // result = lea base(cond, cond*4) + case 8: // result = lea base( , cond*8) + case 9: // result = lea base(cond, cond*8) + isFastMultiplier = true; + break; + } + } + + if (isFastMultiplier) { + APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); + SDValue Cond = N->getOperand(3); + Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(CC, MVT::i8), Cond); + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), + Cond); + // Scale the condition by the difference. + if (Diff != 1) + Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, + DAG.getConstant(Diff, Cond.getValueType())); + + // Add the base if non-zero. 
+ if (FalseC->getAPIntValue() != 0) + Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, + SDValue(FalseC, 0)); + if (N->getNumValues() == 2) // Dead flag value? + return DCI.CombineTo(N, Cond, SDValue()); + return Cond; + } + } + } + } + return SDValue(); +} + + +/// PerformMulCombine - Optimize a single multiply with constant into two +/// in order to implement it with two cheaper instructions, e.g. +/// LEA + SHL, LEA + LEA. +static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DAG.getMachineFunction(). + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + MVT VT = N->getValueType(0); + if (VT != MVT::i64) + return SDValue(); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return SDValue(); + uint64_t MulAmt = C->getZExtValue(); + if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9) + return SDValue(); + + uint64_t MulAmt1 = 0; + uint64_t MulAmt2 = 0; + if ((MulAmt % 9) == 0) { + MulAmt1 = 9; + MulAmt2 = MulAmt / 9; + } else if ((MulAmt % 5) == 0) { + MulAmt1 = 5; + MulAmt2 = MulAmt / 5; + } else if ((MulAmt % 3) == 0) { + MulAmt1 = 3; + MulAmt2 = MulAmt / 3; + } + if (MulAmt2 && + (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ + DebugLoc DL = N->getDebugLoc(); + + if (isPowerOf2_64(MulAmt2) && + !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) + // If the second multiplier is pow2, issue it first. We want the multiply + // by 3, 5, or 9 to be folded into the addressing mode unless the lone use + // is an add. + std::swap(MulAmt1, MulAmt2); + + SDValue NewMul; + if (isPowerOf2_64(MulAmt1)) + NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt1), MVT::i8)); + else + NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(MulAmt1, VT)); + + if (isPowerOf2_64(MulAmt2)) + NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, + DAG.getConstant(Log2_64(MulAmt2), MVT::i8)); + else + NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, + DAG.getConstant(MulAmt2, VT)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, NewMul, false); + } + return SDValue(); +} + + +/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts +/// when possible. +static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + // On X86 with SSE2 support, we can transform this to a vector shift if + // all elements are shifted by the same amount. We can't do this in legalize + // because a constant vector is typically transformed to a constant pool, + // so we have no knowledge of the shift amount.
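// (Example of the decomposition above: x*45 factors as 9*5 and becomes
//  two LEAs,
//    leaq (%rdi,%rdi,8), %rax     ; x*9
//    leaq (%rax,%rax,4), %rax     ; (x*9)*5
//  while x*40 = 5*8 becomes an LEA plus a shift. The combine below, in
//  turn, fires only when every lane of a vector shift uses the same
//  amount; note SRA gets no v2i64 case, since SSE2 has no PSRAQ.)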
+ if (!Subtarget->hasSSE2()) + return SDValue(); + + MVT VT = N->getValueType(0); + if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) + return SDValue(); + + SDValue ShAmtOp = N->getOperand(1); + MVT EltVT = VT.getVectorElementType(); + DebugLoc DL = N->getDebugLoc(); + SDValue BaseShAmt; + if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = ShAmtOp.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + for (; i != NumElts; ++i) { + SDValue Arg = ShAmtOp.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != BaseShAmt) { + return SDValue(); + } + } + } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && + cast(ShAmtOp)->isSplat()) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); + } else + return SDValue(); + + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt); + + // The shift amount is identical so we can do a vector shift. + SDValue ValOp = N->getOperand(0); + switch (N->getOpcode()) { + default: + assert(0 && "Unknown shift opcode!"); + break; + case ISD::SHL: + if (VT == MVT::v2i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v4i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), + ValOp, BaseShAmt); + break; + case ISD::SRA: + if (VT == MVT::v4i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), + ValOp, BaseShAmt); + break; + case ISD::SRL: + if (VT == MVT::v2i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v4i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), + ValOp, BaseShAmt); + break; + } + return SDValue(); +} + +/// PerformSTORECombine - Do target-specific dag combines on STORE nodes. +static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + // Turn load->store of MMX types into GPR load/stores. This avoids clobbering + // the FP state in cases where an emms may be missing. + // A preferable solution to the general problem is to figure out the right + // places to insert EMMS. This qualifies as a quick hack. + + // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. 
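// (The combine below first looks through the chain to find the feeding
//  load -- either a direct child of the store or behind one intervening
//  TokenFactor -- then, when 64-bit FP moves are legal, turns the i64
//  copy into a single f64/movq load+store pair so that no MMX register,
//  and therefore no EMMS, is involved.)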
+ StoreSDNode *St = cast<StoreSDNode>(N); + MVT VT = St->getValue().getValueType(); + if (VT.getSizeInBits() != 64) + return SDValue(); + + bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2(); + if ((VT.isVector() || + (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && + isa<LoadSDNode>(St->getValue()) && + !cast<LoadSDNode>(St->getValue())->isVolatile() && + St->getChain().hasOneUse() && !St->isVolatile()) { + SDNode* LdVal = St->getValue().getNode(); + LoadSDNode *Ld = 0; + int TokenFactorIndex = -1; + SmallVector<SDValue, 8> Ops; + SDNode* ChainVal = St->getChain().getNode(); + // Must be a store of a load. We currently handle two cases: the load + // is a direct child, and it's under an intervening TokenFactor. It is + // possible to dig deeper under nested TokenFactors. + if (ChainVal == LdVal) + Ld = cast<LoadSDNode>(St->getChain()); + else if (St->getValue().hasOneUse() && + ChainVal->getOpcode() == ISD::TokenFactor) { + for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) { + if (ChainVal->getOperand(i).getNode() == LdVal) { + TokenFactorIndex = i; + Ld = cast<LoadSDNode>(St->getValue()); + } else + Ops.push_back(ChainVal->getOperand(i)); + } + } + + if (!Ld || !ISD::isNormalLoad(Ld)) + return SDValue(); + + // If this is not the MMX case, i.e. we are just turning i64 load/store + // into f64 load/store, avoid the transformation if there are multiple + // uses of the loaded value. + if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0)) + return SDValue(); + + DebugLoc LdDL = Ld->getDebugLoc(); + DebugLoc StDL = N->getDebugLoc(); + // If we are a 64-bit capable x86, lower to a single movq load/store pair. + // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store + // pair instead. + if (Subtarget->is64Bit() || F64IsLegal) { + MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), + Ld->getBasePtr(), Ld->getSrcValue(), + Ld->getSrcValueOffset(), Ld->isVolatile(), + Ld->getAlignment()); + SDValue NewChain = NewLd.getValue(1); + if (TokenFactorIndex != -1) { + Ops.push_back(NewChain); + NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0], + Ops.size()); + } + return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), + St->getSrcValue(), St->getSrcValueOffset(), + St->isVolatile(), St->getAlignment()); + } + + // Otherwise, lower to two pairs of 32-bit loads / stores.
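// (The fallback below splits the 8-byte copy into lo/hi 4-byte halves:
//  the high address is base+4, the high half's source-value offset is +4,
//  and its alignment is MinAlign(align, 4); the two loads and two stores
//  are then stitched back into the chain with TokenFactor nodes.)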
+ SDValue LoAddr = Ld->getBasePtr(); + SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr, + DAG.getConstant(4, MVT::i32)); + + SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->getAlignment()); + SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, + Ld->getSrcValue(), Ld->getSrcValueOffset()+4, + Ld->isVolatile(), + MinAlign(Ld->getAlignment(), 4)); + + SDValue NewChain = LoLd.getValue(1); + if (TokenFactorIndex != -1) { + Ops.push_back(LoLd); + Ops.push_back(HiLd); + NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0], + Ops.size()); + } + + LoAddr = St->getBasePtr(); + HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr, + DAG.getConstant(4, MVT::i32)); + + SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, + St->getSrcValue(), St->getSrcValueOffset(), + St->isVolatile(), St->getAlignment()); + SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, + St->getSrcValue(), + St->getSrcValueOffset() + 4, + St->isVolatile(), + MinAlign(St->getAlignment(), 4)); + return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); + } + return SDValue(); +} + +/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and +/// X86ISD::FXOR nodes. +static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { + assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); + // F[X]OR(0.0, x) -> x + // F[X]OR(x, 0.0) -> x + if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) + if (C->getValueAPF().isPosZero()) + return N->getOperand(1); + if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) + if (C->getValueAPF().isPosZero()) + return N->getOperand(0); + return SDValue(); +} + +/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { + // FAND(0.0, x) -> 0.0 + // FAND(x, 0.0) -> 0.0 + if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) + if (C->getValueAPF().isPosZero()) + return N->getOperand(0); + if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) + if (C->getValueAPF().isPosZero()) + return N->getOperand(1); + return SDValue(); +} + +static SDValue PerformBTCombine(SDNode *N, + SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // BT ignores high bits in the bit index operand. 
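// (The isPosZero() tests above are deliberate: only +0.0 is the all-zeros
//  bit pattern, so F[X]OR(+0.0, x) == x and FAND(+0.0, x) == +0.0 hold
//  bit-for-bit, while -0.0 has the sign bit set and would disturb x's
//  sign. For BT below, only the low log2(width) bits of the index are
//  demanded, which lets SimplifyDemandedBits strip explicit index masks.)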
+ SDValue Op1 = N->getOperand(1); + if (Op1.hasOneUse()) { + unsigned BitWidth = Op1.getValueSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || + TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) + DCI.CommitTargetLoweringOpt(TLO); + } + return SDValue(); +} + +SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + switch (N->getOpcode()) { + default: break; + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); + case ISD::BUILD_VECTOR: + return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this); + case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); + case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::MUL: return PerformMulCombine(N, DAG, DCI); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); + case X86ISD::FXOR: + case X86ISD::FOR: return PerformFORCombine(N, DAG); + case X86ISD::FAND: return PerformFANDCombine(N, DAG); + case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); + } + + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// X86 Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +X86TargetLowering::ConstraintType +X86TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'A': + return C_Register; + case 'f': + case 'r': + case 'R': + case 'l': + case 'q': + case 'Q': + case 'x': + case 'y': + case 'Y': + return C_RegisterClass; + case 'e': + case 'Z': + return C_Other; + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// LowerXConstraint - try to replace an X constraint, which matches anything, +/// with another that has more specific requirements based on the type of the +/// corresponding operand. +const char *X86TargetLowering:: +LowerXConstraint(MVT ConstraintVT) const { + // FP X constraints get lowered to SSE1/2 registers if available, otherwise + // 'f' like normal targets. + if (ConstraintVT.isFloatingPoint()) { + if (Subtarget->hasSSE2()) + return "Y"; + if (Subtarget->hasSSE1()) + return "x"; + } + + return TargetLowering::LowerXConstraint(ConstraintVT); +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. 
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char Constraint, + bool hasMemory, + std::vector&Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + switch (Constraint) { + default: break; + case 'I': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getZExtValue() <= 31) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; + case 'J': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getZExtValue() <= 63) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; + case 'N': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getZExtValue() <= 255) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; + case 'e': { + // 32-bit signed value + if (ConstantSDNode *C = dyn_cast(Op)) { + const ConstantInt *CI = C->getConstantIntValue(); + if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) { + // Widen to 64 bits here to get it sign extended. + Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); + break; + } + // FIXME gcc accepts some relocatable values here too, but only in certain + // memory models; it's complicated. + } + return; + } + case 'Z': { + // 32-bit unsigned value + if (ConstantSDNode *C = dyn_cast(Op)) { + const ConstantInt *CI = C->getConstantIntValue(); + if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + // FIXME gcc accepts some relocatable values here too, but only in certain + // memory models; it's complicated. + return; + } + case 'i': { + // Literal immediates are always ok. + if (ConstantSDNode *CST = dyn_cast(Op)) { + // Widen to 64 bits here to get it sign extended. + Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64); + break; + } + + // If we are in non-pic codegen mode, we allow the address of a global (with + // an optional displacement) to be used with 'i'. + GlobalAddressSDNode *GA = 0; + int64_t Offset = 0; + + // Match either (GA), (GA+C), (GA+C1+C2), etc. + while (1) { + if ((GA = dyn_cast(Op))) { + Offset += GA->getOffset(); + break; + } else if (Op.getOpcode() == ISD::ADD) { + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + Offset += C->getZExtValue(); + Op = Op.getOperand(0); + continue; + } + } else if (Op.getOpcode() == ISD::SUB) { + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + Offset += -C->getZExtValue(); + Op = Op.getOperand(0); + continue; + } + } + + // Otherwise, this isn't something we can handle, reject it. + return; + } + + if (hasMemory) + Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG); + else + Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + Offset); + Result = Op; + break; + } + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, + Ops, DAG); +} + +std::vector X86TargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // FIXME: not handling fp-stack yet! 
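// (User-level view of the constraints handled here: 'I' accepts 0..31
//  (shift counts), 'J' 0..63, 'N' 0..255, and 'e'/'Z' 32-bit sign-/zero-
//  extendable immediates, while LowerXConstraint above retargets the
//  catch-all 'X' to an SSE class when SSE is available, e.g.
//    __asm__("roll %1, %0" : "+r"(x) : "I"(5));     // 'I': imm in 0..31
//    __asm__("rsqrtss %1, %0" : "=x"(r) : "x"(f));  // 'x': any XMM reg
//  )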
+std::vector<unsigned> X86TargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // FIXME: not handling fp-stack yet! + switch (Constraint[0]) { // GCC X86 Constraint Letters + default: break; // Unknown constraint letter + case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) + case 'Q': // Q_REGS + if (VT == MVT::i32) + return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); + else if (VT == MVT::i16) + return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); + else if (VT == MVT::i8) + return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); + else if (VT == MVT::i64) + return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0); + break; + } + } + + return std::vector<unsigned>(); +} + +std::pair<unsigned, const TargetRegisterClass*> +X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + // First, see if this is a constraint that directly corresponds to an LLVM + // register class. + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'r': // GENERAL_REGS + case 'R': // LEGACY_REGS + case 'l': // INDEX_REGS + if (VT == MVT::i8) + return std::make_pair(0U, X86::GR8RegisterClass); + if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16RegisterClass); + if (VT == MVT::i32 || !Subtarget->is64Bit()) + return std::make_pair(0U, X86::GR32RegisterClass); + return std::make_pair(0U, X86::GR64RegisterClass); + case 'f': // FP Stack registers. + // If SSE is enabled for this VT, use f80 to ensure the isel moves the + // value to the correct fpstack register class. + if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) + return std::make_pair(0U, X86::RFP32RegisterClass); + if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) + return std::make_pair(0U, X86::RFP64RegisterClass); + return std::make_pair(0U, X86::RFP80RegisterClass); + case 'y': // MMX_REGS if MMX allowed. + if (!Subtarget->hasMMX()) break; + return std::make_pair(0U, X86::VR64RegisterClass); + case 'Y': // SSE_REGS if SSE2 allowed + if (!Subtarget->hasSSE2()) break; + // FALL THROUGH. + case 'x': // SSE_REGS if SSE1 allowed + if (!Subtarget->hasSSE1()) break; + + switch (VT.getSimpleVT()) { + default: break; + // Scalar SSE types. + case MVT::f32: + case MVT::i32: + return std::make_pair(0U, X86::FR32RegisterClass); + case MVT::f64: + case MVT::i64: + return std::make_pair(0U, X86::FR64RegisterClass); + // Vector types. + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + return std::make_pair(0U, X86::VR128RegisterClass); + } + break; + } + } + + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + std::pair<unsigned, const TargetRegisterClass*> Res; + Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + + // Not found as a standard register? + if (Res.second == 0) { + // GCC calls "st(0)" just plain "st". + if (StringsEqualNoCase("{st}", Constraint)) { + Res.first = X86::ST0; + Res.second = X86::RFP80RegisterClass; + } + // 'A' means EAX + EDX. + if (Constraint == "A") { + Res.first = X86::EAX; + Res.second = X86::GRADRegisterClass; + } + return Res; + } + + // Otherwise, check to see if this is a register class of the wrong value + // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to + // turn into {ax},{dx}. + if (Res.second->hasType(VT)) + return Res; // Correct type already, nothing to do. + + // All of the single-register GCC register classes map their values onto + // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".
If we + // really want an 8-bit, 32-bit, or 64-bit register, map to the appropriate + // register class and return the appropriate register. + if (Res.second == X86::GR16RegisterClass) { + if (VT == MVT::i8) { + unsigned DestReg = 0; + switch (Res.first) { + default: break; + case X86::AX: DestReg = X86::AL; break; + case X86::DX: DestReg = X86::DL; break; + case X86::CX: DestReg = X86::CL; break; + case X86::BX: DestReg = X86::BL; break; + } + if (DestReg) { + Res.first = DestReg; + Res.second = X86::GR8RegisterClass; + } + } else if (VT == MVT::i32) { + unsigned DestReg = 0; + switch (Res.first) { + default: break; + case X86::AX: DestReg = X86::EAX; break; + case X86::DX: DestReg = X86::EDX; break; + case X86::CX: DestReg = X86::ECX; break; + case X86::BX: DestReg = X86::EBX; break; + case X86::SI: DestReg = X86::ESI; break; + case X86::DI: DestReg = X86::EDI; break; + case X86::BP: DestReg = X86::EBP; break; + case X86::SP: DestReg = X86::ESP; break; + } + if (DestReg) { + Res.first = DestReg; + Res.second = X86::GR32RegisterClass; + } + } else if (VT == MVT::i64) { + unsigned DestReg = 0; + switch (Res.first) { + default: break; + case X86::AX: DestReg = X86::RAX; break; + case X86::DX: DestReg = X86::RDX; break; + case X86::CX: DestReg = X86::RCX; break; + case X86::BX: DestReg = X86::RBX; break; + case X86::SI: DestReg = X86::RSI; break; + case X86::DI: DestReg = X86::RDI; break; + case X86::BP: DestReg = X86::RBP; break; + case X86::SP: DestReg = X86::RSP; break; + } + if (DestReg) { + Res.first = DestReg; + Res.second = X86::GR64RegisterClass; + } + } + } else if (Res.second == X86::FR32RegisterClass || + Res.second == X86::FR64RegisterClass || + Res.second == X86::VR128RegisterClass) { + // Handle references to XMM physical registers that got mapped into the + // wrong class. This can happen with constraints like {xmm0} where the + // target independent register mapper will just pick the first match it can + // find, ignoring the required type. + if (VT == MVT::f32) + Res.second = X86::FR32RegisterClass; + else if (VT == MVT::f64) + Res.second = X86::FR64RegisterClass; + else if (X86::VR128RegisterClass->hasType(VT)) + Res.second = X86::VR128RegisterClass; + } + + return Res; +}
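As a hedged illustration of the remapping just performed — mapping a constraint like "{ax}" with an i32 operand onto EAX rather than letting it become an AX/DX pair — here is a standalone C++ sketch; the alias table and remap function are hypothetical stand-ins for the register-enum switches above, not LLVM code:

#include <cassert>
#include <cstring>

// Hypothetical subset of the x86 register aliasing the switches above encode.
struct RegAliases { const char *r16, *r8, *r32, *r64; };
static const RegAliases kAliases[] = {
  {"ax", "al", "eax", "rax"},
  {"dx", "dl", "edx", "rdx"},
  {"cx", "cl", "ecx", "rcx"},
  {"bx", "bl", "ebx", "rbx"},
};

// Pick the alias of a 16-bit register name at the width the operand needs,
// mirroring the GR16 -> GR8/GR32/GR64 remapping performed above.
static const char *remap(const char *reg16, unsigned bits) {
  for (unsigned i = 0; i < sizeof(kAliases) / sizeof(kAliases[0]); ++i) {
    const RegAliases &a = kAliases[i];
    if (std::strcmp(a.r16, reg16) == 0)
      return bits == 8 ? a.r8 : bits == 32 ? a.r32 : bits == 64 ? a.r64 : a.r16;
  }
  return 0; // si/di/bp/sp (no 8-bit piece) omitted from this sketch
}

int main() {
  assert(std::strcmp(remap("ax", 32), "eax") == 0);
  assert(std::strcmp(remap("bx", 8), "bl") == 0);
  assert(std::strcmp(remap("dx", 64), "rdx") == 0);
  return 0;
}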
+//===----------------------------------------------------------------------===// +// X86 Widen vector type +//===----------------------------------------------------------------------===// + +/// getWidenVectorType: given a vector type, returns the type to widen +/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. +/// If there is no vector type that we want to widen to, returns MVT::Other. +/// When and where to widen is target dependent based on the cost of +/// scalarizing vs using the wider vector type. + +MVT X86TargetLowering::getWidenVectorType(MVT VT) const { + assert(VT.isVector()); + if (isTypeLegal(VT)) + return VT; + + // TODO: In computeRegisterProperty, we can compute the list of legal vector + // type based on element type. This would speed up our search (though + // it may not be worth it since the size of the list is relatively + // small). + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + + // On X86, it makes sense to widen any vector wider than 1 + if (NElts <= 1) + return MVT::Other; + + for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE; + nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + + if (isTypeLegal(SVT) && + SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts) + return SVT; + } + return MVT::Other; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h new file mode 100644 index 000000000000..550f8bdf9b64 --- /dev/null +++ b/lib/Target/X86/X86ISelLowering.h @@ -0,0 +1,705 @@ +//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that X86 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef X86ISELLOWERING_H +#define X86ISELLOWERING_H + +#include "X86Subtarget.h" +#include "X86RegisterInfo.h" +#include "X86MachineFunctionInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/CallingConvLower.h" + +namespace llvm { + namespace X86ISD { + // X86 Specific DAG Nodes + enum NodeType { + // Start the numbering where the builtin ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// BSF - Bit scan forward. + /// BSR - Bit scan reverse. + BSF, + BSR, + + /// SHLD, SHRD - Double shift instructions. These correspond to + /// X86::SHLDxx and X86::SHRDxx instructions. + SHLD, + SHRD, + + /// FAND - Bitwise logical AND of floating point values. This corresponds + /// to X86::ANDPS or X86::ANDPD. + FAND, + + /// FOR - Bitwise logical OR of floating point values. This corresponds + /// to X86::ORPS or X86::ORPD. + FOR, + + /// FXOR - Bitwise logical XOR of floating point values. This corresponds + /// to X86::XORPS or X86::XORPD. + FXOR, + + /// FSRL - Bitwise logical right shift of floating point values. This + /// corresponds to X86::PSRLDQ. + FSRL, + + /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the + /// integer source in memory and FP reg result. This corresponds to the + /// X86::FILD*m instructions. It has three inputs (token chain, address, + /// and source type) and two outputs (FP value and token chain). FILD_FLAG + /// also produces a flag. + FILD, + FILD_FLAG, + + /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the + /// integer destination in memory and a FP reg source. This corresponds + /// to the X86::FIST*m instructions and the rounding mode change stuff. It + /// has two inputs (token chain and address) and two outputs (int value + /// and token chain). + FP_TO_INT16_IN_MEM, + FP_TO_INT32_IN_MEM, + FP_TO_INT64_IN_MEM, + + /// FLD - This instruction implements an extending load to FP stack slots. + /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain + /// operand, ptr to load from, and a ValueType node indicating the type + /// to load to. + FLD, + + /// FST - This instruction implements a truncating store to FP stack + /// slots. This corresponds to the X86::FST32m / X86::FST64m.
It takes a + /// chain operand, value to store, address, and a ValueType to store it + /// as. + FST, + + /// CALL/TAILCALL - These operations represent an abstract X86 call + /// instruction, which includes a bunch of information. In particular the + /// operands of these nodes are: + /// + /// #0 - The incoming token chain + /// #1 - The callee + /// #2 - The number of arg bytes the caller pushes on the stack. + /// #3 - The number of arg bytes the callee pops off the stack. + /// #4 - The value to pass in AL/AX/EAX (optional) + /// #5 - The value to pass in DL/DX/EDX (optional) + /// + /// The result values of these nodes are: + /// + /// #0 - The outgoing token chain + /// #1 - The first register result value (optional) + /// #2 - The second register result value (optional) + /// + /// The CALL vs TAILCALL distinction boils down to whether the callee is + /// known not to modify the caller's stack frame, as is standard with + /// LLVM. + CALL, + TAILCALL, + + /// RDTSC_DAG - This operation implements the lowering for + /// readcyclecounter + RDTSC_DAG, + + /// X86 compare and logical compare instructions. + CMP, COMI, UCOMI, + + /// X86 bit-test instructions. + BT, + + /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag + /// operand produced by a CMP instruction. + SETCC, + + /// X86 conditional moves. Operand 0 and operand 1 are the two values + /// to select from. Operand 2 is the condition code, and operand 3 is the + /// flag operand produced by a CMP or TEST instruction. It also writes a + /// flag result. + CMOV, + + /// X86 conditional branches. Operand 0 is the chain operand, operand 1 + /// is the block to branch if condition is true, operand 2 is the + /// condition code, and operand 3 is the flag operand produced by a CMP + /// or TEST instruction. + BRCOND, + + /// Return with a flag operand. Operand 0 is the chain operand, operand + /// 1 is the number of bytes of stack to pop. + RET_FLAG, + + /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx. + REP_STOS, + + /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. + REP_MOVS, + + /// GlobalBaseReg - On Darwin, this node represents the result of the popl + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// Wrapper - A wrapper node for TargetConstantPool, + /// TargetExternalSymbol, and TargetGlobalAddress. + Wrapper, + + /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP + /// relative displacements. + WrapperRIP, + + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRB. + PEXTRB, + + /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRW. + PEXTRW, + + /// INSERTPS - Insert any element of a 4 x float vector into any element + /// of a destination 4 x float vector. + INSERTPS, + + /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRB. + PINSRB, + + /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRW. + PINSRW, + + /// PSHUFB - Shuffle 16 8-bit values within a vector. + PSHUFB, + + /// FMAX, FMIN - Floating point max and min. + /// + FMAX, FMIN, + + /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal + /// approximation. Note that these typically require refinement + /// in order to obtain suitable precision. + FRSQRT, FRCP, + + // TLSADDR - Thread Local Storage.
+ TLSADDR, + + // SegmentBaseAddress - The address segment:0 + SegmentBaseAddress, + + // EH_RETURN - Exception Handling helpers. + EH_RETURN, + + /// TC_RETURN - Tail call return. + /// operand #0 chain + /// operand #1 callee (register or absolute) + /// operand #2 stack adjustment + /// operand #3 optional in flag + TC_RETURN, + + // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. + LCMPXCHG_DAG, + LCMPXCHG8_DAG, + + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, + // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - + // Atomic 64-bit binary operations. + ATOMADD64_DAG, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + ATOMSWAP64_DAG, + + // FNSTCW16m - Store FP control word into i16 memory. + FNSTCW16m, + + // VZEXT_MOVL - Vector move low and zero extend. + VZEXT_MOVL, + + // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. + VZEXT_LOAD, + + // VSHL, VSRL - Vector logical left / right shift. + VSHL, VSRL, + + // CMPPD, CMPPS - Vector double/float comparison. + CMPPD, CMPPS, + + // PCMP* - Vector integer comparisons. + PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, + PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, + + // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. + ADD, SUB, SMUL, UMUL, + INC, DEC, + + // MUL_IMM - X86 specific multiply by immediate. + MUL_IMM + }; + } + + /// Define some predicates that are used for node matching. + namespace X86 { + /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to PSHUFD. + bool isPSHUFDMask(ShuffleVectorSDNode *N); + + /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to PSHUFHW. + bool isPSHUFHWMask(ShuffleVectorSDNode *N); + + /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to PSHUFLW. + bool isPSHUFLWMask(ShuffleVectorSDNode *N); + + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to SHUFP*. + bool isSHUFPMask(ShuffleVectorSDNode *N); + + /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to MOVHLPS. + bool isMOVHLPSMask(ShuffleVectorSDNode *N); + + /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form + /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, + /// <2, 3, 2, 3> + bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); + + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. + bool isMOVLPMask(ShuffleVectorSDNode *N); + + /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for MOVHP{S|D} + /// as well as MOVLHPS. + bool isMOVHPMask(ShuffleVectorSDNode *N); + + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to UNPCKL. + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + + /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + + /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form + /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, + /// <0, 0, 1, 1> + bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); + + /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form + /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, + /// <2, 2, 3, 3> + bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); + + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to MOVSS, + /// MOVSD, and MOVD, i.e. setting the lowest element. + bool isMOVLMask(ShuffleVectorSDNode *N); + + /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. + bool isMOVSHDUPMask(ShuffleVectorSDNode *N); + + /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. + bool isMOVSLDUPMask(ShuffleVectorSDNode *N); + + /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to MOVDDUP. + bool isMOVDDUPMask(ShuffleVectorSDNode *N); + + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle + /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* + /// instructions. + unsigned getShuffleSHUFImmediate(SDNode *N); + + /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle + /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW + /// instructions. + unsigned getShufflePSHUFHWImmediate(SDNode *N); + + /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle + /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW + /// instructions. + unsigned getShufflePSHUFLWImmediate(SDNode *N); + } + + //===--------------------------------------------------------------------===// + // X86TargetLowering - X86 Implementation of the TargetLowering interface + class X86TargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + int RegSaveFrameIndex; // X86-64 vararg func register save area. + unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. + unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. + int BytesToPopOnReturn; // Number of arg bytes ret should pop. + int BytesCallerReserves; // Number of arg bytes caller makes. + + public: + explicit X86TargetLowering(X86TargetMachine &TM); + + /// getPICJumpTableRelocBase - Returns relocation base for the given PIC + /// jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + + // Return the number of bytes that a function should pop when it returns (in + // addition to the space used by the return address). + // + unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } + + // Return the number of bytes that the caller reserves for arguments passed + // to this function. + unsigned getBytesCallerReserves() const { return BytesCallerReserves; } + + /// getStackPtrReg - Return the stack pointer register we are using: either + /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; } + + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate + /// function arguments in the caller parameter area. For X86, aggregates + /// that contain SSE vectors are placed at 16-byte boundaries while the rest + /// are at 4-byte boundaries. + virtual unsigned getByValTypeAlignment(const Type *Ty) const; + + /// getOptimalMemOpType - Returns the target specific optimal type for load + /// and store operations as a result of memset, memcpy, and memmove + /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for + /// determining it. + virtual + MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr) const; + + /// LowerOperation - Provide custom lowering hooks for some operations. + /// + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG); + + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + + /// getTargetNodeName - This method returns the name of a target specific + /// DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - Return the ISD::SETCC ValueType + virtual MVT getSetCCResultType(MVT VT) const; + + /// computeMaskedBitsForTargetNode - Determine which of the bits specified + /// in Mask are known to be either zero or one and return them in the + /// KnownZero/KnownOne bitsets. + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual bool + isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; + + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + + ConstraintType getConstraintType(const std::string &Constraint) const; + + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + virtual const char *LowerXConstraint(MVT ConstraintVT) const; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraints of the inline asm instruction + /// being processed is 'm'. + virtual void LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + /// getRegForInlineAsmConstraint - Given a physical register constraint + /// (e.g. {edx}), return the register number and the register class for the + /// register. This should only be used for C_Register constraints. On + /// error, this returns a register number of 0. + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; + + /// isTruncateFree - Return true if it's free to truncate a value of + /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in + /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isTruncateFree(MVT VT1, MVT VT2) const; + + /// isZExtFree - Return true if any actual instruction that defines a + /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result + /// register. This does not necessarily include registers defined in + /// unknown ways, such as incoming arguments, or copies from unknown + /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this + /// does not necessarily apply to truncate instructions. e.g. on x86-64, + /// all instructions that define 32-bit values implicitly zero-extend the + /// result out to 64 bits. + virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(MVT VT1, MVT VT2) const; + + /// isNarrowingProfitable - Return true if it's profitable to narrow + /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow + /// from i32 to i8 but not from i32 to i16. + virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const; + + /// isShuffleMaskLegal - Targets can use this to indicate that they only + /// support *some* VECTOR_SHUFFLE operations, those with specific masks. + /// By default, if a target supports the VECTOR_SHUFFLE node, all mask + /// values are assumed to be legal. + virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, + MVT VT) const; + + /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can + /// use this to indicate if there is a suitable + /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant + /// pool entry. + virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, + MVT VT) const; + + /// ShouldShrinkFPConstant - If true, then instruction selection should + /// seek to shrink the FP constant of the specified type to a smaller type + /// in order to save space and / or reduce runtime. + virtual bool ShouldShrinkFPConstant(MVT VT) const { + // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more + // expensive than a straight movsd. On the other hand, it's important to + // shrink long double fp constant since fldt is very slow. + return !X86ScalarSSEf64 || VT == MVT::f80; + } + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall, + SDValue Ret, + SelectionDAG &DAG) const; + + virtual const X86Subtarget* getSubtarget() { + return Subtarget; + } + + /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is + /// computed in an SSE register, not on the X87 floating point stack. + bool isScalarFPTypeInSSEReg(MVT VT) const { + return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 + (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 + } + + /// getWidenVectorType: given a vector type, returns the type to widen + /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. + /// If there is no vector type that we want to widen to, returns MVT::Other. + /// When and where to widen is target dependent based on the cost of + /// scalarizing vs using the wider vector type. + virtual MVT getWidenVectorType(MVT VT) const; + + /// createFastISel - This method returns a target specific FastISel object, + /// or null if the target does not support "fast" ISel.
+ virtual FastISel * + createFastISel(MachineFunction &mf, + MachineModuleInfo *mmi, DwarfWriter *dw, + DenseMap<const Value *, unsigned> &, + DenseMap<const BasicBlock *, MachineBasicBlock *> &, + DenseMap<const AllocaInst *, int> & +#ifndef NDEBUG + , SmallSet<Instruction*, 8> & +#endif + ); + + private: + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. + const X86Subtarget *Subtarget; + const X86RegisterInfo *RegInfo; + const TargetData *TD; + + /// X86StackPtr - X86 physical register used as stack ptr. + unsigned X86StackPtr; + + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + /// floating point ops. + /// When SSE is available, use it for f32 operations. + /// When SSE2 is available, use it for f64 operations. + bool X86ScalarSSEf32; + bool X86ScalarSSEf64; + + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG); + + SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG, + const CCValAssign &VA, MachineFrameInfo *MFI, + unsigned CC, SDValue Root, unsigned i); + + SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, + const CCValAssign &VA, SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags); + + // Call lowering helpers. + bool IsCalleePop(bool isVarArg, unsigned CallingConv); + bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall); + bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall); + SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, + SDValue Chain, bool IsTailCall, bool Is64Bit, + int FPDiff, DebugLoc dl); + + CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const; + NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op); + unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); + + std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool isSigned); + + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); + SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); + SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, + int64_t Offset, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); + SDValue LowerShift(SDValue Op, SelectionDAG &DAG); + SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot, + SelectionDAG &DAG); + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); + SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG); + SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG); + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); + SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG); + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG); + SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG); + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG); + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); + SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG); + SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG); + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG); + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); + SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); + SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG); + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); + + SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); + SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); + SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, unsigned NewOp); + + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + const Value *DstSV, uint64_t DstSVOff); + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff); + + /// Utility function to emit atomic bitwise operations (and, or, xor). + // It takes the bitwise instruction to expand, the associated machine basic + // block, and the associated X86 opcodes for reg/reg and reg/imm. + MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned regOpc, + unsigned immOpc, + unsigned loadOpc, + unsigned cxchgOpc, + unsigned copyOpc, + unsigned notOpc, + unsigned EAXreg, + TargetRegisterClass *RC, + bool invSrc = false) const; + + MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned regOpcL, + unsigned regOpcH, + unsigned immOpcL, + unsigned immOpcH, + bool invSrc = false) const; + + /// Utility function to emit atomic min and max. It takes the min/max + /// instruction to expand, the associated basic block, and the associated + /// cmov opcode for moving the min or max value. + MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned cmovOpc) const; + + /// Emit nodes that will be selected as "test Op0,Op0", or something + /// equivalent, for use with the given x86 condition code. + SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); + + /// Emit nodes that will be selected as "cmp Op0,Op1", or something + /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, + SelectionDAG &DAG); + }; + + namespace X86 { + FastISel *createFastISel(MachineFunction &mf, + MachineModuleInfo *mmi, DwarfWriter *dw, + DenseMap<const Value *, unsigned> &, + DenseMap<const BasicBlock *, MachineBasicBlock *> &, + DenseMap<const AllocaInst *, int> & +#ifndef NDEBUG + , SmallSet<Instruction*, 8> & +#endif + ); + } +} + +#endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td new file mode 100644 index 000000000000..dc15e4aa4ee9 --- /dev/null +++ b/lib/Target/X86/X86Instr64bit.td @@ -0,0 +1,1937 @@ +//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86-64 instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// 64-bits but only 32 bits are significant. +def i64i32imm : Operand<i64>; +// 64-bits but only 8 bits are significant. +def i64i8imm : Operand<i64>; + +def lea64mem : Operand<i64> { + let PrintMethod = "printlea64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm); +} + +def lea64_32mem : Operand<i32> { + let PrintMethod = "printlea64_32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); +} + +//===----------------------------------------------------------------------===// +// Complex Pattern Definitions. +// +def lea64addr : ComplexPattern; + +//===----------------------------------------------------------------------===// +// Pattern fragments. +// + +def i64immSExt8 : PatLeaf<(i64 imm), [{ + // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit + // sign extended field. + return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +}]>; + +def i64immSExt32 : PatLeaf<(i64 imm), [{ + // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // sign extended field. + return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); +}]>; + +def i64immZExt32 : PatLeaf<(i64 imm), [{ + // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // zero extended field. + return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); +}]>; + +def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; +def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; +def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; + +def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>; +def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; +def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; +def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; + +def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; +def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; +def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; +def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
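Not part of the imported sources — a standalone sketch of what the i64immSExt8/i64immSExt32 predicates above compute: an immediate qualifies exactly when truncating to the narrow width and sign-extending back round-trips (two's-complement conversions assumed, as on x86):

#include <cassert>
#include <cstdint>

// Hypothetical mirrors of the PatLeaf predicates above.
static bool fitsSExt8(int64_t v)  { return (int64_t)(int8_t)v == v; }
static bool fitsSExt32(int64_t v) { return (int64_t)(int32_t)v == v; }

int main() {
  assert(fitsSExt8(127) && fitsSExt8(-128));
  assert(!fitsSExt8(128));                    // truncates to -128
  assert(fitsSExt32(INT32_MIN) && fitsSExt32(INT32_MAX));
  assert(!fitsSExt32(1LL << 31));             // 2^31 needs a zero-extended field
  return 0;
}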
+//===----------------------------------------------------------------------===// +// Instruction list... +// + +// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into +// a stack adjustment and the codegen must know that they may modify the stack +// pointer before prolog-epilog rewriting occurs. +// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become +// sub / add which can clobber EFLAGS. +let Defs = [RSP, EFLAGS], Uses = [RSP] in { +def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), + "#ADJCALLSTACKDOWN", + [(X86callseq_start timm:$amt)]>, + Requires<[In64BitMode]>; +def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[In64BitMode]>; +} + +//===----------------------------------------------------------------------===// +// Call Instructions... +// +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + // NOTE: this pattern doesn't match "X86call imm", because we do not know + // that the offset between an arbitrary immediate and the call will fit in + // the 32-bit pcrel field that we have. + def CALL64pcrel32 : I<0xE8, RawFrm, + (outs), (ins i64i32imm:$dst, variable_ops), + "call\t${dst:call}", []>, + Requires<[In64BitMode]>; + def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call\t{*}$dst", [(X86call GR64:$dst)]>; + def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>; + } + + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset, + variable_ops), + "#TC_RETURN $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset, + variable_ops), + "#TC_RETURN $dst $offset", + []>; + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + "jmp{q}\t{*}$dst # TAILCALL", + []>; + +// Branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", + [(brind GR64:$dst)]>; + def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", + [(brind (loadi64 addr:$dst))]>; +} + +//===----------------------------------------------------------------------===// +// EH Pseudo Instructions +// +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1 in { +def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), + "ret\t#eh_return, addr: $addr", + [(X86ehret GR64:$addr)]>; + +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions...
+// +let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in +def LEAVE64 : I<0xC9, RawFrm, + (outs), (ins), "leave", []>; +let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { +let mayLoad = 1 in +def POP64r : I<0x58, AddRegFrm, + (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +let mayStore = 1 in +def PUSH64r : I<0x50, AddRegFrm, + (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +} + +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in +def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in +def PUSHFQ : I<0x9C, RawFrm, (outs), (ins), "pushf", []>; + +def LEA64_32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins lea64_32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + +let isReMaterializable = 1 in +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), + "lea{q}\t{$src|$dst}, {$dst|$src}", + [(set GR64:$dst, lea64addr:$src)]>; + +let isTwoAddress = 1 in +def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), + "bswap{q}\t$dst", + [(set GR64:$dst, (bswap GR64:$src))]>, TB; + +// Bit scan instructions. +let Defs = [EFLAGS] in { +def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB; +def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (X86bsf (loadi64 addr:$src))), + (implicit EFLAGS)]>, TB; + +def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB; +def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (X86bsr (loadi64 addr:$src))), + (implicit EFLAGS)]>, TB; +} // Defs = [EFLAGS] + +// Repeat string ops +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in +def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", + [(X86rep_movs i64)]>, REP; +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in +def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", + [(X86rep_stos i64)]>, REP; + +//===----------------------------------------------------------------------===// +// Move Instructions... 
+// + +let neverHasSideEffects = 1 in +def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "movabs{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, imm:$src)]>; +def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, i64immSExt32:$src)]>; +} + +let canFoldAsLoad = 1 in +def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (load addr:$src))]>; + +def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store GR64:$src, addr:$dst)]>; +def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store i64immSExt32:$src, addr:$dst)]>; + +// Sign/Zero extenders + +// MOVSX64rr8 always has a REX prefix and it has an 8-bit register +// operand, which makes it a rare instruction with an 8-bit register +// operand that can never access an h register. If support for h registers +// were generalized, this would require a special register class. +def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR8:$src))]>, TB; +def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; +def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR16:$src))]>, TB; +def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; +def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR32:$src))]>; +def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + +// Use movzbl instead of movzbq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. +def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), + "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zext GR8:$src))]>, TB; +def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), + "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; +// Use movzwl instead of movzwq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. +def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zext GR16:$src))]>, TB; +def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
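The movzbl/movzwl choice above rests on an architectural fact worth spelling out; as an illustrative aside (plain standalone C++, not LLVM code), writing a 32-bit result on x86-64 implicitly zeros bits 63:32 of the destination register, so no separate 64-bit zero-extend is needed:

#include <cassert>
#include <cstdint>

// Hypothetical model of an x86-64 GPR write: a 32-bit-result instruction
// replaces the low half and clears the high half, regardless of old contents.
static uint64_t write32(uint64_t /*oldReg*/, uint32_t result) {
  return (uint64_t)result; // high 32 bits implicitly zero
}

int main() {
  uint64_t reg = 0xDEADBEEFCAFEBABEULL;
  reg = write32(reg, 0x12345678u);      // e.g. movl $0x12345678, %eax
  assert(reg == 0x0000000012345678ULL); // no explicit zext instruction needed
  return 0;
}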
+// There's no movzlq instruction, but movl can be used for this purpose, using +// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero +// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit +// zero-extension; however, this isn't possible when the 32-bit value is +// defined by a truncate or is copied from something where the high bits aren't +// necessarily all zero. In such cases, we fall back to these explicit zext +// instructions. +def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), + "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zext GR32:$src))]>; +def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + +// Any instruction that defines a 32-bit result zeros the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may +// be copying from a truncate, but any other 32-bit operation will zero-extend +// up to 64 bits. +def def32 : PatLeaf<(i32 GR32:$src), [{ + return N->getOpcode() != ISD::TRUNCATE && + N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != ISD::CopyFromReg; +}]>; + +// In the case of a 32-bit def that is known to implicitly zero-extend, +// we can use a SUBREG_TO_REG. +def : Pat<(i64 (zext def32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + +let neverHasSideEffects = 1 in { + let Defs = [RAX], Uses = [EAX] in + def CDQE : RI<0x98, RawFrm, (outs), (ins), + "{cltq|cdqe}", []>; // RAX = signext(EAX) + + let Defs = [RAX,RDX], Uses = [RAX] in + def CQO : RI<0x99, RawFrm, (outs), (ins), + "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions... +// + +let Defs = [EFLAGS] in { +let isTwoAddress = 1 in { +let isConvertibleToThreeAddress = 1 in { +let isCommutable = 1 in +// Register-Register Addition +def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; + +// Register-Integer Addition +def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // isConvertibleToThreeAddress + +// Register-Memory Addition +def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +} // isTwoAddress + +// Memory-Register Addition +def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), GR64:$src2), addr:$dst), + (implicit EFLAGS)]>; +def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst), + (implicit EFLAGS)]>; +def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), + "add{q}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst), + (implicit EFLAGS)]>;
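The ADC/SBB definitions that follow select the carry-consuming adde/sube nodes used for multi-word arithmetic. As a hedged illustration (standalone C++, hypothetical U128/add128 helpers, not LLVM code), this is how an add/adc pair implements a 128-bit addition out of 64-bit halves:

#include <cassert>
#include <cstdint>

struct U128 { uint64_t lo, hi; };

// Model of ADD64rr + ADC64rr: the low add produces a carry (EFLAGS.CF),
// which the high add consumes — the add/adde pairing the patterns below match.
static U128 add128(U128 a, U128 b) {
  U128 r;
  r.lo = a.lo + b.lo;
  unsigned carry = (r.lo < a.lo) ? 1 : 0; // CF set iff the low add wrapped
  r.hi = a.hi + b.hi + carry;
  return r;
}

int main() {
  U128 a = { ~0ULL, 0 };
  U128 b = { 1, 0 };
  U128 r = add128(a, b);
  assert(r.lo == 0 && r.hi == 1); // carry propagated into the high half
  return 0;
}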
+let Uses = [EFLAGS] in { +let isTwoAddress = 1 in { +let isCommutable = 1 in +def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; + +def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>; + +def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>; +def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; +} // isTwoAddress + +def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>; +def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; +def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; +} // Uses = [EFLAGS] + +let isTwoAddress = 1 in { +// Register-Register Subtraction +def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; + +// Register-Memory Subtraction +def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; + +// Register-Integer Subtraction +def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), + (ins GR64:$src1, i64i32imm:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // isTwoAddress + +// Memory-Register Subtraction +def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), GR64:$src2), addr:$dst), + (implicit EFLAGS)]>; + +// Memory-Integer Subtraction +def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)]>; +def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)]>; + +let Uses = [EFLAGS] in { +let isTwoAddress = 1 in { +def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; + +def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>; + +def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(set
GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>; +def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; +} // isTwoAddress + +def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; +def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; +def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; +} // Uses = [EFLAGS] +} // Defs = [EFLAGS] + +// Unsigned multiplication +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in { +def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), + "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64 +let mayLoad = 1 in +def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), + "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + +// Signed multiplication +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), + "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64 +let mayLoad = 1 in +def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), + "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] +} + +let Defs = [EFLAGS] in { +let isTwoAddress = 1 in { +let isCommutable = 1 in +// Register-Register Signed Integer Multiplication +def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "imul{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>, TB; + +// Register-Memory Signed Integer Multiplication +def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), + "imul{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB; +} // isTwoAddress + +// Surprisingly enough, these are not two address instructions!
+ +// Register-Integer Signed Integer Multiplication +def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 + (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 + (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; + +// Memory-Integer Signed Integer Multiplication +def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 + (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul (load addr:$src1), + i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 + (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul (load addr:$src1), + i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Unsigned division / remainder +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in { +def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX + "div{q}\t$src", []>; +// Signed division / remainder +def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX + "idiv{q}\t$src", []>; +let mayLoad = 1 in { +def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX + "div{q}\t$src", []>; +def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX + "idiv{q}\t$src", []>; +} +} + +// Unary instructions +let Defs = [EFLAGS], CodeSize = 2 in { +let isTwoAddress = 1 in +def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst", + [(set GR64:$dst, (ineg GR64:$src)), + (implicit EFLAGS)]>; +def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", + [(store (ineg (loadi64 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; + +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", + [(set GR64:$dst, (add GR64:$src, 1)), + (implicit EFLAGS)]>; +def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", + [(store (add (loadi64 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", + [(set GR64:$dst, (add GR64:$src, -1)), + (implicit EFLAGS)]>; +def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + [(store (add (loadi64 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>; + +// In 64-bit mode, single byte INC and DEC cannot be encoded. +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { +// Can transform into LEA. 
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", + [(set GR16:$dst, (add GR16:$src, 1)), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", + [(set GR32:$dst, (add GR32:$src, 1)), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", + [(set GR16:$dst, (add GR16:$src, -1)), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", + [(set GR32:$dst, (add GR32:$src, -1)), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +} // isConvertibleToThreeAddress + +// These are duplicates of their 32-bit counterparts. Only needed so X86 knows +// how to unfold them. +let isTwoAddress = 0, CodeSize = 2 in { + def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; + def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; + def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; + def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +} +} // Defs = [EFLAGS], CodeSize + + +let Defs = [EFLAGS] in { +// Shift instructions +let isTwoAddress = 1 in { +let Uses = [CL] in +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src), + "shl{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (shl GR64:$src, CL))]>; +let isConvertibleToThreeAddress = 1 in // Can transform into LEA. +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), + "shl{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; +// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is +// cheaper. 
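+// (The corresponding (shl x, 1) -> (add x, x) pattern appears with the
+// other peepholes near the end of this file.)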
} // isTwoAddress
+
+let Uses = [CL] in
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src, CL))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src, CL))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Rotate instructions
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "rol{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let
isTwoAddress = 1 in { +let Uses = [CL] in +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotr GR64:$src, CL))]>; +def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), + "ror{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; +def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), + "ror{q}\t$dst", + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // isTwoAddress + +let Uses = [CL] in +def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; +def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), + "ror{q}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t$dst", + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +// Double shift instructions (generalizations of rotate) +let isTwoAddress = 1 in { +let Uses = [CL] in { +def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, TB; +def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, TB; +} + +let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction +def SHLD64rri8 : RIi8<0xA4, MRMDestReg, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD64rri8 : RIi8<0xAC, MRMDestReg, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +} // isCommutable +} // isTwoAddress + +let Uses = [CL] in { +def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +} +def SHLD64mri8 : RIi8<0xA4, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +def SHRD64mri8 : RIi8<0xAC, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +} // Defs = [EFLAGS] + +//===----------------------------------------------------------------------===// +// Logical Instructions... 
+// + +let isTwoAddress = 1 , AddedComplexity = 15 in +def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", + [(set GR64:$dst, (not GR64:$src))]>; +def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", + [(store (not (loadi64 addr:$dst)), addr:$dst)]>; + +let Defs = [EFLAGS] in { +let isTwoAddress = 1 in { +let isCommutable = 1 in +def AND64rr : RI<0x21, MRMDestReg, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "and{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; +def AND64rm : RI<0x23, MRMSrcMem, + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "and{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def AND64ri8 : RIi8<0x83, MRM4r, + (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "and{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def AND64ri32 : RIi32<0x81, MRM4r, + (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "and{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // isTwoAddress + +def AND64mr : RI<0x21, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src), + "and{q}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), GR64:$src), addr:$dst), + (implicit EFLAGS)]>; +def AND64mi8 : RIi8<0x83, MRM4m, + (outs), (ins i64mem:$dst, i64i8imm :$src), + "and{q}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +def AND64mi32 : RIi32<0x81, MRM4m, + (outs), (ins i64mem:$dst, i64i32imm:$src), + "and{q}\t{$src, $dst|$dst, $src}", + [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), + (implicit EFLAGS)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "or{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; +def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "or{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "or{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "or{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // isTwoAddress + +def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "or{q}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), GR64:$src), addr:$dst), + (implicit EFLAGS)]>; +def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src), + "or{q}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), + "or{q}\t{$src, $dst|$dst, $src}", + [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), + (implicit EFLAGS)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "xor{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; +def 
XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "xor{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "xor{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; +def XOR64ri32 : RIi32<0x81, MRM6r, + (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "xor{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; +} // isTwoAddress + +def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "xor{q}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), GR64:$src), addr:$dst), + (implicit EFLAGS)]>; +def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src), + "xor{q}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), + "xor{q}\t{$src, $dst|$dst, $src}", + [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +// Integer comparison +let Defs = [EFLAGS] in { +let isCommutable = 1 in +def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "test{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR64:$src1, GR64:$src2), 0), + (implicit EFLAGS)]>; +def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), + "test{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0), + (implicit EFLAGS)]>; +def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), + (ins GR64:$src1, i64i32imm:$src2), + "test{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0), + (implicit EFLAGS)]>; +def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), + (ins i64mem:$src1, i64i32imm:$src2), + "test{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), + (implicit EFLAGS)]>; + +def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, GR64:$src2), + (implicit EFLAGS)]>; +def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), GR64:$src2), + (implicit EFLAGS)]>; +def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)]>; +def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)]>; +def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)]>; +def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2), + (implicit EFLAGS)]>; +def CMP64mi32 : RIi32<0x81, MRM7m, (outs), + (ins i64mem:$src1, i64i32imm:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2), + (implicit 
EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Bit tests.
+// TODO: BTC, BTR, and BTS
+let Defs = [EFLAGS] in {
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+// "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]>, TB;
+
+def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Conditional moves
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NS, EFLAGS))]>, TB;
+def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_P, EFLAGS))]>, TB;
+def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NP, EFLAGS))]>, TB;
+def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_O, EFLAGS))]>, TB;
+def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NO, EFLAGS))]>, TB;
+} // isCommutable = 1
+
+def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rm: RI<0x49, MRMSrcMem, // if
!signed, GR64 = [mem64] + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "cmovns\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NS, EFLAGS))]>, TB; +def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64] + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "cmovp\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_P, EFLAGS))]>, TB; +def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64] + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "cmovnp\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NP, EFLAGS))]>, TB; +def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64] + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "cmovo\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_O, EFLAGS))]>, TB; +def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] + (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + "cmovno\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NO, EFLAGS))]>, TB; +} // isTwoAddress + +//===----------------------------------------------------------------------===// +// Conversion Instructions... +// + +// f64 -> signed i64 +def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "cvtsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse2_cvtsd2si64 VR128:$src))]>; +def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src), + "cvtsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (int_x86_sse2_cvtsd2si64 + (load addr:$src)))]>; +def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint FR64:$src))]>; +def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>; +def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse2_cvttsd2si64 VR128:$src))]>; +def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src), + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse2_cvttsd2si64 + (load addr:$src)))]>; + +// Signed i64 -> f64 +def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp GR64:$src))]>; +def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; + +let isTwoAddress = 1 in { +def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), + "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsi642sd VR128:$src1, + GR64:$src2))]>; +def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), + "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsi642sd VR128:$src1, + (loadi64 addr:$src2)))]>; +} // isTwoAddress + +// Signed i64 -> f32 +def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src), + "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp GR64:$src))]>; +def CVTSI2SS64rm: 
RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src), + "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; + +let isTwoAddress = 1 in { + def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), + "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_sse_cvtsi642ss VR128:$src1, + GR64:$src2))]>; + def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), + "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_sse_cvtsi642ss VR128:$src1, + (loadi64 addr:$src2)))]>; +} + +// f32 -> signed i64 +def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "cvtss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse_cvtss2si64 VR128:$src))]>; +def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), + "cvtss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (int_x86_sse_cvtss2si64 + (load addr:$src)))]>; +def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), + "cvttss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint FR32:$src))]>; +def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), + "cvttss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>; +def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "cvttss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse_cvttss2si64 VR128:$src))]>; +def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), + "cvttss2si{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (int_x86_sse_cvttss2si64 (load addr:$src)))]>; + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's +// equivalent due to implicit zero-extending, and it sometimes has a smaller +// encoding. +// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove +// when we have a better way to specify isel priority. +let Defs = [EFLAGS], AddedComplexity = 1, + isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), + "xor{l}\t${dst:subreg32}, ${dst:subreg32}", + [(set GR64:$dst, 0)]>; + +// Materialize i64 constant where top 32-bits are zero. +let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), + "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, i64immZExt32:$src)]>; + +//===----------------------------------------------------------------------===// +// Thread Local Storage Instructions +//===----------------------------------------------------------------------===// + +// All calls clobber the non-callee saved registers. RSP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. 
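+// For example, a stack adjustment such as "subq $8, %rsp" emitted immediately
+// before the call would otherwise have no visible reader and could be
+// deleted as dead, corrupting the outgoing argument area.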
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in +def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym), + ".byte\t0x66; " + "leaq\t${sym:mem}(%rip), %rdi; " + ".word\t0x6666; " + "rex64; " + "call\t__tls_get_addr@PLT", + [(X86tlsaddr tglobaltlsaddr:$sym)]>, + Requires<[In64BitMode]>; + +let AddedComplexity = 5 in +def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "movq\t%gs:$src, $dst", + [(set GR64:$dst, (gsload addr:$src))]>, SegGS; + +let AddedComplexity = 5 in +def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "movq\t%fs:$src, $dst", + [(set GR64:$dst, (fsload addr:$src))]>, SegFS; + +//===----------------------------------------------------------------------===// +// Atomic Instructions +//===----------------------------------------------------------------------===// + +let Defs = [RAX, EFLAGS], Uses = [RAX] in { +def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), + "lock\n\t" + "cmpxchgq\t$swap,$ptr", + [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; +} + +let Constraints = "$val = $dst" in { +let Defs = [EFLAGS] in +def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), + "lock\n\t" + "xadd\t$val, $ptr", + [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, + TB, LOCK; +def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), + "xchg\t$val, $ptr", + [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; +} + +// Atomic exchange, and, or, xor +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; +def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; +def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMXOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; +def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMNAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; +def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), + "#ATOMMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; +def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; +def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; +def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>; +def : Pat<(i64 
(X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+
+// Calls
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall GR64:$dst),
+ (CALL64r GR64:$dst)>;
+
+
+// tailcall stuff
+def : Pat<(X86tailcall GR32:$dst),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (TAILCALL)>;
+
+def : Pat<(X86tcret GR64:$dst, imm:$off),
+ (TCRETURNri64 GR64:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
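+// For example, (X86cmov (loadi64 addr), GR64:$r, X86_COND_B, EFLAGS) cannot
+// use CMOVB64rm directly because the rm forms above fold a load only into
+// the second operand; swapping the operands selects the same value provided
+// the condition is replaced by its inverse, so B (unsigned <) becomes AE
+// (unsigned >=), and so on below.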
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS), + (CMOVAE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS), + (CMOVB64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS), + (CMOVNE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS), + (CMOVE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS), + (CMOVA64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS), + (CMOVBE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS), + (CMOVGE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS), + (CMOVL64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS), + (CMOVG64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS), + (CMOVLE64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS), + (CMOVNP64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS), + (CMOVP64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS), + (CMOVNS64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS), + (CMOVS64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS), + (CMOVNO64rm GR64:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS), + (CMOVO64rm GR64:$src2, addr:$src1)>; + +// zextload bool -> zextload byte +def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; + +// extload +// When extloading from 16-bit and smaller memory locations into 64-bit registers, +// use zero-extending loads so that the entire 64-bit register is defined, avoiding +// partial-register updates. +def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; +// For other extloads, use subregs, since the high contents of the register are +// defined after an extload. 
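+// A zero-extending load such as MOVZX64rm8 writes all 64 bits of its result
+// register, whereas a plain byte load ("movb (%rdi), %al") would merge into
+// the low 8 bits of %rax and leave the rest stale, creating a false
+// dependency on the register's previous contents.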
+def : Pat<(extloadi64i32 addr:$src),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+ x86_subreg_32bit)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(extloadi16i8 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// anyext
+def : Pat<(i64 (anyext GR8:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
+def : Pat<(i64 (anyext GR16:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (anyext GR8:$src)),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext GR8:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+
Requires<[In64BitMode]>; + +// h-register tricks. +// For now, be conservative on x86-64 and use an h-register extract only if the +// value is immediately zero-extended or stored, which are somewhat common +// cases. This uses a bunch of code to prevent a register requiring a REX prefix +// from being allocated in the same instruction as the h register, as there's +// currently no way to describe this requirement to the register allocator. + +// h-register extract and zero-extend. +def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_32bit)>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl_su GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_16bit)>, + Requires<[In64BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_32bit)>; + +// h-register extract and store. +def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD), + x86_subreg_8bit_hi))>; +def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; + +// (shl x, 1) ==> (add x, x) +def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; + +// (shl x (and y, 63)) ==> (shl x, y) +def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), + (SHL64rCL GR64:$src1)>; +def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), + (SHL64mCL addr:$dst)>; + +def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), + (SHR64rCL GR64:$src1)>; +def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), + (SHR64mCL addr:$dst)>; + +def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), + (SAR64rCL GR64:$src1)>; +def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), + (SAR64mCL addr:$dst)>; + +// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c) +def : Pat<(or (srl GR64:$src1, CL:$amt), + (shl GR64:$src2, (sub 64, CL:$amt))), + (SHRD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), + (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), + (SHRD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))), + (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + (SHRD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), + (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + addr:$dst), + (SHRD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), + 
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), + GR64:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; + +// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) +def : Pat<(or (shl GR64:$src1, CL:$amt), + (srl GR64:$src2, (sub 64, CL:$amt))), + (SHLD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), + (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), + (SHLD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))), + (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + (SHLD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), + (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + addr:$dst), + (SHLD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), + (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), + GR64:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; + +// X86 specific add which produces a flag. +def : Pat<(addc GR64:$src1, GR64:$src2), + (ADD64rr GR64:$src1, GR64:$src2)>; +def : Pat<(addc GR64:$src1, (load addr:$src2)), + (ADD64rm GR64:$src1, addr:$src2)>; +def : Pat<(addc GR64:$src1, i64immSExt8:$src2), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(addc GR64:$src1, i64immSExt32:$src2), + (ADD64ri32 GR64:$src1, imm:$src2)>; + +def : Pat<(subc GR64:$src1, GR64:$src2), + (SUB64rr GR64:$src1, GR64:$src2)>; +def : Pat<(subc GR64:$src1, (load addr:$src2)), + (SUB64rm GR64:$src1, addr:$src2)>; +def : Pat<(subc GR64:$src1, i64immSExt8:$src2), + (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(subc GR64:$src1, imm:$src2), + (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; + +//===----------------------------------------------------------------------===// +// EFLAGS-defining Patterns +//===----------------------------------------------------------------------===// + +// Register-Register Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (ADD64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (ADD64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Addition with EFLAGS result +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (SUB64rr GR64:$src1, GR64:$src2)>; + +// Register-Memory Subtraction with EFLAGS result +def : Pat<(parallel 
(X86sub_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (SUB64rm GR64:$src1, addr:$src2)>; + +// Register-Integer Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Memory-Register Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB64mr addr:$dst, GR64:$src2)>; + +// Memory-Integer Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (IMUL64rr GR64:$src1, GR64:$src2)>; + +// Register-Memory Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (IMUL64rm GR64:$src1, addr:$src2)>; + +// Register-Integer Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; + +// Memory-Integer Signed Integer Multiplication with EFLAGS result +def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2), + (implicit EFLAGS)), + (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), + (implicit EFLAGS)), + (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; + +// INC and DEC with EFLAGS result. Note that these do not set CF. 
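+// (inc and dec update OF, SF, ZF, AF and PF but leave CF untouched, so these
+// patterns are only safe when no consumer of the resulting EFLAGS reads the
+// carry; a flags producer feeding adc or sbb must use add/sub of 1 instead.)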
+def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
+ (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
+ (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
+ (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
+ (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
+ (INC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64m addr:$dst)>;
+def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
+ (DEC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64m addr:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE Instructions
+//===----------------------------------------------------------------------===//
+
+// Move instructions...
+
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+let isTwoAddress = 1 in {
+ multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize, REX_W;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize, REX_W;
+ }
+}
+
+defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index
000000000000..39504cd44cea --- /dev/null +++ b/lib/Target/X86/X86InstrBuilder.h @@ -0,0 +1,168 @@ +//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to handle X86'isms in a clean way. +// +// The BuildMem function may be used with the BuildMI function to add entire +// memory references in a single, typed, function call. X86 memory references +// can be very complex expressions (described in the README), so wrapping them +// up behind an easier to use interface makes sense. Descriptions of the +// functions are included below. +// +// For reference, the order of operands for memory references is: +// (Operand), Base, Scale, Index, Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef X86INSTRBUILDER_H +#define X86INSTRBUILDER_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/PseudoSourceValue.h" + +namespace llvm { + +/// X86AddressMode - This struct holds a generalized full x86 address mode. +/// The base register can be a frame index, which will eventually be replaced +/// with BP or SP and Disp being offsetted accordingly. The displacement may +/// also include the offset of a global value. +struct X86AddressMode { + enum { + RegBase, + FrameIndexBase + } BaseType; + + union { + unsigned Reg; + int FrameIndex; + } Base; + + unsigned Scale; + unsigned IndexReg; + unsigned Disp; + GlobalValue *GV; + + X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) { + Base.Reg = 0; + } +}; + +/// addDirectMem - This function is used to add a direct memory reference to the +/// current instruction -- that is, a dereference of an address in a register, +/// with no scale, index or displacement. An example is: DWORD PTR [EAX]. +/// +inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB, + unsigned Reg) { + // Because memory references are always represented with four + // values, this adds: Reg, [1, NoReg, 0] to the instruction. + return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); +} + +inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB, + int Offset) { + return MIB.addImm(1).addReg(0).addImm(Offset); +} + +inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB, + int Offset) { + return addLeaOffset(MIB, Offset).addReg(0); +} + +/// addRegOffset - This function is used to add a memory reference of the form +/// [Reg + Offset], i.e., one with no scale or index, but with a +/// displacement. An example is: DWORD PTR [EAX + 4]. +/// +inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, + int Offset) { + return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); +} + +inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, + int Offset) { + return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); +} + +/// addRegReg - This function is used to add a memory reference of the form: +/// [Reg + Reg]. 
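+/// A minimal usage sketch (DestReg, BaseReg and IdxReg stand for hypothetical
+/// virtual registers; MBB, MI, DL and TII come from the surrounding pass):
+///
+///   addRegReg(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), DestReg),
+///             BaseReg, /*isKill1=*/false, IdxReg, /*isKill2=*/false);
+///
+/// This appends the four memory operands Base=BaseReg, Scale=1, Index=IdxReg,
+/// Disp=0, i.e. a load of the form "movq (%base,%idx), %dest".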
+inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, + unsigned Reg1, bool isKill1, + unsigned Reg2, bool isKill2) { + return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) + .addReg(Reg2, getKillRegState(isKill2)).addImm(0); +} + +inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { + assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); + + if (AM.BaseType == X86AddressMode::RegBase) + MIB.addReg(AM.Base.Reg); + else if (AM.BaseType == X86AddressMode::FrameIndexBase) + MIB.addFrameIndex(AM.Base.FrameIndex); + else + assert (0); + MIB.addImm(AM.Scale).addReg(AM.IndexReg); + if (AM.GV) + return MIB.addGlobalAddress(AM.GV, AM.Disp); + else + return MIB.addImm(AM.Disp); +} + +inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { + return addLeaAddress(MIB, AM).addReg(0); +} + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... +/// +inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Flags = 0; + if (TID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (TID.mayStore()) + Flags |= MachineMemOperand::MOStore; + MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI), + Flags, + MFI.getObjectOffset(FI) + Offset, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + return addOffset(MIB.addFrameIndex(FI), Offset) + .addMemOperand(MMO); +} + +/// addConstantPoolReference - This function is used to add a reference to the +/// base of a constant value spilled to the per-function constant pool. The +/// reference uses the abstract ConstantPoolIndex which is retained until +/// either machine code emission or assembly output. In PIC mode on x86-32, +/// the GlobalBaseReg parameter can be used to make this a +/// GlobalBaseReg-relative reference. +/// +inline const MachineInstrBuilder & +addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, + unsigned GlobalBaseReg = 0) { + //FIXME: factor this + return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0) + .addConstantPoolIndex(CPI).addReg(0); +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td new file mode 100644 index 000000000000..bc7def457c0f --- /dev/null +++ b/lib/Target/X86/X86InstrFPStack.td @@ -0,0 +1,597 @@ +//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 x87 FPU instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FPStack specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, + SDTCisVT<1, f80>]>; +def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; +def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; +def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; +def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; + +def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + +def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, + [SDNPHasChain, SDNPMayLoad]>; +def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, + [SDNPHasChain, SDNPInFlag, SDNPMayStore]>; +def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, + [SDNPHasChain, SDNPMayLoad]>; +def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, + [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>; +def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem, + [SDNPHasChain, SDNPMayStore]>; +def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem, + [SDNPHasChain, SDNPMayStore]>; +def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem, + [SDNPHasChain, SDNPMayStore]>; +def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; + +//===----------------------------------------------------------------------===// +// FPStack pattern fragments +//===----------------------------------------------------------------------===// + +def fpimm0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +def fpimmneg0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + +def fpimm1 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def fpimmneg1 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-1.0); +}]>; + +// Some 'special' instructions +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. 
+ def FP32_TO_INT16_IN_MEM : I<0, Pseudo, + (outs), (ins i16mem:$dst, RFP32:$src), + "##FP32_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; + def FP32_TO_INT32_IN_MEM : I<0, Pseudo, + (outs), (ins i32mem:$dst, RFP32:$src), + "##FP32_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>; + def FP32_TO_INT64_IN_MEM : I<0, Pseudo, + (outs), (ins i64mem:$dst, RFP32:$src), + "##FP32_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>; + def FP64_TO_INT16_IN_MEM : I<0, Pseudo, + (outs), (ins i16mem:$dst, RFP64:$src), + "##FP64_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>; + def FP64_TO_INT32_IN_MEM : I<0, Pseudo, + (outs), (ins i32mem:$dst, RFP64:$src), + "##FP64_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>; + def FP64_TO_INT64_IN_MEM : I<0, Pseudo, + (outs), (ins i64mem:$dst, RFP64:$src), + "##FP64_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>; + def FP80_TO_INT16_IN_MEM : I<0, Pseudo, + (outs), (ins i16mem:$dst, RFP80:$src), + "##FP80_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>; + def FP80_TO_INT32_IN_MEM : I<0, Pseudo, + (outs), (ins i32mem:$dst, RFP80:$src), + "##FP80_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>; + def FP80_TO_INT64_IN_MEM : I<0, Pseudo, + (outs), (ins i64mem:$dst, RFP80:$src), + "##FP80_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>; +} + +let isTerminator = 1 in + let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in + def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>; + +// All FP Stack operations are represented with four instructions here. The +// first three instructions, generated by the instruction selector, use "RFP32" +// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit, +// 64-bit or 80-bit floating point values. These sizes apply to the values, +// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be +// copied to each other without losing information. These instructions are all +// pseudo instructions and use the "_Fp" suffix. +// In some cases there are additional variants with a mixture of different +// register sizes. +// The second instruction is defined with FPI, which is the actual instruction +// emitted by the assembler. These use "RST" registers, although frequently +// the actual register(s) used are implicit. These are always 80 bits. +// The FP stackifier pass converts one to the other after register allocation +// occurs. +// +// Note that the FpI instruction should have instruction selection info (e.g. +// a pattern) and the FPI instruction should have emission info (e.g. opcode +// encoding and asm printing info). + +// Pseudo Instructions for FP stack return values. +def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0) +def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0) +def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0) + +// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when +// there are two values live out on the stack from a call or inlineasm. This +// magic is handled by the stackifier. It is not valid to emit FpGET_ST1* and +// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to +// occur between them. 
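To make that ordering contract concrete before the FpGET_ST1 definitions that follow: a lowering that copies two x87 return values out of the stack must issue the ST(0) copy first and the ST(1) copy immediately after, with no intervening FP operations. A minimal C++ sketch, assuming an invented helper name and the BuildMI signature of roughly this era of LLVM:

    // Sketch only: the required emission order for two x87 return values.
    // The stackifier relies on FpGET_ST0_* preceding FpGET_ST1_*.
    static void copyTwoFPReturnValues(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      const TargetInstrInfo &TII, DebugLoc DL,
                                      unsigned Dst0, unsigned Dst1) {
      BuildMI(MBB, MI, DL, TII.get(X86::FpGET_ST0_64), Dst0); // ST(0) first
      BuildMI(MBB, MI, DL, TII.get(X86::FpGET_ST1_64), Dst1); // then ST(1)
    }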
+def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+
+let Defs = [ST0] in {
+def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
+}
+
+let Defs = [ST1] in {
+def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
+}
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Register copies. Just copies, the shortening ones do not truncate.
+let neverHasSideEffects = 1 in {
+  def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
+  def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
+  def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
+  def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
+  def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
+  def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
+  def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
+  def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
+  def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
+}
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+                   [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+                   [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+                 [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+                    [(set RFP32:$dst,
+                      (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+                    [(set RFP64:$dst,
+                      (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+                     [(set RFP64:$dst,
+                       (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+                   [(set RFP80:$dst,
+                     (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+                   [(set RFP80:$dst,
+                     (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+                !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+                !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
+                       [(set RFP32:$dst, (OpNode RFP32:$src1,
+                                          (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
+                       [(set RFP32:$dst, (OpNode RFP32:$src1,
+                                          (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
+                       [(set RFP64:$dst, (OpNode RFP64:$src1,
+                                          (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
+                       [(set RFP64:$dst, (OpNode RFP64:$src1,
+                                          (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
+                     [(set RFP80:$dst, (OpNode RFP80:$src1,
+                                        (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), OneArgFPRW,
+                     [(set RFP80:$dst, (OpNode RFP80:$src1,
+                                        (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+                 !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+                 !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub, MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+
+class FPST0rInst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+  : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r   : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0   : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0  : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r  : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0   : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0  : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r   : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0  : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r   : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0   : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0  : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r  : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0   : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0  : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r   : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0  : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+                   [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+                   [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+                 [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Floating point cmovs.
+multiclass FPCMov<PatLeaf cc> {
+  def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+                     CondMovFP,
+                     [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+                                        cc, EFLAGS))]>;
+  def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+                     CondMovFP,
+                     [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+                                        cc, EFLAGS))]>;
+  def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+                   CondMovFP,
+                   [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+                                      cc, EFLAGS))]>;
+}
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+defm CMOVB  : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE  : FPCMov<X86_COND_E>;
+defm CMOVP  : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+}
+
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), + "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA; +def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), + "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA; +def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), + "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA; +def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), + "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA; +def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), + "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB; +def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), + "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB; +def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), + "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB; +def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), + "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB; + +// Floating point loads & stores. +let canFoldAsLoad = 1 in { +def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, + [(set RFP32:$dst, (loadf32 addr:$src))]>; +let isReMaterializable = 1, mayHaveSideEffects = 1 in + def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, + [(set RFP64:$dst, (loadf64 addr:$src))]>; +def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, + [(set RFP80:$dst, (loadf80 addr:$src))]>; +} +def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP, + [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>; +def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP, + [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>; +def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP, + [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>; +def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i16))]>; +def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i32))]>; +def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i64))]>; +def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i16))]>; +def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i32))]>; +def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i64))]>; +def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP, + [(set RFP80:$dst, (X86fild addr:$src, i16))]>; +def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP, + [(set RFP80:$dst, (X86fild addr:$src, i32))]>; +def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP, + [(set RFP80:$dst, (X86fild addr:$src, i64))]>; + +def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, + [(store RFP32:$src, addr:$op)]>; +def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, + [(truncstoref32 RFP64:$src, addr:$op)]>; +def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, + [(store RFP64:$src, addr:$op)]>; +def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, + [(truncstoref32 RFP80:$src, addr:$op)]>; +def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, + [(truncstoref64 RFP80:$src, addr:$op)]>; +// FST does not support 80-bit memory target; FSTP must be used. 
+ +let mayStore = 1, neverHasSideEffects = 1 in { +def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>; +def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>; +def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>; +def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>; +def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>; +} +def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP, + [(store RFP80:$src, addr:$op)]>; +let mayStore = 1, neverHasSideEffects = 1 in { +def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>; +def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>; +def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>; +def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>; +def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>; +def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>; +def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>; +def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; +def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; +} + +let mayLoad = 1 in { +def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">; +def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">; +def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">; +def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">; +def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">; +def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">; +} +let mayStore = 1 in { +def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">; +def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">; +def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">; +def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">; +def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">; +def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">; +def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">; +def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">; +def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">; +def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">; +} + +// FISTTP requires SSE3 even though it's a FPStack op. 
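Why that requirement matters: plain fistp rounds according to the current x87 control word, so without SSE3 the FP*_TO_INT*_IN_MEM pseudos defined earlier must temporarily force round-toward-zero around the store, while fisttp truncates unconditionally. Below is a sketch of that control-word dance in C++ with GCC-style inline asm; it illustrates the semantics only, not the backend's actual expansion code.

    #include <cstdint>

    // C-level truncation (fptosi) on pre-SSE3 x87: save the control word,
    // set the rounding-control field to 11b (truncate), store, restore.
    std::int64_t fp_to_int64_x87(long double x) {
      std::uint16_t cw, cw_trunc;
      std::int64_t result;
      __asm__ __volatile__("fnstcw %0" : "=m"(cw));        // save control word
      cw_trunc = cw | 0x0C00;                              // RC = round to zero
      __asm__ __volatile__("fldcw %0" : : "m"(cw_trunc));  // enter truncate mode
      __asm__ __volatile__("fldt %1\n\tfistpll %0"         // fistp as an i64
                           : "=m"(result) : "m"(x));
      __asm__ __volatile__("fldcw %0" : : "m"(cw));        // restore control word
      return result;
    }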
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i16mem RFP32:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i32mem RFP32:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i64mem RFP32:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i16mem RFP64:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i32mem RFP64:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i64mem RFP64:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, + [(X86fp_to_i16mem RFP80:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, + [(X86fp_to_i32mem RFP80:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, + [(X86fp_to_i64mem RFP80:$src, addr:$op)]>, + Requires<[HasSSE3]>; + +let mayStore = 1 in { +def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; +def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">; +def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">; +} + +// FP Stack manipulation instructions. +def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9; +def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD; +def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD; +def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9; + +// Floating point constant loads. +let isReMaterializable = 1 in { +def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP, + [(set RFP32:$dst, fpimm0)]>; +def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP, + [(set RFP32:$dst, fpimm1)]>; +def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP, + [(set RFP64:$dst, fpimm0)]>; +def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP, + [(set RFP64:$dst, fpimm1)]>; +def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, + [(set RFP80:$dst, fpimm0)]>; +def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, + [(set RFP80:$dst, fpimm1)]>; +} + +def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9; +def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9; + + +// Floating point compares. 
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                        []>;  // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+                        []>;  // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+                        []>;  // FPSW = cmp ST(0) with ST(i)
+
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                        [(X86cmp RFP32:$lhs, RFP32:$rhs),
+                         (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+                        [(X86cmp RFP64:$lhs, RFP64:$rhs),
+                         (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+                      [(X86cmp RFP80:$lhs, RFP80:$rhs),
+                       (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr   : FPI<0xE0, AddRegFrm,    // FPSW = cmp ST(0) with ST(i)
+                    (outs), (ins RST:$reg),
+                    "fucom\t$reg">, DD;
+def UCOM_FPr  : FPI<0xE8, AddRegFrm,    // FPSW = cmp ST(0) with ST(i), pop
+                    (outs), (ins RST:$reg),
+                    "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
+                    (outs), (ins),
+                    "fucompp">, DA;
+
+def UCOM_FIr  : FPI<0xE8, AddRegFrm,    // CC = cmp ST(0) with ST(i)
+                    (outs), (ins RST:$reg),
+                    "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm,    // CC = cmp ST(0) with ST(i), pop
+                    (outs), (ins RST:$reg),
+                    "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+}
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
+                  (outs), (ins), "fnstsw", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control word
+                  (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+                  [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
+                  (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to conv. i64 to f64 since there isn't a SSE version.
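The remark above is about 32-bit targets: an i64 operand cannot reach SSE2's cvtsi2sd there (no 64-bit GPR exists), so the conversion goes through x87 fild, which reads the integer from memory; that is what the X86fildflag pattern just below selects. A tiny C++ reproducer, assuming an i386 build with SSE2 enabled:

    // Compiled for i386 at -O2, the cast below comes out as "fildll"
    // rather than cvtsi2sd, because the i64 source only exists in memory.
    double sint64_to_double(long long x) { return static_cast<double>(x); }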
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+          Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+          Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 000000000000..eeed5bd27ff3
--- /dev/null
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,285 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+  bits<6> Value = val;
+}
+
+def Pseudo     : Format<0>; def RawFrm     : Format<1>;
+def AddRegFrm  : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg  : Format<5>;
+def MRMSrcMem  : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+  bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8  : ImmType<1>;
+def Imm16 : ImmType<2>;
+def Imm32 : ImmType<3>;
+def Imm64 : ImmType<4>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+  bits<3> Value = val;
+}
+def NotFP      : FPFormat<0>;
+def ZeroArgFP  : FPFormat<1>;
+def OneArgFP   : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP   : FPFormat<4>;
+def CompareFP  : FPFormat<5>;
+def CondMovFP  : FPFormat<6>;
+def SpecialFP  : FPFormat<7>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
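To make the role of these prefix bits concrete, here is a deliberately simplified, hypothetical consumer in C++. EncodedInstFlags and emitByte are invented names; the real consumer (the ad-hoc machine code emitter referred to above) is structured differently.

    // Hypothetical sketch of a prefix-byte emitter driven by the bits that
    // the classes below set on each instruction record. Names are invented.
    struct EncodedInstFlags {
      bool hasOpSizePrefix; // 0x66
      bool hasAdSizePrefix; // 0x67
      bool hasLockPrefix;   // 0xF0
      bool hasREX_WPrefix;  // REX with the W bit set
    };

    template <typename EmitByteFn>
    void emitPrefixes(const EncodedInstFlags &F, EmitByteFn emitByte) {
      if (F.hasLockPrefix)   emitByte(0xF0);
      if (F.hasOpSizePrefix) emitByte(0x66);
      if (F.hasAdSizePrefix) emitByte(0x67);
      if (F.hasREX_WPrefix)  emitByte(0x48); // base REX.W; a real emitter also
                                             // ORs in register-extension bits
    }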
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W  { bit hasREX_WPrefix = 1; }
+class LOCK   { bit hasLockPrefix = 1; }
+class SegFS  { bits<2> SegOvrBits = 1; }
+class SegGS  { bits<2> SegOvrBits = 2; }
+class TB     { bits<4> Prefix = 1; }
+class REP    { bits<4> Prefix = 2; }
+class D8     { bits<4> Prefix = 3; }
+class D9     { bits<4> Prefix = 4; }
+class DA     { bits<4> Prefix = 5; }
+class DB     { bits<4> Prefix = 6; }
+class DC     { bits<4> Prefix = 7; }
+class DD     { bits<4> Prefix = 8; }
+class DE     { bits<4> Prefix = 9; }
+class DF     { bits<4> Prefix = 10; }
+class XD     { bits<4> Prefix = 11; }
+class XS     { bits<4> Prefix = 12; }
+class T8     { bits<4> Prefix = 13; }
+class TA     { bits<4> Prefix = 14; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+              string AsmStr>
+  : Instruction {
+  let Namespace = "X86";
+
+  bits<8> Opcode = opcod;
+  Format Form = f;
+  bits<6> FormBits = Form.Value;
+  ImmType ImmT = i;
+  bits<3> ImmTypeBits = ImmT.Value;
+
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  string AsmString = AsmStr;
+
+  //
+  // Attributes specific to X86 instructions...
+  //
+  bit hasOpSizePrefix = 0;  // Does this inst have a 0x66 prefix?
+  bit hasAdSizePrefix = 0;  // Does this inst have a 0x67 prefix?
+
+  bits<4> Prefix = 0;       // Which prefix byte does this inst have?
+  bit hasREX_WPrefix = 0;   // Does this inst require the REX.W prefix?
+  FPFormat FPForm;          // What flavor of FP instruction is this?
+  bits<3> FPFormBits = 0;
+  bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
+  bits<2> SegOvrBits = 0;   // Segment override prefix.
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+  : X86Inst<o, f, NoImm, outs, ins, asm> {
+  let Pattern = pattern;
+  let CodeSize = 3;
+}
+class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+  : X86Inst<o, f, Imm8, outs, ins, asm> {
+  let Pattern = pattern;
+  let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+  : X86Inst<o, f, Imm16, outs, ins, asm> {
+  let Pattern = pattern;
+  let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+  : X86Inst<o, f, Imm32, outs, ins, asm> {
+  let Pattern = pattern;
+  let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+  : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+  : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+  let FPForm = fp; let FPFormBits = FPForm.Value;
+  let Pattern = pattern;
+}
+
+// SSE1 Instruction Templates:
+//
+//   SSI   - SSE1 instructions with XS prefix.
+//   PSI   - SSE1 instructions with TB prefix.
+//   PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+
+// SSE2 Instruction Templates:
+//
+//   SDI    - SSE2 instructions with XD prefix.
+//   SDIi8  - SSE2 instructions with ImmT == Imm8 and XD prefix.
+//   SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+//   PDI    - SSE2 instructions with TB and OpSize prefixes.
+//   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+
+// SSE3 Instruction Templates:
+//
+//   S3I  - SSE3 instructions with TB and OpSize prefixes.
+//   S3SI - SSE3 instructions with XS prefix.
+//   S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+//   SS38I - SSSE3 instructions with T8 prefix.
+//   SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. We put those instructions here because they better
+// fit into the SSSE3 instruction category rather than the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+//   SS48I   - SSE 4.1 instructions with T8 prefix.
+//   SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+              list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+//   SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+  let Pattern = pattern;
+  let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+  : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+  : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+  : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI   - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMXRI  - MMX instructions with TB prefix and REX.W.
+// MMX2I  - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXID  - MMX instructions with XD prefix.
+// MMXIS  - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+  : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 000000000000..2cd3733f0fb3
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3227 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86GenInstrInfo.inc"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+using namespace llvm;
+
+namespace {
+  cl::opt<bool>
+  NoFusing("disable-spill-fusing",
+           cl::desc("Disable fusing of spill code into instructions"));
+  cl::opt<bool>
+  PrintFailedFusing("print-failed-fuse-candidates",
+                    cl::desc("Print instructions that the allocator wants to"
+                             " fuse, but the X86 backend currently can't"),
+                    cl::Hidden);
+  cl::opt<bool>
+  ReMatPICStubLoad("remat-pic-stub-load",
+                   cl::desc("Re-materialize load from stub in PIC mode"),
+                   cl::init(false), cl::Hidden);
+}
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
+    TM(tm), RI(tm, *this) {
+  SmallVector<unsigned, 16> AmbEntries;
+  static const unsigned OpTbl2Addr[][2] = {
+    { X86::ADC32ri,     X86::ADC32mi },
+    { X86::ADC32ri8,    X86::ADC32mi8 },
+    { X86::ADC32rr,     X86::ADC32mr },
+    { X86::ADC64ri32,   X86::ADC64mi32 },
+    { X86::ADC64ri8,    X86::ADC64mi8 },
+    { X86::ADC64rr,     X86::ADC64mr },
+    { X86::ADD16ri,     X86::ADD16mi },
+    { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD32ri,     X86::ADD32mi },
+    { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD64ri32,   X86::ADD64mi32 },
+    { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD8ri,      X86::ADD8mi },
+    { X86::ADD8rr,      X86::ADD8mr },
+    { X86::AND16ri,     X86::AND16mi },
+    { X86::AND16ri8,    X86::AND16mi8 },
+    { X86::AND16rr,     X86::AND16mr },
+    { X86::AND32ri,     X86::AND32mi },
+    { X86::AND32ri8,    X86::AND32mi8 },
+    { X86::AND32rr,     X86::AND32mr },
+    {
X86::AND64ri32, X86::AND64mi32 }, + { X86::AND64ri8, X86::AND64mi8 }, + { X86::AND64rr, X86::AND64mr }, + { X86::AND8ri, X86::AND8mi }, + { X86::AND8rr, X86::AND8mr }, + { X86::DEC16r, X86::DEC16m }, + { X86::DEC32r, X86::DEC32m }, + { X86::DEC64_16r, X86::DEC64_16m }, + { X86::DEC64_32r, X86::DEC64_32m }, + { X86::DEC64r, X86::DEC64m }, + { X86::DEC8r, X86::DEC8m }, + { X86::INC16r, X86::INC16m }, + { X86::INC32r, X86::INC32m }, + { X86::INC64_16r, X86::INC64_16m }, + { X86::INC64_32r, X86::INC64_32m }, + { X86::INC64r, X86::INC64m }, + { X86::INC8r, X86::INC8m }, + { X86::NEG16r, X86::NEG16m }, + { X86::NEG32r, X86::NEG32m }, + { X86::NEG64r, X86::NEG64m }, + { X86::NEG8r, X86::NEG8m }, + { X86::NOT16r, X86::NOT16m }, + { X86::NOT32r, X86::NOT32m }, + { X86::NOT64r, X86::NOT64m }, + { X86::NOT8r, X86::NOT8m }, + { X86::OR16ri, X86::OR16mi }, + { X86::OR16ri8, X86::OR16mi8 }, + { X86::OR16rr, X86::OR16mr }, + { X86::OR32ri, X86::OR32mi }, + { X86::OR32ri8, X86::OR32mi8 }, + { X86::OR32rr, X86::OR32mr }, + { X86::OR64ri32, X86::OR64mi32 }, + { X86::OR64ri8, X86::OR64mi8 }, + { X86::OR64rr, X86::OR64mr }, + { X86::OR8ri, X86::OR8mi }, + { X86::OR8rr, X86::OR8mr }, + { X86::ROL16r1, X86::ROL16m1 }, + { X86::ROL16rCL, X86::ROL16mCL }, + { X86::ROL16ri, X86::ROL16mi }, + { X86::ROL32r1, X86::ROL32m1 }, + { X86::ROL32rCL, X86::ROL32mCL }, + { X86::ROL32ri, X86::ROL32mi }, + { X86::ROL64r1, X86::ROL64m1 }, + { X86::ROL64rCL, X86::ROL64mCL }, + { X86::ROL64ri, X86::ROL64mi }, + { X86::ROL8r1, X86::ROL8m1 }, + { X86::ROL8rCL, X86::ROL8mCL }, + { X86::ROL8ri, X86::ROL8mi }, + { X86::ROR16r1, X86::ROR16m1 }, + { X86::ROR16rCL, X86::ROR16mCL }, + { X86::ROR16ri, X86::ROR16mi }, + { X86::ROR32r1, X86::ROR32m1 }, + { X86::ROR32rCL, X86::ROR32mCL }, + { X86::ROR32ri, X86::ROR32mi }, + { X86::ROR64r1, X86::ROR64m1 }, + { X86::ROR64rCL, X86::ROR64mCL }, + { X86::ROR64ri, X86::ROR64mi }, + { X86::ROR8r1, X86::ROR8m1 }, + { X86::ROR8rCL, X86::ROR8mCL }, + { X86::ROR8ri, X86::ROR8mi }, + { X86::SAR16r1, X86::SAR16m1 }, + { X86::SAR16rCL, X86::SAR16mCL }, + { X86::SAR16ri, X86::SAR16mi }, + { X86::SAR32r1, X86::SAR32m1 }, + { X86::SAR32rCL, X86::SAR32mCL }, + { X86::SAR32ri, X86::SAR32mi }, + { X86::SAR64r1, X86::SAR64m1 }, + { X86::SAR64rCL, X86::SAR64mCL }, + { X86::SAR64ri, X86::SAR64mi }, + { X86::SAR8r1, X86::SAR8m1 }, + { X86::SAR8rCL, X86::SAR8mCL }, + { X86::SAR8ri, X86::SAR8mi }, + { X86::SBB32ri, X86::SBB32mi }, + { X86::SBB32ri8, X86::SBB32mi8 }, + { X86::SBB32rr, X86::SBB32mr }, + { X86::SBB64ri32, X86::SBB64mi32 }, + { X86::SBB64ri8, X86::SBB64mi8 }, + { X86::SBB64rr, X86::SBB64mr }, + { X86::SHL16rCL, X86::SHL16mCL }, + { X86::SHL16ri, X86::SHL16mi }, + { X86::SHL32rCL, X86::SHL32mCL }, + { X86::SHL32ri, X86::SHL32mi }, + { X86::SHL64rCL, X86::SHL64mCL }, + { X86::SHL64ri, X86::SHL64mi }, + { X86::SHL8rCL, X86::SHL8mCL }, + { X86::SHL8ri, X86::SHL8mi }, + { X86::SHLD16rrCL, X86::SHLD16mrCL }, + { X86::SHLD16rri8, X86::SHLD16mri8 }, + { X86::SHLD32rrCL, X86::SHLD32mrCL }, + { X86::SHLD32rri8, X86::SHLD32mri8 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL }, + { X86::SHLD64rri8, X86::SHLD64mri8 }, + { X86::SHR16r1, X86::SHR16m1 }, + { X86::SHR16rCL, X86::SHR16mCL }, + { X86::SHR16ri, X86::SHR16mi }, + { X86::SHR32r1, X86::SHR32m1 }, + { X86::SHR32rCL, X86::SHR32mCL }, + { X86::SHR32ri, X86::SHR32mi }, + { X86::SHR64r1, X86::SHR64m1 }, + { X86::SHR64rCL, X86::SHR64mCL }, + { X86::SHR64ri, X86::SHR64mi }, + { X86::SHR8r1, X86::SHR8m1 }, + { X86::SHR8rCL, X86::SHR8mCL }, + { X86::SHR8ri, X86::SHR8mi }, + { 
X86::SHRD16rrCL, X86::SHRD16mrCL }, + { X86::SHRD16rri8, X86::SHRD16mri8 }, + { X86::SHRD32rrCL, X86::SHRD32mrCL }, + { X86::SHRD32rri8, X86::SHRD32mri8 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL }, + { X86::SHRD64rri8, X86::SHRD64mri8 }, + { X86::SUB16ri, X86::SUB16mi }, + { X86::SUB16ri8, X86::SUB16mi8 }, + { X86::SUB16rr, X86::SUB16mr }, + { X86::SUB32ri, X86::SUB32mi }, + { X86::SUB32ri8, X86::SUB32mi8 }, + { X86::SUB32rr, X86::SUB32mr }, + { X86::SUB64ri32, X86::SUB64mi32 }, + { X86::SUB64ri8, X86::SUB64mi8 }, + { X86::SUB64rr, X86::SUB64mr }, + { X86::SUB8ri, X86::SUB8mi }, + { X86::SUB8rr, X86::SUB8mr }, + { X86::XOR16ri, X86::XOR16mi }, + { X86::XOR16ri8, X86::XOR16mi8 }, + { X86::XOR16rr, X86::XOR16mr }, + { X86::XOR32ri, X86::XOR32mi }, + { X86::XOR32ri8, X86::XOR32mi8 }, + { X86::XOR32rr, X86::XOR32mr }, + { X86::XOR64ri32, X86::XOR64mi32 }, + { X86::XOR64ri8, X86::XOR64mi8 }, + { X86::XOR64rr, X86::XOR64mr }, + { X86::XOR8ri, X86::XOR8mi }, + { X86::XOR8rr, X86::XOR8mr } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { + unsigned RegOp = OpTbl2Addr[i][0]; + unsigned MemOp = OpTbl2Addr[i][1]; + if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, + MemOp)).second) + assert(false && "Duplicated entries?"); + unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store + if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, + std::make_pair(RegOp, + AuxInfo))).second) + AmbEntries.push_back(MemOp); + } + + // If the third value is 1, then it's folding either a load or a store. + static const unsigned OpTbl0[][3] = { + { X86::BT16ri8, X86::BT16mi8, 1 }, + { X86::BT32ri8, X86::BT32mi8, 1 }, + { X86::BT64ri8, X86::BT64mi8, 1 }, + { X86::CALL32r, X86::CALL32m, 1 }, + { X86::CALL64r, X86::CALL64m, 1 }, + { X86::CMP16ri, X86::CMP16mi, 1 }, + { X86::CMP16ri8, X86::CMP16mi8, 1 }, + { X86::CMP16rr, X86::CMP16mr, 1 }, + { X86::CMP32ri, X86::CMP32mi, 1 }, + { X86::CMP32ri8, X86::CMP32mi8, 1 }, + { X86::CMP32rr, X86::CMP32mr, 1 }, + { X86::CMP64ri32, X86::CMP64mi32, 1 }, + { X86::CMP64ri8, X86::CMP64mi8, 1 }, + { X86::CMP64rr, X86::CMP64mr, 1 }, + { X86::CMP8ri, X86::CMP8mi, 1 }, + { X86::CMP8rr, X86::CMP8mr, 1 }, + { X86::DIV16r, X86::DIV16m, 1 }, + { X86::DIV32r, X86::DIV32m, 1 }, + { X86::DIV64r, X86::DIV64m, 1 }, + { X86::DIV8r, X86::DIV8m, 1 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr, 0 }, + { X86::IDIV16r, X86::IDIV16m, 1 }, + { X86::IDIV32r, X86::IDIV32m, 1 }, + { X86::IDIV64r, X86::IDIV64m, 1 }, + { X86::IDIV8r, X86::IDIV8m, 1 }, + { X86::IMUL16r, X86::IMUL16m, 1 }, + { X86::IMUL32r, X86::IMUL32m, 1 }, + { X86::IMUL64r, X86::IMUL64m, 1 }, + { X86::IMUL8r, X86::IMUL8m, 1 }, + { X86::JMP32r, X86::JMP32m, 1 }, + { X86::JMP64r, X86::JMP64m, 1 }, + { X86::MOV16ri, X86::MOV16mi, 0 }, + { X86::MOV16rr, X86::MOV16mr, 0 }, + { X86::MOV32ri, X86::MOV32mi, 0 }, + { X86::MOV32rr, X86::MOV32mr, 0 }, + { X86::MOV64ri32, X86::MOV64mi32, 0 }, + { X86::MOV64rr, X86::MOV64mr, 0 }, + { X86::MOV8ri, X86::MOV8mi, 0 }, + { X86::MOV8rr, X86::MOV8mr, 0 }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 }, + { X86::MOVAPDrr, X86::MOVAPDmr, 0 }, + { X86::MOVAPSrr, X86::MOVAPSmr, 0 }, + { X86::MOVDQArr, X86::MOVDQAmr, 0 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 }, + { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 }, + { X86::MOVSDrr, X86::MOVSDmr, 0 }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 }, 
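    // Reading guide for OpTbl0 (kept as a comment so the initializer stays
    // valid C++): each row pairs a register-form opcode with the memory-form
    // opcode in which operand 0 is replaced by an address. The third column
    // feeds the AuxInfo bits built in the loop after the table: 1 means the
    // memory operand is read, a folded load (e.g. CALL32m), while 0 means it
    // is written, a folded store (e.g. MOV32mr).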
+ { X86::MOVSSrr, X86::MOVSSmr, 0 }, + { X86::MOVUPDrr, X86::MOVUPDmr, 0 }, + { X86::MOVUPSrr, X86::MOVUPSmr, 0 }, + { X86::MUL16r, X86::MUL16m, 1 }, + { X86::MUL32r, X86::MUL32m, 1 }, + { X86::MUL64r, X86::MUL64m, 1 }, + { X86::MUL8r, X86::MUL8m, 1 }, + { X86::SETAEr, X86::SETAEm, 0 }, + { X86::SETAr, X86::SETAm, 0 }, + { X86::SETBEr, X86::SETBEm, 0 }, + { X86::SETBr, X86::SETBm, 0 }, + { X86::SETEr, X86::SETEm, 0 }, + { X86::SETGEr, X86::SETGEm, 0 }, + { X86::SETGr, X86::SETGm, 0 }, + { X86::SETLEr, X86::SETLEm, 0 }, + { X86::SETLr, X86::SETLm, 0 }, + { X86::SETNEr, X86::SETNEm, 0 }, + { X86::SETNOr, X86::SETNOm, 0 }, + { X86::SETNPr, X86::SETNPm, 0 }, + { X86::SETNSr, X86::SETNSm, 0 }, + { X86::SETOr, X86::SETOm, 0 }, + { X86::SETPr, X86::SETPm, 0 }, + { X86::SETSr, X86::SETSm, 0 }, + { X86::TAILJMPr, X86::TAILJMPm, 1 }, + { X86::TEST16ri, X86::TEST16mi, 1 }, + { X86::TEST32ri, X86::TEST32mi, 1 }, + { X86::TEST64ri32, X86::TEST64mi32, 1 }, + { X86::TEST8ri, X86::TEST8mi, 1 } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { + unsigned RegOp = OpTbl0[i][0]; + unsigned MemOp = OpTbl0[i][1]; + if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, + MemOp)).second) + assert(false && "Duplicated entries?"); + unsigned FoldedLoad = OpTbl0[i][2]; + // Index 0, folded load or store. + unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); + if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) + if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, + std::make_pair(RegOp, AuxInfo))).second) + AmbEntries.push_back(MemOp); + } + + static const unsigned OpTbl1[][2] = { + { X86::CMP16rr, X86::CMP16rm }, + { X86::CMP32rr, X86::CMP32rm }, + { X86::CMP64rr, X86::CMP64rm }, + { X86::CMP8rr, X86::CMP8rm }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, + { X86::FsMOVAPDrr, X86::MOVSDrm }, + { X86::FsMOVAPSrr, X86::MOVSSrm }, + { X86::IMUL16rri, X86::IMUL16rmi }, + { X86::IMUL16rri8, X86::IMUL16rmi8 }, + { X86::IMUL32rri, X86::IMUL32rmi }, + { X86::IMUL32rri8, X86::IMUL32rmi8 }, + { X86::IMUL64rri32, X86::IMUL64rmi32 }, + { X86::IMUL64rri8, X86::IMUL64rmi8 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, + { X86::Int_COMISDrr, X86::Int_COMISDrm }, + { X86::Int_COMISSrr, X86::Int_COMISSrm }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, + { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, + { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, + { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, + { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, + { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, + { X86::Int_CVTTPS2DQrr, 
X86::Int_CVTTPS2DQrm }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, + { X86::MOV16rr, X86::MOV16rm }, + { X86::MOV32rr, X86::MOV32rm }, + { X86::MOV64rr, X86::MOV64rm }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, + { X86::MOV64toSDrr, X86::MOV64toSDrm }, + { X86::MOV8rr, X86::MOV8rm }, + { X86::MOVAPDrr, X86::MOVAPDrm }, + { X86::MOVAPSrr, X86::MOVAPSrm }, + { X86::MOVDDUPrr, X86::MOVDDUPrm }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, + { X86::MOVDQArr, X86::MOVDQArm }, + { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, + { X86::MOVSDrr, X86::MOVSDrm }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, + { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, + { X86::MOVSSrr, X86::MOVSSrm }, + { X86::MOVSX16rr8, X86::MOVSX16rm8 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8 }, + { X86::MOVUPDrr, X86::MOVUPDrm }, + { X86::MOVUPSrr, X86::MOVUPSrm }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, + { X86::MOVZX16rr8, X86::MOVZX16rm8 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8 }, + { X86::PSHUFDri, X86::PSHUFDmi }, + { X86::PSHUFHWri, X86::PSHUFHWmi }, + { X86::PSHUFLWri, X86::PSHUFLWmi }, + { X86::RCPPSr, X86::RCPPSm }, + { X86::RCPPSr_Int, X86::RCPPSm_Int }, + { X86::RSQRTPSr, X86::RSQRTPSm }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, + { X86::RSQRTSSr, X86::RSQRTSSm }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, + { X86::SQRTPDr, X86::SQRTPDm }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, + { X86::SQRTPSr, X86::SQRTPSm }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, + { X86::SQRTSDr, X86::SQRTSDm }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, + { X86::SQRTSSr, X86::SQRTSSm }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, + { X86::TEST16rr, X86::TEST16rm }, + { X86::TEST32rr, X86::TEST32rm }, + { X86::TEST64rr, X86::TEST64rm }, + { X86::TEST8rr, X86::TEST8rm }, + // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 + { X86::UCOMISDrr, X86::UCOMISDrm }, + { X86::UCOMISSrr, X86::UCOMISSrm } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { + unsigned RegOp = OpTbl1[i][0]; + unsigned MemOp = OpTbl1[i][1]; + if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, + MemOp)).second) + assert(false && "Duplicated entries?"); + unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load + if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) + if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, + std::make_pair(RegOp, AuxInfo))).second) + AmbEntries.push_back(MemOp); + } + + static const unsigned OpTbl2[][2] = { + { X86::ADC32rr, X86::ADC32rm }, + { X86::ADC64rr, X86::ADC64rm }, + { X86::ADD16rr, X86::ADD16rm }, + { X86::ADD32rr, X86::ADD32rm }, + { X86::ADD64rr, X86::ADD64rm }, + { X86::ADD8rr, X86::ADD8rm }, + { X86::ADDPDrr, X86::ADDPDrm }, + { X86::ADDPSrr, X86::ADDPSrm }, + { X86::ADDSDrr, X86::ADDSDrm }, + { X86::ADDSSrr, 
X86::ADDSSrm }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, + { X86::AND16rr, X86::AND16rm }, + { X86::AND32rr, X86::AND32rm }, + { X86::AND64rr, X86::AND64rm }, + { X86::AND8rr, X86::AND8rm }, + { X86::ANDNPDrr, X86::ANDNPDrm }, + { X86::ANDNPSrr, X86::ANDNPSrm }, + { X86::ANDPDrr, X86::ANDPDrm }, + { X86::ANDPSrr, X86::ANDPSrm }, + { X86::CMOVA16rr, X86::CMOVA16rm }, + { X86::CMOVA32rr, X86::CMOVA32rm }, + { X86::CMOVA64rr, X86::CMOVA64rm }, + { X86::CMOVAE16rr, X86::CMOVAE16rm }, + { X86::CMOVAE32rr, X86::CMOVAE32rm }, + { X86::CMOVAE64rr, X86::CMOVAE64rm }, + { X86::CMOVB16rr, X86::CMOVB16rm }, + { X86::CMOVB32rr, X86::CMOVB32rm }, + { X86::CMOVB64rr, X86::CMOVB64rm }, + { X86::CMOVBE16rr, X86::CMOVBE16rm }, + { X86::CMOVBE32rr, X86::CMOVBE32rm }, + { X86::CMOVBE64rr, X86::CMOVBE64rm }, + { X86::CMOVE16rr, X86::CMOVE16rm }, + { X86::CMOVE32rr, X86::CMOVE32rm }, + { X86::CMOVE64rr, X86::CMOVE64rm }, + { X86::CMOVG16rr, X86::CMOVG16rm }, + { X86::CMOVG32rr, X86::CMOVG32rm }, + { X86::CMOVG64rr, X86::CMOVG64rm }, + { X86::CMOVGE16rr, X86::CMOVGE16rm }, + { X86::CMOVGE32rr, X86::CMOVGE32rm }, + { X86::CMOVGE64rr, X86::CMOVGE64rm }, + { X86::CMOVL16rr, X86::CMOVL16rm }, + { X86::CMOVL32rr, X86::CMOVL32rm }, + { X86::CMOVL64rr, X86::CMOVL64rm }, + { X86::CMOVLE16rr, X86::CMOVLE16rm }, + { X86::CMOVLE32rr, X86::CMOVLE32rm }, + { X86::CMOVLE64rr, X86::CMOVLE64rm }, + { X86::CMOVNE16rr, X86::CMOVNE16rm }, + { X86::CMOVNE32rr, X86::CMOVNE32rm }, + { X86::CMOVNE64rr, X86::CMOVNE64rm }, + { X86::CMOVNO16rr, X86::CMOVNO16rm }, + { X86::CMOVNO32rr, X86::CMOVNO32rm }, + { X86::CMOVNO64rr, X86::CMOVNO64rm }, + { X86::CMOVNP16rr, X86::CMOVNP16rm }, + { X86::CMOVNP32rr, X86::CMOVNP32rm }, + { X86::CMOVNP64rr, X86::CMOVNP64rm }, + { X86::CMOVNS16rr, X86::CMOVNS16rm }, + { X86::CMOVNS32rr, X86::CMOVNS32rm }, + { X86::CMOVNS64rr, X86::CMOVNS64rm }, + { X86::CMOVO16rr, X86::CMOVO16rm }, + { X86::CMOVO32rr, X86::CMOVO32rm }, + { X86::CMOVO64rr, X86::CMOVO64rm }, + { X86::CMOVP16rr, X86::CMOVP16rm }, + { X86::CMOVP32rr, X86::CMOVP32rm }, + { X86::CMOVP64rr, X86::CMOVP64rm }, + { X86::CMOVS16rr, X86::CMOVS16rm }, + { X86::CMOVS32rr, X86::CMOVS32rm }, + { X86::CMOVS64rr, X86::CMOVS64rm }, + { X86::CMPPDrri, X86::CMPPDrmi }, + { X86::CMPPSrri, X86::CMPPSrmi }, + { X86::CMPSDrr, X86::CMPSDrm }, + { X86::CMPSSrr, X86::CMPSSrm }, + { X86::DIVPDrr, X86::DIVPDrm }, + { X86::DIVPSrr, X86::DIVPSrm }, + { X86::DIVSDrr, X86::DIVSDrm }, + { X86::DIVSSrr, X86::DIVSSrm }, + { X86::FsANDNPDrr, X86::FsANDNPDrm }, + { X86::FsANDNPSrr, X86::FsANDNPSrm }, + { X86::FsANDPDrr, X86::FsANDPDrm }, + { X86::FsANDPSrr, X86::FsANDPSrm }, + { X86::FsORPDrr, X86::FsORPDrm }, + { X86::FsORPSrr, X86::FsORPSrm }, + { X86::FsXORPDrr, X86::FsXORPDrm }, + { X86::FsXORPSrr, X86::FsXORPSrm }, + { X86::HADDPDrr, X86::HADDPDrm }, + { X86::HADDPSrr, X86::HADDPSrm }, + { X86::HSUBPDrr, X86::HSUBPDrm }, + { X86::HSUBPSrr, X86::HSUBPSrm }, + { X86::IMUL16rr, X86::IMUL16rm }, + { X86::IMUL32rr, X86::IMUL32rm }, + { X86::IMUL64rr, X86::IMUL64rm }, + { X86::MAXPDrr, X86::MAXPDrm }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, + { X86::MAXPSrr, X86::MAXPSrm }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, + { X86::MAXSDrr, X86::MAXSDrm }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, + { X86::MAXSSrr, X86::MAXSSrm }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, + { X86::MINPDrr, X86::MINPDrm }, + { X86::MINPDrr_Int, X86::MINPDrm_Int }, + { X86::MINPSrr, X86::MINPSrm }, + { X86::MINPSrr_Int, X86::MINPSrm_Int }, + { X86::MINSDrr, 
X86::MINSDrm }, + { X86::MINSDrr_Int, X86::MINSDrm_Int }, + { X86::MINSSrr, X86::MINSSrm }, + { X86::MINSSrr_Int, X86::MINSSrm_Int }, + { X86::MULPDrr, X86::MULPDrm }, + { X86::MULPSrr, X86::MULPSrm }, + { X86::MULSDrr, X86::MULSDrm }, + { X86::MULSSrr, X86::MULSSrm }, + { X86::OR16rr, X86::OR16rm }, + { X86::OR32rr, X86::OR32rm }, + { X86::OR64rr, X86::OR64rm }, + { X86::OR8rr, X86::OR8rm }, + { X86::ORPDrr, X86::ORPDrm }, + { X86::ORPSrr, X86::ORPSrm }, + { X86::PACKSSDWrr, X86::PACKSSDWrm }, + { X86::PACKSSWBrr, X86::PACKSSWBrm }, + { X86::PACKUSWBrr, X86::PACKUSWBrm }, + { X86::PADDBrr, X86::PADDBrm }, + { X86::PADDDrr, X86::PADDDrm }, + { X86::PADDQrr, X86::PADDQrm }, + { X86::PADDSBrr, X86::PADDSBrm }, + { X86::PADDSWrr, X86::PADDSWrm }, + { X86::PADDWrr, X86::PADDWrm }, + { X86::PANDNrr, X86::PANDNrm }, + { X86::PANDrr, X86::PANDrm }, + { X86::PAVGBrr, X86::PAVGBrm }, + { X86::PAVGWrr, X86::PAVGWrm }, + { X86::PCMPEQBrr, X86::PCMPEQBrm }, + { X86::PCMPEQDrr, X86::PCMPEQDrm }, + { X86::PCMPEQWrr, X86::PCMPEQWrm }, + { X86::PCMPGTBrr, X86::PCMPGTBrm }, + { X86::PCMPGTDrr, X86::PCMPGTDrm }, + { X86::PCMPGTWrr, X86::PCMPGTWrm }, + { X86::PINSRWrri, X86::PINSRWrmi }, + { X86::PMADDWDrr, X86::PMADDWDrm }, + { X86::PMAXSWrr, X86::PMAXSWrm }, + { X86::PMAXUBrr, X86::PMAXUBrm }, + { X86::PMINSWrr, X86::PMINSWrm }, + { X86::PMINUBrr, X86::PMINUBrm }, + { X86::PMULDQrr, X86::PMULDQrm }, + { X86::PMULHUWrr, X86::PMULHUWrm }, + { X86::PMULHWrr, X86::PMULHWrm }, + { X86::PMULLDrr, X86::PMULLDrm }, + { X86::PMULLDrr_int, X86::PMULLDrm_int }, + { X86::PMULLWrr, X86::PMULLWrm }, + { X86::PMULUDQrr, X86::PMULUDQrm }, + { X86::PORrr, X86::PORrm }, + { X86::PSADBWrr, X86::PSADBWrm }, + { X86::PSLLDrr, X86::PSLLDrm }, + { X86::PSLLQrr, X86::PSLLQrm }, + { X86::PSLLWrr, X86::PSLLWrm }, + { X86::PSRADrr, X86::PSRADrm }, + { X86::PSRAWrr, X86::PSRAWrm }, + { X86::PSRLDrr, X86::PSRLDrm }, + { X86::PSRLQrr, X86::PSRLQrm }, + { X86::PSRLWrr, X86::PSRLWrm }, + { X86::PSUBBrr, X86::PSUBBrm }, + { X86::PSUBDrr, X86::PSUBDrm }, + { X86::PSUBSBrr, X86::PSUBSBrm }, + { X86::PSUBSWrr, X86::PSUBSWrm }, + { X86::PSUBWrr, X86::PSUBWrm }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm }, + { X86::PXORrr, X86::PXORrm }, + { X86::SBB32rr, X86::SBB32rm }, + { X86::SBB64rr, X86::SBB64rm }, + { X86::SHUFPDrri, X86::SHUFPDrmi }, + { X86::SHUFPSrri, X86::SHUFPSrmi }, + { X86::SUB16rr, X86::SUB16rm }, + { X86::SUB32rr, X86::SUB32rm }, + { X86::SUB64rr, X86::SUB64rm }, + { X86::SUB8rr, X86::SUB8rm }, + { X86::SUBPDrr, X86::SUBPDrm }, + { X86::SUBPSrr, X86::SUBPSrm }, + { X86::SUBSDrr, X86::SUBSDrm }, + { X86::SUBSSrr, X86::SUBSSrm }, + // FIXME: TEST*rr -> swapped operand of TEST*mr. 
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, + { X86::XOR16rr, X86::XOR16rm }, + { X86::XOR32rr, X86::XOR32rm }, + { X86::XOR64rr, X86::XOR64rm }, + { X86::XOR8rr, X86::XOR8rm }, + { X86::XORPDrr, X86::XORPDrm }, + { X86::XORPSrr, X86::XORPSrm } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { + unsigned RegOp = OpTbl2[i][0]; + unsigned MemOp = OpTbl2[i][1]; + if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, + MemOp)).second) + assert(false && "Duplicated entries?"); + unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load + if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, + std::make_pair(RegOp, AuxInfo))).second) + AmbEntries.push_back(MemOp); + } + + // Remove ambiguous entries. + assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); +} + +bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + switch (MI.getOpcode()) { + default: + return false; + case X86::MOV8rr: + case X86::MOV8rr_NOREX: + case X86::MOV16rr: + case X86::MOV32rr: + case X86::MOV64rr: + case X86::MOVSSrr: + case X86::MOVSDrr: + + // FP Stack register class copies + case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: + case X86::MOV_Fp3264: case X86::MOV_Fp3280: + case X86::MOV_Fp6432: case X86::MOV_Fp8032: + + case X86::FsMOVAPSrr: + case X86::FsMOVAPDrr: + case X86::MOVAPSrr: + case X86::MOVAPDrr: + case X86::MOVDQArr: + case X86::MOVSS2PSrr: + case X86::MOVSD2PDrr: + case X86::MOVPS2SSrr: + case X86::MOVPD2SDrr: + case X86::MMX_MOVQ64rr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid register-register move instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); + return true; + } +} + +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVAPDrm: + case X86::MOVDQArm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: + if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(4).isImm() && + MI->getOperand(2).getImm() == 1 && + MI->getOperand(3).getReg() == 0 && + MI->getOperand(4).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case X86::MOV8mr: + case X86::MOV16mr: + case X86::MOV32mr: + case X86::MOV64mr: + case X86::ST_FpP64m: + case X86::MOVSSmr: + case X86::MOVSDmr: + case X86::MOVAPSmr: + case X86::MOVAPDmr: + case X86::MOVDQAmr: + case X86::MMX_MOVD64mr: + case X86::MMX_MOVQ64mr: + case X86::MMX_MOVNTQmr: + if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && + MI->getOperand(2).isReg() && MI->getOperand(3).isImm() && + MI->getOperand(1).getImm() == 1 && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(0).getIndex(); + return 
MI->getOperand(X86AddrNumOperands).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+
+/// regIsPICBase - Return true if register is PIC base (i.e. defined by
+/// X86::MOVPC32r).
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+  bool isPICBase = false;
+  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+         E = MRI.def_end(); I != E; ++I) {
+    MachineInstr *DefMI = I.getOperand().getParent();
+    if (DefMI->getOpcode() != X86::MOVPC32r)
+      return false;
+    assert(!isPICBase && "More than one PIC base?");
+    isPICBase = true;
+  }
+  return isPICBase;
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
+  return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case X86::MOV8rm:
+  case X86::MOV16rm:
+  case X86::MOV32rm:
+  case X86::MOV64rm:
+  case X86::LD_Fp64m:
+  case X86::MOVSSrm:
+  case X86::MOVSDrm:
+  case X86::MOVAPSrm:
+  case X86::MOVAPDrm:
+  case X86::MOVDQArm:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm: {
+    // Loads from constant pools are trivially rematerializable.
+    if (MI->getOperand(1).isReg() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+        (MI->getOperand(4).isCPI() ||
+         (MI->getOperand(4).isGlobal() &&
+          isGVStub(MI->getOperand(4).getGlobal(), TM)))) {
+      unsigned BaseReg = MI->getOperand(1).getReg();
+      if (BaseReg == 0)
+        return true;
+      // Allow re-materialization of PIC load.
+      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+        return false;
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const MachineRegisterInfo &MRI = MF.getRegInfo();
+      bool isPICBase = false;
+      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+             E = MRI.def_end(); I != E; ++I) {
+        MachineInstr *DefMI = I.getOperand().getParent();
+        if (DefMI->getOpcode() != X86::MOVPC32r)
+          return false;
+        assert(!isPICBase && "More than one PIC base?");
+        isPICBase = true;
+      }
+      return isPICBase;
+    }
+    return false;
+  }
+
+  case X86::LEA32r:
+  case X86::LEA64r: {
+    if (MI->getOperand(2).isImm() &&
+        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+        !MI->getOperand(4).isReg()) {
+      // lea fi#, lea GV, etc. are all rematerializable.
+      if (!MI->getOperand(1).isReg())
+        return true;
+      unsigned BaseReg = MI->getOperand(1).getReg();
+      if (BaseReg == 0)
+        return true;
+      // Allow re-materialization of lea PICBase + x.
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const MachineRegisterInfo &MRI = MF.getRegInfo();
+      return regIsPICBase(BaseReg, MRI);
+    }
+    return false;
+  }
+  }
+
+  // All other instructions marked M_REMATERIALIZABLE are always trivially
+  // rematerializable.
+  return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// two instructions it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I) {
+  // It's always safe to clobber EFLAGS at the end of a block.
+  if (I == MBB.end())
+    return true;
+
+  // For compile time consideration, if we are not able to determine the
+  // safety after visiting 2 instructions, we will assume it's not safe.
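
The two-instruction window in the scan that follows trades precision for compile time. A minimal standalone sketch of the same rule, written against a hypothetical simplified operand model rather than the real MachineOperand API:

    #include <cstddef>
    #include <vector>

    // Hypothetical model: an operand names a register and is a use or a def.
    struct Operand { unsigned Reg; bool IsDef; };
    struct Instr   { std::vector<Operand> Ops; };
    static const unsigned EFLAGS_REG = 1; // stand-in for X86::EFLAGS

    // A use of EFLAGS first means clobbering is unsafe; a def first means the
    // old value is dead, so clobbering is safe; so is falling off the block.
    bool safeToClobberEFLAGS(const std::vector<Instr> &Block, size_t I) {
      for (unsigned Scanned = 0; Scanned < 2; ++Scanned) {
        if (I == Block.size())
          return true;                       // end of block: safe
        bool SeenDef = false;
        for (size_t j = 0; j != Block[I].Ops.size(); ++j) {
          const Operand &MO = Block[I].Ops[j];
          if (MO.Reg != EFLAGS_REG)
            continue;
          if (!MO.IsDef)
            return false;                    // EFLAGS read before redefinition
          SeenDef = true;
        }
        if (SeenDef)
          return true;                       // EFLAGS redefined: old value dead
        ++I;
      }
      return I == Block.size();              // otherwise: conservative "no"
    }
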
+  for (unsigned i = 0; i < 2; ++i) {
+    bool SeenDef = false;
+    for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
+      MachineOperand &MO = I->getOperand(j);
+      if (!MO.isReg())
+        continue;
+      if (MO.getReg() == X86::EFLAGS) {
+        if (MO.isUse())
+          return false;
+        SeenDef = true;
+      }
+    }
+
+    if (SeenDef)
+      // This instruction defines EFLAGS, no need to look any further.
+      return true;
+    ++I;
+
+    // If we make it to the end of the block, it's safe to clobber EFLAGS.
+    if (I == MBB.end())
+      return true;
+  }
+
+  // Conservative answer.
+  return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator I,
+                                 unsigned DestReg,
+                                 const MachineInstr *Orig) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  unsigned SubIdx = Orig->getOperand(0).isReg()
+    ? Orig->getOperand(0).getSubReg() : 0;
+  bool ChangeSubIdx = SubIdx != 0;
+  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+    DestReg = RI.getSubReg(DestReg, SubIdx);
+    SubIdx = 0;
+  }
+
+  // MOV32r0 etc. are implemented with xor which clobbers condition code.
+  // Re-materialize them as movri instructions to avoid side effects.
+  bool Emitted = false;
+  switch (Orig->getOpcode()) {
+  default: break;
+  case X86::MOV8r0:
+  case X86::MOV16r0:
+  case X86::MOV32r0:
+  case X86::MOV64r0: {
+    if (!isSafeToClobberEFLAGS(MBB, I)) {
+      unsigned Opc = 0;
+      switch (Orig->getOpcode()) {
+      default: break;
+      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
+      case X86::MOV16r0: Opc = X86::MOV16ri; break;
+      case X86::MOV32r0: Opc = X86::MOV32ri; break;
+      case X86::MOV64r0: Opc = X86::MOV64ri32; break;
+      }
+      BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+      Emitted = true;
+    }
+    break;
+  }
+  }
+
+  if (!Emitted) {
+    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+    MI->getOperand(0).setReg(DestReg);
+    MBB.insert(I, MI);
+  }
+
+  if (ChangeSubIdx) {
+    MachineInstr *NewMI = prior(I);
+    NewMI->getOperand(0).setSubReg(SubIdx);
+  }
+}
+
+/// isInvariantLoad - Return true if the specified instruction (which is marked
+/// mayLoad) is loading from a location whose value is invariant across the
+/// function. For example, loading a value from the constant pool or
+/// from the argument area of a function if it does not change. This should
+/// only return true if *all* loads the instruction does are invariant (if it
+/// does multiple loads).
+bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+  // This code cares about loads from three cases: constant pool entries,
+  // invariant argument slots, and global stubs. In order to handle these cases
+  // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
+  // operand and base our analysis on it. This is safe because the address of
+  // none of these three cases is ever used as anything other than a load base
+  // and X86 doesn't have any instructions that load from multiple places.
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    // Loads from constant pools are trivially invariant.
+    if (MO.isCPI())
+      return true;
+
+    if (MO.isGlobal())
+      return isGVStub(MO.getGlobal(), TM);
+
+    // If this is a load from an invariant stack slot, the load is a constant.
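
The walk above accepts exactly three operand shapes. A sketch of that classification over a hypothetical operand summary (the real code queries MachineOperand and MachineFrameInfo for the same facts):

    // Hypothetical summary of the one interesting operand of a load.
    enum OperandKind { ConstantPool, Global, FrameIdx, Other };
    struct OperandInfo {
      OperandKind Kind;
      bool IsGVStub;        // global only reached through an immutable stub
      bool IsFixedSlot;     // frame index is a fixed object...
      bool IsImmutableSlot; // ...that is never written (e.g. an argument slot)
    };

    bool loadsInvariantLocation(const OperandInfo &MO) {
      switch (MO.Kind) {
      case ConstantPool: return true;         // constant pool data never changes
      case Global:       return MO.IsGVStub;  // stub pointer loads are invariant
      case FrameIdx:     return MO.IsFixedSlot && MO.IsImmutableSlot;
      case Other:        return false;
      }
      return false;
    }
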
+ if (MO.isFI()) { + const MachineFrameInfo &MFI = + *MI->getParent()->getParent()->getFrameInfo(); + int Idx = MO.getIndex(); + return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); + } + } + + // All other instances of these instructions are presumed to have other + // issues. + return false; +} + +/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that +/// is not marked dead. +static bool hasLiveCondCodeDef(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && + MO.getReg() == X86::EFLAGS && !MO.isDead()) { + return true; + } + } + return false; +} + +/// convertToThreeAddress - This method must be implemented by targets that +/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target +/// may be able to convert a two-address instruction into a true +/// three-address instruction on demand. This allows the X86 target (for +/// example) to convert ADD and SHL instructions into LEA instructions if they +/// would require register copies due to two-addressness. +/// +/// This method returns a null pointer if the transformation cannot be +/// performed, otherwise it returns the new instruction. +/// +MachineInstr * +X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + MachineFunction &MF = *MI->getParent()->getParent(); + // All instructions input are two-addr instructions. Get the known operands. + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isKill = MI->getOperand(1).isKill(); + + MachineInstr *NewMI = NULL; + // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When + // we have better subtarget support, enable the 16-bit LEA generation here. + bool DisableLEA16 = true; + + unsigned MIOpc = MI->getOpcode(); + switch (MIOpc) { + case X86::SHUFPSrri: { + assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); + if (!TM.getSubtarget().hasSSE2()) return 0; + + unsigned B = MI->getOperand(1).getReg(); + unsigned C = MI->getOperand(2).getReg(); + if (B != C) return 0; + unsigned A = MI->getOperand(0).getReg(); + unsigned M = MI->getOperand(3).getImm(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) + .addReg(A, RegState::Define | getDeadRegState(isDead)) + .addReg(B, getKillRegState(isKill)).addImm(M); + break; + } + case X86::SHL64ri: { + assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); + // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses + // the flags produced by a shift yet, so this is safe. + unsigned ShAmt = MI->getOperand(2).getImm(); + if (ShAmt == 0 || ShAmt >= 4) return 0; + + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(0).addImm(1 << ShAmt) + .addReg(Src, getKillRegState(isKill)) + .addImm(0); + break; + } + case X86::SHL32ri: { + assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); + // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses + // the flags produced by a shift yet, so this is safe. + unsigned ShAmt = MI->getOperand(2).getImm(); + if (ShAmt == 0 || ShAmt >= 4) return 0; + + unsigned Opc = TM.getSubtarget().is64Bit() ? 
+      X86::LEA64_32r : X86::LEA32r;
+    NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+      .addReg(0).addImm(1 << ShAmt)
+      .addReg(Src, getKillRegState(isKill)).addImm(0);
+    break;
+  }
+  case X86::SHL16ri: {
+    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+    // the flags produced by a shift yet, so this is safe.
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+    if (DisableLEA16) {
+      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
+      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+        ? X86::LEA64_32r : X86::LEA32r;
+      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+      // Build and insert into an implicit UNDEF value. This is OK because
+      // we'll be shifting and then extracting the lower 16-bits.
+      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+      MachineInstr *InsMI =
+        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+        .addReg(leaInReg)
+        .addReg(Src, getKillRegState(isKill))
+        .addImm(X86::SUBREG_16BIT);
+
+      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
+        .addReg(0).addImm(1 << ShAmt)
+        .addReg(leaInReg, RegState::Kill)
+        .addImm(0);
+
+      MachineInstr *ExtMI =
+        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+        .addReg(leaOutReg, RegState::Kill)
+        .addImm(X86::SUBREG_16BIT);
+
+      if (LV) {
+        // Update live variables
+        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+        if (isKill)
+          LV->replaceKillInstruction(Src, MI, InsMI);
+        if (isDead)
+          LV->replaceKillInstruction(Dest, MI, ExtMI);
+      }
+      return ExtMI;
+    } else {
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+        .addReg(0).addImm(1 << ShAmt)
+        .addReg(Src, getKillRegState(isKill))
+        .addImm(0);
+    }
+    break;
+  }
+  default: {
+    // The following opcodes also set the condition code register(s). Only
+    // convert them to equivalent lea if the condition code register defs
+    // are dead!
+    if (hasLiveCondCodeDef(MI))
+      return 0;
+
+    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+    switch (MIOpc) {
+    default: return 0;
+    case X86::INC64r:
+    case X86::INC32r:
+    case X86::INC64_32r: {
+      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, 1);
+      break;
+    }
+    case X86::INC16r:
+    case X86::INC64_16r:
+      if (DisableLEA16) return 0;
+      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+                           .addReg(Dest, RegState::Define |
+                                   getDeadRegState(isDead)),
+                           Src, isKill, 1);
+      break;
+    case X86::DEC64r:
+    case X86::DEC32r:
+    case X86::DEC64_32r: {
+      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+        : (is64Bit ?
X86::LEA64_32r : X86::LEA32r); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, -1); + break; + } + case X86::DEC16r: + case X86::DEC64_16r: + if (DisableLEA16) return 0; + assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, -1); + break; + case X86::ADD64rr: + case X86::ADD32rr: { + assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); + unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r + : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + unsigned Src2 = MI->getOperand(2).getReg(); + bool isKill2 = MI->getOperand(2).isKill(); + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, Src2, isKill2); + if (LV && isKill2) + LV->replaceKillInstruction(Src2, MI, NewMI); + break; + } + case X86::ADD16rr: { + if (DisableLEA16) return 0; + assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); + unsigned Src2 = MI->getOperand(2).getReg(); + bool isKill2 = MI->getOperand(2).isKill(); + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, Src2, isKill2); + if (LV && isKill2) + LV->replaceKillInstruction(Src2, MI, NewMI); + break; + } + case X86::ADD64ri32: + case X86::ADD64ri8: + assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); + if (MI->getOperand(2).isImm()) + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); + break; + case X86::ADD32ri: + case X86::ADD32ri8: + assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); + if (MI->getOperand(2).isImm()) { + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); + } + break; + case X86::ADD16ri: + case X86::ADD16ri8: + if (DisableLEA16) return 0; + assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); + if (MI->getOperand(2).isImm()) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); + break; + case X86::SHL16ri: + if (DisableLEA16) return 0; + case X86::SHL32ri: + case X86::SHL64ri: { + assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() && + "Unknown shl instruction!"); + unsigned ShAmt = MI->getOperand(2).getImm(); + if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) { + X86AddressMode AM; + AM.Scale = 1 << ShAmt; + AM.IndexReg = Src; + unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r + : (MIOpc == X86::SHL32ri + ? (is64Bit ? 
X86::LEA64_32r : X86::LEA32r) : X86::LEA16r); + NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), AM); + if (isKill) + NewMI->getOperand(3).setIsKill(true); + } + break; + } + } + } + } + + if (!NewMI) return 0; + + if (LV) { // Update live variables + if (isKill) + LV->replaceKillInstruction(Src, MI, NewMI); + if (isDead) + LV->replaceKillInstruction(Dest, MI, NewMI); + } + + MFI->insert(MBBI, NewMI); // Insert the new inst + return NewMI; +} + +/// commuteInstruction - We have a few instructions that must be hacked on to +/// commute them. +/// +MachineInstr * +X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + switch (MI->getOpcode()) { + case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) + case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) + case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) + case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) + case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) + case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) + unsigned Opc; + unsigned Size; + switch (MI->getOpcode()) { + default: assert(0 && "Unreachable!"); + case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; + case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; + case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; + case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; + case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; + case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; + } + unsigned Amt = MI->getOperand(3).getImm(); + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } + MI->setDesc(get(Opc)); + MI->getOperand(3).setImm(Size-Amt); + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + } + case X86::CMOVB16rr: + case X86::CMOVB32rr: + case X86::CMOVB64rr: + case X86::CMOVAE16rr: + case X86::CMOVAE32rr: + case X86::CMOVAE64rr: + case X86::CMOVE16rr: + case X86::CMOVE32rr: + case X86::CMOVE64rr: + case X86::CMOVNE16rr: + case X86::CMOVNE32rr: + case X86::CMOVNE64rr: + case X86::CMOVBE16rr: + case X86::CMOVBE32rr: + case X86::CMOVBE64rr: + case X86::CMOVA16rr: + case X86::CMOVA32rr: + case X86::CMOVA64rr: + case X86::CMOVL16rr: + case X86::CMOVL32rr: + case X86::CMOVL64rr: + case X86::CMOVGE16rr: + case X86::CMOVGE32rr: + case X86::CMOVGE64rr: + case X86::CMOVLE16rr: + case X86::CMOVLE32rr: + case X86::CMOVLE64rr: + case X86::CMOVG16rr: + case X86::CMOVG32rr: + case X86::CMOVG64rr: + case X86::CMOVS16rr: + case X86::CMOVS32rr: + case X86::CMOVS64rr: + case X86::CMOVNS16rr: + case X86::CMOVNS32rr: + case X86::CMOVNS64rr: + case X86::CMOVP16rr: + case X86::CMOVP32rr: + case X86::CMOVP64rr: + case X86::CMOVNP16rr: + case X86::CMOVNP32rr: + case X86::CMOVNP64rr: + case X86::CMOVO16rr: + case X86::CMOVO32rr: + case X86::CMOVO64rr: + case X86::CMOVNO16rr: + case X86::CMOVNO32rr: + case X86::CMOVNO64rr: { + unsigned Opc = 0; + switch (MI->getOpcode()) { + default: break; + case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; + case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; + case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; + case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; + case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; + case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; + case 
X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; + case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; + case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; + case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; + case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; + case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; + case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; + case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; + case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; + case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; + case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; + case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; + case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; + case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; + case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; + case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; + case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; + case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; + case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; + case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; + case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; + case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; + case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; + case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; + case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; + case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; + case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; + case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; + case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; + case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; + case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; + case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; + case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; + case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; + case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; + case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; + case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; + case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; + case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; + case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; + case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; + case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; + } + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } + MI->setDesc(get(Opc)); + // Fallthrough intended. 
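
Commuting a conditional move has to invert its condition at the same time: cmovCC computes dst = CC ? src : dst, and (CC ? a : b) only equals (!CC ? b : a). The switch above is the exhaustive opcode-level version of that inversion; a sketch of the underlying mapping on a few condition codes:

    enum Cond { COND_B, COND_AE, COND_E, COND_NE };

    // Each condition swaps with its logical negation.
    Cond invertCond(Cond C) {
      switch (C) {
      case COND_B:  return COND_AE;  // below         <-> above-or-equal
      case COND_AE: return COND_B;
      case COND_E:  return COND_NE;  // equal         <-> not-equal
      case COND_NE: return COND_E;
      }
      return C; // unreachable for the enumerated values
    }
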
+ } + default: + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + } +} + +static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { + switch (BrOpc) { + default: return X86::COND_INVALID; + case X86::JE: return X86::COND_E; + case X86::JNE: return X86::COND_NE; + case X86::JL: return X86::COND_L; + case X86::JLE: return X86::COND_LE; + case X86::JG: return X86::COND_G; + case X86::JGE: return X86::COND_GE; + case X86::JB: return X86::COND_B; + case X86::JBE: return X86::COND_BE; + case X86::JA: return X86::COND_A; + case X86::JAE: return X86::COND_AE; + case X86::JS: return X86::COND_S; + case X86::JNS: return X86::COND_NS; + case X86::JP: return X86::COND_P; + case X86::JNP: return X86::COND_NP; + case X86::JO: return X86::COND_O; + case X86::JNO: return X86::COND_NO; + } +} + +unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { + switch (CC) { + default: assert(0 && "Illegal condition code!"); + case X86::COND_E: return X86::JE; + case X86::COND_NE: return X86::JNE; + case X86::COND_L: return X86::JL; + case X86::COND_LE: return X86::JLE; + case X86::COND_G: return X86::JG; + case X86::COND_GE: return X86::JGE; + case X86::COND_B: return X86::JB; + case X86::COND_BE: return X86::JBE; + case X86::COND_A: return X86::JA; + case X86::COND_AE: return X86::JAE; + case X86::COND_S: return X86::JS; + case X86::COND_NS: return X86::JNS; + case X86::COND_P: return X86::JP; + case X86::COND_NP: return X86::JNP; + case X86::COND_O: return X86::JO; + case X86::COND_NO: return X86::JNO; + } +} + +/// GetOppositeBranchCondition - Return the inverse of the specified condition, +/// e.g. turning COND_E to COND_NE. +X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { + switch (CC) { + default: assert(0 && "Illegal condition code!"); + case X86::COND_E: return X86::COND_NE; + case X86::COND_NE: return X86::COND_E; + case X86::COND_L: return X86::COND_GE; + case X86::COND_LE: return X86::COND_G; + case X86::COND_G: return X86::COND_LE; + case X86::COND_GE: return X86::COND_L; + case X86::COND_B: return X86::COND_AE; + case X86::COND_BE: return X86::COND_A; + case X86::COND_A: return X86::COND_BE; + case X86::COND_AE: return X86::COND_B; + case X86::COND_S: return X86::COND_NS; + case X86::COND_NS: return X86::COND_S; + case X86::COND_P: return X86::COND_NP; + case X86::COND_NP: return X86::COND_P; + case X86::COND_O: return X86::COND_NO; + case X86::COND_NO: return X86::COND_O; + } +} + +bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isTerminator()) return false; + + // Conditional branch is a special case. + if (TID.isBranch() && !TID.isBarrier()) + return true; + if (!TID.isPredicable()) + return true; + return !isPredicated(MI); +} + +// For purposes of branch analysis do not count FP_REG_KILL as a terminator. +static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI, + const X86InstrInfo &TII) { + if (MI->getOpcode() == X86::FP_REG_KILL) + return false; + return TII.isUnpredicatedTerminator(MI); +} + +bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // Start from the bottom of the block and work up, examining the + // terminator instructions. + MachineBasicBlock::iterator I = MBB.end(); + while (I != MBB.begin()) { + --I; + // Working from the bottom, when we see a non-terminator + // instruction, we're done. 
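
The loop that continues below summarizes a block's terminators, bottom-up, into the (TBB, FBB, Cond) triple the rest of the code generator consumes. A toy version of that classification, assuming every entry is already known to be a terminator and ignoring the AllowModify cleanups the real code performs:

    #include <vector>

    struct Term { bool IsUncond; int Target; int CC; }; // CC < 0: not a Jcc

    // Returns true if the terminators cannot be summarized (matching the
    // AnalyzeBranch convention), false on success.
    bool analyzeTerminators(const std::vector<Term> &Ts,
                            int &TBB, int &FBB, int &CC) {
      TBB = FBB = -1; CC = -1;
      for (size_t i = Ts.size(); i != 0; --i) {
        const Term &T = Ts[i-1];
        if (T.IsUncond) { TBB = T.Target; continue; } // unconditional jump
        if (T.CC < 0) return true;                    // indirect branch: give up
        if (CC < 0) { FBB = TBB; TBB = T.Target; CC = T.CC; continue; }
        return true; // a second condition: only the special idioms merge
      }
      return false;
    }
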
+ if (!isBrAnalysisUnpredicatedTerminator(I, *this)) + break; + // A terminator that isn't a branch can't easily be handled + // by this analysis. + if (!I->getDesc().isBranch()) + return true; + // Handle unconditional branches. + if (I->getOpcode() == X86::JMP) { + if (!AllowModify) { + TBB = I->getOperand(0).getMBB(); + continue; + } + + // If the block has any instructions after a JMP, delete them. + while (next(I) != MBB.end()) + next(I)->eraseFromParent(); + Cond.clear(); + FBB = 0; + // Delete the JMP if it's equivalent to a fall-through. + if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + TBB = 0; + I->eraseFromParent(); + I = MBB.end(); + continue; + } + // TBB is used to indicate the unconditinal destination. + TBB = I->getOperand(0).getMBB(); + continue; + } + // Handle conditional branches. + X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); + if (BranchCode == X86::COND_INVALID) + return true; // Can't handle indirect branch. + // Working from the bottom, handle the first conditional branch. + if (Cond.empty()) { + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(BranchCode)); + continue; + } + // Handle subsequent conditional branches. Only handle the case + // where all conditional branches branch to the same destination + // and their condition opcodes fit one of the special + // multi-branch idioms. + assert(Cond.size() == 1); + assert(TBB); + // Only handle the case where all conditional branches branch to + // the same destination. + if (TBB != I->getOperand(0).getMBB()) + return true; + X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); + // If the conditions are the same, we can leave them alone. + if (OldBranchCode == BranchCode) + continue; + // If they differ, see if they fit one of the known patterns. + // Theoretically we could handle more patterns here, but + // we shouldn't expect to see them if instruction selection + // has done a reasonable job. + if ((OldBranchCode == X86::COND_NP && + BranchCode == X86::COND_E) || + (OldBranchCode == X86::COND_E && + BranchCode == X86::COND_NP)) + BranchCode = X86::COND_NP_OR_E; + else if ((OldBranchCode == X86::COND_P && + BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_NE && + BranchCode == X86::COND_P)) + BranchCode = X86::COND_NE_OR_P; + else + return true; + // Update the MachineOperand. + Cond[0].setImm(BranchCode); + } + + return false; +} + +unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->getOpcode() != X86::JMP && + GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +unsigned +X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc operand + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 1 || Cond.size() == 0) && + "X86 branch conditions have one component!"); + + if (Cond.empty()) { + // Unconditional branch? + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + return 1; + } + + // Conditional branch. 
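
The composite condition codes handled next exist because a single Jcc cannot test them: SSE floating-point equality compiled with ucomiss sets ZF for "equal" but also PF for "unordered", so the branch condition becomes "NP or E" (or its negation) and has to be synthesized as two branches to the same target. A sketch of the expansion the switch below performs, with opcode names as plain strings:

    enum CompositeCC { NP_OR_E, NE_OR_P };

    // Writes the branch opcodes to emit into Ops; both target the same block.
    unsigned expandComposite(CompositeCC CC, const char *Ops[2]) {
      switch (CC) {
      case NP_OR_E: Ops[0] = "JNP"; Ops[1] = "JE"; return 2;
      case NE_OR_P: Ops[0] = "JNE"; Ops[1] = "JP"; return 2;
      }
      return 0;
    }
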
+ unsigned Count = 0; + X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); + switch (CC) { + case X86::COND_NP_OR_E: + // Synthesize NP_OR_E with two branches. + BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + ++Count; + BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + ++Count; + break; + case X86::COND_NE_OR_P: + // Synthesize NE_OR_P with two branches. + BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + ++Count; + BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + ++Count; + break; + default: { + unsigned Opc = GetCondBranchFromCond(CC); + BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); + ++Count; + } + } + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + ++Count; + } + return Count; +} + +/// isHReg - Test if the given register is a physical h register. +static bool isHReg(unsigned Reg) { + return X86::GR8_ABCD_HRegClass.contains(Reg); +} + +bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + // Determine if DstRC and SrcRC have a common superclass in common. + const TargetRegisterClass *CommonRC = DestRC; + if (DestRC == SrcRC) + /* Source and destination have the same register class. */; + else if (CommonRC->hasSuperClass(SrcRC)) + CommonRC = SrcRC; + else if (!DestRC->hasSubClass(SrcRC)) + CommonRC = 0; + + if (CommonRC) { + unsigned Opc; + if (CommonRC == &X86::GR64RegClass) { + Opc = X86::MOV64rr; + } else if (CommonRC == &X86::GR32RegClass) { + Opc = X86::MOV32rr; + } else if (CommonRC == &X86::GR16RegClass) { + Opc = X86::MOV16rr; + } else if (CommonRC == &X86::GR8RegClass) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. + if ((isHReg(DestReg) || isHReg(SrcReg)) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; + else + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR64_ABCDRegClass) { + Opc = X86::MOV64rr; + } else if (CommonRC == &X86::GR32_ABCDRegClass) { + Opc = X86::MOV32rr; + } else if (CommonRC == &X86::GR16_ABCDRegClass) { + Opc = X86::MOV16rr; + } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; + else + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64rr; + } else if (CommonRC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rr; + } else if (CommonRC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rr; + } else if (CommonRC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::RFP32RegClass) { + Opc = X86::MOV_Fp3232; + } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { + Opc = X86::MOV_Fp6464; + } else if (CommonRC == &X86::RFP80RegClass) { + Opc = X86::MOV_Fp8080; + } else if (CommonRC == &X86::FR32RegClass) { + Opc = X86::FsMOVAPSrr; + } else if (CommonRC == &X86::FR64RegClass) { + Opc = X86::FsMOVAPDrr; + } else if (CommonRC == &X86::VR128RegClass) { + Opc = X86::MOVAPSrr; + } else if (CommonRC == &X86::VR64RegClass) { + Opc = X86::MMX_MOVQ64rr; + } else { + return false; + } + BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); + return true; + } + + // Moving EFLAGS to / from another register requires a push and a pop. 
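
There is no register-to-register move for EFLAGS, so the copy that follows goes through the stack: pushf then pop a GPR in one direction, push the GPR then popf in the other. A sketch of the pairing, using the opcode names from this file as plain strings:

    struct FlagCopySeq { const char *First, *Second; };

    FlagCopySeq eflagsCopySeq(bool FromFlags, bool Is64Bit) {
      if (FromFlags) {        // EFLAGS -> GPR: pushf; pop reg
        FlagCopySeq S = { Is64Bit ? "PUSHFQ" : "PUSHFD",
                          Is64Bit ? "POP64r" : "POP32r" };
        return S;
      }
      // GPR -> EFLAGS: push reg; popf
      FlagCopySeq S = { Is64Bit ? "PUSH64r" : "PUSH32r",
                        Is64Bit ? "POPFQ"   : "POPFD" };
      return S;
    }
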
+ if (SrcRC == &X86::CCRRegClass) { + if (SrcReg != X86::EFLAGS) + return false; + if (DestRC == &X86::GR64RegClass) { + BuildMI(MBB, MI, DL, get(X86::PUSHFQ)); + BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); + return true; + } else if (DestRC == &X86::GR32RegClass) { + BuildMI(MBB, MI, DL, get(X86::PUSHFD)); + BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); + return true; + } + } else if (DestRC == &X86::CCRRegClass) { + if (DestReg != X86::EFLAGS) + return false; + if (SrcRC == &X86::GR64RegClass) { + BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); + BuildMI(MBB, MI, DL, get(X86::POPFQ)); + return true; + } else if (SrcRC == &X86::GR32RegClass) { + BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); + BuildMI(MBB, MI, DL, get(X86::POPFD)); + return true; + } + } + + // Moving from ST(0) turns into FpGET_ST0_32 etc. + if (SrcRC == &X86::RSTRegClass) { + // Copying from ST(0)/ST(1). + if (SrcReg != X86::ST0 && SrcReg != X86::ST1) + // Can only copy from ST(0)/ST(1) right now + return false; + bool isST0 = SrcReg == X86::ST0; + unsigned Opc; + if (DestRC == &X86::RFP32RegClass) + Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; + else if (DestRC == &X86::RFP64RegClass) + Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; + else { + if (DestRC != &X86::RFP80RegClass) + return false; + Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; + } + BuildMI(MBB, MI, DL, get(Opc), DestReg); + return true; + } + + // Moving to ST(0) turns into FpSET_ST0_32 etc. + if (DestRC == &X86::RSTRegClass) { + // Copying to ST(0) / ST(1). + if (DestReg != X86::ST0 && DestReg != X86::ST1) + // Can only copy to TOS right now + return false; + bool isST0 = DestReg == X86::ST0; + unsigned Opc; + if (SrcRC == &X86::RFP32RegClass) + Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; + else if (SrcRC == &X86::RFP64RegClass) + Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; + else { + if (SrcRC != &X86::RFP80RegClass) + return false; + Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; + } + BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); + return true; + } + + // Not yet supported! + return false; +} + +static unsigned getStoreRegOpcode(unsigned SrcReg, + const TargetRegisterClass *RC, + bool isStackAligned, + TargetMachine &TM) { + unsigned Opc = 0; + if (RC == &X86::GR64RegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32RegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16RegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8RegClass) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. 
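
The NOREX byte moves used below exist because of an encoding quirk: once an instruction carries a REX prefix in 64-bit mode, the encodings that normally mean AH/BH/CH/DH are reinterpreted as SPL/BPL/SIL/DIL, so an access to a high-byte register must use an opcode the encoder is guaranteed never to give a REX prefix. A sketch of the selection rule, with hypothetical string-based register names standing in for the real register-class test:

    // Stand-in for X86::GR8_ABCD_HRegClass.contains(Reg).
    bool isHighByteReg(const char *Reg) {
      return Reg[0] != '\0' && Reg[1] == 'H'; // "AH", "BH", "CH", "DH"
    }

    const char *byteStoreOpcode(const char *SrcReg, bool Is64Bit) {
      if (isHighByteReg(SrcReg) && Is64Bit)
        return "MOV8mr_NOREX"; // must never be encoded with a REX prefix
      return "MOV8mr";
    }
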
+ if (isHReg(SrcReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_ABCDRegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32_ABCDRegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16_ABCDRegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_ABCD_LRegClass) { + Opc = X86::MOV8mr; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8mr; + } else if (RC == &X86::RFP80RegClass) { + Opc = X86::ST_FpP80m; // pops + } else if (RC == &X86::RFP64RegClass) { + Opc = X86::ST_Fp64m; + } else if (RC == &X86::RFP32RegClass) { + Opc = X86::ST_Fp32m; + } else if (RC == &X86::FR32RegClass) { + Opc = X86::MOVSSmr; + } else if (RC == &X86::FR64RegClass) { + Opc = X86::MOVSDmr; + } else if (RC == &X86::VR128RegClass) { + // If stack is realigned we can use aligned stores. + Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; + } else if (RC == &X86::VR64RegClass) { + Opc = X86::MMX_MOVQ64mr; + } else { + assert(0 && "Unknown regclass"); + abort(); + } + + return Opc; +} + +void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC) const { + const MachineFunction &MF = *MBB.getParent(); + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) + .addReg(SrcReg, getKillRegState(isKill)); +} + +void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + MIB.addReg(SrcReg, getKillRegState(isKill)); + NewMIs.push_back(MIB); +} + +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { + unsigned Opc = 0; + if (RC == &X86::GR64RegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32RegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16RegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8RegClass) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. 
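
Both the store-opcode helper above and the load-opcode helper continuing below make the same choice for 128-bit vector spills: movaps faults on a misaligned address, so the aligned form is used only when the frame guarantees 16 bytes (an ABI stack alignment of 16 or more, or a prologue that realigns the stack), mirroring the isAligned computation in the callers. A sketch of the rule:

    const char *vec128SpillOpcode(unsigned StackAlign, bool Realigned,
                                  bool IsStore) {
      bool Aligned = StackAlign >= 16 || Realigned;
      if (IsStore)
        return Aligned ? "MOVAPSmr" : "MOVUPSmr";
      return Aligned ? "MOVAPSrm" : "MOVUPSrm";
    }
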
+ if (isHReg(DestReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_ABCDRegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32_ABCDRegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16_ABCDRegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_ABCD_LRegClass) { + Opc = X86::MOV8rm; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_NOREXRegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8rm; + } else if (RC == &X86::RFP80RegClass) { + Opc = X86::LD_Fp80m; + } else if (RC == &X86::RFP64RegClass) { + Opc = X86::LD_Fp64m; + } else if (RC == &X86::RFP32RegClass) { + Opc = X86::LD_Fp32m; + } else if (RC == &X86::FR32RegClass) { + Opc = X86::MOVSSrm; + } else if (RC == &X86::FR64RegClass) { + Opc = X86::MOVSDrm; + } else if (RC == &X86::VR128RegClass) { + // If stack is realigned we can use aligned loads. + Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm; + } else if (RC == &X86::VR64RegClass) { + Opc = X86::MMX_MOVQ64rm; + } else { + assert(0 && "Unknown regclass"); + abort(); + } + + return Opc; +} + +void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC) const{ + const MachineFunction &MF = *MBB.getParent(); + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); +} + +void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); +} + +bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + bool is64Bit = TM.getSubtarget().is64Bit(); + unsigned SlotSize = is64Bit ? 8 : 4; + + MachineFunction &MF = *MBB.getParent(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize); + + unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. 
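
Callee-saved registers are saved with pushes rather than stores to individual slots, so the only frame bookkeeping needed is count times slot size, the spill order is reversed, and the matching restore code below pops forward. A compressed sketch of both halves, assuming a plain list of register ids:

    #include <cstdio>
    #include <vector>

    void spillAndRestoreCSRs(const std::vector<unsigned> &CSI, bool Is64Bit) {
      unsigned SlotSize = Is64Bit ? 8 : 4;
      unsigned FrameBytes = CSI.size() * SlotSize; // setCalleeSavedFrameSize
      std::printf("callee-saved area: %u bytes\n", FrameBytes);
      for (unsigned i = CSI.size(); i != 0; --i)   // spill: push in reverse
        std::printf("PUSH r%u\n", CSI[i-1]);
      for (unsigned i = 0, e = CSI.size(); i != e; ++i)
        std::printf("POP  r%u\n", CSI[i]);         // restore: pop forward
    }
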
+ MBB.addLiveIn(Reg); + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(Reg, RegState::Kill); + } + return true; +} + +bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + bool is64Bit = TM.getSubtarget().is64Bit(); + + unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + BuildMI(MBB, MI, DL, get(Opc), Reg); + } + return true; +} + +static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, + const SmallVectorImpl &MOs, + MachineInstr *MI, + const TargetInstrInfo &TII) { + // Create the base instruction with the memory operand as the first part. + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), + MI->getDebugLoc(), true); + MachineInstrBuilder MIB(NewMI); + unsigned NumAddrOps = MOs.size(); + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + if (NumAddrOps < 4) // FrameIndex only + addOffset(MIB, 0); + + // Loop over the rest of the ri operands, converting them over. + unsigned NumOps = MI->getDesc().getNumOperands()-2; + for (unsigned i = 0; i != NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i+2); + MIB.addOperand(MO); + } + for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + MIB.addOperand(MO); + } + return MIB; +} + +static MachineInstr *FuseInst(MachineFunction &MF, + unsigned Opcode, unsigned OpNo, + const SmallVectorImpl &MOs, + MachineInstr *MI, const TargetInstrInfo &TII) { + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), + MI->getDebugLoc(), true); + MachineInstrBuilder MIB(NewMI); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (i == OpNo) { + assert(MO.isReg() && "Expected to fold into reg operand!"); + unsigned NumAddrOps = MOs.size(); + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + if (NumAddrOps < 4) // FrameIndex only + addOffset(MIB, 0); + } else { + MIB.addOperand(MO); + } + } + return MIB; +} + +static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, + const SmallVectorImpl &MOs, + MachineInstr *MI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); + + unsigned NumAddrOps = MOs.size(); + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + if (NumAddrOps < 4) // FrameIndex only + addOffset(MIB, 0); + return MIB.addImm(0); +} + +MachineInstr* +X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, unsigned i, + const SmallVectorImpl &MOs) const{ + const DenseMap *OpcodeTablePtr = NULL; + bool isTwoAddrFold = false; + unsigned NumOps = MI->getDesc().getNumOperands(); + bool isTwoAddr = NumOps > 1 && + MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + + MachineInstr *NewMI = NULL; + // Folding a memory location into the two-address part of a two-address + // instruction is different than folding it other places. It requires + // replacing the *two* registers with the memory location. 
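
Concretely: in a two-address instruction such as ADD32rr, operand 0 (the def) and operand 1 (the use) are tied to the same register, so folding a spill slot for that register must drop both and splice the address in front of the remaining sources, which is what FuseTwoAddrInst above does with real operands. A sketch of the operand rewrite on plain strings:

    #include <string>
    #include <vector>

    // "ADD32rr  r1<def>, r1<tied>, r2"  +  slot  ->  "ADD32mr  [slot], r2"
    std::vector<std::string>
    fuseTwoAddrOperands(const std::vector<std::string> &AddrOps,
                        const std::vector<std::string> &InstOps) {
      std::vector<std::string> NewOps(AddrOps.begin(), AddrOps.end());
      for (size_t i = 2; i < InstOps.size(); ++i) // skip the def + tied use
        NewOps.push_back(InstOps[i]);
      return NewOps;
    }
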
+ if (isTwoAddr && NumOps >= 2 && i < 2 && + MI->getOperand(0).isReg() && + MI->getOperand(1).isReg() && + MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + OpcodeTablePtr = &RegOp2MemOpTable2Addr; + isTwoAddrFold = true; + } else if (i == 0) { // If operand 0 + if (MI->getOpcode() == X86::MOV16r0) + NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV32r0) + NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV64r0) + NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); + else if (MI->getOpcode() == X86::MOV8r0) + NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); + if (NewMI) + return NewMI; + + OpcodeTablePtr = &RegOp2MemOpTable0; + } else if (i == 1) { + OpcodeTablePtr = &RegOp2MemOpTable1; + } else if (i == 2) { + OpcodeTablePtr = &RegOp2MemOpTable2; + } + + // If table selected... + if (OpcodeTablePtr) { + // Find the Opcode to fuse + DenseMap::iterator I = + OpcodeTablePtr->find((unsigned*)MI->getOpcode()); + if (I != OpcodeTablePtr->end()) { + if (isTwoAddrFold) + NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this); + else + NewMI = FuseInst(MF, I->second, i, MOs, MI, *this); + return NewMI; + } + } + + // No fusion + if (PrintFailedFusing) + cerr << "We failed to fuse operand " << i << " in " << *MI; + return NULL; +} + + +MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { + // Check switch flag + if (NoFusing) return NULL; + + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Alignment = MFI->getObjectAlignment(FrameIndex); + // FIXME: Move alignment requirement into tables? + if (Alignment < 16) { + switch (MI->getOpcode()) { + default: break; + // Not always safe to fold movsd into these instructions since their load + // folding variants expects the address to be 16 byte aligned. + case X86::FsANDNPDrr: + case X86::FsANDNPSrr: + case X86::FsANDPDrr: + case X86::FsANDPSrr: + case X86::FsORPDrr: + case X86::FsORPSrr: + case X86::FsXORPDrr: + case X86::FsXORPSrr: + return NULL; + } + } + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + unsigned NewOpc = 0; + switch (MI->getOpcode()) { + default: return NULL; + case X86::TEST8rr: NewOpc = X86::CMP8ri; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + } + // Change to CMPXXri r, 0 first. + MI->setDesc(get(NewOpc)); + MI->getOperand(1).ChangeToImmediate(0); + } else if (Ops.size() != 1) + return NULL; + + SmallVector MOs; + MOs.push_back(MachineOperand::CreateFI(FrameIndex)); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); +} + +MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + MachineInstr *LoadMI) const { + // Check switch flag + if (NoFusing) return NULL; + + // Determine the alignment of the load. + unsigned Alignment = 0; + if (LoadMI->hasOneMemOperand()) + Alignment = LoadMI->memoperands_begin()->getAlignment(); + + // FIXME: Move alignment requirement into tables? + if (Alignment < 16) { + switch (MI->getOpcode()) { + default: break; + // Not always safe to fold movsd into these instructions since their load + // folding variants expects the address to be 16 byte aligned. 
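
The gate being applied here: the scalar-in-vector logical opcodes listed next fold into full 128-bit memory forms (andps-style accesses), which fault on a misaligned address, so the fold is refused unless the memory is known 16-byte aligned. A sketch of the predicate both folding entry points apply:

    // True if folding a load of the given alignment into Opc is allowed.
    bool foldAlignmentOK(bool OpcIsFsLogical, unsigned LoadAlign) {
      if (!OpcIsFsLogical)
        return true;          // no extra constraint for other opcodes
      return LoadAlign >= 16; // ANDPS/ORPS/XORPS-style folds need alignment
    }
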
+ case X86::FsANDNPDrr: + case X86::FsANDNPSrr: + case X86::FsANDPDrr: + case X86::FsANDPSrr: + case X86::FsORPDrr: + case X86::FsORPSrr: + case X86::FsXORPDrr: + case X86::FsXORPSrr: + return NULL; + } + } + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + unsigned NewOpc = 0; + switch (MI->getOpcode()) { + default: return NULL; + case X86::TEST8rr: NewOpc = X86::CMP8ri; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + } + // Change to CMPXXri r, 0 first. + MI->setDesc(get(NewOpc)); + MI->getOperand(1).ChangeToImmediate(0); + } else if (Ops.size() != 1) + return NULL; + + SmallVector MOs; + if (LoadMI->getOpcode() == X86::V_SET0 || + LoadMI->getOpcode() == X86::V_SETALLONES) { + // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. + // Create a constant-pool entry and operands to load from it. + + // x86-32 PIC requires a PIC base register for constant pools. + unsigned PICBase = 0; + if (TM.getRelocationModel() == Reloc::PIC_ && + !TM.getSubtarget().is64Bit()) + // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + // This doesn't work for several reasons. + // 1. GlobalBaseReg may have been spilled. + // 2. It may not be live at MI. + return false; + + // Create a v4i32 constant-pool entry. + MachineConstantPool &MCP = *MF.getConstantPool(); + const VectorType *Ty = VectorType::get(Type::Int32Ty, 4); + Constant *C = LoadMI->getOpcode() == X86::V_SET0 ? + ConstantVector::getNullValue(Ty) : + ConstantVector::getAllOnesValue(Ty); + unsigned CPI = MCP.getConstantPoolIndex(C, 16); + + // Create operands to load from the constant pool entry. + MOs.push_back(MachineOperand::CreateReg(PICBase, false)); + MOs.push_back(MachineOperand::CreateImm(1)); + MOs.push_back(MachineOperand::CreateReg(0, false)); + MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); + MOs.push_back(MachineOperand::CreateReg(0, false)); + } else { + // Folding a normal load. Just copy the load's address operands. + unsigned NumOps = LoadMI->getDesc().getNumOperands(); + for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) + MOs.push_back(LoadMI->getOperand(i)); + } + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); +} + + +bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + // Check switch flag + if (NoFusing) return 0; + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + switch (MI->getOpcode()) { + default: return false; + case X86::TEST8rr: + case X86::TEST16rr: + case X86::TEST32rr: + case X86::TEST64rr: + return true; + } + } + + if (Ops.size() != 1) + return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + unsigned NumOps = MI->getDesc().getNumOperands(); + bool isTwoAddr = NumOps > 1 && + MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + + // Folding a memory location into the two-address part of a two-address + // instruction is different than folding it other places. It requires + // replacing the *two* registers with the memory location. 
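A sketch of the dispatch that follows, which mirrors foldMemoryOperandImpl above; a hit in the per-index table is the entire feasibility test here, and the same lookup supplies the fused memory-form opcode when the fold is actually performed:

    // tied def/use pair (OpNum < 2) -> RegOp2MemOpTable2Addr
    // OpNum == 0 / 1 / 2            -> RegOp2MemOpTable0 / 1 / 2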
+ const DenseMap *OpcodeTablePtr = NULL; + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + OpcodeTablePtr = &RegOp2MemOpTable2Addr; + } else if (OpNum == 0) { // If operand 0 + switch (Opc) { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: + case X86::MOV8r0: + return true; + default: break; + } + OpcodeTablePtr = &RegOp2MemOpTable0; + } else if (OpNum == 1) { + OpcodeTablePtr = &RegOp2MemOpTable1; + } else if (OpNum == 2) { + OpcodeTablePtr = &RegOp2MemOpTable2; + } + + if (OpcodeTablePtr) { + // Find the Opcode to fuse + DenseMap::iterator I = + OpcodeTablePtr->find((unsigned*)Opc); + if (I != OpcodeTablePtr->end()) + return true; + } + return false; +} + +bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl &NewMIs) const { + DenseMap >::iterator I = + MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); + if (I == MemOp2RegOpTable.end()) + return false; + DebugLoc dl = MI->getDebugLoc(); + unsigned Opc = I->second.first; + unsigned Index = I->second.second & 0xf; + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + if (UnfoldLoad && !FoldedLoad) + return false; + UnfoldLoad &= FoldedLoad; + if (UnfoldStore && !FoldedStore) + return false; + UnfoldStore &= FoldedStore; + + const TargetInstrDesc &TID = get(Opc); + const TargetOperandInfo &TOI = TID.OpInfo[Index]; + const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() + ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + SmallVector AddrOps; + SmallVector BeforeOps; + SmallVector AfterOps; + SmallVector ImpOps; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (i >= Index && i < Index + X86AddrNumOperands) + AddrOps.push_back(Op); + else if (Op.isReg() && Op.isImplicit()) + ImpOps.push_back(Op); + else if (i < Index) + BeforeOps.push_back(Op); + else if (i > Index) + AfterOps.push_back(Op); + } + + // Emit the load instruction. + if (UnfoldLoad) { + loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); + if (UnfoldStore) { + // Address operands cannot be marked isKill. + for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { + MachineOperand &MO = NewMIs[0]->getOperand(i); + if (MO.isReg()) + MO.setIsKill(false); + } + } + } + + // Emit the data processing instruction. + MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); + MachineInstrBuilder MIB(DataMI); + + if (FoldedStore) + MIB.addReg(Reg, RegState::Define); + for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) + MIB.addOperand(BeforeOps[i]); + if (FoldedLoad) + MIB.addReg(Reg); + for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) + MIB.addOperand(AfterOps[i]); + for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { + MachineOperand &MO = ImpOps[i]; + MIB.addReg(MO.getReg(), + getDefRegState(MO.isDef()) | + RegState::Implicit | + getKillRegState(MO.isKill()) | + getDeadRegState(MO.isDead())); + } + // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 
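This restore exists because TEST?rr reads the same register twice, so a direct fold would need the memory operand in two places; the fold therefore first rewrote it to the EFLAGS-equivalent CMP?ri r, 0, leaving a single register use to replace. Schematically:

    // fold:   TEST32rr %r, %r  ->  CMP32ri %r, 0  ->  CMP32mi <mem>, 0
    // unfold: CMP32ri %r, 0    ->  TEST32rr %r, %r   (the switch below)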
+ unsigned NewOpc = 0; + switch (DataMI->getOpcode()) { + default: break; + case X86::CMP64ri32: + case X86::CMP32ri: + case X86::CMP16ri: + case X86::CMP8ri: { + MachineOperand &MO0 = DataMI->getOperand(0); + MachineOperand &MO1 = DataMI->getOperand(1); + if (MO1.getImm() == 0) { + switch (DataMI->getOpcode()) { + default: break; + case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; + case X86::CMP32ri: NewOpc = X86::TEST32rr; break; + case X86::CMP16ri: NewOpc = X86::TEST16rr; break; + case X86::CMP8ri: NewOpc = X86::TEST8rr; break; + } + DataMI->setDesc(get(NewOpc)); + MO1.ChangeToRegister(MO0.getReg(), false); + } + } + } + NewMIs.push_back(DataMI); + + // Emit the store instruction. + if (UnfoldStore) { + const TargetOperandInfo &DstTOI = TID.OpInfo[0]; + const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass() + ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); + } + + return true; +} + +bool +X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl &NewNodes) const { + if (!N->isMachineOpcode()) + return false; + + DenseMap >::iterator I = + MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); + if (I == MemOp2RegOpTable.end()) + return false; + unsigned Opc = I->second.first; + unsigned Index = I->second.second & 0xf; + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + const TargetInstrDesc &TID = get(Opc); + const TargetOperandInfo &TOI = TID.OpInfo[Index]; + const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() + ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + unsigned NumDefs = TID.NumDefs; + std::vector AddrOps; + std::vector BeforeOps; + std::vector AfterOps; + DebugLoc dl = N->getDebugLoc(); + unsigned NumOps = N->getNumOperands(); + for (unsigned i = 0; i != NumOps-1; ++i) { + SDValue Op = N->getOperand(i); + if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) + AddrOps.push_back(Op); + else if (i < Index-NumDefs) + BeforeOps.push_back(Op); + else if (i > Index-NumDefs) + AfterOps.push_back(Op); + } + SDValue Chain = N->getOperand(NumOps-1); + AddrOps.push_back(Chain); + + // Emit the load instruction. + SDNode *Load = 0; + const MachineFunction &MF = DAG.getMachineFunction(); + if (FoldedLoad) { + MVT VT = *RC->vt_begin(); + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + VT, MVT::Other, &AddrOps[0], AddrOps.size()); + NewNodes.push_back(Load); + } + + // Emit the data processing instruction. + std::vector VTs; + const TargetRegisterClass *DstRC = 0; + if (TID.getNumDefs() > 0) { + const TargetOperandInfo &DstTOI = TID.OpInfo[0]; + DstRC = DstTOI.isLookupPtrRegClass() + ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + VTs.push_back(*DstRC->vt_begin()); + } + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) + VTs.push_back(VT); + } + if (Load) + BeforeOps.push_back(SDValue(Load, 0)); + std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); + SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0], + BeforeOps.size()); + NewNodes.push_back(NewNode); + + // Emit the store instruction. 
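For instance (a hypothetical but representative entry), unfolding a load-op-store form such as ADD32mi yields three chained nodes: the load built above, the data-processing node built here, and the store rebuilt below.

    // ADD32mi <mem>, imm   ==unfold==>
    //   t0 = MOV32rm <mem>          (Load)
    //   t1 = ADD32ri t0, imm        (NewNode)
    //   MOV32mr <mem>, t1           (Store, emitted below)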
+ if (FoldedStore) { + AddrOps.pop_back(); + AddrOps.push_back(SDValue(NewNode, 0)); + AddrOps.push_back(Chain); + bool isAligned = (RI.getStackAlignment() >= 16) || + RI.needsStackRealignment(MF); + SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, + isAligned, TM), + dl, MVT::Other, + &AddrOps[0], AddrOps.size()); + NewNodes.push_back(Store); + } + + return true; +} + +unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore) const { + DenseMap >::iterator I = + MemOp2RegOpTable.find((unsigned*)Opc); + if (I == MemOp2RegOpTable.end()) + return 0; + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + if (UnfoldLoad && !FoldedLoad) + return 0; + if (UnfoldStore && !FoldedStore) + return 0; + return I->second.first; +} + +bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case X86::TCRETURNri: + case X86::TCRETURNdi: + case X86::RET: // Return. + case X86::RETI: + case X86::TAILJMPd: + case X86::TAILJMPr: + case X86::TAILJMPm: + case X86::JMP: // Uncond branch. + case X86::JMP32r: // Indirect branch. + case X86::JMP64r: // Indirect branch (64-bit). + case X86::JMP32m: // Indirect branch through mem. + case X86::JMP64m: // Indirect branch through mem (64-bit). + return true; + default: return false; + } +} + +bool X86InstrInfo:: +ReverseBranchCondition(SmallVectorImpl &Cond) const { + assert(Cond.size() == 1 && "Invalid X86 branch condition!"); + X86::CondCode CC = static_cast(Cond[0].getImm()); + if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) + return true; + Cond[0].setImm(GetOppositeBranchCondition(CC)); + return false; +} + +bool X86InstrInfo:: +isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + // FIXME: Return false for x87 stack register classes for now. We can't + // allow any loads of these registers before FpGet_ST0_80. + return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || + RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); +} + +unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { + switch (Desc->TSFlags & X86II::ImmMask) { + case X86II::Imm8: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: return 4; + case X86II::Imm64: return 8; + default: assert(0 && "Immediate size not set!"); + return 0; + } +} + +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? +/// e.g. r8, xmm8, etc. 
+bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isReg()) return false; + switch (MO.getReg()) { + default: break; + case X86::R8: case X86::R9: case X86::R10: case X86::R11: + case X86::R12: case X86::R13: case X86::R14: case X86::R15: + case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: + case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: + case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: + case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: + case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: + case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + return true; + } + return false; +} + + +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const TargetInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= (1 << 0) | (1 << 2); + break; + case X86II::MRMSrcReg: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 
2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + +/// sizePCRelativeBlockAddress - This method returns the size of a PC +/// relative block address instruction +/// +static unsigned sizePCRelativeBlockAddress() { + return 4; +} + +/// sizeGlobalAddress - Give the size of the emission of this global address +/// +static unsigned sizeGlobalAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeConstPoolAddress - Give the size of the emission of this constant +/// pool address +/// +static unsigned sizeConstPoolAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeExternalSymbolAddress - Give the size of the emission of this external +/// symbol +/// +static unsigned sizeExternalSymbolAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeJumpTableAddress - Give the size of the emission of this jump +/// table address +/// +static unsigned sizeJumpTableAddress(bool dword) { + return dword ? 8 : 4; +} + +static unsigned sizeConstant(unsigned Size) { + return Size; +} + +static unsigned sizeRegModRMByte(){ + return 1; +} + +static unsigned sizeSIBByte(){ + return 1; +} + +static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { + unsigned FinalSize = 0; + // If this is a simple integer displacement that doesn't require a relocation. + if (!RelocOp) { + FinalSize += sizeConstant(4); + return FinalSize; + } + + // Otherwise, this is something that requires a relocation. + if (RelocOp->isGlobal()) { + FinalSize += sizeGlobalAddress(false); + } else if (RelocOp->isCPI()) { + FinalSize += sizeConstPoolAddress(false); + } else if (RelocOp->isJTI()) { + FinalSize += sizeJumpTableAddress(false); + } else { + assert(0 && "Unknown value to relocate!"); + } + return FinalSize; +} + +static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, + bool IsPIC, bool Is64BitMode) { + const MachineOperand &Op3 = MI.getOperand(Op+3); + int DispVal = 0; + const MachineOperand *DispForReloc = 0; + unsigned FinalSize = 0; + + // Figure out what sort of displacement we have to handle here. + if (Op3.isGlobal()) { + DispForReloc = &Op3; + } else if (Op3.isCPI()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal = 1; + } + } else if (Op3.isJTI()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal = 1; + } + } else { + DispVal = 1; + } + + const MachineOperand &Base = MI.getOperand(Op); + const MachineOperand &IndexReg = MI.getOperand(Op+2); + + unsigned BaseReg = Base.getReg(); + + // Is a SIB byte needed? + if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + IndexReg.getReg() == 0 && + (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { + if (BaseReg == 0) { // Just a displacement? + // Emit special case [disp32] encoding + ++FinalSize; + FinalSize += getDisplacementFieldSize(DispForReloc); + } else { + unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + // Emit simple indirect register encoding... [EAX] f.e. 
+ ++FinalSize; + // Be pessimistic and assume it's a disp32, not a disp8 + } else { + // Emit the most general non-SIB encoding: [REG+disp32] + ++FinalSize; + FinalSize += getDisplacementFieldSize(DispForReloc); + } + } + + } else { // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + if (BaseReg == 0 || DispForReloc) { + // Emit the normal disp32 encoding. + ++FinalSize; + ForceDisp32 = true; + } else { + ++FinalSize; + } + + FinalSize += sizeSIBByte(); + + // Do we need to output a displacement? + if (DispVal != 0 || ForceDisp32) { + FinalSize += getDisplacementFieldSize(DispForReloc); + } + } + return FinalSize; +} + + +static unsigned GetInstSizeWithDesc(const MachineInstr &MI, + const TargetInstrDesc *Desc, + bool IsPIC, bool Is64BitMode) { + + unsigned Opcode = Desc->Opcode; + unsigned FinalSize = 0; + + // Emit the lock opcode prefix as needed. + if (Desc->TSFlags & X86II::LOCK) ++FinalSize; + + // Emit segment override opcode prefix as needed. + switch (Desc->TSFlags & X86II::SegOvrMask) { + case X86II::FS: + case X86II::GS: + ++FinalSize; + break; + default: assert(0 && "Invalid segment!"); + case 0: break; // No segment override! + } + + // Emit the repeat opcode prefix as needed. + if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; + + // Emit the operand size opcode prefix as needed. + if (Desc->TSFlags & X86II::OpSize) ++FinalSize; + + // Emit the address size opcode prefix as needed. + if (Desc->TSFlags & X86II::AdSize) ++FinalSize; + + bool Need0FPrefix = false; + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::REP: break; // already handled. + case X86II::XS: // F3 0F + ++FinalSize; + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + ++FinalSize; + Need0FPrefix = true; + break; + case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: + case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: + ++FinalSize; + break; // Two-byte opcode prefix + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + } + + if (Is64BitMode) { + // REX prefix + unsigned REX = X86InstrInfo::determineREX(MI); + if (REX) + ++FinalSize; + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + ++FinalSize; + + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::T8: // 0F 38 + ++FinalSize; + break; + case X86II::TA: // 0F 3A + ++FinalSize; + break; + } + + // If this is a two-address instruction, skip one of the register operands. + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + CurOp++; + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + switch (Desc->TSFlags & X86II::FormMask) { + default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + case X86II::Pseudo: + // Remember the current PC offset, this is the PIC relocation + // base address. 
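Pseudo forms normally contribute no bytes; the cases below are the exceptions. As a worked example (assuming the usual one-byte call opcode and one-byte pop encoding), MOVPC32r counts 1 opcode byte plus a 4-byte displacement here, and GetInstSizeInBytes later adds the matching POP32r, for 6 bytes in total.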
+ switch (Opcode) { + default: + break; + case TargetInstrInfo::INLINEASM: { + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo(); + FinalSize += AI->getInlineAsmLength(AsmStr); + break; + } + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case X86::DWARF_LOC: + case X86::FP_REG_KILL: + break; + case X86::MOVPC32r: { + // This emits the "call" portion of this pseudo instruction. + ++FinalSize; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + break; + } + } + CurOp = NumOps; + break; + case X86II::RawFrm: + ++FinalSize; + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + if (MO.isMBB()) { + FinalSize += sizePCRelativeBlockAddress(); + } else if (MO.isGlobal()) { + FinalSize += sizeGlobalAddress(false); + } else if (MO.isSymbol()) { + FinalSize += sizeExternalSymbolAddress(false); + } else if (MO.isImm()) { + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } else { + assert(0 && "Unknown RawFrm operand!"); + } + } + break; + + case X86II::AddRegFrm: + ++FinalSize; + ++CurOp; + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64ri) + dword = true; + if (MO1.isGlobal()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO1.isSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO1.isCPI()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO1.isJTI()) + FinalSize += sizeJumpTableAddress(dword); + } + } + break; + + case X86II::MRMDestReg: { + ++FinalSize; + FinalSize += sizeRegModRMByte(); + CurOp += 2; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + case X86II::MRMDestMem: { + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); + CurOp += X86AddrNumOperands + 1; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + + case X86II::MRMSrcReg: + ++FinalSize; + FinalSize += sizeRegModRMByte(); + CurOp += 2; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + + case X86II::MRMSrcMem: { + int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; + + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); + CurOp += AddrOperands + 1; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + ++FinalSize; + if (Desc->getOpcode() == X86::LFENCE || + Desc->getOpcode() == X86::MFENCE) { + // Special handling of lfence and mfence; + FinalSize += sizeRegModRMByte(); + } else if (Desc->getOpcode() == X86::MONITOR || + Desc->getOpcode() == X86::MWAIT) { + // Special handling of monitor and mwait. + FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. 
+ } else { + ++CurOp; + FinalSize += sizeRegModRMByte(); + } + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64ri32) + dword = true; + if (MO1.isGlobal()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO1.isSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO1.isCPI()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO1.isJTI()) + FinalSize += sizeJumpTableAddress(dword); + } + } + break; + + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); + CurOp += X86AddrNumOperands; + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO.isImm()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64mi32) + dword = true; + if (MO.isGlobal()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO.isSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO.isCPI()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO.isJTI()) + FinalSize += sizeJumpTableAddress(dword); + } + } + break; + } + + case X86II::MRMInitReg: + ++FinalSize; + // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). + FinalSize += sizeRegModRMByte(); + ++CurOp; + break; + } + + if (!Desc->isVariadic() && CurOp != NumOps) { + cerr << "Cannot determine size: "; + MI.dump(); + cerr << '\n'; + abort(); + } + + + return FinalSize; +} + + +unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const TargetInstrDesc &Desc = MI->getDesc(); + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); + unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); + if (Desc.getOpcode() == X86::MOVPC32r) { + Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); + } + return Size; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + assert(!TM.getSubtarget().is64Bit() && + "X86-64 PIC uses RIP relative addressing"); + + X86MachineFunctionInfo *X86FI = MF->getInfo(); + unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + + const TargetInstrInfo *TII = TM.getInstrInfo(); + // Operand of MovePCtoStack is completely ignored by asm printer. It's + // only used in JIT code emission as displacement to pc. 
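The two instructions built next materialize the PIC base. Conceptually (label invented for illustration), the printed sequence is:

    // MOVPC32r:  call .Lpc          ; pushes the address of .Lpc
    //            .Lpc: pop %reg     ; the PC now lives in %reg
    // GOT style: add $_GLOBAL_OFFSET_TABLE_, %reg   (the ADD32ri below)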
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC) + .addImm(0); + + // If we're using vanilla 'GOT' PIC style, we should use relative addressing + // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external + if (TM.getRelocationModel() == Reloc::PIC_ && + TM.getSubtarget().isPICStyleGOT()) { + GlobalBaseReg = + RegInfo.createVirtualRegister(X86::GR32RegisterClass); + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + } else { + GlobalBaseReg = PC; + } + + X86FI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h new file mode 100644 index 000000000000..e09769ee912c --- /dev/null +++ b/lib/Target/X86/X86InstrInfo.h @@ -0,0 +1,461 @@ +//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86INSTRUCTIONINFO_H +#define X86INSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "X86.h" +#include "X86RegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class X86RegisterInfo; + class X86TargetMachine; + +namespace X86 { + // X86 specific condition code. These correspond to X86_*_COND in + // X86InstrInfo.td. They must be kept in synch. + enum CondCode { + COND_A = 0, + COND_AE = 1, + COND_B = 2, + COND_BE = 3, + COND_E = 4, + COND_G = 5, + COND_GE = 6, + COND_L = 7, + COND_LE = 8, + COND_NE = 9, + COND_NO = 10, + COND_NP = 11, + COND_NS = 12, + COND_O = 13, + COND_P = 14, + COND_S = 15, + + // Artificial condition codes. These are used by AnalyzeBranch + // to indicate a block terminated with two conditional branches to + // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE, + // which can't be represented on x86 with a single condition. These + // are never used in MachineInstrs. + COND_NE_OR_P, + COND_NP_OR_E, + + COND_INVALID + }; + + // Turn condition code into conditional branch opcode. + unsigned GetCondBranchFromCond(CondCode CC); + + /// GetOppositeBranchCondition - Return the inverse of the specified cond, + /// e.g. turning COND_E to COND_NE. + CondCode GetOppositeBranchCondition(X86::CondCode CC); + +} + +/// X86II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace X86II { + enum { + //===------------------------------------------------------------------===// + // Instruction types. These are the standard/most common forms for X86 + // instructions. + // + + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// Raw - This form is for instructions that don't have any operands, so + /// they are just a fixed opcode value, like 'leave'. + RawFrm = 1, + + /// AddRegFrm - This form is used for instructions like 'push r32' that have + /// their one register operand added to their opcode. 
+    AddRegFrm = 2,
+
+    /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+    /// to specify a destination, which in this case is a register.
+    ///
+    MRMDestReg = 3,
+
+    /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+    /// to specify a destination, which in this case is memory.
+    ///
+    MRMDestMem = 4,
+
+    /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+    /// to specify a source, which in this case is a register.
+    ///
+    MRMSrcReg = 5,
+
+    /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+    /// to specify a source, which in this case is memory.
+    ///
+    MRMSrcMem = 6,
+
+    /// MRM[0-7][rm] - These forms are used to represent instructions that use
+    /// a Mod/RM byte, and use the middle field to hold extended opcode
+    /// information. In the Intel manual these are represented as /0, /1, ...
+    ///
+
+    // First, instructions that operate on a register r/m operand...
+    MRM0r = 16,  MRM1r = 17,  MRM2r = 18,  MRM3r = 19, // Format /0 /1 /2 /3
+    MRM4r = 20,  MRM5r = 21,  MRM6r = 22,  MRM7r = 23, // Format /4 /5 /6 /7
+
+    // Next, instructions that operate on a memory r/m operand...
+    MRM0m = 24,  MRM1m = 25,  MRM2m = 26,  MRM3m = 27, // Format /0 /1 /2 /3
+    MRM4m = 28,  MRM5m = 29,  MRM6m = 30,  MRM7m = 31, // Format /4 /5 /6 /7
+
+    // MRMInitReg - This form is used for instructions whose source and
+    // destination are the same register.
+    MRMInitReg = 32,
+
+    FormMask = 63,
+
+    //===------------------------------------------------------------------===//
+    // Actual flags...
+
+    // OpSize - Set if this instruction requires an operand size prefix (0x66),
+    // which most often indicates that the instruction operates on 16 bit data
+    // instead of 32 bit data.
+    OpSize = 1 << 6,
+
+    // AdSize - Set if this instruction requires an address size prefix (0x67),
+    // which most often indicates that the instruction uses a 16 bit address
+    // instead of a 32 bit address (or a 32 bit address in 64 bit mode).
+    AdSize = 1 << 7,
+
+    //===------------------------------------------------------------------===//
+    // Op0Mask - There are several prefix bytes that are used to form two byte
+    // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+    // used to obtain the setting of this field. If no bits in this field are
+    // set, there is no prefix byte for obtaining a multibyte opcode.
+    //
+    Op0Shift = 8,
+    Op0Mask = 0xF << Op0Shift,
+
+    // TB - TwoByte - Set if this instruction has a two byte opcode, which
+    // starts with a 0x0F byte before the real opcode.
+    TB = 1 << Op0Shift,
+
+    // REP - The 0xF3 prefix byte indicating repetition of the following
+    // instruction.
+    REP = 2 << Op0Shift,
+
+    // D8-DF - These escape opcodes are used by the floating point unit. These
+    // values must remain sequential.
+    D8 = 3 << Op0Shift,   D9 = 4 << Op0Shift,
+    DA = 5 << Op0Shift,   DB = 6 << Op0Shift,
+    DC = 7 << Op0Shift,   DD = 8 << Op0Shift,
+    DE = 9 << Op0Shift,   DF = 10 << Op0Shift,
+
+    // XS, XD - These prefix codes are for single and double precision scalar
+    // floating point operations performed in the SSE registers.
+    XD = 11 << Op0Shift,  XS = 12 << Op0Shift,
+
+    // T8, TA - Prefix after the 0x0F prefix.
+    T8 = 13 << Op0Shift,  TA = 14 << Op0Shift,
+
+    //===------------------------------------------------------------------===//
+    // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+    // They are used to specify GPRs and SSE registers, 64-bit operand size,
+    // etc. We only care about the REX.W and REX.R bits, and only the former is
+    // statically determined.
+    //
+    REXShift = 12,
+    REX_W = 1 << REXShift,
+
+    //===------------------------------------------------------------------===//
+    // This three-bit field describes the size of an immediate operand. Zero is
+    // unused so that we can tell if we forgot to set a value.
+    ImmShift = 13,
+    ImmMask  = 7 << ImmShift,
+    Imm8     = 1 << ImmShift,
+    Imm16    = 2 << ImmShift,
+    Imm32    = 3 << ImmShift,
+    Imm64    = 4 << ImmShift,
+
+    //===------------------------------------------------------------------===//
+    // FP Instruction Classification... Zero is non-fp instruction.
+
+    // FPTypeMask - Mask for all of the FP types...
+    FPTypeShift = 16,
+    FPTypeMask  = 7 << FPTypeShift,
+
+    // NotFP - The default, set for instructions that do not use FP registers.
+    NotFP      = 0 << FPTypeShift,
+
+    // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+    ZeroArgFP  = 1 << FPTypeShift,
+
+    // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+    OneArgFP   = 2 << FPTypeShift,
+
+    // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes
+    // a result back to ST(0). For example, fcos, fsqrt, etc.
+    //
+    OneArgFPRW = 3 << FPTypeShift,
+
+    // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+    // explicit argument, storing the result to either ST(0) or the implicit
+    // argument. For example: fadd, fsub, fmul, etc...
+    TwoArgFP   = 4 << FPTypeShift,
+
+    // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+    // explicit argument, but have no destination. Example: fucom, fucomi, ...
+    CompareFP  = 5 << FPTypeShift,
+
+    // CondMovFP - "2 operand" floating point conditional move instructions.
+    CondMovFP  = 6 << FPTypeShift,
+
+    // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+    SpecialFP  = 7 << FPTypeShift,
+
+    // Lock prefix
+    LOCKShift = 19,
+    LOCK = 1 << LOCKShift,
+
+    // Segment override prefixes. Currently we just need the ability to address
+    // stuff in the gs and fs segments.
+    SegOvrShift = 20,
+    SegOvrMask  = 3 << SegOvrShift,
+    FS          = 1 << SegOvrShift,
+    GS          = 2 << SegOvrShift,
+
+    // Bits 22 -> 23 are unused
+    OpcodeShift = 24,
+    OpcodeMask  = 0xFF << OpcodeShift
+  };
+}
+
+const int X86AddrNumOperands = 5;
+
+inline static bool isScale(const MachineOperand &MO) {
+  return MO.isImm() &&
+    (MO.getImm() == 1 || MO.getImm() == 2 ||
+     MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+  if (MI->getOperand(Op).isFI()) return true;
+  return Op+4 <= MI->getNumOperands() &&
+    MI->getOperand(Op  ).isReg() && isScale(MI->getOperand(Op+1)) &&
+    MI->getOperand(Op+2).isReg() &&
+    (MI->getOperand(Op+3).isImm() ||
+     MI->getOperand(Op+3).isGlobal() ||
+     MI->getOperand(Op+3).isCPI() ||
+     MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+  if (MI->getOperand(Op).isFI()) return true;
+  return Op+5 <= MI->getNumOperands() &&
+    MI->getOperand(Op+4).isReg() &&
+    isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public TargetInstrInfoImpl {
+  X86TargetMachine &TM;
+  const X86RegisterInfo RI;
+
+  /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+  /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+  ///
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+
+  /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ /// + DenseMap > MemOp2RegOpTable; + +public: + explicit X86InstrInfo(X86TargetMachine &tm); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const X86RegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + + bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; + + bool isInvariantLoad(const MachineInstr *MI) const; + + /// convertToThreeAddress - This method must be implemented by targets that + /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target + /// may be able to convert a two-address instruction into a true + /// three-address instruction on demand. This allows the X86 target (for + /// example) to convert ADD and SHL instructions into LEA instructions if they + /// would require register copies due to two-addressness. + /// + /// This method returns a null pointer if the transformation cannot be + /// performed, otherwise it returns the new instruction. + /// + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + /// commuteInstruction - We have a few instructions that must be hacked on to + /// commute them. + /// + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + + // Branch analysis. 
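(For reference, the AnalyzeBranch contract these overrides implement: on success, TBB, FBB and Cond describe the block's exit, so a block ending in "jne L1; jmp L2" yields TBB = L1, FBB = L2, Cond = { COND_NE }, while a lone "jmp L1" yields TBB = L1, a null FBB, and an empty Cond.)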
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const; + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const; + + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI) const; + + /// foldMemoryOperand - If this target supports it, fold a load or store of + /// the specified stack slot into the specified machine instruction for the + /// specified operand(s). If this is possible, the target should perform the + /// folding and return true, otherwise it should return false. If it folds + /// the instruction, it is likely that the MachineInstruction the iterator + /// references has been changed. + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + + /// foldMemoryOperand - Same as the previous version except it allows folding + /// of any load and store from / to any address, not just from a specific + /// stack slot. + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const; + + /// canFoldMemoryOperand - Returns true if the specified load / store is + /// folding is possible. + virtual bool canFoldMemoryOperand(const MachineInstr*, + const SmallVectorImpl &) const; + + /// unfoldMemoryOperand - Separate a single instruction which folded a load or + /// a store or a load and a store into two or more instruction. If this is + /// possible, returns true as well as the new instructions by reference. + virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl &NewMIs) const; + + virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl &NewNodes) const; + + /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new + /// instruction after load / store are unfolded from an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. 
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore) const; + + virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + virtual + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + + /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine + /// instruction that defines the specified register class. + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const { + return TID->TSFlags >> X86II::OpcodeShift; + } + unsigned char getBaseOpcodeFor(unsigned Opcode) const { + return getBaseOpcodeFor(&get(Opcode)); + } + + static bool isX86_64NonExtLowByteReg(unsigned reg) { + return (reg == X86::SPL || reg == X86::BPL || + reg == X86::SIL || reg == X86::DIL); + } + + static unsigned sizeOfImm(const TargetInstrDesc *Desc); + static bool isX86_64ExtendedReg(const MachineOperand &MO); + static unsigned determineREX(const MachineInstr &MI); + + /// GetInstSize - Returns the size of the specified MachineInstr. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + + /// getGlobalBaseReg - Return a virtual register initialized with the + /// the global base register value. Output instructions required to + /// initialize the register in the function entry block, if necessary. + /// + unsigned getGlobalBaseReg(MachineFunction *MF) const; + +private: + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl &MOs) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td new file mode 100644 index 000000000000..50ae41764151 --- /dev/null +++ b/lib/Target/X86/X86InstrInfo.td @@ -0,0 +1,3961 @@ +//===- X86InstrInfo.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 instruction set, defining the instructions, and +// properties of the instructions which are needed for code generation, machine +// code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// X86 specific DAG Nodes. +// + +def SDTIntShiftDOp: SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisInt<3>]>; + +def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDTX86Cmov : SDTypeProfile<1, 4, + [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; + +// Unary and binary operator instructions that set EFLAGS as a side-effect. 
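In the profiles here, SDTypeProfile<R, N, constraints> declares a node with R results and N operands, and the constraint indices count results first, then operands; in SDTX86Cmov above, for example, SDTCisVT<3, i8> pins the condition-code operand (overall index 3) to i8.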
+def SDTUnaryArithWithFlags : SDTypeProfile<1, 1, + [SDTCisInt<0>]>; +def SDTBinaryArithWithFlags : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>]>; +def SDTX86BrCond : SDTypeProfile<0, 3, + [SDTCisVT<0, OtherVT>, + SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; + +def SDTX86SetCC : SDTypeProfile<1, 2, + [SDTCisVT<0, i8>, + SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; + +def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, + SDTCisVT<2, i8>]>; +def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + +def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>; +def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>; + +def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; + +def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; + +def SDTX86RdTsc : SDTypeProfile<0, 0, []>; + +def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; + +def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; + +def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; + +def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>; +def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>; +def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; +def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; + +def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; + +def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; + +def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; +def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, + [SDNPHasChain]>; +def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; + +def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, + SDNPMayLoad]>; +def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, + SDNPMayLoad]>; +def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, + [SDNPHasChain, SDNPOptInFlag]>; + +def X86callseq_start : + SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def X86callseq_end : + SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + +def 
X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call, + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + +def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; +def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, + SDNPMayLoad]>; + +def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, + [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; + +def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; +def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; + +def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress", + SDT_X86SegmentBaseAddress, []>; + +def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, + [SDNPHasChain]>; + +def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, + [SDNPHasChain, SDNPOptInFlag]>; + +def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>; +def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; +def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; +def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; +def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; + +def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; + +//===----------------------------------------------------------------------===// +// X86 Operand Definitions. +// + +// *mem - Operand definitions for the funky X86 addressing mode operands. +// +class X86MemOperand : Operand { + let PrintMethod = printMethod; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); +} + +def i8mem : X86MemOperand<"printi8mem">; +def i16mem : X86MemOperand<"printi16mem">; +def i32mem : X86MemOperand<"printi32mem">; +def i64mem : X86MemOperand<"printi64mem">; +def i128mem : X86MemOperand<"printi128mem">; +def f32mem : X86MemOperand<"printf32mem">; +def f64mem : X86MemOperand<"printf64mem">; +def f80mem : X86MemOperand<"printf80mem">; +def f128mem : X86MemOperand<"printf128mem">; + +// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of +// plain GR64, so that it doesn't potentially require a REX prefix. +def i8mem_NOREX : Operand { + let PrintMethod = "printi8mem"; + let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm); +} + +def lea32mem : Operand { + let PrintMethod = "printlea32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); +} + +def SSECC : Operand { + let PrintMethod = "printSSECC"; +} + +def piclabel: Operand { + let PrintMethod = "printPICLabel"; +} + +// A couple of more descriptive operand definitions. +// 16-bits but only 8 bits are significant. +def i16i8imm : Operand; +// 32-bits but only 8 bits are significant. +def i32i8imm : Operand; + +// Branch targets have OtherVT type. +def brtarget : Operand; + +//===----------------------------------------------------------------------===// +// X86 Complex Pattern Definitions. +// + +// Define X86 specific addressing mode. +def addr : ComplexPattern; +def lea32addr : ComplexPattern; + +//===----------------------------------------------------------------------===// +// X86 Instruction Predicate Definitions. 
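Each Predicate below wraps a C++ expression that the instruction selector evaluates against the current subtarget or target machine; instruction and pattern definitions opt in by listing predicates in a Requires clause, e.g. Requires<[HasSSE2]> on an SSE2-only pattern.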
+def HasMMX       : Predicate<"Subtarget->hasMMX()">;
+def HasSSE1      : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2      : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3      : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
+def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
+def In32BitMode  : Predicate<"!Subtarget->is64Bit()">;
+def In64BitMode  : Predicate<"Subtarget->is64Bit()">;
+def SmallCode    : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def IsStatic     : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSpeed  : Predicate<"!OptForSize">;
+def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A   : PatLeaf<(i8 0)>;  // alt. COND_NBE
+def X86_COND_AE  : PatLeaf<(i8 1)>;  // alt. COND_NC
+def X86_COND_B   : PatLeaf<(i8 2)>;  // alt. COND_C
+def X86_COND_BE  : PatLeaf<(i8 3)>;  // alt. COND_NA
+def X86_COND_E   : PatLeaf<(i8 4)>;  // alt. COND_Z
+def X86_COND_G   : PatLeaf<(i8 5)>;  // alt. COND_NLE
+def X86_COND_GE  : PatLeaf<(i8 6)>;  // alt. COND_NL
+def X86_COND_L   : PatLeaf<(i8 7)>;  // alt. COND_NGE
+def X86_COND_LE  : PatLeaf<(i8 8)>;  // alt. COND_NG
+def X86_COND_NE  : PatLeaf<(i8 9)>;  // alt. COND_NZ
+def X86_COND_NO  : PatLeaf<(i8 10)>;
+def X86_COND_NP  : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS  : PatLeaf<(i8 12)>;
+def X86_COND_O   : PatLeaf<(i8 13)>;
+def X86_COND_P   : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S   : PatLeaf<(i8 15)>;
+
+def i16immSExt8  : PatLeaf<(i16 imm), [{
+  // i16immSExt8 predicate - True if the 16-bit immediate fits in an 8-bit
+  // sign-extended field.
+  return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+def i32immSExt8  : PatLeaf<(i32 imm), [{
+  // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+  // sign-extended field.
+  return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
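The address-space guard repeated in the load fragments below keeps ordinary loads from matching x86 segment-relative memory: address spaces above 255 are used for segment overrides, with 256 meaning GS and 257 meaning FS, exactly as the gsload and fsload fragments further down encode.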
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + if (const Value *Src = LD->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + return true; + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 2 && !LD->isVolatile(); + return false; +}]>; + +def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + if (const Value *Src = LD->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 2 && !LD->isVolatile(); + return false; +}]>; + +def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + if (const Value *Src = LD->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + return true; + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 4 && !LD->isVolatile(); + return false; +}]>; + +def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + if (const Value *Src = LD->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + if (LD->isVolatile()) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + return true; + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 4; + return false; +}]>; + +def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 256; + return false; +}]>; + +def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 257; + return false; +}]>; + +def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; + +def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; + +def sextloadi16i8 :
PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; +def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; +def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>; + +def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>; +def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>; +def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>; +def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>; +def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>; +def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>; + +def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>; +def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>; +def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>; +def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>; +def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>; +def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; + + +// An 'and' node with a single use. +def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; +// An 'srl' node with a single use. +def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; +// A 'trunc' node with a single use. +def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ + return N->hasOneUse(); +}]>; + +// 'shld' and 'shrd' instruction patterns. Note that even though these have +// the srl and shl in their patterns, the C++ code must still check for them, +// because predicates are tested before child nodes are explored. + +def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), + (or (srl node:$src1, node:$amt1), + (shl node:$src2, node:$amt2)), [{ + assert(N->getOpcode() == ISD::OR); + return N->getOperand(0).getOpcode() == ISD::SRL && + N->getOperand(1).getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && + N->getOperand(0).getConstantOperandVal(1) == + N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); +}]>; + +def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), + (or (shl node:$src1, node:$amt1), + (srl node:$src2, node:$amt2)), [{ + assert(N->getOpcode() == ISD::OR); + return N->getOperand(0).getOpcode() == ISD::SHL && + N->getOperand(1).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && + N->getOperand(0).getConstantOperandVal(1) == + N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); +}]>; + +//===----------------------------------------------------------------------===// +// Instruction list... +// + +// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into +// a stack adjustment and the codegen must know that they may modify the stack +// pointer before prolog-epilog rewriting occurs. +// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become +// sub / add which can clobber EFLAGS.
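// [Editorial note] Concretely: a call needing 12 bytes of outgoing argument
// space is bracketed as ADJCALLSTACKDOWN 12 ... ADJCALLSTACKUP 12, 0 (the
// amounts here are illustrative), and prolog-epilog insertion typically
// rewrites the pair to "sub esp, 12" / "add esp, 12". SUB and ADD both
// rewrite EFLAGS, hence EFLAGS in the pessimistic Defs list below.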
+let Defs = [ESP, EFLAGS], Uses = [ESP] in { +def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), + "#ADJCALLSTACKDOWN", + [(X86callseq_start timm:$amt)]>, + Requires<[In32BitMode]>; +def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[In32BitMode]>; +} + +// Nop +let neverHasSideEffects = 1 in + def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; + +// PIC base +let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in + def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label), + "call\t$label\n\t" + "pop{l}\t$reg", []>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions... +// + +// Return instructions. +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in { + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), + "ret", + [(X86retflag 0)]>; + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + "ret\t$amt", + [(X86retflag imm:$amt)]>; +} + +// All branches are RawFrm, Void, Branch, and Terminators +let isBranch = 1, isTerminator = 1 in + class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : + I<opcode, RawFrm, (outs), ins, asm, pattern>; + +let isBranch = 1, isBarrier = 1 in + def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; + +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", + [(brind GR32:$dst)]>; + def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", + [(brind (loadi32 addr:$dst))]>; +} + +// Conditional branches +let Uses = [EFLAGS] in { +def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", + [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; +def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", + [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; +def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", + [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; +def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", + [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; +def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", + [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; +def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", + [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; + +def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", + [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB; +def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", + [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; +def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", + [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; +def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", + [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; + +def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", + [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; +def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", + [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; +def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", + [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; +def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", + [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; +def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", + [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; +def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", + [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; +} // Uses = [EFLAGS] +
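// [Editorial note] The TB modifier on the Jcc defs above selects the
// two-byte 0x0F opcode map, so JE encodes as 0F 84 <rel32>, JA as 0F 87
// <rel32>, and so on. The low nibble of the second opcode byte is the
// hardware condition code; note that it differs from the X86_COND_* leaf
// values earlier in this file, which follow LLVM's own CondCode enum order.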
+//===----------------------------------------------------------------------===// +// Call Instructions... +// +let isCall = 1 in + // All calls clobber the non-callee saved registers. ESP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in { + def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops), + "call\t${dst:call}", []>; + def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), + "call\t{*}$dst", [(X86call GR32:$dst)]>; + def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), + "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; + } + +// Tail call stuff. + +def TAILCALL : I<0, Pseudo, (outs), (ins), + "#TAILCALL", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops), + "#TC_RETURN $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_ops), + "#TC_RETURN $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + + def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL", + []>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL", + []>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), + "jmp\t{*}$dst # TAILCALL", []>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions... +// +let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in +def LEAVE : I<0xC9, RawFrm, + (outs), (ins), "leave", []>; + +let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { +let mayLoad = 1 in +def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; + +let mayStore = 1 in +def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; +} + +let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in +def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>; +let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf", []>; + +let isTwoAddress = 1 in // GR32 = bswap GR32 + def BSWAP32r : I<0xC8, AddRegFrm, + (outs GR32:$dst), (ins GR32:$src), + "bswap{l}\t$dst", + [(set GR32:$dst, (bswap GR32:$src))]>, TB; + + +// Bit scan instructions. 
+let Defs = [EFLAGS] in { +def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "bsf{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB; +def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "bsf{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (X86bsf (loadi16 addr:$src))), + (implicit EFLAGS)]>, TB; +def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "bsf{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB; +def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "bsf{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (X86bsf (loadi32 addr:$src))), + (implicit EFLAGS)]>, TB; + +def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "bsr{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB; +def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "bsr{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (X86bsr (loadi16 addr:$src))), + (implicit EFLAGS)]>, TB; +def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "bsr{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB; +def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "bsr{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (X86bsr (loadi32 addr:$src))), + (implicit EFLAGS)]>, TB; +} // Defs = [EFLAGS] + +let neverHasSideEffects = 1 in +def LEA16r : I<0x8D, MRMSrcMem, + (outs GR16:$dst), (ins i32mem:$src), + "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; +let isReMaterializable = 1 in +def LEA32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins lea32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; + +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in { +def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", + [(X86rep_movs i8)]>, REP; +def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", + [(X86rep_movs i16)]>, REP, OpSize; +def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", + [(X86rep_movs i32)]>, REP; +} + +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in +def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", + [(X86rep_stos i8)]>, REP; +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in +def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", + [(X86rep_stos i16)]>, REP, OpSize; +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in +def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", + [(X86rep_stos i32)]>, REP; + +let Defs = [RAX, RDX] in +def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, + TB; + +let isBarrier = 1, hasCtrlDep = 1 in { +def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; +} + +//===----------------------------------------------------------------------===// +// Input/Output Instructions... 
+// +let Defs = [AL], Uses = [DX] in +def IN8rr : I<0xEC, RawFrm, (outs), (ins), + "in{b}\t{%dx, %al|%AL, %DX}", []>; +let Defs = [AX], Uses = [DX] in +def IN16rr : I<0xED, RawFrm, (outs), (ins), + "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; +let Defs = [EAX], Uses = [DX] in +def IN32rr : I<0xED, RawFrm, (outs), (ins), + "in{l}\t{%dx, %eax|%EAX, %DX}", []>; + +let Defs = [AL] in +def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port), + "in{b}\t{$port, %al|%AL, $port}", []>; +let Defs = [AX] in +def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), + "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; +let Defs = [EAX] in +def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), + "in{l}\t{$port, %eax|%EAX, $port}", []>; + +let Uses = [DX, AL] in +def OUT8rr : I<0xEE, RawFrm, (outs), (ins), + "out{b}\t{%al, %dx|%DX, %AL}", []>; +let Uses = [DX, AX] in +def OUT16rr : I<0xEF, RawFrm, (outs), (ins), + "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; +let Uses = [DX, EAX] in +def OUT32rr : I<0xEF, RawFrm, (outs), (ins), + "out{l}\t{%eax, %dx|%DX, %EAX}", []>; + +let Uses = [AL] in +def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port), + "out{b}\t{%al, $port|$port, %AL}", []>; +let Uses = [AX] in +def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), + "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; +let Uses = [EAX] in +def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), + "out{l}\t{%eax, $port|$port, %EAX}", []>; + +//===----------------------------------------------------------------------===// +// Move Instructions... +// +let neverHasSideEffects = 1 in { +def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), + "mov{b}\t{$src, $dst|$dst, $src}", []>; +def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +} +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), + "mov{b}\t{$src, $dst|$dst, $src}", + [(set GR8:$dst, imm:$src)]>; +def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src), + "mov{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, imm:$src)]>, OpSize; +def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), + "mov{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, imm:$src)]>; +} +def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), + "mov{b}\t{$src, $dst|$dst, $src}", + [(store (i8 imm:$src), addr:$dst)]>; +def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), + "mov{w}\t{$src, $dst|$dst, $src}", + [(store (i16 imm:$src), addr:$dst)]>, OpSize; +def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), + "mov{l}\t{$src, $dst|$dst, $src}", + [(store (i32 imm:$src), addr:$dst)]>; + +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), + "mov{b}\t{$src, $dst|$dst, $src}", + [(set GR8:$dst, (loadi8 addr:$src))]>; +def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize; +def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "mov{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (loadi32 addr:$src))]>; +} + +def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), + "mov{b}\t{$src, $dst|$dst, $src}", 
+ [(store GR8:$src, addr:$dst)]>; +def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", + [(store GR16:$src, addr:$dst)]>, OpSize; +def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", + [(store GR32:$src, addr:$dst)]>; + +// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so +// that they can be used for copying and storing h registers, which can't be +// encoded when a REX prefix is present. +let neverHasSideEffects = 1 in +def MOV8rr_NOREX : I<0x88, MRMDestReg, + (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +let mayStore = 1 in +def MOV8mr_NOREX : I<0x88, MRMDestMem, + (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +let mayLoad = 1, + canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +def MOV8rm_NOREX : I<0x8A, MRMSrcMem, + (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; + +//===----------------------------------------------------------------------===// +// Fixed-Register Multiplication and Division Instructions... +// + +// Extra precision multiplication +let Defs = [AL,AH,EFLAGS], Uses = [AL] in +def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + [(set AL, (mul AL, GR8:$src)), + (implicit EFLAGS)]>; // AL,AH = AL*GR8 + +let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in +def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*GR16 + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in +def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), + "mul{l}\t$src", + []>; // EAX,EDX = EAX*GR32 + +let Defs = [AL,AH,EFLAGS], Uses = [AL] in +def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), + "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. 
+ [(set AL, (mul AL, (loadi8 addr:$src))), + (implicit EFLAGS)]>; // AL,AH = AL*[mem8] + +let mayLoad = 1, neverHasSideEffects = 1 in { +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*[mem16] + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), + "mul{l}\t$src", + []>; // EAX,EDX = EAX*[mem32] +} + +let neverHasSideEffects = 1 in { +let Defs = [AL,AH,EFLAGS], Uses = [AL] in +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; + // AL,AH = AL*GR8 +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, + OpSize; // AX,DX = AX*GR16 +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; + // EAX,EDX = EAX*GR32 +let mayLoad = 1 in { +let Defs = [AL,AH,EFLAGS], Uses = [AL] in +def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), + "imul{b}\t$src", []>; // AL,AH = AL*[mem8] +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), + "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), + "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] +} +} // neverHasSideEffects + +// Unsigned division/remainder. +let Defs = [AL,AH,EFLAGS], Uses = [AX] in +def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH + "div{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX + "div{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX + "div{l}\t$src", []>; +let mayLoad = 1 in { +let Defs = [AL,AH,EFLAGS], Uses = [AX] in +def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH + "div{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX + "div{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX + "div{l}\t$src", []>; +} + +// Signed division/remainder. +let Defs = [AL,AH,EFLAGS], Uses = [AX] in +def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH + "idiv{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX + "idiv{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX + "idiv{l}\t$src", []>; +let mayLoad = 1 in { +let Defs = [AL,AH,EFLAGS], Uses = [AX] in +def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH + "idiv{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX + "idiv{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX + "idiv{l}\t$src", []>; +} + +//===----------------------------------------------------------------------===// +// Two-address Instructions.
+// +let isTwoAddress = 1 in { + +// Conditional moves +let Uses = [EFLAGS] in { +let isCommutable = 1 in { +def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovb\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_B, EFLAGS))]>, + TB, OpSize; +def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovb\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_B, EFLAGS))]>, + TB; +def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovae\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_AE, EFLAGS))]>, + TB, OpSize; +def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovae\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_AE, EFLAGS))]>, + TB; +def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmove\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_E, EFLAGS))]>, + TB, OpSize; +def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmove\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_E, EFLAGS))]>, + TB; +def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovne\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_NE, EFLAGS))]>, + TB, OpSize; +def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovne\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_NE, EFLAGS))]>, + TB; +def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovbe\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_BE, EFLAGS))]>, + TB, OpSize; +def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovbe\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_BE, EFLAGS))]>, + TB; +def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmova\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_A, EFLAGS))]>, + TB, OpSize; +def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmova\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_A, EFLAGS))]>, + TB; +def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovl\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_L, EFLAGS))]>, + TB, OpSize; +def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovl\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_L, EFLAGS))]>, + TB; +def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovge\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_GE, EFLAGS))]>, + TB, OpSize; +def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovge\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_GE, EFLAGS))]>, + TB; +def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovle\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_LE, EFLAGS))]>, + TB, OpSize; +def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovle\t{$src2, $dst|$dst, $src2}", + [(set
GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_LE, EFLAGS))]>, + TB; +def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovg\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_G, EFLAGS))]>, + TB, OpSize; +def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovg\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_G, EFLAGS))]>, + TB; +def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovs\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_S, EFLAGS))]>, + TB, OpSize; +def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovs\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_S, EFLAGS))]>, + TB; +def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovns\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_NS, EFLAGS))]>, + TB, OpSize; +def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovns\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_NS, EFLAGS))]>, + TB; +def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovp\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_P, EFLAGS))]>, + TB, OpSize; +def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovp\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_P, EFLAGS))]>, + TB; +def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovnp\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_NP, EFLAGS))]>, + TB, OpSize; +def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovnp\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_NP, EFLAGS))]>, + TB; +def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovo\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_O, EFLAGS))]>, + TB, OpSize; +def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovo\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_O, EFLAGS))]>, + TB; +def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16 + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "cmovno\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, + X86_COND_NO, EFLAGS))]>, + TB, OpSize; +def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32 + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "cmovno\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, + X86_COND_NO, EFLAGS))]>, + TB; +} // isCommutable = 1 + +def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovb\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_B, EFLAGS))]>, + TB, OpSize; +def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovb\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_B, EFLAGS))]>,
+ TB; +def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovae\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_AE, EFLAGS))]>, + TB, OpSize; +def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovae\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_AE, EFLAGS))]>, + TB; +def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmove\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_E, EFLAGS))]>, + TB, OpSize; +def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmove\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_E, EFLAGS))]>, + TB; +def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovne\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_NE, EFLAGS))]>, + TB, OpSize; +def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovne\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_NE, EFLAGS))]>, + TB; +def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovbe\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_BE, EFLAGS))]>, + TB, OpSize; +def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovbe\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_BE, EFLAGS))]>, + TB; +def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmova\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_A, EFLAGS))]>, + TB, OpSize; +def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmova\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_A, EFLAGS))]>, + TB; +def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovl\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_L, EFLAGS))]>, + TB, OpSize; +def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovl\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_L, EFLAGS))]>, + TB; +def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovge\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_GE, EFLAGS))]>, + TB, OpSize; +def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovge\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_GE, EFLAGS))]>, + TB; +def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovle\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_LE, EFLAGS))]>, + TB, OpSize; +def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovle\t{$src2, $dst|$dst, $src2}", + [(set
GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_LE, EFLAGS))]>, + TB; +def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovg\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_G, EFLAGS))]>, + TB, OpSize; +def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovg\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_G, EFLAGS))]>, + TB; +def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovs\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_S, EFLAGS))]>, + TB, OpSize; +def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovs\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_S, EFLAGS))]>, + TB; +def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovns\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_NS, EFLAGS))]>, + TB, OpSize; +def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovns\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_NS, EFLAGS))]>, + TB; +def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovp\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_P, EFLAGS))]>, + TB, OpSize; +def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovp\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_P, EFLAGS))]>, + TB; +def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovnp\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_NP, EFLAGS))]>, + TB, OpSize; +def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovnp\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_NP, EFLAGS))]>, + TB; +def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovo\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_O, EFLAGS))]>, + TB, OpSize; +def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovo\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_O, EFLAGS))]>, + TB; +def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16] + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "cmovno\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + X86_COND_NO, EFLAGS))]>, + TB, OpSize; +def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "cmovno\t{$src2, $dst|$dst, 
$src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + X86_COND_NO, EFLAGS))]>, + TB; +} // Uses = [EFLAGS] + + +// unary instructions +let CodeSize = 2 in { +let Defs = [EFLAGS] in { +def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst", + [(set GR8:$dst, (ineg GR8:$src)), + (implicit EFLAGS)]>; +def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst", + [(set GR16:$dst, (ineg GR16:$src)), + (implicit EFLAGS)]>, OpSize; +def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst", + [(set GR32:$dst, (ineg GR32:$src)), + (implicit EFLAGS)]>; +let isTwoAddress = 0 in { + def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", + [(store (ineg (loadi8 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; + def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst", + [(store (ineg (loadi16 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>, OpSize; + def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst", + [(store (ineg (loadi32 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +} +} // Defs = [EFLAGS] + +// Match xor -1 to not. Favors these over a move imm + xor to save code size. +let AddedComplexity = 15 in { +def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst", + [(set GR8:$dst, (not GR8:$src))]>; +def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst", + [(set GR16:$dst, (not GR16:$src))]>, OpSize; +def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst", + [(set GR32:$dst, (not GR32:$src))]>; +} +let isTwoAddress = 0 in { + def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", + [(store (not (loadi8 addr:$dst)), addr:$dst)]>; + def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst", + [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; + def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst", + [(store (not (loadi32 addr:$dst)), addr:$dst)]>; +} +} // CodeSize + +// TODO: inc/dec is slow for P4, but fast for Pentium-M. +let Defs = [EFLAGS] in { +let CodeSize = 2 in +def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst", + [(set GR8:$dst, (add GR8:$src, 1)), + (implicit EFLAGS)]>; +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", + [(set GR16:$dst, (add GR16:$src, 1)), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", + [(set GR32:$dst, (add GR32:$src, 1)), + (implicit EFLAGS)]>, Requires<[In32BitMode]>; +} +let isTwoAddress = 0, CodeSize = 2 in { + def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", + [(store (add (loadi8 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; + def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In32BitMode]>; +} + +let CodeSize = 2 in +def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst", + [(set GR8:$dst, (add GR8:$src, -1)), + (implicit EFLAGS)]>; +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", + [(set GR16:$dst, (add GR16:$src, -1)), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", + [(set GR32:$dst, (add GR32:$src, -1)), + (implicit EFLAGS)]>, Requires<[In32BitMode]>; +} + +let isTwoAddress = 0, CodeSize = 2 in { + def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", + [(store (add (loadi8 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>; + def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; + def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In32BitMode]>; +} +} // Defs = [EFLAGS] + +// Logical operators... +let Defs = [EFLAGS] in { +let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y +def AND8rr : I<0x20, MRMDestReg, + (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), + "and{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (and GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; +def AND16rr : I<0x21, MRMDestReg, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (and GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; +def AND32rr : I<0x21, MRMDestReg, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; +} + +def AND8rm : I<0x22, MRMSrcMem, + (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), + "and{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))), + (implicit EFLAGS)]>; +def AND16rm : I<0x23, MRMSrcMem, + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))), + (implicit EFLAGS)]>, OpSize; +def AND32rm : I<0x23, MRMSrcMem, + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))), + (implicit EFLAGS)]>; + +def AND8ri : Ii8<0x80, MRM4r, + (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2), + "and{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (and GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def AND16ri : Ii16<0x81, MRM4r, + (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (and GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def AND32ri : Ii32<0x81, MRM4r, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def AND16ri8 : Ii8<0x83, MRM4r, + (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, + OpSize; +def AND32ri8 : Ii8<0x83, MRM4r, + (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; + +let isTwoAddress = 0 in { + def AND8mr : I<0x20, MRMDestMem, + (outs), (ins i8mem :$dst, GR8 :$src), + "and{b}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), GR8:$src), addr:$dst), + (implicit EFLAGS)]>; + def AND16mr : I<0x21, MRMDestMem, + 
(outs), (ins i16mem:$dst, GR16:$src), + "and{w}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), GR16:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def AND32mr : I<0x21, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src), + "and{l}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), GR32:$src), addr:$dst), + (implicit EFLAGS)]>; + def AND8mi : Ii8<0x80, MRM4m, + (outs), (ins i8mem :$dst, i8imm :$src), + "and{b}\t{$src, $dst|$dst, $src}", + [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def AND16mi : Ii16<0x81, MRM4m, + (outs), (ins i16mem:$dst, i16imm:$src), + "and{w}\t{$src, $dst|$dst, $src}", + [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def AND32mi : Ii32<0x81, MRM4m, + (outs), (ins i32mem:$dst, i32imm:$src), + "and{l}\t{$src, $dst|$dst, $src}", + [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def AND16mi8 : Ii8<0x83, MRM4m, + (outs), (ins i16mem:$dst, i16i8imm :$src), + "and{w}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def AND32mi8 : Ii8<0x83, MRM4m, + (outs), (ins i32mem:$dst, i32i8imm :$src), + "and{l}\t{$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +} + + +let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y +def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), + "or{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (or GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; +def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "or{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (or GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; +def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "or{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; +} +def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), + "or{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "or{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, OpSize; +def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "or{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; + +def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "or{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (or GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "or{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (or GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "or{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; + +def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "or{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; +def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "or{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, 
(or GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; +let isTwoAddress = 0 in { + def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), + "or{b}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), GR8:$src), addr:$dst), + (implicit EFLAGS)]>; + def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "or{w}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), GR16:$src), addr:$dst), + (implicit EFLAGS)]>, OpSize; + def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "or{l}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), GR32:$src), addr:$dst), + (implicit EFLAGS)]>; + def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), + "or{b}\t{$src, $dst|$dst, $src}", + [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src), + "or{w}\t{$src, $dst|$dst, $src}", + [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src), + "or{l}\t{$src, $dst|$dst, $src}", + [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src), + "or{w}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src), + "or{l}\t{$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +} // isTwoAddress = 0 + + +let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y + def XOR8rr : I<0x30, MRMDestReg, + (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; + def XOR16rr : I<0x31, MRMDestReg, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; + def XOR32rr : I<0x31, MRMDestReg, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "xor{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; +} // isCommutable = 1 + +def XOR8rm : I<0x32, MRMSrcMem , + (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def XOR16rm : I<0x33, MRMSrcMem , + (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, + OpSize; +def XOR32rm : I<0x33, MRMSrcMem , + (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "xor{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; + +def XOR8ri : Ii8<0x80, MRM6r, + (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (xor GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def XOR16ri : Ii16<0x81, MRM6r, + (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (xor GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def XOR32ri : Ii32<0x81, MRM6r, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "xor{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, imm:$src2)), + (implicit 
EFLAGS)]>; +def XOR16ri8 : Ii8<0x83, MRM6r, + (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, + OpSize; +def XOR32ri8 : Ii8<0x83, MRM6r, + (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "xor{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; + +let isTwoAddress = 0 in { + def XOR8mr : I<0x30, MRMDestMem, + (outs), (ins i8mem :$dst, GR8 :$src), + "xor{b}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), GR8:$src), addr:$dst), + (implicit EFLAGS)]>; + def XOR16mr : I<0x31, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src), + "xor{w}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), GR16:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def XOR32mr : I<0x31, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src), + "xor{l}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), GR32:$src), addr:$dst), + (implicit EFLAGS)]>; + def XOR8mi : Ii8<0x80, MRM6m, + (outs), (ins i8mem :$dst, i8imm :$src), + "xor{b}\t{$src, $dst|$dst, $src}", + [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def XOR16mi : Ii16<0x81, MRM6m, + (outs), (ins i16mem:$dst, i16imm:$src), + "xor{w}\t{$src, $dst|$dst, $src}", + [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def XOR32mi : Ii32<0x81, MRM6m, + (outs), (ins i32mem:$dst, i32imm:$src), + "xor{l}\t{$src, $dst|$dst, $src}", + [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst), + (implicit EFLAGS)]>; + def XOR16mi8 : Ii8<0x83, MRM6m, + (outs), (ins i16mem:$dst, i16i8imm :$src), + "xor{w}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>, + OpSize; + def XOR32mi8 : Ii8<0x83, MRM6m, + (outs), (ins i32mem:$dst, i32i8imm :$src), + "xor{l}\t{$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), + (implicit EFLAGS)]>; +} // isTwoAddress = 0 +} // Defs = [EFLAGS] + +// Shift instructions +let Defs = [EFLAGS] in { +let Uses = [CL] in { +def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src), + "shl{b}\t{%cl, $dst|$dst, %CL}", + [(set GR8:$dst, (shl GR8:$src, CL))]>; +def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src), + "shl{w}\t{%cl, $dst|$dst, %CL}", + [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize; +def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src), + "shl{l}\t{%cl, $dst|$dst, %CL}", + [(set GR32:$dst, (shl GR32:$src, CL))]>; +} // Uses = [CL] + +def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "shl{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; +let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. +def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shl{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shl{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; +// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is +// cheaper. 
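// [Editorial note] "shl reg, 1" doubles its operand, i.e. computes reg + reg,
// so "add reg, reg" produces the same value; it is commutable, no slower than
// a shift on the x86 cores this file targets, and can take the LEA-based
// three-address form mentioned above -- hence the NOTE about avoiding
// shift-by-one patterns.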
+} // isConvertibleToThreeAddress = 1 + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t{%cl, $dst|$dst, %CL}", + [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; + def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t{%cl, $dst|$dst, %CL}", + [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t{%cl, $dst|$dst, %CL}", + [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; + } + def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), + "shl{b}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), + "shl{w}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; + def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), + "shl{l}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + + // Shift by 1 + def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t$dst", + [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t$dst", + [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; + def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t$dst", + [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +} + +let Uses = [CL] in { +def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src), + "shr{b}\t{%cl, $dst|$dst, %CL}", + [(set GR8:$dst, (srl GR8:$src, CL))]>; +def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src), + "shr{w}\t{%cl, $dst|$dst, %CL}", + [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize; +def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src), + "shr{l}\t{%cl, $dst|$dst, %CL}", + [(set GR32:$dst, (srl GR32:$src, CL))]>; +} + +def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "shr{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; +def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shr{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shr{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; + +// Shift by 1 +def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), + "shr{b}\t$dst", + [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; +def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), + "shr{w}\t$dst", + [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; +def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), + "shr{l}\t$dst", + [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t{%cl, $dst|$dst, %CL}", + [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; + def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t{%cl, $dst|$dst, %CL}", + [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, + OpSize; + def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t{%cl, $dst|$dst, %CL}", + [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; + } + def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), + "shr{b}\t{$src, $dst|$dst, $src}", + 
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), + "shr{w}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; + def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), + "shr{l}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + + // Shift by 1 + def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t$dst", + [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t$dst", + [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; + def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t$dst", + [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +} + +let Uses = [CL] in { +def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src), + "sar{b}\t{%cl, $dst|$dst, %CL}", + [(set GR8:$dst, (sra GR8:$src, CL))]>; +def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src), + "sar{w}\t{%cl, $dst|$dst, %CL}", + [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize; +def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src), + "sar{l}\t{%cl, $dst|$dst, %CL}", + [(set GR32:$dst, (sra GR32:$src, CL))]>; +} + +def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "sar{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; +def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "sar{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "sar{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; + +// Shift by 1 +def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), + "sar{b}\t$dst", + [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; +def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), + "sar{w}\t$dst", + [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; +def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), + "sar{l}\t$dst", + [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t{%cl, $dst|$dst, %CL}", + [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; + def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t{%cl, $dst|$dst, %CL}", + [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t{%cl, $dst|$dst, %CL}", + [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; + } + def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), + "sar{b}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), + "sar{w}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; + def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), + "sar{l}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + + // Shift by 1 + def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t$dst", + [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t$dst", + [(store 
(sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; + def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t$dst", + [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +} + +// Rotate instructions +// FIXME: provide shorter instructions when imm8 == 1 +let Uses = [CL] in { +def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), + "rol{b}\t{%cl, $dst|$dst, %CL}", + [(set GR8:$dst, (rotl GR8:$src, CL))]>; +def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src), + "rol{w}\t{%cl, $dst|$dst, %CL}", + [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize; +def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src), + "rol{l}\t{%cl, $dst|$dst, %CL}", + [(set GR32:$dst, (rotl GR32:$src, CL))]>; +} + +def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "rol{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; +def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "rol{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "rol{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "rol{b}\t$dst", + [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; +def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "rol{w}\t$dst", + [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; +def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "rol{l}\t$dst", + [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; + def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; + } + def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src), + "rol{b}\t{$src, $dst|$dst, $src}", + [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src), + "rol{w}\t{$src, $dst|$dst, $src}", + [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; + def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src), + "rol{l}\t{$src, $dst|$dst, $src}", + [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + + // Rotate by 1 + def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t$dst", + [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t$dst", + [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; + def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t$dst", + [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +} + +let Uses = [CL] in { +def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), + "ror{b}\t{%cl, $dst|$dst, %CL}", + [(set GR8:$dst, (rotr GR8:$src, CL))]>; +def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src), + "ror{w}\t{%cl, $dst|$dst, %CL}", + [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize; +def ROR32rCL : I<0xD3, MRM1r, (outs 
GR32:$dst), (ins GR32:$src), + "ror{l}\t{%cl, $dst|$dst, %CL}", + [(set GR32:$dst, (rotr GR32:$src, CL))]>; +} + +def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "ror{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; +def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "ror{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "ror{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "ror{b}\t$dst", + [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; +def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "ror{w}\t$dst", + [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize; +def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "ror{l}\t$dst", + [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; + def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; + } + def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), + "ror{b}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), + "ror{w}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; + def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), + "ror{l}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + + // Rotate by 1 + def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t$dst", + [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t$dst", + [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; + def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t$dst", + [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +} + + + +// Double shift instructions (generalizations of rotate) +let Uses = [CL] in { +def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; +def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; +def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +} + +let isCommutable = 1 in { // These instructions commute to each other. 
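+// (For 0 < c < 32, X86shld(a, b, c) == (a << c) | (b >> (32-c)), which
+// equals X86shrd(b, a, 32-c); commuting therefore swaps the two sources and
+// rewrites the opcode into its counterpart with a complemented count.)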
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD32rri8 : Ii8<0xAC, MRMDestReg, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHLD16rri8 : Ii8<0xA4, MRMDestReg, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +def SHRD16rri8 : Ii8<0xAC, MRMDestReg, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +} + +let isTwoAddress = 0 in { + let Uses = [CL] in { + def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; + def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; + } + def SHLD32mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + def SHRD32mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + + let Uses = [CL] in { + def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; + def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; + } + def SHLD16mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; + def SHRD16mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; +} +} // Defs = [EFLAGS] + + +// Arithmetic. +let Defs = [EFLAGS] in { +let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y +// Register-Register Addition +def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst), + (ins GR8 :$src1, GR8 :$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (add GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; + +let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
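+// e.g. the two-address "addl %ebx, %eax" must overwrite $src1; rewritten as
+// the three-address "leal (%eax,%ebx), %ecx" the same sum lands in a fresh
+// register and $src1 stays live.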
+// Register-Register Addition +def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (add GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; +def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; +} // end isConvertibleToThreeAddress +} // end isCommutable + +// Register-Memory Addition +def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst), + (ins GR8 :$src1, i8mem :$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, OpSize; +def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; + +// Register-Integer Addition +def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (add GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; + +let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. +// Register-Integer Addition +def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst), + (ins GR16:$src1, i16imm:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (add GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst), + (ins GR32:$src1, i32imm:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst), + (ins GR16:$src1, i16i8imm:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; +def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), + (ins GR32:$src1, i32i8imm:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; +} + +let isTwoAddress = 0 in { + // Memory-Register Addition + def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), GR8:$src2), addr:$dst), + (implicit EFLAGS)]>; + def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), GR16:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; + def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), GR32:$src2), addr:$dst), + (implicit EFLAGS)]>; + def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", + [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>; + def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; + def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>; + def ADD16mi8 : 
Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)]>, OpSize; + def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)]>; +} + +let Uses = [EFLAGS] in { +let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y +def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>; +def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize; +def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; +} +def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), + (ins GR8:$src1, i8mem:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>; +def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>, + OpSize; +def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>; +def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>; +def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst), + (ins GR16:$src1, i16imm:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize; +def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst), + (ins GR16:$src1, i16i8imm:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>, + OpSize; +def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst), + (ins GR32:$src1, i32imm:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>; +def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst), + (ins GR32:$src1, i32i8imm:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>; + +let isTwoAddress = 0 in { + def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>; + def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>, + OpSize; + def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>; + def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", + [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; + def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>, + OpSize; + def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", + [(store (adde 
(load addr:$dst), i16immSExt8:$src2), addr:$dst)]>, + OpSize; + def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>; + def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; +} +} // Uses = [EFLAGS] + +// Register-Register Subtraction +def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "sub{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; +def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; +def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; + +// Register-Memory Subtraction +def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), + (ins GR8 :$src1, i8mem :$src2), + "sub{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; +def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, OpSize; +def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; + +// Register-Integer Subtraction +def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst), + (ins GR8:$src1, i8imm:$src2), + "sub{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst), + (ins GR16:$src1, i16imm:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst), + (ins GR32:$src1, i32imm:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), + (ins GR16:$src1, i16i8imm:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; +def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), + (ins GR32:$src1, i32i8imm:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; + +let isTwoAddress = 0 in { + // Memory-Register Subtraction + def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), + "sub{b}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), GR8:$src2), addr:$dst), + (implicit EFLAGS)]>; + def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), GR16:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; + def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), GR32:$src2), addr:$dst), + (implicit EFLAGS)]>; + + // Memory-Integer Subtraction + def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), + "sub{b}\t{$src2, 
$dst|$dst, $src2}", + [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>; + def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst), + (implicit EFLAGS)]>, OpSize; + def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst), + (implicit EFLAGS)]>; + def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)]>, OpSize; + def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)]>; +} + +let Uses = [EFLAGS] in { +def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst), + (ins GR8:$src1, GR8:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>; +def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize; +def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>; + +let isTwoAddress = 0 in { + def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>; + def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>, + OpSize; + def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>; + def SBB8mi : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", + [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; + def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>, + OpSize; + def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>, + OpSize; + def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>; + def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; +} +def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>; +def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>, + OpSize; +def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>; +def SBB8ri : Ii8<0x80, MRM3r, 
(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>; +def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst), + (ins GR16:$src1, i16imm:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize; +def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst), + (ins GR16:$src1, i16i8imm:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>, + OpSize; +def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst), + (ins GR32:$src1, i32imm:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>; +def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst), + (ins GR32:$src1, i32i8imm:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>; +} // Uses = [EFLAGS] +} // Defs = [EFLAGS] + +let Defs = [EFLAGS] in { +let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +// Register-Register Signed Integer Multiply +def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), + "imul{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, TB, OpSize; +def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), + "imul{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>, TB; +} + +// Register-Memory Signed Integer Multiply +def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "imul{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB, OpSize; +def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + "imul{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB; +} // Defs = [EFLAGS] +} // end Two Address instructions + +// Surprisingly enough, these are not two address instructions! 
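+// The rri/rmi forms below read $src1 and write an independent $dst, so no
+// operand tie is needed; e.g. "imull $10, %esi, %eax" leaves EAX = ESI * 10
+// with ESI untouched.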
+let Defs = [EFLAGS] in { +// Register-Integer Signed Integer Multiply +def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 + (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, (mul GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (mul GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; +def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 + (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; +def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 + (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; + +// Memory-Integer Signed Integer Multiply +def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 + (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)), + (implicit EFLAGS)]>, OpSize; +def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 + (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)), + (implicit EFLAGS)]>; +def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 + (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, (mul (load addr:$src1), + i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; +def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 + (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (mul (load addr:$src1), + i32immSExt8:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +//===----------------------------------------------------------------------===// +// Test instructions are just like AND, except they don't generate a result. 
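+// e.g. "testl %eax, %eax" ANDs EAX with itself only to set ZF/SF/PF, so a
+// following jz/jnz branches on EAX being zero without needing a scratch
+// register for the AND result.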
+// +let Defs = [EFLAGS] in { +let isCommutable = 1 in { // TEST X, Y --> TEST Y, X +def TEST8rr : I<0x84, MRMDestReg, (outs), (ins GR8:$src1, GR8:$src2), + "test{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR8:$src1, GR8:$src2), 0), + (implicit EFLAGS)]>; +def TEST16rr : I<0x85, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), + "test{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR16:$src1, GR16:$src2), 0), + (implicit EFLAGS)]>, + OpSize; +def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), + "test{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR32:$src1, GR32:$src2), 0), + (implicit EFLAGS)]>; +} + +def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), + "test{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), + (implicit EFLAGS)]>; +def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), + "test{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0), + (implicit EFLAGS)]>, OpSize; +def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), + "test{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0), + (implicit EFLAGS)]>; + +def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8 + (outs), (ins GR8:$src1, i8imm:$src2), + "test{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR8:$src1, imm:$src2), 0), + (implicit EFLAGS)]>; +def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16 + (outs), (ins GR16:$src1, i16imm:$src2), + "test{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR16:$src1, imm:$src2), 0), + (implicit EFLAGS)]>, OpSize; +def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32 + (outs), (ins GR32:$src1, i32imm:$src2), + "test{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and_su GR32:$src1, imm:$src2), 0), + (implicit EFLAGS)]>; + +def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8 + (outs), (ins i8mem:$src1, i8imm:$src2), + "test{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0), + (implicit EFLAGS)]>; +def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16 + (outs), (ins i16mem:$src1, i16imm:$src2), + "test{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0), + (implicit EFLAGS)]>, OpSize; +def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32 + (outs), (ins i32mem:$src1, i32imm:$src2), + "test{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + + +// Condition code ops, incl. set if equal/not equal/... 
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in +def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH +let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in +def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags + +let Uses = [EFLAGS] in { +def SETEr : I<0x94, MRM0r, + (outs GR8 :$dst), (ins), + "sete\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>, + TB; // GR8 = == +def SETEm : I<0x94, MRM0m, + (outs), (ins i8mem:$dst), + "sete\t$dst", + [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>, + TB; // [mem8] = == + +def SETNEr : I<0x95, MRM0r, + (outs GR8 :$dst), (ins), + "setne\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>, + TB; // GR8 = != +def SETNEm : I<0x95, MRM0m, + (outs), (ins i8mem:$dst), + "setne\t$dst", + [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>, + TB; // [mem8] = != + +def SETLr : I<0x9C, MRM0r, + (outs GR8 :$dst), (ins), + "setl\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>, + TB; // GR8 = < signed +def SETLm : I<0x9C, MRM0m, + (outs), (ins i8mem:$dst), + "setl\t$dst", + [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>, + TB; // [mem8] = < signed + +def SETGEr : I<0x9D, MRM0r, + (outs GR8 :$dst), (ins), + "setge\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>, + TB; // GR8 = >= signed +def SETGEm : I<0x9D, MRM0m, + (outs), (ins i8mem:$dst), + "setge\t$dst", + [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>, + TB; // [mem8] = >= signed + +def SETLEr : I<0x9E, MRM0r, + (outs GR8 :$dst), (ins), + "setle\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>, + TB; // GR8 = <= signed +def SETLEm : I<0x9E, MRM0m, + (outs), (ins i8mem:$dst), + "setle\t$dst", + [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>, + TB; // [mem8] = <= signed + +def SETGr : I<0x9F, MRM0r, + (outs GR8 :$dst), (ins), + "setg\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>, + TB; // GR8 = > signed +def SETGm : I<0x9F, MRM0m, + (outs), (ins i8mem:$dst), + "setg\t$dst", + [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>, + TB; // [mem8] = > signed + +def SETBr : I<0x92, MRM0r, + (outs GR8 :$dst), (ins), + "setb\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>, + TB; // GR8 = < unsigned +def SETBm : I<0x92, MRM0m, + (outs), (ins i8mem:$dst), + "setb\t$dst", + [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>, + TB; // [mem8] = < unsigned + +def SETAEr : I<0x93, MRM0r, + (outs GR8 :$dst), (ins), + "setae\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>, + TB; // GR8 = >= unsigned +def SETAEm : I<0x93, MRM0m, + (outs), (ins i8mem:$dst), + "setae\t$dst", + [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>, + TB; // [mem8] = >= unsigned + +def SETBEr : I<0x96, MRM0r, + (outs GR8 :$dst), (ins), + "setbe\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>, + TB; // GR8 = <= unsigned +def SETBEm : I<0x96, MRM0m, + (outs), (ins i8mem:$dst), + "setbe\t$dst", + [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>, + TB; // [mem8] = <= unsigned + +def SETAr : I<0x97, MRM0r, + (outs GR8 :$dst), (ins), + "seta\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>, + TB; // GR8 = > unsigned +def SETAm : I<0x97, MRM0m, + (outs), (ins i8mem:$dst), + "seta\t$dst", + [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>, + TB; // [mem8] = > unsigned + +def SETSr : I<0x98, MRM0r, + (outs GR8 :$dst), (ins), + "sets\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>, + TB; // GR8 = <sign bit> +def SETSm : I<0x98, MRM0m, + (outs), (ins i8mem:$dst), + "sets\t$dst", + 
[(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>, + TB; // [mem8] = <sign bit> +def SETNSr : I<0x99, MRM0r, + (outs GR8 :$dst), (ins), + "setns\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>, + TB; // GR8 = !<sign bit> +def SETNSm : I<0x99, MRM0m, + (outs), (ins i8mem:$dst), + "setns\t$dst", + [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>, + TB; // [mem8] = !<sign bit> + +def SETPr : I<0x9A, MRM0r, + (outs GR8 :$dst), (ins), + "setp\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>, + TB; // GR8 = parity +def SETPm : I<0x9A, MRM0m, + (outs), (ins i8mem:$dst), + "setp\t$dst", + [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>, + TB; // [mem8] = parity +def SETNPr : I<0x9B, MRM0r, + (outs GR8 :$dst), (ins), + "setnp\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>, + TB; // GR8 = not parity +def SETNPm : I<0x9B, MRM0m, + (outs), (ins i8mem:$dst), + "setnp\t$dst", + [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>, + TB; // [mem8] = not parity + +def SETOr : I<0x90, MRM0r, + (outs GR8 :$dst), (ins), + "seto\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>, + TB; // GR8 = overflow +def SETOm : I<0x90, MRM0m, + (outs), (ins i8mem:$dst), + "seto\t$dst", + [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>, + TB; // [mem8] = overflow +def SETNOr : I<0x91, MRM0r, + (outs GR8 :$dst), (ins), + "setno\t$dst", + [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>, + TB; // GR8 = not overflow +def SETNOm : I<0x91, MRM0m, + (outs), (ins i8mem:$dst), + "setno\t$dst", + [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>, + TB; // [mem8] = not overflow +} // Uses = [EFLAGS] + + +// Integer comparisons +let Defs = [EFLAGS] in { +def CMP8rr : I<0x38, MRMDestReg, + (outs), (ins GR8 :$src1, GR8 :$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>; +def CMP16rr : I<0x39, MRMDestReg, + (outs), (ins GR16:$src1, GR16:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize; +def CMP32rr : I<0x39, MRMDestReg, + (outs), (ins GR32:$src1, GR32:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; +def CMP8mr : I<0x38, MRMDestMem, + (outs), (ins i8mem :$src1, GR8 :$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi8 addr:$src1), GR8:$src2), + (implicit EFLAGS)]>; +def CMP16mr : I<0x39, MRMDestMem, + (outs), (ins i16mem:$src1, GR16:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi16 addr:$src1), GR16:$src2), + (implicit EFLAGS)]>, OpSize; +def CMP32mr : I<0x39, MRMDestMem, + (outs), (ins i32mem:$src1, GR32:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi32 addr:$src1), GR32:$src2), + (implicit EFLAGS)]>; +def CMP8rm : I<0x3A, MRMSrcMem, + (outs), (ins GR8 :$src1, i8mem :$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)]>; +def CMP16rm : I<0x3B, MRMSrcMem, + (outs), (ins GR16:$src1, i16mem:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)]>, OpSize; +def CMP32rm : I<0x3B, MRMSrcMem, + (outs), (ins GR32:$src1, i32mem:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)]>; +def CMP8ri : Ii8<0x80, MRM7r, + (outs), (ins GR8:$src1, i8imm:$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>; +def CMP16ri : Ii16<0x81, MRM7r, + (outs), (ins GR16:$src1, 
i16imm:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR16:$src1, imm:$src2), + (implicit EFLAGS)]>, OpSize; +def CMP32ri : Ii32<0x81, MRM7r, + (outs), (ins GR32:$src1, i32imm:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>; +def CMP8mi : Ii8 <0x80, MRM7m, + (outs), (ins i8mem :$src1, i8imm :$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi8 addr:$src1), imm:$src2), + (implicit EFLAGS)]>; +def CMP16mi : Ii16<0x81, MRM7m, + (outs), (ins i16mem:$src1, i16imm:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi16 addr:$src1), imm:$src2), + (implicit EFLAGS)]>, OpSize; +def CMP32mi : Ii32<0x81, MRM7m, + (outs), (ins i32mem:$src1, i32imm:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi32 addr:$src1), imm:$src2), + (implicit EFLAGS)]>; +def CMP16ri8 : Ii8<0x83, MRM7r, + (outs), (ins GR16:$src1, i16i8imm:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)]>, OpSize; +def CMP16mi8 : Ii8<0x83, MRM7m, + (outs), (ins i16mem:$src1, i16i8imm:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2), + (implicit EFLAGS)]>, OpSize; +def CMP32mi8 : Ii8<0x83, MRM7m, + (outs), (ins i32mem:$src1, i32i8imm:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2), + (implicit EFLAGS)]>; +def CMP32ri8 : Ii8<0x83, MRM7r, + (outs), (ins GR32:$src1, i32i8imm:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", + [(X86cmp GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Bit tests. +// TODO: BTC, BTR, and BTS +let Defs = [EFLAGS] in { +def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", + [(X86bt GR16:$src1, GR16:$src2), + (implicit EFLAGS)]>, OpSize, TB; +def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", + [(X86bt GR32:$src1, GR32:$src2), + (implicit EFLAGS)]>, TB; + +// Unlike with the register+register form, the memory+register form of the +// bt instruction does not ignore the high bits of the index. From ISel's +// perspective, this is pretty bizarre. Disable these instructions for now. +//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), +// "bt{w}\t{$src2, $src1|$src1, $src2}", +// [(X86bt (loadi16 addr:$src1), GR16:$src2), +// (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>; +//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), +// "bt{l}\t{$src2, $src1|$src1, $src2}", +// [(X86bt (loadi32 addr:$src1), GR32:$src2), +// (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>; + +def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", + [(X86bt GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)]>, OpSize, TB; +def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", + [(X86bt GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)]>, TB; +// Note that these instructions don't need FastBTMem because that +// only applies when the other operand is in a register. When it's +// an immediate, bt is still fast. 
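+// (The register-indexed memory forms above are a different story: with
+// EAX = 100, "btl %eax, (%ecx)" tests bit 4 of the dword at 12(%ecx)
+// rather than reducing the index mod 32 the way "btl %eax, %edx" does,
+// which is exactly the mismatch that keeps BT16mr/BT32mr disabled.)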
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", + [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2), + (implicit EFLAGS)]>, OpSize, TB; +def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", + [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2), + (implicit EFLAGS)]>, TB; +} // Defs = [EFLAGS] + +// Sign/Zero extenders +// Use movsbl instead of movsbw; we don't care about the high 16 bits +// of the register here. This has a smaller encoding and avoids a +// partial-register update. +def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), + "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR16:$dst, (sext GR8:$src))]>, TB; +def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), + "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; +def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), + "movs{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sext GR8:$src))]>, TB; +def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), + "movs{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB; +def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), + "movs{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sext GR16:$src))]>, TB; +def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "movs{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; + +// Use movzbl instead of movzbw; we don't care about the high 16 bits +// of the register here. This has a smaller encoding and avoids a +// partial-register update. +def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), + "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR16:$dst, (zext GR8:$src))]>, TB; +def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), + "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; +def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR8:$src))]>, TB; +def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; +def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR16:$src))]>, TB; +def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; + +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 +// except that they use GR32_NOREX for the output operand register class +// instead of GR32. This allows them to operate on h registers on x86-64. 
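+// (A REX prefix makes AH/BH/CH/DH unencodable, and any register outside the
+// legacy eight would force one; GR32_NOREX excludes those registers, keeping
+// the encoding REX-free so an h-register source remains legal.)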
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, + (outs GR32_NOREX:$dst), (ins GR8:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", + []>, TB; +let mayLoad = 1 in +def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, + (outs GR32_NOREX:$dst), (ins i8mem:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", + []>, TB; + +let neverHasSideEffects = 1 in { + let Defs = [AX], Uses = [AL] in + def CBW : I<0x98, RawFrm, (outs), (ins), + "{cbtw|cbw}", []>, OpSize; // AX = signext(AL) + let Defs = [EAX], Uses = [AX] in + def CWDE : I<0x98, RawFrm, (outs), (ins), + "{cwtl|cwde}", []>; // EAX = signext(AX) + + let Defs = [AX,DX], Uses = [AX] in + def CWD : I<0x99, RawFrm, (outs), (ins), + "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX) + let Defs = [EAX,EDX], Uses = [EAX] in + def CDQ : I<0x99, RawFrm, (outs), (ins), + "{cltd|cdq}", []>; // EDX:EAX = signext(EAX) +} + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map movr0 to xor. +// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), + "xor{b}\t$dst, $dst", + [(set GR8:$dst, 0)]>; +// Use xorl instead of xorw since we don't care about the high 16 bits, +// it's smaller, and it avoids a partial-register update. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "xor{l}\t${dst:subreg32}, ${dst:subreg32}", + [(set GR16:$dst, 0)]>; +def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), + "xor{l}\t$dst, $dst", + [(set GR32:$dst, 0)]>; +} + +//===----------------------------------------------------------------------===// +// Thread Local Storage Instructions +// + +// All calls clobber the non-callee saved registers. ESP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. 
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP, EBX] in +def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym), + "leal\t${sym:mem}(,%ebx,1), %eax; " + "call\t___tls_get_addr@PLT", + [(X86tlsaddr tglobaltlsaddr:$sym)]>, + Requires<[In32BitMode]>; + +let AddedComplexity = 5 in +def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "movl\t%gs:$src, $dst", + [(set GR32:$dst, (gsload addr:$src))]>, SegGS; + +let AddedComplexity = 5 in +def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "movl\t%fs:$src, $dst", + [(set GR32:$dst, (fsload addr:$src))]>, SegFS; + +//===----------------------------------------------------------------------===// +// DWARF Pseudo Instructions +// + +def DWARF_LOC : I<0, Pseudo, (outs), + (ins i32imm:$line, i32imm:$col, i32imm:$file), + ".loc\t${file:debug} ${line:debug} ${col:debug}", + [(dwarf_loc (i32 imm:$line), (i32 imm:$col), + (i32 imm:$file))]>; + +//===----------------------------------------------------------------------===// +// EH Pseudo Instructions +// +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1 in { +def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), + "ret\t#eh_return, addr: $addr", + [(X86ehret GR32:$addr)]>; + +} + +//===----------------------------------------------------------------------===// +// Atomic support +// + +// Atomic swap. These are just normal xchg instructions. But since a memory +// operand is referenced, the atomicity is ensured. +let Constraints = "$val = $dst" in { +def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "xchg{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; +def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "xchg{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, + OpSize; +def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), + "xchg{b}\t{$val, $ptr|$ptr, $val}", + [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>; +} + +// Atomic compare and swap. 
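+// lock cmpxchg compares EAX (or AX/AL) with the memory operand: on a match
+// it stores $swap and sets ZF, otherwise it loads the current value into
+// EAX and clears ZF, which makes the usual retry loop straightforward:
+//   1: movl  (%ecx), %eax          # expected
+//      leal  1(%eax), %edx         # desired
+//      lock; cmpxchgl %edx, (%ecx)
+//      jne   1b                    # lost the race; EAX holds the new value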
+let Defs = [EAX, EFLAGS], Uses = [EAX] in { +def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), + "lock\n\t" + "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; +} +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), + "lock\n\t" + "cmpxchg8b\t$ptr", + [(X86cas8 addr:$ptr)]>, TB, LOCK; +} + +let Defs = [AX, EFLAGS], Uses = [AX] in { +def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), + "lock\n\t" + "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; +} +let Defs = [AL, EFLAGS], Uses = [AL] in { +def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), + "lock\n\t" + "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; +} + +// Atomic exchange and add +let Constraints = "$val = $dst", Defs = [EFLAGS] in { +def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "lock\n\t" + "xadd{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, + TB, LOCK; +def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "lock\n\t" + "xadd{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, + TB, OpSize, LOCK; +def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), + "lock\n\t" + "xadd{b}\t{$val, $ptr|$ptr, $val}", + [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, + TB, LOCK; +} + +// Atomic exchange, and, or, xor +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; +def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; +def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMXOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; +def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMNAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; +def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "#ATOMMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; +def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; +def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; +def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; + +def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMAND16 PSEUDO!", + [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; +def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMOR16 PSEUDO!", + [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>; +def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMXOR16 PSEUDO!", + [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, 
GR16:$val))]>; +def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMNAND16 PSEUDO!", + [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; +def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "#ATOMMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; +def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; +def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; +def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; + +def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMAND8 PSEUDO!", + [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; +def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMOR8 PSEUDO!", + [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; +def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMXOR8 PSEUDO!", + [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; +def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMNAND8 PSEUDO!", + [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; +} + +let Constraints = "$val1 = $dst1, $val2 = $dst2", + Defs = [EFLAGS, EAX, EBX, ECX, EDX], + Uses = [EAX, EBX, ECX, EDX], + mayLoad = 1, mayStore = 1, + usesCustomDAGSchedInserter = 1 in { +def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMAND6432 PSEUDO!", []>; +def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMOR6432 PSEUDO!", []>; +def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMXOR6432 PSEUDO!", []>; +def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMNAND6432 PSEUDO!", []>; +def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMADD6432 PSEUDO!", []>; +def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSUB6432 PSEUDO!", []>; +def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSWAP6432 PSEUDO!", []>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable +def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; +def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; +def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; +def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; +def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; + +def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), + (ADD32ri GR32:$src1, tconstpool:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)), + (ADD32ri GR32:$src1, tjumptable:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper 
tglobaladdr :$src2)),
+          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+          (ADD32ri GR32:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+          (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+          (MOV32mi addr:$dst, texternalsym:$src)>;
+
+// Calls
+// tailcall stuff
+def : Pat<(X86tailcall GR32:$dst),
+          (TAILCALL)>;
+
+def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
+          (TAILCALL)>;
+def : Pat<(X86tailcall (i32 texternalsym:$dst)),
+          (TAILCALL)>;
+
+def : Pat<(X86tcret GR32:$dst, imm:$off),
+          (TCRETURNri GR32:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+          (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+          (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// X86 specific add which produces a flag.
+def : Pat<(addc GR32:$src1, GR32:$src2),
+          (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(addc GR32:$src1, (load addr:$src2)),
+          (ADD32rm GR32:$src1, addr:$src2)>;
+def : Pat<(addc GR32:$src1, imm:$src2),
+          (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+def : Pat<(subc GR32:$src1, GR32:$src2),
+          (SUB32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(subc GR32:$src1, (load addr:$src2)),
+          (SUB32rm GR32:$src1, addr:$src2)>;
+def : Pat<(subc GR32:$src1, imm:$src2),
+          (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
+          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
+          (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
+          (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
+          (TEST32rr GR32:$src1, GR32:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
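Illustrative encodings for the two comments nearby (an editor's sketch; byte counts are from the standard IA-32 encoding tables, not from the imported file):

    // TEST vs. CMP against zero, as noted above:
    //   testl %eax, %eax     ; 85 C0    -- 2 bytes
    //   cmpl  $0, %eax       ; 83 F8 00 -- 3 bytes
    //
    // For the swapped/inverted cmov patterns below: CMOVcc can only fold a
    // load in the operand it conditionally moves, so
    //   r = (CF set) ? r2 : *p
    // is selected with the inverse condition and the operands swapped:
    //   cmovael (%p), %r2    ; load *p when CF is clear, keep r2 otherwise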
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS), + (CMOVAE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS), + (CMOVAE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS), + (CMOVB16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS), + (CMOVB32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS), + (CMOVNE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS), + (CMOVNE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS), + (CMOVE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS), + (CMOVE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS), + (CMOVA16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS), + (CMOVA32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS), + (CMOVBE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS), + (CMOVBE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS), + (CMOVGE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS), + (CMOVGE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS), + (CMOVL16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS), + (CMOVL32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS), + (CMOVG16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS), + (CMOVG32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS), + (CMOVLE16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS), + (CMOVLE32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS), + (CMOVNP16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS), + (CMOVNP32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS), + (CMOVP16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS), + (CMOVP32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS), + (CMOVNS16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS), + (CMOVNS32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS), + (CMOVS16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS), + (CMOVS32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS), + (CMOVNO16rm GR16:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS), + (CMOVNO32rm GR32:$src2, addr:$src1)>; +def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS), + (CMOVO16rm GR16:$src2, 
addr:$src1)>; +def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS), + (CMOVO32rm GR32:$src2, addr:$src1)>; + +// zextload bool -> zextload byte +def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; +def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; +def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; + +// extload bool -> extload byte +def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; +def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>, + Requires<[In32BitMode]>; +def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; +def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>, + Requires<[In32BitMode]>; +def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; +def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; + +// anyext +def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (anyext GR16:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + +// (and (i32 load), 255) -> (zextload i8) +def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), + (MOVZX32rm8 addr:$src)>; +def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))), + (MOVZX32rm16 addr:$src)>; + +//===----------------------------------------------------------------------===// +// Some peepholes +//===----------------------------------------------------------------------===// + +// Odd encoding trick: -128 fits into an 8-bit immediate field while +// +128 doesn't, so in this special case use a sub instead of an add. +def : Pat<(add GR16:$src1, 128), + (SUB16ri8 GR16:$src1, -128)>; +def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), + (SUB16mi8 addr:$dst, -128)>; +def : Pat<(add GR32:$src1, 128), + (SUB32ri8 GR32:$src1, -128)>; +def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), + (SUB32mi8 addr:$dst, -128)>; + +// r & (2^16-1) ==> movz +def : Pat<(and GR32:$src1, 0xffff), + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR32:$src1, 0xff), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD), + x86_subreg_8bit))>, + Requires<[In32BitMode]>; +// r & (2^8-1) ==> movz +def : Pat<(and GR16:$src1, 0xff), + (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD), + x86_subreg_8bit))>, + Requires<[In32BitMode]>; + +// sext_inreg patterns +def : Pat<(sext_inreg GR32:$src, i16), + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; +def : Pat<(sext_inreg GR32:$src, i8), + (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit))>, + Requires<[In32BitMode]>; +def : Pat<(sext_inreg GR16:$src, i8), + (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit))>, + Requires<[In32BitMode]>; + +// trunc patterns +def : Pat<(i16 (trunc GR32:$src)), + (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; +def : Pat<(i8 (trunc GR32:$src)), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit)>, + Requires<[In32BitMode]>; + +// h-register tricks +def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), + (EXTRACT_SUBREG 
(COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(srl_su GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32rr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_16bit)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; + +// (shl x, 1) ==> (add x, x) +def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; +def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; + +// (shl x (and y, 31)) ==> (shl x, y) +def : Pat<(shl GR8:$src1, (and CL:$amt, 31)), + (SHL8rCL GR8:$src1)>; +def : Pat<(shl GR16:$src1, (and CL:$amt, 31)), + (SHL16rCL GR16:$src1)>; +def : Pat<(shl GR32:$src1, (and CL:$amt, 31)), + (SHL32rCL GR32:$src1)>; +def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHL8mCL addr:$dst)>; +def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHL16mCL addr:$dst)>; +def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHL32mCL addr:$dst)>; + +def : Pat<(srl GR8:$src1, (and CL:$amt, 31)), + (SHR8rCL GR8:$src1)>; +def : Pat<(srl GR16:$src1, (and CL:$amt, 31)), + (SHR16rCL GR16:$src1)>; +def : Pat<(srl GR32:$src1, (and CL:$amt, 31)), + (SHR32rCL GR32:$src1)>; +def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHR8mCL addr:$dst)>; +def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHR16mCL addr:$dst)>; +def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SHR32mCL addr:$dst)>; + +def : Pat<(sra GR8:$src1, (and CL:$amt, 31)), + (SAR8rCL GR8:$src1)>; +def : Pat<(sra GR16:$src1, (and CL:$amt, 31)), + (SAR16rCL GR16:$src1)>; +def : Pat<(sra GR32:$src1, (and CL:$amt, 31)), + (SAR32rCL GR32:$src1)>; +def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SAR8mCL addr:$dst)>; +def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SAR16mCL addr:$dst)>; +def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), + (SAR32mCL addr:$dst)>; + +// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) +def : Pat<(or (srl GR32:$src1, CL:$amt), + (shl GR32:$src2, (sub 32, CL:$amt))), + (SHRD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), + (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), + (SHRD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), + (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + (SHRD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), + (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + addr:$dst), + (SHRD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), + (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), + GR32:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; + +// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) +def : Pat<(or (shl GR32:$src1, 
CL:$amt), + (srl GR32:$src2, (sub 32, CL:$amt))), + (SHLD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), + (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), + (SHLD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), + (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + (SHLD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), + (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + addr:$dst), + (SHLD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), + (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), + GR32:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; + +// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) +def : Pat<(or (srl GR16:$src1, CL:$amt), + (shl GR16:$src2, (sub 16, CL:$amt))), + (SHRD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt), + (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), + (SHRD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), + (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + (SHRD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), + (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + addr:$dst), + (SHRD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), + (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), + GR16:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; + +// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) +def : Pat<(or (shl GR16:$src1, CL:$amt), + (srl GR16:$src2, (sub 16, CL:$amt))), + (SHLD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), + (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), + (SHLD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), + (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + (SHLD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), + (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + addr:$dst), + (SHLD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), + (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), + GR16:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; + +//===----------------------------------------------------------------------===// +// EFLAGS-defining Patterns +//===----------------------------------------------------------------------===// + +// Register-Register Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (ADD8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (ADD8rm GR8:$src1, addr:$src2)>; +def : 
Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (ADD16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (ADD32rm GR32:$src1, addr:$src2)>; + +// Register-Integer Addition with EFLAGS result +def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register Addition with EFLAGS result +def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer Addition with EFLAGS result +def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (ADD32mi8 addr:$dst, i32immSExt8:$src2)>; + +// Register-Register Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (SUB8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (SUB16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (SUB32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (SUB8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (SUB16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (SUB32rm GR32:$src1, addr:$src2)>; + +// Register-Integer Subtraction with EFLAGS result +def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (SUB32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (SUB16ri8 GR16:$src1, 
i16immSExt8:$src2)>; +def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer Subtraction with EFLAGS result +def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (SUB32mi8 addr:$dst, i32immSExt8:$src2)>; + + +// Register-Register Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (IMUL16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (IMUL32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (IMUL16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (IMUL32rm GR32:$src1, addr:$src2)>; + +// Register-Integer Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (IMUL16rri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (IMUL32rri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Integer Signed Integer Multiply with EFLAGS result +def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2), + (implicit EFLAGS)), + (IMUL16rmi addr:$src1, imm:$src2)>; +def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2), + (implicit EFLAGS)), + (IMUL32rmi addr:$src1, imm:$src2)>; +def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2), + (implicit EFLAGS)), + (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2), + (implicit EFLAGS)), + (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; + +// Optimize multiply by 2 with EFLAGS result. 
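Why an add can stand in for the multiply-by-2 patterns that follow (editorial sketch; the encodings are the standard ones, and the cycle counts are typical rather than guaranteed):

    //   imull $2, %eax, %eax   ; 6B C0 02 -- 3 bytes, multi-cycle imul latency
    //   addl  %eax, %eax       ; 01 C0    -- 2 bytes, single-cycle ALU op
    // x+x and x*2 also agree on the overflow flag for signed doubling, which
    // is what the (implicit EFLAGS) consumers of X86smul_flag rely on.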
+let AddedComplexity = 2 in { +def : Pat<(parallel (X86smul_flag GR16:$src1, 2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src1)>; + +def : Pat<(parallel (X86smul_flag GR32:$src1, 2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src1)>; +} + +// INC and DEC with EFLAGS result. Note that these do not set CF. +def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), + (INC8r GR8:$src)>; +def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC8m addr:$dst)>; +def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), + (DEC8r GR8:$src)>; +def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC8m addr:$dst)>; + +def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), + (INC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC16m addr:$dst)>, Requires<[In32BitMode]>; +def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), + (DEC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC16m addr:$dst)>, Requires<[In32BitMode]>; + +def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), + (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (INC32m addr:$dst)>, Requires<[In32BitMode]>; +def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), + (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), + (implicit EFLAGS)), + (DEC32m addr:$dst)>, Requires<[In32BitMode]>; + +//===----------------------------------------------------------------------===// +// Floating Point Stack Support +//===----------------------------------------------------------------------===// + +include "X86InstrFPStack.td" + +//===----------------------------------------------------------------------===// +// X86-64 Support +//===----------------------------------------------------------------------===// + +include "X86Instr64bit.td" + +//===----------------------------------------------------------------------===// +// XMM Floating point support (requires SSE / SSE2) +//===----------------------------------------------------------------------===// + +include "X86InstrSSE.td" + +//===----------------------------------------------------------------------===// +// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2) +//===----------------------------------------------------------------------===// + +include "X86InstrMMX.td" diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td new file mode 100644 index 000000000000..8f287e17dc3f --- /dev/null +++ b/lib/Target/X86/X86InstrMMX.td @@ -0,0 +1,694 @@ +//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 MMX instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. 
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
+
+def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// MMX Masks
+//===----------------------------------------------------------------------===//
+
+// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
+// PSHUFW imm.
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+  return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
+def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+                         (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
+def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+                         (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+                               (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+                               (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
+                         (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], MMX_SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+  // MMXI_binop_rm - Simple MMX binary operator.
+  multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           ValueType OpVT, bit Commutable = 0> {
+    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+                  (ins VR64:$src1, VR64:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
+      let isCommutable = Commutable;
+    }
+    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+                  (ins VR64:$src1, i64mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
+                                         (bitconvert (load_mmx addr:$src2)))))]>;
+  }
+
+  multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+                               bit Commutable = 0> {
+    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+                  (ins VR64:$src1, VR64:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+      let isCommutable = Commutable;
+    }
+    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+                  (ins VR64:$src1, i64mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1,
+                                    (bitconvert (load_mmx addr:$src2))))]>;
+  }
+
+  // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
+  //
+  // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
+  // to collapse (bitconvert VT to VT) into its operand.
+  //
+  multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                 bit Commutable = 0> {
+    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+                  (ins VR64:$src1, VR64:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
+      let isCommutable = Commutable;
+    }
+    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+                  (ins VR64:$src1, i64mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst,
+                    (OpNode VR64:$src1, (load_mmx addr:$src2)))]>;
+  }
+
+  multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+                                string OpcodeStr, Intrinsic IntId,
+                                Intrinsic IntId2> {
+    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+                  (ins VR64:$src1, VR64:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+                  (ins VR64:$src1, i64mem:$src2),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1,
+                                    (bitconvert (load_mmx addr:$src2))))]>;
+    def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+                    (ins VR64:$src1, i32i8imm:$src2),
+                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                    [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS & FEMMS Instructions
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+                        "movd\t{$src, $dst|$dst, $src}",
+                        [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+                        "movd\t{$src, $dst|$dst, $src}",
+                        [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+                        "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+                             "movd\t{$src, $dst|$dst, $src}",
+                             []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+                               (outs GR64:$dst), (ins VR64:$src),
+                               "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+                        "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(store (v1i64 VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
+                          "movdq2q\t{$src, $dst|$dst, $src}",
+                          [(set VR64:$dst,
+                            (v1i64 (bitconvert
+                              (i64 (vector_extract (v2i64 VR128:$src),
+                                    (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
+                           "movq2dq\t{$src, $dst|$dst, $src}",
+                           [(set VR128:$dst,
+                             (movl immAllZerosV,
+                               (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
+                            "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+                        "movntq\t{$src, $dst|$dst, $src}",
+                        [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+                            "movd\t{$src, $dst|$dst, $src}",
+                            [(set VR64:$dst,
+                              (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+                            "movd\t{$src, $dst|$dst,
$src}", + [(set VR64:$dst, + (v2i32 (X86vzmovl (v2i32 + (scalar_to_vector (loadi32 addr:$src))))))]>; + +// Arithmetic Instructions + +// -- Addition +defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>; +defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>; +defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>; +defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>; + +defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; +defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; + +defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; +defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; + +// -- Subtraction +defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>; +defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>; +defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>; +defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>; + +defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; +defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; + +defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; +defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; + +// -- Multiplication +defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>; + +defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; +defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; +defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>; + +// -- Miscellanea +defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>; + +defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>; +defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>; + +defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>; +defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>; + +defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>; +defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; + +defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; + +// Logical Instructions +defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; +defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; +defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; + +let isTwoAddress = 1 in { + def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), + VR64:$src2)))]>; + def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), + (load addr:$src2))))]>; +} + +// Shift Instructions +defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", + int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>; +defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", + int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>; +defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", + int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>; + +defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", + int_x86_mmx_psll_w, int_x86_mmx_pslli_w>; +defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", + int_x86_mmx_psll_d, int_x86_mmx_pslli_d>; +defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, 
"psllq", + int_x86_mmx_psll_q, int_x86_mmx_pslli_q>; + +defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", + int_x86_mmx_psra_w, int_x86_mmx_psrai_w>; +defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", + int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; + +// Shift up / down and insert zero's. +def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))), + (v1i64 (MMX_PSLLQri VR64:$src, imm:$amt))>; +def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))), + (v1i64 (MMX_PSRLQri VR64:$src, imm:$amt))>; + +// Comparison Instructions +defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; +defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>; +defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>; + +defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>; +defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>; +defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; + +// Conversion Instructions + +// -- Unpack Instructions +let isTwoAddress = 1 in { + // Unpack High Packed Data Instructions + def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpckhbw\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpckhbw\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v8i8 (mmx_unpckh VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; + + def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpckhwd\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpckhwd\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v4i16 (mmx_unpckh VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)))))]>; + + def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpckhdq\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpckhdq\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v2i32 (mmx_unpckh VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; + + // Unpack Low Packed Data Instructions + def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpcklbw\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpcklbw\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v8i8 (mmx_unpckl VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; + + def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpcklwd\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpcklwd\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v4i16 (mmx_unpckl VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)))))]>; + + def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + "punpckldq\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v2i32 
(mmx_unpckl VR64:$src1, VR64:$src2)))]>; + def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + "punpckldq\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (v2i32 (mmx_unpckl VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; +} + +// -- Pack Instructions +defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>; +defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>; +defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>; + +// -- Shuffle Instructions +def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), + "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR64:$dst, + (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; +def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, + (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), + "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR64:$dst, + (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), + (undef)))]>; + +// -- Conversion Instructions +let neverHasSideEffects = 1 in { +def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), + "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; + +def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), + "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; + +def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), + "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; + +def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvtps2pi\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), + "cvtps2pi\t{$src, $dst|$dst, $src}", []>; + +def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), + "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; + +def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvttps2pi\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in +def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), + "cvttps2pi\t{$src, $dst|$dst, $src}", []>; +} // end neverHasSideEffects + + +// Extract / Insert +def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>; +def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; + +def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, + (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), + "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), + (iPTR imm:$src2)))]>; +let isTwoAddress = 1 in { + def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, + (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3), + "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), + GR32:$src2, (iPTR imm:$src3))))]>; + def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, + (outs VR64:$dst), (ins VR64:$src1, 
i16mem:$src2, i16i8imm:$src3), + "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR64:$dst, + (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), + (i32 (anyext (loadi16 addr:$src2))), + (iPTR imm:$src3))))]>; +} + +// Mask creation +def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>; + +// Misc. +let Uses = [EDI] in +def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask), + "maskmovq\t{$mask, $src|$src, $mask}", + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>; +let Uses = [RDI] in +def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask), + "maskmovq\t{$mask, $src|$src, $mask}", + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map zero vector to pxor. +let isReMaterializable = 1 in { + def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), + "pxor\t$dst, $dst", + [(set VR64:$dst, (v2i32 immAllZerosV))]>; + def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), + "pcmpeqd\t$dst, $dst", + [(set VR64:$dst, (v2i32 immAllOnesV))]>; +} + +let Predicates = [HasMMX] in { + def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>; + def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>; + def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// Store 64-bit integer vector values. +def : Pat<(store (v8i8 VR64:$src), addr:$dst), + (MMX_MOVQ64mr addr:$dst, VR64:$src)>; +def : Pat<(store (v4i16 VR64:$src), addr:$dst), + (MMX_MOVQ64mr addr:$dst, VR64:$src)>; +def : Pat<(store (v2i32 VR64:$src), addr:$dst), + (MMX_MOVQ64mr addr:$dst, VR64:$src)>; +def : Pat<(store (v2f32 VR64:$src), addr:$dst), + (MMX_MOVQ64mr addr:$dst, VR64:$src)>; +def : Pat<(store (v1i64 VR64:$src), addr:$dst), + (MMX_MOVQ64mr addr:$dst, VR64:$src)>; + +// Bit convert. 
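An editorial note on the bitconvert block that follows: all of these 64-bit vector types share the VR64 register class, so a cast between them selects to nothing.

    // A bitconvert between two VR64-class types is a no-op at the machine
    // level: the pattern result is just the source register, retyped, e.g.
    //   (v4i16 (bitconvert (v1i64 VR64:$src)))  ==>  VR64:$src
    // No instruction is emitted; only the SelectionDAG value type changes.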
+def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; +def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; +def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>; +def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; + +// 64-bit bit convert. +def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), + (MMX_MOVD64to64rr GR64:$src)>; +def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), + (MMX_MOVD64to64rr GR64:$src)>; +def : Pat<(v2f32 (bitconvert (i64 GR64:$src))), + (MMX_MOVD64to64rr GR64:$src)>; +def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), + (MMX_MOVD64to64rr GR64:$src)>; +def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), + (MMX_MOVD64to64rr GR64:$src)>; +def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), + (MMX_MOVD64from64rr VR64:$src)>; +def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), + (MMX_MOVD64from64rr VR64:$src)>; +def : Pat<(i64 (bitconvert (v2f32 VR64:$src))), + (MMX_MOVD64from64rr VR64:$src)>; +def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), + (MMX_MOVD64from64rr VR64:$src)>; +def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), + (MMX_MOVD64from64rr VR64:$src)>; +def : Pat<(f64 (bitconvert (v1i64 VR64:$src))), + (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(f64 (bitconvert (v2i32 VR64:$src))), + (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), + (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), + (MMX_MOVQ2FR64rr VR64:$src)>; + +// Move scalar to MMX zero-extended +// movd to MMX register zero-extends +let AddedComplexity = 15 in { + def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))), + (MMX_MOVZDI2PDIrr GR32:$src)>; + def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))), + (MMX_MOVZDI2PDIrr GR32:$src)>; +} + +let AddedComplexity = 20 in { + def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))), + (MMX_MOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))), + (MMX_MOVZDI2PDIrm addr:$src)>; + def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), + (MMX_MOVZDI2PDIrm addr:$src)>; +} + +// Clear top half. 
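How the "clear top half" patterns below do their job, sketched in assembly (illustrative only; register choices are arbitrary):

    //   pxor      %mm1, %mm1   ; MMX_V_SET0: the all-zeros register
    //   punpckldq %mm1, %mm0   ; interleave low dwords: mm0[63:32] = 0,
    //                          ; mm0[31:0] unchanged -- top half cleared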
+let AddedComplexity = 15 in {
+  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+  def : Pat<(v2i32 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+}
+
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
+// 8 or 16 bits matter.
+def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+          (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+          (MMX_MOVD64rr GR32:$src)>;
+
+// Patterns to perform canonical versions of vector shuffling.
+let AddedComplexity = 10 in {
+  def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
+            (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
+  def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
+            (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
+  def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
+            (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
+}
+
+let AddedComplexity = 10 in {
+  def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
+            (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
+  def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
+            (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
+  def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
+            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// Patterns to perform vector shuffling with a zeroed out vector.
+let AddedComplexity = 20 in {
+  def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
+                       (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
+            (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
+}
+
+// Some special case PANDN patterns.
+// FIXME: Get rid of these.
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+                      VR64:$src2)),
+          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+                      VR64:$src2)),
+          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+                      VR64:$src2)),
+          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+                      (load addr:$src2))),
+          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+                      (load addr:$src2))),
+          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+                      (load addr:$src2))),
+          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+
+// Move MMX to the lower 64 bits of an XMM register.
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+// Move the lower 64 bits of an XMM register to MMX.
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                   (iPTR 0))))),
+          (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                   (iPTR 0))))),
+          (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+                                  (iPTR 0))))),
+          (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+// CMOV* - Used to implement the SELECT DAG operation; expanded by the
+// scheduler into a branch sequence.
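A sketch of what the custom inserter makes of CMOV_V1I64 (the MMX register class has no conditional move, so a control-flow diamond is built; the block labels and condition code below are illustrative):

    //     jcc   take_true       ; branch on the saved EFLAGS condition
    //     movq  %f, %result     ; false edge
    //     jmp   done
    //   take_true:
    //     movq  %t, %result     ; true edge
    //   done:                   ; (in machine SSA, %result is really a PHI)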
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in { + def CMOV_V1I64 : I<0, Pseudo, + (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond), + "#CMOV_V1I64 PSEUDO!", + [(set VR64:$dst, + (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond, + EFLAGS)))]>; +} diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td new file mode 100644 index 000000000000..1fafa46fa2d8 --- /dev/null +++ b/lib/Target/X86/X86InstrSSE.td @@ -0,0 +1,3643 @@ +//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 SSE instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// SSE specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, + SDTCisFP<0>, SDTCisInt<2> ]>; +def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisFP<1>, SDTCisVT<3, i8>]>; + +def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; +def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; +def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; +def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; +def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86pshufb : SDNode<"X86ISD::PSHUFB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pextrb : SDNode<"X86ISD::PEXTRB", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pextrw : SDNode<"X86ISD::PEXTRW", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pinsrb : SDNode<"X86ISD::PINSRB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86pinsrw : SDNode<"X86ISD::PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86insrtps : SDNode<"X86ISD::INSERTPS", + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, + SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>; +def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad]>; +def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; +def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; +def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; +def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; +def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", 
SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+                                  [SDNPHasChain, SDNPMayLoad]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+                                  [SDNPHasChain, SDNPMayLoad]>;
+
+def ssmem : Operand<v4f32> {
+  let PrintMethod = "printf32mem";
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+def sdmem : Operand<v2f64> {
+  let PrintMethod = "printf64mem";
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                           (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;
+def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others.
+// FIXME: Actually implement support for targets that don't require the
+// alignment. This probably wants a subtarget predicate.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
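[Editor's note: the '[{ ... }]' blocks in the fragments above are C++ predicates that
the instruction selector runs before a pattern may match; 'alignedload' and 'memop'
only fire when the load is known to be at least 16-byte aligned. A minimal standalone
sketch of the same check (plain C++, not the LLVM API; 'isAligned16' is an
illustrative name):

    #include <cstdint>
    #include <cstdio>

    // Mirrors the predicate in 'alignedload'/'memop': a 128-bit SSE memory
    // operand may only be folded when the address is at least 16-byte
    // aligned (movaps-style accesses fault otherwise).
    static bool isAligned16(const void *p) {
      return (reinterpret_cast<uintptr_t>(p) & 15u) == 0;
    }

    int main() {
      alignas(16) float buf[8] = {};
      std::printf("buf+0: %d\n", isAligned16(buf));      // prints 1
      std::printf("buf+1: %d\n", isAligned16(buf + 1));  // prints 0
    }
]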
+// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
+// 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
+def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
+def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+                           (bitconvert (v2i64 (X86vzmovl
+                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+                           (bitconvert (v4i32 (X86vzmovl
+                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+                           (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
+
+def PSxLDQ_imm : SDNodeXForm<imm, [{
+  // Transformation function: imm >> 3
+  return getI32Imm(N->getZExtValue() >> 3);
+}]>;
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+  return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
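[Editor's note: two of the transforms above deserve a remark. PSxLDQ_imm converts
the DAG's bit-shift count into the byte count PSLLDQ/PSRLDQ expect (hence the
'>> 3'), and SHUFFLE_get_shuf_imm packs a four-lane shuffle mask into the 8-bit
PSHUFD/SHUFPS immediate, two bits per destination lane with lane 0 in the low
bits. A sketch of that packing (plain C++; 'shufImm' is illustrative, not
X86::getShuffleSHUFImmediate itself):

    #include <cstdint>
    #include <cstdio>

    // Packs a 4-lane shuffle mask into the PSHUFD/SHUFPS immediate:
    // two bits per destination lane, lane 0 occupying bits 1:0.
    static uint8_t shufImm(const int mask[4]) {
      uint8_t imm = 0;
      for (int i = 3; i >= 0; --i) {
        imm <<= 2;
        imm |= mask[i] & 3;
      }
      return imm;
    }

    int main() {
      const int splat0[4] = {0, 0, 0, 0};  // broadcast lane 0
      const int rev[4]    = {3, 2, 1, 0};  // reverse the lanes
      std::printf("splat0 -> 0x%02x\n", shufImm(splat0));  // 0x00
      std::printf("rev    -> 0x%02x\n", shufImm(rev));     // 0x1b
    }
]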
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+                       (vector_shuffle node:$lhs, node:$rhs), [{
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+                            (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhp : PatFrag<(ops node:$lhs, node:$rhs),
+                    (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+                    (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+                       (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+                       (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+                           (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+                           (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+                    (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in { + def CMOV_FR32 : I<0, Pseudo, + (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond), + "#CMOV_FR32 PSEUDO!", + [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond, + EFLAGS))]>; + def CMOV_FR64 : I<0, Pseudo, + (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond), + "#CMOV_FR64 PSEUDO!", + [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond, + EFLAGS))]>; + def CMOV_V4F32 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V4F32 PSEUDO!", + [(set VR128:$dst, + (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V2F64 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V2F64 PSEUDO!", + [(set VR128:$dst, + (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V2I64 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V2I64 PSEUDO!", + [(set VR128:$dst, + (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; +} + +//===----------------------------------------------------------------------===// +// SSE1 Instructions +//===----------------------------------------------------------------------===// + +// Move Instructions +let neverHasSideEffects = 1 in +def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movss\t{$src, $dst|$dst, $src}", []>; +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + "movss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (loadf32 addr:$src))]>; +def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>; + +// Conversion instructions +def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), + "cvttss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (fp_to_sint FR32:$src))]>; +def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), + "cvttss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>; +def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "cvtsi2ss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp GR32:$src))]>; +def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "cvtsi2ss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; + +// Match intrinsics which expect XMM operand(s). +def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "cvtss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>; +def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), + "cvtss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse_cvtss2si + (load addr:$src)))]>; + +// Match intrinisics which expect MM and XMM operand(s). 
+def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvtps2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; +def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), + "cvtps2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvtps2pi + (load addr:$src)))]>; +def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvttps2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; +def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), + "cvttps2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvttps2pi + (load addr:$src)))]>; +let Constraints = "$src1 = $dst" in { + def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), + "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, + VR64:$src2))]>; + def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), + "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, + (load addr:$src2)))]>; +} + +// Aliases for intrinsics +def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "cvttss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_sse_cvttss2si VR128:$src))]>; +def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), + "cvttss2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_sse_cvttss2si(load addr:$src)))]>; + +let Constraints = "$src1 = $dst" in { + def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), + "cvtsi2ss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, + GR32:$src2))]>; + def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), + "cvtsi2ss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, + (loadi32 addr:$src2)))]>; +} + +// Comparison instructions +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + def CMPSSrr : SSIi8<0xC2, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), + "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in + def CMPSSrm : SSIi8<0xC2, MRMSrcMem, + (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), + "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; +} + +let Defs = [EFLAGS] in { +def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), + "ucomiss\t{$src2, $src1|$src1, $src2}", + [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>; +def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), + "ucomiss\t{$src2, $src1|$src1, $src2}", + [(X86cmp FR32:$src1, (loadf32 addr:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Aliases to match intrinsics which expect XMM operand(s). 
+let Constraints = "$src1 = $dst" in { + def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + "cmp${cc}ss\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, + VR128:$src, imm:$cc))]>; + def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), + "cmp${cc}ss\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, + (load addr:$src), imm:$cc))]>; +} + +let Defs = [EFLAGS] in { +def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "ucomiss\t{$src2, $src1|$src1, $src2}", + [(X86ucomi (v4f32 VR128:$src1), VR128:$src2), + (implicit EFLAGS)]>; +def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), + "ucomiss\t{$src2, $src1|$src1, $src2}", + [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)), + (implicit EFLAGS)]>; + +def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "comiss\t{$src2, $src1|$src1, $src2}", + [(X86comi (v4f32 VR128:$src1), VR128:$src2), + (implicit EFLAGS)]>; +def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "comiss\t{$src2, $src1|$src1, $src2}", + [(X86comi (v4f32 VR128:$src1), (load addr:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Aliases of packed SSE1 instructions for scalar use. These all have names that +// start with 'Fs'. + +// Alias instructions that map fld0 to pxor for sse. +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), + "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; + +// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are +// disregarded. +let neverHasSideEffects = 1 in +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; + +// Alias instruction to load FR32 from f128mem using movaps. Upper bits are +// disregarded. +let canFoldAsLoad = 1 in +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; + +// Alias bitwise logical operations using SSE logical ops on packed FP values. 
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+  def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
+                      (ins FR32:$src1, FR32:$src2),
+                      "andps\t{$src2, $dst|$dst, $src2}",
+                      [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
+  def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
+                     (ins FR32:$src1, FR32:$src2),
+                     "orps\t{$src2, $dst|$dst, $src2}",
+                     [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
+  def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
+                      (ins FR32:$src1, FR32:$src2),
+                      "xorps\t{$src2, $dst|$dst, $src2}",
+                      [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
+}
+
+def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
+                    (ins FR32:$src1, f128mem:$src2),
+                    "andps\t{$src2, $dst|$dst, $src2}",
+                    [(set FR32:$dst, (X86fand FR32:$src1,
+                                      (memopfsf32 addr:$src2)))]>;
+def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
+                   (ins FR32:$src1, f128mem:$src2),
+                   "orps\t{$src2, $dst|$dst, $src2}",
+                   [(set FR32:$dst, (X86for FR32:$src1,
+                                     (memopfsf32 addr:$src2)))]>;
+def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
+                    (ins FR32:$src1, f128mem:$src2),
+                    "xorps\t{$src2, $dst|$dst, $src2}",
+                    [(set FR32:$dst, (X86fxor FR32:$src1,
+                                      (memopfsf32 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPSrr : PSI<0x55, MRMSrcReg,
+                     (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+                     "andnps\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPSrm : PSI<0x55, MRMSrcMem,
+                     (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
+                     "andnps\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                                  SDNode OpNode, Intrinsic F32Int,
+                                  bit Commutable = 0> {
+  // Scalar operation, reg+reg.
+  def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+                 (ins FR32:$src1, FR32:$src2),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, reg+mem.
+  def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                 (ins FR32:$src1, f32mem:$src2),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+  // Vector operation, reg+reg.
+  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+                 (ins VR128:$src1, VR128:$src2),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, reg+mem.
+  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+                 (ins VR128:$src1, f128mem:$src2),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (OpNode VR128:$src1,
+                                    (memopv4f32 addr:$src2)))]>;
+
+  // Intrinsic operation, reg+reg.
+  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
+
+  // Intrinsic operation, reg+mem.
+  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, ssmem:$src2),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F32Int VR128:$src1,
+                                        sse_load_f32:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
+defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
+defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
+defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
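[Editor's note: as the comment above says, the _Int forms consume whole vectors but
combine only element 0, passing the upper elements of the first operand through
unchanged, which is why they are never marked commutable. A small plain-C++
emulation of that 'addss'-style semantics (illustrative only, not LLVM code):

    #include <cstdio>

    // Emulates the register form of 'addss': only element 0 is added;
    // elements 1-3 come from the first (destination) operand. Swapping
    // a and b changes the result, so this operation is not commutable.
    static void addss(const float a[4], const float b[4], float out[4]) {
      out[0] = a[0] + b[0];
      for (int i = 1; i < 4; ++i)
        out[i] = a[i];  // upper elements pass through from operand 1
    }

    int main() {
      const float a[4] = {1, 10, 20, 30}, b[4] = {2, -1, -1, -1};
      float r[4];
      addss(a, b, r);
      std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  // 3 10 20 30
    }
]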
+/// sse1_fp_binop_rm - Other SSE1 binops
+///
+/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                            SDNode OpNode,
+                            Intrinsic F32Int,
+                            Intrinsic V4F32Int,
+                            bit Commutable = 0> {
+
+  // Scalar operation, reg+reg.
+  def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+                 (ins FR32:$src1, FR32:$src2),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, reg+mem.
+  def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                 (ins FR32:$src1, f32mem:$src2),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+  // Vector operation, reg+reg.
+  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+                 (ins VR128:$src1, VR128:$src2),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, reg+mem.
+  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+                 (ins VR128:$src1, f128mem:$src2),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (OpNode VR128:$src1,
+                                    (memopv4f32 addr:$src2)))]>;
+
+  // Intrinsic operation, reg+reg.
+  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Intrinsic operation, reg+mem.
+  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, ssmem:$src2),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F32Int VR128:$src1,
+                                        sse_load_f32:$src2))]>;
+
+  // Vector intrinsic operation, reg+reg.
+  def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector intrinsic operation, reg+mem.
+  def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, f128mem:$src2),
+                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (V4F32Int VR128:$src1,
+                                        (memopv4f32 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
+                            int_x86_sse_max_ss, int_x86_sse_max_ps>;
+defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
+                            int_x86_sse_min_ss, int_x86_sse_min_ps>;
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                   "movaps\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                   "movaps\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movaps\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                   "movups\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                   "movups\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movups\t{$src, $dst|$dst, $src}",
+                   [(store (v4f32 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPS load and store
+let canFoldAsLoad = 1 in
+def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "movups\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                       "movups\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+  let AddedComplexity = 20 in {
+    def MOVLPSrm : PSI<0x12, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+                       "movlps\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (movlp VR128:$src1,
+                           (bc_v4f32 (v2f64 (scalar_to_vector
+                                             (loadf64 addr:$src2))))))]>;
+    def MOVHPSrm : PSI<0x16, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+                       "movhps\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (movhp VR128:$src1,
+                           (bc_v4f32 (v2f64 (scalar_to_vector
+                                             (loadf64 addr:$src2))))))]>;
+  } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+                                 (iPTR 0))), addr:$dst)]>;
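[Editor's note: for reference, the MOVLPSrm pattern above replaces only the low
64 bits of the destination register with the loaded value, leaving the high
lanes intact; MOVHPSrm is the mirror image for the high half. A sketch of the
lane behavior (plain C++, illustrative only):

    #include <cstdio>
    #include <cstring>

    // movlps semantics: a 64-bit load overwrites lanes 0-1 of a 4-float
    // register while lanes 2-3 are preserved.
    static void movlps(float dst[4], const void *mem) {
      std::memcpy(dst, mem, 8);  // copy two floats into lanes 0-1
    }

    int main() {
      float v[4] = {0, 0, 7, 8};
      const float m[2] = {1, 2};
      movlps(v, m);
      std::printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]);  // 1 2 7 8
    }
]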
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movhps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract
+                                 (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+                                         (undef)), (iPTR 0))), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let AddedComplexity = 20 in {
+def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src2),
+                    "movlhps\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst,
+                      (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+
+def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src2),
+                    "movhlps\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst,
+                      (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+} // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+          (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+          (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+
+
+// Arithmetic
+
+/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
+                           SDNode OpNode,
+                           Intrinsic F32Int,
+                           Intrinsic V4F32Int,
+                           bit Commutable = 0> {
+  // Scalar operation, reg.
+  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode FR32:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, mem.
+  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode (load addr:$src)))]>;
+
+  // Vector operation, reg.
+  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, mem.
+  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+
+  // Intrinsic operation, reg.
+  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F32Int VR128:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Intrinsic operation, mem.
+  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+
+  // Vector intrinsic operation, reg
+  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector intrinsic operation, mem
+  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
+                            int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt, + int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>; +defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp, + int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>; + +// Logical +let Constraints = "$src1 = $dst" in { + let isCommutable = 1 in { + def ANDPSrr : PSI<0x54, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "andps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 + (and VR128:$src1, VR128:$src2)))]>; + def ORPSrr : PSI<0x56, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "orps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 + (or VR128:$src1, VR128:$src2)))]>; + def XORPSrr : PSI<0x57, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "xorps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 + (xor VR128:$src1, VR128:$src2)))]>; + } + + def ANDPSrm : PSI<0x54, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "andps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def ORPSrm : PSI<0x56, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "orps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def XORPSrm : PSI<0x57, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "xorps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def ANDNPSrr : PSI<0x55, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "andnps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (and (xor VR128:$src1, + (bc_v2i64 (v4i32 immAllOnesV))), + VR128:$src2)))]>; + def ANDNPSrm : PSI<0x55, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), + "andnps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), + (bc_v2i64 (v4i32 immAllOnesV))), + (memopv2i64 addr:$src2))))]>; +} + +let Constraints = "$src1 = $dst" in { + def CMPPSrri : PSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + "cmp${cc}ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, + VR128:$src, imm:$cc))]>; + def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), + "cmp${cc}ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, + (memop addr:$src), imm:$cc))]>; +} +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + +// Shuffle and unpack instructions +let Constraints = "$src1 = $dst" in { + let isConvertibleToThreeAddress = 1 in // Convert to pshufd + def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, + VR128:$src2, i8imm:$src3), + "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, + f128mem:$src2, i8imm:$src3), + "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (v4f32 (shufp:$src3 + VR128:$src1, (memopv4f32 addr:$src2))))]>; + + let AddedComplexity = 10 in { + def UNPCKHPSrr : PSI<0x15, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "unpckhps\t{$src2, $dst|$dst, 
$src2}", + [(set VR128:$dst, + (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; + def UNPCKHPSrm : PSI<0x15, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "unpckhps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (unpckh VR128:$src1, + (memopv4f32 addr:$src2))))]>; + + def UNPCKLPSrr : PSI<0x14, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "unpcklps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; + def UNPCKLPSrm : PSI<0x14, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "unpcklps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; + } // AddedComplexity +} // Constraints = "$src1 = $dst" + +// Mask creation +def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>; +def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>; + +// Prefetch intrinsic. +def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; +def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; +def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; +def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + +// Non-temporal stores +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; + +// Load, store, and memory fence +def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; + +// MXCSR register +def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), + "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; +def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), + "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>; + +// Alias instructions that map zero vector to pxor / xorp* for sse. +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in +def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), + "xorps\t$dst, $dst", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; + +let Predicates = [HasSSE1] in { + def : Pat<(v2i64 immAllZerosV), (V_SET0)>; + def : Pat<(v8i16 immAllZerosV), (V_SET0)>; + def : Pat<(v16i8 immAllZerosV), (V_SET0)>; + def : Pat<(v2f64 immAllZerosV), (V_SET0)>; + def : Pat<(v4f32 immAllZerosV), (V_SET0)>; +} + +// FR32 to 128-bit vector conversion. +let isAsCheapAsAMove = 1 in +def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4f32 (scalar_to_vector FR32:$src)))]>; +def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), + "movss\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; + +// FIXME: may not be able to eliminate this movss with coalescing the src and +// dest register classes are different. 
We really want to write this pattern +// like this: +// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), +// (f32 FR32:$src)>; +let isAsCheapAsAMove = 1 in +def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src), + "movss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (vector_extract (v4f32 VR128:$src), + (iPTR 0)))]>; +def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store (f32 (vector_extract (v4f32 VR128:$src), + (iPTR 0))), addr:$dst)]>; + + +// Move to lower bits of a VR128, leaving upper bits alone. +// Three operand (but two address) aliases. +let Constraints = "$src1 = $dst" in { +let neverHasSideEffects = 1 in + def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", []>; + + let AddedComplexity = 15 in + def MOVLPSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "movss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movl VR128:$src1, VR128:$src2)))]>; +} + +// Move to lower bits of a VR128 and zeroing upper bits. +// Loading from memory automatically zeroing upper bits. +let AddedComplexity = 20 in +def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), + "movss\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector + (loadf32 addr:$src))))))]>; + +def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (MOVZSS2PSrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// SSE2 Instructions +//===----------------------------------------------------------------------===// + +// Move Instructions +let neverHasSideEffects = 1 in +def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", []>; +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (loadf64 addr:$src))]>; +def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>; + +// Conversion instructions +def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), + "cvttsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (fp_to_sint FR64:$src))]>; +def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src), + "cvttsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>; +def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), + "cvtsd2ss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (fround FR64:$src))]>; +def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), + "cvtsd2ss\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (fround (loadf64 addr:$src)))]>; +def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src), + "cvtsi2sd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp GR32:$src))]>; +def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src), + "cvtsi2sd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>; + +// SSE2 instructions with XS prefix +def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), + "cvtss2sd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (fextend FR32:$src))]>, XS, + Requires<[HasSSE2]>; +def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins 
f32mem:$src), + "cvtss2sd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, + Requires<[HasSSE2]>; + +// Match intrinsics which expect XMM operand(s). +def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "cvtsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>; +def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), + "cvtsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_cvtsd2si + (load addr:$src)))]>; + +// Match intrinisics which expect MM and XMM operand(s). +def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvtpd2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; +def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), + "cvtpd2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvtpd2pi + (memop addr:$src)))]>; +def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), + "cvttpd2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; +def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), + "cvttpd2pi\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, (int_x86_sse_cvttpd2pi + (memop addr:$src)))]>; +def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), + "cvtpi2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; +def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtpi2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_cvtpi2pd + (load addr:$src)))]>; + +// Aliases for intrinsics +def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "cvttsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_sse2_cvttsd2si VR128:$src))]>; +def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), + "cvttsd2si\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_cvttsd2si + (load addr:$src)))]>; + +// Comparison instructions +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + def CMPSDrr : SDIi8<0xC2, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), + "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; +let mayLoad = 1 in + def CMPSDrm : SDIi8<0xC2, MRMSrcMem, + (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), + "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; +} + +let Defs = [EFLAGS] in { +def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), + "ucomisd\t{$src2, $src1|$src1, $src2}", + [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>; +def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), + "ucomisd\t{$src2, $src1|$src1, $src2}", + [(X86cmp FR64:$src1, (loadf64 addr:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Aliases to match intrinsics which expect XMM operand(s). 
+let Constraints = "$src1 = $dst" in { + def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + "cmp${cc}sd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, + VR128:$src, imm:$cc))]>; + def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), + "cmp${cc}sd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, + (load addr:$src), imm:$cc))]>; +} + +let Defs = [EFLAGS] in { +def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "ucomisd\t{$src2, $src1|$src1, $src2}", + [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), + (implicit EFLAGS)]>; +def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), + "ucomisd\t{$src2, $src1|$src1, $src2}", + [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)), + (implicit EFLAGS)]>; + +def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", + [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), + (implicit EFLAGS)]>; +def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", + [(X86comi (v2f64 VR128:$src1), (load addr:$src2)), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + +// Aliases of packed SSE2 instructions for scalar use. These all have names that +// start with 'Fs'. + +// Alias instructions that map fld0 to pxor for sse. +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), + "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, + Requires<[HasSSE2]>, TB, OpSize; + +// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are +// disregarded. +let neverHasSideEffects = 1 in +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; + +// Alias instruction to load FR64 from f128mem using movapd. Upper bits are +// disregarded. +let canFoldAsLoad = 1 in +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; + +// Alias bitwise logical operations using SSE logical ops on packed FP values. 
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+  def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
+                      (ins FR64:$src1, FR64:$src2),
+                      "andpd\t{$src2, $dst|$dst, $src2}",
+                      [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
+  def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
+                     (ins FR64:$src1, FR64:$src2),
+                     "orpd\t{$src2, $dst|$dst, $src2}",
+                     [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
+  def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
+                      (ins FR64:$src1, FR64:$src2),
+                      "xorpd\t{$src2, $dst|$dst, $src2}",
+                      [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
+}
+
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
+                    (ins FR64:$src1, f128mem:$src2),
+                    "andpd\t{$src2, $dst|$dst, $src2}",
+                    [(set FR64:$dst, (X86fand FR64:$src1,
+                                      (memopfsf64 addr:$src2)))]>;
+def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
+                   (ins FR64:$src1, f128mem:$src2),
+                   "orpd\t{$src2, $dst|$dst, $src2}",
+                   [(set FR64:$dst, (X86for FR64:$src1,
+                                     (memopfsf64 addr:$src2)))]>;
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
+                    (ins FR64:$src1, f128mem:$src2),
+                    "xorpd\t{$src2, $dst|$dst, $src2}",
+                    [(set FR64:$dst, (X86fxor FR64:$src1,
+                                      (memopfsf64 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPDrr : PDI<0x55, MRMSrcReg,
+                     (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+                     "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPDrm : PDI<0x55, MRMSrcMem,
+                     (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+                     "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                                  SDNode OpNode, Intrinsic F64Int,
+                                  bit Commutable = 0> {
+  // Scalar operation, reg+reg.
+  def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+                 (ins FR64:$src1, FR64:$src2),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, reg+mem.
+  def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+                 (ins FR64:$src1, f64mem:$src2),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+  // Vector operation, reg+reg.
+  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+                 (ins VR128:$src1, VR128:$src2),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, reg+mem.
+  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+                 (ins VR128:$src1, f128mem:$src2),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (OpNode VR128:$src1,
+                                    (memopv2f64 addr:$src2)))]>;
+
+  // Intrinsic operation, reg+reg.
+  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
+
+  // Intrinsic operation, reg+mem.
+  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, sdmem:$src2),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F64Int VR128:$src1,
+                                        sse_load_f64:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
+defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
+defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
+defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
+/// sse2_fp_binop_rm - Other SSE2 binops
+///
+/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                            SDNode OpNode,
+                            Intrinsic F64Int,
+                            Intrinsic V2F64Int,
+                            bit Commutable = 0> {
+
+  // Scalar operation, reg+reg.
+  def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+                 (ins FR64:$src1, FR64:$src2),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, reg+mem.
+  def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+                 (ins FR64:$src1, f64mem:$src2),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+  // Vector operation, reg+reg.
+  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+                 (ins VR128:$src1, VR128:$src2),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, reg+mem.
+  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+                 (ins VR128:$src1, f128mem:$src2),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                 [(set VR128:$dst, (OpNode VR128:$src1,
+                                    (memopv2f64 addr:$src2)))]>;
+
+  // Intrinsic operation, reg+reg.
+  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Intrinsic operation, reg+mem.
+  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, sdmem:$src2),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (F64Int VR128:$src1,
+                                        sse_load_f64:$src2))]>;
+
+  // Vector intrinsic operation, reg+reg.
+  def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+                     (ins VR128:$src1, VR128:$src2),
+                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector intrinsic operation, reg+mem.
+  def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+                     (ins VR128:$src1, f128mem:$src2),
+                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                     [(set VR128:$dst, (V2F64Int VR128:$src1,
+                                        (memopv2f64 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
+                            int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
+defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
+                            int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                   "movapd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                   "movapd\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
+
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movapd\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                   "movupd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                   "movupd\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movupd\t{$src, $dst|$dst, $src}",
+                   [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPD load and store
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "movupd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                       "movupd\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+  let AddedComplexity = 20 in {
+    def MOVLPDrm : PDI<0x12, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+                       "movlpd\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2f64 (movlp VR128:$src1,
+                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
+    def MOVHPDrm : PDI<0x16, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+                       "movhpd\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2f64 (movhp VR128:$src1,
+                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
+  } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                 (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; + +// SSE2 instructions without OpSize prefix +def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + TB, Requires<[HasSSE2]>; +def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps + (bitconvert (memopv2i64 addr:$src))))]>, + TB, Requires<[HasSSE2]>; + +// SSE2 instructions with XS prefix +def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, + XS, Requires<[HasSSE2]>; +def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd + (bitconvert (memopv2i64 addr:$src))))]>, + XS, Requires<[HasSSE2]>; + +def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>; +def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq + (memop addr:$src)))]>; +// SSE2 packed instructions with XS prefix +def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>, + XS, Requires<[HasSSE2]>; +def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memop addr:$src)))]>, + XS, Requires<[HasSSE2]>; + +// SSE2 packed instructions with XD prefix +def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, Requires<[HasSSE2]>; +def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, Requires<[HasSSE2]>; + +def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; +def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>; + +// SSE2 instructions without OpSize prefix +def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, + TB, Requires<[HasSSE2]>; +def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd + (load addr:$src)))]>, + TB, Requires<[HasSSE2]>; + +def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; +def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins 
f128mem:$src),
+                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+                                          (memop addr:$src)))]>;
+
+// Match intrinsics which expect XMM operand(s).
+// Aliases for intrinsics
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
+                        (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+                                           GR32:$src2))]>;
+def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
+                        (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+                                           (loadi32 addr:$src2)))]>;
+def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
+                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                        "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+                                           VR128:$src2))]>;
+def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
+                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+                        "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+                                           (load addr:$src2)))]>;
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                         VR128:$src2))]>, XS,
+                    Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                         (load addr:$src2)))]>, XS,
+                    Requires<[HasSSE2]>;
+}
+
+// Arithmetic
+
+/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
+                           SDNode OpNode,
+                           Intrinsic F64Int,
+                           Intrinsic V2F64Int,
+                           bit Commutable = 0> {
+  // Scalar operation, reg.
+  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+                [(set FR64:$dst, (OpNode FR64:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Scalar operation, mem.
+  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+                [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+
+  // Vector operation, reg.
+  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector operation, mem.
+  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+
+  // Intrinsic operation, reg.
+  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F64Int VR128:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Intrinsic operation, mem.
+  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+
+  // Vector intrinsic operation, reg
+  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
+    let isCommutable = Commutable;
+  }
+
+  // Vector intrinsic operation, mem
+  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
+                            int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
+
+// There is no f64 version of the reciprocal approximation instructions.
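[Editor's note: the rsqrtss/rcpss estimates mentioned in the SSE1 comment earlier
are only about 12 bits accurate, and compilers typically follow them with a
Newton-Raphson step to approach single precision; as noted just above, no
double-precision variants exist. One refinement step looks like this (plain C++
sketch; 'x0' stands in for the hardware estimate):

    #include <cmath>
    #include <cstdio>

    // One Newton-Raphson step of the kind emitted after an rsqrt estimate:
    // given x0 ~ 1/sqrt(a), x1 = x0*(1.5 - 0.5*a*x0*x0) roughly doubles
    // the number of correct bits.
    static float refineRsqrt(float a, float x0) {
      return x0 * (1.5f - 0.5f * a * x0 * x0);
    }

    int main() {
      float a = 2.0f;
      float x0 = 0.70f;  // coarse stand-in for the rsqrtss output
      float x1 = refineRsqrt(a, x0);
      std::printf("exact=%f x0=%f x1=%f\n", 1.0 / std::sqrt(2.0), x0, x1);
    }
]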
+ +// Logical +let Constraints = "$src1 = $dst" in { + let isCommutable = 1 in { + def ANDPDrr : PDI<0x54, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "andpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (and (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))]>; + def ORPDrr : PDI<0x56, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "orpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (or (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))]>; + def XORPDrr : PDI<0x57, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "xorpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (xor (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))]>; + } + + def ANDPDrm : PDI<0x54, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "andpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (and (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def ORPDrm : PDI<0x56, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "orpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (or (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def XORPDrm : PDI<0x57, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "xorpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (xor (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>; + def ANDNPDrr : PDI<0x55, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "andnpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (bc_v2i64 (v2f64 VR128:$src2))))]>; + def ANDNPDrm : PDI<0x55, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), + "andnpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (memopv2i64 addr:$src2)))]>; +} + +let Constraints = "$src1 = $dst" in { + def CMPPDrri : PDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + "cmp${cc}pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, + VR128:$src, imm:$cc))]>; + def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), + "cmp${cc}pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, + (memop addr:$src), imm:$cc))]>; +} +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + +// Shuffle and unpack instructions +let Constraints = "$src1 = $dst" in { + def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, + f128mem:$src2, i8imm:$src3), + "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (v2f64 (shufp:$src3 + VR128:$src1, (memopv2f64 addr:$src2))))]>; + + let AddedComplexity = 10 in { + def UNPCKHPDrr : PDI<0x15, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "unpckhpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; + def UNPCKHPDrm : PDI<0x15, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "unpckhpd\t{$src2, 
$dst|$dst, $src2}", + [(set VR128:$dst, + (v2f64 (unpckh VR128:$src1, + (memopv2f64 addr:$src2))))]>; + + def UNPCKLPDrr : PDI<0x14, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "unpcklpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; + def UNPCKLPDrm : PDI<0x14, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "unpcklpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; + } // AddedComplexity +} // Constraints = "$src1 = $dst" + + +//===----------------------------------------------------------------------===// +// SSE integer instructions + +// Move Instructions +let neverHasSideEffects = 1 in +def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>; +let canFoldAsLoad = 1, mayLoad = 1 in +def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; +let mayStore = 1 in +def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; +let canFoldAsLoad = 1, mayLoad = 1 in +def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqu\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, + XS, Requires<[HasSSE2]>; +let mayStore = 1 in +def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, + XS, Requires<[HasSSE2]>; + +// Intrinsic forms of MOVDQU load and store +let canFoldAsLoad = 1 in +def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, + XS, Requires<[HasSSE2]>; +def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, + XS, Requires<[HasSSE2]>; + +let Constraints = "$src1 = $dst" in { + +multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, + bit Commutable = 0> { + def rr : PDI { + let isCommutable = Commutable; + } + def rm : PDI; +} + +multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, + string OpcodeStr, + Intrinsic IntId, Intrinsic IntId2> { + def rr : PDI; + def rm : PDI; + def ri : PDIi8; +} + +/// PDI_binop_rm - Simple SSE2 binary operator. +multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Commutable = 0> { + def rr : PDI { + let isCommutable = Commutable; + } + def rm : PDI; +} + +/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. +/// +/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew +/// to collapse (bitconvert VT to VT) into its operand. 
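+///
+/// As a sketch of the expected expansion (illustrative, assuming the usual
+/// PDI encoding of these operators): "defm PAND" below yields roughly
+///   def PANDrr : PDI<0xDB, MRMSrcReg, (outs VR128:$dst),
+///                    (ins VR128:$src1, VR128:$src2),
+///                    "pand\t{$src2, $dst|$dst, $src2}",
+///                    [(set VR128:$dst, (v2i64 (and VR128:$src1,
+///                                                  VR128:$src2)))]>;
+/// plus a PANDrm form that folds (memopv2i64 addr:$src2). Logical ops on
+/// v16i8/v8i16/v4i32 reach these patterns after being bitconverted to v2i64.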
+/// +multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, + bit Commutable = 0> { + def rr : PDI { + let isCommutable = Commutable; + } + def rm : PDI; +} + +} // Constraints = "$src1 = $dst" + +// 128-bit Integer Arithmetic + +defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; +defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; +defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; +defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; + +defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; +defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; +defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; +defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; + +defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>; +defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>; +defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>; +defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; + +defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>; +defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>; +defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>; +defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>; + +defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; + +defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; +defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>; +defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; + +defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; + +defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; +defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; + + +defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; +defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; +defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; +defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; +defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; + + +defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", + int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; +defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", + int_x86_sse2_psll_d, int_x86_sse2_pslli_d>; +defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", + int_x86_sse2_psll_q, int_x86_sse2_pslli_q>; + +defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>; +defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>; +defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>; + +defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", + int_x86_sse2_psra_w, int_x86_sse2_psrai_w>; +defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", + int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; + +// 128-bit logical shifts. +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + def PSLLDQri : PDIi8<0x73, MRM7r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "pslldq\t{$src2, $dst|$dst, $src2}", []>; + def PSRLDQri : PDIi8<0x73, MRM3r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "psrldq\t{$src2, $dst|$dst, $src2}", []>; + // PSRADQri doesn't exist in SSE[1-3]. 
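+  // Note: pslldq/psrldq shift the full 128-bit register by a *byte* count,
+  // while the int_x86_sse2_psll_dq/psrl_dq intrinsics matched below encode a
+  // *bit* count; the PSxLDQ_imm transform in those patterns converts the bit
+  // count to bytes. The _dq_bs ("byte shift") intrinsic forms already count
+  // bytes and pass the immediate through unchanged.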
+} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), + (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), + (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), + (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>; + def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), + (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>; + def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), + (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; + + // Shift up / down and insert zero's. + def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), + (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>; + def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), + (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>; +} + +// Logical +defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; +defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>; +defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; + +let Constraints = "$src1 = $dst" in { + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + VR128:$src2)))]>; + + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + (memopv2i64 addr:$src2))))]>; +} + +// SSE2 Integer comparison +defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>; +defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>; +defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>; +defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; +defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; +defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; + +def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), + (PCMPEQBrr VR128:$src1, VR128:$src2)>; +def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + (PCMPEQBrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), + (PCMPEQWrr VR128:$src1, VR128:$src2)>; +def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + (PCMPEQWrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), + (PCMPEQDrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + (PCMPEQDrm VR128:$src1, addr:$src2)>; + +def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), + (PCMPGTBrr VR128:$src1, VR128:$src2)>; +def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + (PCMPGTBrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), + (PCMPGTWrr VR128:$src1, VR128:$src2)>; +def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + (PCMPGTWrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), + (PCMPGTDrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + (PCMPGTDrm VR128:$src1, addr:$src2)>; + + +// Pack instructions +defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; +defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; +defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; + +// Shuffle and unpack instructions +def PSHUFDri : 
PDIi8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (pshufd:$src2 + VR128:$src1, (undef))))]>; +def PSHUFDmi : PDIi8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (pshufd:$src2 + (bc_v4i32(memopv2i64 addr:$src1)), + (undef))))]>; + +// SSE2 with ImmT == Imm8 and XS prefix. +def PSHUFHWri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, + (undef))))]>, + XS, Requires<[HasSSE2]>; +def PSHUFHWmi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v8i16 (pshufhw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, + XS, Requires<[HasSSE2]>; + +// SSE2 with ImmT == Imm8 and XD prefix. +def PSHUFLWri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, + (undef))))]>, + XD, Requires<[HasSSE2]>; +def PSHUFLWmi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v8i16 (pshuflw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, + XD, Requires<[HasSSE2]>; + + +let Constraints = "$src1 = $dst" in { + def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpcklbw\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; + def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpcklbw\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckl VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; + def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpcklwd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; + def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpcklwd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckl VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2))))]>; + def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpckldq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; + def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpckldq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckl VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>; + def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpcklqdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; + def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpcklqdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>; + + def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpckhbw\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; + def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, + 
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpckhbw\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckh VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; + def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpckhwd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; + def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpckhwd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckh VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2))))]>; + def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpckhdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; + def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpckhdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (unpckh VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>; + def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "punpckhqdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; + def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "punpckhqdq\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>; +} + +// Extract / Insert +def PEXTRWri : PDIi8<0xC5, MRMSrcReg, + (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), + "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), + imm:$src2))]>; +let Constraints = "$src1 = $dst" in { + def PINSRWrri : PDIi8<0xC4, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, + GR32:$src2, i32i8imm:$src3), + "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>; + def PINSRWrmi : PDIi8<0xC4, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, + i16mem:$src2, i32i8imm:$src3), + "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), + imm:$src3))]>; +} + +// Mask creation +def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; + +// Conditional store +let Uses = [EDI] in +def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; + +let Uses = [RDI] in +def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; + +// Non-temporal stores +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, + TB, Requires<[HasSSE2]>; + +// Flush cache +def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), + "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, + 
TB, Requires<[HasSSE2]>; + +// Load, store, and memory fence +def LFENCE : I<0xAE, MRM5r, (outs), (ins), + "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; +def MFENCE : I<0xAE, MRM6r, (outs), (ins), + "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; + +//TODO: custom lower this so as to never even generate the noop +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), + (i8 0)), (NOOP)>; +def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; +def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), + (i8 1)), (MFENCE)>; + +// Alias instructions that map zero vector to pxor / xorp* for sse. +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-ones value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), + "pcmpeqd\t$dst, $dst", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; + +// FR64 to 128-bit vector conversion. +let isAsCheapAsAMove = 1 in +def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (scalar_to_vector FR64:$src)))]>; +def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; + +def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector GR32:$src)))]>; +def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; + +def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>; + +def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; + +// SSE2 instructions with XS prefix +def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + Requires<[HasSSE2]>; +def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))), addr:$dst)]>; + +// FIXME: may not be able to eliminate this movss with coalescing the src and +// dest register classes are different. 
We really want to write this pattern +// like this: +// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), +// (f32 FR32:$src)>; +let isAsCheapAsAMove = 1 in +def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (vector_extract (v2f64 VR128:$src), + (iPTR 0)))]>; +def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128:$src), + (iPTR 0))), addr:$dst)]>; +def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + (iPTR 0)))]>; +def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128:$src), + (iPTR 0))), addr:$dst)]>; + +def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bitconvert FR32:$src))]>; +def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; + + +// Move to lower bits of a VR128, leaving upper bits alone. +// Three operand (but two address) aliases. +let Constraints = "$src1 = $dst" in { + let neverHasSideEffects = 1 in + def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", []>; + + let AddedComplexity = 15 in + def MOVLPDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v2f64 (movl VR128:$src1, VR128:$src2)))]>; +} + +// Store / copy lower 64-bits of a XMM register. +def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; + +// Move to lower bits of a VR128 and zeroing upper bits. +// Loading from memory automatically zeroing upper bits. +let AddedComplexity = 20 in { +def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (X86vzmovl (v2f64 (scalar_to_vector + (loadf64 addr:$src))))))]>; + +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (MOVZSD2PDrm addr:$src)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (MOVZSD2PDrm addr:$src)>; +def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>; +} + +// movd / movq to XMM register zero-extends +let AddedComplexity = 15 in { +def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector GR32:$src)))))]>; +// This is X86-64 only. 
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector GR64:$src)))))]>; +} + +let AddedComplexity = 20 in { +def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (X86vzmovl (v4i32 (scalar_to_vector + (loadi32 addr:$src))))))]>; + +def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), + (MOVZDI2PDIrm addr:$src)>; +def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; +def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; + +def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (X86vzmovl (v2i64 (scalar_to_vector + (loadi64 addr:$src))))))]>, XS, + Requires<[HasSSE2]>; + +def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (MOVZQI2PQIrm addr:$src)>; +def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + (MOVZQI2PQIrm addr:$src)>; +def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; +} + +// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in +// IA32 document. movq xmm1, xmm2 does clear the high bits. +let AddedComplexity = 15 in +def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, + XS, Requires<[HasSSE2]>; + +let AddedComplexity = 20 in { +def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2i64 (X86vzmovl + (loadv2i64 addr:$src))))]>, + XS, Requires<[HasSSE2]>; + +def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (MOVZPQILo2PQIrm addr:$src)>; +} + +//===----------------------------------------------------------------------===// +// SSE3 Instructions +//===----------------------------------------------------------------------===// + +// Move Instructions +def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movshdup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4f32 (movshdup + VR128:$src, (undef))))]>; +def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "movshdup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (movshdup + (memopv4f32 addr:$src), (undef)))]>; + +def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movsldup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4f32 (movsldup + VR128:$src, (undef))))]>; +def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "movsldup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (movsldup + (memopv4f32 addr:$src), (undef)))]>; + +def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movddup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; +def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "movddup\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), + (undef))))]>; + +def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), + (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; + +let AddedComplexity = 5 in { +def : Pat<(movddup (memopv2f64 addr:$src), (undef)), + (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +def : Pat<(movddup 
(bc_v4f32 (memopv2f64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +def : Pat<(movddup (memopv2i64 addr:$src), (undef)), + (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +} + +// Arithmetic +let Constraints = "$src1 = $dst" in { + def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "addsubps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, + VR128:$src2))]>; + def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "addsubps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, + (memop addr:$src2)))]>; + def ADDSUBPDrr : S3I<0xD0, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "addsubpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, + VR128:$src2))]>; + def ADDSUBPDrm : S3I<0xD0, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + "addsubpd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, + (memop addr:$src2)))]>; +} + +def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "lddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; + +// Horizontal ops +class S3D_Intrr o, string OpcodeStr, Intrinsic IntId> + : S3DI; +class S3D_Intrm o, string OpcodeStr, Intrinsic IntId> + : S3DI; +class S3_Intrr o, string OpcodeStr, Intrinsic IntId> + : S3I; +class S3_Intrm o, string OpcodeStr, Intrinsic IntId> + : S3I; + +let Constraints = "$src1 = $dst" in { + def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; + def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; + def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>; + def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>; + def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>; + def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>; + def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; + def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; +} + +// Thread synchronization +def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", + [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; +def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; + +// vector_shuffle v1, <1, 1, 3, 3> +let AddedComplexity = 15 in +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), + (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in +def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; + +// vector_shuffle v1, <0, 0, 2, 2> +let AddedComplexity = 15 in + def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; + +//===----------------------------------------------------------------------===// +// SSSE3 Instructions +//===----------------------------------------------------------------------===// + +/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. 
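+/// Each of these multiclasses emits both a 64-bit MMX form (rr64/rm64 on
+/// VR64) and a 128-bit XMM form (rr128/rm128, carrying the OpSize prefix).
+/// As an illustrative mapping (assuming the usual builtin lowering), a C
+/// call such as _mm_abs_epi8(x) reaches int_x86_ssse3_pabs_b_128 and thus
+/// the PABSBrr128 instruction defined below.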
+multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128> { + def rr64 : SS38I; + + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize; + + def rm128 : SS38I, OpSize; +} + +/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16. +multiclass SS3I_unop_rm_int_16 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128> { + def rr64 : SS38I; + + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize; + + def rm128 : SS38I, OpSize; +} + +/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32. +multiclass SS3I_unop_rm_int_32 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128> { + def rr64 : SS38I; + + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize; + + def rm128 : SS38I, OpSize; +} + +defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>; +defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>; +defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>; + +/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8. +let Constraints = "$src1 = $dst" in { + multiclass SS3I_binop_rm_int_8 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16. +let Constraints = "$src1 = $dst" in { + multiclass SS3I_binop_rm_int_16 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32. 
+let Constraints = "$src1 = $dst" in { + multiclass SS3I_binop_rm_int_32 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128>; +defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd", + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128>; +defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw", + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128>; +defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw", + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128>; +defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd", + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128>; +defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128>; +defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128>; +defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128, 1>; +defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128>; +defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb", + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128>; +defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128>; +defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128>; + +let Constraints = "$src1 = $dst" in { + def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i16imm:$src3), + "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR64:$dst, + (int_x86_ssse3_palign_r + VR64:$src1, VR64:$src2, + imm:$src3))]>; + def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i16imm:$src3), + "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR64:$dst, + (int_x86_ssse3_palign_r + VR64:$src1, + (bitconvert (memopv2i32 addr:$src2)), + imm:$src3))]>; + + def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32imm:$src3), + "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_ssse3_palign_r_128 + VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i32imm:$src3), + "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_ssse3_palign_r_128 + VR128:$src1, + (bitconvert (memopv4i32 addr:$src2)), + imm:$src3))]>, OpSize; +} + +def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// extload f32 -> f64. This matches load+fextend because we have a hack in +// the isel (PreprocessForFPConvert) that can introduce loads after dag combine. +// Since these loads aren't folded into the fextend, we have to match it +// explicitly here. 
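+// For example (illustrative C, assuming default codegen):
+//   double widen(const float *p) { return *p; }
+// should select a single cvtss2sd with a memory operand via the pattern
+// below, rather than a separate movss load followed by a register cvtss2sd.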
+let Predicates = [HasSSE2] in + def : Pat<(fextend (loadf32 addr:$src)), + (CVTSS2SDrm addr:$src)>; + +// bit_convert +let Predicates = [HasSSE2] in { + def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +} + +// Move scalar to XMM zero-extended +// movd to XMM register zero-extends +let AddedComplexity = 15 in { +// Zeroing a VR128 then do a MOVS{S|D} to the lower bits. 
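+// That is, (X86vzmovl (scalar_to_vector x)) becomes two instructions:
+// V_SET0 (an xorps) to zero the register, then a movss/movsd that merges x
+// into the low element. Illustrative C (assuming the usual intrinsic
+// lowering): __m128d v = _mm_set_sd(x); // -> xorps + movsd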
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>; +def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; +def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; +} + +// Splat v2f64 / v2i64 +let AddedComplexity = 10 in { +def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), + (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(unpckh (v2f64 VR128:$src), (undef)), + (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), + (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(unpckh (v2i64 VR128:$src), (undef)), + (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +} + +// Special unary SHUFPSrri case. +def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE1]>; +let AddedComplexity = 5 in +def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. +def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. +def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; +// Unary v4f32 shuffle with PSHUF* in order to fold a load. +def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[HasSSE2]>; + +// Special binary v4i32 shuffle cases with SHUFPS. +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; +// Special binary v2i64 shuffle cases using SHUFPDrri. 
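+// (SSE2 offers no two-source, immediate-controlled integer shuffle at qword
+// granularity, so v2i64 shuffles borrow the floating-point shufpd encoding.)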
+def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; + +// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> +let AddedComplexity = 15 in { +def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[OptForSpeed, HasSSE2]>; +def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[OptForSpeed, HasSSE2]>; +} +let AddedComplexity = 10 in { +def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), + (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; +def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), + (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), + (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), + (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +} + +// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> +let AddedComplexity = 15 in { +def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[OptForSpeed, HasSSE2]>; +def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[OptForSpeed, HasSSE2]>; +} +let AddedComplexity = 10 in { +def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), + (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; +def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), + (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), + (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), + (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +} + +let AddedComplexity = 20 in { +// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS +def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + +// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS +def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; + +// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS +def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; +def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; +} + +let AddedComplexity = 20 in { +// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS +// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS +def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; +def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), + (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; +def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), + (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + +def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), + (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; +def : Pat<(v2i64 
(movhp VR128:$src1, (load addr:$src2))), + (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +} + +// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS +// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS +def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; +def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; +def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + +def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; +def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), + (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; +def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + + +let AddedComplexity = 15 in { +// Setting the lowest element in the vector. +def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + +// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) +def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +} + +// Set lowest element and zero upper elements. +let AddedComplexity = 15 in +def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), + (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; + +// Some special case pandn patterns. 
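+// (and (xor x, all-ones), y) is exactly pandn; the three register variants
+// below differ only in how the all-ones vector was materialized
+// (v4i32/v8i16/v16i8) before being bitcast to v2i64. Illustratively
+// (assuming the usual intrinsic lowering), _mm_andnot_si128(x, y) in C ends
+// up on the PANDNrr pattern.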
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), + VR128:$src2)), + (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), + VR128:$src2)), + (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), + VR128:$src2)), + (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + +def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), + (memop addr:$src2))), + (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), + (memop addr:$src2))), + (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), + (memop addr:$src2))), + (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + +// vector -> vector casts +def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), + (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), + (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; + +// Use movaps / movups for SSE integer load / store (one byte shorter). +def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; +def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; +def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; +def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>; + +def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + +//===----------------------------------------------------------------------===// +// SSE4.1 Instructions +//===----------------------------------------------------------------------===// + +multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, + string OpcodeStr, + Intrinsic V4F32Int, + Intrinsic V2F64Int> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr_Int : SS4AIi8, + OpSize; + + // Vector intrinsic operation, mem + def PSm_Int : SS4AIi8, + OpSize; + + // Vector intrinsic operation, reg + def PDr_Int : SS4AIi8, + OpSize; + + // Vector intrinsic operation, mem + def PDm_Int : SS4AIi8, + OpSize; +} + +let Constraints = "$src1 = $dst" in { +multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int> { + // Intrinsic operation, reg. 
+ def SSr_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, mem. + def SSm_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, mem. + def SDm_Int : SS4AIi8, + OpSize; +} +} + +// FP round - roundss, roundps, roundsd, roundpd +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_sd>; + +// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. +multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, + Intrinsic IntId128> { + def rr128 : SS48I, OpSize; + def rm128 : SS48I, OpSize; +} + +defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", + int_x86_sse41_phminposuw>; + +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +let Constraints = "$src1 = $dst" in { + multiclass SS41I_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Commutable = 0> { + def rr : SS48I, + OpSize { + let isCommutable = Commutable; + } + def rm : SS48I, OpSize; + } +} + +defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", + int_x86_sse41_pcmpeqq, 1>; +defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", + int_x86_sse41_packusdw, 0>; +defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", + int_x86_sse41_pminsb, 1>; +defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", + int_x86_sse41_pminsd, 1>; +defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", + int_x86_sse41_pminud, 1>; +defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", + int_x86_sse41_pminuw, 1>; +defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", + int_x86_sse41_pmaxsb, 1>; +defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", + int_x86_sse41_pmaxsd, 1>; +defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", + int_x86_sse41_pmaxud, 1>; +defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", + int_x86_sse41_pmaxuw, 1>; + +defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; + +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; + +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +let Constraints = "$src1 = $dst" in { + multiclass SS41I_binop_patint opc, string OpcodeStr, ValueType OpVT, + SDNode OpNode, Intrinsic IntId128, + bit Commutable = 0> { + def rr : SS48I, OpSize { + let isCommutable = Commutable; + } + def rr_int : SS48I, + OpSize { + let isCommutable = Commutable; + } + def rm : SS48I, OpSize; + def rm_int : SS48I, + OpSize; + } +} +defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul, + int_x86_sse41_pmulld, 1>; + +/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +let Constraints = "$src1 = $dst" in { + multiclass SS41I_binop_rmi_int opc, string OpcodeStr, + Intrinsic IntId128, bit Commutable = 0> { + def rri : SS4AIi8, + OpSize { + let isCommutable = Commutable; + } + def rmi : SS4AIi8, + OpSize; + } +} + +defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", + int_x86_sse41_blendps, 0>; +defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", + int_x86_sse41_blendpd, 0>; +defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", + int_x86_sse41_pblendw, 0>; +defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", + int_x86_sse41_dpps, 1>; +defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", + int_x86_sse41_dppd, 1>; +defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", + int_x86_sse41_mpsadbw, 1>; + + +/// SS41I_ternary_int - SSE 4.1 ternary operator 
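+/// (blendvpd/blendvps/pblendvb). These take their mask implicitly in XMM0,
+/// hence the "let Uses = [XMM0]" below and the rr0/rm0 suffixes; e.g.
+/// _mm_blendv_ps(a, b, m) must place m in %xmm0 before issuing blendvps
+/// (illustrative mapping, assuming the usual intrinsic lowering).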
+let Uses = [XMM0], Constraints = "$src1 = $dst" in { + multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { + def rr0 : SS48I, + OpSize; + + def rm0 : SS48I, OpSize; + } +} + +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; + + +multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; + + def rm : SS48I, + OpSize; +} + +defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; +defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; +defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; +defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; +defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; +defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; + +// Common patterns involving scalar load. +def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; + + +multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; + + def rm : SS48I, + OpSize; +} + +defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; +defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; +defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; +defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; + +// Common patterns involving scalar load +def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>; + + +multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; + + // Expecting a i16 load any extended to i32 value. 
+ def rm : SS48I, + OpSize; +} + +defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; +defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>; + +// Common patterns involving scalar load +def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>; + +def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; + + +/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem +multiclass SS41I_extract8 opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize; + def mr : SS4AIi8, OpSize; +// FIXME: +// There's an AssertZext in the way of writing the store pattern +// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) +} + +defm PEXTRB : SS41I_extract8<0x14, "pextrb">; + + +/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination +multiclass SS41I_extract16 opc, string OpcodeStr> { + def mr : SS4AIi8, OpSize; +// FIXME: +// There's an AssertZext in the way of writing the store pattern +// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) +} + +defm PEXTRW : SS41I_extract16<0x15, "pextrw">; + + +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract32 opc, string OpcodeStr> { + def rr : SS4AIi8, OpSize; + def mr : SS4AIi8, OpSize; +} + +defm PEXTRD : SS41I_extract32<0x16, "pextrd">; + + +/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory +/// destination +multiclass SS41I_extractf32 opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize; + def mr : SS4AIi8, OpSize; +} + +defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; + +// Also match an EXTRACTPS store when the store is done as f32 instead of i32. 
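+// extractps always produces 32 bits; viewing the extracted lane as f32
+// rather than i32 changes only the type, not the bits, so the f32 store is
+// matched below through a bitconvert of the i32 extract.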
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+                                              imm:$src2))),
+                 addr:$dst),
+          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+          Requires<[HasSSE41]>;
+
+let Constraints = "$src1 = $dst" in {
+  multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
+    def rr : SS4AIi8, OpSize;
+    def rm : SS4AIi8, OpSize;
+  }
+}
+
+defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+let Constraints = "$src1 = $dst" in {
+  multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
+    def rr : SS4AIi8,
+             OpSize;
+    def rm : SS4AIi8, OpSize;
+  }
+}
+
+defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+let Constraints = "$src1 = $dst" in {
+  multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
+    def rr : SS4AIi8, OpSize;
+    def rm : SS4AIi8, OpSize;
+  }
+}
+
+defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+                    "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+                    "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+}
+
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                       "movntdqa\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+let Constraints = "$src1 = $dst" in {
+  multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+                                Intrinsic IntId128, bit Commutable = 0> {
+    def rr : SS428I,
+             OpSize {
+      let isCommutable = Commutable;
+    }
+    def rm : SS428I, OpSize;
+  }
+}
+
+defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+          (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+          (PCMPGTQrm VR128:$src1, addr:$src2)>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 000000000000..f92310607a8e
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,560 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+  unsigned char *OldByte = (unsigned char *)Old;
+  *OldByte++ = 0xE9;                // Emit JMP opcode.
+  unsigned *OldWord = (unsigned *)OldByte;
+  unsigned NewAddr = (intptr_t)New;
+  unsigned OldAddr = (intptr_t)OldWord;
+  *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// Check if building with -fPIC +#if defined(__PIC__) && __PIC__ && defined(__linux__) +#define ASMCALLSUFFIX "@PLT" +#else +#define ASMCALLSUFFIX +#endif + +// For ELF targets, use a .size and .type directive, to let tools +// know the extent of functions defined in assembler. +#if defined(__ELF__) +# define SIZE(sym) ".size " #sym ", . - " #sym "\n" +# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n" +#else +# define SIZE(sym) +# define TYPE_FUNCTION(sym) +#endif + +// Provide a convenient way for disabling usage of CFI directives. +// This is needed for old/broken assemblers (for example, gas on +// Darwin is pretty old and doesn't support these directives) +#if defined(__APPLE__) +# define CFI(x) +#else +// FIXME: Disable this until we really want to use it. Also, we will +// need to add some workarounds for compilers, which support +// only subset of these directives. +# define CFI(x) +#endif + +// Provide a wrapper for X86CompilationCallback2 that saves non-traditional +// callee saved registers, for the fastcc calling convention. +extern "C" { +#if defined(X86_64_JIT) +# ifndef _MSC_VER + // No need to save EAX/EDX for X86-64. + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + // Save RBP + "pushq %rbp\n" + CFI(".cfi_def_cfa_offset 16\n") + CFI(".cfi_offset %rbp, -16\n") + // Save RSP + "movq %rsp, %rbp\n" + CFI(".cfi_def_cfa_register %rbp\n") + // Save all int arg registers + "pushq %rdi\n" + CFI(".cfi_rel_offset %rdi, 0\n") + "pushq %rsi\n" + CFI(".cfi_rel_offset %rsi, 8\n") + "pushq %rdx\n" + CFI(".cfi_rel_offset %rdx, 16\n") + "pushq %rcx\n" + CFI(".cfi_rel_offset %rcx, 24\n") + "pushq %r8\n" + CFI(".cfi_rel_offset %r8, 32\n") + "pushq %r9\n" + CFI(".cfi_rel_offset %r9, 40\n") + // Align stack on 16-byte boundary. ESP might not be properly aligned + // (8 byte) if this is called from an indirect stub. 
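    // (Illustrative note, not from the patch: $-16 is ...fffffff0, so the
    // andq below clears the low four bits of RSP, e.g. 0x7fffffffe468 & -16
    // == 0x7fffffffe460, rounding the stack pointer down to a 16-byte
    // boundary.)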
+ "andq $-16, %rsp\n" + // Save all XMM arg registers + "subq $128, %rsp\n" + "movaps %xmm0, (%rsp)\n" + "movaps %xmm1, 16(%rsp)\n" + "movaps %xmm2, 32(%rsp)\n" + "movaps %xmm3, 48(%rsp)\n" + "movaps %xmm4, 64(%rsp)\n" + "movaps %xmm5, 80(%rsp)\n" + "movaps %xmm6, 96(%rsp)\n" + "movaps %xmm7, 112(%rsp)\n" + // JIT callee + "movq %rbp, %rdi\n" // Pass prev frame and return address + "movq 8(%rbp), %rsi\n" + "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + // Restore all XMM arg registers + "movaps 112(%rsp), %xmm7\n" + "movaps 96(%rsp), %xmm6\n" + "movaps 80(%rsp), %xmm5\n" + "movaps 64(%rsp), %xmm4\n" + "movaps 48(%rsp), %xmm3\n" + "movaps 32(%rsp), %xmm2\n" + "movaps 16(%rsp), %xmm1\n" + "movaps (%rsp), %xmm0\n" + // Restore RSP + "movq %rbp, %rsp\n" + CFI(".cfi_def_cfa_register %rsp\n") + // Restore all int arg registers + "subq $48, %rsp\n" + CFI(".cfi_adjust_cfa_offset 48\n") + "popq %r9\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r9\n") + "popq %r8\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r8\n") + "popq %rcx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rcx\n") + "popq %rdx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdx\n") + "popq %rsi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rsi\n") + "popq %rdi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdi\n") + // Restore RBP + "popq %rbp\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rbp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); +# else + // No inline assembler support on this platform. The routine is in external + // file. + void X86CompilationCallback(); + +# endif +#elif defined (X86_32_JIT) +# ifndef _MSC_VER + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") +# if defined(__APPLE__) + "andl $-16, %esp\n" // Align ESP on 16-byte boundary +# endif + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register %esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl %eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); + + // Same as X86CompilationCallback but also saves XMM argument registers. 
+ void X86CompilationCallback_SSE(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback_SSE\n" + TYPE_FUNCTION(X86CompilationCallback_SSE) + ASMPREFIX "X86CompilationCallback_SSE:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") + "andl $-16, %esp\n" // Align ESP on 16-byte boundary + // Save all XMM arg registers + "subl $64, %esp\n" + // FIXME: provide frame move information for xmm registers. + // This can be tricky, because CFA register is ebp (unaligned) + // and we need to produce offsets relative to it. + "movaps %xmm0, (%esp)\n" + "movaps %xmm1, 16(%esp)\n" + "movaps %xmm2, 32(%esp)\n" + "movaps %xmm3, 48(%esp)\n" + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "addl $16, %esp\n" + "movaps 48(%esp), %xmm3\n" + CFI(".cfi_restore %xmm3\n") + "movaps 32(%esp), %xmm2\n" + CFI(".cfi_restore %xmm2\n") + "movaps 16(%esp), %xmm1\n" + CFI(".cfi_restore %xmm1\n") + "movaps (%esp), %xmm0\n" + CFI(".cfi_restore %xmm0\n") + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl %eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback_SSE) + ); +# else + void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + + _declspec(naked) void X86CompilationCallback(void) { + __asm { + push ebp + mov ebp, esp + push eax + push edx + push ecx + and esp, -16 + mov eax, dword ptr [ebp+4] + mov dword ptr [esp+4], eax + mov dword ptr [esp], ebp + call X86CompilationCallback2 + mov esp, ebp + sub esp, 12 + pop ecx + pop edx + pop eax + pop ebp + ret + } + } + +# endif // _MSC_VER + +#else // Not an i386 host + void X86CompilationCallback() { + assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n"); + abort(); + } +#endif +} + +/// X86CompilationCallback2 - This is the target-specific function invoked by the +/// function stub when we did not know the real target of a call. This function +/// must locate the start of the stub or call site and pass it into the JIT +/// compiler function. +extern "C" void ATTRIBUTE_USED +X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { + intptr_t *RetAddrLoc = &StackPtr[1]; + assert(*RetAddrLoc == RetAddr && + "Could not find return address on the stack!"); + + // It's a stub if there is an interrupt marker after the call. + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD; + + // The call instruction should have pushed the return value onto the stack... +#if defined (X86_64_JIT) + RetAddr--; // Backtrack to the reference itself... +#else + RetAddr -= 4; // Backtrack to the reference itself... +#endif + +#if 0 + DOUT << "In callback! 
Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr + << ": Resolving call to function: " + << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"; +#endif + + // Sanity check to make sure this really is a call instruction. +#if defined (X86_64_JIT) + assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!"); + assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!"); +#else + assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!"); +#endif + + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. +#if defined (X86_64_JIT) + if (!isStub) + *(intptr_t *)(RetAddr - 0xa) = NewVal; +#else + *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); +#endif + + if (isStub) { + // If this is a stub, rewrite the call into an unconditional branch + // instruction so that two return addresses are not pushed onto the stack + // when the requested function finally gets called. This also makes the + // 0xCD byte (interrupt) dead, so the marker doesn't effect anything. +#if defined (X86_64_JIT) + // If the target address is within 32-bit range of the stub, use a + // PC-relative branch instead of loading the actual address. (This is + // considerably shorter than the 64-bit immediate load already there.) + // We assume here intptr_t is 64 bits. + intptr_t diff = NewVal-RetAddr+7; + if (diff >= -2147483648LL && diff <= 2147483647LL) { + *(unsigned char*)(RetAddr-0xc) = 0xE9; + *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff; + } else { + *(intptr_t *)(RetAddr - 0xa) = NewVal; + ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6)); + } +#else + ((unsigned char*)RetAddr)[-1] = 0xE9; +#endif + } + + // Change the return address to reexecute the call instruction... +#if defined (X86_64_JIT) + *RetAddrLoc -= 0xd; +#else + *RetAddrLoc -= 5; +#endif +} + +TargetJITInfo::LazyResolverFn +X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + +#if defined (X86_32_JIT) && !defined (_MSC_VER) + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + union { + unsigned u[3]; + char c[12]; + } text; + + if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) { + // FIXME: support for AMD family of processors. + if (memcmp(text.c, "GenuineIntel", 12) == 0) { + X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + if ((EDX >> 25) & 0x1) + return X86CompilationCallback_SSE; + } + } +#endif + + return X86CompilationCallback; +} + +void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) { +#if defined (X86_64_JIT) + JCE.startGVStub(GV, 8, 8); + JCE.emitWordLE((unsigned)(intptr_t)ptr); + JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32)); +#else + JCE.startGVStub(GV, 4, 4); + JCE.emitWordLE((intptr_t)ptr); +#endif + return JCE.finishGVStub(GV); +} + +void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // Note, we cast to intptr_t here to silence a -pedantic warning that + // complains about casting a function pointer to a normal pointer. 
+#if defined (X86_32_JIT) && !defined (_MSC_VER) + bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback && + Fn != (void*)(intptr_t)X86CompilationCallback_SSE); +#else + bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback; +#endif + if (NotCC) { +#if defined (X86_64_JIT) + JCE.startGVStub(F, 13, 4); + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Fn); + JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // jmpq *r10 + JCE.emitByte(2 | (4 << 3) | (3 << 6)); +#else + JCE.startGVStub(F, 5, 4); + JCE.emitByte(0xE9); + JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); +#endif + return JCE.finishGVStub(F); + } + +#if defined (X86_64_JIT) + JCE.startGVStub(F, 14, 4); + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Fn); + JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // callq *r10 + JCE.emitByte(2 | (2 << 3) | (3 << 6)); +#else + JCE.startGVStub(F, 6, 4); + JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination... + + JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); +#endif + + JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub! + return JCE.finishGVStub(F); +} + +void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub, + JITCodeEmitter &JCE) { + // Note, we cast to intptr_t here to silence a -pedantic warning that + // complains about casting a function pointer to a normal pointer. + JCE.startGVStub(F, Stub, 5); + JCE.emitByte(0xE9); +#if defined (X86_64_JIT) + assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) == + ((intptr_t)Fn-JCE.getCurrentPCValue()-5) + && "PIC displacement does not fit in displacement field!"); +#endif + JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); + JCE.finishGVStub(F); +} + +/// getPICJumpTableEntry - Returns the value of the jumptable entry for the +/// specific basic block. +uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { +#if defined(X86_64_JIT) + return BB - Entry; +#else + return BB - PICBase; +#endif +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void X86JITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((X86::RelocationType)MR->getRelocationType()) { + case X86::reloc_pcrel_word: { + // PC relative relocation, add the relocated value to the value already in + // memory, after we adjust it for where the PC is. + ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); + *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + } + case X86::reloc_picrel_word: { + // PIC base relative relocation, add the relocated value to the value + // already in memory, after we adjust it for where the PIC base is. + ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); + *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + } + case X86::reloc_absolute_word: + // Absolute relocation, just add the relocated value to the value already + // in memory. 
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + case X86::reloc_absolute_dword: + *((intptr_t*)RelocPos) += ResultPtr; + break; + } + } +} + +char* X86JITInfo::allocateThreadLocalMemory(size_t size) { +#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER) + TLSOffset -= size; + return TLSOffset; +#else + assert(0 && "Cannot allocate thread local storage on this arch!\n"); + return 0; +#endif +} diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h new file mode 100644 index 000000000000..6a4e2148a5aa --- /dev/null +++ b/lib/Target/X86/X86JITInfo.h @@ -0,0 +1,84 @@ +//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86JITINFO_H +#define X86JITINFO_H + +#include "llvm/Function.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class X86TargetMachine; + + class X86JITInfo : public TargetJITInfo { + X86TargetMachine &TM; + uintptr_t PICBase; + char* TLSOffset; + public: + explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) { + useGOT = 0; + TLSOffset = 0; + } + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE); + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + + /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to + /// emit a small native function that simply calls Fn. Emit the stub into + /// the supplied buffer. + virtual void emitFunctionStubAtAddr(const Function* F, void *Fn, + void *Buffer, JITCodeEmitter &JCE); + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase); + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. 
+    virtual char* allocateThreadLocalMemory(size_t size);
+
+    /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+    /// PIC jumptable entry.
+    void setPICBase(uintptr_t Base) { PICBase = Base; }
+    uintptr_t getPICBase() const { return PICBase; }
+  };
+}
+
+#endif
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 000000000000..8a5ac2c9a85c
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,112 @@
+//====- X86MachineFunctionInfo.h - X86 machine function info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+enum NameDecorationStyle {
+  None,
+  StdCall,
+  FastCall
+};
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private X86 target-specific information for each
+/// MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+  /// ForceFramePointer - True if the function is required to use a frame
+  /// pointer for reasons other than containing dynamic allocation or having
+  /// FP elimination turned off. For example, the Cygwin main function
+  /// contains stack pointer re-alignment code, which requires FP.
+  bool ForceFramePointer;
+
+  /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+  /// stack frame in bytes.
+  unsigned CalleeSavedFrameSize;
+
+  /// BytesToPopOnReturn - Number of bytes the function pops on return.
+  /// Used on Windows platforms for stdcall & fastcall name decoration.
+  unsigned BytesToPopOnReturn;
+
+  /// DecorationStyle - If the function requires additional name decoration,
+  /// DecorationStyle holds the right way to do so.
+  NameDecorationStyle DecorationStyle;
+
+  /// ReturnAddrIndex - FrameIndex for the return slot.
+  int ReturnAddrIndex;
+
+  /// TailCallReturnAddrDelta - The delta the return-address stack slot is
+  /// moved by. Used to create an area before the register spill area on the
+  /// stack to which the return address can safely be moved.
+  int TailCallReturnAddrDelta;
+
+  /// SRetReturnReg - Some subtargets require that sret lowering includes
+  /// returning the value of the returned struct in a register. This field
+  /// holds the virtual register into which the sret argument is passed.
+  unsigned SRetReturnReg;
+
+  /// GlobalBaseReg - keeps track of the virtual register initialized for
+  /// use as the global base register. This is used for PIC in some PIC
+  /// relocation models.
+ unsigned GlobalBaseReg; + +public: + X86MachineFunctionInfo() : ForceFramePointer(false), + CalleeSavedFrameSize(0), + BytesToPopOnReturn(0), + DecorationStyle(None), + ReturnAddrIndex(0), + TailCallReturnAddrDelta(0), + SRetReturnReg(0), + GlobalBaseReg(0) {} + + X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), + CalleeSavedFrameSize(0), + BytesToPopOnReturn(0), + DecorationStyle(None), + ReturnAddrIndex(0), + TailCallReturnAddrDelta(0), + SRetReturnReg(0), + GlobalBaseReg(0) {} + + bool getForceFramePointer() const { return ForceFramePointer;} + void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } + + unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } + void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } + + unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } + void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} + + NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } + void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} + + int getRAIndex() const { return ReturnAddrIndex; } + void setRAIndex(int Index) { ReturnAddrIndex = Index; } + + int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; } + void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp new file mode 100644 index 000000000000..5af1fb17818d --- /dev/null +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -0,0 +1,1280 @@ +//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetRegisterInfo class. +// This file is responsible for the frame pointer elimination optimization +// on X86. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86RegisterInfo.h" +#include "X86InstrBuilder.h" +#include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, + const TargetInstrInfo &tii) + : X86GenRegisterInfo(tm.getSubtarget().is64Bit() ? + X86::ADJCALLSTACKDOWN64 : + X86::ADJCALLSTACKDOWN32, + tm.getSubtarget().is64Bit() ? + X86::ADJCALLSTACKUP64 : + X86::ADJCALLSTACKUP32), + TM(tm), TII(tii) { + // Cache some information. + const X86Subtarget *Subtarget = &TM.getSubtarget(); + Is64Bit = Subtarget->is64Bit(); + IsWin64 = Subtarget->isTargetWin64(); + StackAlign = TM.getFrameInfo()->getStackAlignment(); + if (Is64Bit) { + SlotSize = 8; + StackPtr = X86::RSP; + FramePtr = X86::RBP; + } else { + SlotSize = 4; + StackPtr = X86::ESP; + FramePtr = X86::EBP; + } +} + +// getDwarfRegNum - This function maps LLVM register identifiers to the +// Dwarf specific numbering, used in debug info and exception tables. + +int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { + const X86Subtarget *Subtarget = &TM.getSubtarget(); + unsigned Flavour = DWARFFlavour::X86_64; + if (!Subtarget->is64Bit()) { + if (Subtarget->isTargetDarwin()) { + if (isEH) + Flavour = DWARFFlavour::X86_32_DarwinEH; + else + Flavour = DWARFFlavour::X86_32_Generic; + } else if (Subtarget->isTargetCygMing()) { + // Unsupported by now, just quick fallback + Flavour = DWARFFlavour::X86_32_Generic; + } else { + Flavour = DWARFFlavour::X86_32_Generic; + } + } + + return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); +} + +// getX86RegNum - This function maps LLVM register identifiers to their X86 +// specific numbering, which is used in various places encoding instructions. 
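// (Editorial note: the N86 numbers returned below are the 3-bit register
// codes used in ModRM/SIB encoding. R8-R15 intentionally map onto the same
// 0-7 codes; a REX prefix bit distinguishes them, so e.g. EAX and R8D both
// encode as 0.)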
+// +unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { + switch(RegNo) { + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; + case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; + case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: + return N86::ESP; + case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: + return N86::EBP; + case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: + return N86::ESI; + case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: + return N86::EDI; + + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + return N86::EAX; + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + return N86::ECX; + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + return N86::EDX; + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + return N86::EBX; + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + return N86::ESP; + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + return N86::EBP; + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + return N86::ESI; + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + return N86::EDI; + + case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: + case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: + return RegNo-X86::ST0; + + case X86::XMM0: case X86::XMM8: case X86::MM0: + return 0; + case X86::XMM1: case X86::XMM9: case X86::MM1: + return 1; + case X86::XMM2: case X86::XMM10: case X86::MM2: + return 2; + case X86::XMM3: case X86::XMM11: case X86::MM3: + return 3; + case X86::XMM4: case X86::XMM12: case X86::MM4: + return 4; + case X86::XMM5: case X86::XMM13: case X86::MM5: + return 5; + case X86::XMM6: case X86::XMM14: case X86::MM6: + return 6; + case X86::XMM7: case X86::XMM15: case X86::MM7: + return 7; + + default: + assert(isVirtualRegister(RegNo) && "Unknown physical register!"); + assert(0 && "Register allocator hasn't allocated reg correctly yet!"); + return 0; + } +} + +const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const { + const X86Subtarget *Subtarget = &TM.getSubtarget(); + if (Subtarget->is64Bit()) + return &X86::GR64RegClass; + else + return &X86::GR32RegClass; +} + +const TargetRegisterClass * +X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &X86::CCRRegClass) { + if (Is64Bit) + return &X86::GR64RegClass; + else + return &X86::GR32RegClass; + } + return NULL; +} + +const unsigned * +X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + bool callsEHReturn = false; + + if (MF) { + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + callsEHReturn = (MMI ? 
MMI->callsEHReturn() : false); + } + + static const unsigned CalleeSavedRegs32Bit[] = { + X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 + }; + + static const unsigned CalleeSavedRegs32EHRet[] = { + X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 + }; + + static const unsigned CalleeSavedRegs64Bit[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + + static const unsigned CalleeSavedRegs64EHRet[] = { + X86::RAX, X86::RDX, X86::RBX, X86::R12, + X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + + static const unsigned CalleeSavedRegsWin64[] = { + X86::RBX, X86::RBP, X86::RDI, X86::RSI, + X86::R12, X86::R13, X86::R14, X86::R15, + X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9, + X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, + X86::XMM14, X86::XMM15, 0 + }; + + if (Is64Bit) { + if (IsWin64) + return CalleeSavedRegsWin64; + else + return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit); + } else { + return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit); + } +} + +const TargetRegisterClass* const* +X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + bool callsEHReturn = false; + + if (MF) { + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + callsEHReturn = (MMI ? MMI->callsEHReturn() : false); + } + + static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { + &X86::GR32RegClass, &X86::GR32RegClass, + &X86::GR32RegClass, &X86::GR32RegClass, 0 + }; + static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { + &X86::GR32RegClass, &X86::GR32RegClass, + &X86::GR32RegClass, &X86::GR32RegClass, + &X86::GR32RegClass, &X86::GR32RegClass, 0 + }; + static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, 0 + }; + static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, 0 + }; + static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::VR128RegClass, &X86::VR128RegClass, + &X86::VR128RegClass, &X86::VR128RegClass, + &X86::VR128RegClass, &X86::VR128RegClass, + &X86::VR128RegClass, &X86::VR128RegClass, + &X86::VR128RegClass, &X86::VR128RegClass, 0 + }; + + if (Is64Bit) { + if (IsWin64) + return CalleeSavedRegClassesWin64; + else + return (callsEHReturn ? + CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); + } else { + return (callsEHReturn ? + CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); + } +} + +BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + // Set the stack-pointer register and its aliases as reserved. + Reserved.set(X86::RSP); + Reserved.set(X86::ESP); + Reserved.set(X86::SP); + Reserved.set(X86::SPL); + // Set the frame-pointer register and its aliases as reserved if needed. + if (hasFP(MF)) { + Reserved.set(X86::RBP); + Reserved.set(X86::EBP); + Reserved.set(X86::BP); + Reserved.set(X86::BPL); + } + // Mark the x87 stack registers as reserved, since they don't + // behave normally with respect to liveness. 
We don't fully
+  // model the effects of x87 stack pushes and pops after stackification.
+  Reserved.set(X86::ST0);
+  Reserved.set(X86::ST1);
+  Reserved.set(X86::ST2);
+  Reserved.set(X86::ST3);
+  Reserved.set(X86::ST4);
+  Reserved.set(X86::ST5);
+  Reserved.set(X86::ST6);
+  Reserved.set(X86::ST7);
+  return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
+  unsigned MaxAlign = 0;
+  for (int i = FFI->getObjectIndexBegin(),
+         e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (FFI->isDeadObjectIndex(i))
+      continue;
+    unsigned Align = FFI->getObjectAlignment(i);
+    MaxAlign = std::max(MaxAlign, Align);
+  }
+
+  return MaxAlign;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled.
+//
+bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+
+  return (NoFramePointerElim ||
+          needsStackRealignment(MF) ||
+          MFI->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken() ||
+          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+          (MMI && MMI->callsUnwindInit()));
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // FIXME: Currently we don't support stack realignment for functions with
+  // variable-sized allocas
+  return (RealignStack &&
+          (MFI->getMaxAlignment() > StackAlign &&
+           !MFI->hasVarSizedObjects()));
+}
+
+bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+int
+X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+  int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
+  uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+
+  if (needsStackRealignment(MF)) {
+    if (FI < 0)
+      // Skip the saved EBP
+      Offset += SlotSize;
+    else {
+      unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+      assert((-(Offset + StackSize)) % Align == 0);
+      Align = 0;
+      return Offset + StackSize;
+    }
+
+    // FIXME: Support tail calls
+  } else {
+    if (!hasFP(MF))
+      return Offset + StackSize;
+
+    // Skip the saved EBP
+    Offset += SlotSize;
+
+    // Skip the RETADDR move area
+    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+    if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+  }
+
+  return Offset;
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after prologue, turn the
+    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+    // adjcallstackdown instruction into 'add ESP, <amt>'
+    // TODO: consider using push / pop instead of sub + store / add
+    MachineInstr *Old = I;
+    uint64_t Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
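      // (Illustrative note, not from the original patch: with StackAlign ==
      // 16, an outgoing-argument Amount of 20 bytes rounds up to
      // (20 + 16 - 1) / 16 * 16 == 32.)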
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; + + MachineInstr *New = 0; + if (Old->getOpcode() == getCallFrameSetupOpcode()) { + New = BuildMI(MF, Old->getDebugLoc(), + TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), + StackPtr).addReg(StackPtr).addImm(Amount); + } else { + assert(Old->getOpcode() == getCallFrameDestroyOpcode()); + // factor out the amount the callee already popped. + uint64_t CalleeAmt = Old->getOperand(1).getImm(); + Amount -= CalleeAmt; + if (Amount) { + unsigned Opc = (Amount < 128) ? + (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : + (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); + } + } + + if (New) { + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { + unsigned Opc = (CalleeAmt < 128) ? + (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : + (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + MachineInstr *Old = I; + MachineInstr *New = + BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), + StackPtr).addReg(StackPtr).addImm(CalleeAmt); + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + MBB.insert(I, New); + } + } + + MBB.erase(I); +} + +void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + unsigned BasePtr; + if (needsStackRealignment(MF)) + BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); + else + BasePtr = (hasFP(MF) ? FramePtr : StackPtr); + + // This must be part of a four operand memory reference. Replace the + // FrameIndex with base register with EBP. Add an offset to the offset. + MI.getOperand(i).ChangeToRegister(BasePtr, false); + + // Now add the frame object offset to the offset from EBP. + if (MI.getOperand(i+3).isImm()) { + // Offset is a 32-bit integer. + int Offset = getFrameIndexOffset(MF, FrameIndex) + + (int)(MI.getOperand(i+3).getImm()); + + MI.getOperand(i+3).ChangeToImmediate(Offset); + } else { + // Offset is symbolic. This is extremely rare. + uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + + (uint64_t)MI.getOperand(i+3).getOffset(); + MI.getOperand(i+3).setOffset(Offset); + } +} + +void +X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *FFI = MF.getFrameInfo(); + + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). 
+  unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
+                               calculateMaxStackAlignment(FFI));
+
+  FFI->setMaxAlignment(MaxAlign);
+}
+
+void
+X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+  if (TailCallReturnAddrDelta < 0) {
+    // create RETURNADDR area
+    //   arg
+    //   arg
+    //   RETADDR
+    //   { ...
+    //     RETADDR area
+    //     ...
+    //   }
+    //   [EBP]
+    MF.getFrameInfo()->
+      CreateFixedObject(-TailCallReturnAddrDelta,
+                        (-1*SlotSize)+TailCallReturnAddrDelta);
+  }
+  if (hasFP(MF)) {
+    assert((TailCallReturnAddrDelta <= 0) &&
+           "The Delta should always be zero or negative");
+    // Create a frame entry for the EBP register that must be saved.
+    int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+                                                        (int)SlotSize * -2+
+                                                        TailCallReturnAddrDelta);
+    assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+           "Slot for EBP register must be last in order to be found!");
+    FrameIdx = 0;
+  }
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
+                  const TargetInstrInfo &TII) {
+  bool isSub = NumBytes < 0;
+  uint64_t Offset = isSub ? -NumBytes : NumBytes;
+  unsigned Opc = isSub
+    ? ((Offset < 128) ?
+       (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+       (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
+    : ((Offset < 128) ?
+       (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+       (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+  uint64_t Chunk = (1LL << 31) - 1;
+  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+                 DebugLoc::getUnknownLoc());
+
+  while (Offset) {
+    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+        .addReg(StackPtr).addImm(ThisVal);
+    // The EFLAGS implicit def is dead.
+    MI->getOperand(3).setIsDead();
+    Offset -= ThisVal;
+  }
+}
+
+// mergeSPUpdatesUp - Merge an SP adjustment with the stack-manipulating
+// instruction immediately before the iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
+  if (MBBI == MBB.begin()) return;
+
+  MachineBasicBlock::iterator PI = prior(MBBI);
+  unsigned Opc = PI->getOpcode();
+  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+      PI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes += PI->getOperand(2).getImm();
+    MBB.erase(PI);
+  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+             PI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes -= PI->getOperand(2).getImm();
+    MBB.erase(PI);
+  }
+}
+
+// mergeSPUpdatesDown - Merge an SP adjustment with the stack-manipulating
+// instruction immediately after the iterator.
+static +void mergeSPUpdatesDown(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, uint64_t *NumBytes = NULL) { + return; + + if (MBBI == MBB.end()) return; + + MachineBasicBlock::iterator NI = next(MBBI); + if (NI == MBB.end()) return; + + unsigned Opc = NI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes -= NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes += NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } +} + +/// mergeSPUpdates - Checks the instruction before/after the passed +/// instruction. If it is an ADD/SUB instruction it is deleted +/// argument and the stack adjustment is returned as a positive value for ADD +/// and a negative for SUB. +static int mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, + bool doMergeWithPrevious) { + + if ((doMergeWithPrevious && MBBI == MBB.begin()) || + (!doMergeWithPrevious && MBBI == MBB.end())) + return 0; + + int Offset = 0; + + MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); + unsigned Opc = PI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + PI->getOperand(0).getReg() == StackPtr){ + Offset += PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + Offset -= PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } + + return Offset; +} + +void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, + unsigned FrameLabelId, + unsigned ReadyLabelId) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + if (!MMI) + return; + + uint64_t StackSize = MFI->getStackSize(); + std::vector &Moves = MMI->getFrameMoves(); + const TargetData *TD = MF.getTarget().getTargetData(); + + // Calculate amount of bytes used for return address storing + int stackGrowth = + (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize()); + + if (StackSize) { + // Show update of SP. + if (hasFP(MF)) { + // Adjust SP + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } else { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, + -StackSize+stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } + } else { + //FIXME: Verify & implement for FP + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } + + // Add callee saved registers to move list. + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + // FIXME: This is dirty hack. The code itself is pretty mess right now. + // It should be rewritten from scratch and generalized sometimes. 
+
+  // Determine maximum offset (minimum due to stack growth)
+  int64_t MaxOffset = 0;
+  for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+    MaxOffset = std::min(MaxOffset,
+                         MFI->getObjectOffset(CSI[I].getFrameIdx()));
+
+  // Calculate offsets
+  int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
+  for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
+    int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+    unsigned Reg = CSI[I].getReg();
+    Offset = (MaxOffset-Offset+saveAreaOffset);
+    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+    MachineLocation CSSrc(Reg);
+    Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+  }
+
+  if (hasFP(MF)) {
+    // Save FP
+    MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
+    MachineLocation FPSrc(FramePtr);
+    Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+  }
+
+  MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+  MachineLocation FPSrc(MachineLocation::VirtualFP);
+  Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+}
+
+
+void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function* Fn = MF.getFunction();
+  const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+                          !Fn->doesNotThrow() ||
+                          UnwindTablesMandatory;
+  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+                 DebugLoc::getUnknownLoc());
+
+  // Prepare for frame info.
+  unsigned FrameLabelId = 0;
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  uint64_t StackSize = MFI->getStackSize();
+
+  // Get desired stack alignment
+  uint64_t MaxAlign = MFI->getMaxAlignment();
+
+  // Add RETADDR move area to callee saved frame size.
+  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+  if (TailCallReturnAddrDelta < 0)
+    X86FI->setCalleeSavedFrameSize(
+      X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
+
+  // If this is x86-64, we are a leaf function, the Red Zone is not disabled,
+  // we use no more than 128 bytes of stack space, and we have no frame
+  // pointer, calls, or dynamic allocas, then we do not need to adjust the
+  // stack pointer (we fit in the Red Zone).
+  if (Is64Bit && !DisableRedZone &&
+      !needsStackRealignment(MF) &&
+      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
+      !MFI->hasCalls()) {                          // No calls.
+    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+    if (hasFP(MF)) MinSize += SlotSize;
+    StackSize = std::max(MinSize,
+                         StackSize > 128 ? StackSize - 128 : 0);
+    MFI->setStackSize(StackSize);
+  }
+
+  // Insert stack pointer adjustment for later moving of return addr. Only
+  // applies to tail call optimized functions where the callee argument stack
+  // size is bigger than the caller's.
+  if (TailCallReturnAddrDelta < 0) {
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+              StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
+    // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead(); + } + + uint64_t NumBytes = 0; + if (hasFP(MF)) { + // Calculate required stack adjustment + uint64_t FrameSize = StackSize - SlotSize; + if (needsStackRealignment(MF)) + FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; + + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + + // Get the offset of the stack slot for the EBP register... which is + // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. + // Update the frame offset adjustment. + MFI->setOffsetAdjustment(-NumBytes); + + // Save EBP into the appropriate stack slot... + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(FramePtr, RegState::Kill); + + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); + } + + // Update EBP with the new base value... + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) + .addReg(StackPtr); + + // Mark the FramePtr as live-in in every block except the entry. + for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + I != E; ++I) + I->addLiveIn(FramePtr); + + // Realign stack + if (needsStackRealignment(MF)) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), + StackPtr).addReg(StackPtr).addImm(-MaxAlign); + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + } else { + NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + } + + unsigned ReadyLabelId = 0; + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer is ready. + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + } + + // Skip the callee-saved push instructions. + while (MBBI != MBB.end() && + (MBBI->getOpcode() == X86::PUSH32r || + MBBI->getOpcode() == X86::PUSH64r)) + ++MBBI; + + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + if (NumBytes) { // adjust stack pointer: ESP -= numbytes + if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + // Check, whether EAX is livein for this function + bool isEAXAlive = false; + for (MachineRegisterInfo::livein_iterator + II = MF.getRegInfo().livein_begin(), + EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { + unsigned Reg = II->first; + isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL); + } + + // Function prologue calls _alloca to probe the stack when allocating + // more than 4k bytes in one go. Touching the stack at 4K increments is + // necessary to ensure that the guard pages used by the OS virtual memory + // manager are allocated in correct sequence. + if (!isEAXAlive) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); + } else { + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill); + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. 
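        // (Editorial note: the push of EAX above already moved ESP down one
        // slot, so asking _alloca for NumBytes-4 leaves ESP exactly NumBytes
        // below its pre-push value; the saved EAX ends up at
        // [ESP + NumBytes-4], which is where it is reloaded from below.)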
+        BuildMI(MBB, MBBI, DL,
+                TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+          .addExternalSymbol("_alloca");
+        // Restore EAX
+        MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+                                                X86::EAX),
+                                        StackPtr, false, NumBytes-4);
+        MBB.insert(MBBI, MI);
+      }
+    } else {
+      // If there is a SUB32ri of ESP immediately before this instruction,
+      // merge the two. This can be the case when tail call elimination is
+      // enabled and the callee has more arguments than the caller.
+      NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+      // If there is an ADD32ri or SUB32ri of ESP immediately after this
+      // instruction, merge the two instructions.
+      mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+      if (NumBytes)
+        emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+    }
+  }
+
+  if (needsFrameMoves)
+    emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+}
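As a side note on the Red Zone logic in emitPrologue above, here is a minimal standalone sketch of the size computation (the helper name and signature are ours, not LLVM API):

    #include <algorithm>
    #include <cstdint>

    // x86-64 leaf functions may freely use the 128 bytes below RSP (the Red
    // Zone) without adjusting the stack pointer; only the excess is allocated.
    uint64_t redZoneAdjustedStackSize(uint64_t StackSize,
                                      uint64_t CalleeSavedSize,
                                      bool HasFP, uint64_t SlotSize = 8) {
      uint64_t MinSize = CalleeSavedSize + (HasFP ? SlotSize : 0);
      return std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    }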
+        X86::LEA64r : X86::LEA32r;
+      MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+                                         FramePtr, false, -CSSize);
+      MBB.insert(MBBI, MI);
+    } else
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+              StackPtr).addReg(FramePtr);
+
+  } else {
+    // Adjust the stack pointer back: ESP += numbytes.
+    if (NumBytes)
+      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+  }
+
+  // We're returning from the function via eh_return.
+  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+    MBBI = prior(MBB.end());
+    MachineOperand &DestAddr = MBBI->getOperand(0);
+    assert(DestAddr.isReg() && "Offset should be in register!");
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+            StackPtr).addReg(DestAddr.getReg());
+  // Tail call return: adjust the stack pointer and jump to the callee.
+  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+    MBBI = prior(MBB.end());
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    MachineOperand &StackAdjust = MBBI->getOperand(1);
+    assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+    // Adjust the stack pointer.
+    int StackAdj = StackAdjust.getImm();
+    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+    int Offset = 0;
+    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+    // Incorporate the retaddr area.
+    Offset = StackAdj-MaxTCDelta;
+    assert(Offset >= 0 && "Offset should never be negative");
+
+    if (Offset) {
+      // Check for a possible merge with a preceding ADD instruction.
+      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
+    }
+
+    // Jump to label or value in register.
+    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64)
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    else if (RetOpcode == X86::TCRETURNri64)
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
+    else
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+
+    // Delete the pseudo instruction TCRETURN.
+    MBB.erase(MBBI);
+  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+             (X86FI->getTCReturnAddrDelta() < 0)) {
+    // Add the return addr area delta back since we are not tail calling.
+    int delta = -1*X86FI->getTCReturnAddrDelta();
+    MBBI = prior(MBB.end());
+    // Check for a possible merge with a preceding ADD instruction.
+    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
+  }
+}
+
+unsigned X86RegisterInfo::getRARegister() const {
+  if (Is64Bit)
+    return X86::RIP;  // Should have dwarf #16.
+  else
+    return X86::EIP;  // Should have dwarf #8.
+}
+
+unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+  return hasFP(MF) ? FramePtr : StackPtr;
+}
+
+void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+  const {
+  // Calculate the number of bytes used to store the return address.
+  int stackGrowth = (Is64Bit ? -8 : -4);
+
+  // Initial state of the frame pointer is esp+4.
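+  // (Illustrative elaboration, not in the original comment: the first move
+  // below defines DWARF's virtual frame pointer -- the CFA -- relative to the
+  // stack pointer at entry, i.e. esp+4 once the call has pushed a 4-byte
+  // return address; the second records that the return address itself lives
+  // in that slot. In 64-bit mode the same holds with 8 bytes and rsp.)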
+ MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(StackPtr, stackGrowth); + Moves.push_back(MachineMove(0, Dst, Src)); + + // Add return address to move list + MachineLocation CSDst(StackPtr, stackGrowth); + MachineLocation CSSrc(getRARegister()); + Moves.push_back(MachineMove(0, CSDst, CSSrc)); +} + +unsigned X86RegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned X86RegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +namespace llvm { +unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) { + switch (VT.getSimpleVT()) { + default: return Reg; + case MVT::i8: + if (High) { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AH; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DH; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CH; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BH; + } + } else { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AL; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DL; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CL; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; + } + } + case MVT::i16: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return 
X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; + } + case MVT::i32: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::EAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::EDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::ECX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::EBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::ESI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::EDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::EBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case MVT::i64: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; + } + } + + return Reg; +} +} + +#include "X86GenRegisterInfo.inc" + +namespace { + struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass { + static char ID; + MSAC() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + 
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      MachineRegisterInfo &RI = MF.getRegInfo();
+
+      // Calculate the max stack alignment of all already-allocated stack
+      // objects.
+      unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+      // Be over-conservative: scan all vreg defs and check whether any vector
+      // registers are used. If so, there is a chance that a vector register
+      // will be spilled, and the stack then needs to be aligned properly.
+      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+           RegNum < RI.getLastVirtReg(); ++RegNum)
+        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+      FFI->setMaxAlignment(MaxAlign);
+
+      return false;
+    }
+
+    virtual const char *getPassName() const {
+      return "X86 Maximal Stack Alignment Calculator";
+    }
+  };
+
+  char MSAC::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 000000000000..33b9f5edc73a
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,163 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "X86GenRegisterInfo.h.inc"
+
+namespace llvm {
+  class Type;
+  class TargetInstrInfo;
+  class X86TargetMachine;
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+  enum {
+    EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+  };
+}
+
+namespace X86 {
+  /// SubregIndex - The index of various sized subregister classes. Note that
+  /// these indices must be kept in sync with the class indices in the
+  /// X86RegisterInfo.td file.
+  enum SubregIndex {
+    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+  };
+}
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+  enum {
+    X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+  };
+}
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+  X86TargetMachine &TM;
+  const TargetInstrInfo &TII;
+
+private:
+  /// Is64Bit - Is the target 64-bit?
+  ///
+  bool Is64Bit;
+
+  /// IsWin64 - Is the target one of the Win64 flavours?
+  ///
+  bool IsWin64;
+
+  /// SlotSize - Stack slot size in bytes.
+  ///
+  unsigned SlotSize;
+
+  /// StackAlign - Default stack alignment.
+  ///
+  unsigned StackAlign;
+
+  /// StackPtr - X86 physical register used as the stack pointer.
+  ///
+  unsigned StackPtr;
+
+  /// FramePtr - X86 physical register used as the frame pointer.
+  ///
+  unsigned FramePtr;
+
+public:
+  X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+  /// getX86RegNum - Returns the native X86 register number for the given LLVM
+  /// register identifier.
+  static unsigned getX86RegNum(unsigned RegNo);
+
+  unsigned getStackAlignment() const { return StackAlign; }
+
+  /// getDwarfRegNum - Allows modification of X86GenRegisterInfo::getDwarfRegNum
+  /// (created by TableGen) for target dependencies.
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+  /// Code Generation virtual methods...
+  ///
+
+  /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+  /// values.
+  const TargetRegisterClass *getPointerRegClass() const;
+
+  /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns NULL if it is possible to copy
+  /// between two registers of the specified class.
+  const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+  /// getCalleeSavedRegs - Return a null-terminated list of all of the
+  /// callee-saved registers on this target.
+  const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+  /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
+  /// register classes to spill each callee-saved register with. The order and
+  /// length of this list match the getCalleeSavedRegs() list.
+  const TargetRegisterClass* const*
+  getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+  /// getReservedRegs - Returns a bitset indexed by physical register number
+  /// indicating if a register is a special register that has particular uses
+  /// and should be considered unavailable at all times, e.g. SP, RA. This is
+  /// used by the register scavenger to determine what registers are free.
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  bool needsStackRealignment(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  void emitFrameMoves(MachineFunction &MF,
+                      unsigned FrameLabelId, unsigned ReadyLabelId) const;
+
+  // Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(MachineFunction &MF) const;
+  int getFrameIndexOffset(MachineFunction &MF, int FI) const;
+  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+  // Exception handling queries.
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 000000000000..d552cb3ab8e9
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,762 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 register file, defining the registers
+// themselves, aliases between the registers, and the register classes built
+// out of the registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+  // In the register alias definitions below, we define which registers alias
+  // which others. We only specify which registers the small registers alias,
+  // because the register file generator is smart enough to figure out that
+  // AL aliases AX if we tell it that AX aliases AL (for example).
+
+  // Dwarf numbering is different for 32-bit and 64-bit, and there are
+  // variations by target as well. Currently the first entry is for X86-64,
+  // the second for EH on X86-32/Darwin, and the third is the 'generic' one
+  // (X86-32/Linux and debug information on X86-32/Darwin).
+
+  // 8-bit registers
+  // Low registers
+  def AL : Register<"al">, DwarfRegNum<[0, 0, 0]>;
+  def DL : Register<"dl">, DwarfRegNum<[1, 2, 2]>;
+  def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
+  def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
+
+  // X86-64 only
+  def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
+  def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
+  def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
+  def SPL : Register<"spl">, DwarfRegNum<[7, 5, 4]>;
+  def R8B : Register<"r8b">, DwarfRegNum<[8, -2, -2]>;
+  def R9B : Register<"r9b">, DwarfRegNum<[9, -2, -2]>;
+  def R10B : Register<"r10b">, DwarfRegNum<[10, -2, -2]>;
+  def R11B : Register<"r11b">, DwarfRegNum<[11, -2, -2]>;
+  def R12B : Register<"r12b">, DwarfRegNum<[12, -2, -2]>;
+  def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
+  def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
+  def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+
+  // High registers. On x86-64, these cannot be used in any instruction
+  // with a REX prefix.
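+  // (For example, "movzbl %ah, %r8d" is not encodable: %r8d forces a REX
+  // prefix, and with REX present the AH/CH/DH/BH encodings select
+  // SPL/BPL/SIL/DIL instead.)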
+ def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>; + def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>; + def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>; + def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>; + + // 16-bit registers + def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>; + def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>; + def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>; + def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>; + def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>; + def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>; + def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>; + def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; + def IP : Register<"ip">, DwarfRegNum<[16]>; + + // X86-64 only + def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; + def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; + def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; + def R11W : RegisterWithSubRegs<"r11w", [R11B]>, DwarfRegNum<[11, -2, -2]>; + def R12W : RegisterWithSubRegs<"r12w", [R12B]>, DwarfRegNum<[12, -2, -2]>; + def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>; + def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>; + def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>; + + // 32-bit registers + def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>; + def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>; + def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>; + def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[3, 3, 3]>; + def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[4, 6, 6]>; + def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>; + def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>; + def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>; + def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; + + // X86-64 only + def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>; + def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>; + def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>; + def R11D : RegisterWithSubRegs<"r11d", [R11W]>, DwarfRegNum<[11, -2, -2]>; + def R12D : RegisterWithSubRegs<"r12d", [R12W]>, DwarfRegNum<[12, -2, -2]>; + def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; + def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; + def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; + + // 64-bit registers, X86-64 only + def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>; + def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>; + def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>; + def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>; + def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>; + def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>; + def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>; + def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>; + + def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>; + def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>; + def R10 : RegisterWithSubRegs<"r10", 
[R10D]>, DwarfRegNum<[10, -2, -2]>; + def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>; + def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>; + def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>; + def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; + def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; + def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; + + // MMX Registers. These are actually aliased to ST0 .. ST7 + def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; + def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>; + def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>; + def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>; + def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>; + def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; + def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; + def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; + + // Pseudo Floating Point registers + def FP0 : Register<"fp0">; + def FP1 : Register<"fp1">; + def FP2 : Register<"fp2">; + def FP3 : Register<"fp3">; + def FP4 : Register<"fp4">; + def FP5 : Register<"fp5">; + def FP6 : Register<"fp6">; + + // XMM Registers, used by the various SSE instruction set extensions + def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; + def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; + def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; + def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>; + def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>; + def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>; + def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>; + def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>; + + // X86-64 only + def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>; + def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>; + def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>; + def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>; + def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>; + def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; + def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; + def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + + // Floating point stack registers + def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; + def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; + def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>; + def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>; + def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>; + def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>; + def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>; + def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; + + // Status flags register + def EFLAGS : Register<"flags">; + + // Segment registers + def CS : Register<"cs">; + def DS : Register<"ds">; + def SS : Register<"ss">; + def ES : Register<"es">; + def FS : Register<"fs">; + def GS : Register<"gs">; +} + + +//===----------------------------------------------------------------------===// +// Subregister Set Definitions... now that we have all of the pieces, define the +// sub registers for each register. 
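+// (Illustrative note, not in the original import: each SubRegSet below maps
+// one subregister index to pairs of registers. The first set, for instance,
+// records that AL is subregister index 1 -- SUBREG_8BIT -- of AX, CL of CX,
+// and so on; queries such as getSubReg(X86::AX, 1) are generated from it.)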
+// + +def x86_subreg_8bit : PatLeaf<(i32 1)>; +def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; +def x86_subreg_16bit : PatLeaf<(i32 3)>; +def x86_subreg_32bit : PatLeaf<(i32 4)>; + +def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], + [AL, CL, DL, BL, SPL, BPL, SIL, DIL, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; + +def : SubRegSet<2, [AX, CX, DX, BX], + [AH, CH, DH, BH]>; + +def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], + [AL, CL, DL, BL, SPL, BPL, SIL, DIL, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; + +def : SubRegSet<2, [EAX, ECX, EDX, EBX], + [AH, CH, DH, BH]>; + +def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], + [AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; + +def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15], + [AL, CL, DL, BL, SPL, BPL, SIL, DIL, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; + +def : SubRegSet<2, [RAX, RCX, RDX, RBX], + [AH, CH, DH, BH]>; + +def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15], + [AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; + +def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15], + [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; + +//===----------------------------------------------------------------------===// +// Register Class Definitions... now that we have all of the pieces, define the +// top-level register classes. The order specified in the register list is +// implicitly defined to be the register allocation order. +// + +// List call-clobbered registers before callee-save registers. RBX, RBP, (and +// R12, R13, R14, and R15 for X86-64) are callee-save registers. +// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and +// R8B, ... R15B. +// Allocate R12 and R13 last, as these require an extra byte when +// encoded in x86_64 instructions. +// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in +// 64-bit mode. The main complication is that they cannot be encoded in an +// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc. +// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" +// cannot be encoded. +def GR8 : RegisterClass<"X86", [i8], 8, + [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL, + R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SPL or BPL. + static const unsigned X86_GR8_AO_64_fp[] = { + X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, + X86::R8B, X86::R9B, X86::R10B, X86::R11B, + X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B + }; + // If not, just don't allocate SPL. + static const unsigned X86_GR8_AO_64[] = { + X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, + X86::R8B, X86::R9B, X86::R10B, X86::R11B, + X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL + }; + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. 
+ static const unsigned X86_GR8_AO_32[] = { + X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH + }; + + GR8Class::iterator + GR8Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return X86_GR8_AO_32; + else if (RI->hasFP(MF)) + return X86_GR8_AO_64_fp; + else + return X86_GR8_AO_64; + } + + GR8Class::iterator + GR8Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned)); + else if (RI->hasFP(MF)) + return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned)); + else + return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned)); + } + }]; +} + + +def GR16 : RegisterClass<"X86", [i16], 16, + [AX, CX, DX, SI, DI, BX, BP, SP, + R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { + let SubRegClassList = [GR8, GR8]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SP or BP. + static const unsigned X86_GR16_AO_64_fp[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, + X86::R8W, X86::R9W, X86::R10W, X86::R11W, + X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W + }; + static const unsigned X86_GR16_AO_32_fp[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX + }; + // If not, just don't allocate SP. 
+ static const unsigned X86_GR16_AO_64[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, + X86::R8W, X86::R9W, X86::R10W, X86::R11W, + X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP + }; + static const unsigned X86_GR16_AO_32[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP + }; + + GR16Class::iterator + GR16Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) { + if (RI->hasFP(MF)) + return X86_GR16_AO_64_fp; + else + return X86_GR16_AO_64; + } else { + if (RI->hasFP(MF)) + return X86_GR16_AO_32_fp; + else + return X86_GR16_AO_32; + } + } + + GR16Class::iterator + GR16Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) { + if (RI->hasFP(MF)) + return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned)); + else + return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned)); + } else { + if (RI->hasFP(MF)) + return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned)); + else + return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned)); + } + } + }]; +} + + +def GR32 : RegisterClass<"X86", [i32], 32, + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, + R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { + let SubRegClassList = [GR8, GR8, GR16]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate ESP or EBP. + static const unsigned X86_GR32_AO_64_fp[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D + }; + static const unsigned X86_GR32_AO_32_fp[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX + }; + // If not, just don't allocate ESP. 
+ static const unsigned X86_GR32_AO_64[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP + }; + static const unsigned X86_GR32_AO_32[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP + }; + + GR32Class::iterator + GR32Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) { + if (RI->hasFP(MF)) + return X86_GR32_AO_64_fp; + else + return X86_GR32_AO_64; + } else { + if (RI->hasFP(MF)) + return X86_GR32_AO_32_fp; + else + return X86_GR32_AO_32; + } + } + + GR32Class::iterator + GR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) { + if (RI->hasFP(MF)) + return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned)); + else + return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned)); + } else { + if (RI->hasFP(MF)) + return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned)); + else + return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned)); + } + } + }]; +} + + +def GR64 : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + RBX, R14, R15, R12, R13, RBP, RSP]> { + let SubRegClassList = [GR8, GR8, GR16, GR32]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64Class::iterator + GR64Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return begin(); // None of these are allocatable in 32-bit. + if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + return end()-2; // If so, don't allocate RSP or RBP + else + return end()-1; // If not, just don't allocate RSP + } + }]; +} + + +// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of +// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" +// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers +// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, +// and GR64_ABCD are classes for registers that support 8-bit h-register +// operations. +def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { +} +def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { +} +def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H]; +} +def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; +} +def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; +} + +// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of +// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs. +// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes +// of registers which do not by themselves require a REX prefix. 
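+// (A hedged aside, not in the original import: these classes exist for
+// contexts where a REX prefix must be avoided, e.g. an instruction that also
+// touches an h-register such as AH, since REX and the AH/CH/DH/BH encodings
+// cannot appear together.)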
+def GR8_NOREX : RegisterClass<"X86", [i8], 8, + [AL, CL, DL, BL, AH, CH, DH, BH, + SIL, DIL, BPL, SPL]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SPL or BPL. + static const unsigned X86_GR8_NOREX_AO_64_fp[] = { + X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL + }; + // If not, just don't allocate SPL. + static const unsigned X86_GR8_NOREX_AO_64[] = { + X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL + }; + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. + static const unsigned X86_GR8_NOREX_AO_32[] = { + X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH + }; + + GR8_NOREXClass::iterator + GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return X86_GR8_NOREX_AO_32; + else if (RI->hasFP(MF)) + return X86_GR8_NOREX_AO_64_fp; + else + return X86_GR8_NOREX_AO_64; + } + + GR8_NOREXClass::iterator + GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return X86_GR8_NOREX_AO_32 + + (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned)); + else if (RI->hasFP(MF)) + return X86_GR8_NOREX_AO_64_fp + + (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned)); + else + return X86_GR8_NOREX_AO_64 + + (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned)); + } + }]; +} +def GR16_NOREX : RegisterClass<"X86", [i16], 16, + [AX, CX, DX, SI, DI, BX, BP, SP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SP or BP. + static const unsigned X86_GR16_AO_fp[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX + }; + // If not, just don't allocate SP. + static const unsigned X86_GR16_AO[] = { + X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP + }; + + GR16_NOREXClass::iterator + GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR16_AO_fp; + else + return X86_GR16_AO; + } + + GR16_NOREXClass::iterator + GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned)); + else + return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned)); + } + }]; +} +// GR32_NOREX - GR32 registers which do not require a REX prefix. 
+def GR32_NOREX : RegisterClass<"X86", [i32], 32, + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate ESP or EBP. + static const unsigned X86_GR32_NOREX_AO_fp[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX + }; + // If not, just don't allocate ESP. + static const unsigned X86_GR32_NOREX_AO[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP + }; + + GR32_NOREXClass::iterator + GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR32_NOREX_AO_fp; + else + return X86_GR32_NOREX_AO; + } + + GR32_NOREXClass::iterator + GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR32_NOREX_AO_fp + + (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned)); + else + return X86_GR32_NOREX_AO + + (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned)); + } + }]; +} + +// GR64_NOREX - GR64 registers which do not require a REX prefix. +def GR64_NOREX : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate RSP or RBP. + static const unsigned X86_GR64_NOREX_AO_fp[] = { + X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX + }; + // If not, just don't allocate RSP. + static const unsigned X86_GR64_NOREX_AO[] = { + X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP + }; + + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR64_NOREX_AO_fp; + else + return X86_GR64_NOREX_AO; + } + + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return X86_GR64_NOREX_AO_fp + + (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned)); + else + return X86_GR64_NOREX_AO + + (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned)); + } + }]; +} + +// A class to support the 'A' assembler constraint: EAX then EDX. +def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>; + +// Scalar SSE2 floating point registers. 
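+// (Note, not in the original import: FR32/FR64 -- and VR128 further down --
+// list all sixteen XMM registers, but in 32-bit mode only XMM0-XMM7 exist,
+// so their allocation_order_end bodies trim the last eight entries.)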
+def FR32 : RegisterClass<"X86", [f32], 32, + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FR32Class::iterator + FR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} + +def FR64 : RegisterClass<"X86", [f64], 64, + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FR64Class::iterator + FR64Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} + + +// FIXME: This sets up the floating point register files as though they are f64 +// values, though they really are f80 values. This will cause us to spill +// values as 64-bit quantities instead of 80-bit quantities, which is much much +// faster on common hardware. In reality, this should be controlled by a +// command line option or something. + +def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; + +// Floating point stack registers (these are not allocatable by the +// register allocator - the floating point stackifier is responsible +// for transforming FPn allocations to STn registers) +def RST : RegisterClass<"X86", [f80, f64, f32], 32, + [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + RSTClass::iterator + RSTClass::allocation_order_end(const MachineFunction &MF) const { + return begin(); + } + }]; +} + +// Generic vector registers: VR64 and VR128. +def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64, + [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; +def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR128Class::iterator + VR128Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} + +// Status flags registers. +def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { + let CopyCost = -1; // Don't allow copying of status registers. 
+}
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
new file mode 100644
index 000000000000..b225f480e4ed
--- /dev/null
+++ b/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,42 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+  namespace X86 {
+    /// RelocationType - An enum for the x86 relocation codes. Note that
+    /// the terminology here doesn't follow x86 convention - word means
+    /// 32-bit and dword means 64-bit.
+    enum RelocationType {
+      // reloc_pcrel_word - PC-relative relocation: add the relocated value to
+      // the value already in memory, after we adjust it for where the PC is.
+      reloc_pcrel_word = 0,
+
+      // reloc_picrel_word - PIC-base-relative relocation: add the relocated
+      // value to the value already in memory, after we adjust it for where
+      // the PIC base is.
+      reloc_picrel_word = 1,
+
+      // reloc_absolute_word, reloc_absolute_dword - Absolute relocation: just
+      // add the relocated value to the value already in memory.
+      reloc_absolute_word = 2,
+      reloc_absolute_dword = 3
+    };
+  }
+}
+
+#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 000000000000..03ce1aee0e8a
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,446 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86GenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#if defined(_MSC_VER)
+  #include <intrin.h>
+#endif
+
+static cl::opt<X86Subtarget::AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
+  cl::desc("Choose style of code to emit from X86 backend:"),
+  cl::values(
+    clEnumValN(X86Subtarget::ATT,   "att",   "Emit AT&T-style assembly"),
+    clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"),
+    clEnumValEnd));
+
+
+/// True if accessing the GV requires an extra load. For Windows, dllimported
+/// symbols are indirect, loading the value at address GV rather than the
+/// value of GV itself. This means that the GlobalAddress must be in the base
+/// or index register of the address, not the GV offset field.
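+/// For example (illustrative sketch, not part of the original comment), a
+/// load of a dllimported global G on x86-32 goes through its import-table
+/// slot and compiles to roughly:
+///   movl __imp__G, %eax   ; load &G from the import table
+///   movl (%eax), %eax     ; then load G itself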
+bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+                                       const TargetMachine& TM,
+                                       bool isDirectCall) const
+{
+  // FIXME: PIC
+  if (TM.getRelocationModel() != Reloc::Static &&
+      TM.getCodeModel() != CodeModel::Large) {
+    if (isTargetDarwin()) {
+      if (isDirectCall)
+        return false;
+      bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+      if (GV->hasHiddenVisibility() &&
+          (Is64Bit || (!isDecl && !GV->hasCommonLinkage())))
+        // If symbol visibility is hidden, the extra load is not needed if
+        // the target is x86-64 or the symbol is definitely defined in the
+        // current translation unit.
+        return false;
+      return !isDirectCall && (isDecl || GV->isWeakForLinker());
+    } else if (isTargetELF()) {
+      // An extra load is needed for all externally visible symbols.
+      if (isDirectCall)
+        return false;
+      if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+        return false;
+      return true;
+    } else if (isTargetCygMing() || isTargetWindows()) {
+      return (GV->hasDLLImportLinkage());
+    }
+  }
+  return false;
+}
+
+/// True if accessing the GV requires a register. This is a superset of the
+/// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+/// a register, but not an extra load.
+bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
+                                      const TargetMachine& TM,
+                                      bool isDirectCall) const
+{
+  if (GVRequiresExtraLoad(GV, TM, isDirectCall))
+    return true;
+  // Code below here need only consider cases where GVRequiresExtraLoad
+  // returns false.
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    return !isDirectCall &&
+      (GV->hasLocalLinkage() || GV->hasExternalLinkage());
+  return false;
+}
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists
+/// on the current subtarget and it is considered preferable to memset with
+/// zero passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+  // Darwin 10 has a __bzero entry point for this purpose.
+  if (getDarwinVers() >= 10)
+    return "__bzero";
+
+  return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to an immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+  if (Is64Bit)
+    return false;
+  return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+  // For x86 out-of-order targets, back-schedule address computations so
+  // that loads and stores aren't blocked.
+  // This value was chosen arbitrarily.
+  return 200;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+/// the specified arguments. If we can't run cpuid on the host, return true.
+bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+                          unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64)
+  #if defined(__GNUC__)
+    // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +void X86Subtarget::AutoDetectSubtargetFeatures() { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + union { + unsigned u[3]; + char c[12]; + } text; + + if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + return; + + X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + + if ((EDX >> 23) & 0x1) X86SSELevel = MMX; + if ((EDX >> 25) & 0x1) X86SSELevel = SSE1; + if ((EDX >> 26) & 0x1) X86SSELevel = SSE2; + if (ECX & 0x1) X86SSELevel = SSE3; + if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3; + if ((ECX >> 19) & 0x1) X86SSELevel = SSE41; + if ((ECX >> 20) & 0x1) X86SSELevel = SSE42; + + bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; + bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; + if (IsIntel || IsAMD) { + // Determine if bit test memory instructions are slow. 
+ unsigned Family = 0; + unsigned Model = 0; + DetectFamilyModel(EAX, Family, Model); + IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); + + X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + HasX86_64 = (EDX >> 29) & 0x1; + HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); + } +} + +static const char *GetCurrentX86CPU() { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + return "generic"; + unsigned Family = 0; + unsigned Model = 0; + DetectFamilyModel(EAX, Family, Model); + + X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + bool Em64T = (EDX >> 29) & 0x1; + bool HasSSE3 = (ECX & 0x1); + + union { + unsigned u[3]; + char c[12]; + } text; + + X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + if (memcmp(text.c, "GenuineIntel", 12) == 0) { + switch (Family) { + case 3: + return "i386"; + case 4: + return "i486"; + case 5: + switch (Model) { + case 4: return "pentium-mmx"; + default: return "pentium"; + } + case 6: + switch (Model) { + case 1: return "pentiumpro"; + case 3: + case 5: + case 6: return "pentium2"; + case 7: + case 8: + case 10: + case 11: return "pentium3"; + case 9: + case 13: return "pentium-m"; + case 14: return "yonah"; + case 15: + case 22: // Celeron M 540 + return "core2"; + case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE) + return "penryn"; + default: return "i686"; + } + case 15: { + switch (Model) { + case 3: + case 4: + case 6: // same as 4, but 65nm + return (Em64T) ? "nocona" : "prescott"; + case 26: + return "corei7"; + case 28: + return "atom"; + default: + return (Em64T) ? "x86-64" : "pentium4"; + } + } + + default: + return "generic"; + } + } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. + switch (Family) { + case 4: + return "i486"; + case 5: + switch (Model) { + case 6: + case 7: return "k6"; + case 8: return "k6-2"; + case 9: + case 13: return "k6-3"; + default: return "pentium"; + } + case 6: + switch (Model) { + case 4: return "athlon-tbird"; + case 6: + case 7: + case 8: return "athlon-mp"; + case 10: return "athlon-xp"; + default: return "athlon"; + } + case 15: + if (HasSSE3) { + switch (Model) { + default: return "k8-sse3"; + } + } else { + switch (Model) { + case 1: return "opteron"; + case 5: return "athlon-fx"; // also opteron + default: return "athlon64"; + } + } + case 16: + switch (Model) { + default: return "amdfam10"; + } + default: + return "generic"; + } + } else { + return "generic"; + } +} + +X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) + : AsmFlavor(AsmWriterFlavor) + , PICStyle(PICStyles::None) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasX86_64(false) + , IsBTMemSlow(false) + , DarwinVers(0) + , IsLinux(false) + , stackAlignment(8) + // FIXME: this is a known good value for Yonah. How about others? + , MaxInlineSizeThreshold(128) + , Is64Bit(is64Bit) + , TargetType(isELF) { // Default to ELF unless otherwise specified. + + // Determine default and user specified characteristics + if (!FS.empty()) { + // If feature string is not empty, parse features string. + std::string CPU = GetCurrentX86CPU(); + ParseSubtargetFeatures(FS, CPU); + // All X86-64 CPUs also have SSE2, however user might request no SSE via + // -mattr, so don't force SSELevel here. 
+ } else { + // Otherwise, use CPUID to auto-detect feature set. + AutoDetectSubtargetFeatures(); + // Make sure SSE2 is enabled; it is available on all X86-64 CPUs. + if (Is64Bit && X86SSELevel < SSE2) + X86SSELevel = SSE2; + } + + // If requesting codegen for X86-64, make sure that 64-bit features + // are enabled. + if (Is64Bit) + HasX86_64 = true; + + DOUT << "Subtarget features: SSELevel " << X86SSELevel + << ", 3DNowLevel " << X863DNowLevel + << ", 64bit " << HasX86_64 << "\n"; + assert((!Is64Bit || HasX86_64) && + "64-bit code requested on a subtarget that doesn't support it!"); + + // Set the boolean corresponding to the current target triple, or the default + // if one cannot be determined, to true. + const std::string& TT = M.getTargetTriple(); + if (TT.length() > 5) { + size_t Pos; + if ((Pos = TT.find("-darwin")) != std::string::npos) { + TargetType = isDarwin; + + // Compute the darwin version number. + if (isdigit(TT[Pos+7])) + DarwinVers = atoi(&TT[Pos+7]); + else + DarwinVers = 8; // Minimum supported darwin is Tiger. + } else if (TT.find("linux") != std::string::npos) { + // Linux doesn't imply ELF, but we don't currently support anything else. + TargetType = isELF; + IsLinux = true; + } else if (TT.find("cygwin") != std::string::npos) { + TargetType = isCygwin; + } else if (TT.find("mingw") != std::string::npos) { + TargetType = isMingw; + } else if (TT.find("win32") != std::string::npos) { + TargetType = isWindows; + } else if (TT.find("windows") != std::string::npos) { + TargetType = isWindows; + } + else if (TT.find("-cl") != std::string::npos) { + TargetType = isDarwin; + DarwinVers = 9; + } + } else if (TT.empty()) { +#if defined(__CYGWIN__) + TargetType = isCygwin; +#elif defined(__MINGW32__) || defined(__MINGW64__) + TargetType = isMingw; +#elif defined(__APPLE__) + TargetType = isDarwin; +#if __APPLE_CC__ > 5400 + DarwinVers = 9; // GCC 5400+ is Leopard. +#else + DarwinVers = 8; // Minimum supported darwin is Tiger. +#endif + +#elif defined(_WIN32) || defined(_WIN64) + TargetType = isWindows; +#elif defined(__linux__) + // Linux doesn't imply ELF, but we don't currently support anything else. + TargetType = isELF; + IsLinux = true; +#endif + } + + // If the asm syntax hasn't been overridden on the command line, use whatever + // the target wants. + if (AsmFlavor == X86Subtarget::Unset) { + AsmFlavor = (TargetType == isWindows) + ? X86Subtarget::Intel : X86Subtarget::ATT; + } + + // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 + // bit targets. + if (TargetType == isDarwin || Is64Bit) + stackAlignment = 16; + + if (StackAlignment) + stackAlignment = StackAlignment; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h new file mode 100644 index 000000000000..46476f20400a --- /dev/null +++ b/lib/Target/X86/X86Subtarget.h @@ -0,0 +1,224 @@ +//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the X86 specific subclass of TargetSubtarget. 
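
Returning to the constructor above for a moment: DarwinVers is derived by locating "-darwin" in the triple and converting whatever digits follow, defaulting to 8 (Tiger) when none are present. A minimal sketch of that scan; the helper name is illustrative:

    #include <cctype>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Sketch: extract the Darwin major version from a target triple.
    static unsigned darwinVersion(const std::string &TT) {
      size_t Pos = TT.find("-darwin");
      if (Pos == std::string::npos)
        return 0;                                        // not a Darwin triple
      if (Pos + 7 < TT.size() && isdigit((unsigned char)TT[Pos + 7]))
        return (unsigned)atoi(&TT[Pos + 7]);             // "darwin9.6" -> 9
      return 8;                                          // bare "-darwin": Tiger
    }

    int main() {
      std::printf("%u\n", darwinVersion("i686-apple-darwin9.6"));  // 9
      std::printf("%u\n", darwinVersion("x86_64-apple-darwin"));   // 8
      return 0;
    }
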
+// +//===----------------------------------------------------------------------===// + +#ifndef X86SUBTARGET_H +#define X86SUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include + +namespace llvm { +class Module; +class GlobalValue; +class TargetMachine; + +namespace PICStyles { +enum Style { + Stub, GOT, RIPRel, WinPIC, None +}; +} + +class X86Subtarget : public TargetSubtarget { +public: + enum AsmWriterFlavorTy { + // Note: This numbering has to match the GCC assembler dialects for inline + // asm alternatives to work right. + ATT = 0, Intel = 1, Unset + }; +protected: + enum X86SSEEnum { + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 + }; + + enum X863DNowEnum { + NoThreeDNow, ThreeDNow, ThreeDNowA + }; + + /// AsmFlavor - Which x86 asm dialect to use. + /// + AsmWriterFlavorTy AsmFlavor; + + /// PICStyle - Which PIC style to use + /// + PICStyles::Style PICStyle; + + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or + /// none supported. + X86SSEEnum X86SSELevel; + + /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported. + /// + X863DNowEnum X863DNowLevel; + + /// HasX86_64 - True if the processor supports X86-64 instructions. + /// + bool HasX86_64; + + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. + bool IsBTMemSlow; + + /// HasSSE4A - True if the processor supports SSE4A instructions. + bool HasSSE4A; + + /// DarwinVers - Nonzero if this is a darwin platform: the numeric + /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. + unsigned char DarwinVers; // Is any darwin-x86 platform. + + /// isLinux - true if this is a "linux" platform. + bool IsLinux; + + /// stackAlignment - The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + unsigned stackAlignment; + + /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. + /// + unsigned MaxInlineSizeThreshold; + +private: + /// Is64Bit - True if the processor supports 64-bit instructions and module + /// pointer size is 64 bit. + bool Is64Bit; + +public: + enum { + isELF, isCygwin, isDarwin, isWindows, isMingw + } TargetType; + + /// This constructor initializes the data members to match that + /// of the specified module. + /// + X86Subtarget(const Module &M, const std::string &FS, bool is64Bit); + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by every + /// function for this subtarget. + unsigned getStackAlignment() const { return stackAlignment; } + + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. + unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID + /// instruction. 
+  void AutoDetectSubtargetFeatures();
+
+  bool is64Bit() const { return Is64Bit; }
+
+  PICStyles::Style getPICStyle() const { return PICStyle; }
+  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+  bool hasMMX() const { return X86SSELevel >= MMX; }
+  bool hasSSE1() const { return X86SSELevel >= SSE1; }
+  bool hasSSE2() const { return X86SSELevel >= SSE2; }
+  bool hasSSE3() const { return X86SSELevel >= SSE3; }
+  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+  bool hasSSE41() const { return X86SSELevel >= SSE41; }
+  bool hasSSE42() const { return X86SSELevel >= SSE42; }
+  bool hasSSE4A() const { return HasSSE4A; }
+  bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+  bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+
+  bool isBTMemSlow() const { return IsBTMemSlow; }
+
+  unsigned getAsmFlavor() const {
+    return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+  }
+
+  bool isFlavorAtt() const { return AsmFlavor == ATT; }
+  bool isFlavorIntel() const { return AsmFlavor == Intel; }
+
+  bool isTargetDarwin() const { return TargetType == isDarwin; }
+  bool isTargetELF() const {
+    return TargetType == isELF;
+  }
+  bool isTargetWindows() const { return TargetType == isWindows; }
+  bool isTargetMingw() const { return TargetType == isMingw; }
+  bool isTargetCygMing() const { return (TargetType == isMingw ||
+                                         TargetType == isCygwin); }
+  bool isTargetCygwin() const { return TargetType == isCygwin; }
+  bool isTargetWin64() const {
+    return (Is64Bit && (TargetType == isMingw || TargetType == isWindows));
+  }
+
+  std::string getDataLayout() const {
+    const char *p;
+    if (is64Bit())
+      p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+    else {
+      if (isTargetDarwin())
+        p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+      else
+        p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+    }
+    return std::string(p);
+  }
+
+  bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+  bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+  bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
+  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+  bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
+
+  /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+  unsigned getDarwinVers() const { return DarwinVers; }
+
+  /// isLinux - Return true if the target is "Linux".
+  bool isLinux() const { return IsLinux; }
+
+  /// True if accessing the GV requires an extra load. For Windows, dllimported
+  /// symbols are indirect, loading the value at address GV rather than the
+  /// value of GV itself. This means that the GlobalAddress must be in the base
+  /// or index register of the address, not the GV offset field.
+  bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+                           bool isDirectCall) const;
+
+  /// True if accessing the GV requires a register. This is a superset of the
+  /// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+  /// a register, but not an extra load.
+  bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM,
+                          bool isDirectCall) const;
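
The feature accessors above rely on X86SSEEnum being ordered from weakest to strongest: each SSE level implies all the lower ones, so a query is a single comparison instead of a bit-set test. A reduced sketch of the idea:

    #include <cassert>

    // Sketch: a cut-down model of the ordered feature enum.
    enum SSELevel { NoSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 };

    static bool hasSSE2(SSELevel L) { return L >= SSE2; }

    int main() {
      assert(hasSSE2(SSE41));   // a higher level implies the lower ones
      assert(!hasSSE2(SSE1));
      return 0;
    }
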
+  /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+  /// to immediate address.
+  bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable over
+  /// memset with zero passed as the second argument. Otherwise it
+  /// returns null.
+  const char *getBZeroEntry() const;
+
+  /// getSpecialAddressLatency - For targets where it is beneficial to
+  /// backschedule instructions that compute addresses, return a value
+  /// indicating the number of scheduling cycles of backscheduling that
+  /// should be attempted.
+  unsigned getSpecialAddressLatency() const;
+};
+
+namespace X86 {
+  /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+  /// the specified arguments. If we can't run cpuid on the host, return true.
+  bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+                       unsigned *rECX, unsigned *rEDX);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
new file mode 100644
index 000000000000..5dda5f4e49b0
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -0,0 +1,461 @@
+//===-- X86TargetAsmInfo.cpp - X86 asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+const char *const llvm::x86_asm_table[] = {
+  "{si}", "S",
+  "{di}", "D",
+  "{ax}", "a",
+  "{cx}", "c",
+  "{memory}", "memory",
+  "{flags}", "",
+  "{dirflag}", "",
+  "{fpsr}", "",
+  "{cc}", "cc",
+  0,0};
+
+X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
+  X86TargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+  const X86Subtarget* Subtarget = &TM.getSubtarget<X86Subtarget>();
+  bool is64Bit = Subtarget->is64Bit();
+
+  AlignmentIsInBytes = false;
+  TextAlignFillValue = 0x90;
+  GlobalPrefix = "_";
+  if (!is64Bit)
+    Data64bitsDirective = 0;       // we can't emit a 64-bit unit
+  ZeroDirective = "\t.space\t";    // ".space N" emits N zeros.
+  PrivateGlobalPrefix = "L";       // Marker for constant pool idxs
+  LessPrivateGlobalPrefix = "l";   // Marker for some ObjC metadata
+  BSSSection = 0;                       // no BSS section.
+  ZeroFillDirective = "\t.zerofill\t";  // Uses .zerofill
+  if (TM.getRelocationModel() != Reloc::Static)
+    ConstantPoolSection = "\t.const_data";
+  else
+    ConstantPoolSection = "\t.const\n";
+  JumpTableDataSection = "\t.const\n";
+  CStringSection = "\t.cstring";
+  // FIXME: Why don't we always use this section?
+  if (is64Bit) {
+    SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
+                                                   SectionFlags::Mergeable);
+  }
+  LCOMMDirective = "\t.lcomm\t";
+  SwitchToSectionDirective = "\t.section ";
+  StringConstantPrefix = "\1LC";
+  // Leopard and above support aligned common symbols.
+ COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9); + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + NonLocalEHFrameLabel = true; + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorsSection = ".constructor"; + StaticDtorsSection = ".destructor"; + } else { + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + } + if (is64Bit) { + PersonalityPrefix = ""; + PersonalitySuffix = "+4@GOTPCREL"; + } else { + PersonalityPrefix = "L"; + PersonalitySuffix = "$non_lazy_ptr"; + } + NeedsIndirectEncoding = true; + InlineAsmStart = "## InlineAsm Start"; + InlineAsmEnd = "## InlineAsm End"; + CommentString = "##"; + SetDirective = "\t.set"; + PCSymbol = "."; + UsedDirective = "\t.no_dead_strip\t"; + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + HiddenDirective = "\t.private_extern "; + ProtectedDirective = "\t.globl\t"; + + // In non-PIC modes, emit a special label before jump tables so that the + // linker can perform more accurate dead code stripping. + if (TM.getRelocationModel() != Reloc::PIC_) { + // Emit a local label that is preserved until the linker runs. + JumpTableSpecialLabelPrefix = "l"; + } + + SupportsDebugInformation = true; + NeedsSet = true; + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug"; + DwarfUsesInlineInfoSection = true; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; + + // Exceptions handling + SupportsExceptionHandling = true; + GlobalEHDirective = "\t.globl\t"; + SupportsWeakOmittedEHFrame = false; + AbsoluteEHSectionOffsets = false; + DwarfEHFrameSection = + ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support"; + DwarfExceptionSection = ".section __DATA,__gcc_except_tab"; +} + +unsigned +X86DarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const { + if (Reason == DwarfEncoding::Functions && Global) + return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4); + else if (Reason == DwarfEncoding::CodeLabels || !Global) + return DW_EH_PE_pcrel; + else + return DW_EH_PE_absptr; +} + +const char * +X86DarwinTargetAsmInfo::getEHGlobalPrefix() const +{ + const X86Subtarget* Subtarget = &TM.getSubtarget(); + if (Subtarget->getDarwinVers() > 9) + return PrivateGlobalPrefix; + else + return ""; +} + +X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM): + X86TargetAsmInfo(TM) { + + CStringSection = ".rodata.str"; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + PCSymbol = "."; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + + // Debug Information + AbsoluteDebugSectionOffsets = true; + SupportsDebugInformation = true; + 
DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+  DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+  DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+  DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+  DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+  DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+  DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+  DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+  DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+  DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+  // Exception handling
+  SupportsExceptionHandling = true;
+  AbsoluteEHSectionOffsets = false;
+  DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+  DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+
+  // On Linux we must declare when we can use a non-executable stack.
+  if (TM.getSubtarget<X86Subtarget>().isLinux())
+    NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
+
+unsigned
+X86ELFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                           bool Global) const {
+  CodeModel::Model CM = TM.getCodeModel();
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    unsigned Format = 0;
+
+    if (!is64Bit)
+      // 32 bit targets always encode pointers as 4 bytes
+      Format = DW_EH_PE_sdata4;
+    else {
+      // 64 bit targets encode pointers in 4 bytes iff:
+      //  - code model is small OR
+      //  - code model is medium and we're emitting externally visible symbols
+      //    or any code symbols
+      if (CM == CodeModel::Small ||
+          (CM == CodeModel::Medium && (Global ||
+                                       Reason != DwarfEncoding::Data)))
+        Format = DW_EH_PE_sdata4;
+      else
+        Format = DW_EH_PE_sdata8;
+    }
+
+    if (Global)
+      Format |= DW_EH_PE_indirect;
+
+    return (Format | DW_EH_PE_pcrel);
+  } else {
+    if (is64Bit &&
+        (CM == CodeModel::Small ||
+         (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+      return DW_EH_PE_udata4;
+    else
+      return DW_EH_PE_absptr;
+  }
+}
+
+X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
+  X86GenericTargetAsmInfo(TM) {
+
+  GlobalPrefix = "_";
+  LCOMMDirective = "\t.lcomm\t";
+  COMMDirectiveTakesAlignment = false;
+  HasDotTypeDotSizeDirective = false;
+  HasSingleParameterDotFile = false;
+  StaticCtorsSection = "\t.section .ctors,\"aw\"";
+  StaticDtorsSection = "\t.section .dtors,\"aw\"";
+  HiddenDirective = NULL;
+  PrivateGlobalPrefix = "L";  // Prefix for private global symbols
+  WeakRefDirective = "\t.weak\t";
+  SetDirective = "\t.set\t";
+
+  // Set up DWARF directives
+  HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
+  AbsoluteDebugSectionOffsets = true;
+  AbsoluteEHSectionOffsets = false;
+  SupportsDebugInformation = true;
+  DwarfSectionOffsetDirective = "\t.secrel32\t";
+  DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"dr\"";
+  DwarfInfoSection = "\t.section\t.debug_info,\"dr\"";
+  DwarfLineSection = "\t.section\t.debug_line,\"dr\"";
+  DwarfFrameSection = "\t.section\t.debug_frame,\"dr\"";
+  DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"dr\"";
+  DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"dr\"";
+  DwarfStrSection = "\t.section\t.debug_str,\"dr\"";
+  DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
+  DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
+  DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
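
An aside on what the ELF and Darwin PreferredEHDataFormat implementations above (and the COFF one below) hand back: the result is a single DWARF pointer-encoding byte, with the low nibble selecting the data format and high bits adding pc-relative/indirect modifiers. The DW_EH_PE_* constants below are the standard DWARF values, restated locally so the example stands alone:

    #include <cstdio>

    enum {
      DW_EH_PE_sdata4   = 0x0b,   // 4-byte signed
      DW_EH_PE_pcrel    = 0x10,   // pc-relative
      DW_EH_PE_indirect = 0x80    // address of the value, not the value
    };

    int main() {
      // PIC, 64-bit, small code model, externally visible symbol:
      unsigned Format = DW_EH_PE_sdata4 | DW_EH_PE_indirect | DW_EH_PE_pcrel;
      std::printf("0x%02x\n", Format);  // 0x9b
      return 0;
    }
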
+  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+}
+
+unsigned
+X86COFFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+                                            bool Global) const {
+  CodeModel::Model CM = TM.getCodeModel();
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    unsigned Format = 0;
+
+    if (!is64Bit)
+      // 32 bit targets always encode pointers as 4 bytes
+      Format = DW_EH_PE_sdata4;
+    else {
+      // 64 bit targets encode pointers in 4 bytes iff:
+      //  - code model is small OR
+      //  - code model is medium and we're emitting externally visible symbols
+      //    or any code symbols
+      if (CM == CodeModel::Small ||
+          (CM == CodeModel::Medium && (Global ||
+                                       Reason != DwarfEncoding::Data)))
+        Format = DW_EH_PE_sdata4;
+      else
+        Format = DW_EH_PE_sdata8;
+    }
+
+    if (Global)
+      Format |= DW_EH_PE_indirect;
+
+    return (Format | DW_EH_PE_pcrel);
+  } else {
+    if (is64Bit &&
+        (CM == CodeModel::Small ||
+         (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+      return DW_EH_PE_udata4;
+    else
+      return DW_EH_PE_absptr;
+  }
+}
+
+std::string
+X86COFFTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+                                             SectionKind::Kind kind) const {
+  switch (kind) {
+  case SectionKind::Text:
+    return ".text$linkonce" + GV->getName();
+  case SectionKind::Data:
+  case SectionKind::BSS:
+  case SectionKind::ThreadData:
+  case SectionKind::ThreadBSS:
+    return ".data$linkonce" + GV->getName();
+  case SectionKind::ROData:
+  case SectionKind::RODataMergeConst:
+  case SectionKind::RODataMergeStr:
+    return ".rdata$linkonce" + GV->getName();
+  default:
+    assert(0 && "Unknown section kind");
+  }
+  return NULL;
+}
+
+std::string X86COFFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+  std::string Flags = ",\"";
+
+  if (flags & SectionFlags::Code)
+    Flags += 'x';
+  if (flags & SectionFlags::Writeable)
+    Flags += 'w';
+
+  Flags += "\"";
+
+  return Flags;
+}
+
+X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
+  X86GenericTargetAsmInfo(TM) {
+  GlobalPrefix = "_";
+  CommentString = ";";
+
+  PrivateGlobalPrefix = "$";
+  AlignDirective = "\talign\t";
+  ZeroDirective = "\tdb\t";
+  ZeroDirectiveSuffix = " dup(0)";
+  AsciiDirective = "\tdb\t";
+  AscizDirective = 0;
+  Data8bitsDirective = "\tdb\t";
+  Data16bitsDirective = "\tdw\t";
+  Data32bitsDirective = "\tdd\t";
+  Data64bitsDirective = "\tdq\t";
+  HasDotTypeDotSizeDirective = false;
+  HasSingleParameterDotFile = false;
+
+  TextSection = getUnnamedSection("_text", SectionFlags::Code);
+  DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
+
+  JumpTableDataSection = NULL;
+  SwitchToSectionDirective = "";
+  TextSectionStartSuffix = "\tsegment 'CODE'";
+  DataSectionStartSuffix = "\tsegment 'DATA'";
+  SectionEndDirectiveSuffix = "\tends\n";
+}
+
+template <class BaseTAI>
+bool X86TargetAsmInfo<BaseTAI>::LowerToBSwap(CallInst *CI) const {
+  // FIXME: this should verify that we are targeting a 486 or better. If not,
+  // we will turn this bswap into something that will be lowered to logical ops
+  // instead of emitting the bswap asm. For now, we don't support 486 or lower
+  // so don't worry about this.
+
+  // Verify this is a simple bswap.
+  if (CI->getNumOperands() != 2 ||
+      CI->getType() != CI->getOperand(1)->getType() ||
+      !CI->getType()->isInteger())
+    return false;
+
+  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty || Ty->getBitWidth() % 16 != 0)
+    return false;
+
+  // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getOperand(1); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} + +template +bool X86TargetAsmInfo::ExpandInlineAsm(CallInst *CI) const { + InlineAsm *IA = cast(CI->getCalledValue()); + std::vector Constraints = IA->ParseConstraints(); + + std::string AsmStr = IA->getAsmString(); + + // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" + std::vector AsmPieces; + SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? + + switch (AsmPieces.size()) { + default: return false; + case 1: + AsmStr = AsmPieces[0]; + AsmPieces.clear(); + SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. + + // bswap $0 + if (AsmPieces.size() == 2 && + (AsmPieces[0] == "bswap" || + AsmPieces[0] == "bswapq" || + AsmPieces[0] == "bswapl") && + (AsmPieces[1] == "$0" || + AsmPieces[1] == "${0:q}")) { + // No need to check constraints, nothing other than the equivalent of + // "=r,0" would be valid here. + return LowerToBSwap(CI); + } + // rorw $$8, ${0:w} --> llvm.bswap.i16 + if (CI->getType() == Type::Int16Ty && + AsmPieces.size() == 3 && + AsmPieces[0] == "rorw" && + AsmPieces[1] == "$$8," && + AsmPieces[2] == "${0:w}" && + IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { + return LowerToBSwap(CI); + } + break; + case 3: + if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 && + Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && + Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + std::vector Words; + SplitString(AsmPieces[0], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && + Words[2] == "%edx") { + return LowerToBSwap(CI); + } + } + } + } + break; + } + return false; +} + +// Instantiate default implementation. +TEMPLATE_INSTANTIATION(class X86TargetAsmInfo); diff --git a/lib/Target/X86/X86TargetAsmInfo.h b/lib/Target/X86/X86TargetAsmInfo.h new file mode 100644 index 000000000000..f89171d3da71 --- /dev/null +++ b/lib/Target/X86/X86TargetAsmInfo.h @@ -0,0 +1,75 @@ +//=====-- X86TargetAsmInfo.h - X86 asm properties -------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the X86TargetAsmInfo class. 
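
The "rorw $$8, ${0:w}" pattern accepted above really is a 16-bit byte swap, which is what justifies rewriting it to llvm.bswap.i16. A quick standalone check of that equivalence:

    #include <cassert>
    #include <cstdint>

    // Sketch: rotate a 16-bit value right by n (0 < n < 16).
    static uint16_t ror16(uint16_t x, unsigned n) {
      return (uint16_t)((x >> n) | (x << (16 - n)));
    }

    int main() {
      uint16_t x = 0x1234;
      assert(ror16(x, 8) == 0x3412);                            // bytes swapped
      assert(ror16(x, 8) == (uint16_t)((x << 8) | (x >> 8)));   // same as bswap16
      return 0;
    }
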
+// +//===----------------------------------------------------------------------===// + +#ifndef X86TARGETASMINFO_H +#define X86TARGETASMINFO_H + +#include "X86TargetMachine.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" +#include "llvm/Target/DarwinTargetAsmInfo.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + + extern const char *const x86_asm_table[]; + + template + struct X86TargetAsmInfo : public BaseTAI { + explicit X86TargetAsmInfo(const X86TargetMachine &TM): + BaseTAI(TM) { + const X86Subtarget *Subtarget = &TM.getSubtarget(); + + BaseTAI::AsmTransCBE = x86_asm_table; + BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor(); + } + + virtual bool ExpandInlineAsm(CallInst *CI) const; + + private: + bool LowerToBSwap(CallInst *CI) const; + }; + + typedef X86TargetAsmInfo X86GenericTargetAsmInfo; + + EXTERN_TEMPLATE_INSTANTIATION(class X86TargetAsmInfo); + + struct X86DarwinTargetAsmInfo : public X86TargetAsmInfo { + explicit X86DarwinTargetAsmInfo(const X86TargetMachine &TM); + virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const; + virtual const char *getEHGlobalPrefix() const; + }; + + struct X86ELFTargetAsmInfo : public X86TargetAsmInfo { + explicit X86ELFTargetAsmInfo(const X86TargetMachine &TM); + virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const; + }; + + struct X86COFFTargetAsmInfo : public X86GenericTargetAsmInfo { + explicit X86COFFTargetAsmInfo(const X86TargetMachine &TM); + virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason, + bool Global) const; + virtual std::string UniqueSectionForGlobal(const GlobalValue* GV, + SectionKind::Kind kind) const; + virtual std::string printSectionFlags(unsigned flags) const; + }; + + struct X86WinTargetAsmInfo : public X86GenericTargetAsmInfo { + explicit X86WinTargetAsmInfo(const X86TargetMachine &TM); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp new file mode 100644 index 000000000000..8264462506aa --- /dev/null +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -0,0 +1,317 @@ +//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "X86TargetAsmInfo.h" +#include "X86TargetMachine.h" +#include "X86.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetMachineRegistry.h" +using namespace llvm; + +/// X86TargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. +extern "C" int X86TargetMachineModule; +int X86TargetMachineModule = 0; + +// Register the target. 
+static RegisterTarget +X("x86", "32-bit X86: Pentium-Pro and above"); +static RegisterTarget +Y("x86-64", "64-bit X86: EM64T and AMD64"); + +// No assembler printer by default +X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; + +const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const { + if (Subtarget.isFlavorIntel()) + return new X86WinTargetAsmInfo(*this); + else + switch (Subtarget.TargetType) { + case X86Subtarget::isDarwin: + return new X86DarwinTargetAsmInfo(*this); + case X86Subtarget::isELF: + return new X86ELFTargetAsmInfo(*this); + case X86Subtarget::isMingw: + case X86Subtarget::isCygwin: + return new X86COFFTargetAsmInfo(*this); + case X86Subtarget::isWindows: + return new X86WinTargetAsmInfo(*this); + default: + return new X86GenericTargetAsmInfo(*this); + } +} + +unsigned X86_32TargetMachine::getJITMatchQuality() { +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + return 10; +#endif + return 0; +} + +unsigned X86_64TargetMachine::getJITMatchQuality() { +#if defined(__x86_64__) || defined(_M_AMD64) + return 10; +#endif + return 0; +} + +unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "i[3-9]86-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && + TT[4] == '-' && TT[1] - '3' < 6) + return 20; + // If the target triple is something non-X86, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "x86_64-*". + std::string TT = M.getTargetTriple(); + if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' && + TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-') + return 20; + + // We strongly match "amd64-*". + if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' && + TT[3] == '6' && TT[4] == '4' && TT[5] == '-') + return 20; + + // If the target triple is something non-X86-64, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer64) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS) + : X86TargetMachine(M, FS, false) { +} + + +X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS) + : X86TargetMachine(M, FS, true) { +} + +/// X86TargetMachine ctor - Create an ILP32 architecture model +/// +X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, + bool is64Bit) + : Subtarget(M, FS, is64Bit), + DataLayout(Subtarget.getDataLayout()), + FrameInfo(TargetFrameInfo::StackGrowsDown, + Subtarget.getStackAlignment(), Subtarget.is64Bit() ? 
-8 : -4), + InstrInfo(*this), JITInfo(*this), TLInfo(*this) { + DefRelocModel = getRelocationModel(); + // FIXME: Correctly select PIC model for Win64 stuff + if (getRelocationModel() == Reloc::Default) { + if (Subtarget.isTargetDarwin() || + (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64())) + setRelocationModel(Reloc::DynamicNoPIC); + else + setRelocationModel(Reloc::Static); + } + + // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC + // is defined as a model for code which may be used in static or + // dynamic executables but not necessarily a shared library. On ELF + // implement this by using the Static model. + if (Subtarget.isTargetELF() && + getRelocationModel() == Reloc::DynamicNoPIC) + setRelocationModel(Reloc::Static); + + if (Subtarget.is64Bit()) { + // No DynamicNoPIC support under X86-64. + if (getRelocationModel() == Reloc::DynamicNoPIC) + setRelocationModel(Reloc::PIC_); + // Default X86-64 code model is small. + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); + } + + if (Subtarget.isTargetCygMing()) + Subtarget.setPICStyle(PICStyles::WinPIC); + else if (Subtarget.isTargetDarwin()) { + if (Subtarget.is64Bit()) + Subtarget.setPICStyle(PICStyles::RIPRel); + else + Subtarget.setPICStyle(PICStyles::Stub); + } else if (Subtarget.isTargetELF()) { + if (Subtarget.is64Bit()) + Subtarget.setPICStyle(PICStyles::RIPRel); + else + Subtarget.setPICStyle(PICStyles::GOT); + } +} + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool X86TargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. + PM.add(createX86ISelDag(*this, OptLevel)); + + // If we're using Fast-ISel, clean up the mess. + if (EnableFastISel) + PM.add(createDeadMachineInstructionElimPass()); + + // Install a pass to insert x87 FP_REG_KILL instructions, as needed. + PM.add(createX87FPRegKillInserterPass()); + + return false; +} + +bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). + PM.add(createX86MaxStackAlignmentCalculatorPass()); + return false; // -print-machineinstr shouldn't print after this. +} + +bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createX86FloatingPointStackifierPass()); + return true; // -print-machineinstr should print after this. +} + +bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + return false; +} + +bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // FIXME: Move this to TargetJITInfo! + // On Darwin, do not override 64-bit setting made in X86TargetMachine(). + if (DefRelocModel == Reloc::Default && + (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) + setRelocationModel(Reloc::Static); + + // 64-bit JIT places everything in the same buffer except external functions. + // On Darwin, use small code model but hack the call instruction for + // externals. 
Elsewhere, do not assume globals are in the lower 4G. + if (Subtarget.is64Bit()) { + if (Subtarget.isTargetDarwin()) + setCodeModel(CodeModel::Small); + else + setCodeModel(CodeModel::Large); + } + + PM.add(createX86CodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // FIXME: Move this to TargetJITInfo! + // On Darwin, do not override 64-bit setting made in X86TargetMachine(). + if (DefRelocModel == Reloc::Default && + (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) + setRelocationModel(Reloc::Static); + + // 64-bit JIT places everything in the same buffer except external functions. + // On Darwin, use small code model but hack the call instruction for + // externals. Elsewhere, do not assume globals are in the lower 4G. + if (Subtarget.is64Bit()) { + if (Subtarget.isTargetDarwin()) + setCodeModel(CodeModel::Small); + else + setCodeModel(CodeModel::Large); + } + + PM.add(createX86JITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + PM.add(createX86CodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + PM.add(createX86JITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are +/// RIP-relative on this machine, taking into consideration the relocation +/// model and subtarget. RIP-relative addresses cannot have a separate +/// base or index register. +bool X86TargetMachine::symbolicAddressesAreRIPRel() const { + return getRelocationModel() != Reloc::Static && + Subtarget.isPICStyleRIPRel(); +} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h new file mode 100644 index 000000000000..ecc1d39701de --- /dev/null +++ b/lib/Target/X86/X86TargetMachine.h @@ -0,0 +1,124 @@ +//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the X86 specific subclass of TargetMachine. 
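
Both addCodeEmitter overloads above pick the JIT code model the same way. Summarized as a standalone decision function; the enum names are illustrative stand-ins for LLVM's CodeModel::Model:

    #include <cstdio>

    enum CodeModel { Small, Large, Default };

    // Sketch: the 64-bit JIT keeps everything in one buffer except external
    // functions, so Darwin gets Small (with patched calls to externals) and
    // everything else gets Large (globals may sit above 4G).
    static CodeModel jitCodeModel(bool is64Bit, bool isDarwin) {
      if (!is64Bit)
        return Default;               // 32-bit: leave the model alone
      return isDarwin ? Small : Large;
    }

    int main() {
      std::printf("%d %d %d\n", jitCodeModel(true, true),
                  jitCodeModel(true, false), jitCodeModel(false, true));
      return 0;
    }
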
+// +//===----------------------------------------------------------------------===// + +#ifndef X86TARGETMACHINE_H +#define X86TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "X86.h" +#include "X86ELFWriterInfo.h" +#include "X86InstrInfo.h" +#include "X86JITInfo.h" +#include "X86Subtarget.h" +#include "X86ISelLowering.h" + +namespace llvm { + +class raw_ostream; + +class X86TargetMachine : public LLVMTargetMachine { + X86Subtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + TargetFrameInfo FrameInfo; + X86InstrInfo InstrInfo; + X86JITInfo JITInfo; + X86TargetLowering TLInfo; + X86ELFWriterInfo ELFWriterInfo; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + X86TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + +public: + X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit); + + virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } + virtual X86TargetLowering *getTargetLowering() const { + return const_cast(&TLInfo); + } + virtual const X86RegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const X86ELFWriterInfo *getELFWriterInfo() const { + return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; + } + + static unsigned getModuleMatchQuality(const Module &M); + static unsigned getJITMatchQuality(); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } + + // Set up the pass pipeline. + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &JCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &JCE); + + /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are + /// RIP-relative on this machine, taking into consideration the relocation + /// model and subtarget. RIP-relative addresses cannot have a separate + /// base or index register. + bool symbolicAddressesAreRIPRel() const; +}; + +/// X86_32TargetMachine - X86 32-bit target machine. 
+/// +class X86_32TargetMachine : public X86TargetMachine { +public: + X86_32TargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + +/// X86_64TargetMachine - X86 64-bit target machine. +/// +class X86_64TargetMachine : public X86TargetMachine { +public: + X86_64TargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt new file mode 100644 index 000000000000..a7aba14a7a14 --- /dev/null +++ b/lib/Target/XCore/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS XCore.td) + +tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(XCoreGenRegisterNames.inc -gen-register-enums) +tablegen(XCoreGenRegisterInfo.inc -gen-register-desc) +tablegen(XCoreGenInstrNames.inc -gen-instr-enums) +tablegen(XCoreGenInstrInfo.inc -gen-instr-desc) +tablegen(XCoreGenAsmWriter.inc -gen-asm-writer) +tablegen(XCoreGenDAGISel.inc -gen-dag-isel) +tablegen(XCoreGenCallingConv.inc -gen-callingconv) +tablegen(XCoreGenSubtarget.inc -gen-subtarget) + +add_llvm_target(XCore + XCoreAsmPrinter.cpp + XCoreFrameInfo.cpp + XCoreInstrInfo.cpp + XCoreISelDAGToDAG.cpp + XCoreISelLowering.cpp + XCoreRegisterInfo.cpp + XCoreSubtarget.cpp + XCoreTargetAsmInfo.cpp + XCoreTargetMachine.cpp + ) diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile new file mode 100644 index 000000000000..568df70ab63a --- /dev/null +++ b/lib/Target/XCore/Makefile @@ -0,0 +1,21 @@ +##===- lib/Target/XCore/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMXCore +TARGET = XCore + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ + XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \ + XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \ + XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ + XCoreGenSubtarget.inc + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/XCore/README.txt b/lib/Target/XCore/README.txt new file mode 100644 index 000000000000..deaeb0f2a93b --- /dev/null +++ b/lib/Target/XCore/README.txt @@ -0,0 +1,8 @@ +To-do +----- + +* Instruction encodings +* Tailcalls +* Investigate loop alignment +* Add builtins +* Make better use of lmul / macc diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h new file mode 100644 index 000000000000..5722b873e1aa --- /dev/null +++ b/lib/Target/XCore/XCore.h @@ -0,0 +1,42 @@ +//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// XCore back-end. 
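
The tablegen rules above produce the .inc files that XCore.h includes below; the register-names output reduces to a dense enum. A hypothetical cut-down of what such generated output looks like; names and numbering here are illustrative, not the real table:

    // Sketch: hypothetical excerpt of a tblgen-style register-name enum.
    namespace XCore {
      enum {
        NoRegister = 0,
        R0, R1, R2, R3,   // ... the real table continues through SP, LR, etc.
        NUM_TARGET_REGS
      };
    }

    int main() {
      return XCore::R2 == 3 ? 0 : 1;  // registers are small dense integers
    }
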
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_XCORE_H +#define TARGET_XCORE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class FunctionPass; + class TargetMachine; + class XCoreTargetMachine; + class raw_ostream; + + FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM); + FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS, + XCoreTargetMachine &TM, + CodeGenOpt::Level OptLevel, + bool Verbose); +} // end namespace llvm; + +// Defines symbolic names for XCore registers. This defines a mapping from +// register name to register number. +// +#include "XCoreGenRegisterNames.inc" + +// Defines symbolic names for the XCore instructions. +// +#include "XCoreGenInstrNames.inc" + +#endif diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td new file mode 100644 index 000000000000..7a2dcdbf9fe5 --- /dev/null +++ b/lib/Target/XCore/XCore.td @@ -0,0 +1,62 @@ +//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Descriptions +//===----------------------------------------------------------------------===// + +include "XCoreRegisterInfo.td" +include "XCoreInstrInfo.td" +include "XCoreCallingConv.td" + +def XCoreInstrInfo : InstrInfo { + let TSFlagsFields = []; + let TSFlagsShifts = []; +} + +//===----------------------------------------------------------------------===// +// XCore Subtarget features. +//===----------------------------------------------------------------------===// + +def FeatureXS1A + : SubtargetFeature<"xs1a", "IsXS1A", "true", + "Enable XS1A instructions">; + +def FeatureXS1B + : SubtargetFeature<"xs1b", "IsXS1B", "true", + "Enable XS1B instructions">; + +//===----------------------------------------------------------------------===// +// XCore processors supported. +//===----------------------------------------------------------------------===// + +class Proc Features> + : Processor; + +def : Proc<"generic", [FeatureXS1A]>; +def : Proc<"xs1a-generic", [FeatureXS1A]>; +def : Proc<"xs1b-generic", [FeatureXS1B]>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def XCore : Target { + // Pull in Instruction Info: + let InstructionSet = XCoreInstrInfo; +} diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp new file mode 100644 index 000000000000..c9a6d8afd316 --- /dev/null +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -0,0 +1,472 @@ +//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the XAS-format XCore assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "XCore.h" +#include "XCoreInstrInfo.h" +#include "XCoreSubtarget.h" +#include "XCoreTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +static cl::opt FileDirective("xcore-file-directive", cl::Optional, + cl::desc("Output a file directive into the assembly file"), + cl::Hidden, + cl::value_desc("filename"), + cl::init("")); + +static cl::opt MaxThreads("xcore-max-threads", cl::Optional, + cl::desc("Maximum number of threads (for emulation thread-local storage)"), + cl::Hidden, + cl::value_desc("number"), + cl::init(8)); + +namespace { + class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter { + DwarfWriter *DW; + const XCoreSubtarget &Subtarget; + public: + explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V), DW(0), + Subtarget(*TM.getSubtargetImpl()) {} + + virtual const char *getPassName() const { + return "XCore Assembly Printer"; + } + + void printMemOperand(const MachineInstr *MI, int opNum); + void printOperand(const MachineInstr *MI, int opNum); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + + void emitFileDirective(const std::string &filename); + void emitGlobalDirective(const std::string &name); + void emitExternDirective(const std::string &name); + + void emitArrayBound(const std::string &name, const GlobalVariable *GV); + void emitGlobal(const GlobalVariable *GV); + + void emitFunctionStart(MachineFunction &MF); + void emitFunctionEnd(MachineFunction &MF); + + bool printInstruction(const MachineInstr *MI); // autogenerated. + void printMachineInstruction(const MachineInstr *MI); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + } + }; +} // end of anonymous namespace + +#include "XCoreGenAsmWriter.inc" + +/// createXCoreCodePrinterPass - Returns a pass that prints the XCore +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. 
+/// +FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o, + XCoreTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +// PrintEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '"' && C != '\\') { + Out << C; + } else { + Out << '\\' + << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) + << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + } + } +} + +void XCoreAsmPrinter:: +emitFileDirective(const std::string &name) +{ + O << "\t.file\t\""; + PrintEscapedString(name, O); + O << "\"\n"; +} + +void XCoreAsmPrinter:: +emitGlobalDirective(const std::string &name) +{ + O << TAI->getGlobalDirective() << name; + O << "\n"; +} + +void XCoreAsmPrinter:: +emitExternDirective(const std::string &name) +{ + O << "\t.extern\t" << name; + O << '\n'; +} + +void XCoreAsmPrinter:: +emitArrayBound(const std::string &name, const GlobalVariable *GV) +{ + assert(((GV->hasExternalLinkage() || + GV->hasWeakLinkage()) || + GV->hasLinkOnceLinkage()) && "Unexpected linkage"); + if (const ArrayType *ATy = dyn_cast( + cast(GV->getType())->getElementType())) + { + O << TAI->getGlobalDirective() << name << ".globound" << "\n"; + O << TAI->getSetDirective() << name << ".globound" << "," + << ATy->getNumElements() << "\n"; + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { + // TODO Use COMDAT groups for LinkOnceLinkage + O << TAI->getWeakDefDirective() << name << ".globound" << "\n"; + } + } +} + +void XCoreAsmPrinter:: +emitGlobal(const GlobalVariable *GV) +{ + const TargetData *TD = TM.getTargetData(); + + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. 
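
PrintEscapedString above turns each unprintable byte into a two-digit hex escape with plain nibble arithmetic. The same computation pulled out and checked on one byte:

    #include <cassert>

    // Sketch: nibble-to-hex-digit conversion as used in PrintEscapedString.
    static char hexNibble(unsigned v) {
      return (char)(v < 10 ? v + '0' : v - 10 + 'A');
    }

    int main() {
      unsigned char C = 0x1f;             // unprintable, so it gets escaped
      assert(hexNibble(C / 16) == '1');
      assert(hexNibble(C & 15) == 'F');   // emitted as "\1F"
      return 0;
    }
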
+    if (EmitSpecialLLVMGlobal(GV))
+      return;
+
+    SwitchToSection(TAI->SectionForGlobal(GV));
+
+    std::string name = Mang->getValueName(GV);
+    Constant *C = GV->getInitializer();
+    unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+    // Mark the start of the global
+    O << "\t.cc_top " << name << ".data," << name << "\n";
+
+    switch (GV->getLinkage()) {
+    case GlobalValue::AppendingLinkage:
+      cerr << "AppendingLinkage is not supported by this target!\n";
+      abort();
+    case GlobalValue::LinkOnceAnyLinkage:
+    case GlobalValue::LinkOnceODRLinkage:
+    case GlobalValue::WeakAnyLinkage:
+    case GlobalValue::WeakODRLinkage:
+    case GlobalValue::ExternalLinkage:
+      emitArrayBound(name, GV);
+      emitGlobalDirective(name);
+      // TODO Use COMDAT groups for LinkOnceLinkage
+      if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+        O << TAI->getWeakDefDirective() << name << "\n";
+      }
+      // FALL THROUGH
+    case GlobalValue::InternalLinkage:
+    case GlobalValue::PrivateLinkage:
+      break;
+    case GlobalValue::GhostLinkage:
+      cerr << "Should not have any unmaterialized functions!\n";
+      abort();
+    case GlobalValue::DLLImportLinkage:
+      cerr << "DLLImport linkage is not supported by this target!\n";
+      abort();
+    case GlobalValue::DLLExportLinkage:
+      cerr << "DLLExport linkage is not supported by this target!\n";
+      abort();
+    default:
+      assert(0 && "Unknown linkage type!");
+    }
+
+    EmitAlignment(Align, GV, 2);
+
+    unsigned Size = TD->getTypeAllocSize(C->getType());
+    if (GV->isThreadLocal()) {
+      Size *= MaxThreads;
+    }
+    if (TAI->hasDotTypeDotSizeDirective()) {
+      O << "\t.type " << name << ",@object\n";
+      O << "\t.size " << name << "," << Size << "\n";
+    }
+    O << name << ":\n";
+
+    EmitGlobalConstant(C);
+    if (GV->isThreadLocal()) {
+      for (unsigned i = 1; i < MaxThreads; ++i) {
+        EmitGlobalConstant(C);
+      }
+    }
+    if (Size < 4) {
+      // The ABI requires that unsigned scalar types smaller than 32 bits
+      // are padded to 32 bits.
+      EmitZeros(4 - Size);
+    }
+
+    // Mark the end of the global
+    O << "\t.cc_bottom " << name << ".data\n";
+  } else {
+    if (GV->hasExternalWeakLinkage())
+      ExtWeakSymbols.insert(GV);
+  }
+}
+
+/// Emit the directives on the start of functions
+void XCoreAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+  // Print out the label for the function.
+  const Function *F = MF.getFunction();
+
+  SwitchToSection(TAI->SectionForGlobal(F));
+
+  // Mark the start of the function
+  O << "\t.cc_top " << CurrentFnName << ".function," << CurrentFnName << "\n";
+
+  switch (F->getLinkage()) {
+  default: assert(0 && "Unknown linkage type!");
+  case Function::InternalLinkage:  // Symbols default to internal.
+ case Function::PrivateLinkage: + break; + case Function::ExternalLinkage: + emitGlobalDirective(CurrentFnName); + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + // TODO Use COMDAT groups for LinkOnceLinkage + O << TAI->getGlobalDirective() << CurrentFnName << "\n"; + O << TAI->getWeakDefDirective() << CurrentFnName << "\n"; + break; + } + // (1 << 1) byte aligned + EmitAlignment(1, F, 1); + if (TAI->hasDotTypeDotSizeDirective()) { + O << "\t.type " << CurrentFnName << ",@function\n"; + } + O << CurrentFnName << ":\n"; +} + +/// Emit the directives on the end of functions +void XCoreAsmPrinter:: +emitFunctionEnd(MachineFunction &MF) +{ + // Mark the end of the function + O << "\t.cc_bottom " << CurrentFnName << ".function\n"; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) +{ + this->MF = &MF; + + SetupMachineFunction(MF); + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out jump tables referenced by the function + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // Emit the function start directives + emitFunctionStart(MF); + + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true , true); + O << '\n'; + } + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + O << "\t"; + printMachineInstruction(II); + } + + // Each Basic Block is separated by a newline + O << '\n'; + } + + // Emit function end directives + emitFunctionEnd(MF); + + // Emit post-function debug information. + DW->EndFunction(&MF); + + // We didn't modify anything. 
+ return false; +} + +void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum) +{ + printOperand(MI, opNum); + + if (MI->getOperand(opNum+1).isImm() + && MI->getOperand(opNum+1).getImm() == 0) + return; + + O << "+"; + printOperand(MI, opNum+1); +} + +void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + else + assert(0 && "not implemented"); + break; + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + break; + case MachineOperand::MO_GlobalAddress: + { + const GlobalValue *GV = MO.getGlobal(); + O << Mang->getValueName(GV); + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + break; + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + assert(0 && "not implemented"); + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + printOperand(MI, OpNo); + return false; +} + +void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + // Check for mov mnemonic + unsigned src, dst, srcSR, dstSR; + if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) { + O << "\tmov "; + O << TM.getRegisterInfo()->get(dst).AsmName; + O << ", "; + O << TM.getRegisterInfo()->get(src).AsmName; + O << "\n"; + return; + } + if (printInstruction(MI)) { + return; + } + assert(0 && "Unhandled instruction in asm writer!"); +} + +bool XCoreAsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + + if (!FileDirective.empty()) { + emitFileDirective(FileDirective); + } + + // Print out type strings for external functions here + for (Module::const_iterator I = M.begin(), E = M.end(); + I != E; ++I) { + if (I->isDeclaration() && !I->isIntrinsic()) { + switch (I->getLinkage()) { + default: + assert(0 && "Unexpected linkage"); + case Function::ExternalWeakLinkage: + ExtWeakSymbols.insert(I); + // fallthrough + case Function::ExternalLinkage: + break; + } + } + } + + // Emit initial debug information. + DW = getAnalysisIfAvailable(); + assert(DW && "Dwarf Writer is not available"); + DW->BeginModule(&M, getAnalysisIfAvailable(), + O, this, TAI); + return Result; +} + +bool XCoreAsmPrinter::doFinalization(Module &M) { + + // Print out module-level global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + emitGlobal(I); + } + + // Emit final debug information. 
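An aside on printOperand above: constant-pool and jump-table operands are
printed as synthesized private labels. A sketch of the scheme, assuming a
private-global prefix of ".L" (the real prefix comes from TAI, so this name is
hypothetical):

    #include <cstdio>
    // Function number 2, constant-pool entry 5 would print as ".LCPI2_5";
    // jump tables use "JTI" in place of "CPI".
    static void printCPILabel(unsigned Fn, unsigned Idx) {
      std::printf(".LCPI%u_%u\n", Fn, Idx);
    }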
+ DW->EndModule(); + + return AsmPrinter::doFinalization(M); +} diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td new file mode 100644 index 000000000000..8107e329bd58 --- /dev/null +++ b/lib/Target/XCore/XCoreCallingConv.td @@ -0,0 +1,33 @@ +//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for XCore architecture. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XCore Return Value Calling Convention +//===----------------------------------------------------------------------===// +def RetCC_XCore : CallingConv<[ + // i32 are returned in registers R0, R1, R2, R3 + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// XCore Argument Calling Conventions +//===----------------------------------------------------------------------===// +def CC_XCore : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The first 4 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + + // Integer values get stored in stack slots that are 4 bytes in + // size and 4-byte aligned. + CCIfType<[i32], CCAssignToStack<4, 4>> +]>; diff --git a/lib/Target/XCore/XCoreFrameInfo.cpp b/lib/Target/XCore/XCoreFrameInfo.cpp new file mode 100644 index 000000000000..f50dc96c6ba9 --- /dev/null +++ b/lib/Target/XCore/XCoreFrameInfo.cpp @@ -0,0 +1,27 @@ +//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains XCore frame information that doesn't fit anywhere else +// cleanly... +// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "XCoreFrameInfo.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// XCoreFrameInfo: +//===----------------------------------------------------------------------===// + +XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm): + TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) +{ + // Do nothing +} diff --git a/lib/Target/XCore/XCoreFrameInfo.h b/lib/Target/XCore/XCoreFrameInfo.h new file mode 100644 index 000000000000..2c67577181ec --- /dev/null +++ b/lib/Target/XCore/XCoreFrameInfo.h @@ -0,0 +1,34 @@ +//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains XCore frame information that doesn't fit anywhere else +// cleanly... 
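A worked example of the CC_XCore rules above, as a toy model (the extra
link-register save slot that call lowering reserves is left out here):

    #include <cstdio>
    // f(a, b, c, d, e, f) with i32 arguments: the first four go in
    // R0-R3, the rest into 4-byte, 4-byte-aligned stack slots.
    int main() {
      const unsigned NumArgs = 6;
      for (unsigned i = 0; i != NumArgs; ++i) {
        if (i < 4)
          std::printf("arg%u -> R%u\n", i, i);
        else
          std::printf("arg%u -> stack offset %u\n", i, (i - 4) * 4);
      }
    }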
+// +//===----------------------------------------------------------------------===// + +#ifndef XCOREFRAMEINFO_H +#define XCOREFRAMEINFO_H + +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class XCoreFrameInfo: public TargetFrameInfo { + + public: + XCoreFrameInfo(const TargetMachine &tm); + + //! Stack slot size (4 bytes) + static int stackSlotSize() { + return 4; + } + }; +} + +#endif // XCOREFRAMEINFO_H diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp new file mode 100644 index 000000000000..eed34a4b635b --- /dev/null +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -0,0 +1,230 @@ +//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the XCore target. +// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "XCoreISelLowering.h" +#include "XCoreTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include +#include +using namespace llvm; + +/// XCoreDAGToDAGISel - XCore specific code to select XCore machine +/// instructions for SelectionDAG operations. +/// +namespace { + class XCoreDAGToDAGISel : public SelectionDAGISel { + XCoreTargetLowering &Lowering; + const XCoreSubtarget &Subtarget; + + public: + XCoreDAGToDAGISel(XCoreTargetMachine &TM) + : SelectionDAGISel(TM), + Lowering(*TM.getTargetLowering()), + Subtarget(*TM.getSubtargetImpl()) { } + + SDNode *Select(SDValue Op); + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + // Complex Pattern Selectors. + bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base, + SDValue &Offset); + bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base, + SDValue &Offset); + bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base, + SDValue &Offset); + + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "XCore DAG->DAG Pattern Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. + #include "XCoreGenDAGISel.inc" + }; +} // end anonymous namespace + +/// createXCoreISelDag - This pass converts a legalized DAG into a +/// XCore-specific DAG, ready for instruction scheduling. 
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+  return new XCoreDAGToDAGISel(TM);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr,
+                                       SDValue &Base, SDValue &Offset) {
+  FrameIndexSDNode *FIN = 0;
+  if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+  if (Addr.getOpcode() == ISD::ADD) {
+    ConstantSDNode *CN = 0;
+    if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+        && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+        && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+      // Constant positive word offset from frame index
+      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr,
+                                       SDValue &Base, SDValue &Offset) {
+  if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+  if (Addr.getOpcode() == ISD::ADD) {
+    ConstantSDNode *CN = 0;
+    if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+        && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+        && (CN->getSExtValue() % 4 == 0)) {
+      // Constant word offset from an object in the data region
+      Base = Addr.getOperand(0).getOperand(0);
+      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
+                                       SDValue &Base, SDValue &Offset) {
+  if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+  if (Addr.getOpcode() == ISD::ADD) {
+    ConstantSDNode *CN = 0;
+    if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+        && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+        && (CN->getSExtValue() % 4 == 0)) {
+      // Constant word offset from an object in the constant pool
+      Base = Addr.getOperand(0).getOperand(0);
+      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void XCoreDAGToDAGISel::
+InstructionSelect() {
+  DEBUG(BB->dump());
+
+  // Select target instructions for the DAG.
+  SelectRoot(*CurDAG);
+
+  CurDAG->RemoveDeadNodes();
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
+  SDNode *N = Op.getNode();
+  DebugLoc dl = N->getDebugLoc();
+  MVT NVT = N->getValueType(0);
+  if (NVT == MVT::i32) {
+    switch (N->getOpcode()) {
+    default: break;
+    case ISD::Constant: {
+      if (Predicate_immMskBitp(N)) {
+        SDValue MskSize = Transform_msksize_xform(N);
+        return CurDAG->getTargetNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize);
+      }
+      else if (!Predicate_immU16(N)) {
+        unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+        SDValue CPIdx =
+          CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+                                        TLI.getPointerTy());
+        return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32,
+                                     MVT::Other, CPIdx,
+                                     CurDAG->getEntryNode());
+      }
+      break;
+    }
+    case ISD::SMUL_LOHI: {
+      // FIXME fold addition into the macc instruction
+      if (!Subtarget.isXS1A()) {
+        SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+                                  CurDAG->getTargetConstant(0, MVT::i32)), 0);
+        SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
+        SDNode *ResNode = CurDAG->getTargetNode(XCore::MACCS_l4r, dl,
+                                                MVT::i32, MVT::i32, Ops, 4);
+        ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+        ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+        return NULL;
+      }
+      break;
+    }
+    case ISD::UMUL_LOHI: {
+      // FIXME fold addition into the macc / lmul instruction
+      SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+                                  CurDAG->getTargetConstant(0, MVT::i32)), 0);
+      SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+                        Zero, Zero };
+      SDNode *ResNode = CurDAG->getTargetNode(XCore::LMUL_l6r, dl, MVT::i32,
+                                              MVT::i32, Ops, 4);
+      ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+      ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+      return NULL;
+    }
+    case XCoreISD::LADD: {
+      if (!Subtarget.isXS1A()) {
+        SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+                          Op.getOperand(2) };
+        return CurDAG->getTargetNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+                                     Ops, 3);
+      }
+      break;
+    }
+    case XCoreISD::LSUB: {
+      if (!Subtarget.isXS1A()) {
+        SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+                          Op.getOperand(2) };
+        return CurDAG->getTargetNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+                                     Ops, 3);
+      }
+      break;
+    }
+    // Other cases are autogenerated.
+    }
+  }
+  return SelectCode(Op);
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 000000000000..93c5f59f642b
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,934 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
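The UMUL_LOHI selection above leans on LMUL computing a full 64-bit product
with two 32-bit addends. A minimal model of what I read the operation to be
(assumed semantics, not a spec quote):

    // lmul: hi:lo = a * b + c + d, all operands 32-bit.
    static void lmul(unsigned a, unsigned b, unsigned c, unsigned d,
                     unsigned &hi, unsigned &lo) {
      unsigned long long P = (unsigned long long)a * b + c + d;
      hi = (unsigned)(P >> 32);
      lo = (unsigned)P;
    }
    // With c == d == 0 this is exactly UMUL_LOHI, matching the two zero
    // operands materialized with LDC above.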
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcore-lower" + +#include "XCoreISelLowering.h" +#include "XCoreMachineFunctionInfo.h" +#include "XCore.h" +#include "XCoreTargetMachine.h" +#include "XCoreSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/VectorExtras.h" +#include +#include +using namespace llvm; + +const char *XCoreTargetLowering:: +getTargetNodeName(unsigned Opcode) const +{ + switch (Opcode) + { + case XCoreISD::BL : return "XCoreISD::BL"; + case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; + case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper"; + case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper"; + case XCoreISD::STWSP : return "XCoreISD::STWSP"; + case XCoreISD::RETSP : return "XCoreISD::RETSP"; + default : return NULL; + } +} + +XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) + : TargetLowering(XTM), + TM(XTM), + Subtarget(*XTM.getSubtargetImpl()) { + + // Set up the register classes. + addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass); + + // Compute derived properties from the register classes + computeRegisterProperties(); + + // Division is expensive + setIntDivIsCheap(false); + + setShiftAmountType(MVT::i32); + // shl X, 32 == 0 + setShiftAmountFlavor(Extend); + setStackPointerRegisterToSaveRestore(XCore::SP); + + setSchedulingPreference(SchedulingForRegPressure); + + // Use i32 for setcc operations results (slt, sgt, ...). + setBooleanContents(ZeroOrOneBooleanContent); + + // XCore does not have the NodeTypes below. 
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + + // Stop the combiner recombining select and set_cc + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // 64bit + if (!Subtarget.isXS1A()) { + setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Custom); + } + if (Subtarget.isXS1A()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + } + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + + // Bit Manipulation + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::ROTL , MVT::i32, Expand); + setOperationAction(ISD::ROTR , MVT::i32, Expand); + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // Expand jump tables for now + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + + // RET must be custom lowered, to meet ABI requirements + setOperationAction(ISD::RET, MVT::Other, Custom); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + + // Thread Local Storage + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + + // Conversion of i64 -> double produces constantpool nodes + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + + // Loads + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + + // Varargs + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + + // Dynamic stack + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + + // Debug + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); +} + +SDValue XCoreTargetLowering:: +LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) + { + case ISD::CALL: return LowerCALL(Op, DAG); + case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); + case ISD::RET: return LowerRET(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VAARG: return LowerVAARG(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + // FIXME: Remove these when LegalizeDAGTypes lands. 
+  case ISD::ADD:
+  case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+  default:
+    assert(0 && "unimplemented operand");
+    return SDValue();
+  }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) {
+  switch (N->getOpcode()) {
+  default:
+    assert(0 && "Don't know how to custom expand this!");
+    return;
+  case ISD::ADD:
+  case ISD::SUB:
+    Results.push_back(ExpandADDSUB(N, DAG));
+    return;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+                             Op.getOperand(3), Op.getOperand(4));
+  return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+                     Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
+{
+  // FIXME there is no actual debug info here
+  DebugLoc dl = GA.getDebugLoc();
+  if (isa<Function>(GV)) {
+    return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+  } else if (!Subtarget.isXS1A()) {
+    const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+    if (!GVar) {
+      // If GV is an alias then use the aliasee to determine constness
+      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+        GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+    }
+    bool isConst = GVar && GVar->isConstant();
+    if (isConst) {
+      return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+    }
+  }
+  return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  // If it's a debug information descriptor, don't mess with it.
+  if (DAG.isVerifiedDebugInfoDesc(Op))
+    return GA;
+  return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+                     DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(const Type *Ty) {
+  const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+  return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+  // FIXME there isn't really debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  // transform to label + getid() * size
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  if (!GVar) {
+    // If GV is an alias then use the aliasee to determine size
+    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+  }
+  if (!GVar) {
+    assert(0 && "Thread local object not a GlobalVariable?");
+    return SDValue();
+  }
+  const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+  if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+    cerr << "Size of thread local object " << GVar->getName()
+         << " is unknown\n";
+    abort();
+  }
+  SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(Ty);
+  SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+                               DAG.getConstant(Size, MVT::i32));
+  return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  // FIXME there isn't really debug info here
+  DebugLoc dl = CP->getDebugLoc();
+  if (Subtarget.isXS1A()) {
+    assert(0 && "Lowering of constant pool unimplemented");
+    return SDValue();
+  } else {
+    MVT PtrVT = Op.getValueType();
+    SDValue Res;
+    if (CP->isMachineConstantPoolEntry()) {
+      Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                      CP->getAlignment());
+    } else {
+      Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+                                      CP->getAlignment());
+    }
+    return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+  }
+}
+
+SDValue XCoreTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+  // FIXME there isn't really debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  MVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+  return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
+{
+  assert(N->getValueType(0) == MVT::i64 &&
+         (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Unknown operand to lower!");
+  assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a");
+  DebugLoc dl = N->getDebugLoc();
+
+  // Extract components
+  SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             N->getOperand(0), DAG.getConstant(0, MVT::i32));
+  SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             N->getOperand(0), DAG.getConstant(1, MVT::i32));
+  SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             N->getOperand(1), DAG.getConstant(0, MVT::i32));
+  SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                             N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+  // Expand
+  unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+                                                   XCoreISD::LSUB;
+  SDValue Zero = DAG.getConstant(0, MVT::i32);
+  SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                              LHSL, RHSL, Zero);
+  SDValue Lo(Carry.getNode(), 1);
+
+  SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                                LHSH, RHSH, Carry);
+  SDValue Hi(Ignored.getNode(), 1);
+  // Merge the pieces
+  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG)
+{
+  assert(0 && "unimplemented");
+  // FIX Arguments passed by reference need an extra dereference.
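Stepping back to ExpandADDSUB above: the two LADD/LSUB nodes form a classic
carry chain. A minimal model, assuming LADD yields (carry, sum) in that order
(result 0 is the carry, result 1 the sum, which is how the code indexes the
node's values):

    static void ladd(unsigned a, unsigned b, unsigned cin,
                     unsigned &carry, unsigned &sum) {
      unsigned long long S = (unsigned long long)a + b + cin;
      carry = (unsigned)(S >> 32);
      sum   = (unsigned)S;
    }
    // 64-bit add: ladd(LHSL, RHSL, 0, c, Lo); ladd(LHSH, RHSH, c, _, Hi);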
+ SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + const Value *V = cast(Node->getOperand(2))->getValue(); + MVT VT = Node->getValueType(0); + SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(1), V, 0); + // Increment the pointer, VAList, to the next vararg + SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, + DAG.getConstant(VT.getSizeInBits(), + getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + // Load the actual argument out of the pointer VAList + return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); +} + +SDValue XCoreTargetLowering:: +LowerVASTART(SDValue Op, SelectionDAG &DAG) +{ + DebugLoc dl = Op.getDebugLoc(); + // vastart stores the address of the VarArgsFrameIndex slot into the + // memory location argument + MachineFunction &MF = DAG.getMachineFunction(); + XCoreFunctionInfo *XFI = MF.getInfo(); + SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); +} + +SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + // Depths > 0 not supported yet! + if (cast(Op.getOperand(0))->getZExtValue() > 0) + return SDValue(); + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + RegInfo->getFrameRegister(MF), MVT::i32); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "XCoreGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// CALL Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// XCore custom CALL implementation +SDValue XCoreTargetLowering:: +LowerCALL(SDValue Op, SelectionDAG &DAG) +{ + CallSDNode *TheCall = cast(Op.getNode()); + unsigned CallingConv = TheCall->getCallingConv(); + // For now, only CallingConv::C implemented + switch (CallingConv) + { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + return LowerCCCCallTo(Op, DAG, CallingConv); + } +} + +/// LowerCCCCallTo - functions arguments are copied from virtual +/// regs to (physical regs)/(stack frame), CALLSEQ_START and +/// CALLSEQ_END are emitted. +/// TODO: isTailCall, sret. +SDValue XCoreTargetLowering:: +LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC) +{ + CallSDNode *TheCall = cast(Op.getNode()); + SDValue Chain = TheCall->getChain(); + SDValue Callee = TheCall->getCallee(); + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = Op.getDebugLoc(); + + // Analyze operands of the call, assigning locations to each operand. 
+ SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // The ABI dictates there should be one stack slot available to the callee + // on function entry (for saving lr). + CCInfo.AllocateStack(4, 4); + + CCInfo.AnalyzeCallOperands(TheCall, CC_XCore); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, + getPointerTy(), true)); + + SmallVector, 4> RegsToPass; + SmallVector MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments start after the 5 first operands of ISD::CALL + SDValue Arg = TheCall->getArg(i); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + + int Offset = VA.getLocMemOffset(); + + MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, + Chain, Arg, + DAG.getConstant(Offset/4, MVT::i32))); + } + } + + // Transform all store nodes into one single node because + // all store nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emited instructions must be + // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); + + // XCoreBranchLink = #chain, #target_address, #opt_in_flags... + // = Chain, Callee, Reg#1, Reg#2, ... + // + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. 
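A note on the outgoing-argument stores built above: STWSP takes a word offset,
hence the Offset/4. A sketch of the mapping, under the assumption that
argument slots are word aligned:

    #include <cassert>
    // Byte offset of an outgoing stack argument -> STWSP word operand.
    // Offsets start at 4 because CCInfo.AllocateStack(4, 4) reserved
    // word 0 for the callee's link-register save.
    static unsigned stwspWord(int ByteOffset) {
      assert(ByteOffset >= 4 && ByteOffset % 4 == 0);
      return (unsigned)ByteOffset / 4;
    }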
+ Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, getPointerTy(), true), + DAG.getConstant(0, getPointerTy(), true), + InFlag); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), + Op.getResNo()); +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *XCoreTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = TheCall->getDebugLoc(); + + // Assign locations to each value returned by this call. + SmallVector RVLocs; + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + + CCInfo.AnalyzeCallResult(TheCall, RetCC_XCore); + SmallVector ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + ResultVals.push_back(Chain.getValue(0)); + } + + ResultVals.push_back(Chain); + + // Merge everything together with a MERGE_VALUES node. + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +//===----------------------------------------------------------------------===// +// FORMAL_ARGUMENTS Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// XCore custom FORMAL_ARGUMENTS implementation +SDValue XCoreTargetLowering:: +LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) +{ + unsigned CC = cast(Op.getOperand(1))->getZExtValue(); + switch(CC) + { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + return LowerCCCArguments(Op, DAG); + } +} + +/// LowerCCCArguments - transform physical registers into +/// virtual registers and generate load operations for +/// arguments places on the stack. +/// TODO: sret +SDValue XCoreTargetLowering:: +LowerCCCArguments(SDValue Op, SelectionDAG &DAG) +{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SDValue Root = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + DebugLoc dl = Op.getDebugLoc(); + + // Assign locations to all of the incoming arguments. 
+ SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_XCore); + + unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize(); + + SmallVector ArgValues; + + unsigned LRSaveSize = StackSlotSize; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + + CCValAssign &VA = ArgLocs[i]; + + if (VA.isRegLoc()) { + // Arguments passed in registers + MVT RegVT = VA.getLocVT(); + switch (RegVT.getSimpleVT()) { + default: + cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << RegVT.getSimpleVT() + << "\n"; + abort(); + case MVT::i32: + unsigned VReg = RegInfo.createVirtualRegister( + XCore::GRRegsRegisterClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + ArgValues.push_back(DAG.getCopyFromReg(Root, dl, VReg, RegVT)); + } + } else { + // sanity check + assert(VA.isMemLoc()); + // Load the argument to a virtual register + unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; + if (ObjSize > StackSlotSize) { + cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << VA.getLocVT().getSimpleVT() + << "\n"; + } + // Create the frame index object for this incoming parameter... + int FI = MFI->CreateFixedObject(ObjSize, + LRSaveSize + VA.getLocMemOffset()); + + // Create the SelectionDAG nodes corresponding to a load + //from this parameter + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, NULL, 0)); + } + } + + if (isVarArg) { + /* Argument registers */ + static const unsigned ArgRegs[] = { + XCore::R0, XCore::R1, XCore::R2, XCore::R3 + }; + XCoreFunctionInfo *XFI = MF.getInfo(); + unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs, + array_lengthof(ArgRegs)); + if (FirstVAReg < array_lengthof(ArgRegs)) { + SmallVector MemOps; + int offset = 0; + // Save remaining registers, storing higher register numbers at a higher + // address + for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { + // Create a stack slot + int FI = MFI->CreateFixedObject(4, offset); + if (i == FirstVAReg) { + XFI->setVarArgsFrameIndex(FI); + } + offset -= StackSlotSize; + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + // Move argument from phys reg -> virt reg + unsigned VReg = RegInfo.createVirtualRegister( + XCore::GRRegsRegisterClass); + RegInfo.addLiveIn(ArgRegs[i], VReg); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + // Move argument from virt reg -> stack + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else { + // This will point to the next argument passed via stack. + XFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset())); + } + } + + ArgValues.push_back(Root); + + // Return the new list of results. 
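For the vararg spill loop above, the fixed objects end up with higher register
numbers at higher addresses. A sketch of the offsets it creates, assuming the
four argument registers R0-R3 and 4-byte slots:

    // Slot offset for spilled argument register Rn (FirstVAReg <= n <= 3):
    // R3 -> 0, R2 -> -4, R1 -> -8, R0 -> -12, mirroring the loop that
    // walks from R3 downwards and decrements `offset` by the slot size.
    static int varargSlotOffset(unsigned RegNo) {
      return -4 * (int)(3 - RegNo);
    }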
+ std::vector RetVT(Op.getNode()->value_begin(), + Op.getNode()->value_end()); + return DAG.getNode(ISD::MERGE_VALUES, dl, RetVT, + &ArgValues[0], ArgValues.size()); +} + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention Implementation +//===----------------------------------------------------------------------===// + +SDValue XCoreTargetLowering:: +LowerRET(SDValue Op, SelectionDAG &DAG) +{ + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + DebugLoc dl = Op.getDebugLoc(); + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + + // Analize return values of ISD::RET + CCInfo.AnalyzeReturn(Op.getNode(), RetCC_XCore); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + // The chain is always operand #0 + SDValue Chain = Op.getOperand(0); + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + // ISD::RET => ret chain, (regnum1,val1), ... + // So i*2+1 index only the regnums + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Op.getOperand(i*2+1), Flag); + + // guarantee that all emitted copies are + // stuck together, avoiding something bad + Flag = Chain.getValue(1); + } + + // Return on XCore is always a "retsp 0" + if (Flag.getNode()) + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, + Chain, DAG.getConstant(0, MVT::i32), Flag); + else // Return Void + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, + Chain, DAG.getConstant(0, MVT::i32)); +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + assert((MI->getOpcode() == XCore::SELECT_CC) && + "Unexpected instr type to insert"); + + // To "insert" a SELECT_CC instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... 
+ // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + sinkMBB->transferSuccessors(BB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} + +//===----------------------------------------------------------------------===// +// Addressing mode description hooks +//===----------------------------------------------------------------------===// + +static inline bool isImmUs(int64_t val) +{ + return (val >= 0 && val <= 11); +} + +static inline bool isImmUs2(int64_t val) +{ + return (val%2 == 0 && isImmUs(val/2)); +} + +static inline bool isImmUs4(int64_t val) +{ + return (val%4 == 0 && isImmUs(val/4)); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. 
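The isImmUs* predicates above encode the scaled "us" immediate range (0 to 11
after scaling). A self-contained restatement with a few spot checks:

    #include <cassert>
    static bool isImmUs(long long v)  { return v >= 0 && v <= 11; }
    static bool isImmUs2(long long v) { return v % 2 == 0 && isImmUs(v / 2); }
    int main() {
      assert(isImmUs2(22));    // 22 == 2 * 11: largest legal i16 offset
      assert(!isImmUs2(7));    // odd byte offsets are not encodable
      assert(!isImmUs2(24));   // 24 == 2 * 12: exceeds the us range
    }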
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                           const Type *Ty) const {
+  MVT VT = getValueType(Ty, true);
+  // Get expected value type after legalization
+  switch (VT.getSimpleVT()) {
+  // Legal load / stores
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    break;
+  // Expand i1 -> i8
+  case MVT::i1:
+    VT = MVT::i8;
+    break;
+  // Everything else is lowered to words
+  default:
+    VT = MVT::i32;
+    break;
+  }
+  if (AM.BaseGV) {
+    return VT == MVT::i32 && !AM.HasBaseReg && AM.Scale == 0 &&
+           AM.BaseOffs%4 == 0;
+  }
+
+  switch (VT.getSimpleVT()) {
+  default:
+    return false;
+  case MVT::i8:
+    // reg + imm
+    if (AM.Scale == 0) {
+      return isImmUs(AM.BaseOffs);
+    }
+    return AM.Scale == 1 && AM.BaseOffs == 0;
+  case MVT::i16:
+    // reg + imm
+    if (AM.Scale == 0) {
+      return isImmUs2(AM.BaseOffs);
+    }
+    return AM.Scale == 2 && AM.BaseOffs == 0;
+  case MVT::i32:
+    // reg + imm
+    if (AM.Scale == 0) {
+      return isImmUs4(AM.BaseOffs);
+    }
+    // reg + reg<<2
+    return AM.Scale == 4 && AM.BaseOffs == 0;
+  }
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//                           XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::vector<unsigned> XCoreTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT VT) const
+{
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+
+  switch (Constraint[0]) {
+    default : break;
+    case 'r':
+      return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
+                                   XCore::R3, XCore::R4, XCore::R5,
+                                   XCore::R6, XCore::R7, XCore::R8,
+                                   XCore::R9, XCore::R10, XCore::R11, 0);
+      break;
+  }
+  return std::vector<unsigned>();
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 000000000000..993ecbdc60a8
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,123 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+  // Forward declarations
+  class XCoreSubtarget;
+  class XCoreTargetMachine;
+
+  namespace XCoreISD {
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + + // Branch and link (call) + BL, + + // pc relative address + PCRelativeWrapper, + + // dp relative address + DPRelativeWrapper, + + // cp relative address + CPRelativeWrapper, + + // Store word to stack + STWSP, + + // Corresponds to retsp instruction + RETSP, + + // Corresponds to LADD instruction + LADD, + + // Corresponds to LSUB instruction + LSUB + }; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Implementation + //===--------------------------------------------------------------------===// + class XCoreTargetLowering : public TargetLowering + { + public: + + explicit XCoreTargetLowering(XCoreTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, + SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + virtual bool isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const; + + private: + const XCoreTargetMachine &TM; + const XCoreSubtarget &Subtarget; + + // Lower Operand helpers + SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG); + SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC); + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode*TheCall, + unsigned CallingConv, SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, + SelectionDAG &DAG); + + // Lower Operand specifics + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + + // Inline asm support + std::vector + getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + // Expand specifics + SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG); + }; +} + +#endif // XCOREISELLOWERING_H diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td new file mode 100644 index 000000000000..8002c993270c --- /dev/null +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -0,0 +1,120 @@ +//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : Instruction {
+  field bits<32> Inst;
+
+  let Namespace = "XCore";
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstXCore<outs, ins, asmstr, pattern> {
+  let Inst{31-0} = 0;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 000000000000..504d2025edcf
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,524 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "XCoreGenInstrInfo.inc"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace XCore {
+
+  // XCore Condition Codes
+  enum CondCode {
+    COND_TRUE,
+    COND_FALSE,
+    COND_INVALID
+  };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo(void)
+  : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
+    RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+  return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+/// +bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSR, unsigned &DstSR) const { + SrcSR = DstSR = 0; // No sub-registers. + + // We look for 4 kinds of patterns here: + // add dst, src, 0 + // sub dst, src, 0 + // or dst, src, src + // and dst, src, src + if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus) + && isZeroImm(MI.getOperand(2))) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r) + && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned +XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{ + int Opcode = MI->getOpcode(); + if (Opcode == XCore::LDWFI) + { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) + { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. +unsigned +XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + int Opcode = MI->getOpcode(); + if (Opcode == XCore::STWFI) + { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) + { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// isInvariantLoad - Return true if the specified instruction (which is marked +/// mayLoad) is loading from a location whose value is invariant across the +/// function. For example, loading a value from the constant pool or from +/// from the argument area of a function if it does not change. This should +/// only return true of *all* loads the instruction does are invariant (if it +/// does multiple loads). 
+bool
+XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+  // Loads from constant pools and loads from invariant argument slots are
+  // invariant.
+  int Opcode = MI->getOpcode();
+  if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) {
+    return MI->getOperand(1).isCPI();
+  }
+  int FrameIndex;
+  if (isLoadFromStackSlot(MI, FrameIndex)) {
+    const MachineFrameInfo &MFI =
+      *MI->getParent()->getParent()->getFrameInfo();
+    return MFI.isFixedObjectIndex(FrameIndex) &&
+           MFI.isImmutableObjectIndex(FrameIndex);
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+  return BrOpc == XCore::BRFU_u6
+      || BrOpc == XCore::BRFU_lu6
+      || BrOpc == XCore::BRBU_u6
+      || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+  return BrOpc == XCore::BRFT_ru6
+      || BrOpc == XCore::BRFT_lru6
+      || BrOpc == XCore::BRBT_ru6
+      || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+  return BrOpc == XCore::BRFF_ru6
+      || BrOpc == XCore::BRFF_lru6
+      || BrOpc == XCore::BRBF_ru6
+      || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+  return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the corresponding Branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+  if (IsBRT(BrOpc)) {
+    return XCore::COND_TRUE;
+  } else if (IsBRF(BrOpc)) {
+    return XCore::COND_FALSE;
+  } else {
+    return XCore::COND_INVALID;
+  }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+  switch (CC) {
+  default: assert(0 && "Illegal condition code!");
+  case XCore::COND_TRUE  : return XCore::BRFT_lru6;
+  case XCore::COND_FALSE : return XCore::BRFF_lru6;
+  }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_TRUE to COND_FALSE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+  switch (CC) {
+  default: assert(0 && "Illegal condition code!");
+  case XCore::COND_TRUE  : return XCore::COND_FALSE;
+  case XCore::COND_FALSE : return XCore::COND_TRUE;
+  }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+///    just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+///    the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to a
+///    successor block, it sets TBB to be the branch destination block and a
+///    list of operands that evaluate the condition. These
+///    operands can be passed to other TargetInstrInfo methods to create new
+///    branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    branch, it returns the 'true' destination in TBB, the 'false' destination
+///    in FBB, and a list of operands that evaluate the condition. These
+///    operands can be passed to other TargetInstrInfo methods to create new
+///    branches.
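[editor's aside — illustrative sketch, not part of the imported r72732 code]
How a client pass typically consumes the four cases documented above
(generic CodeGen API; the per-case handling is only sketched):

    MachineBasicBlock *TBB = 0, *FBB = 0;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
      // !TBB                          -> case 1: pure fall-through
      // TBB && Cond.empty()           -> case 2: unconditional branch to TBB
      // TBB && !Cond.empty() && !FBB  -> case 3: conditional branch, falls through
      // TBB && !Cond.empty() && FBB   -> case 4: conditional branch, then unconditional
    }
[end aside]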
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                              MachineBasicBlock *&FBB,
+                              SmallVectorImpl<MachineOperand> &Cond,
+                              bool AllowModify) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (IsBRU(LastInst->getOpcode())) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+
+    XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+    if (BranchCode == XCore::COND_INVALID)
+      return true;  // Can't handle indirect branch.
+
+    // Conditional branch
+    // Block ends with fall-through condbranch.
+
+    TBB = LastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(BranchCode));
+    Cond.push_back(LastInst->getOperand(0));
+    return false;
+  }
+
+  // Get the instruction before it if it's a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isUnpredicatedTerminator(--I))
+    return true;
+
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+  XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+  // If the block ends with conditional branch followed by unconditional,
+  // handle it.
+  if (BranchCode != XCore::COND_INVALID
+      && IsBRU(LastInst->getOpcode())) {
+
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(BranchCode));
+    Cond.push_back(SecondLastInst->getOperand(0));
+
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two unconditional branches, handle it. The second
+  // one is not executed, so remove it.
+  if (IsBRU(SecondLastInst->getOpcode()) &&
+      IsBRU(LastInst->getOpcode())) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME there should probably be a DebugLoc argument here
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 2 || Cond.size() == 0) &&
+         "Unexpected number of components!");
+
+  if (FBB == 0) { // One way branch.
+    if (Cond.empty()) {
+      // Unconditional branch
+      BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+    } else {
+      // Conditional branch.
+      unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+      BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+                                 .addMBB(TBB);
+    }
+    return 1;
+  }
+
+  // Two-way Conditional branch.
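[editor's aside — illustrative sketch, not part of the imported r72732 code]
The condition vector consumed above is XCore-specific: Cond[0] carries the
XCore::CondCode as an immediate and Cond[1] carries the register being
tested. Building one by hand (Reg and TrueBB are hypothetical):

    SmallVector<MachineOperand, 2> Cond;
    Cond.push_back(MachineOperand::CreateImm(XCore::COND_TRUE));     // "branch if true"
    Cond.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); // tested register
    TII.InsertBranch(MBB, TrueBB, /*FBB=*/0, Cond);                  // one-way cond branch
[end aside]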
+  assert(Cond.size() == 2 && "Unexpected number of components!");
+  unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+  BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+                             .addMBB(TBB);
+  BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+  return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return 0;
+  --I;
+  if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+    return 0;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return 1;
+  --I;
+  if (!IsCondBranch(I->getOpcode()))
+    return 1;
+
+  // Remove the branch.
+  I->eraseFromParent();
+  return 2;
+}
+
+bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  const TargetRegisterClass *DestRC,
+                                  const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (DestRC == SrcRC) {
+    if (DestRC == XCore::GRRegsRegisterClass) {
+      BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+        .addReg(SrcReg)
+        .addImm(0);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
+      DestRC == XCore::GRRegsRegisterClass) {
+    BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
+      .addImm(0);
+    return true;
+  }
+  if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
+      SrcRC == XCore::GRRegsRegisterClass) {
+    BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+      .addReg(SrcReg);
+    return true;
+  }
+  return false;
+}
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator I,
+                                         unsigned SrcReg, bool isKill,
+                                         int FrameIndex,
+                                         const TargetRegisterClass *RC) const
+{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+  BuildMI(MBB, I, DL, get(XCore::STWFI))
+    .addReg(SrcReg, getKillRegState(isKill))
+    .addFrameIndex(FrameIndex)
+    .addImm(0);
+}
+
+void XCoreInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                    bool isKill,
+                                    SmallVectorImpl<MachineOperand> &Addr,
+                                    const TargetRegisterClass *RC,
+                                    SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+  assert(0 && "unimplemented\n");
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          unsigned DestReg, int FrameIndex,
+                                          const TargetRegisterClass *RC) const
+{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+  BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+    .addFrameIndex(FrameIndex)
+    .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                     SmallVectorImpl<MachineOperand> &Addr,
+                                     const TargetRegisterClass *RC,
+                                     SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+  assert(0 && "unimplemented\n");
+}
+
+bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MI,
+                                  const std::vector<CalleeSavedInfo> &CSI) const
+{
+  if (CSI.empty()) {
+    return true;
+  }
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+  XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+
+  bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+       it != CSI.end(); ++it) {
+    // Add the callee-saved register as live-in. It's killed at the spill.
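[editor's aside — illustrative sketch, not part of the imported r72732 code]
storeRegToStackSlot (called just below) emits the STWFI pseudo against an
abstract FrameIndex; the concrete sp- or r10-relative offset is only filled
in later by XCoreRegisterInfo::eliminateFrameIndex. Roughly:

    int FI = MFI->CreateStackObject(4, 4);   // hypothetical 4-byte, 4-aligned slot
    TII.storeRegToStackSlot(MBB, MI, XCore::R4, /*isKill=*/true, FI,
                            XCore::GRRegsRegisterClass);
    // STWFI %R4<kill>, <fi#FI>, 0   ==>   stw r4, sp[offset/4]   (rewritten later)
[end aside]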
+    MBB.addLiveIn(it->getReg());
+
+    storeRegToStackSlot(MBB, MI, it->getReg(), true,
+                        it->getFrameIdx(), it->getRegClass());
+    if (emitFrameMoves) {
+      unsigned SaveLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addImm(SaveLabelId);
+      XFI->getSpillLabels().push_back(
+          std::pair<unsigned, CalleeSavedInfo>(SaveLabelId, *it));
+    }
+  }
+  return true;
+}
+
+bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MI,
+                                  const std::vector<CalleeSavedInfo> &CSI) const
+{
+  bool AtStart = MI == MBB.begin();
+  MachineBasicBlock::iterator BeforeI = MI;
+  if (!AtStart)
+    --BeforeI;
+  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+       it != CSI.end(); ++it) {
+
+    loadRegFromStackSlot(MBB, MI, it->getReg(),
+                         it->getFrameIdx(),
+                         it->getRegClass());
+    assert(MI != MBB.begin() &&
+           "loadRegFromStackSlot didn't insert any code!");
+    // Insert in reverse order. loadRegFromStackSlot can insert multiple
+    // instructions.
+    if (AtStart)
+      MI = MBB.begin();
+    else {
+      MI = BeforeI;
+      ++MI;
+    }
+  }
+  return true;
+}
+
+/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not
+/// fall-through into its successor block.
+bool XCoreInstrInfo::
+BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
+{
+  if (MBB.empty()) return false;
+
+  switch (MBB.back().getOpcode()) {
+  case XCore::RETSP_u6:   // Return.
+  case XCore::RETSP_lu6:
+  case XCore::BAU_1r:     // Indirect branch.
+  case XCore::BRFU_u6:    // Uncond branch.
+  case XCore::BRFU_lu6:
+  case XCore::BRBU_u6:
+  case XCore::BRBU_lu6:
+    return true;
+  default: return false;
+  }
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+  assert((Cond.size() == 2) &&
+         "Invalid XCore branch condition!");
+  Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+  return false;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 000000000000..08708863ad57
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,110 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+namespace llvm {
+
+class XCoreInstrInfo : public TargetInstrInfoImpl {
+  const XCoreRegisterInfo RI;
+public:
+  XCoreInstrInfo(void);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
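[editor's aside — illustrative sketch, not part of the imported r72732 code]
A typical caller of this hook is the register coalescer, which probes every
instruction for copy-ness; e.g. a trivial dead-copy check:

    unsigned Src, Dst, SrcSub, DstSub;
    if (TII.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub) && Src == Dst)
      MI->eraseFromParent();   // an identity copy contributes nothing
[end aside]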
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot stored to. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  virtual bool isInvariantLoad(const MachineInstr *MI) const;
+
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
+
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+  virtual bool ReverseBranchCondition(
+                                 SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 000000000000..65cd4fe95559
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,991 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
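[editor's aside — illustrative sketch, not part of the imported r72732 code]
The XCoreISD::* SDNodes declared in this file are produced by the target's
C++ lowering code and matched by the patterns further down; sketch of the
producing side for a call, with Chain, Callee and dl as in the usual
lowering code (the exact getNode overload is assumed, not quoted):

    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Ops[] = { Chain, Callee };
    Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, Ops, 2);
[end aside]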
+// +//===----------------------------------------------------------------------===// + +// Uses of CP, DP are not currently reflected in the patterns, since +// having a physical register as an operand prevents loop hoisting and +// since the value of these registers never changes during the life of the +// function. + +//===----------------------------------------------------------------------===// +// Instruction format superclass. +//===----------------------------------------------------------------------===// + +include "XCoreInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Feature predicates. +//===----------------------------------------------------------------------===// + +// HasXS1A - This predicate is true when the target processor supports XS1A +// instructions. +def HasXS1A : Predicate<"Subtarget.isXS1A()">; + +// HasXS1B - This predicate is true when the target processor supports XS1B +// instructions. +def HasXS1B : Predicate<"Subtarget.isXS1B()">; + +//===----------------------------------------------------------------------===// +// XCore specific DAG Nodes. +// + +// Call +def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; + +def SDT_XCoreAddress : SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; + +def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress, + []>; + +def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress, + []>; + +def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress, + []>; + +def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>; +def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp, + [SDNPHasChain]>; + +// These are target-independent nodes, but have target-specific formats. 
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                           SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+                           [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_XCoreCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+  assert(N->getZExtValue() % 4 == 0);
+  return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+  assert(isMask_32(N->getZExtValue()));
+  // look for the first non-zero bit
+  return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
+
+def neg_xform : SDNodeXForm<imm, [{
+  uint32_t value = N->getZExtValue();
+  return getI32Imm(-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+  uint32_t value = N->getZExtValue();
+  assert(-value % 4 == 0);
+  return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+  uint32_t value = (uint32_t)N->getZExtValue();
+  return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+  uint32_t value = (uint32_t)N->getZExtValue();
+  return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+  return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+  return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+  return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+  return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+  return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+  return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
+def immMskBitp : PatLeaf<(imm), [{
+  uint32_t value = (uint32_t)N->getZExtValue();
+  if (!isMask_32(value)) {
+    return false;
+  }
+  int msksize = 32 - CountLeadingZeros_32(value);
+  return (msksize >= 1 && msksize <= 8)
+      || msksize == 16
+      || msksize == 24
+      || msksize == 32;
+}]>;
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
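[editor's aside — illustrative sketch, not part of the imported r72732 code]
The *Us4 leaves and div4_xform above implement the word-scaled short
immediates: a byte offset is encodable only if it is a multiple of 4 whose
quotient fits the 0..11 "us" field, and it is emitted divided by 4.
Equivalent C++ check:

    static bool isImmUs4(uint32_t V) { return V % 4 == 0 && V / 4 <= 11; }
    // e.g. byte offset 44 -> encoded operand 11; byte offset 46 -> no match
[end aside]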
+def immBitp : PatLeaf<(imm), [{
+  uint32_t value = (uint32_t)N->getZExtValue();
+  return (value >= 1 && value <= 8)
+      || value == 16
+      || value == 24
+      || value == 32;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+                     (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+                     (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+                    (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+                    (sub node:$addr, (shl node:$offset, 2))>;
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<i32>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper], []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+  def _3r: _F3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+  def _2rus : _F2RUS<
+   (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<string OpcStr> {
+  def _3r: _F3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   []>;
+  def _2rus : _F2RUS<
+   (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+  def _3r: _F3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+  def _2rus : _F2RUS<
+   (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+  def _l3r: _FL3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+  def _l2rus : _FL2RUS<
+   (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RBITP multiclass - Define a normal FL3R/FL2RBITP pattern in one shot.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+  def _l3r: _FL3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+  def _l2rus : _FL2RUS<
+   (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+   (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+   !strconcat(OpcStr, " $dst, $b, $c"),
+   [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+  def _ru6: _FRU6<
+   (outs), (ins GRRegs:$cond, brtarget:$dest),
+   !strconcat(OpcStr, " $cond, $dest"),
+   []>;
+  def _lru6: _FLRU6<
+   (outs), (ins GRRegs:$cond, brtarget:$dest),
+   !strconcat(OpcStr, " $cond, $dest"),
+   []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+  def _ru6: _FRU6<
+   (outs GRRegs:$dst), (ins i32imm:$a),
+   !strconcat(OpcStr, " $dst, cp[$a]"),
+   []>;
+  def _lru6: _FLRU6<
+   (outs GRRegs:$dst), (ins i32imm:$a),
+   !strconcat(OpcStr, " $dst, cp[$a]"),
+   []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+  def _u6: _FU6<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   [(OpNode immU6:$b)]>;
+  def _lu6: _FLU6<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   [(OpNode immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+  def _u6: _FU6<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   []>;
+  def _lu6: _FLU6<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+  def _u10: _FU10<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   []>;
+  def _lu10: _FLU10<
+   (outs), (ins i32imm:$b),
+   !strconcat(OpcStr, " $b"),
+   []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+   (outs GRRegs:$dst), (ins GRRegs:$b),
+   !strconcat(OpcStr, " $dst, $b"),
+   []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+                               "${:comment} ADJCALLSTACKDOWN $amt",
+                               [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                            "${:comment} ADJCALLSTACKUP $amt1",
+                            [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+                             "${:comment} LDWFI $dst, $addr",
+                             [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+                             "${:comment} LDAWFI $dst, $addr",
+                             [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+                            "${:comment} STWFI $src, $addr",
+                            [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
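[editor's aside — illustrative sketch, not part of the imported r72732 code]
The custom-inserter flag set just below hands SELECT_CC to the target's
EmitInstrWithCustomInserter hook, which expands it into a branch diamond;
in C terms the pseudo

    dst = cond ? T : F;

becomes roughly

    if (cond) goto copyT;
    dstF = F; goto done;
    copyT: dstT = T;
    done:  dst = phi(dstF, dstT);
[end aside]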
+let usesCustomDAGSchedInserter = 1 in { + def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst), + (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F), + "${:comment} SELECT_CC PSEUDO!", + [(set GRRegs:$dst, + (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +// Three operand short +defm ADD : F3R_2RUS<"add", add>; +defm SUB : F3R_2RUS<"sub", sub>; +let neverHasSideEffects = 1 in { +defm EQ : F3R_2RUS_np<"eq">; +def LSS_3r : F3R_np<"lss">; +def LSU_3r : F3R_np<"lsu">; +} +def AND_3r : F3R<"and", and>; +def OR_3r : F3R<"or", or>; + +let mayLoad=1 in { +def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "ldw $dst, $addr[$offset]", + []>; + +def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset), + "ldw $dst, $addr[$offset]", + []>; + +def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "ld16s $dst, $addr[$offset]", + []>; + +def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "ld8u $dst, $addr[$offset]", + []>; +} + +let mayStore=1 in { +def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "stw $val, $addr[$offset]", + []>; + +def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), + "stw $val, $addr[$offset]", + []>; +} + +defm SHL : F3R_2RBITP<"shl", shl>; +defm SHR : F3R_2RBITP<"shr", srl>; +// TODO tsetr + +// Three operand long +def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[$offset]", + [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>; + +let neverHasSideEffects = 1 in +def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[$offset]", + []>; + +def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[-$offset]", + [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>; + +let neverHasSideEffects = 1 in +def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[-$offset]", + []>; + +def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[$offset]", + [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>; + +def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[-$offset]", + [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>; + +def MUL_l3r : FL3R<"mul", mul>; +// Instructions which may trap are marked as side effecting. 
+let hasSideEffects = 1 in { +def DIVS_l3r : FL3R<"divs", sdiv>; +def DIVU_l3r : FL3R<"divu", udiv>; +def REMS_l3r : FL3R<"rems", srem>; +def REMU_l3r : FL3R<"remu", urem>; +} +def XOR_l3r : FL3R<"xor", xor>; +defm ASHR : FL3R_L2RBITP<"ashr", sra>; +// TODO crc32, crc8, inpw, outpw +let mayStore=1 in { +def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st16 $val, $addr[$offset]", + []>; + +def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st8 $val, $addr[$offset]", + []>; +} + +// Four operand long +let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in { +def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, + GRRegs:$src4), + "maccu $dst1, $dst2, $src3, $src4", + []>; + +def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, + GRRegs:$src4), + "maccs $dst1, $dst2, $src3, $src4", + []>; +} + +// Five operand long + +let Predicates = [HasXS1B] in { +def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ladd $dst1, $dst2, $src1, $src2, $src3", + []>; + +def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "lsub $dst1, $dst2, $src1, $src2, $src3", + []>; + +def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ldiv $dst1, $dst2, $src1, $src2, $src3", + []>; +} + +// Six operand long + +def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, + GRRegs:$src4), + "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", + []>; + +let Predicates = [HasXS1A] in +def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, + GRRegs:$src4), + "macc $dst1, $dst2, $src1, $src2, $src3, $src4", + []>; + +// Register - U6 + +//let Uses = [DP] in ... +let neverHasSideEffects = 1, isReMaterializable = 1 in +def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), + "ldaw $dst, dp[$a]", + []>; + +let isReMaterializable = 1 in +def LDAWDP_lru6: _FLRU6< + (outs GRRegs:$dst), (ins MEMii:$a), + "ldaw $dst, dp[$a]", + [(set GRRegs:$dst, ADDRdpii:$a)]>; + +let mayLoad=1 in +def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), + "ldw $dst, dp[$a]", + []>; + +def LDWDP_lru6: _FLRU6< + (outs GRRegs:$dst), (ins MEMii:$a), + "ldw $dst, dp[$a]", + [(set GRRegs:$dst, (load ADDRdpii:$a))]>; + +let mayStore=1 in +def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr), + "stw $val, dp[$addr]", + []>; + +def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr), + "stw $val, dp[$addr]", + [(store GRRegs:$val, ADDRdpii:$addr)]>; + +//let Uses = [CP] in .. 
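[editor's aside — illustrative sketch, not part of the imported r72732 code]
The ladd/lsub instructions defined above return a 32-bit result plus a
carry/borrow word, the building block for 64-bit arithmetic on this 32-bit
target. C++ model of the ladd semantics (operand order on the real
instruction may differ):

    static void ladd(uint32_t a, uint32_t b, uint32_t cin,   // cin is 0 or 1
                     uint32_t &sum, uint32_t &cout) {
      uint64_t wide = (uint64_t)a + b + cin;
      sum  = (uint32_t)wide;
      cout = (uint32_t)(wide >> 32);                         // carry out: 0 or 1
    }
[end aside]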
+let mayLoad = 1, isReMaterializable = 1 in +defm LDWCP : FRU6_LRU6_cp<"ldw">; + +let Uses = [SP] in { +let mayStore=1 in { +def STWSP_ru6 : _FRU6< + (outs), (ins GRRegs:$val, i32imm:$index), + "stw $val, sp[$index]", + [(XCoreStwsp GRRegs:$val, immU6:$index)]>; + +def STWSP_lru6 : _FLRU6< + (outs), (ins GRRegs:$val, i32imm:$index), + "stw $val, sp[$index]", + [(XCoreStwsp GRRegs:$val, immU16:$index)]>; +} + +let mayLoad=1 in { +def LDWSP_ru6 : _FRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldw $dst, sp[$b]", + []>; + +def LDWSP_lru6 : _FLRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldw $dst, sp[$b]", + []>; +} + +let neverHasSideEffects = 1 in { +def LDAWSP_ru6 : _FRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldaw $dst, sp[$b]", + []>; + +def LDAWSP_lru6 : _FLRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldaw $dst, sp[$b]", + []>; + +def LDAWSP_ru6_RRegs : _FRU6< + (outs RRegs:$dst), (ins i32imm:$b), + "ldaw $dst, sp[$b]", + []>; + +def LDAWSP_lru6_RRegs : _FLRU6< + (outs RRegs:$dst), (ins i32imm:$b), + "ldaw $dst, sp[$b]", + []>; +} +} + +let isReMaterializable = 1 in { +def LDC_ru6 : _FRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldc $dst, $b", + [(set GRRegs:$dst, immU6:$b)]>; + +def LDC_lru6 : _FLRU6< + (outs GRRegs:$dst), (ins i32imm:$b), + "ldc $dst, $b", + [(set GRRegs:$dst, immU16:$b)]>; +} + +// Operand register - U6 +// TODO setc +let isBranch = 1, isTerminator = 1 in { +defm BRFT: FRU6_LRU6_branch<"bt">; +defm BRBT: FRU6_LRU6_branch<"bt">; +defm BRFF: FRU6_LRU6_branch<"bf">; +defm BRBF: FRU6_LRU6_branch<"bf">; +} + +// U6 +let Defs = [SP], Uses = [SP] in { +let neverHasSideEffects = 1 in +defm EXTSP : FU6_LU6_np<"extsp">; +let mayStore = 1 in +defm ENTSP : FU6_LU6_np<"entsp">; + +let isReturn = 1, isTerminator = 1, mayLoad = 1 in { +defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; +} +} + +// TODO extdp, kentsp, krestsp, blat, setsr +// clrsr, getsr, kalli +let isBranch = 1, isTerminator = 1 in { +def BRBU_u6 : _FU6< + (outs), + (ins brtarget:$target), + "bu $target", + []>; + +def BRBU_lu6 : _FLU6< + (outs), + (ins brtarget:$target), + "bu $target", + []>; + +def BRFU_u6 : _FU6< + (outs), + (ins brtarget:$target), + "bu $target", + []>; + +def BRFU_lu6 : _FLU6< + (outs), + (ins brtarget:$target), + "bu $target", + []>; +} + +//let Uses = [CP] in ... 
+let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1, + isReMaterializable = 1 in +def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a), + "ldaw r11, cp[$a]", + []>; + +let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in +def LDAWCP_lu6: _FLRU6< + (outs), (ins MEMii:$a), + "ldaw r11, cp[$a]", + [(set R11, ADDRcpii:$a)]>; + +// U10 +// TODO ldwcpl, blacp + +let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in +def LDAP_u10 : _FU10< + (outs), + (ins i32imm:$addr), + "ldap r11, $addr", + []>; + +let Defs = [R11], isReMaterializable = 1 in +def LDAP_lu10 : _FLU10< + (outs), + (ins i32imm:$addr), + "ldap r11, $addr", + [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; + +let isCall=1, +// All calls clobber the the link register and the non-callee-saved registers: +Defs = [R0, R1, R2, R3, R11, LR] in { +def BL_u10 : _FU10< + (outs), + (ins calltarget:$target, variable_ops), + "bl $target", + [(XCoreBranchLink immU10:$target)]>; + +def BL_lu10 : _FLU10< + (outs), + (ins calltarget:$target, variable_ops), + "bl $target", + [(XCoreBranchLink immU20:$target)]>; +} + +// Two operand short +// TODO getr, getst +def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), + "not $dst, $b", + [(set GRRegs:$dst, (not GRRegs:$b))]>; + +def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), + "neg $dst, $b", + [(set GRRegs:$dst, (ineg GRRegs:$b))]>; + +// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out, +// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp, +// tsetmr, sext (reg), zext (reg) +let isTwoAddress = 1 in { +let neverHasSideEffects = 1 in +def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), + "sext $dst, $src2", + []>; + +let neverHasSideEffects = 1 in +def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), + "zext $dst, $src2", + []>; + +def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "andnot $dst, $src2", + [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>; +} + +let isReMaterializable = 1, neverHasSideEffects = 1 in +def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size), + "mkmsk $dst, $size", + []>; + +def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), + "mkmsk $dst, $size", + [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>; + +// Two operand long +// TODO settw, setclk, setrdy, setpsc, endin, peek, +// getd, testlcl, tinitlr, getps, setps +def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "bitrev $dst, $src", + [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; + +def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "byterev $dst, $src", + [(set GRRegs:$dst, (bswap GRRegs:$src))]>; + +def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "clz $dst, $src", + [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; + +// One operand short +// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp +// bru, setdp, setcp, setv, setev, kcall +// dgetreg +let isBranch=1, isIndirectBranch=1, isTerminator=1 in +def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), + "bau $addr", + [(brind GRRegs:$addr)]>; + +let Defs=[SP], neverHasSideEffects=1 in +def SETSP_1r : _F1R<(outs), (ins GRRegs:$src), + "set sp, $src", + []>; + +let isBarrier = 1, hasCtrlDep = 1 in +def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src), + "ecallt $src", + []>; + +let isBarrier = 1, hasCtrlDep = 1 in +def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), + "ecallf $src", + []>; + +let isCall=1, +// All calls clobber the the link 
register and the non-callee-saved registers: +Defs = [R0, R1, R2, R3, R11, LR] in { +def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), + "bla $addr", + [(XCoreBranchLink GRRegs:$addr)]>; +} + +// Zero operand short +// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, +// dentsp, drestsp + +let Defs = [R11] in +def GETID_0R : _F0R<(outs), (ins), + "get r11, id", + [(set R11, (int_xcore_getid))]>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>; +def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>; + +/// sext_inreg +def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>; +def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>; +def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>; + +/// loads +def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)), + (LD8U_3r GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>; + +def : Pat<(zextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)), + (LD16S_3r GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>; + +def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)), + (LDW_3r GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(load (add GRRegs:$addr, immUs4:$offset)), + (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>; +def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>; + +/// anyext +def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)), + (LD8U_3r GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>; +def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)), + (LD16S_3r GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>; + +/// stores +def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)), + (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr), + (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>; + +def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)), + (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr), + (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>; + +def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)), + (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; +def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)), + (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>; +def : Pat<(store GRRegs:$val, GRRegs:$addr), + (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>; + +/// cttz +def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>; + +/// trap +def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>; + +/// +/// branch patterns +/// + +// unconditional branch +def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>; + +// direct match equal/notequal zero brcond +def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst), + (BRFT_lru6 GRRegs:$lhs, bb:$dst)>; +def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst), + (BRFF_lru6 GRRegs:$lhs, bb:$dst)>; + +def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst), + (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>; 
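[editor's aside — illustrative sketch, not part of the imported r72732 code]
XCore only has strict "less than" comparisons (lss/lsu), so the non-strict
forms here and below are matched by swapping operands and branching on
false:

    static bool le(int a, int b) { return !(b < a); }   // a <= b, same identity
    // hence (brcond (setle a, b)) maps to BRFF on (LSS b, a)
[end aside]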
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst), + (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>; +def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst), + (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>; +def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst), + (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>; +def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst), + (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>; +def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst), + (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>; + +// generic brcond pattern +def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>; + + +/// +/// Select patterns +/// + +// direct match equal/notequal zero select +def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F), + (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>; + +def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F), + (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>; + +def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>; +def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>; +def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>; +def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>; +def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>; +def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F), + (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>; + +/// +/// setcc patterns, only matched when none of the above brcond +/// patterns match +/// + +// setcc 2 register operands +def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs), + (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>; +def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs), + (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>; + +def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs), + (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>; +def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs), + (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>; + +def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs), + (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>; +def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs), + (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>; + +def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs), + (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>; +def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs), + (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>; + +def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs), + (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>; + +def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs), + (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>; + +// setcc reg/imm operands +def : Pat<(seteq GRRegs:$lhs, immUs:$rhs), + (EQ_2rus GRRegs:$lhs, immUs:$rhs)>; +def : Pat<(setne GRRegs:$lhs, immUs:$rhs), + (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>; + +// misc +def : Pat<(add GRRegs:$addr, immUs4:$offset), + (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>; + +def : Pat<(sub GRRegs:$addr, immUs4:$offset), + (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>; + +def : Pat<(and GRRegs:$val, immMskBitp:$mask), + (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>; + +// (sub X, imm) gets canonicalized to (add X, 
-imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+          (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+          (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+          (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+          (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+          (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+          (ASHR_l2rus GRRegs:$src, 32)>;
+
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 000000000000..43adb0f917c9
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//====- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private XCore target-specific information for each
+/// MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+  bool UsesLR;
+  int LRSpillSlot;
+  int FPSpillSlot;
+  int VarArgsFrameIndex;
+  std::vector<std::pair<unsigned, CalleeSavedInfo> > SpillLabels;
+
+public:
+  XCoreFunctionInfo() :
+    UsesLR(false),
+    LRSpillSlot(0),
+    FPSpillSlot(0),
+    VarArgsFrameIndex(0) {}
+
+  XCoreFunctionInfo(MachineFunction &MF) :
+    UsesLR(false),
+    LRSpillSlot(0),
+    FPSpillSlot(0),
+    VarArgsFrameIndex(0) {}
+
+  ~XCoreFunctionInfo() {}
+
+  void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+  void setUsesLR(bool val) { UsesLR = val; }
+  bool getUsesLR() const { return UsesLR; }
+
+  void setLRSpillSlot(int off) { LRSpillSlot = off; }
+  int getLRSpillSlot() const { return LRSpillSlot; }
+
+  void setFPSpillSlot(int off) { FPSpillSlot = off; }
+  int getFPSpillSlot() const { return FPSpillSlot; }
+
+  std::vector<std::pair<unsigned, CalleeSavedInfo> > &getSpillLabels() {
+    return SpillLabels;
+  }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 000000000000..82cd92d5685c
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,598 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "XCoreRegisterInfo.h" +#include "XCoreMachineFunctionInfo.h" +#include "XCore.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii) + : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP), + TII(tii) { +} + +// helper functions +static inline bool isImmUs(unsigned val) { + return val <= 11; +} + +static inline bool isImmU6(unsigned val) { + return val < (1 << 6); +} + +static inline bool isImmU16(unsigned val) { + return val < (1 << 16); +} + +static const unsigned XCore_ArgRegs[] = { + XCore::R0, XCore::R1, XCore::R2, XCore::R3 +}; + +const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF) +{ + return XCore_ArgRegs; +} + +unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF) +{ + return array_lengthof(XCore_ArgRegs); +} + +bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) +{ + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + return (MMI && MMI->hasDebugInfo()) || + !MF.getFunction()->doesNotThrow() || + UnwindTablesMandatory; +} + +const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) + const { + static const unsigned CalleeSavedRegs[] = { + XCore::R4, XCore::R5, XCore::R6, XCore::R7, + XCore::R8, XCore::R9, XCore::R10, XCore::LR, + 0 + }; + return CalleeSavedRegs; +} + +const TargetRegisterClass* const* +XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, + XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, + XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, + XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass, + 0 + }; + return CalleeSavedRegClasses; +} + +BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(XCore::CP); + Reserved.set(XCore::DP); + Reserved.set(XCore::SP); + Reserved.set(XCore::LR); + if (hasFP(MF)) { + Reserved.set(XCore::R10); + } + return Reserved; +} + +bool +XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + // TODO can we estimate stack size? 
+ return hasFP(MF); +} + +bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { + return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); +} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void XCoreRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // Turn the adjcallstackdown instruction into 'extsp ' and the + // adjcallstackup instruction into 'ldaw sp, sp[]' + MachineInstr *Old = I; + uint64_t Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + assert(Amount%4 == 0); + Amount /= 4; + + bool isU6 = isImmU6(Amount); + + if (!isU6 && !isImmU16(Amount)) { + // FIX could emit multiple instructions in this case. + cerr << "eliminateCallFramePseudoInstr size too big: " + << Amount << "\n"; + abort(); + } + + MachineInstr *New; + if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) { + int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode)) + .addImm(Amount); + } else { + assert(Old->getOpcode() == XCore::ADJCALLSTACKUP); + int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP) + .addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + + MBB.erase(I); +} + +void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + MachineInstr &MI = *II; + DebugLoc dl = MI.getDebugLoc(); + unsigned i = 0; + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + MachineOperand &FrameOp = MI.getOperand(i); + int FrameIndex = FrameOp.getIndex(); + + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + int StackSize = MF.getFrameInfo()->getStackSize(); + + #ifndef NDEBUG + DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DOUT << "<--------->\n"; + MI.print(DOUT); + DOUT << "FrameIndex : " << FrameIndex << "\n"; + DOUT << "FrameOffset : " << Offset << "\n"; + DOUT << "StackSize : " << StackSize << "\n"; + #endif + + Offset += StackSize; + + // fold constant into offset. 
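[editor's aside — illustrative sketch, not part of the imported r72732 code]
Two conventions used throughout this file: outgoing-argument space is
rounded up with the standard (N + Align - 1) / Align * Align idiom, and
stack offsets are divided by 4 because XCore load/store immediates count
words rather than bytes. Self-contained equivalent of the rounding:

    static uint64_t roundUp(uint64_t N, uint64_t Align) {
      return (N + Align - 1) / Align * Align;   // e.g. roundUp(13, 4) == 16
    }
[end aside]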
+  Offset += MI.getOperand(i + 1).getImm();
+  MI.getOperand(i + 1).ChangeToImmediate(0);
+
+  assert(Offset%4 == 0 && "Misaligned stack offset");
+
+  #ifndef NDEBUG
+  DOUT << "Offset      : " << Offset << "\n";
+  DOUT << "<--------->\n";
+  #endif
+
+  Offset/=4;
+
+  bool FP = hasFP(MF);
+
+  unsigned Reg = MI.getOperand(0).getReg();
+  bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+  assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+         "Unexpected register operand");
+
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  if (FP) {
+    bool isUs = isImmUs(Offset);
+    unsigned FramePtr = XCore::R10;
+
+    MachineInstr *New = 0;
+    if (!isUs) {
+      if (!RS) {
+        cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+        abort();
+      }
+      unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+                                                 SPAdj);
+      loadConstant(MBB, II, ScratchReg, Offset, dl);
+      switch (MI.getOpcode()) {
+      case XCore::LDWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+              .addReg(FramePtr)
+              .addReg(ScratchReg, RegState::Kill);
+        break;
+      case XCore::STWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+              .addReg(Reg, getKillRegState(isKill))
+              .addReg(FramePtr)
+              .addReg(ScratchReg, RegState::Kill);
+        break;
+      case XCore::LDAWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+              .addReg(FramePtr)
+              .addReg(ScratchReg, RegState::Kill);
+        break;
+      default:
+        assert(0 && "Unexpected Opcode\n");
+      }
+    } else {
+      switch (MI.getOpcode()) {
+      case XCore::LDWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+              .addReg(FramePtr)
+              .addImm(Offset);
+        break;
+      case XCore::STWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+              .addReg(Reg, getKillRegState(isKill))
+              .addReg(FramePtr)
+              .addImm(Offset);
+        break;
+      case XCore::LDAWFI:
+        New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+              .addReg(FramePtr)
+              .addImm(Offset);
+        break;
+      default:
+        assert(0 && "Unexpected Opcode\n");
+      }
+    }
+  } else {
+    bool isU6 = isImmU6(Offset);
+    if (!isU6 && !isImmU16(Offset)) {
+      // FIXME could make this work for LDWSP, LDAWSP.
+      cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+      abort();
+    }
+
+    switch (MI.getOpcode()) {
+    int NewOpcode;
+    case XCore::LDWFI:
+      NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+      BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+            .addImm(Offset);
+      break;
+    case XCore::STWFI:
+      NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+      BuildMI(MBB, II, dl, TII.get(NewOpcode))
+            .addReg(Reg, getKillRegState(isKill))
+            .addImm(Offset);
+      break;
+    case XCore::LDAWFI:
+      NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+      BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+            .addImm(Offset);
+      break;
+    default:
+      assert(0 && "Unexpected Opcode\n");
+    }
+  }
+  // Erase old instruction.
+  MBB.erase(II);
+}
+
+void
+XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                        RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+  const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+  if (LRUsed) {
+    MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+    bool isVarArg = MF.getFunction()->isVarArg();
+    int FrameIdx;
+    if (! isVarArg) {
+      // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
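[editor's aside — illustrative, not part of the imported r72732 code]
Placing the LR spill at fixed offset 0 is what lets the prologue use the
combined instruction; roughly (sketch of the two code shapes):

    entsp n        ; extend the stack by n words and save lr in the frame
    ...
    retsp n        ; reload lr, pop the frame and return

versus the split form "extsp n" plus an explicit "stw lr, sp[0]" when the
slot is not at offset 0.
[end aside]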
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0);
+    } else {
+      FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+    }
+    XFI->setUsesLR(FrameIdx);
+    XFI->setLRSpillSlot(FrameIdx);
+  }
+  if (requiresRegisterScavenging(MF)) {
+    // Reserve a slot close to SP or frame pointer.
+    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment()));
+  }
+  if (hasFP(MF)) {
+    // A callee save register is used to hold the FP.
+    // This needs saving / restoring in the epilogue / prologue.
+    XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+                                               RC->getAlignment()));
+  }
+}
+
+void XCoreRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+             unsigned DstReg, int64_t Value, DebugLoc dl) const {
+  // TODO use mkmsk if possible.
+  if (!isImmU16(Value)) {
+    // TODO use constant pool.
+    cerr << "loadConstant value too big " << Value << "\n";
+    abort();
+  }
+  int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+  BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+void XCoreRegisterInfo::
+storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+             unsigned SrcReg, int Offset, DebugLoc dl) const {
+  assert(Offset%4 == 0 && "Misaligned stack offset");
+  Offset/=4;
+  bool isU6 = isImmU6(Offset);
+  if (!isU6 && !isImmU16(Offset)) {
+    cerr << "storeToStack offset too big " << Offset << "\n";
+    abort();
+  }
+  int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+  BuildMI(MBB, I, dl, TII.get(Opcode))
+    .addReg(SrcReg)
+    .addImm(Offset);
+}
+
+void XCoreRegisterInfo::
+loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+              unsigned DstReg, int Offset, DebugLoc dl) const {
+  assert(Offset%4 == 0 && "Misaligned stack offset");
+  Offset/=4;
+  bool isU6 = isImmU6(Offset);
+  if (!isU6 && !isImmU16(Offset)) {
+    cerr << "loadFromStack offset too big " << Offset << "\n";
+    abort();
+  }
+  int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+  BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+    .addImm(Offset);
+}
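storeToStack and loadFromStack above first scale the byte offset down to words (hence the Offset%4 assertions) and then pick between the short and long immediate encodings. Assuming isImmU6/isImmU16 have their usual meaning of unsigned 6-bit and 16-bit ranges, the selection amounts to this sketch (illustrative helper, not from the patch):

    // _ru6 forms encode a 6-bit unsigned word offset (0..63); the
    // prefixed _lru6 forms extend the range to 16 bits (0..65535).
    static bool fitsShortEncoding(int WordOffset) {
      return WordOffset >= 0 && WordOffset < 64;
    }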
+
+void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  bool FP = hasFP(MF);
+
+  // Work out frame sizes.
+  int FrameSize = MFI->getStackSize();
+
+  assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+  FrameSize/=4;
+
+  bool isU6 = isImmU6(FrameSize);
+
+  if (!isU6 && !isImmU16(FrameSize)) {
+    // FIXME could emit multiple instructions.
+    cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
+    abort();
+  }
+  bool emitFrameMoves = needsFrameMoves(MF);
+
+  // Do we need to allocate space on the stack?
+  if (FrameSize) {
+    bool saveLR = XFI->getUsesLR();
+    bool LRSavedOnEntry = false;
+    int Opcode;
+    if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+      Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+      MBB.addLiveIn(XCore::LR);
+      saveLR = false;
+      LRSavedOnEntry = true;
+    } else {
+      Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+    }
+    BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+    if (emitFrameMoves) {
+      std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+      // Show update of SP.
+      unsigned FrameLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+
+      MachineLocation SPDst(MachineLocation::VirtualFP);
+      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+      if (LRSavedOnEntry) {
+        MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+        MachineLocation CSSrc(XCore::LR);
+        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+      }
+    }
+    if (saveLR) {
+      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+      storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
+      MBB.addLiveIn(XCore::LR);
+
+      if (emitFrameMoves) {
+        unsigned SaveLRLabelId = MMI->NextLabelID();
+        BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
+        MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+        MachineLocation CSSrc(XCore::LR);
+        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
+                                                   CSDst, CSSrc));
+      }
+    }
+  }
+
+  if (FP) {
+    // Save R10 to the stack.
+    int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+    storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
+    // R10 is live-in. It is killed at the spill.
+    MBB.addLiveIn(XCore::R10);
+    if (emitFrameMoves) {
+      unsigned SaveR10LabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
+      MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+      MachineLocation CSSrc(XCore::R10);
+      MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
+                                                 CSDst, CSSrc));
+    }
+    // Set the FP from the SP.
+    unsigned FramePtr = XCore::R10;
+    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+      .addImm(0);
+    if (emitFrameMoves) {
+      // Show FP is now valid.
+      unsigned FrameLabelId = MMI->NextLabelID();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+      MachineLocation SPDst(FramePtr);
+      MachineLocation SPSrc(MachineLocation::VirtualFP);
+      MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+    }
+  }
+
+  if (emitFrameMoves) {
+    // Frame moves for callee saved.
+    std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+    std::vector<std::pair<unsigned, CalleeSavedInfo> > &SpillLabels =
+        XFI->getSpillLabels();
+    for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+      unsigned SpillLabel = SpillLabels[I].first;
+      CalleeSavedInfo &CSI = SpillLabels[I].second;
+      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+      unsigned Reg = CSI.getReg();
+      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+      MachineLocation CSSrc(Reg);
+      Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+    }
+  }
+}
+
+void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
+                                     MachineBasicBlock &MBB) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  bool FP = hasFP(MF);
+
+  if (FP) {
+    // Restore the stack pointer.
+    unsigned FramePtr = XCore::R10;
+    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+      .addReg(FramePtr);
+  }
+
+  // Work out frame sizes.
+  int FrameSize = MFI->getStackSize();
+
+  assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+  FrameSize/=4;
+
+  bool isU6 = isImmU6(FrameSize);
+
+  if (!isU6 && !isImmU16(FrameSize)) {
+    // FIXME could emit multiple instructions.
+ cerr << "emitEpilogue Frame size too big: " << FrameSize << "\n"; + abort(); + } + + if (FrameSize) { + XCoreFunctionInfo *XFI = MF.getInfo(); + + if (FP) { + // Restore R10 + int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); + FPSpillOffset += FrameSize*4; + loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl); + } + bool restoreLR = XFI->getUsesLR(); + if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) { + int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); + LRSpillOffset += FrameSize*4; + loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl); + restoreLR = false; + } + if (restoreLR) { + // Fold prologue into return instruction + assert(MBBI->getOpcode() == XCore::RETSP_u6 + || MBBI->getOpcode() == XCore::RETSP_lu6); + int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + MBB.erase(MBBI); + } else { + int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; + BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize); + } + } +} + +int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); +} + +unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const { + bool FP = hasFP(MF); + + return FP ? XCore::R10 : XCore::SP; +} + +unsigned XCoreRegisterInfo::getRARegister() const { + return XCore::LR; +} + +void XCoreRegisterInfo::getInitialFrameState(std::vector &Moves) + const { + // Initial state of the frame pointer is SP. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(XCore::SP, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +#include "XCoreGenRegisterInfo.inc" + diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h new file mode 100644 index 000000000000..00b7caa96bc6 --- /dev/null +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -0,0 +1,94 @@ +//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the XCore implementation of the MRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef XCOREREGISTERINFO_H +#define XCOREREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "XCoreGenRegisterInfo.h.inc" + +namespace llvm { + +class TargetInstrInfo; + +struct XCoreRegisterInfo : public XCoreGenRegisterInfo { +private: + const TargetInstrInfo &TII; + + void loadConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, int64_t Value, DebugLoc dl) const; + + void storeToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, int Offset, DebugLoc dl) const; + + void loadFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, int Offset, DebugLoc dl) const; + +public: + XCoreRegisterInfo(const TargetInstrInfo &tii); + + /// Code Generation virtual methods... 
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 000000000000..00b7caa96bc6
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,94 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "XCoreGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+  const TargetInstrInfo &TII;
+
+  void loadConstant(MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator I,
+                    unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+  void storeToStack(MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator I,
+                    unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+  void loadFromStack(MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator I,
+                     unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+  XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+  /// Code Generation virtual methods...
+
+  const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+  const TargetRegisterClass* const* getCalleeSavedRegClasses(
+                                     const MachineFunction *MF = 0) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  // Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(MachineFunction &MF) const;
+  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+  //! Return the array of argument passing registers
+  /*!
+    \note The size of this array is returned by getArgRegsSize().
+  */
+  static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+  //! Return the size of the argument passing register array
+  static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+  //! Return whether to emit frame moves
+  static bool needsFrameMoves(const MachineFunction &MF);
+
+  //! Get DWARF debugging register number
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 000000000000..62daf5d4567b
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,91 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+  field bits<4> Num;
+  let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+  let Num = num;
+}
+
+// CPU registers
+def R0  : Ri< 0, "r0">,  DwarfRegNum<[0]>;
+def R1  : Ri< 1, "r1">,  DwarfRegNum<[1]>;
+def R2  : Ri< 2, "r2">,  DwarfRegNum<[2]>;
+def R3  : Ri< 3, "r3">,  DwarfRegNum<[3]>;
+def R4  : Ri< 4, "r4">,  DwarfRegNum<[4]>;
+def R5  : Ri< 5, "r5">,  DwarfRegNum<[5]>;
+def R6  : Ri< 6, "r6">,  DwarfRegNum<[6]>;
+def R7  : Ri< 7, "r7">,  DwarfRegNum<[7]>;
+def R8  : Ri< 8, "r8">,  DwarfRegNum<[8]>;
+def R9  : Ri< 9, "r9">,  DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP  : Ri<12, "cp">,  DwarfRegNum<[12]>;
+def DP  : Ri<13, "dp">,  DwarfRegNum<[13]>;
+def SP  : Ri<14, "sp">,  DwarfRegNum<[14]>;
+def LR  : Ri<15, "lr">,  DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+  // Return values and arguments
+  [R0, R1, R2, R3,
+  // Not preserved across procedure calls
+  R11,
+  // Callee save
+  R4, R5, R6, R7, R8, R9, R10]> {
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    GRRegsClass::iterator
+    GRRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+      return begin();
+    }
+    GRRegsClass::iterator
+    GRRegsClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      if (RI->hasFP(MF))
+        return end()-1;  // don't allocate R10
+      else
+        return end();
+    }
+  }];
+}
+
+def RRegs : RegisterClass<"XCore", [i32], 32,
+  // Reserved
+  [CP, DP, SP, LR]> {
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    RRegsClass::iterator
+    RRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+      return begin();
+    }
+    RRegsClass::iterator
+    RRegsClass::allocation_order_end(const MachineFunction &MF) const {
+      // No allocatable registers
+      return begin();
+    }
+  }];
+}
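The GRRegs MethodBodies above implement the usual pre-RA trick for conditionally reserving a register: when the function needs a frame pointer, the allocation order simply ends one entry early, so R10 (deliberately listed last) is never handed out. A minimal sketch of the same pattern in isolation (hypothetical needsFramePointer predicate, not from the patch):

    // Hide the final register in the class from the allocator whenever
    // this function must keep it as a frame pointer.
    iterator allocation_order_end(const MachineFunction &MF) const {
      return needsFramePointer(MF) ? end() - 1 : end();
    }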
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 000000000000..dc53da4ddf0b
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "XCoreGenSubtarget.inc"
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const TargetMachine &TM, const Module &M,
+                               const std::string &FS)
+  : IsXS1A(false),
+    IsXS1B(false)
+{
+  std::string CPU = "xs1a-generic";
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 000000000000..ff6475baa810
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,46 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class XCoreSubtarget : public TargetSubtarget {
+  bool IsXS1A;
+  bool IsXS1B;
+
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  ///
+  XCoreSubtarget(const TargetMachine &TM, const Module &M,
+                 const std::string &FS);
+
+  bool isXS1A() const { return IsXS1A; }
+  bool isXS1B() const { return IsXS1B; }
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options.  Definition of function is auto generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
new file mode 100644
index 000000000000..55137621deae
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -0,0 +1,201 @@
+//===-- XCoreTargetAsmInfo.cpp - XCore asm properties -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the XCoreTargetAsmInfo properties.
+// We use the small section flag for the CP relative and DP relative
+// flags. If a section is small and writable then it is DP relative. If a
+// section is small and not writable then it is CP relative.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
+  : ELFTargetAsmInfo(TM),
+    Subtarget(TM.getSubtargetImpl()) {
+  TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
+  DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
+                                SectionFlags::Small);
+  BSSSection_ = getNamedSection("\t.dp.bss", SectionFlags::Writeable |
+                                SectionFlags::BSS | SectionFlags::Small);
+  if (Subtarget->isXS1A()) {
+    ReadOnlySection = getNamedSection("\t.dp.rodata", SectionFlags::None |
+                                      SectionFlags::Writeable |
+                                      SectionFlags::Small);
+  } else {
+    ReadOnlySection = getNamedSection("\t.cp.rodata", SectionFlags::None |
+                                      SectionFlags::Small);
+  }
+  Data16bitsDirective = "\t.short\t";
+  Data32bitsDirective = "\t.long\t";
+  Data64bitsDirective = 0;
+  ZeroDirective = "\t.space\t";
+  CommentString = "#";
+  ConstantPoolSection = "\t.section\t.cp.rodata,\"ac\",@progbits";
+  JumpTableDataSection = "\t.section\t.dp.data,\"awd\",@progbits";
+  PrivateGlobalPrefix = ".L";
+  AscizDirective = ".asciiz";
+  WeakDefDirective = "\t.weak\t";
+  WeakRefDirective = "\t.weak\t";
+  SetDirective = "\t.set\t";
+
+  // Debug
+  HasLEB128 = true;
+  AbsoluteDebugSectionOffsets = true;
+
+  DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+  DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+  DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+  DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+  DwarfPubNamesSection = "\t.section\t.debug_pubnames,\"\",@progbits";
+  DwarfPubTypesSection = "\t.section\t.debug_pubtypes,\"\",@progbits";
+  DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+  DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+  DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+  DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+}
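The constructor above fixes the convention described in the file comment: small + writable sections are DP relative, small + read-only sections are CP relative. Roughly, for globals (illustrative C declarations, not from the patch):

    // const int table[4] = {1, 2, 3, 4};  -> .cp.rodata (read-only, CP relative)
    // int counter;                        -> .dp.bss    (writable,  DP relative)
    // int initialized = 42;               -> .dp.data   (writable,  DP relative)
    // Exception: on XS1A, read-only data lands in a writable .dp.rodata.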
+
+const Section*
+XCoreTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+  SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+  {
+    if (!GVar->isWeakForLinker()) {
+      switch (Kind) {
+      case SectionKind::RODataMergeStr:
+        return MergeableStringSection(GVar);
+      case SectionKind::RODataMergeConst:
+        return getReadOnlySection();
+      case SectionKind::ThreadData:
+        return DataSection;
+      case SectionKind::ThreadBSS:
+        return getBSSSection_();
+      default:
+        break;
+      }
+    }
+  }
+  return ELFTargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+const Section*
+XCoreTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+  return MergeableConstSection(Ty);
+}
+
+const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+  Constant *C = GV->getInitializer();
+  return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+  const TargetData *TD = TM.getTargetData();
+
+  unsigned Size = TD->getTypeAllocSize(Ty);
+  if (Size == 4 || Size == 8 || Size == 16) {
+    std::string Name = ".cp.const" + utostr(Size);
+
+    return getNamedSection(Name.c_str(),
+                           SectionFlags::setEntitySize(SectionFlags::Mergeable |
+                                                       SectionFlags::Small,
+                                                       Size));
+  }
+
+  return getReadOnlySection();
+}
+
+const Section* XCoreTargetAsmInfo::
+MergeableStringSection(const GlobalVariable *GV) const {
+  // FIXME insert in correct mergable section
+  return getReadOnlySection();
+}
+
+unsigned XCoreTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV,
+                      const char* Name) const {
+  unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+  // Mask out unsupported flags
+  Flags &= ~(SectionFlags::Small | SectionFlags::TLS);
+
+  // Set CP / DP relative flags
+  if (GV) {
+    SectionKind::Kind Kind = SectionKindForGlobal(GV);
+    switch (Kind) {
+    case SectionKind::ThreadData:
+    case SectionKind::ThreadBSS:
+    case SectionKind::Data:
+    case SectionKind::BSS:
+    case SectionKind::SmallData:
+    case SectionKind::SmallBSS:
+      Flags |= SectionFlags::Small;
+      break;
+    case SectionKind::ROData:
+    case SectionKind::RODataMergeStr:
+    case SectionKind::SmallROData:
+      if (Subtarget->isXS1A()) {
+        Flags |= SectionFlags::Writeable;
+      }
+      Flags |= SectionFlags::Small;
+      break;
+    case SectionKind::RODataMergeConst:
+      Flags |= SectionFlags::Small;
+    default:
+      break;
+    }
+  }
+
+  return Flags;
+}
+
+std::string XCoreTargetAsmInfo::
+printSectionFlags(unsigned flags) const {
+  std::string Flags = ",\"";
+
+  if (!(flags & SectionFlags::Debug))
+    Flags += 'a';
+  if (flags & SectionFlags::Code)
+    Flags += 'x';
+  if (flags & SectionFlags::Writeable)
+    Flags += 'w';
+  if (flags & SectionFlags::Mergeable)
+    Flags += 'M';
+  if (flags & SectionFlags::Strings)
+    Flags += 'S';
+  if (flags & SectionFlags::TLS)
+    Flags += 'T';
+  if (flags & SectionFlags::Small) {
+    if (flags & SectionFlags::Writeable)
+      Flags += 'd'; // DP relative
+    else
+      Flags += 'c'; // CP relative
+  }
+
+  Flags += "\",";
+
+  Flags += '@';
+
+  if (flags & SectionFlags::BSS)
+    Flags += "nobits";
+  else
+    Flags += "progbits";
+
+  if (unsigned entitySize = SectionFlags::getEntitySize(flags))
+    Flags += "," + utostr(entitySize);
+
+  return Flags;
+}
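Tracing printSectionFlags for a 4-byte mergeable constant-pool section (Mergeable | Small, read-only, entity size 4): 'a' is appended for any non-debug section, then 'M' for mergeable, then 'c' because the section is small but not writable. Combined with MergeableConstSection above, the directive would take roughly this form (reconstructed by hand, not captured compiler output):

    //   .section .cp.const4,"aMc",@progbits,4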
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.h b/lib/Target/XCore/XCoreTargetAsmInfo.h
new file mode 100644
index 000000000000..79fd36aa23ce
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.h
@@ -0,0 +1,45 @@
+//=====-- XCoreTargetAsmInfo.h - XCore asm properties ---------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+  // Forward declarations.
+  class XCoreTargetMachine;
+  class XCoreSubtarget;
+
+  class XCoreTargetAsmInfo : public ELFTargetAsmInfo {
+  private:
+    const XCoreSubtarget *Subtarget;
+  public:
+    explicit XCoreTargetAsmInfo(const XCoreTargetMachine &TM);
+
+    virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const;
+    virtual std::string printSectionFlags(unsigned flags) const;
+    const Section* MergeableConstSection(const GlobalVariable *GV) const;
+    inline const Section* MergeableConstSection(const Type *Ty) const;
+    const Section* MergeableStringSection(const GlobalVariable *GV) const;
+    virtual const Section*
+    SelectSectionForMachineConst(const Type *Ty) const;
+    virtual unsigned
+    SectionFlagsForGlobal(const GlobalValue *GV = NULL,
+                          const char* name = NULL) const;
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 000000000000..5437c574a80f
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,71 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library.  In particular, it seems that it is not possible to get
+/// things to work on Win32 without this.  Though it is unused, do not
+/// remove it.
+extern "C" int XCoreTargetMachineModule;
+int XCoreTargetMachineModule = 0;
+
+namespace {
+  // Register the target.
+  RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
+}
+
+const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
+  return new XCoreTargetAsmInfo(*this);
+}
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS)
+  : Subtarget(*this, M, FS),
+    DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+               "i16:16:32-i32:32:32-i64:32:32"),
+    InstrInfo(),
+    FrameInfo(*this),
+    TLInfo(*this) {
+}
+
+unsigned XCoreTargetMachine::getModuleMatchQuality(const Module &M) {
+  std::string TT = M.getTargetTriple();
+  if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "xcore-")
+    return 20;
+
+  // Otherwise we don't match.
+  return 0;
+}
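The DataLayout string in the constructor above pins down the ILP32 model the ctor comment mentions. Decoded field by field (standard LLVM datalayout syntax; only the notable fields are shown):

    // e            little-endian
    // p:32:32:32   32-bit pointers with 32-bit ABI and preferred alignment
    // i1:8:32      i1 stored in a byte, preferred alignment of 32 bits
    // i64:32:32    64-bit integers only require 32-bit alignment
    // f64:32:32    doubles likewise; there is no native 64-bit alignment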
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+                                         CodeGenOpt::Level OptLevel) {
+  PM.add(createXCoreISelDag(*this));
+  return false;
+}
+
+bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool Verbose,
+                                            raw_ostream &Out) {
+  // Output assembly language.
+  PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose));
+  return false;
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 000000000000..2385aedc9079
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+  XCoreSubtarget Subtarget;
+  const TargetData DataLayout;       // Calculates type size & alignment
+  XCoreInstrInfo InstrInfo;
+  XCoreFrameInfo FrameInfo;
+  XCoreTargetLowering TLInfo;
+
+protected:
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+  XCoreTargetMachine(const Module &M, const std::string &FS);
+
+  virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+  virtual XCoreTargetLowering *getTargetLowering() const {
+    return const_cast<XCoreTargetLowering*>(&TLInfo);
+  }
+
+  virtual const TargetRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  static unsigned getModuleMatchQuality(const Module &M);
+
+  // Pass Pipeline Configuration
+  virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addAssemblyEmitter(PassManagerBase &PM,
+                                  CodeGenOpt::Level OptLevel,
+                                  bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
new file mode 100644
index 000000000000..b80d15ba76d6
--- /dev/null
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library( LLVMHello
+  Hello.cpp
+  )
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
new file mode 100644
index 000000000000..d07f6135257f
--- /dev/null
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -0,0 +1,67 @@
+//===- Hello.cpp - Example code from "Writing an LLVM Pass" ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements two versions of the LLVM "Hello World" pass described
+// in docs/WritingAnLLVMPass.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hello"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(HelloCounter, "Counts number of functions greeted");
+
+namespace {
+  // Hello - The first implementation, without getAnalysisUsage.
+  struct Hello : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    Hello() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      HelloCounter++;
+      std::string fname = F.getName();
+      EscapeString(fname);
+      cerr << "Hello: " << fname << "\n";
+      return false;
+    }
+  };
+}
+
+char Hello::ID = 0;
+static RegisterPass<Hello> X("hello", "Hello World Pass");
+
+namespace {
+  // Hello2 - The second implementation with getAnalysisUsage implemented.
+  struct Hello2 : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    Hello2() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      HelloCounter++;
+      std::string fname = F.getName();
+      EscapeString(fname);
+      cerr << "Hello: " << fname << "\n";
+      return false;
+    }
+
+    // We don't modify the program, so we preserve all analyses
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    };
+  };
+}
+
+char Hello2::ID = 0;
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile
new file mode 100644
index 000000000000..c5e75d43af65
--- /dev/null
+++ b/lib/Transforms/Hello/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Transforms/Hello/Makefile -----------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMHello
+LOADABLE_MODULE = 1
+USEDLIBS =
+
+include $(LEVEL)/Makefile.common
+
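Because LOADABLE_MODULE is set in the Makefile above, LLVMHello builds as a shared object that is loaded into opt at run time, as described in docs/WritingAnLLVMPass.html. The usual invocation looks like this (the path to the .so is illustrative):

    opt -load path/to/LLVMHello.so -hello < input.bc > /dev/null
    Hello: main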
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 000000000000..2bb6428060c3
--- /dev/null
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,863 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments.  In
+// practice, this means looking for internal functions that have pointer
+// arguments.  If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value.  This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded.  Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable!  This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but does not currently.  This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead     , "Number of dead pointer args eliminated");
+
+namespace {
+  /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+  ///
+  struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetData>();
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    static char ID; // Pass identification, replacement for typeid
+    explicit ArgPromotion(unsigned maxElements = 3)
+      : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+
+    /// A vector used to hold the indices of a single GEP instruction
+    typedef std::vector<uint64_t> IndicesVector;
+
+  private:
+    bool PromoteArguments(CallGraphNode *CGN);
+    bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+    Function *DoPromotion(Function *F,
+                          SmallPtrSet<Argument*, 8> &ArgsToPromote,
+                          SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+    /// The maximum number of elements to expand, or 0 for unlimited.
+    unsigned maxElements;
+  };
+}
+
+char ArgPromotion::ID = 0;
+static RegisterPass<ArgPromotion>
+X("argpromotion", "Promote 'by reference' arguments to scalars");
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+  return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+  bool Changed = false, LocalChange;
+
+  do {  // Iterate until we stop promoting from this SCC.
+    LocalChange = false;
+    // Attempt to promote arguments from all functions in this SCC.
+    for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+      LocalChange |= PromoteArguments(SCC[i]);
+    Changed |= LocalChange;  // Remember that we changed something.
+  } while (LocalChange);
+
+  return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct).  If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+  Function *F = CGN->getFunction();
+
+  // Make sure that it is local to this module.
+  if (!F || !F->hasLocalLinkage()) return false;
+
+  // First check: see if there are any pointer arguments!  If not, quick exit.
+  SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+  unsigned ArgNo = 0;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I, ++ArgNo)
+    if (isa<PointerType>(I->getType()))
+      PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+  if (PointerArgs.empty()) return false;
+
+  // Second check: make sure that all callers are direct callers.  We can't
+  // transform functions that have indirect callers.
+  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+       UI != E; ++UI) {
+    CallSite CS = CallSite::get(*UI);
+    if (!CS.getInstruction())  // "Taking the address" of the function
+      return false;
+
+    // Ensure that this call site is CALLING the function, not passing it as
+    // an argument.
+    if (!CS.isCallee(UI))
+      return false;
+  }
+
+  // Check to see which arguments are promotable.  If an argument is promotable,
+  // add it to ArgsToPromote.
+  SmallPtrSet<Argument*, 8> ArgsToPromote;
+  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+  for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+    bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+
+    // If this is a byval argument, and if the aggregate type is small, just
+    // pass the elements, which is always safe.
+    Argument *PtrArg = PointerArgs[i].first;
+    if (isByVal) {
+      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+        if (maxElements > 0 && STy->getNumElements() > maxElements) {
+          DOUT << "argpromotion disable promoting argument '"
+               << PtrArg->getName() << "' because it would require adding more "
+               << "than " << maxElements << " arguments to the function.\n";
+        } else {
+          // If all the elements are single-value types, we can promote it.
+          bool AllSimple = true;
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+            if (!STy->getElementType(i)->isSingleValueType()) {
+              AllSimple = false;
+              break;
+            }
+
+          // Safe to transform, don't even bother trying to "promote" it.
+          // Passing the elements as a scalar will allow scalarrepl to hack on
+          // the new alloca we introduce.
+          if (AllSimple) {
+            ByValArgsToTransform.insert(PtrArg);
+            continue;
+          }
+        }
+      }
+    }
+
+    // Otherwise, see if we can promote the pointer to its value.
+    if (isSafeToPromoteArgument(PtrArg, isByVal))
+      ArgsToPromote.insert(PtrArg);
+  }
+
+  // No promotable pointer arguments.
+  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false;
+
+  Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+
+  // Update the call graph to know that the function has been transformed.
+  getAnalysis<CallGraph>().changeFunction(F, NewF);
+  return true;
+}
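At the source level, the transformation PromoteArguments drives corresponds to turning a pointer parameter that is only ever loaded into a plain value parameter. A C-level illustration of the before/after (the pass itself operates on LLVM IR; this example is hand-written, not pass output):

    // Before: internal function whose pointer argument is only loaded.
    static int f(const int *p) { return *p + 1; }
    // After argpromotion: each caller loads the value and passes it directly,
    // which often lets later passes eliminate the caller's alloca entirely.
    static int f(int p_val) { return p_val + 1; }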
+
+/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
+/// to load.
+static bool IsAlwaysValidPointer(Value *V) {
+  if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
+    return IsAlwaysValidPointer(GEP->getOperand(0));
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      return IsAlwaysValidPointer(CE->getOperand(0));
+
+  return false;
+}
+
+/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// all callees pass in a valid pointer for the specified function argument.
+static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+  Function *Callee = Arg->getParent();
+
+  unsigned ArgNo = std::distance(Callee->arg_begin(),
+                                 Function::arg_iterator(Arg));
+
+  // Look at all call sites of the function.  At this point we know we only
+  // have direct callees.
+  for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+       UI != E; ++UI) {
+    CallSite CS = CallSite::get(*UI);
+    assert(CS.getInstruction() && "Should only have direct calls!");
+
+    if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+      return false;
+  }
+  return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That means, Longer has a size
+/// that is greater than or equal to the size of Prefix, and each of the
+/// elements in Prefix is the same as the corresponding elements in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+                     const ArgPromotion::IndicesVector &Longer) {
+  if (Prefix.size() > Longer.size())
+    return false;
+  for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
+    if (Prefix[i] != Longer[i])
+      return false;
+  return true;
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+                     std::set<ArgPromotion::IndicesVector> &Set) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Set.upper_bound(Indices);
+  if (Low != Set.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This means
+  // it points to a prefix of Indices (possibly Indices itself), if such
+  // prefix exists.
+  //
+  // This load is safe if any prefix of its operands is safe to load.
+  return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of Indices in Safe, Indices are implicitly marked safe
+/// already. Furthermore, any indices that Indices is itself a prefix of, are
+/// removed from Safe (since they are implicitly safe because of Indices now).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+                            std::set<ArgPromotion::IndicesVector> &Safe) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Safe.upper_bound(ToMark);
+  // Guard against the case where Safe is empty
+  if (Low != Safe.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This
+  // means it points to a prefix of Indices (possibly Indices itself), if
+  // such prefix exists.
+  if (Low != Safe.end()) {
+    if (IsPrefix(*Low, ToMark))
+      // If there is already a prefix of these indices (or exactly these
+      // indices) marked as safe, don't bother adding these indices
+      return;
+
+    // Increment Low, so we can use it as a "insert before" hint
+    ++Low;
+  }
+  // Insert
+  Low = Safe.insert(Low, ToMark);
+  ++Low;
+  // If ToMark was a prefix of longer index list(s), remove those
+  std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+  while (Low != End && IsPrefix(ToMark, *Low)) {
+    std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+    ++Low;
+    Safe.erase(Remove);
+  }
+}
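MarkIndicesSafe and PrefixIn above maintain the safe-index set as a minimal set of prefixes. A small worked trace (values invented for illustration):

    // Safe = { {0}, {1,2} }
    // MarkIndicesSafe({1}, Safe):
    //   {1} has no prefix already in Safe, so it is inserted;
    //   {1,2} now has {1} as a prefix and is erased.
    // Safe = { {0}, {1} }
    // PrefixIn({1,3}, Safe) == true, since {1} is a prefix of {1,3}.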
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+  typedef std::set<IndicesVector> GEPIndicesSet;
+
+  // Quick exit for unused arguments
+  if (Arg->use_empty())
+    return true;
+
+  // We can only promote this argument if all of the uses are loads, or are GEP
+  // instructions (with constant indices) that are subsequently loaded.
+  //
+  // Promoting the argument causes it to be loaded in the caller
+  // unconditionally. This is only safe if we can prove that either the load
+  // would have happened in the callee anyway (ie, there is a load in the entry
+  // block) or the pointer passed in at every call site is guaranteed to be
+  // valid.
+  // In the former case, invalid loads can happen, but would have happened
+  // anyway, in the latter case, invalid loads won't happen. This prevents us
+  // from introducing an invalid load that wouldn't have happened in the
+  // original code.
+  //
+  // This set will contain all sets of indices that are loaded in the entry
+  // block, and thus are safe to unconditionally load in the caller.
+  GEPIndicesSet SafeToUnconditionallyLoad;
+
+  // This set contains all the sets of indices that we are planning to promote.
+  // This makes it possible to limit the number of arguments added.
+  GEPIndicesSet ToPromote;
+
+  // If the pointer is always valid, any load with first index 0 is valid.
+  if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+  // First, iterate the entry block and mark loads of (geps of) arguments as
+  // safe.
+  BasicBlock *EntryBlock = Arg->getParent()->begin();
+  // Declare this here so we can reuse it
+  IndicesVector Indices;
+  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+       I != E; ++I)
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      Value *V = LI->getPointerOperand();
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+        V = GEP->getPointerOperand();
+        if (V == Arg) {
+          // This load actually loads (part of) Arg? Check the indices then.
+          Indices.reserve(GEP->getNumIndices());
+          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+               II != IE; ++II)
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+              Indices.push_back(CI->getSExtValue());
+            else
+              // We found a non-constant GEP index for this argument? Bail out
+              // right away, can't promote this argument at all.
+              return false;
+
+          // Indices checked out, mark them as safe
+          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+          Indices.clear();
+        }
+      } else if (V == Arg) {
+        // Direct loads are equivalent to a GEP with a single 0 index.
+        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+      }
+    }
+
+  // Now, iterate all uses of the argument to see if there are any uses that are
+  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+  SmallVector<LoadInst*, 16> Loads;
+  IndicesVector Operands;
+  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+       UI != E; ++UI) {
+    Operands.clear();
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      if (LI->isVolatile()) return false;  // Don't hack volatile loads
+      Loads.push_back(LI);
+      // Direct loads are equivalent to a GEP with a zero index and then a load.
+      Operands.push_back(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+      if (GEP->use_empty()) {
+        // Dead GEP's cause trouble later.  Just remove them if we run into
+        // them.
+        getAnalysis<AliasAnalysis>().deleteValue(GEP);
+        GEP->eraseFromParent();
+        // TODO: This runs the above loop over and over again for dead GEPs
+        // Couldn't we just increment the UI iterator earlier and erase the
+        // use?
+        return isSafeToPromoteArgument(Arg, isByVal);
+      }
+
+      // Ensure that all of the indices are constants.
+      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+           i != e; ++i)
+        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+          Operands.push_back(C->getSExtValue());
+        else
+          return false;  // Not a constant operand GEP!
+
+      // Ensure that the only users of the GEP are load instructions.
+      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+           UI != E; ++UI)
+        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+          if (LI->isVolatile()) return false;  // Don't hack volatile loads
+          Loads.push_back(LI);
+        } else {
+          // Other uses than load?
+          return false;
+        }
+    } else {
+      return false;  // Not a load or a GEP.
+    }
+
+    // Now, see if it is safe to promote this load / loads of this GEP. Loading
+    // is safe if Operands, or a prefix of Operands, is marked as safe.
+    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+      return false;
+
+    // See if we are already promoting a load with these indices. If not, check
+    // to make sure that we aren't promoting too many elements.  If so, nothing
+    // to do.
+    if (ToPromote.find(Operands) == ToPromote.end()) {
+      if (maxElements > 0 && ToPromote.size() == maxElements) {
+        DOUT << "argpromotion not promoting argument '"
+             << Arg->getName() << "' because it would require adding more "
+             << "than " << maxElements << " arguments to the function.\n";
+        // We limit aggregate promotion to only promoting up to a fixed number
+        // of elements of the aggregate.
+        return false;
+      }
+      ToPromote.insert(Operands);
+    }
+  }
+
+  if (Loads.empty()) return true;  // No users, this is a dead argument.
+
+  // Okay, now we know that the argument is only used by load instructions and
+  // it is safe to unconditionally perform all of them. Use alias analysis to
+  // check to see if the pointer is guaranteed to not be modified from entry of
+  // the function to each of the load instructions.
+
+  // Because there could be several/many load instructions, remember which
+  // blocks we know to be transparent to the load.
+  SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  TargetData &TD = getAnalysis<TargetData>();
+
+  for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+    // Check to see if the load is invalidated from the start of the block to
+    // the load itself.
+    LoadInst *Load = Loads[i];
+    BasicBlock *BB = Load->getParent();
+
+    const PointerType *LoadTy =
+      cast<PointerType>(Load->getPointerOperand()->getType());
+    unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType());
+
+    if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+      return false;  // Pointer is invalidated!
+
+    // Now check every path from the entry block to the load for transparency.
+    // To do this, we perform a depth first search on the inverse CFG from the
+    // loading block.
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+             I = idf_ext_begin(*PI, TranspBlocks),
+             E = idf_ext_end(*PI, TranspBlocks); I != E; ++I)
+        if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+          return false;
+  }
+
+  // If the path from the entry of the function to each load is free of
+  // instructions that potentially invalidate the load, we can make the
+  // transformation!
+  return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function.  At this point, we know that it's
+/// safe to do so.
+Function *ArgPromotion::DoPromotion(Function *F,
+                                    SmallPtrSet<Argument*, 8> &ArgsToPromote,
+                                    SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+  // Start by computing a new prototype for the function, which is the same as
+  // the old function, but has modified arguments.
+  const FunctionType *FTy = F->getFunctionType();
+  std::vector<const Type*> Params;
+
+  typedef std::set<IndicesVector> ScalarizeTable;
+
+  // ScalarizedElements - If we are promoting a pointer that has elements
+  // accessed out of it, keep track of which elements are accessed so that we
+  // can add one argument for each.
+  //
+  // Arguments that are directly loaded will have a zero element value here, to
+  // handle cases where there are both a direct load and GEP accesses.
+  //
+  std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+  // OriginalLoads - Keep track of a representative load instruction from the
+  // original function so that we can tell the alias analysis implementation
+  // what the new GEP/Load instructions we are inserting look like.
+  std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+  // Attributes - Keep track of the parameter attributes for the arguments
+  // that we are *not* promoting. For the ones that we do promote, the parameter
+  // attributes are lost
+  SmallVector<AttributeWithIndex, 8> AttributesVec;
+  const AttrListPtr &PAL = F->getAttributes();
+
+  // Add any return attributes.
+  if (Attributes attrs = PAL.getRetAttributes())
+    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+  // First, determine the new argument list
+  unsigned ArgIndex = 1;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+       ++I, ++ArgIndex) {
+    if (ByValArgsToTransform.count(I)) {
+      // Simple byval argument? Just add all the struct element types.
+      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+      const StructType *STy = cast<StructType>(AgTy);
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+        Params.push_back(STy->getElementType(i));
+      ++NumByValArgsPromoted;
+    } else if (!ArgsToPromote.count(I)) {
+      // Unchanged argument
+      Params.push_back(I->getType());
+      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+    } else if (I->use_empty()) {
+      // Dead argument (which are always marked as promotable)
+      ++NumArgumentsDead;
+    } else {
+      // Okay, this is being promoted. This means that the only uses are loads
+      // or GEPs which are only used by loads
+
+      // In this table, we will track which indices are loaded from the argument
+      // (where direct loads are tracked as no indices).
+      ScalarizeTable &ArgIndices = ScalarizedElements[I];
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+           ++UI) {
+        Instruction *User = cast<Instruction>(*UI);
+        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+        IndicesVector Indices;
+        Indices.reserve(User->getNumOperands() - 1);
+        // Since loads will only have a single operand, and GEPs only a single
+        // non-index operand, this will record direct loads without any indices,
+        // and gep+loads with the GEP indices.
+        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+             II != IE; ++II)
+          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+        // GEPs with a single 0 index can be merged with direct loads
+        if (Indices.size() == 1 && Indices.front() == 0)
+          Indices.clear();
+        ArgIndices.insert(Indices);
+        LoadInst *OrigLoad;
+        if (LoadInst *L = dyn_cast<LoadInst>(User))
+          OrigLoad = L;
+        else
+          // Take any load, we will use it only to update Alias Analysis
+          OrigLoad = cast<LoadInst>(User->use_back());
+        OriginalLoads[Indices] = OrigLoad;
+      }
+
+      // Add a parameter to the function for each element passed in.
+      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+             E = ArgIndices.end(); SI != E; ++SI) {
+        // not allowed to dereference ->begin() if size() is 0
+        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
+                                                           SI->begin(),
+                                                           SI->end()));
+        assert(Params.back());
+      }
+
+      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+        ++NumArgumentsPromoted;
+      else
+        ++NumAggregatesPromoted;
+    }
+  }
+
+  // Add any function attributes.
+  if (Attributes attrs = PAL.getFnAttributes())
+    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+  const Type *RetTy = FTy->getReturnType();
+
+  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+  // have zero fixed arguments.
+  bool ExtraArgHack = false;
+  if (Params.empty() && FTy->isVarArg()) {
+    ExtraArgHack = true;
+    Params.push_back(Type::Int32Ty);
+  }
+
+  // Construct the new function type using the new arguments.
+  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+  // Create the new function body and insert it into the module...
+  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+  NF->copyAttributesFrom(F);
+
+  // Recompute the parameter attributes list based on the new arguments for
+  // the function.
+  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+                                     AttributesVec.end()));
+  AttributesVec.clear();
+
+  F->getParent()->getFunctionList().insert(F, NF);
+  NF->takeName(F);
+
+  // Get the alias analysis information that we need to update to reflect our
+  // changes.
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Get the callgraph information that we need to update to reflect our
+  // changes.
+  CallGraph &CG = getAnalysis<CallGraph>();
+
+  // Loop over all of the callers of the function, transforming the call sites
+  // to pass in the loaded pointers.
+  //
+  SmallVector<Value*, 16> Args;
+  while (!F->use_empty()) {
+    CallSite CS = CallSite::get(F->use_back());
+    Instruction *Call = CS.getInstruction();
+    const AttrListPtr &CallPAL = CS.getAttributes();
+
+    // Add any return attributes.
+    if (Attributes attrs = CallPAL.getRetAttributes())
+      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+    // Loop over the operands, inserting GEP and loads in the caller as
+    // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin(); + ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI, ++ArgIndex) + if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + Args.push_back(*AI); // Unmodified argument + + if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + + } else if (ByValArgsToTransform.count(I)) { + // Emit a GEP and load for each element of the struct. + const Type *AgTy = cast(I->getType())->getElementType(); + const StructType *STy = cast(AgTy); + Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::Int32Ty, i); + Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, + (*AI)->getName()+"."+utostr(i), + Call); + // TODO: Tell AA about the new values? + Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); + } + } else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + // Store the Value* version of the indices in here, but declare it now + // for reuse + std::vector Ops; + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + Value *V = *AI; + LoadInst *OrigLoad = OriginalLoads[*SI]; + if (!SI->empty()) { + Ops.reserve(SI->size()); + const Type *ElTy = V->getType(); + for (IndicesVector::const_iterator II = SI->begin(), + IE = SI->end(); II != IE; ++II) { + // Use i32 to index structs, and i64 for others (pointers/arrays). + // This satisfies GEP constraints. + const Type *IdxTy = (isa(ElTy) ? Type::Int32Ty : Type::Int64Ty); + Ops.push_back(ConstantInt::get(IdxTy, *II)); + // Keep track of the type we're currently indexing + ElTy = cast(ElTy)->getTypeAtIndex(*II); + } + // And create a GEP to extract those indices + V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), + V->getName()+".idx", Call); + Ops.clear(); + AA.copyValue(OrigLoad->getOperand(0), V); + } + Args.push_back(new LoadInst(V, V->getName()+".val", Call)); + AA.copyValue(OrigLoad, Args.back()); + } + } + + if (ExtraArgHack) + Args.push_back(Constant::getNullValue(Type::Int32Ty)); + + // Push any varargs arguments on the list + for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + Args.push_back(*AI); + if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + // Add any function attributes. + if (Attributes attrs = CallPAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + + Instruction *New; + if (InvokeInst *II = dyn_cast(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); + if (cast(Call)->isTailCall()) + cast(New)->setTailCall(); + } + Args.clear(); + AttributesVec.clear(); + + // Update the alias analysis implementation to know that we are replacing + // the old call with a new one. 
+ AA.replaceWithNewValue(Call, New); + + // Update the callgraph to know that the callsite has been transformed. + CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); + + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. + // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) { + if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + AA.replaceWithNewValue(I, I2); + ++I2; + continue; + } + + if (ByValArgsToTransform.count(I)) { + // In the callee, we create an alloca, and store each of the new incoming + // arguments into the alloca. + Instruction *InsertPt = NF->begin()->begin(); + + // Just add all the struct element types. + const Type *AgTy = cast(I->getType())->getElementType(); + Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); + const StructType *STy = cast(AgTy); + Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::Int32Ty, i); + std::string Name = TheAlloca->getName()+"."+utostr(i); + Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + Name, InsertPt); + I2->setName(I->getName()+"."+utostr(i)); + new StoreInst(I2++, Idx, InsertPt); + } + + // Anything that used the arg should now use the alloca. + I->replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(I); + AA.replaceWithNewValue(I, TheAlloca); + continue; + } + + if (I->use_empty()) { + AA.deleteValue(I); + continue; + } + + // Otherwise, if we promoted this argument, then all users are load + // instructions (or GEPs with only load users), and all loads should be + // using the new argument that we added. 
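+  // [Editorial note -- not part of the r72732 import.] On the callee side,
+  // each index list recorded in ScalarizedElements corresponds to exactly one
+  // new argument: a direct load of the old pointer maps to the empty index
+  // list, while a load through "gep %p, 0, 1" maps to the list {0, 1}. The
+  // loop below walks the remaining uses of the old argument and rewires each
+  // load to the matching new argument.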
+ ScalarizeTable &ArgIndices = ScalarizedElements[I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast(I->use_back())) { + assert(ArgIndices.begin()->empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(I2); + AA.replaceWithNewValue(LI, I2); + LI->eraseFromParent(); + DOUT << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"; + } else { + GetElementPtrInst *GEP = cast(I->use_back()); + IndicesVector Operands; + Operands.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + Operands.push_back(cast(*II)->getSExtValue()); + + // GEPs with a single 0 index can be merged with direct loads + if (Operands.size() == 1 && Operands.front() == 0) + Operands.clear(); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + *It != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + NewName += "." + utostr(Operands[i]); + } + NewName += ".val"; + TheArg->setName(NewName); + + DOUT << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"; + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. + while (!GEP->use_empty()) { + LoadInst *L = cast(GEP->use_back()); + L->replaceAllUsesWith(TheArg); + AA.replaceWithNewValue(L, TheArg); + L->eraseFromParent(); + } + AA.deleteValue(GEP); + GEP->eraseFromParent(); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. + for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) + ++I2; + } + + // Notify the alias analysis implementation that we inserted a new argument. + if (ExtraArgHack) + AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin()); + + + // Tell the alias analysis that the old function is about to disappear. + AA.replaceWithNewValue(F, NF); + + // Now that the old function is dead, delete it. + F->eraseFromParent(); + return NF; +} diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt new file mode 100644 index 000000000000..4b85e1388a68 --- /dev/null +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -0,0 +1,25 @@ +add_llvm_library(LLVMipo + FunctionAttrs.cpp + ArgumentPromotion.cpp + ConstantMerge.cpp + DeadArgumentElimination.cpp + DeadTypeElimination.cpp + ExtractGV.cpp + GlobalDCE.cpp + GlobalOpt.cpp + IndMemRemoval.cpp + InlineAlways.cpp + Inliner.cpp + InlineSimple.cpp + Internalize.cpp + IPConstantPropagation.cpp + LoopExtractor.cpp + LowerSetJmp.cpp + MergeFunctions.cpp + PartialSpecialization.cpp + PruneEH.cpp + RaiseAllocations.cpp + StripDeadPrototypes.cpp + StripSymbols.cpp + StructRetPromotion.cpp + ) diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp new file mode 100644 index 000000000000..237e6db1d335 --- /dev/null +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -0,0 +1,114 @@ +//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to a pass that merges duplicate global +// constants together into a single constant that is shared. This is useful +// because some passes (ie TraceValues) insert a lot of string constants into +// the program, regardless of whether or not an existing string is available. +// +// Algorithm: ConstantMerge is designed to build up a map of available constants +// and eliminate duplicates when it is initialized. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "constmerge" +#include "llvm/Transforms/IPO.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include +using namespace llvm; + +STATISTIC(NumMerged, "Number of global constants merged"); + +namespace { + struct VISIBILITY_HIDDEN ConstantMerge : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMerge() : ModulePass(&ID) {} + + // run - For this pass, process all of the globals in the module, + // eliminating duplicate constants. + // + bool runOnModule(Module &M); + }; +} + +char ConstantMerge::ID = 0; +static RegisterPass +X("constmerge", "Merge Duplicate Global Constants"); + +ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + +bool ConstantMerge::runOnModule(Module &M) { + // Map unique constant/section pairs to globals. We don't want to merge + // globals in different sections. + std::map, GlobalVariable*> CMap; + + // Replacements - This vector contains a list of replacements to perform. + std::vector > Replacements; + + bool MadeChange = false; + + // Iterate constant merging while we are still making progress. Merging two + // constants together may allow us to merge other constants together if the + // second level constants have initializers which point to the globals that + // were just merged. + while (1) { + // First pass: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. + // + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // If this GV is dead, remove it. + GV->removeDeadConstantUsers(); + if (GV->use_empty() && GV->hasLocalLinkage()) { + GV->eraseFromParent(); + continue; + } + + // Only process constants with initializers. + if (GV->isConstant() && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); + } + } + } + + if (Replacements.empty()) + return MadeChange; + CMap.clear(); + + // Now that we have figured out which replacements must be made, do them all + // now. This avoid invalidating the pointers in CMap, which are unneeded + // now. + for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { + // Eliminate any uses of the dead global... 
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second); + + // Delete the global value from the module... + M.getGlobalList().erase(Replacements[i].first); + } + + NumMerged += Replacements.size(); + Replacements.clear(); + } +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp new file mode 100644 index 000000000000..666db7e8d74b --- /dev/null +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -0,0 +1,944 @@ +//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass deletes dead arguments from internal functions. Dead argument +// elimination removes arguments which are directly dead, as well as arguments +// only passed into function calls as dead arguments of other functions. This +// pass also deletes dead return values in a similar way. +// +// This pass is often useful as a cleanup pass to run after aggressive +// interprocedural passes, which add possibly-dead arguments or return values. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "deadargelim" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Compiler.h" +#include +#include +using namespace llvm; + +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); + +namespace { + /// DAE - The dead argument elimination pass. + /// + class VISIBILITY_HIDDEN DAE : public ModulePass { + public: + + /// Struct that represents (part of) either a return value or a function + /// argument. Used so that arguments and return values can be used + /// interchangably. + struct RetOrArg { + RetOrArg(const Function* F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), + IsArg(IsArg) {} + const Function *F; + unsigned Idx; + bool IsArg; + + /// Make RetOrArg comparable, so we can put it into a map. + bool operator<(const RetOrArg &O) const { + if (F != O.F) + return F < O.F; + else if (Idx != O.Idx) + return Idx < O.Idx; + else + return IsArg < O.IsArg; + } + + /// Make RetOrArg comparable, so we can easily iterate the multimap. + bool operator==(const RetOrArg &O) const { + return F == O.F && Idx == O.Idx && IsArg == O.IsArg; + } + + std::string getDescription() const { + return std::string((IsArg ? "Argument #" : "Return value #")) + + utostr(Idx) + " of function " + F->getName(); + } + }; + + /// Liveness enum - During our initial pass over the program, we determine + /// that things are either alive or maybe alive. We don't mark anything + /// explicitly dead (even if we know they are), since anything not alive + /// with no registered uses (in Uses) will never be marked alive and will + /// thus become dead in the end. 
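+    // [Editorial note -- not part of the r72732 import.] Example of why two
+    // states suffice: for "void f(int x) { f(x); }", surveying f finds that
+    // the only use of x is as an argument to f itself, so x is recorded as
+    // MaybeLive with a use edge back onto itself. Nothing ever marks it Live,
+    // so it is treated as dead when the function is rewritten.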
+    enum Liveness { Live, MaybeLive };
+
+    /// Convenience wrapper
+    RetOrArg CreateRet(const Function *F, unsigned Idx) {
+      return RetOrArg(F, Idx, false);
+    }
+    /// Convenience wrapper
+    RetOrArg CreateArg(const Function *F, unsigned Idx) {
+      return RetOrArg(F, Idx, true);
+    }
+
+    typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+    /// This maps a return value or argument to any MaybeLive return values or
+    /// arguments it uses. This allows the MaybeLive values to be marked live
+    /// when any of their users is marked live.
+    /// For example (indices are left out for clarity):
+    ///  - Uses[ret F] = ret G
+    ///    This means that F calls G, and F returns the value returned by G.
+    ///  - Uses[arg F] = ret G
+    ///    This means that some function calls G and passes its result as an
+    ///    argument to F.
+    ///  - Uses[ret F] = arg F
+    ///    This means that F returns one of its own arguments.
+    ///  - Uses[arg F] = arg G
+    ///    This means that G calls F and passes one of its own (G's) arguments
+    ///    directly to F.
+    UseMap Uses;
+
+    typedef std::set<RetOrArg> LiveSet;
+    typedef std::set<const Function*> LiveFuncSet;
+
+    /// This set contains all values that have been determined to be live.
+    LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+    LiveFuncSet LiveFunctions;
+
+    typedef SmallVector<RetOrArg, 5> UseVector;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    DAE() : ModulePass(&ID) {}
+    bool runOnModule(Module &M);
+
+    virtual bool ShouldHackArguments() const { return false; }
+
+  private:
+    Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+    Liveness SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
+                       unsigned RetValNum = 0);
+    Liveness SurveyUses(Value *V, UseVector &MaybeLiveUses);
+
+    void SurveyFunction(Function &F);
+    void MarkValue(const RetOrArg &RA, Liveness L,
+                   const UseVector &MaybeLiveUses);
+    void MarkLive(const RetOrArg &RA);
+    void MarkLive(const Function &F);
+    void PropagateLiveness(const RetOrArg &RA);
+    bool RemoveDeadStuffFromFunction(Function *F);
+    bool DeleteDeadVarargs(Function &Fn);
+  };
+}
+
+
+char DAE::ID = 0;
+static RegisterPass<DAE>
+X("deadargelim", "Dead Argument Elimination");
+
+namespace {
+  /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+  /// deletes arguments to functions which are external. This is only for use
+  /// by bugpoint.
+  struct DAH : public DAE {
+    static char ID;
+    virtual bool ShouldHackArguments() const { return true; }
+  };
+}
+
+char DAH::ID = 0;
+static RegisterPass<DAH>
+Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+  assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+  if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+  // Ensure that the function is only directly called.
+  for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) {
+    // If this use is anything other than a call site, give up.
+    CallSite CS = CallSite::get(*I);
+    Instruction *TheCall = CS.getInstruction();
+    if (!TheCall) return false;   // Not a direct call site?
+ + // The addr of this function is passed to the call. + if (!CS.isCallee(I)) return false; + } + + // Okay, we know we can transform this function if safe. Scan its body + // looking for calls to llvm.vastart. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (IntrinsicInst *II = dyn_cast(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + return false; + } + } + } + + // If we get here, there are no calls to llvm.vastart in the function body, + // remove the "..." and adjust all the calls. + + // Start by computing a new prototype for the function, which is the same as + // the old function, but doesn't have isVarArg set. + const FunctionType *FTy = Fn.getFunctionType(); + std::vector Params(FTy->param_begin(), FTy->param_end()); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + unsigned NumArgs = Params.size(); + + // Create the new function body and insert it into the module... + Function *NF = Function::Create(NFTy, Fn.getLinkage()); + NF->copyAttributesFrom(&Fn); + Fn.getParent()->getFunctionList().insert(&Fn, NF); + NF->takeName(&Fn); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector Args; + while (!Fn.use_empty()) { + CallSite CS = CallSite::get(Fn.use_back()); + Instruction *Call = CS.getInstruction(); + + // Pass all the same arguments. + Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs); + + // Drop any attributes that were on the vararg arguments. + AttrListPtr PAL = CS.getAttributes(); + if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) { + SmallVector AttributesVec; + for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) + AttributesVec.push_back(PAL.getSlot(i)); + if (Attributes FnAttrs = PAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); + } + + Instruction *New; + if (InvokeInst *II = dyn_cast(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(PAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(PAL); + if (cast(Call)->isTailCall()) + cast(New)->setTailCall(); + } + Args.clear(); + + if (!Call->use_empty()) + Call->replaceAllUsesWith(New); + + New->takeName(Call); + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. While we're at + // it, remove the dead arguments from the DeadArguments list. + // + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++I2) { + // Move the name and users over to the new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + } + + // Finally, nuke the old function. 
+  Fn.eraseFromParent();
+  return true;
+}
+
+/// Convenience function that returns the number of return values. It returns
+/// 0 for void functions and 1 for functions not returning a struct. It
+/// returns the number of struct elements for functions returning a struct.
+static unsigned NumRetVals(const Function *F) {
+  if (F->getReturnType() == Type::VoidTy)
+    return 0;
+  else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+    return STy->getNumElements();
+  else
+    return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+  // We're live if our use or its Function is already marked as live.
+  if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+    return Live;
+
+  // We're maybe live otherwise, but remember that we must become live if
+  // Use becomes live.
+  MaybeLiveUses.push_back(Use);
+  return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeAlive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion; you should always leave
+/// it at 0.
+DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
+                             unsigned RetValNum) {
+  Value *V = *U;
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+    // The value is returned from a function. It's only live when the
+    // function's return value is live. We use RetValNum here, for the case
+    // that U is really a use of an insertvalue instruction that uses the
+    // original Use.
+    RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+    // We might be live, depending on the liveness of Use.
+    return MarkIfNotLive(Use, MaybeLiveUses);
+  }
+  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+    if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+        && IV->hasIndices())
+      // The use we are examining is inserted into an aggregate. Our liveness
+      // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+      RetValNum = *IV->idx_begin();
+
+    // Note that if we are used as the aggregate operand to the insertvalue,
+    // we don't change RetValNum, but do survey all our uses.
+
+    Liveness Result = MaybeLive;
+    for (Value::use_iterator I = IV->use_begin(),
+         E = V->use_end(); I != E; ++I) {
+      Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+      if (Result == Live)
+        break;
+    }
+    return Result;
+  }
+  CallSite CS = CallSite::get(V);
+  if (CS.getInstruction()) {
+    Function *F = CS.getCalledFunction();
+    if (F) {
+      // Used in a direct call.
+
+      // Find the argument number. We know for sure that this use is an
+      // argument, since if it was the function argument this would be an
+      // indirect call, and then we know we can't be looking at a value of
+      // the label type (for the invoke instruction).
+      unsigned ArgNo = CS.getArgumentNo(U.getOperandNo());
+
+      if (ArgNo >= F->getFunctionType()->getNumParams())
+        // The value is passed in through a vararg! Must be live.
+        return Live;
+
+      assert(CS.getArgument(ArgNo)
+             == CS.getInstruction()->getOperand(U.getOperandNo())
+             && "Argument is not where we expected it");
+
+      // Value passed to a normal call.
It's only live when the corresponding + // argument to the called function turns out live. + RetOrArg Use = CreateArg(F, ArgNo); + return MarkIfNotLive(Use, MaybeLiveUses); + } + } + // Used in any other way? Value must be live. + return Live; +} + +/// SurveyUses - This looks at all the uses of the given value +/// Returns the Liveness deduced from the uses of this value. +/// +/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If +/// the result is Live, MaybeLiveUses might be modified but its content should +/// be ignored (since it might not be complete). +DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) { + // Assume it's dead (which will only hold if there are no uses at all..). + Liveness Result = MaybeLive; + // Check each use. + for (Value::use_iterator I = V->use_begin(), + E = V->use_end(); I != E; ++I) { + Result = SurveyUse(I, MaybeLiveUses); + if (Result == Live) + break; + } + return Result; +} + +// SurveyFunction - This performs the initial survey of the specified function, +// checking out whether or not it uses any of its incoming arguments or whether +// any callers use the return value. This fills in the LiveValues set and Uses +// map. +// +// We consider arguments of non-internal functions to be intrinsically alive as +// well as arguments to functions which have their "address taken". +// +void DAE::SurveyFunction(Function &F) { + unsigned RetCount = NumRetVals(&F); + // Assume all return values are dead + typedef SmallVector RetVals; + RetVals RetValLiveness(RetCount, MaybeLive); + + typedef SmallVector RetUses; + // These vectors map each return value to the uses that make it MaybeLive, so + // we can add those to the Uses map if the return value really turns out to be + // MaybeLive. Initialized to a list of RetCount empty lists. + RetUses MaybeLiveRetUses(RetCount); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) + if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() + != F.getFunctionType()->getReturnType()) { + // We don't support old style multiple return values. + MarkLive(F); + return; + } + + if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) { + MarkLive(F); + return; + } + + DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n"; + // Keep track of the number of live retvals, so we can skip checks once all + // of them turn out to be live. + unsigned NumLiveRetVals = 0; + const Type *STy = dyn_cast(F.getReturnType()); + // Loop all uses of the function. + for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) { + // If the function is PASSED IN as an argument, its address has been + // taken. + CallSite CS = CallSite::get(*I); + if (!CS.getInstruction() || !CS.isCallee(I)) { + MarkLive(F); + return; + } + + // If this use is anything other than a call site, the function is alive. + Instruction *TheCall = CS.getInstruction(); + if (!TheCall) { // Not a direct call site? + MarkLive(F); + return; + } + + // If we end up here, we are looking at a direct call to our function. + + // Now, check how our return value(s) is/are used in this caller. Don't + // bother checking return values if all of them are live already. + if (NumLiveRetVals != RetCount) { + if (STy) { + // Check all uses of the return value. 
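+          // [Editorial sketch -- not part of the r72732 import; IR names are
+          // illustrative.] For a struct-returning call such as
+          //   %r = call {i32, i32} @f()
+          //   %x = extractvalue {i32, i32} %r, 0
+          // only return index 0 inherits the liveness of %x; index 1 stays
+          // MaybeLive and can still be dropped if no other user extracts it.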
+ for (Value::use_iterator I = TheCall->use_begin(), + E = TheCall->use_end(); I != E; ++I) { + ExtractValueInst *Ext = dyn_cast(*I); + if (Ext && Ext->hasIndices()) { + // This use uses a part of our return value, survey the uses of + // that part and store the results for this index only. + unsigned Idx = *Ext->idx_begin(); + if (RetValLiveness[Idx] != Live) { + RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]); + if (RetValLiveness[Idx] == Live) + NumLiveRetVals++; + } + } else { + // Used by something else than extractvalue. Mark all return + // values as live. + for (unsigned i = 0; i != RetCount; ++i ) + RetValLiveness[i] = Live; + NumLiveRetVals = RetCount; + break; + } + } + } else { + // Single return value + RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]); + if (RetValLiveness[0] == Live) + NumLiveRetVals = RetCount; + } + } + } + + // Now we've inspected all callers, record the liveness of our return values. + for (unsigned i = 0; i != RetCount; ++i) + MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); + + DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n"; + + // Now, check all of our arguments. + unsigned i = 0; + UseVector MaybeLiveArgUses; + for (Function::arg_iterator AI = F.arg_begin(), + E = F.arg_end(); AI != E; ++AI, ++i) { + // See what the effect of this use is (recording any uses that cause + // MaybeLive in MaybeLiveArgUses). + Liveness Result = SurveyUses(AI, MaybeLiveArgUses); + // Mark the result. + MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses); + // Clear the vector again for the next iteration. + MaybeLiveArgUses.clear(); + } +} + +/// MarkValue - This function marks the liveness of RA depending on L. If L is +/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses, +/// such that RA will be marked live if any use in MaybeLiveUses gets marked +/// live later on. +void DAE::MarkValue(const RetOrArg &RA, Liveness L, + const UseVector &MaybeLiveUses) { + switch (L) { + case Live: MarkLive(RA); break; + case MaybeLive: + { + // Note any uses of this value, so this return value can be + // marked live whenever one of the uses becomes live. + for (UseVector::const_iterator UI = MaybeLiveUses.begin(), + UE = MaybeLiveUses.end(); UI != UE; ++UI) + Uses.insert(std::make_pair(*UI, RA)); + break; + } + } +} + +/// MarkLive - Mark the given Function as alive, meaning that it cannot be +/// changed in any way. Additionally, +/// mark any values that are used as this function's parameters or by its return +/// values (according to Uses) live as well. +void DAE::MarkLive(const Function &F) { + DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n"; + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); +} + +/// MarkLive - Mark the given return value or argument as live. Additionally, +/// mark any values that are used by this value (according to Uses) live as +/// well. +void DAE::MarkLive(const RetOrArg &RA) { + if (LiveFunctions.count(RA.F)) + return; // Function was already marked Live. + + if (!LiveValues.insert(RA).second) + return; // We were already marked Live. 
+ + DOUT << "DAE - Marking " << RA.getDescription() << " live\n"; + PropagateLiveness(RA); +} + +/// PropagateLiveness - Given that RA is a live value, propagate it's liveness +/// to any other values it uses (according to Uses). +void DAE::PropagateLiveness(const RetOrArg &RA) { + // We don't use upper_bound (or equal_range) here, because our recursive call + // to ourselves is likely to cause the upper_bound (which is the first value + // not belonging to RA) to become erased and the iterator invalidated. + UseMap::iterator Begin = Uses.lower_bound(RA); + UseMap::iterator E = Uses.end(); + UseMap::iterator I; + for (I = Begin; I != E && I->first == RA; ++I) + MarkLive(I->second); + + // Erase RA from the Uses map (from the lower bound to wherever we ended up + // after the loop). + Uses.erase(Begin, I); +} + +// RemoveDeadStuffFromFunction - Remove any arguments and return values from F +// that are not in LiveValues. Transform the function and all of the callees of +// the function to not have these arguments and return values. +// +bool DAE::RemoveDeadStuffFromFunction(Function *F) { + // Don't modify fully live functions + if (LiveFunctions.count(F)) + return false; + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has fewer arguments and a different return type. + const FunctionType *FTy = F->getFunctionType(); + std::vector Params; + + // Set up to build a new list of parameter attributes. + SmallVector AttributesVec; + const AttrListPtr &PAL = F->getAttributes(); + + // The existing function return attributes. + Attributes RAttrs = PAL.getRetAttributes(); + Attributes FnAttrs = PAL.getFnAttributes(); + + // Find out the new return value. + + const Type *RetTy = FTy->getReturnType(); + const Type *NRetTy = NULL; + unsigned RetCount = NumRetVals(F); + // -1 means unused, other numbers are the new index + SmallVector NewRetIdxs(RetCount, -1); + std::vector RetTypes; + if (RetTy == Type::VoidTy) { + NRetTy = Type::VoidTy; + } else { + const StructType *STy = dyn_cast(RetTy); + if (STy) + // Look at each of the original return values individually. + for (unsigned i = 0; i != RetCount; ++i) { + RetOrArg Ret = CreateRet(F, i); + if (LiveValues.erase(Ret)) { + RetTypes.push_back(STy->getElementType(i)); + NewRetIdxs[i] = RetTypes.size() - 1; + } else { + ++NumRetValsEliminated; + DOUT << "DAE - Removing return value " << i << " from " + << F->getNameStart() << "\n"; + } + } + else + // We used to return a single value. + if (LiveValues.erase(CreateRet(F, 0))) { + RetTypes.push_back(RetTy); + NewRetIdxs[0] = 0; + } else { + DOUT << "DAE - Removing return value from " << F->getNameStart() + << "\n"; + ++NumRetValsEliminated; + } + if (RetTypes.size() > 1) + // More than one return type? Return a struct with them. Also, if we used + // to return a struct and didn't change the number of return values, + // return a struct again. This prevents changing {something} into + // something and {} into void. + // Make the new struct packed if we used to return a packed struct + // already. + NRetTy = StructType::get(RetTypes, STy->isPacked()); + else if (RetTypes.size() == 1) + // One return type? Just a simple value then, but only if we didn't use to + // return a struct with that simple value before. + NRetTy = RetTypes.front(); + else if (RetTypes.size() == 0) + // No return types? Make it void, but only if we didn't use to return {}. 
+ NRetTy = Type::VoidTy; + } + + assert(NRetTy && "No new return type found?"); + + // Remove any incompatible attributes, but only if we removed all return + // values. Otherwise, ensure that we don't have any conflicting attributes + // here. Currently, this should not be possible, but special handling might be + // required when new return value attributes are added. + if (NRetTy == Type::VoidTy) + RAttrs &= ~Attribute::typeIncompatible(NRetTy); + else + assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + && "Return attributes no longer compatible?"); + + if (RAttrs) + AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + // Remember which arguments are still alive. + SmallVector ArgAlive(FTy->getNumParams(), false); + // Construct the new parameter list from non-dead arguments. Also construct + // a new set of parameter attributes to correspond. Skip the first parameter + // attribute, since that belongs to the return value. + unsigned i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++i) { + RetOrArg Arg = CreateArg(F, i); + if (LiveValues.erase(Arg)) { + Params.push_back(I->getType()); + ArgAlive[i] = true; + + // Get the original parameter attributes (skipping the first one, that is + // for the return value. + if (Attributes Attrs = PAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); + } else { + ++NumArgumentsEliminated; + DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart() + << ") from " << F->getNameStart() << "\n"; + } + } + + if (FnAttrs != Attribute::None) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Reconstruct the AttributesList based on the vector we constructed. + AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); + + // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which + // have zero fixed arguments. + // + // Note that we apply this hack for a vararg fuction that does not have any + // arguments anymore, but did have them before (so don't bother fixing + // functions that were already broken wrt CWriter). + bool ExtraArgHack = false; + if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) { + ExtraArgHack = true; + Params.push_back(Type::Int32Ty); + } + + // Create the new function type based on the recomputed parameters. + FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); + + // No change? + if (NFTy == FTy) + return false; + + // Create the new function body and insert it into the module... + Function *NF = Function::Create(NFTy, F->getLinkage()); + NF->copyAttributesFrom(F); + NF->setAttributes(NewPAL); + // Insert the new function before the old function, so we won't be processing + // it again. + F->getParent()->getFunctionList().insert(F, NF); + NF->takeName(F); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector Args; + while (!F->use_empty()) { + CallSite CS = CallSite::get(F->use_back()); + Instruction *Call = CS.getInstruction(); + + AttributesVec.clear(); + const AttrListPtr &CallPAL = CS.getAttributes(); + + // The call return attributes. + Attributes RAttrs = CallPAL.getRetAttributes(); + Attributes FnAttrs = CallPAL.getFnAttributes(); + // Adjust in case the function was changed to return void. 
+ RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + if (RAttrs) + AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + // Declare these outside of the loops, so we can reuse them for the second + // loop, which loops the varargs. + CallSite::arg_iterator I = CS.arg_begin(); + unsigned i = 0; + // Loop over those operands, corresponding to the normal arguments to the + // original function, and add those that are still alive. + for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i) + if (ArgAlive[i]) { + Args.push_back(*I); + // Get original parameter attributes, but skip return attributes. + if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + if (ExtraArgHack) + Args.push_back(UndefValue::get(Type::Int32Ty)); + + // Push any varargs arguments on the list. Don't forget their attributes. + for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { + Args.push_back(*I); + if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + if (FnAttrs != Attribute::None) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Reconstruct the AttributesList based on the vector we constructed. + AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end()); + + Instruction *New; + if (InvokeInst *II = dyn_cast(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(NewCallPAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(NewCallPAL); + if (cast(Call)->isTailCall()) + cast(New)->setTailCall(); + } + Args.clear(); + + if (!Call->use_empty()) { + if (New->getType() == Call->getType()) { + // Return type not changed? Just replace users then. + Call->replaceAllUsesWith(New); + New->takeName(Call); + } else if (New->getType() == Type::VoidTy) { + // Our return value has uses, but they will get removed later on. + // Replace by null for now. + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + } else { + assert(isa(RetTy) && + "Return type changed, but not into a void. The old return type" + " must have been a struct!"); + Instruction *InsertPt = Call; + if (InvokeInst *II = dyn_cast(Call)) { + BasicBlock::iterator IP = II->getNormalDest()->begin(); + while (isa(IP)) ++IP; + InsertPt = IP; + } + + // We used to return a struct. Instead of doing smart stuff with all the + // uses of this struct, we will just rebuild it using + // extract/insertvalue chaining and let instcombine clean that up. 
+ // + // Start out building up our return value from undef + Value *RetVal = llvm::UndefValue::get(RetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + Value *V; + if (RetTypes.size() > 1) + // We are still returning a struct, so extract the value from our + // return value + V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", + InsertPt); + else + // We are now returning a single element, so just insert that + V = New; + // Insert the value at the old position + RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); + } + // Now, replace all uses of the old call instruction with the return + // struct we built + Call->replaceAllUsesWith(RetVal); + New->takeName(Call); + } + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. + i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++i) + if (ArgAlive[i]) { + // If this is a live argument, move the name and users over to the new + // version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + ++I2; + } else { + // If this argument is dead, replace any uses of it with null constants + // (these are guaranteed to become unused later on). + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + } + + // If we change the return value of the function we must rewrite any return + // instructions. Check this now. + if (F->getReturnType() != NF->getReturnType()) + for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { + Value *RetVal; + + if (NFTy->getReturnType() == Type::VoidTy) { + RetVal = 0; + } else { + assert (isa(RetTy)); + // The original return value was a struct, insert + // extractvalue/insertvalue chains to extract only the values we need + // to return and insert them into our new result. + // This does generate messy code, but we'll let it to instcombine to + // clean that up. + Value *OldRet = RI->getOperand(0); + // Start out building up our return value from undef + RetVal = llvm::UndefValue::get(NRetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, + "oldret", RI); + if (RetTypes.size() > 1) { + // We're still returning a struct, so reinsert the value into + // our new return value at the new index + + RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i], + "newret", RI); + } else { + // We are now only returning a simple value, so just return the + // extracted value. + RetVal = EV; + } + } + } + // Replace the return instruction with one returning the new return + // value (possibly 0 if we became void). + ReturnInst::Create(RetVal, RI); + BB->getInstList().erase(RI); + } + + // Now that the old function is dead, delete it. + F->eraseFromParent(); + + return true; +} + +bool DAE::runOnModule(Module &M) { + bool Changed = false; + + // First pass: Do a simple check to see if any functions can have their "..." + // removed. We can do this if they never call va_start. 
+  // This loop cannot be fused with the next loop, because deleting a function
+  // invalidates information computed while surveying other functions.
+  DOUT << "DAE - Deleting dead varargs\n";
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    Function &F = *I++;
+    if (F.getFunctionType()->isVarArg())
+      Changed |= DeleteDeadVarargs(F);
+  }
+
+  // Second phase: loop through the module, determining which arguments are
+  // live. We assume all arguments are dead unless proven otherwise (allowing
+  // us to determine that dead arguments passed into recursive functions are
+  // dead).
+  //
+  DOUT << "DAE - Determining liveness\n";
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    SurveyFunction(*I);
+
+  // Now, remove all dead arguments and return values from each function in
+  // turn.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.
+    // replaced by a new one).
+    Function *F = I++;
+    Changed |= RemoveDeadStuffFromFunction(F);
+  }
+  return Changed;
+}
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
new file mode 100644
index 000000000000..85aed2b7915d
--- /dev/null
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -0,0 +1,107 @@
+//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to clean up the output of GCC. It eliminates names for
+// types that are unused in the entire translation unit, using the
+// FindUsedTypes pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadtypeelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
+
+namespace {
+  struct VISIBILITY_HIDDEN DTE : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    DTE() : ModulePass(&ID) {}
+
+    // doPassInitialization - For this pass, it removes global symbol table
+    // entries for primitive types. These are never used for linking in GCC
+    // and they make the output uglier to look at, so we nuke them.
+    //
+    // Also, initialize instance variables.
+    //
+    bool runOnModule(Module &M);
+
+    // getAnalysisUsage - This function needs FindUsedTypes to do its job...
+    //
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<FindUsedTypes>();
+    }
+  };
+}
+
+char DTE::ID = 0;
+static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+
+ModulePass *llvm::createDeadTypeEliminationPass() {
+  return new DTE();
+}
+
+
+// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
+// should be eliminated.
+//
+static inline bool ShouldNukeSymtabEntry(const Type *Ty){
+  // Nuke all names for primitive types!
+  if (Ty->isPrimitiveType() || Ty->isInteger())
+    return true;
+
+  // Nuke all pointers to primitive types as well...
+ if (const PointerType *PT = dyn_cast(Ty)) + if (PT->getElementType()->isPrimitiveType() || + PT->getElementType()->isInteger()) + return true; + + return false; +} + +// run - For this pass, it removes global symbol table entries for primitive +// types. These are never used for linking in GCC and they make the output +// uglier to look at, so we nuke them. Also eliminate types that are never used +// in the entire program as indicated by FindUsedTypes. +// +bool DTE::runOnModule(Module &M) { + bool Changed = false; + + TypeSymbolTable &ST = M.getTypeSymbolTable(); + std::set UsedTypes = getAnalysis().getTypes(); + + // Check the symbol table for superfluous type entries... + // + // Grab the 'type' plane of the module symbol... + TypeSymbolTable::iterator TI = ST.begin(); + TypeSymbolTable::iterator TE = ST.end(); + while ( TI != TE ) { + // If this entry should be unconditionally removed, or if we detect that + // the type is not used, remove it. + const Type *RHS = TI->second; + if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) { + ST.remove(TI++); + ++NumKilled; + Changed = true; + } else { + ++TI; + // We only need to leave one name for each type. + UsedTypes.erase(RHS); + } + } + + return Changed; +} + +// vim: sw=2 diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp new file mode 100644 index 000000000000..0c529d239d98 --- /dev/null +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -0,0 +1,173 @@ +//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass extracts global values +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Constants.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Support/Compiler.h" +#include +using namespace llvm; + +namespace { + /// @brief A pass to extract specific functions and their dependencies. + class VISIBILITY_HIDDEN GVExtractorPass : public ModulePass { + std::vector Named; + bool deleteStuff; + bool reLink; + public: + static char ID; // Pass identification, replacement for typeid + + /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the + /// specified function. Otherwise, it deletes as much of the module as + /// possible, except for the function specified. + /// + explicit GVExtractorPass(std::vector& GVs, bool deleteS = true, + bool relinkCallees = false) + : ModulePass(&ID), Named(GVs), deleteStuff(deleteS), + reLink(relinkCallees) {} + + bool runOnModule(Module &M) { + if (Named.size() == 0) { + return false; // Nothing to extract + } + + if (deleteStuff) + return deleteGV(); + M.setModuleInlineAsm(""); + return isolateGV(M); + } + + bool deleteGV() { + for (std::vector::iterator GI = Named.begin(), + GE = Named.end(); GI != GE; ++GI) { + if (Function* NamedFunc = dyn_cast(*GI)) { + // If we're in relinking mode, set linkage of all internal callees to + // external. 
This will allow us extract function, and then - link + // everything together + if (reLink) { + for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end(); + B != BE; ++B) { + for (BasicBlock::iterator I = B->begin(), E = B->end(); + I != E; ++I) { + if (CallInst* callInst = dyn_cast(&*I)) { + Function* Callee = callInst->getCalledFunction(); + if (Callee && Callee->hasLocalLinkage()) + Callee->setLinkage(GlobalValue::ExternalLinkage); + } + } + } + } + + NamedFunc->setLinkage(GlobalValue::ExternalLinkage); + NamedFunc->deleteBody(); + assert(NamedFunc->isDeclaration() && "This didn't make the function external!"); + } else { + if (!(*GI)->isDeclaration()) { + cast(*GI)->setInitializer(0); //clear the initializer + (*GI)->setLinkage(GlobalValue::ExternalLinkage); + } + } + } + return true; + } + + bool isolateGV(Module &M) { + // Mark all globals internal + // FIXME: what should we do with private linkage? + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + if (!I->isDeclaration()) { + I->setLinkage(GlobalValue::InternalLinkage); + } + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + I->setLinkage(GlobalValue::InternalLinkage); + } + + // Make sure our result is globally accessible... + // by putting them in the used array + { + std::vector AUGs; + const Type *SBP= PointerType::getUnqual(Type::Int8Ty); + for (std::vector::iterator GI = Named.begin(), + GE = Named.end(); GI != GE; ++GI) { + (*GI)->setLinkage(GlobalValue::ExternalLinkage); + AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP)); + } + ArrayType *AT = ArrayType::get(SBP, AUGs.size()); + Constant *Init = ConstantArray::get(AT, AUGs); + GlobalValue *gv = new GlobalVariable(AT, false, + GlobalValue::AppendingLinkage, + Init, "llvm.used", &M); + gv->setSection("llvm.metadata"); + } + + // All of the functions may be used by global variables or the named + // globals. Loop through them and create a new, external functions that + // can be "used", instead of ones with bodies. + std::vector NewFunctions; + + Function *Last = --M.end(); // Figure out where the last real fn is. + + for (Module::iterator I = M.begin(); ; ++I) { + if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { + Function *New = Function::Create(I->getFunctionType(), + GlobalValue::ExternalLinkage); + New->copyAttributesFrom(I); + + // If it's not the named function, delete the body of the function + I->dropAllReferences(); + + M.getFunctionList().push_back(New); + NewFunctions.push_back(New); + New->takeName(I); + } + + if (&*I == Last) break; // Stop after processing the last function + } + + // Now that we have replacements all set up, loop through the module, + // deleting the old functions, replacing them with the newly created + // functions. + if (!NewFunctions.empty()) { + unsigned FuncNum = 0; + Module::iterator I = M.begin(); + do { + if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { + // Make everything that uses the old function use the new dummy fn + I->replaceAllUsesWith(NewFunctions[FuncNum++]); + + Function *Old = I; + ++I; // Move the iterator to the new function + + // Delete the old function! 
+ M.getFunctionList().erase(Old); + + } else { + ++I; // Skip the function we are extracting + } + } while (&*I != NewFunctions[0]); + } + + return true; + } + }; + + char GVExtractorPass::ID = 0; +} + +ModulePass *llvm::createGVExtractionPass(std::vector& GVs, + bool deleteFn, bool relinkCallees) { + return new GVExtractorPass(GVs, deleteFn, relinkCallees); +} diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp new file mode 100644 index 000000000000..e8315247b23c --- /dev/null +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -0,0 +1,347 @@ +//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, looking for functions which do not access or only read +// non-local memory, and marking them readnone/readonly. In addition, +// it marks function arguments (of pointer type) 'nocapture' if a call +// to the function does not create any copies of the pointer value that +// outlive the call. This more or less means that the pointer is only +// dereferenced, and not returned from the function or stored in a global. +// This pass is implemented as a bottom-up traversal of the call-graph. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "functionattrs" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstIterator.h" +using namespace llvm; + +STATISTIC(NumReadNone, "Number of functions marked readnone"); +STATISTIC(NumReadOnly, "Number of functions marked readonly"); +STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); +STATISTIC(NumNoAlias, "Number of function returns marked noalias"); + +namespace { + struct VISIBILITY_HIDDEN FunctionAttrs : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + FunctionAttrs() : CallGraphSCCPass(&ID) {} + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(const std::vector &SCC); + + // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. + bool AddReadAttrs(const std::vector &SCC); + + // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. + bool AddNoCaptureAttrs(const std::vector &SCC); + + // IsFunctionMallocLike - Does this function allocate new memory? + bool IsFunctionMallocLike(Function *F, + SmallPtrSet &) const; + + // AddNoAliasAttrs - Deduce noalias attributes for the SCC. 
+    bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    bool PointsToLocalMemory(Value *V);
+  };
+}
+
+char FunctionAttrs::ID = 0;
+static RegisterPass<FunctionAttrs>
+X("functionattrs", "Deduce function attributes");
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// PointsToLocalMemory - Returns whether the given pointer value points to
+/// memory that is local to the function. Global constants are considered
+/// local to all functions.
+bool FunctionAttrs::PointsToLocalMemory(Value *V) {
+  V = V->getUnderlyingObject();
+  // An alloca instruction defines local memory.
+  if (isa<AllocaInst>(V))
+    return true;
+  // A global constant counts as local memory for our purposes.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return GV->isConstant();
+  // We could look through phi nodes and selects here, but it doesn't seem
+  // to be useful in practice.
+  return false;
+}
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
+  SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+  CallGraph &CG = getAnalysis<CallGraph>();
+
+  // Fill SCCNodes with the elements of the SCC. Used for quickly
+  // looking up whether a given CallGraphNode is in this SCC.
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+    SCCNodes.insert(SCC[i]);
+
+  // Check if any of the functions in the SCC read or write memory. If they
+  // write memory then they can't be marked readnone or readonly.
+  bool ReadsMemory = false;
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+    Function *F = SCC[i]->getFunction();
+
+    if (F == 0)
+      // External node - may write memory. Just give up.
+      return false;
+
+    if (F->doesNotAccessMemory())
+      // Already perfect!
+      continue;
+
+    // Definitions with weak linkage may be overridden at linktime with
+    // something that writes memory, so treat them like declarations.
+    if (F->isDeclaration() || F->mayBeOverridden()) {
+      if (!F->onlyReadsMemory())
+        // May write memory. Just give up.
+        return false;
+
+      ReadsMemory = true;
+      continue;
+    }
+
+    // Scan the function body for instructions that may read or write memory.
+    for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+      Instruction *I = &*II;
+
+      // Some instructions can be ignored even if they read or write memory.
+      // Detect these now, skipping to the next instruction if one is found.
+      CallSite CS = CallSite::get(I);
+      if (CS.getInstruction()) {
+        // Ignore calls to functions in the same SCC.
+        if (SCCNodes.count(CG[CS.getCalledFunction()]))
+          continue;
+      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        // Ignore loads from local memory.
+        if (PointsToLocalMemory(LI->getPointerOperand()))
+          continue;
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        // Ignore stores to local memory.
+        if (PointsToLocalMemory(SI->getPointerOperand()))
+          continue;
+      }
+
+      // Any remaining instructions need to be taken seriously! Check if they
+      // read or write memory.
+      if (I->mayWriteToMemory())
+        // Writes memory. Just give up.
+        return false;
+
+      if (isa<MallocInst>(I))
+        // MallocInst claims not to write memory! PR3754.
+        return false;
+
+      // If this instruction may read memory, remember that.
+      ReadsMemory |= I->mayReadFromMemory();
+    }
+  }
+
+  // Success! Functions in this SCC do not access memory, or only read memory.
+  // Give them the appropriate attribute.
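+  // For example (illustrative IR, names hypothetical, not part of this file):
+  // in an SCC whose only function is
+  //   define internal i32 @get(i32* %p) { %v = load i32* %p  ...  }
+  // the scan above sets ReadsMemory, so @get is given readonly below; an SCC
+  // whose functions touch no memory at all would be given readnone instead.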
+ bool MadeChange = false; + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + + if (F->doesNotAccessMemory()) + // Already perfect! + continue; + + if (F->onlyReadsMemory() && ReadsMemory) + // No change. + continue; + + MadeChange = true; + + // Clear out any existing attributes. + F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone); + + // Add in the new attribute. + F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone); + + if (ReadsMemory) + ++NumReadOnly; + else + ++NumReadNone; + } + + return MadeChange; +} + +/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. +bool FunctionAttrs::AddNoCaptureAttrs(const std::vector &SCC) { + bool Changed = false; + + // Check each function in turn, determining which pointer arguments are not + // captured. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + + if (F == 0) + // External node - skip it; + continue; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + continue; + + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) + if (isa(A->getType()) && !A->hasNoCaptureAttr() && + !PointerMayBeCaptured(A, true)) { + A->addAttr(Attribute::NoCapture); + ++NumNoCapture; + Changed = true; + } + } + + return Changed; +} + +/// IsFunctionMallocLike - A function is malloc-like if it returns either null +/// or a pointer that doesn't alias any other pointer visible to the caller. +bool FunctionAttrs::IsFunctionMallocLike(Function *F, + SmallPtrSet &SCCNodes) const { + CallGraph &CG = getAnalysis(); + + UniqueVector FlowsToReturn; + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) + if (ReturnInst *Ret = dyn_cast(I->getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved. + + if (Constant *C = dyn_cast(RetVal)) { + if (!C->isNullValue() && !isa(C)) + return false; + + continue; + } + + if (isa(RetVal)) + return false; + + if (Instruction *RVI = dyn_cast(RetVal)) + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::GetElementPtr: + case Instruction::BitCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + } continue; + case Instruction::PHI: { + PHINode *PN = cast(RVI); + for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + FlowsToReturn.insert(PN->getIncomingValue(i)); + } continue; + + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + case Instruction::Malloc: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) + break; + if (CS.getCalledFunction() && + SCCNodes.count(CG[CS.getCalledFunction()])) + break; + } // fall-through + default: + return false; // Did not come from an allocation. + } + + if (PointerMayBeCaptured(RetVal, false)) + return false; + } + + return true; +} + +/// AddNoAliasAttrs - Deduce noalias attributes for the SCC. +bool FunctionAttrs::AddNoAliasAttrs(const std::vector &SCC) { + SmallPtrSet SCCNodes; + + // Fill SCCNodes with the elements of the SCC. 
Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + SCCNodes.insert(SCC[i]); + + // Check each function in turn, determining which functions return noalias + // pointers. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + + if (F == 0) + // External node - skip it; + return false; + + // Already noalias. + if (F->doesNotAlias(0)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate noalias return values, which are only applicable to + // pointer types. + if (!isa(F->getReturnType())) + continue; + + if (!IsFunctionMallocLike(F, SCCNodes)) + return false; + } + + bool MadeChange = false; + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (F->doesNotAlias(0) || !isa(F->getReturnType())) + continue; + + F->setDoesNotAlias(0); + ++NumNoAlias; + MadeChange = true; + } + + return MadeChange; +} + +bool FunctionAttrs::runOnSCC(const std::vector &SCC) { + bool Changed = AddReadAttrs(SCC); + Changed |= AddNoCaptureAttrs(SCC); + Changed |= AddNoAliasAttrs(SCC); + return Changed; +} diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp new file mode 100644 index 000000000000..db378b0d0b28 --- /dev/null +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -0,0 +1,227 @@ +//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transform is designed to eliminate unreachable internal globals from the +// program. It uses an aggressive algorithm, searching out globals that are +// known to be alive. After it finds all of the globals which are needed, it +// deletes whatever is left over. This allows it to delete recursive chunks of +// the program which are unreachable. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globaldce" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include +using namespace llvm; + +STATISTIC(NumAliases , "Number of global aliases removed"); +STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumVariables, "Number of global variables removed"); + +namespace { + struct VISIBILITY_HIDDEN GlobalDCE : public ModulePass { + static char ID; // Pass identification, replacement for typeid + GlobalDCE() : ModulePass(&ID) {} + + // run - Do the GlobalDCE pass on the specified module, optionally updating + // the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M); + + private: + std::set AliveGlobals; + + /// GlobalIsNeeded - mark the specific global value as needed, and + /// recursively mark anything that it uses as also needed. 
+ void GlobalIsNeeded(GlobalValue *GV); + void MarkUsedGlobalsAsNeeded(Constant *C); + + bool SafeToDestroyConstant(Constant* C); + bool RemoveUnusedGlobalValue(GlobalValue &GV); + }; +} + +char GlobalDCE::ID = 0; +static RegisterPass X("globaldce", "Dead Global Elimination"); + +ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } + +bool GlobalDCE::runOnModule(Module &M) { + bool Changed = false; + // Loop over the module, adding globals which are obviously necessary. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Functions with external linkage are needed if they have a body + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + GlobalIsNeeded(I); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Externally visible & appending globals are needed, if they have an + // initializer. + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + GlobalIsNeeded(I); + } + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Externally visible aliases are needed. + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage()) + GlobalIsNeeded(I); + } + + // Now that all globals which are needed are in the AliveGlobals set, we loop + // through the program, deleting those which are not alive. + // + + // The first pass is to drop initializers of global variables which are dead. + std::vector DeadGlobalVars; // Keep track of dead globals + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadGlobalVars.push_back(I); // Keep track of dead globals + I->setInitializer(0); + } + + // The second pass drops the bodies of functions which are dead... + std::vector DeadFunctions; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadFunctions.push_back(I); // Keep track of dead globals + if (!I->isDeclaration()) + I->deleteBody(); + } + + // The third pass drops targets of aliases which are dead... + std::vector DeadAliases; + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; + ++I) + if (!AliveGlobals.count(I)) { + DeadAliases.push_back(I); + I->setAliasee(0); + } + + if (!DeadFunctions.empty()) { + // Now that all interferences have been dropped, delete the actual objects + // themselves. + for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadFunctions[i]); + M.getFunctionList().erase(DeadFunctions[i]); + } + NumFunctions += DeadFunctions.size(); + Changed = true; + } + + if (!DeadGlobalVars.empty()) { + for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadGlobalVars[i]); + M.getGlobalList().erase(DeadGlobalVars[i]); + } + NumVariables += DeadGlobalVars.size(); + Changed = true; + } + + // Now delete any dead aliases. 
+  if (!DeadAliases.empty()) {
+    for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+      RemoveUnusedGlobalValue(*DeadAliases[i]);
+      M.getAliasList().erase(DeadAliases[i]);
+    }
+    NumAliases += DeadAliases.size();
+    Changed = true;
+  }
+
+  // Make sure that all memory is released.
+  AliveGlobals.clear();
+  return Changed;
+}
+
+/// GlobalIsNeeded - Mark the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+  std::set<GlobalValue*>::iterator I = AliveGlobals.find(G);
+
+  // If the global is already in the set, no need to reprocess it.
+  if (I != AliveGlobals.end()) return;
+
+  // Otherwise insert it now, so we do not infinitely recurse.
+  AliveGlobals.insert(I, G);
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+    // If this is a global variable, we must make sure to add any global values
+    // referenced by the initializer to the alive set.
+    if (GV->hasInitializer())
+      MarkUsedGlobalsAsNeeded(GV->getInitializer());
+  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+    // The target of a global alias is needed.
+    MarkUsedGlobalsAsNeeded(GA->getAliasee());
+  } else {
+    // Otherwise this must be a function object. We have to scan the body of
+    // the function looking for constants and global values which are used as
+    // operands. Any operands of these types must be processed to ensure that
+    // any globals used will be marked as needed.
+    Function *F = cast<Function>(G);
+    // For all basic blocks...
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      // For all instructions...
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+        // For all operands...
+        for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+          if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+            GlobalIsNeeded(GV);
+          else if (Constant *C = dyn_cast<Constant>(*U))
+            MarkUsedGlobalsAsNeeded(C);
+  }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    GlobalIsNeeded(GV);
+  else {
+    // Loop over all of the operands of the constant, adding any globals they
+    // use to the list of needed globals.
+    for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+      MarkUsedGlobalsAsNeeded(cast<Constant>(*I));
+  }
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to
+// it. If found, check to see if the constant pointer ref is safe to destroy,
+// and if so, nuke it. This will reduce the reference count on the global
+// value, which might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+  if (GV.use_empty()) return false;
+  GV.removeDeadConstantUsers();
+  return GV.use_empty();
+}
+
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only
+// used by constants which are themselves safe to destroy. Note that constants
+// cannot be cyclic, so this test is pretty easy to implement recursively.
+// +bool GlobalDCE::SafeToDestroyConstant(Constant *C) { + for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I) + if (Constant *User = dyn_cast(*I)) { + if (!SafeToDestroyConstant(User)) return false; + } else { + return false; + } + return true; +} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp new file mode 100644 index 000000000000..2c01cc30bd69 --- /dev/null +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -0,0 +1,2485 @@ +//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms simple global variables that never have their address +// taken. If obviously true, it marks read/write globals as constant, deletes +// variables only stored to, etc. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalopt" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include +using namespace llvm; + +STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); +STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); +STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); +STATISTIC(NumDeleted , "Number of globals deleted"); +STATISTIC(NumFnDeleted , "Number of functions deleted"); +STATISTIC(NumGlobUses , "Number of global uses devirtualized"); +STATISTIC(NumLocalized , "Number of globals localized"); +STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); +STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); +STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); +STATISTIC(NumNestRemoved , "Number of nest attributes removed"); +STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); +STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); + +namespace { + struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + } + static char ID; // Pass identification, replacement for typeid + GlobalOpt() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + + private: + GlobalVariable *FindGlobalCtors(Module &M); + bool OptimizeFunctions(Module &M); + bool OptimizeGlobalVars(Module &M); + bool OptimizeGlobalAliases(Module &M); + bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + }; +} + +char GlobalOpt::ID = 0; +static RegisterPass X("globalopt", "Global Variable Optimizer"); + +ModulePass 
*llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } + +namespace { + +/// GlobalStatus - As we analyze each global, keep track of some information +/// about it. If we find out that the address of the global is taken, none of +/// this info will be accurate. +struct VISIBILITY_HIDDEN GlobalStatus { + /// isLoaded - True if the global is ever loaded. If the global isn't ever + /// loaded it can be deleted. + bool isLoaded; + + /// StoredType - Keep track of what stores to the global look like. + /// + enum StoredType { + /// NotStored - There is no store to this global. It can thus be marked + /// constant. + NotStored, + + /// isInitializerStored - This global is stored to, but the only thing + /// stored is the constant it was initialized with. This is only tracked + /// for scalar globals. + isInitializerStored, + + /// isStoredOnce - This global is stored to, but only its initializer and + /// one other value is ever stored to it. If this global isStoredOnce, we + /// track the value stored to it in StoredOnceValue below. This is only + /// tracked for scalar globals. + isStoredOnce, + + /// isStored - This global is stored to by multiple values or something else + /// that we cannot track. + isStored + } StoredType; + + /// StoredOnceValue - If only one value (besides the initializer constant) is + /// ever stored to this global, keep track of what value it is. + Value *StoredOnceValue; + + /// AccessingFunction/HasMultipleAccessingFunctions - These start out + /// null/false. When the first accessing function is noticed, it is recorded. + /// When a second different accessing function is noticed, + /// HasMultipleAccessingFunctions is set to true. + Function *AccessingFunction; + bool HasMultipleAccessingFunctions; + + /// HasNonInstructionUser - Set to true if this global has a user that is not + /// an instruction (e.g. a constant expr or GV initializer). + bool HasNonInstructionUser; + + /// HasPHIUser - Set to true if this global has a user that is a PHI node. + bool HasPHIUser; + + GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), + AccessingFunction(0), HasMultipleAccessingFunctions(false), + HasNonInstructionUser(false), HasPHIUser(false) {} +}; + +} + +/// ConstantIsDead - Return true if the specified constant is (transitively) +/// dead. The constant may be used by other constants (e.g. constant arrays and +/// constant exprs) as long as they are dead, but it cannot be used by anything +/// else. +static bool ConstantIsDead(Constant *C) { + if (isa(C)) return false; + + for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + if (Constant *CU = dyn_cast(*UI)) { + if (!ConstantIsDead(CU)) return false; + } else + return false; + return true; +} + + +/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus +/// structure. If the global has its address taken, return true to indicate we +/// can't do anything with it. 
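+/// For example (illustrative IR, not part of the original comment):
+/// "store i32 0, i32* @G" merely updates GS.StoredType, whereas
+/// "store i32* @G, i32** %p" passes the address of @G around and makes this
+/// analysis give up.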
+/// +static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, + SmallPtrSet &PHIUsers) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (ConstantExpr *CE = dyn_cast(*UI)) { + GS.HasNonInstructionUser = true; + + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; + + } else if (Instruction *I = dyn_cast(*UI)) { + if (!GS.HasMultipleAccessingFunctions) { + Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (LoadInst *LI = dyn_cast(I)) { + GS.isLoaded = true; + if (LI->isVolatile()) return true; // Don't hack on volatile loads. + } else if (StoreInst *SI = dyn_cast(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) return true; + + if (SI->isVolatile()) return true; // Don't hack on volatile stores. + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::isStored) { + if (GlobalVariable *GV = dyn_cast(SI->getOperand(1))){ + Value *StoredVal = SI->getOperand(0); + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (isa(StoredVal) && + cast(StoredVal)->getOperand(0) == GV) { + // G = G + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (GS.StoredType < GlobalStatus::isStoredOnce) { + GS.StoredType = GlobalStatus::isStoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::isStoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::isStored; + } + } else { + GS.StoredType = GlobalStatus::isStored; + } + } + } else if (isa(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + } else if (isa(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + } else if (PHINode *PN = dyn_cast(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PHIUsers.insert(PN)) // Not already visited. + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + GS.HasPHIUser = true; + } else if (isa(I)) { + } else if (isa(I)) { + if (I->getOperand(1) == V) + GS.StoredType = GlobalStatus::isStored; + if (I->getOperand(2) == V) + GS.isLoaded = true; + } else if (isa(I)) { + assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + GS.StoredType = GlobalStatus::isStored; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (Constant *C = dyn_cast(*UI)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!ConstantIsDead(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. 
+ return true; + } + + return false; +} + +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { + ConstantInt *CI = dyn_cast(Idx); + if (!CI) return 0; + unsigned IdxV = CI->getZExtValue(); + + if (ConstantStruct *CS = dyn_cast(Agg)) { + if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); + } else if (ConstantArray *CA = dyn_cast(Agg)) { + if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); + } else if (ConstantVector *CP = dyn_cast(Agg)) { + if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); + } else if (isa(Agg)) { + if (const StructType *STy = dyn_cast(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return Constant::getNullValue(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast(Agg->getType())) { + return Constant::getNullValue(STy->getElementType()); + } + } else if (isa(Agg)) { + if (const StructType *STy = dyn_cast(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return UndefValue::get(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast(Agg->getType())) { + return UndefValue::get(STy->getElementType()); + } + } + return 0; +} + + +/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all +/// users of the global, cleaning up the obvious ones. This is largely just a +/// quick scan over the use list to clean up the easy and obvious cruft. This +/// returns true if it made a change. +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { + User *U = *UI++; + + if (LoadInst *LI = dyn_cast(U)) { + if (Init) { + // Replace the load with the initializer. + LI->replaceAllUsesWith(Init); + LI->eraseFromParent(); + Changed = true; + } + } else if (StoreInst *SI = dyn_cast(U)) { + // Store must be unreachable or storing Init into the global. + SI->eraseFromParent(); + Changed = true; + } else if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Constant *SubInit = 0; + if (Init) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(CE, SubInit); + } else if (CE->getOpcode() == Instruction::BitCast && + isa(CE->getType())) { + // Pointer cast, delete any stores and memsets to the global. + Changed |= CleanupConstantGlobalUsers(CE, 0); + } + + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (GetElementPtrInst *GEP = dyn_cast(U)) { + // Do not transform "gepinst (gep constexpr (GV))" here, because forming + // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold + // and will invalidate our notion of what Init is. + Constant *SubInit = 0; + if (!isa(GEP->getOperand(0))) { + ConstantExpr *CE = + dyn_cast_or_null(ConstantFoldInstruction(GEP)); + if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + } + Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + + if (GEP->use_empty()) { + GEP->eraseFromParent(); + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast(U)) { // memset/cpy/mv + if (MI->getRawDest() == V) { + MI->eraseFromParent(); + Changed = true; + } + + } else if (Constant *C = dyn_cast(U)) { + // If we have a chain of dead constantexprs or other things dangling from + // us, and if they are all dead, nuke them without remorse. 
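+      // (Illustrative example: a "bitcast (i32* @G to i8*)" constantexpr
+      // whose only users are other dead constants is exactly such a chain;
+      // ConstantIsDead walks it recursively.)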
+ if (ConstantIsDead(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + CleanupConstantGlobalUsers(V, Init); + return true; + } + } + } + return Changed; +} + +/// isSafeSROAElementUse - Return true if the specified instruction is a safe +/// user of a derived expression from a global that we want to SROA. +static bool isSafeSROAElementUse(Value *V) { + // We might have a dead and dangling constant hanging off of here. + if (Constant *C = dyn_cast(V)) + return ConstantIsDead(C); + + Instruction *I = dyn_cast(V); + if (!I) return false; + + // Loads are ok. + if (isa(I)) return true; + + // Stores *to* the pointer are ok. + if (StoreInst *SI = dyn_cast(I)) + return SI->getOperand(0) != V; + + // Otherwise, it must be a GEP. + GetElementPtrInst *GEPI = dyn_cast(I); + if (GEPI == 0) return false; + + if (GEPI->getNumOperands() < 3 || !isa(GEPI->getOperand(1)) || + !cast(GEPI->getOperand(1))->isNullValue()) + return false; + + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); + I != E; ++I) + if (!isSafeSROAElementUse(*I)) + return false; + return true; +} + + +/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. +/// Look at it and its uses and decide whether it is safe to SROA this global. +/// +static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. + if (!isa(U) && + (!isa(U) || + cast(U)->getOpcode() != Instruction::GetElementPtr)) + return false; + + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa(U->getOperand(1)) || + !cast(U->getOperand(1))->isNullValue() || + !isa(U->getOperand(2))) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // If this is a use of an array allocation, do a bit more checking for sanity. + if (const ArrayType *AT = dyn_cast(*GEPI)) { + uint64_t NumElements = AT->getNumElements(); + ConstantInt *Idx = cast(U->getOperand(2)); + + // Check to make sure that index falls within the array. If not, + // something funny is going on, so we won't do the optimization. + // + if (Idx->getZExtValue() >= NumElements) + return false; + + // We cannot scalar repl this level of the array unless any array + // sub-indices are in-range constants. In particular, consider: + // A[0][i]. We cannot know that the user isn't doing invalid things like + // allowing i to index an out-of-range subscript that accesses A[1]. + // + // Scalar replacing *just* the outer index of the array is probably not + // going to be a win anyway, so just give up. + for (++GEPI; // Skip array index. + GEPI != E && (isa(*GEPI) || isa(*GEPI)); + ++GEPI) { + uint64_t NumElements; + if (const ArrayType *SubArrayTy = dyn_cast(*GEPI)) + NumElements = SubArrayTy->getNumElements(); + else + NumElements = cast(*GEPI)->getNumElements(); + + ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); + if (!IdxVal || IdxVal->getZExtValue() >= NumElements) + return false; + } + } + + for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) + if (!isSafeSROAElementUse(*I)) + return false; + return true; +} + +/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it +/// is safe for us to perform this transformation. 
+/// +static bool GlobalUsersSafeToSRA(GlobalValue *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + if (!IsUserOfGlobalSafeForSRA(*UI, GV)) + return false; + } + return true; +} + + +/// SRAGlobal - Perform scalar replacement of aggregates on the specified global +/// variable. This opens the door for other optimizations by exposing the +/// behavior of the program in a more fine-grained way. We have determined that +/// this transformation is safe already. We return the first global variable we +/// insert so that the caller can reprocess it. +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { + // Make sure this global only has simple uses that we can SRA. + if (!GlobalUsersSafeToSRA(GV)) + return 0; + + assert(GV->hasLocalLinkage() && !GV->isConstant()); + Constant *Init = GV->getInitializer(); + const Type *Ty = Init->getType(); + + std::vector NewGlobals; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + + // Get the alignment of the global, either explicit or target-specific. + unsigned StartAlignment = GV->getAlignment(); + if (StartAlignment == 0) + StartAlignment = TD.getABITypeAlignment(GV->getType()); + + if (const StructType *STy = dyn_cast(Ty)) { + NewGlobals.reserve(STy->getNumElements()); + const StructLayout &Layout = *TD.getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::Int32Ty, i)); + assert(In && "Couldn't get element of initializer?"); + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+utostr(i), + (Module *)NULL, + GV->isThreadLocal(), + GV->getType()->getAddressSpace()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + uint64_t FieldOffset = Layout.getElementOffset(i); + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); + if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) + NGV->setAlignment(NewAlign); + } + } else if (const SequentialType *STy = dyn_cast(Ty)) { + unsigned NumElements = 0; + if (const ArrayType *ATy = dyn_cast(STy)) + NumElements = ATy->getNumElements(); + else + NumElements = cast(STy)->getNumElements(); + + if (NumElements > 16 && GV->hasNUsesOrMore(16)) + return 0; // It's not worth it. + NewGlobals.reserve(NumElements); + + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); + unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); + for (unsigned i = 0, e = NumElements; i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::Int32Ty, i)); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+utostr(i), + (Module *)NULL, + GV->isThreadLocal(), + GV->getType()->getAddressSpace()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. 
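+      // Worked example (editorially added, assuming StartAlignment = 16 and
+      // EltSize = 4): MinAlign(16, 4*i) yields 16, 4, 8, 4, 16, ... for
+      // i = 0, 1, 2, 3, 4, so only elements whose byte offset preserves more
+      // alignment than the ABI minimum receive an explicit alignment below.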
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + if (NewAlign > EltAlign) + NGV->setAlignment(NewAlign); + } + } + + if (NewGlobals.empty()) + return 0; + + DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; + + Constant *NullInt = Constant::getNullValue(Type::Int32Ty); + + // Loop over all of the uses of the global, replacing the constantexpr geps, + // with smaller constantexpr geps or direct references. + while (!GV->use_empty()) { + User *GEP = GV->use_back(); + assert(((isa(GEP) && + cast(GEP)->getOpcode()==Instruction::GetElementPtr)|| + isa(GEP)) && "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned Val = cast(GEP->getOperand(2))->getZExtValue(); + if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + + Value *NewPtr = NewGlobals[Val]; + + // Form a shorter GEP if needed. + if (GEP->getNumOperands() > 3) { + if (ConstantExpr *CE = dyn_cast(GEP)) { + SmallVector Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) + Idxs.push_back(CE->getOperand(i)); + NewPtr = ConstantExpr::getGetElementPtr(cast(NewPtr), + &Idxs[0], Idxs.size()); + } else { + GetElementPtrInst *GEPI = cast(GEP); + SmallVector Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) + Idxs.push_back(GEPI->getOperand(i)); + NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), + GEPI->getName()+"."+utostr(Val), GEPI); + } + } + GEP->replaceAllUsesWith(NewPtr); + + if (GetElementPtrInst *GEPI = dyn_cast(GEP)) + GEPI->eraseFromParent(); + else + cast(GEP)->destroyConstant(); + } + + // Delete the old global, now that it is dead. + Globals.erase(GV); + ++NumSRA; + + // Loop over the new globals array deleting any globals that are obviously + // dead. This can arise due to scalarization of a structure or an array that + // has elements that are dead. + unsigned FirstGlobal = 0; + for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) + if (NewGlobals[i]->use_empty()) { + Globals.erase(NewGlobals[i]); + if (FirstGlobal == i) ++FirstGlobal; + } + + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; +} + +/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified +/// value will trap if the value is dynamically null. PHIs keeps track of any +/// phi nodes we've seen to avoid reprocessing them. +static bool AllUsesOfValueWillTrapIfNull(Value *V, + SmallPtrSet &PHIs) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (isa(*UI)) { + // Will trap. + } else if (StoreInst *SI = dyn_cast(*UI)) { + if (SI->getOperand(0) == V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Storing the value. 
+ } + } else if (CallInst *CI = dyn_cast(*UI)) { + if (CI->getOperand(0) != V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Not calling the ptr + } + } else if (InvokeInst *II = dyn_cast(*UI)) { + if (II->getOperand(0) != V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Not calling the ptr + } + } else if (BitCastInst *CI = dyn_cast(*UI)) { + if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; + } else if (GetElementPtrInst *GEPI = dyn_cast(*UI)) { + if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; + } else if (PHINode *PN = dyn_cast(*UI)) { + // If we've already seen this phi node, ignore it, it has already been + // checked. + if (PHIs.insert(PN)) + return AllUsesOfValueWillTrapIfNull(PN, PHIs); + } else if (isa(*UI) && + isa(UI->getOperand(1))) { + // Ignore setcc X, null + } else { + //cerr << "NONTRAPPING USE: " << **UI; + return false; + } + return true; +} + +/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads +/// from GV will trap if the loaded value is null. Note that this also permits +/// comparisons of the loaded value against null, as a special case. +static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) + if (LoadInst *LI = dyn_cast(*UI)) { + SmallPtrSet PHIs; + if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) + return false; + } else if (isa(*UI)) { + // Ignore stores to the global. + } else { + // We don't know or understand this user, bail out. + //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + return false; + } + + return true; +} + +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { + Instruction *I = cast(*UI++); + if (LoadInst *LI = dyn_cast(I)) { + LI->setOperand(0, NewV); + Changed = true; + } else if (StoreInst *SI = dyn_cast(I)) { + if (SI->getOperand(1) == V) { + SI->setOperand(1, NewV); + Changed = true; + } + } else if (isa(I) || isa(I)) { + if (I->getOperand(0) == V) { + // Calling through the pointer! Turn into a direct call, but be careful + // that the pointer is not also being passed as an argument. + I->setOperand(0, NewV); + Changed = true; + bool PassedAsArg = false; + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i) == V) { + PassedAsArg = true; + I->setOperand(i, NewV); + } + + if (PassedAsArg) { + // Being passed as an argument also. Be careful to not invalidate UI! + UI = V->use_begin(); + } + } + } else if (CastInst *CI = dyn_cast(I)) { + Changed |= OptimizeAwayTrappingUsesOfValue(CI, + ConstantExpr::getCast(CI->getOpcode(), + NewV, CI->getType())); + if (CI->use_empty()) { + Changed = true; + CI->eraseFromParent(); + } + } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { + // Should handle GEP here. + SmallVector Idxs; + Idxs.reserve(GEPI->getNumOperands()-1); + for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); + i != e; ++i) + if (Constant *C = dyn_cast(*i)) + Idxs.push_back(C); + else + break; + if (Idxs.size() == GEPI->getNumOperands()-1) + Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, + ConstantExpr::getGetElementPtr(NewV, &Idxs[0], + Idxs.size())); + if (GEPI->use_empty()) { + Changed = true; + GEPI->eraseFromParent(); + } + } + } + + return Changed; +} + + +/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null +/// value stored into it. 
If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that they cannot be
+/// reachable when the loaded value is null, so we can optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+  bool Changed = false;
+
+  // Keep track of whether we are able to remove all the uses of the global
+  // other than the store that defines it.
+  bool AllNonStoreUsesGone = true;
+
+  // Replace all uses of loads with uses of uses of the stored value.
+  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+    User *GlobalUser = *GUI++;
+    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+      // If we were able to delete all uses of the loads
+      if (LI->use_empty()) {
+        LI->eraseFromParent();
+        Changed = true;
+      } else {
+        AllNonStoreUsesGone = false;
+      }
+    } else if (isa<StoreInst>(GlobalUser)) {
+      // Ignore the store that stores "LV" to the global.
+      assert(GlobalUser->getOperand(1) == GV &&
+             "Must be storing *to* the global");
+    } else {
+      AllNonStoreUsesGone = false;
+
+      // If we get here we could have other crazy uses that are transitively
+      // loaded.
+      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+              isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
+    }
+  }
+
+  if (Changed) {
+    DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
+    ++NumGlobUses;
+  }
+
+  // If we nuked all of the loads, then none of the stores are needed either,
+  // nor is the global.
+  if (AllNonStoreUsesGone) {
+    DOUT << "  *** GLOBAL NOW DEAD!\n";
+    CleanupConstantGlobalUsers(GV, 0);
+    if (GV->use_empty()) {
+      GV->eraseFromParent();
+      ++NumDeleted;
+    }
+    Changed = true;
+  }
+  return Changed;
+}
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+    if (Instruction *I = dyn_cast<Instruction>(*UI++))
+      if (Constant *NewC = ConstantFoldInstruction(I)) {
+        I->replaceAllUsesWith(NewC);
+
+        // Advance UI to the next non-I use to avoid invalidating it!
+        // Instructions could multiply use V.
+        while (UI != E && *UI == I)
+          ++UI;
+        I->eraseFromParent();
+      }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result
+/// of the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and rewrite any loads of GV as uses of the new
+/// global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+                                                     MallocInst *MI) {
+  DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI;
+  ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());
+
+  if (NElements->getZExtValue() != 1) {
+    // If we have an array allocation, transform it to a single element
+    // allocation to make the code below simpler.
+    Type *NewTy = ArrayType::get(MI->getAllocatedType(),
+                                 NElements->getZExtValue());
+    MallocInst *NewMI =
+      new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty),
+                     MI->getAlignment(), MI->getName(), MI);
+    Value *Indices[2];
+    Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
+    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+                                              NewMI->getName()+".el0", MI);
+    MI->replaceAllUsesWith(NewGEP);
+    MI->eraseFromParent();
+    MI = NewMI;
+  }
+
+  // Create the new global variable.
The contents of the malloc'd memory is + // undefined, so initialize with an undef value. + Constant *Init = UndefValue::get(MI->getAllocatedType()); + GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, + GlobalValue::InternalLinkage, Init, + GV->getName()+".body", + (Module *)NULL, + GV->isThreadLocal()); + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. + GV->getParent()->getGlobalList().insert(GV, NewGV); + + // Anything that used the malloc now uses the global directly. + MI->replaceAllUsesWith(NewGV); + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, + ConstantInt::getFalse(), GV->getName()+".init", + (Module *)NULL, GV->isThreadLocal()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + std::vector Stores; + while (!GV->use_empty()) + if (LoadInst *LI = dyn_cast(GV->use_back())) { + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa(LoadUse.getUser())) + LoadUse = RepValue; + else { + ICmpInst *CI = cast(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); + InitBoolUsed = true; + switch (CI->getPredicate()) { + default: assert(0 && "Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + LV = ConstantInt::getFalse(); // X < null -> always false + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", CI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + CI->replaceAllUsesWith(LV); + CI->eraseFromParent(); + } + } + LI->eraseFromParent(); + } else { + StoreInst *SI = cast(GV->use_back()); + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(), InitBool, SI); + SI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast(InitBool->use_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV, InitBool); + + + // Now the GV is dead, nuke it and the malloc. + GV->eraseFromParent(); + MI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue); + + return NewGV; +} + +/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking +/// to make sure that there are no complex uses of V. We permit simple things +/// like dereferencing the pointer, but not storing through the address, unless +/// it is to the specified global. 
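+/// For example (illustrative, names hypothetical): loading through V or
+/// storing V into GV itself is fine, but "store i8* %V, i8** %Q" for any
+/// unrelated %Q would make this predicate return false.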
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, + GlobalVariable *GV, + SmallPtrSet &PHIs) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *Inst = dyn_cast(*UI); + if (Inst == 0) return false; + + if (isa(Inst) || isa(Inst)) { + continue; // Fine, ignore. + } + + if (StoreInst *SI = dyn_cast(Inst)) { + if (SI->getOperand(0) == V && SI->getOperand(1) != GV) + return false; // Storing the pointer itself... bad. + continue; // Otherwise, storing through it, or storing into GV... fine. + } + + if (isa(Inst)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) + return false; + continue; + } + + if (PHINode *PN = dyn_cast(Inst)) { + // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI + // cycles. + if (PHIs.insert(PN)) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) + return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast(Inst)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + continue; + } + + return false; + } + return true; +} + +/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV +/// somewhere. Transform all uses of the allocation into loads from the +/// global and uses of the resultant pointer. Further, delete the store into +/// GV. This assumes that these value pass the +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, + GlobalVariable *GV) { + while (!Alloc->use_empty()) { + Instruction *U = cast(*Alloc->use_begin()); + Instruction *InsertPt = U; + if (StoreInst *SI = dyn_cast(U)) { + // If this is the store of the allocation into the global, remove it. + if (SI->getOperand(1) == GV) { + SI->eraseFromParent(); + continue; + } + } else if (PHINode *PN = dyn_cast(U)) { + // Insert the load in the corresponding predecessor, not right before the + // PHI. + InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); + } else if (isa(U)) { + // Must be bitcast between the malloc and store to initialize the global. + ReplaceUsesOfMallocWithGlobal(U, GV); + U->eraseFromParent(); + continue; + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + // If this is a "GEP bitcast" and the user is a store to the global, then + // just process it as a bitcast. + if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) + if (StoreInst *SI = dyn_cast(GEPI->use_back())) + if (SI->getOperand(1) == GV) { + // Must be bitcast GEP between the malloc and store to initialize + // the global. + ReplaceUsesOfMallocWithGlobal(GEPI, GV); + GEPI->eraseFromParent(); + continue; + } + } + + // Insert a load from the global, and use it instead of the malloc. + Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); + U->replaceUsesOfWith(Alloc, NL); + } +} + +/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi +/// of a load) are simple enough to perform heap SRA on. This permits GEP's +/// that index through the array and struct field, icmps of null, and PHIs. +static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, + SmallPtrSet &LoadUsingPHIs, + SmallPtrSet &LoadUsingPHIsPerLoad) { + // We permit two users of the load: setcc comparing against the null + // pointer, and a getelementptr of a specific form. + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *User = cast(*UI); + + // Comparison against null is ok. 
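+    // (E.g., illustrative: "%c = icmp eq %struct.T* %ld, null". A compare
+    // against anything other than the null pointer disqualifies the load.)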
+ if (ICmpInst *ICI = dyn_cast(User)) { + if (!isa(ICI->getOperand(1))) + return false; + continue; + } + + // getelementptr is also ok, but only a simple form. + if (GetElementPtrInst *GEPI = dyn_cast(User)) { + // Must index into the array and into the struct. + if (GEPI->getNumOperands() < 3) + return false; + + // Otherwise the GEP is ok. + continue; + } + + if (PHINode *PN = dyn_cast(User)) { + if (!LoadUsingPHIsPerLoad.insert(PN)) + // This means some phi nodes are dependent on each other. + // Avoid infinite looping! + return false; + if (!LoadUsingPHIs.insert(PN)) + // If we have already analyzed this PHI, then it is safe. + continue; + + // Make sure all uses of the PHI are simple enough to transform. + if (!LoadUsesSimpleEnoughForHeapSRA(PN, + LoadUsingPHIs, LoadUsingPHIsPerLoad)) + return false; + + continue; + } + + // Otherwise we don't know what this is, not ok. + return false; + } + + return true; +} + + +/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from +/// GV are simple enough to perform HeapSRA, return true. +static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, + MallocInst *MI) { + SmallPtrSet LoadUsingPHIs; + SmallPtrSet LoadUsingPHIsPerLoad; + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; + ++UI) + if (LoadInst *LI = dyn_cast(*UI)) { + if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, + LoadUsingPHIsPerLoad)) + return false; + LoadUsingPHIsPerLoad.clear(); + } + + // If we reach here, we know that all uses of the loads and transitive uses + // (through PHI nodes) are simple enough to transform. However, we don't know + // that all inputs the to the PHI nodes are in the same equivalence sets. + // Check to verify that all operands of the PHIs are either PHIS that can be + // transformed, loads from GV, or MI itself. + for (SmallPtrSet::iterator I = LoadUsingPHIs.begin(), + E = LoadUsingPHIs.end(); I != E; ++I) { + PHINode *PN = *I; + for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { + Value *InVal = PN->getIncomingValue(op); + + // PHI of the stored value itself is ok. + if (InVal == MI) continue; + + if (PHINode *InPN = dyn_cast(InVal)) { + // One of the PHIs in our set is (optimistically) ok. + if (LoadUsingPHIs.count(InPN)) + continue; + return false; + } + + // Load from GV is ok. + if (LoadInst *LI = dyn_cast(InVal)) + if (LI->getOperand(0) == GV) + continue; + + // UNDEF? NULL? + + // Anything else is rejected. + return false; + } + } + + return true; +} + +static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, + DenseMap > &InsertedScalarizedValues, + std::vector > &PHIsToRewrite) { + std::vector &FieldVals = InsertedScalarizedValues[V]; + + if (FieldNo >= FieldVals.size()) + FieldVals.resize(FieldNo+1); + + // If we already have this value, just reuse the previously scalarized + // version. + if (Value *FieldVal = FieldVals[FieldNo]) + return FieldVal; + + // Depending on what instruction this is, we have several cases. + Value *Result; + if (LoadInst *LI = dyn_cast(V)) { + // This is a scalarized version of the load from the global. Just create + // a new Load of the scalarized global. + Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, + InsertedScalarizedValues, + PHIsToRewrite), + LI->getName()+".f" + utostr(FieldNo), LI); + } else if (PHINode *PN = dyn_cast(V)) { + // PN's type is pointer to struct. Make a new PHI of pointer to struct + // field. 
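+    // (Illustrative example, names hypothetical: for %pn of type %pair*
+    // where %pair = { i32, float }, field 1 produces a new "float*" PHI named
+    // "%pn.f1"; its incoming values are wired up later via PHIsToRewrite.)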
+ const StructType *ST = + cast(cast(PN->getType())->getElementType()); + + Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getName()+".f"+utostr(FieldNo), PN); + PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); + } else { + assert(0 && "Unknown usable value"); + Result = 0; + } + + return FieldVals[FieldNo] = Result; +} + +/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from +/// the load, rewrite the derived value to use the HeapSRoA'd load. +static void RewriteHeapSROALoadUser(Instruction *LoadUser, + DenseMap > &InsertedScalarizedValues, + std::vector > &PHIsToRewrite) { + // If this is a comparison against null, handle it. + if (ICmpInst *SCI = dyn_cast(LoadUser)) { + assert(isa(SCI->getOperand(1))); + // If we have a setcc of the loaded pointer, we can use a setcc of any + // field. + Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, + InsertedScalarizedValues, PHIsToRewrite); + + Value *New = new ICmpInst(SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName(), SCI); + SCI->replaceAllUsesWith(New); + SCI->eraseFromParent(); + return; + } + + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' + if (GetElementPtrInst *GEPI = dyn_cast(LoadUser)) { + assert(GEPI->getNumOperands() >= 3 && isa(GEPI->getOperand(2)) + && "Unexpected GEPI!"); + + // Load the pointer for this field. + unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); + Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, + InsertedScalarizedValues, PHIsToRewrite); + + // Create the new GEP idx vector. + SmallVector GEPIdx; + GEPIdx.push_back(GEPI->getOperand(1)); + GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); + + Value *NGEPI = GetElementPtrInst::Create(NewPtr, + GEPIdx.begin(), GEPIdx.end(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NGEPI); + GEPI->eraseFromParent(); + return; + } + + // Recursively transform the users of PHI nodes. This will lazily create the + // PHIs that are needed for individual elements. Keep track of what PHIs we + // see in InsertedScalarizedValues so that we don't get infinite loops (very + // antisocial). If the PHI is already in InsertedScalarizedValues, it has + // already been seen first by another load, so its uses have already been + // processed. + PHINode *PN = cast(LoadUser); + bool Inserted; + DenseMap >::iterator InsertPos; + tie(InsertPos, Inserted) = + InsertedScalarizedValues.insert(std::make_pair(PN, std::vector())); + if (!Inserted) return; + + // If this is the first time we've seen this PHI, recursively process all + // users. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { + Instruction *User = cast(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } +} + +/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr +/// is a value loaded from the global. Eliminate all uses of Ptr, making them +/// use FieldGlobals instead. All uses of loaded values satisfy +/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+       UI != E; ) {
+    Instruction *User = cast<Instruction>(*UI++);
+    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+  }
+
+  if (Load->use_empty()) {
+    Load->eraseFromParent();
+    InsertedScalarizedValues.erase(Load);
+  }
+}
+
+/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
+  DOUT << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI;
+  const StructType *STy = cast<StructType>(MI->getAllocatedType());
+
+  // There is guaranteed to be at least one use of the malloc (storing
+  // it into GV).  If there are other uses, change them to be uses of
+  // the global to simplify later code.  This also deletes the store
+  // into GV.
+  ReplaceUsesOfMallocWithGlobal(MI, GV);
+
+  // Okay, at this point, there are no users of the malloc.  Insert N
+  // new mallocs at the same place as MI, and N globals.
+  std::vector<Value*> FieldGlobals;
+  std::vector<MallocInst*> FieldMallocs;
+
+  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+    const Type *FieldTy = STy->getElementType(FieldNo);
+    const Type *PFieldTy = PointerType::getUnqual(FieldTy);
+
+    GlobalVariable *NGV =
+      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+                         Constant::getNullValue(PFieldTy),
+                         GV->getName() + ".f" + utostr(FieldNo), GV,
+                         GV->isThreadLocal());
+    FieldGlobals.push_back(NGV);
+
+    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
+                                     MI->getName() + ".f" + utostr(FieldNo),MI);
+    FieldMallocs.push_back(NMI);
+    new StoreInst(NMI, NGV, MI);
+  }
+
+  // The tricky aspect of this transformation is handling the case when malloc
+  // fails.  In the original code, malloc failing would set the result pointer
+  // of malloc to null.  In this case, some mallocs could succeed and others
+  // could fail.  As such, we emit code that looks like this:
+  //    F0 = malloc(field0)
+  //    F1 = malloc(field1)
+  //    F2 = malloc(field2)
+  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
+  //      if (F0) { free(F0); F0 = 0; }
+  //      if (F1) { free(F1); F1 = 0; }
+  //      if (F2) { free(F2); F2 = 0; }
+  //    }
+  Value *RunningOr = 0;
+  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+    Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i],
+                             Constant::getNullValue(FieldMallocs[i]->getType()),
+                               "isnull", MI);
+    if (!RunningOr)
+      RunningOr = Cond;   // First seteq
+    else
+      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
+  }
+
+  // Split the basic block at the old malloc.
+  BasicBlock *OrigBB = MI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
+
+  // Create the block to check the first condition.  Put all these blocks at
+  // the end of the function as they are unlikely to be executed.
+  BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null",
+                                                OrigBB->getParent());
+
+  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+  // branch on RunningOr.
+  OrigBB->getTerminator()->eraseFromParent();
+  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+  // Within the NullPtrBlock, we need to emit a comparison and branch for each
+  // pointer, because some may be null while others are not.
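+  // Illustrative sketch of the control flow emitted below for two fields
+  // (the block names match the ones created by this loop):
+  //   malloc_ret_null:  %v0 = load @G.f0;  br (%v0 != null), free_it, next
+  //   free_it:          free(%v0); store null -> @G.f0; br next
+  //   next:             ...same test for @G.f1..., then br malloc_cont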
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal,
+                              Constant::getNullValue(GVVal->getType()),
+                              "tmp", NullPtrBlock);
+    BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent());
+    BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent());
+    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+    // Fill in FreeBlock.
+    new FreeInst(GVVal, FreeBlock);
+    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+                  FreeBlock);
+    BranchInst::Create(NextBlock, FreeBlock);
+
+    NullPtrBlock = NextBlock;
+  }
+
+  BranchInst::Create(ContBB, NullPtrBlock);
+
+  // MI is no longer needed, remove it.
+  MI->eraseFromParent();
+
+  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of what scalarized loads are
+  /// inserted for a given load.
+  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+  InsertedScalarizedValues[GV] = FieldGlobals;
+
+  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+  // Okay, the malloc site is completely handled.  All of the uses of GV are now
+  // loads, and all uses of those loads are simple.  Rewrite them to use loads
+  // of the per-field globals instead.
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+    Instruction *User = cast<Instruction>(*UI++);
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+      continue;
+    }
+
+    // Must be a store of null.
+    StoreInst *SI = cast<StoreInst>(User);
+    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+           "Unexpected heap-sra user!");
+
+    // Insert a store of null into each global.
+    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+      Constant *Null = Constant::getNullValue(PT->getElementType());
+      new StoreInst(Null, FieldGlobals[i], SI);
+    }
+    // Erase the original store.
+    SI->eraseFromParent();
+  }
+
+  // While we have PHIs that are interesting to rewrite, do it.
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored into it that is a malloc
+/// or cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+                                               MallocInst *MI,
+                                               Module::global_iterator &GVI,
+                                               TargetData &TD) {
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!MI->getAllocatedType()->isSized())
+    return false;
+
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded, setcc'd, and
+  // GEP'd.  These are all things we could transform to using the global
+  // for.
+  {
+    SmallPtrSet<PHINode*, 8> PHIs;
+    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
+      return false;
+  }
+
+
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
+    if (NElements->getZExtValue()*
+        TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
+      return true;
+    }
+  }
+
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+  const Type *AllocTy = MI->getAllocatedType();
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (!MI->isArrayAllocation())
+    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+
+  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+    // If the structure has an unreasonable number of fields, leave it
+    // alone.
+    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
+
+      // If this is a fixed size array, transform the Malloc to be an alloc of
+      // structs.  malloc [100 x struct],1 -> malloc struct, 100
+      if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
+        MallocInst *NewMI =
+          new MallocInst(AllocSTy,
+                         ConstantInt::get(Type::Int32Ty, AT->getNumElements()),
+                         "", MI);
+        NewMI->takeName(MI);
+        Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
+        MI->replaceAllUsesWith(Cast);
+        MI->eraseFromParent();
+        MI = NewMI;
+      }
+
+      GVI = PerformHeapAllocSRoA(GV, MI);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+                                     Module::global_iterator &GVI,
+                                     TargetData &TD) {
+  // Ignore no-op GEPs and bitcasts.
+  StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+  // If we are dealing with a pointer global that is initialized to null and
+  // only has one (non-null) value stored into it, then we can optimize any
+  // users of the loaded value (often calls and loads) that would trap if the
+  // value was null.
+  if (isa<PointerType>(GV->getInitializer()->getType()) &&
+      GV->getInitializer()->isNullValue()) {
+    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+      if (GV->getInitializer()->getType() != SOVC->getType())
+        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+      // Optimize away any trapping uses of the loaded value.
+      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+        return true;
+    } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
+      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal.  See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used.  This exposes the values to other scalar optimizations.
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+  const Type *GVElType = GV->getType()->getElementType();
+
+  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
+  // an FP value, pointer or vector, don't do this optimization because a select
+  // between them is very expensive and unlikely to lead to later
+  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
+  // where v1 and v2 both require constant pool loads, a big loss.
+  if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() ||
+      isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
+    return false;
+
+  // Walk the use list of the global seeing if all the uses are load or store.
+  // If there is anything else, bail out.
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
+    if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+      return false;
+
+  DOUT << "   *** SHRINKING TO BOOL: " << *GV;
+
+  // Create the new global, initializing it to false.
+  GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false,
+         GlobalValue::InternalLinkage, ConstantInt::getFalse(),
+                                             GV->getName()+".b",
+                                             (Module *)NULL,
+                                             GV->isThreadLocal());
+  GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+  Constant *InitVal = GV->getInitializer();
+  assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!");
+
+  // If initialized to zero and storing one into the global, we can use a cast
+  // instead of a select to synthesize the desired value.
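+  //
+  // For example (an illustrative sketch, not from the original source), with
+  // an i32 global initialized to 0 whose only other stored value is 1:
+  //   %b = load i1* @G.b
+  //   %v = zext i1 %b to i32          ; recovers exactly 0 or 1
+  // For an arbitrary pair of values a select is required instead:
+  //   %v = select i1 %b, i32 %OtherVal, i32 %InitVal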
+  bool IsOneZero = false;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+    IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+  while (!GV->use_empty()) {
+    Instruction *UI = cast<Instruction>(GV->use_back());
+    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+      // Change the store into a boolean store.
+      bool StoringOther = SI->getOperand(0) == OtherVal;
+      // Only do this if we weren't storing a loaded value.
+      Value *StoreVal;
+      if (StoringOther || SI->getOperand(0) == InitVal)
+        StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther);
+      else {
+        // Otherwise, we are storing a previously loaded copy.  To do this,
+        // change the copy from copying the original value to just copying the
+        // bool.
+        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+        // If we've already replaced the input, StoredVal will be a cast or
+        // select instruction.  If not, it will be a load of the original
+        // global.
+        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+          assert(LI->getOperand(0) == GV && "Not a copy!");
+          // Insert a new load, to preserve the saved value.
+          StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+        } else {
+          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+                 "This is not a form that we understand!");
+          StoreVal = StoredVal->getOperand(0);
+          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+        }
+      }
+      new StoreInst(StoreVal, NewGV, SI);
+    } else {
+      // Change the load into a load of bool then a select.
+      LoadInst *LI = cast<LoadInst>(UI);
+      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+      Value *NSI;
+      if (IsOneZero)
+        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+      else
+        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+      NSI->takeName(LI);
+      LI->replaceAllUsesWith(NSI);
+    }
+    UI->eraseFromParent();
+  }
+
+  GV->eraseFromParent();
+  return true;
+}
+
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible.  If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+                                      Module::global_iterator &GVI) {
+  SmallPtrSet<PHINode*, 16> PHIUsers;
+  GlobalStatus GS;
+  GV->removeDeadConstantUsers();
+
+  if (GV->use_empty()) {
+    DOUT << "GLOBAL DEAD: " << *GV;
+    GV->eraseFromParent();
+    ++NumDeleted;
+    return true;
+  }
+
+  if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
+#if 0
+    cerr << "Global: " << *GV;
+    cerr << "  isLoaded = " << GS.isLoaded << "\n";
+    cerr << "  StoredType = ";
+    switch (GS.StoredType) {
+    case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
+    case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
+    case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
+    case GlobalStatus::isStored: cerr << "stored\n"; break;
+    }
+    if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
+      cerr << "  StoredOnceValue = " << *GS.StoredOnceValue << "\n";
+    if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
+      cerr << "  AccessingFunction = " << GS.AccessingFunction->getName()
+           << "\n";
+    cerr << "  HasMultipleAccessingFunctions = "
+         << GS.HasMultipleAccessingFunctions << "\n";
+    cerr << "  HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n";
+    cerr << "\n";
+#endif
+
+    // If this is a first class global that has only one accessing function,
+    // and that function is main (which we know is not recursive), we replace
+    // the global with a local alloca in that function.
+    //
+    // NOTE: It doesn't make sense to promote non-single-value types since we
+    // are just replacing static memory with stack memory.
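+    //
+    // Sketch of the transform (illustrative, not from the original source):
+    //   @G = internal global i32 0    ; accessed only from main()
+    // becomes, at the top of main():
+    //   %G = alloca i32
+    //   store i32 0, i32* %G          ; the initializer, unless it is undef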
+    if (!GS.HasMultipleAccessingFunctions &&
+        GS.AccessingFunction && !GS.HasNonInstructionUser &&
+        GV->getType()->getElementType()->isSingleValueType() &&
+        GS.AccessingFunction->getName() == "main" &&
+        GS.AccessingFunction->hasExternalLinkage()) {
+      DOUT << "LOCALIZING GLOBAL: " << *GV;
+      Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
+      const Type* ElemTy = GV->getType()->getElementType();
+      // FIXME: Pass Global's alignment when globals have alignment
+      AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI);
+      if (!isa<UndefValue>(GV->getInitializer()))
+        new StoreInst(GV->getInitializer(), Alloca, FirstI);
+
+      GV->replaceAllUsesWith(Alloca);
+      GV->eraseFromParent();
+      ++NumLocalized;
+      return true;
+    }
+
+    // If the global is never loaded (but may be stored to), it is dead.
+    // Delete it now.
+    if (!GS.isLoaded) {
+      DOUT << "GLOBAL NEVER LOADED: " << *GV;
+
+      // Delete any stores we can find to the global.  We may not be able to
+      // make it completely dead though.
+      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+      // If the global is dead now, delete it.
+      if (GV->use_empty()) {
+        GV->eraseFromParent();
+        ++NumDeleted;
+        Changed = true;
+      }
+      return Changed;
+
+    } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+      DOUT << "MARKING CONSTANT: " << *GV;
+      GV->setConstant(true);
+
+      // Clean up any obviously simplifiable users now.
+      CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+      // If the global is dead now, just nuke it.
+      if (GV->use_empty()) {
+        DOUT << "   *** Marking constant allowed us to simplify "
+             << "all users and delete global!\n";
+        GV->eraseFromParent();
+        ++NumDeleted;
+      }
+
+      ++NumMarked;
+      return true;
+    } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+      if (GlobalVariable *FirstNewGV = SRAGlobal(GV,
+                                                 getAnalysis<TargetData>())) {
+        GVI = FirstNewGV;  // Don't skip the newly produced globals!
+        return true;
+      }
+    } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+      // If the initial value for the global was an undef value, and if only
+      // one other value was stored into it, we can just change the
+      // initializer to be the stored value, then delete all stores to the
+      // global.  This allows us to mark it constant.
+      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+        if (isa<UndefValue>(GV->getInitializer())) {
+          // Change the initial value here.
+          GV->setInitializer(SOVConstant);
+
+          // Clean up any obviously simplifiable users now.
+          CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+          if (GV->use_empty()) {
+            DOUT << "   *** Substituting initializer allowed us to "
+                 << "simplify all users and delete global!\n";
+            GV->eraseFromParent();
+            ++NumDeleted;
+          } else {
+            GVI = GV;
+          }
+          ++NumSubstitute;
+          return true;
+        }
+
+      // Try to optimize globals based on the knowledge that only one value
+      // (besides its initializer) is ever stored to the global.
+      if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+                                   getAnalysis<TargetData>()))
+        return true;
+
+      // Otherwise, if the global was not a boolean, we can shrink it to be a
+      // boolean.
+      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+          ++NumShrunkToBool;
+          return true;
+        }
+    }
+  }
+  return false;
+}
+
+/// OnlyCalledDirectly - Return true if the specified function is only called
+/// directly.  In other words, its address is never taken.
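+///
+/// For example (an illustrative sketch): 'call void @F()' is a direct call,
+/// while 'store void ()* @F, void ()** @P' or passing @F as a call argument
+/// takes its address, so this returns false for such uses.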
+static bool OnlyCalledDirectly(Function *F) {
+  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+    Instruction *User = dyn_cast<Instruction>(*UI);
+    if (!User) return false;
+    if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
+
+    // See if the function address is passed as an argument.
+    for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
+         i != e; ++i)
+      if (*i == F) return false;
+  }
+  return true;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+    CallSite User(cast<Instruction>(*UI));
+    User.setCallingConv(CallingConv::Fast);
+  }
+}
+
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+    if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+      continue;
+
+    // There can be only one.
+    return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+  }
+
+  return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+  F->setAttributes(StripNest(F->getAttributes()));
+  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+    CallSite User(cast<Instruction>(*UI));
+    User.setAttributes(StripNest(User.getAttributes()));
+  }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+  bool Changed = false;
+  // Optimize functions.
+  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+    Function *F = FI++;
+    // Functions without names cannot be referenced outside this module.
+    if (!F->hasName() && !F->isDeclaration())
+      F->setLinkage(GlobalValue::InternalLinkage);
+    F->removeDeadConstantUsers();
+    if (F->use_empty() && (F->hasLocalLinkage() ||
+                           F->hasLinkOnceLinkage())) {
+      M.getFunctionList().erase(F);
+      Changed = true;
+      ++NumFnDeleted;
+    } else if (F->hasLocalLinkage()) {
+      if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+          OnlyCalledDirectly(F)) {
+        // If this function has C calling conventions, is not a varargs
+        // function, and is only called directly, promote it to use the Fast
+        // calling convention.
+        F->setCallingConv(CallingConv::Fast);
+        ChangeCalleesToFastCall(F);
+        ++NumFastCallFns;
+        Changed = true;
+      }
+
+      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+          OnlyCalledDirectly(F)) {
+        // The function is not used by a trampoline intrinsic, so it is safe
+        // to remove the 'nest' attribute.
+        RemoveNestAttribute(F);
+        ++NumNestRemoved;
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+  bool Changed = false;
+  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+       GVI != E; ) {
+    GlobalVariable *GV = GVI++;
+    // Global variables without names cannot be referenced outside this module.
+    if (!GV->hasName() && !GV->isDeclaration())
+      GV->setLinkage(GlobalValue::InternalLinkage);
+    if (!GV->isConstant() && GV->hasLocalLinkage() &&
+        GV->hasInitializer())
+      Changed |= ProcessInternalGlobal(GV, GVI);
+  }
+  return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
+/// initializers have an init priority of 65535.
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    if (I->getName() == "llvm.global_ctors") {
+      // Found it, verify it's an array of { int, void()* }.
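+      // The expected shape is, e.g. (an illustrative sketch):
+      //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+      //     [ { i32, void ()* } { i32 65535, void ()* @ctor } ]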
+ const ArrayType *ATy =dyn_cast(I->getType()->getElementType()); + if (!ATy) return 0; + const StructType *STy = dyn_cast(ATy->getElementType()); + if (!STy || STy->getNumElements() != 2 || + STy->getElementType(0) != Type::Int32Ty) return 0; + const PointerType *PFTy = dyn_cast(STy->getElementType(1)); + if (!PFTy) return 0; + const FunctionType *FTy = dyn_cast(PFTy->getElementType()); + if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || + FTy->getNumParams() != 0) + return 0; + + // Verify that the initializer is simple enough for us to handle. + if (!I->hasInitializer()) return 0; + ConstantArray *CA = dyn_cast(I->getInitializer()); + if (!CA) return 0; + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast(*i)) { + if (isa(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = dyn_cast(CS->getOperand(0)); + if (!CI || CI->getZExtValue() != 65535) + return 0; + } else { + return 0; + } + + return I; + } + return 0; +} + +/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +static std::vector ParseGlobalCtors(GlobalVariable *GV) { + ConstantArray *CA = cast(GV->getInitializer()); + std::vector Result; + Result.reserve(CA->getNumOperands()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = cast(*i); + Result.push_back(dyn_cast(CS->getOperand(1))); + } + return Result; +} + +/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the +/// specified array, returning the new global to use. +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, + const std::vector &Ctors) { + // If we made a change, reassemble the initializer list. + std::vector CSVals; + CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); + CSVals.push_back(0); + + // Create the new init list. + std::vector CAList; + for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { + if (Ctors[i]) { + CSVals[1] = Ctors[i]; + } else { + const Type *FTy = FunctionType::get(Type::VoidTy, + std::vector(), false); + const PointerType *PFTy = PointerType::getUnqual(FTy); + CSVals[1] = Constant::getNullValue(PFTy); + CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); + } + CAList.push_back(ConstantStruct::get(CSVals)); + } + + // Create the array initializer. + const Type *StructTy = + cast(GCL->getType()->getElementType())->getElementType(); + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), + CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return GCL; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), + GCL->getLinkage(), CA, "", + (Module *)NULL, + GCL->isThreadLocal()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. 
+ if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); + + if (Ctors.size()) + return NGV; + else + return 0; +} + + +static Constant *getVal(DenseMap &ComputedValues, + Value *V) { + if (Constant *CV = dyn_cast(V)) return CV; + Constant *R = ComputedValues[V]; + assert(R && "Reference to an uncomputed value!"); + return R; +} + +/// isSimpleEnoughPointerToCommit - Return true if this constant is simple +/// enough for us to understand. In particular, if it is a cast of something, +/// we punt. We basically just support direct accesses to globals and GEP's of +/// globals. This should be kept up to date with CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + if (GlobalVariable *GV = dyn_cast(C)) { + if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) + return false; // do not allow weak/linkonce/dllimport/dllexport linkage. + return !GV->isDeclaration(); // reject external globals. + } + if (ConstantExpr *CE = dyn_cast(C)) + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa(CE->getOperand(0))) { + GlobalVariable *GV = cast(CE->getOperand(0)); + if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) + return false; // do not allow weak/linkonce/dllimport/dllexport linkage. + return GV->hasInitializer() && + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + return false; +} + +/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global +/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. +/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, + ConstantExpr *Addr, unsigned OpNo) { + // Base case of the recursion. + if (OpNo == Addr->getNumOperands()) { + assert(Val->getType() == Init->getType() && "Type mismatch!"); + return Val; + } + + if (const StructType *STy = dyn_cast(Init->getType())) { + std::vector Elts; + + // Break up the constant into its elements. + if (ConstantStruct *CS = dyn_cast(Init)) { + for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) + Elts.push_back(cast(*i)); + } else if (isa(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Constant::getNullValue(STy->getElementType(i))); + } else if (isa(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(UndefValue::get(STy->getElementType(i))); + } else { + assert(0 && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + } + + // Replace the element that we are supposed to. + ConstantInt *CU = cast(Addr->getOperand(OpNo)); + unsigned Idx = CU->getZExtValue(); + assert(Idx < STy->getNumElements() && "Struct index out of range!"); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + + // Return the modified struct. + return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); + } else { + ConstantInt *CI = cast(Addr->getOperand(OpNo)); + const ArrayType *ATy = cast(Init->getType()); + + // Break up the array into elements. 
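+    // Illustrative example (not from the original source): committing
+    //   store i32 42, i32* getelementptr (@G, i32 0, i32 1, i32 3)
+    // steps into struct field 1 above, then replaces element 3 of that
+    // field's array initializer here with i32 42.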
+ std::vector Elts; + if (ConstantArray *CA = dyn_cast(Init)) { + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + Elts.push_back(cast(*i)); + } else if (isa(Init)) { + Constant *Elt = Constant::getNullValue(ATy->getElementType()); + Elts.assign(ATy->getNumElements(), Elt); + } else if (isa(Init)) { + Constant *Elt = UndefValue::get(ATy->getElementType()); + Elts.assign(ATy->getNumElements(), Elt); + } else { + assert(0 && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + } + + assert(CI->getZExtValue() < ATy->getNumElements()); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + return ConstantArray::get(ATy, Elts); + } +} + +/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. +static void CommitValueTo(Constant *Val, Constant *Addr) { + if (GlobalVariable *GV = dyn_cast(Addr)) { + assert(GV->hasInitializer()); + GV->setInitializer(Val); + return; + } + + ConstantExpr *CE = cast(Addr); + GlobalVariable *GV = cast(CE->getOperand(0)); + + Constant *Init = GV->getInitializer(); + Init = EvaluateStoreInto(Init, Val, CE, 2); + GV->setInitializer(Init); +} + +/// ComputeLoadResult - Return the value that would be computed by a load from +/// P after the stores reflected by 'memory' have been performed. If we can't +/// decide, return null. +static Constant *ComputeLoadResult(Constant *P, + const DenseMap &Memory) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap::const_iterator I = Memory.find(P); + if (I != Memory.end()) return I->second; + + // Access it. + if (GlobalVariable *GV = dyn_cast(P)) { + if (GV->hasInitializer()) + return GV->getInitializer(); + return 0; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa(CE->getOperand(0))) { + GlobalVariable *GV = cast(CE->getOperand(0)); + if (GV->hasInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return 0; // don't know how to evaluate. +} + +/// EvaluateFunction - Evaluate a call to function F, returning true if +/// successful, false if we can't evaluate it. ActualArgs contains the formal +/// arguments for the function. +static bool EvaluateFunction(Function *F, Constant *&RetVal, + const std::vector &ActualArgs, + std::vector &CallStack, + DenseMap &MutatedMemory, + std::vector &AllocaTmps) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) + return false; + + CallStack.push_back(F); + + /// Values - As we compute SSA register values, we store their contents here. + DenseMap Values; + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + Values[AI] = ActualArgs[ArgNo]; + + /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + /// we can only evaluate any one basic block at most once. This set keeps + /// track of what we have executed so we can detect recursive cases etc. + SmallPtrSet ExecutedBlocks; + + // CurInst - The current instruction we're evaluating. 
+ BasicBlock::iterator CurInst = F->begin()->begin(); + + // This is the main evaluation loop. + while (1) { + Constant *InstResult = 0; + + if (StoreInst *SI = dyn_cast(CurInst)) { + if (SI->isVolatile()) return false; // no volatile accesses. + Constant *Ptr = getVal(Values, SI->getOperand(1)); + if (!isSimpleEnoughPointerToCommit(Ptr)) + // If this is too complex for us to commit, reject it. + return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(Values, BO->getOperand(0)), + getVal(Values, BO->getOperand(1))); + } else if (CmpInst *CI = dyn_cast(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(Values, CI->getOperand(0)), + getVal(Values, CI->getOperand(1))); + } else if (CastInst *CI = dyn_cast(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(Values, CI->getOperand(0)), + CI->getType()); + } else if (SelectInst *SI = dyn_cast(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + getVal(Values, SI->getOperand(1)), + getVal(Values, SI->getOperand(2))); + } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { + Constant *P = getVal(Values, GEP->getOperand(0)); + SmallVector GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(Values, *i)); + InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + } else if (LoadInst *LI = dyn_cast(CurInst)) { + if (LI->isVolatile()) return false; // no volatile accesses. + InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), + MutatedMemory); + if (InstResult == 0) return false; // Could not evaluate load. + } else if (AllocaInst *AI = dyn_cast(CurInst)) { + if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. + const Type *Ty = AI->getType()->getElementType(); + AllocaTmps.push_back(new GlobalVariable(Ty, false, + GlobalValue::InternalLinkage, + UndefValue::get(Ty), + AI->getName())); + InstResult = AllocaTmps.back(); + } else if (CallInst *CI = dyn_cast(CurInst)) { + + // Debug info can safely be ignored here. + if (isa(CI)) { + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa(CI->getOperand(0))) return false; + + // Resolve function pointers. + Function *Callee = dyn_cast(getVal(Values, CI->getOperand(0))); + if (!Callee) return false; // Cannot resolve. + + std::vector Formals; + for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + i != e; ++i) + Formals.push_back(getVal(Values, *i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, &Formals[0], + Formals.size())) { + InstResult = C; + } else { + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) + return false; + + Constant *RetVal; + // Execute the call, if successful, use the return value. + if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, + MutatedMemory, AllocaTmps)) + return false; + InstResult = RetVal; + } + } else if (isa(CurInst)) { + BasicBlock *NewBB = 0; + if (BranchInst *BI = dyn_cast(CurInst)) { + if (BI->isUnconditional()) { + NewBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast(getVal(Values, BI->getCondition())); + if (!Cond) return false; // Cannot determine. 
+
+          NewBB = BI->getSuccessor(!Cond->getZExtValue());
+        }
+      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+        ConstantInt *Val =
+          dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+        if (!Val) return false;  // Cannot determine.
+        NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+      } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
+        if (RI->getNumOperands())
+          RetVal = getVal(Values, RI->getOperand(0));
+
+        CallStack.pop_back();  // return from fn.
+        return true;  // We succeeded at evaluating this ctor!
+      } else {
+        // invoke, unwind, unreachable.
+        return false;  // Cannot handle this terminator.
+      }
+
+      // Okay, we succeeded in evaluating this control flow.  See if we have
+      // executed the new block before.  If so, we have a looping function,
+      // which we cannot evaluate in reasonable time.
+      if (!ExecutedBlocks.insert(NewBB))
+        return false;  // looped!
+
+      // Okay, we have never been in this block before.  Check to see if there
+      // are any PHI nodes.  If so, evaluate them with information about where
+      // we came from.
+      BasicBlock *OldBB = CurInst->getParent();
+      CurInst = NewBB->begin();
+      PHINode *PN;
+      for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+        Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
+
+      // Do NOT increment CurInst.  We know that the terminator had no value.
+      continue;
+    } else {
+      // Did not know how to evaluate this!
+      return false;
+    }
+
+    if (!CurInst->use_empty())
+      Values[CurInst] = InstResult;
+
+    // Advance program counter.
+    ++CurInst;
+  }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
+/// we can.  Return true if we can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F) {
+  /// MutatedMemory - For each store we execute, we update this map.  Loads
+  /// check this to get the most up-to-date value.  If evaluation is successful,
+  /// this state is committed to the process.
+  DenseMap<Constant*, Constant*> MutatedMemory;
+
+  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+  /// to represent its body.  This vector is needed so we can delete the
+  /// temporary globals when we are done.
+  std::vector<GlobalVariable*> AllocaTmps;
+
+  /// CallStack - This is used to detect recursion.  In pathological situations
+  /// we could hit exponential behavior, but at least there is nothing
+  /// unbounded.
+  std::vector<Function*> CallStack;
+
+  // Call the function.
+  Constant *RetValDummy;
+  bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(),
+                                      CallStack, MutatedMemory, AllocaTmps);
+  if (EvalSuccess) {
+    // We succeeded at evaluation: commit the result.
+    DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+         << F->getName() << "' to " << MutatedMemory.size()
+         << " stores.\n";
+    for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+         E = MutatedMemory.end(); I != E; ++I)
+      CommitValueTo(I->second, I->first);
+  }
+
+  // At this point, we are done interpreting.  If we created any 'alloca'
+  // temporaries, release them now.
+  while (!AllocaTmps.empty()) {
+    GlobalVariable *Tmp = AllocaTmps.back();
+    AllocaTmps.pop_back();
+
+    // If there are still users of the alloca, the program is doing something
+    // silly, e.g. storing the address of the alloca somewhere and using it
+    // later.  Since this is undefined, we'll just make it be null.
+    if (!Tmp->use_empty())
+      Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+    delete Tmp;
+  }
+
+  return EvalSuccess;
+}
+
+
+
+/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
+/// Return true if anything changed.
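+///
+/// For instance (an illustrative sketch), a C++ translation unit containing
+///   static int foo() { return 4; }
+///   static int X = foo();
+/// produces a global_ctors entry whose body reduces to a store of 4 into @X;
+/// the evaluator commits '@X = internal global i32 4' and the entry can be
+/// removed from the list.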
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { + std::vector Ctors = ParseGlobalCtors(GCL); + bool MadeChange = false; + if (Ctors.empty()) return false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (F == 0) { + if (i != Ctors.size()-1) { + Ctors.resize(i+1); + MadeChange = true; + } + break; + } + + // We cannot simplify external ctor functions. + if (F->empty()) continue; + + // If we can evaluate the ctor at compile time, do. + if (EvaluateStaticConstructor(F)) { + Ctors.erase(Ctors.begin()+i); + MadeChange = true; + --i; + ++NumCtorsEvaluated; + continue; + } + } + + if (!MadeChange) return false; + + GCL = InstallGlobalCtors(GCL, Ctors); + return true; +} + +bool GlobalOpt::OptimizeGlobalAliases(Module &M) { + bool Changed = false; + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E;) { + Module::alias_iterator J = I++; + // Aliases without names cannot be referenced outside this module. + if (!J->hasName() && !J->isDeclaration()) + J->setLinkage(GlobalValue::InternalLinkage); + // If the aliasee may change at link time, nothing can be done - bail out. + if (J->mayBeOverridden()) + continue; + + Constant *Aliasee = J->getAliasee(); + GlobalValue *Target = cast(Aliasee->stripPointerCasts()); + Target->removeDeadConstantUsers(); + bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); + + // Make all users of the alias use the aliasee instead. + if (!J->use_empty()) { + J->replaceAllUsesWith(Aliasee); + ++NumAliasesResolved; + Changed = true; + } + + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; + + // The transform is only useful if the alias does not have internal linkage. + if (J->hasLocalLinkage()) + continue; + + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; + + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + + // Delete the alias. + M.getAliasList().erase(J); + ++NumAliasesRemoved; + Changed = true; + } + + return Changed; +} + +bool GlobalOpt::runOnModule(Module &M) { + bool Changed = false; + + // Try to find the llvm.globalctors list. + GlobalVariable *GlobalCtors = FindGlobalCtors(M); + + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + // Delete functions that are trivially dead, ccc -> fastcc + LocalChange |= OptimizeFunctions(M); + + // Optimize global_ctors list. + if (GlobalCtors) + LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + + // Optimize non-address-taken globals. + LocalChange |= OptimizeGlobalVars(M); + + // Resolve aliases, when possible. + LocalChange |= OptimizeGlobalAliases(M); + Changed |= LocalChange; + } + + // TODO: Move all global ctors functions to the end of the module for code + // layout. 
+ + return Changed; +} diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp new file mode 100644 index 000000000000..2dc855824691 --- /dev/null +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -0,0 +1,277 @@ +//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an _extremely_ simple interprocedural constant +// propagation pass. It could certainly be improved in many different ways, +// like using a worklist. This pass makes arguments dead, but does not remove +// them. The existing dead argument elimination pass should be run after this +// to clean up the mess. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ipconstprop" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +STATISTIC(NumArgumentsProped, "Number of args turned into constants"); +STATISTIC(NumReturnValProped, "Number of return values turned into constants"); + +namespace { + /// IPCP - The interprocedural constant propagation pass + /// + struct VISIBILITY_HIDDEN IPCP : public ModulePass { + static char ID; // Pass identification, replacement for typeid + IPCP() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + private: + bool PropagateConstantsIntoArguments(Function &F); + bool PropagateConstantReturn(Function &F); + }; +} + +char IPCP::ID = 0; +static RegisterPass +X("ipconstprop", "Interprocedural constant propagation"); + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + // Delete any klingons. + I->removeDeadConstantUsers(); + if (I->hasLocalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(*I); + Changed |= PropagateConstantReturn(*I); + } + Changed |= LocalChange; + } + return Changed; +} + +/// PropagateConstantsIntoArguments - Look at all uses of the specified +/// function. If all uses are direct call sites, and all pass a particular +/// constant in for an argument, propagate that constant in as the argument. +/// +bool IPCP::PropagateConstantsIntoArguments(Function &F) { + if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. + + // For each argument, keep track of its constant value and whether it is a + // constant or not. The bool is driven to true when found to be non-constant. + SmallVector, 16> ArgumentConstants; + ArgumentConstants.resize(F.arg_size()); + + unsigned NumNonconstant = 0; + for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + // Used by a non-instruction, or not the callee of a function, do not + // transform. 
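+    // For example (an illustrative sketch), in
+    //   call void @g(void (i32)* @F)
+    // F appears as an argument rather than as the callee, so its address
+    // escapes and we cannot see all of its call sites.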
+ if (!isa(*UI) && !isa(*UI)) + return false; + + CallSite CS = CallSite::get(cast(*UI)); + if (!CS.isCallee(UI)) + return false; + + // Check out all of the potentially constant arguments. Note that we don't + // inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; + ++i, ++AI, ++Arg) { + + // If this argument is known non-constant, ignore it. + if (ArgumentConstants[i].second) + continue; + + Constant *C = dyn_cast(*AI); + if (C && ArgumentConstants[i].first == 0) { + ArgumentConstants[i].first = C; // First constant seen. + } else if (C && ArgumentConstants[i].first == C) { + // Still the constant value we think it is. + } else if (*AI == &*Arg) { + // Ignore recursive calls passing argument down. + } else { + // Argument became non-constant. If all arguments are non-constant now, + // give up on this function. + if (++NumNonconstant == ArgumentConstants.size()) + return false; + ArgumentConstants[i].second = true; + } + } + } + + // If we got to this point, there is a constant argument! + assert(NumNonconstant != ArgumentConstants.size()); + bool MadeChange = false; + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { + // Do we have a constant argument? + if (ArgumentConstants[i].second || AI->use_empty()) + continue; + + Value *V = ArgumentConstants[i].first; + if (V == 0) V = UndefValue::get(AI->getType()); + AI->replaceAllUsesWith(V); + ++NumArgumentsProped; + MadeChange = true; + } + return MadeChange; +} + + +// Check to see if this function returns one or more constants. If so, replace +// all callers that use those return values with the constant value. This will +// leave in the actual return values and instructions, but deadargelim will +// clean that up. +// +// Additionally if a function always returns one of its arguments directly, +// callers will be updated to use the value they pass in directly instead of +// using the return value. +bool IPCP::PropagateConstantReturn(Function &F) { + if (F.getReturnType() == Type::VoidTy) + return false; // No return value. + + // If this function could be overridden later in the link stage, we can't + // propagate information about its results into callers. + if (F.mayBeOverridden()) + return false; + + // Check to see if this function returns a constant. + SmallVector RetVals; + const StructType *STy = dyn_cast(F.getReturnType()); + if (STy) + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) + RetVals.push_back(UndefValue::get(STy->getElementType(i))); + else + RetVals.push_back(UndefValue::get(F.getReturnType())); + + unsigned NumNonConstant = 0; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { + for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { + // Already found conflicting return values? + Value *RV = RetVals[i]; + if (!RV) + continue; + + // Find the returned value + Value *V; + if (!STy) + V = RI->getOperand(i); + else + V = FindInsertedValue(RI->getOperand(0), i); + + if (V) { + // Ignore undefs, we can change them into anything + if (isa(V)) + continue; + + // Try to see if all the rets return the same constant or argument. + if (isa(V) || isa(V)) { + if (isa(RV)) { + // No value found yet? Try the current one. + RetVals[i] = V; + continue; + } + // Returning the same value? Good. 
+            if (RV == V)
+              continue;
+          }
+        }
+        // Different or no known return value? Don't propagate this return
+        // value.
+        RetVals[i] = 0;
+        // All values non-constant? Stop looking.
+        if (++NumNonConstant == RetVals.size())
+          return false;
+      }
+    }
+
+  // If we got here, the function returns at least one constant value.  Loop
+  // over all users, replacing any uses of the return value with the returned
+  // constant.
+  bool MadeChange = false;
+  for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+    CallSite CS = CallSite::get(*UI);
+    Instruction* Call = CS.getInstruction();
+
+    // Not a call instruction or a call instruction that's not calling F
+    // directly?
+    if (!Call || !CS.isCallee(UI))
+      continue;
+
+    // Call result not used?
+    if (Call->use_empty())
+      continue;
+
+    MadeChange = true;
+
+    if (STy == 0) {
+      Value* New = RetVals[0];
+      if (Argument *A = dyn_cast<Argument>(New))
+        // Was an argument returned? Then find the corresponding argument in
+        // the call instruction and use that.
+        New = CS.getArgument(A->getArgNo());
+      Call->replaceAllUsesWith(New);
+      continue;
+    }
+
+    for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+         I != E;) {
+      Instruction *Ins = dyn_cast<Instruction>(*I);
+
+      // Increment now, so we can remove the use.
+      ++I;
+
+      // Not an instruction? Ignore.
+      if (!Ins)
+        continue;
+
+      // Find the index of the retval to replace with.
+      int index = -1;
+      if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+        if (EV->hasIndices())
+          index = *EV->idx_begin();
+
+      // If this use uses a specific return value, and we have a replacement,
+      // replace it.
+      if (index != -1) {
+        Value *New = RetVals[index];
+        if (New) {
+          if (Argument *A = dyn_cast<Argument>(New))
+            // Was an argument returned? Then find the corresponding argument
+            // in the call instruction and use that.
+            New = CS.getArgument(A->getArgNo());
+          Ins->replaceAllUsesWith(New);
+          Ins->eraseFromParent();
+        }
+      }
+    }
+  }
+
+  if (MadeChange) ++NumReturnValProped;
+  return MadeChange;
+}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 000000000000..43066076ca68
--- /dev/null
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,75 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMIPO.a, which implements
+// several transformations over the LLVM intermediate representation.
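+//
+// Typical use from the C API looks like this (an illustrative sketch; the
+// pass manager functions themselves are declared in llvm-c/Core.h):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddGlobalOptimizerPass(PM);
+//   LLVMAddIPConstantPropagationPass(PM);
+//   LLVMRunPassManager(PM, Mod);
+//   LLVMDisposePassManager(PM);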
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createDeadTypeEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createLowerSetJmpPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createRaiseAllocationsPass());
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createStripSymbolsPass());
+}
diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp
new file mode 100644
index 000000000000..b55dea2c759c
--- /dev/null
+++ b/lib/Transforms/IPO/IndMemRemoval.cpp
@@ -0,0 +1,89 @@
+//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds places where memory allocation functions may escape into
+// indirect land.  Some transforms are much easier (or only possible) when
+// free or malloc are not called indirectly.  It therefore finds the places
+// where the address of a memory function is taken, and constructs a bounce
+// function that calls the real function directly.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indmemrem" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumBounceSites, "Number of sites modified"); +STATISTIC(NumBounce , "Number of bounce functions created"); + +namespace { + class VISIBILITY_HIDDEN IndMemRemPass : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + IndMemRemPass() : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + }; +} // end anonymous namespace + +char IndMemRemPass::ID = 0; +static RegisterPass +X("indmemrem","Indirect Malloc and Free Removal"); + +bool IndMemRemPass::runOnModule(Module &M) { + // In theory, all direct calls of malloc and free should be promoted + // to intrinsics. Therefore, this goes through and finds where the + // address of free or malloc are taken and replaces those with bounce + // functions, ensuring that all malloc and free that might happen + // happen through intrinsics. + bool changed = false; + if (Function* F = M.getFunction("free")) { + if (F->isDeclaration() && F->arg_size() == 1 && !F->use_empty()) { + Function* FN = Function::Create(F->getFunctionType(), + GlobalValue::LinkOnceAnyLinkage, + "free_llvm_bounce", &M); + BasicBlock* bb = BasicBlock::Create("entry",FN); + Instruction* R = ReturnInst::Create(bb); + new FreeInst(FN->arg_begin(), R); + ++NumBounce; + NumBounceSites += F->getNumUses(); + F->replaceAllUsesWith(FN); + changed = true; + } + } + if (Function* F = M.getFunction("malloc")) { + if (F->isDeclaration() && F->arg_size() == 1 && !F->use_empty()) { + Function* FN = Function::Create(F->getFunctionType(), + GlobalValue::LinkOnceAnyLinkage, + "malloc_llvm_bounce", &M); + FN->setDoesNotAlias(0); + BasicBlock* bb = BasicBlock::Create("entry",FN); + Instruction* c = CastInst::CreateIntegerCast( + FN->arg_begin(), Type::Int32Ty, false, "c", bb); + Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb); + ReturnInst::Create(a, bb); + ++NumBounce; + NumBounceSites += F->getNumUses(); + F->replaceAllUsesWith(FN); + changed = true; + } + } + return changed; +} + +ModulePass *llvm::createIndMemRemPass() { + return new IndMemRemPass(); +} diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp new file mode 100644 index 000000000000..5f9ea5453c1f --- /dev/null +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -0,0 +1,75 @@ +//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a custom inliner that handles only functions that +// are marked as "always inline". 
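+//
+// For example (an illustrative sketch), a function written in C as
+//   __attribute__((always_inline)) static int sq(int x) { return x * x; }
+// carries the 'alwaysinline' attribute in the IR, and this inliner inlines
+// it into every caller regardless of its size.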
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+  // AlwaysInliner only inlines functions that are marked as "always inline".
+  class VISIBILITY_HIDDEN AlwaysInliner : public Inliner {
+    // Functions that are never inlined
+    SmallPtrSet<const Function*, 16> NeverInline;
+    InlineCostAnalyzer CA;
+  public:
+    // Use extremely low threshold.
+    AlwaysInliner() : Inliner(&ID, -2000000000) {}
+    static char ID; // Pass identification, replacement for typeid
+    InlineCost getInlineCost(CallSite CS) {
+      return CA.getInlineCost(CS, NeverInline);
+    }
+    float getInlineFudgeFactor(CallSite CS) {
+      return CA.getInlineFudgeFactor(CS);
+    }
+    void resetCachedCostInfo(Function *Caller) {
+      return CA.resetCachedCostInfo(Caller);
+    }
+    virtual bool doFinalization(CallGraph &CG) {
+      return removeDeadFunctions(CG, &NeverInline);
+    }
+    virtual bool doInitialization(CallGraph &CG);
+  };
+}
+
+char AlwaysInliner::ID = 0;
+static RegisterPass<AlwaysInliner>
+X("always-inline", "Inliner for always_inline functions");
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+// doInitialization - Initializes the set of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
+  Module &M = CG.getModule();
+
+  for (Module::iterator I = M.begin(), E = M.end();
+       I != E; ++I)
+    if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
+      NeverInline.insert(I);
+
+  return false;
+}
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 000000000000..e107a0023ce6
--- /dev/null
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,106 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
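// What feeds the AlwaysInliner above: in C/C++ source, always_inline marks a
// function so that it carries Attribute::AlwaysInline at the IR level. A
// hypothetical example:

__attribute__((always_inline)) static inline int sq(int x) { return x * x; }

// With the constructor's threshold of -2000000000, a numeric inline cost can
// essentially never pass the cost < threshold test in shouldInline (below in
// Inliner.cpp), so in the normal case only call sites whose cost is "always"
// (which InlineCostAnalyzer reports for always_inline callees) get inlined.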
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+  class VISIBILITY_HIDDEN SimpleInliner : public Inliner {
+    // Functions that are never inlined
+    SmallPtrSet<const Function*, 16> NeverInline;
+    InlineCostAnalyzer CA;
+  public:
+    SimpleInliner() : Inliner(&ID) {}
+    SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+    static char ID; // Pass identification, replacement for typeid
+    InlineCost getInlineCost(CallSite CS) {
+      return CA.getInlineCost(CS, NeverInline);
+    }
+    float getInlineFudgeFactor(CallSite CS) {
+      return CA.getInlineFudgeFactor(CS);
+    }
+    void resetCachedCostInfo(Function *Caller) {
+      CA.resetCachedCostInfo(Caller);
+    }
+    virtual bool doInitialization(CallGraph &CG);
+  };
+}
+
+char SimpleInliner::ID = 0;
+static RegisterPass<SimpleInliner>
+X("inline", "Function Integration/Inlining");
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+  return new SimpleInliner(Threshold);
+}
+
+// doInitialization - Initializes the set of functions that have been
+// annotated with the noinline attribute.
+bool SimpleInliner::doInitialization(CallGraph &CG) {
+
+  Module &M = CG.getModule();
+
+  for (Module::iterator I = M.begin(), E = M.end();
+       I != E; ++I)
+    if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
+      NeverInline.insert(I);
+
+  // Get llvm.noinline
+  GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
+
+  if (GV == 0)
+    return false;
+
+  // Don't crash on invalid code
+  if (!GV->hasInitializer())
+    return false;
+
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+  if (InitList == 0)
+    return false;
+
+  // Iterate over each element and add to the NeverInline set
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+    // Get Source
+    const Constant *Elt = InitList->getOperand(i);
+
+    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
+      if (CE->getOpcode() == Instruction::BitCast)
+        Elt = CE->getOperand(0);
+
+    // Insert into set of functions to never inline
+    if (const Function *F = dyn_cast<Function>(Elt))
+      NeverInline.insert(F);
+  }
+
+  return false;
+}
+
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 000000000000..b382837289bd
--- /dev/null
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,278 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls, while keeping the call graph up to date. The decisions
+// about which calls are profitable to inline are implemented elsewhere.
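// A driver sketch for the SimpleInliner defined above, assuming a loaded
// Module `M` (the function name is a placeholder). Inliner::runOnSCC (in the
// file that follows) queries TargetData, so one must be added to the pass
// manager; 200 matches the -inline-threshold default, and a call site is
// inlined only while cost < threshold * fudge factor.

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO.h"

static void runInliner(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(new llvm::TargetData(&M));              // required by runOnSCC
  PM.add(llvm::createFunctionInliningPass(200)); // threshold in "cost" units
  PM.run(M);
}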
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+        cl::desc("Control the amount of inlining to perform (default = 200)"));
+
+Inliner::Inliner(void *ID)
+  : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+
+Inliner::Inliner(void *ID, int Threshold)
+  : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
+  Info.addRequired<CallGraph>();
+  CallGraphSCCPass::getAnalysisUsage(Info);
+}
+
+// InlineCallIfPossible - If it is possible to inline the specified call site,
+// do so and update the CallGraph for this operation.
+bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
+                                 const SmallPtrSet<Function*, 8> &SCCFunctions,
+                                 const TargetData &TD) {
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = CS.getCaller();
+
+  if (!InlineFunction(CS, &CG, &TD)) return false;
+
+  // If the inlined function had a higher stack protection level than the
+  // calling function, then bump up the caller's stack protection level.
+  if (Callee->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtectReq);
+  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+           !Caller->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtect);
+
+  // If we inlined the last possible call site to the function, delete the
+  // function body now.
+  if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+                              Callee->hasAvailableExternallyLinkage()) &&
+      !SCCFunctions.count(Callee)) {
+    DOUT << "    -> Deleting dead function: " << Callee->getName() << "\n";
+    CallGraphNode *CalleeNode = CG[Callee];
+
+    // Remove any call graph edges from the callee to its callees.
+    CalleeNode->removeAllCalledFunctions();
+
+    resetCachedCostInfo(CalleeNode->getFunction());
+
+    // Remove the node for the callee from the call graph and delete it.
+    delete CG.removeFunctionFromModule(CalleeNode);
+    ++NumDeleted;
+  }
+  return true;
+}
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+  InlineCost IC = getInlineCost(CS);
+  float FudgeFactor = getInlineFudgeFactor(CS);
+
+  if (IC.isAlways()) {
+    DOUT << "    Inlining: cost=always"
+         << ", Call: " << *CS.getInstruction();
+    return true;
+  }
+
+  if (IC.isNever()) {
+    DOUT << "    NOT Inlining: cost=never"
+         << ", Call: " << *CS.getInstruction();
+    return false;
+  }
+
+  int Cost = IC.getValue();
+  int CurrentThreshold = InlineThreshold;
+  Function *Fn = CS.getCaller();
+  if (Fn && !Fn->isDeclaration()
+      && Fn->hasFnAttr(Attribute::OptimizeForSize)
+      && InlineThreshold != 50) {
+    CurrentThreshold = 50;
+  }
+
+  if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
+    DOUT << "    NOT Inlining: cost=" << Cost
+         << ", Call: " << *CS.getInstruction();
+    return false;
+  } else {
+    DOUT << "    Inlining: cost=" << Cost
+         << ", Call: " << *CS.getInstruction();
+    return true;
+  }
+}
+
+bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
+  CallGraph &CG = getAnalysis<CallGraph>();
+  TargetData &TD = getAnalysis<TargetData>();
+
+  SmallPtrSet<Function*, 8> SCCFunctions;
+  DOUT << "Inliner visiting SCC:";
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+    Function *F = SCC[i]->getFunction();
+    if (F) SCCFunctions.insert(F);
+    DOUT << " " << (F ? F->getName() : "INDIRECTNODE");
+  }
+
+  // Scan through and identify all call sites ahead of time so that we only
+  // inline call sites in the original functions, not call sites that result
+  // from inlining other functions.
+  std::vector<CallSite> CallSites;
+
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+    if (Function *F = SCC[i]->getFunction())
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+        for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+          CallSite CS = CallSite::get(I);
+          if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) &&
+              (!CS.getCalledFunction() ||
+               !CS.getCalledFunction()->isDeclaration()))
+            CallSites.push_back(CS);
+        }
+
+  DOUT << ": " << CallSites.size() << " call sites.\n";
+
+  // Now that we have all of the call sites, move the ones to functions in the
+  // current SCC to the end of the list.
+  unsigned FirstCallInSCC = CallSites.size();
+  for (unsigned i = 0; i < FirstCallInSCC; ++i)
+    if (Function *F = CallSites[i].getCalledFunction())
+      if (SCCFunctions.count(F))
+        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+  // Now that we have all of the call sites, loop over them and inline them if
+  // it looks profitable to do so.
+  bool Changed = false;
+  bool LocalChange;
+  do {
+    LocalChange = false;
+    // Iterate over the outer loop because inlining functions can cause
+    // indirect calls to become direct calls.
+    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi)
+      if (Function *Callee = CallSites[CSi].getCalledFunction()) {
+        // Calls to external functions are never inlinable.
+        if (Callee->isDeclaration()) {
+          if (SCC.size() == 1) {
+            std::swap(CallSites[CSi], CallSites.back());
+            CallSites.pop_back();
+          } else {
+            // Keep the 'in SCC / not in SCC' boundary correct.
+            CallSites.erase(CallSites.begin()+CSi);
+          }
+          --CSi;
+          continue;
+        }
+
+        // If the policy determines that we should inline this function,
+        // try to do so.
+        CallSite CS = CallSites[CSi];
+        if (shouldInline(CS)) {
+          Function *Caller = CS.getCaller();
+          // Attempt to inline the function...
+          if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) {
+            // Remove any cached cost info for this caller, as inlining the
+            // callee has increased the size of the caller (which may be the
+            // same as the callee).
+            resetCachedCostInfo(Caller);
+
+            // Remove this call site from the list.  If possible, use
+            //    swap/pop_back for efficiency, but do not use it if doing so
+            //    would move a call site to a function in this SCC before the
+            //    'FirstCallInSCC' barrier.
+            if (SCC.size() == 1) {
+              std::swap(CallSites[CSi], CallSites.back());
+              CallSites.pop_back();
+            } else {
+              CallSites.erase(CallSites.begin()+CSi);
+            }
+            --CSi;
+
+            ++NumInlined;
+            Changed = true;
+            LocalChange = true;
+          }
+        }
+      }
+  } while (LocalChange);
+
+  return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+  return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG,
+                                  SmallPtrSet<const Function *, 16> *DNR) {
+  std::set<CallGraphNode*> FunctionsToRemove;
+
+  // Scan for all of the functions, looking for ones that should now be removed
+  // from the program. Insert the dead ones in the FunctionsToRemove set.
+  for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+    CallGraphNode *CGN = I->second;
+    if (Function *F = CGN ? CGN->getFunction() : 0) {
+      // If the only remaining users of the function are dead constants, remove
+      // them.
+      F->removeDeadConstantUsers();
+
+      if (DNR && DNR->count(F))
+        continue;
+
+      if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) &&
+          F->use_empty()) {
+
+        // Remove any call graph edges from the function to its callees.
+        CGN->removeAllCalledFunctions();
+
+        // Remove any edges from the external node to the function's call graph
+        // node. These edges might have been made irrelevant due to
+        // optimization of the program.
+        CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+        // Remove the node for the callee from the call graph and delete it.
+        FunctionsToRemove.insert(CGN);
+      }
+    }
+  }
+
+  // Now that we know which functions to delete, do so. We didn't want to do
+  // this inline, because that would invalidate our CallGraph::iterator
+  // objects. :(
+  bool Changed = false;
+  for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(),
+         E = FunctionsToRemove.end(); I != E; ++I) {
+    resetCachedCostInfo((*I)->getFunction());
+    delete CG.removeFunctionFromModule(*I);
+    ++NumDeleted;
+    Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 000000000000..5093ae90b5ba
--- /dev/null
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,184 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for a
+// main function. If a main function is found, all other functions and all
+// global variables with initializers are marked as internal.
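// A usage sketch for the InternalizePass described above, assuming a loaded
// Module `M` (the symbol name "foo" and the function name are placeholders).
// The export-list constructor appears later in this file; the same effect is
// available on the opt command line via
// -internalize -internalize-public-api-list=foo.

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
#include <vector>

static void internalizeAllButFoo(llvm::Module &M) {
  std::vector<const char *> Keep;
  Keep.push_back("foo");                     // symbols that stay external
  llvm::PassManager PM;
  PM.add(llvm::createInternalizePass(Keep)); // everything else goes internal
  PM.run(M);
}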
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "internalize" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include +#include +using namespace llvm; + +STATISTIC(NumAliases , "Number of aliases internalized"); +STATISTIC(NumFunctions, "Number of functions internalized"); +STATISTIC(NumGlobals , "Number of global vars internalized"); + +// APIFile - A file which contains a list of symbols that should not be marked +// external. +static cl::opt +APIFile("internalize-public-api-file", cl::value_desc("filename"), + cl::desc("A file containing list of symbol names to preserve")); + +// APIList - A list of symbols that should not be marked internal. +static cl::list +APIList("internalize-public-api-list", cl::value_desc("list"), + cl::desc("A list of symbol names to preserve"), + cl::CommaSeparated); + +namespace { + class VISIBILITY_HIDDEN InternalizePass : public ModulePass { + std::set ExternalNames; + /// If no api symbols were specified and a main function is defined, + /// assume the main function is the only API + bool AllButMain; + public: + static char ID; // Pass identification, replacement for typeid + explicit InternalizePass(bool AllButMain = true); + explicit InternalizePass(const std::vector & exportList); + void LoadFile(const char *Filename); + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + } + }; +} // end anonymous namespace + +char InternalizePass::ID = 0; +static RegisterPass +X("internalize", "Internalize Global Symbols"); + +InternalizePass::InternalizePass(bool AllButMain) + : ModulePass(&ID), AllButMain(AllButMain){ + if (!APIFile.empty()) // If a filename is specified, use it. + LoadFile(APIFile.c_str()); + if (!APIList.empty()) // If a list is specified, use it as well. + ExternalNames.insert(APIList.begin(), APIList.end()); +} + +InternalizePass::InternalizePass(const std::vector&exportList) + : ModulePass(&ID), AllButMain(false){ + for(std::vector::const_iterator itr = exportList.begin(); + itr != exportList.end(); itr++) { + ExternalNames.insert(*itr); + } +} + +void InternalizePass::LoadFile(const char *Filename) { + // Load the APIFile... + std::ifstream In(Filename); + if (!In.good()) { + cerr << "WARNING: Internalize couldn't load file '" << Filename + << "'! Continuing as if it's empty.\n"; + return; // Just continue as if the file were empty + } + while (In) { + std::string Symbol; + In >> Symbol; + if (!Symbol.empty()) + ExternalNames.insert(Symbol); + } +} + +bool InternalizePass::runOnModule(Module &M) { + CallGraph *CG = getAnalysisIfAvailable(); + CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; + + if (ExternalNames.empty()) { + // Return if we're not in 'all but main' mode and have no external api + if (!AllButMain) + return false; + // If no list or file of symbols was specified, check to see if there is a + // "main" symbol defined in the module. If so, use it, otherwise do not + // internalize the module, it must be a library or something. + // + Function *MainFunc = M.getFunction("main"); + if (MainFunc == 0 || MainFunc->isDeclaration()) + return false; // No main found, must be a library... + + // Preserve main, internalize all else. 
+ ExternalNames.insert(MainFunc->getName()); + } + + bool Changed = false; + + // Mark all functions not in the api as internal. + // FIXME: maybe use private linkage? + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && // Function must be defined here + !I->hasLocalLinkage() && // Can't already have internal linkage + !ExternalNames.count(I->getName())) {// Not marked to keep external? + I->setLinkage(GlobalValue::InternalLinkage); + // Remove a callgraph edge from the external node to this function. + if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + Changed = true; + ++NumFunctions; + DOUT << "Internalizing func " << I->getName() << "\n"; + } + + // Never internalize the llvm.used symbol. It is used to implement + // attribute((used)). + ExternalNames.insert("llvm.used"); + + // Never internalize anchors used by the machine module info, else the info + // won't find them. (see MachineModuleInfo.) + ExternalNames.insert("llvm.dbg.compile_units"); + ExternalNames.insert("llvm.dbg.global_variables"); + ExternalNames.insert("llvm.dbg.subprograms"); + ExternalNames.insert("llvm.global_ctors"); + ExternalNames.insert("llvm.global_dtors"); + ExternalNames.insert("llvm.noinline"); + ExternalNames.insert("llvm.global.annotations"); + + // Mark all global variables with initializers that are not in the api as + // internal as well. + // FIXME: maybe use private linkage? + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasLocalLinkage() && + !ExternalNames.count(I->getName())) { + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumGlobals; + DOUT << "Internalized gvar " << I->getName() << "\n"; + } + + // Mark all aliases that are not in the api as internal as well. + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasInternalLinkage() && + !ExternalNames.count(I->getName())) { + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumAliases; + DOUT << "Internalized alias " << I->getName() << "\n"; + } + + return Changed; +} + +ModulePass *llvm::createInternalizePass(bool AllButMain) { + return new InternalizePass(AllButMain); +} + +ModulePass *llvm::createInternalizePass(const std::vector &el) { + return new InternalizePass(el); +} diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp new file mode 100644 index 000000000000..0c654438d508 --- /dev/null +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -0,0 +1,261 @@ +//===- LoopExtractor.cpp - Extract each loop into a new function ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A pass wrapper around the ExtractLoop() scalar transformation to extract each +// top-level loop into its own new function. If the loop is the ONLY loop in a +// given function, it is not touched. This is a pass most useful for debugging +// via bugpoint. 
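// A usage sketch for the loop extractor described above. The pass names come
// from the RegisterPass lines that follow; the bitcode file names are
// placeholders:
//
//   opt -loop-extract        in.bc -o out.bc   (every top-level loop)
//   opt -loop-extract-single in.bc -o out.bc   (at most one loop)
//
// Programmatically, assuming a loaded Module `M`:

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"

static void extractLoops(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createLoopExtractorPass()); // pulls in LoopSimplify etc.
  PM.run(M);
}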
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-extract" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" +#include +#include +using namespace llvm; + +STATISTIC(NumExtracted, "Number of loops extracted"); + +namespace { + // FIXME: This is not a function pass, but the PassManager doesn't allow + // Module passes to require FunctionPasses, so we can't get loop info if we're + // not a function pass. + struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + unsigned NumLoops; + + explicit LoopExtractor(unsigned numLoops = ~0) + : FunctionPass(&ID), NumLoops(numLoops) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); + } + }; +} + +char LoopExtractor::ID = 0; +static RegisterPass +X("loop-extract", "Extract loops into new functions"); + +namespace { + /// SingleLoopExtractor - For bugpoint. + struct SingleLoopExtractor : public LoopExtractor { + static char ID; // Pass identification, replacement for typeid + SingleLoopExtractor() : LoopExtractor(1) {} + }; +} // End anonymous namespace + +char SingleLoopExtractor::ID = 0; +static RegisterPass +Y("loop-extract-single", "Extract at most one loop into a new function"); + +// createLoopExtractorPass - This pass extracts all natural loops from the +// program into a function if it can. +// +FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } + +bool LoopExtractor::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis(); + + // If this function has no loops, there is nothing to do. + if (LI.empty()) + return false; + + DominatorTree &DT = getAnalysis(); + + // If there is more than one top-level loop in this function, extract all of + // the loops. + bool Changed = false; + if (LI.end()-LI.begin() > 1) { + for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, *i) != 0; + ++NumExtracted; + } + } else { + // Otherwise there is exactly one top-level loop. If this function is more + // than a minimal wrapper around the loop, extract the loop. + Loop *TLL = *LI.begin(); + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = F.getEntryBlock().getTerminator(); + if (!isa(EntryTI) || + !cast(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != TLL->getHeader()) + ShouldExtractLoop = true; + else { + // Check to see if any exits from the loop are more than just return + // blocks. 
+ SmallVector ExitBlocks; + TLL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isa(ExitBlocks[i]->getTerminator())) { + ShouldExtractLoop = true; + break; + } + } + + if (ShouldExtractLoop) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, TLL) != 0; + ++NumExtracted; + } else { + // Okay, this function is a minimal container around the specified loop. + // If we extract the loop, we will continue to just keep extracting it + // infinitely... so don't extract it. However, if the loop contains any + // subloops, extract them. + for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, *i) != 0; + ++NumExtracted; + } + } + } + + return Changed; +} + +// createSingleLoopExtractorPass - This pass extracts one natural loop from the +// program into a function if it can. This is used by bugpoint. +// +FunctionPass *llvm::createSingleLoopExtractorPass() { + return new SingleLoopExtractor(); +} + + +// BlockFile - A file which contains a list of blocks that should not be +// extracted. +static cl::opt +BlockFile("extract-blocks-file", cl::value_desc("filename"), + cl::desc("A file containing list of basic blocks to not extract"), + cl::Hidden); + +namespace { + /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks + /// from the module into their own functions except for those specified by the + /// BlocksToNotExtract list. + class BlockExtractorPass : public ModulePass { + void LoadFile(const char *Filename); + + std::vector BlocksToNotExtract; + std::vector > BlocksToNotExtractByName; + public: + static char ID; // Pass identification, replacement for typeid + explicit BlockExtractorPass(const std::vector &B) + : ModulePass(&ID), BlocksToNotExtract(B) { + if (!BlockFile.empty()) + LoadFile(BlockFile.c_str()); + } + BlockExtractorPass() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + }; +} + +char BlockExtractorPass::ID = 0; +static RegisterPass +XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)"); + +// createBlockExtractorPass - This pass extracts all blocks (except those +// specified in the argument list) from the functions in the module. +// +ModulePass *llvm::createBlockExtractorPass(const std::vector &BTNE) +{ + return new BlockExtractorPass(BTNE); +} + +void BlockExtractorPass::LoadFile(const char *Filename) { + // Load the BlockFile... + std::ifstream In(Filename); + if (!In.good()) { + cerr << "WARNING: BlockExtractor couldn't load file '" << Filename + << "'!\n"; + return; + } + while (In) { + std::string FunctionName, BlockName; + In >> FunctionName; + In >> BlockName; + if (!BlockName.empty()) + BlocksToNotExtractByName.push_back( + std::make_pair(FunctionName, BlockName)); + } +} + +bool BlockExtractorPass::runOnModule(Module &M) { + std::set TranslatedBlocksToNotExtract; + for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { + BasicBlock *BB = BlocksToNotExtract[i]; + Function *F = BB->getParent(); + + // Map the corresponding function in this module. + Function *MF = M.getFunction(F->getName()); + assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?"); + + // Figure out which index the basic block is in its function. 
+ Function::iterator BBI = MF->begin(); + std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); + TranslatedBlocksToNotExtract.insert(BBI); + } + + while (!BlocksToNotExtractByName.empty()) { + // There's no way to find BBs by name without looking at every BB inside + // every Function. Fortunately, this is always empty except when used by + // bugpoint in which case correctness is more important than performance. + + std::string &FuncName = BlocksToNotExtractByName.back().first; + std::string &BlockName = BlocksToNotExtractByName.back().second; + + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + Function &F = *FI; + if (F.getName() != FuncName) continue; + + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + BasicBlock &BB = *BI; + if (BB.getName() != BlockName) continue; + + TranslatedBlocksToNotExtract.insert(BI); + } + } + + BlocksToNotExtractByName.pop_back(); + } + + // Now that we know which blocks to not extract, figure out which ones we WANT + // to extract. + std::vector BlocksToExtract; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (!TranslatedBlocksToNotExtract.count(BB)) + BlocksToExtract.push_back(BB); + + for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) + ExtractBasicBlock(BlocksToExtract[i]); + + return !BlocksToExtract.empty(); +} diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp new file mode 100644 index 000000000000..dfc040b83342 --- /dev/null +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -0,0 +1,536 @@ +//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering of setjmp and longjmp to use the +// LLVM invoke and unwind instructions as necessary. +// +// Lowering of longjmp is fairly trivial. We replace the call with a +// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()". +// This unwinds the stack for us calling all of the destructors for +// objects allocated on the stack. +// +// At a setjmp call, the basic block is split and the setjmp removed. +// The calls in a function that have a setjmp are converted to invoke +// where the except part checks to see if it's a longjmp exception and, +// if so, if it's handled in the function. If it is, then it gets the +// value returned by the longjmp and goes to where the basic block was +// split. Invoke instructions are handled in a similar fashion with the +// original except block being executed if it isn't a longjmp except +// that is handled by that function. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FIXME: This pass doesn't deal with PHI statements just yet. That is, +// we expect this to occur before SSAification is done. This would seem +// to make sense, but in general, it might be a good idea to make this +// pass invokable via the "opt" command at will. 
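// To make the description above concrete, this is the C pattern the pass
// rewrites (an illustration, not code from the patch):

#include <setjmp.h>

static jmp_buf buf;

int demo(void (*mayLongJmp)(void)) {
  if (setjmp(buf))   // split point: becomes a branch plus a PHI of 0 and the
    return 1;        // value recovered by __llvm_sjljeh_get_longjmp_value
  mayLongJmp();      // call reachable from the setjmp: converted to an invoke
  return 0;
}

// A longjmp(buf, 1) inside mayLongJmp() is lowered to a call to
// __llvm_sjljeh_throw_longjmp, which unwinds to the invoke's except path.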
+//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersetjmp" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/SmallVector.h" +#include +using namespace llvm; + +STATISTIC(LongJmpsTransformed, "Number of longjmps transformed"); +STATISTIC(SetJmpsTransformed , "Number of setjmps transformed"); +STATISTIC(CallsTransformed , "Number of calls invokified"); +STATISTIC(InvokesTransformed , "Number of invokes modified"); + +namespace { + //===--------------------------------------------------------------------===// + // LowerSetJmp pass implementation. + class VISIBILITY_HIDDEN LowerSetJmp : public ModulePass, + public InstVisitor { + // LLVM library functions... + Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap + Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap + Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map + Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp + Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception + Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception + Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value + + typedef std::pair SwitchValuePair; + + // Keep track of those basic blocks reachable via a depth-first search of + // the CFG from a setjmp call. We only need to transform those "call" and + // "invoke" instructions that are reachable from the setjmp call site. + std::set DFSBlocks; + + // The setjmp map is going to hold information about which setjmps + // were called (each setjmp gets its own number) and with which + // buffer it was called. + std::map SJMap; + + // The rethrow basic block map holds the basic block to branch to if + // the exception isn't handled in the current function and needs to + // be rethrown. + std::map RethrowBBMap; + + // The preliminary basic block map holds a basic block that grabs the + // exception and determines if it's handled by the current function. + std::map PrelimBBMap; + + // The switch/value map holds a switch inst/call inst pair. The + // switch inst controls which handler (if any) gets called and the + // value is the value returned to that handler by the call to + // __llvm_sjljeh_get_longjmp_value. + std::map SwitchValMap; + + // A map of which setjmps we've seen so far in a function. 
+    std::map<Function*, unsigned> SetJmpIDMap;
+
+    AllocaInst* GetSetJmpMap(Function* Func);
+    BasicBlock* GetRethrowBB(Function* Func);
+    SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
+
+    void TransformLongJmpCall(CallInst* Inst);
+    void TransformSetJmpCall(CallInst* Inst);
+
+    bool IsTransformableFunction(const std::string& Name);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LowerSetJmp() : ModulePass(&ID) {}
+
+    void visitCallInst(CallInst& CI);
+    void visitInvokeInst(InvokeInst& II);
+    void visitReturnInst(ReturnInst& RI);
+    void visitUnwindInst(UnwindInst& UI);
+
+    bool runOnModule(Module& M);
+    bool doInitialization(Module& M);
+  };
+} // end anonymous namespace
+
+char LowerSetJmp::ID = 0;
+static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+
+// run - Run the transformation on the program. We grab the function
+// prototypes for longjmp and setjmp. If they are used in the program,
+// then we can go directly to the places they're at and transform them.
+bool LowerSetJmp::runOnModule(Module& M) {
+  bool Changed = false;
+
+  // These are the names the setjmp/longjmp intrinsics go by.
+  Function* SetJmp = M.getFunction("llvm.setjmp");
+  Function* LongJmp = M.getFunction("llvm.longjmp");
+
+  // This program doesn't have longjmp and setjmp calls.
+  if ((!LongJmp || LongJmp->use_empty()) &&
+      (!SetJmp || SetJmp->use_empty())) return false;
+
+  // Initialize some values and functions we'll need to transform the
+  // setjmp/longjmp functions.
+  doInitialization(M);
+
+  if (SetJmp) {
+    for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
+         B != E; ++B) {
+      BasicBlock* BB = cast<Instruction>(*B)->getParent();
+      for (df_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
+             I = df_ext_begin(BB, DFSBlocks), E = df_ext_end(BB, DFSBlocks);
+           I != E; ++I)
+        /* empty */;
+    }
+
+    while (!SetJmp->use_empty()) {
+      assert(isa<CallInst>(SetJmp->use_back()) &&
+             "User of setjmp intrinsic not a call?");
+      TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
+      Changed = true;
+    }
+  }
+
+  if (LongJmp)
+    while (!LongJmp->use_empty()) {
+      assert(isa<CallInst>(LongJmp->use_back()) &&
+             "User of longjmp intrinsic not a call?");
+      TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
+      Changed = true;
+    }
+
+  // Now go through the affected functions and convert calls and invokes
+  // to new invokes...
+  for (std::map<Function*, AllocaInst*>::iterator
+         B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
+    Function* F = B->first;
+    for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+      for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
+        visit(*IB++);
+        if (IB != BB->end() && IB->getParent() != BB)
+          break;  // The next instruction got moved to a different block!
+      }
+  }
+
+  DFSBlocks.clear();
+  SJMap.clear();
+  RethrowBBMap.clear();
+  PrelimBBMap.clear();
+  SwitchValMap.clear();
+  SetJmpIDMap.clear();
+
+  return Changed;
+}
+
+// doInitialization - For the lower long/setjmp pass, this ensures that a
+// module contains a declaration for the intrinsic functions we are going
+// to call to convert longjmp and setjmp calls.
+//
+// This function is always successful.
+bool LowerSetJmp::doInitialization(Module& M)
+{
+  const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type *SBPPTy = PointerType::getUnqual(SBPTy);
+
+  // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
+  // a description of the following library functions.
+ + // void __llvm_sjljeh_init_setjmpmap(void**) + InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", + Type::VoidTy, SBPPTy, (Type *)0); + // void __llvm_sjljeh_destroy_setjmpmap(void**) + DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", + Type::VoidTy, SBPPTy, (Type *)0); + + // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) + AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", + Type::VoidTy, SBPPTy, SBPTy, + Type::Int32Ty, (Type *)0); + + // void __llvm_sjljeh_throw_longjmp(int*, int) + ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", + Type::VoidTy, SBPTy, Type::Int32Ty, + (Type *)0); + + // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) + TryCatchLJ = + M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", + Type::Int32Ty, SBPPTy, (Type *)0); + + // bool __llvm_sjljeh_is_longjmp_exception() + IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", + Type::Int1Ty, (Type *)0); + + // int __llvm_sjljeh_get_longjmp_value() + GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", + Type::Int32Ty, (Type *)0); + return true; +} + +// IsTransformableFunction - Return true if the function name isn't one +// of the ones we don't want transformed. Currently, don't transform any +// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error +// handling functions (beginning with __llvm_sjljeh_...they don't throw +// exceptions). +bool LowerSetJmp::IsTransformableFunction(const std::string& Name) { + std::string SJLJEh("__llvm_sjljeh"); + + if (Name.size() > SJLJEh.size()) + return std::string(Name.begin(), Name.begin() + SJLJEh.size()) != SJLJEh; + + return true; +} + +// TransformLongJmpCall - Transform a longjmp call into a call to the +// internal __llvm_sjljeh_throw_longjmp function. It then takes care of +// throwing the exception for us. +void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) +{ + const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + + // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the + // same parameters as "longjmp", except that the buffer is cast to a + // char*. It returns "void", so it doesn't need to replace any of + // Inst's uses and doesn't get a name. + CastInst* CI = + new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); + SmallVector Args; + Args.push_back(CI); + Args.push_back(Inst->getOperand(2)); + CallInst::Create(ThrowLongJmp, Args.begin(), Args.end(), "", Inst); + + SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; + + // If the function has a setjmp call in it (they are transformed first) + // we should branch to the basic block that determines if this longjmp + // is applicable here. Otherwise, issue an unwind. + if (SVP.first) + BranchInst::Create(SVP.first->getParent(), Inst); + else + new UnwindInst(Inst); + + // Remove all insts after the branch/unwind inst. Go from back to front to + // avoid replaceAllUsesWith if possible. + BasicBlock *BB = Inst->getParent(); + Instruction *Removed; + do { + Removed = &BB->back(); + // If the removed instructions have any users, replace them now. + if (!Removed->use_empty()) + Removed->replaceAllUsesWith(UndefValue::get(Removed->getType())); + Removed->eraseFromParent(); + } while (Removed != Inst); + + ++LongJmpsTransformed; +} + +// GetSetJmpMap - Retrieve (create and initialize, if necessary) the +// setjmp map. 
This map is going to hold information about which setjmps +// were called (each setjmp gets its own number) and with which buffer it +// was called. There can be only one! +AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) +{ + if (SJMap[Func]) return SJMap[Func]; + + // Insert the setjmp map initialization before the first instruction in + // the function. + Instruction* Inst = Func->getEntryBlock().begin(); + assert(Inst && "Couldn't find even ONE instruction in entry block!"); + + // Fill in the alloca and call to initialize the SJ map. + const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); + AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); + CallInst::Create(InitSJMap, Map, "", Inst); + return SJMap[Func] = Map; +} + +// GetRethrowBB - Only one rethrow basic block is needed per function. +// If this is a longjmp exception but not handled in this block, this BB +// performs the rethrow. +BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) +{ + if (RethrowBBMap[Func]) return RethrowBBMap[Func]; + + // The basic block we're going to jump to if we need to rethrow the + // exception. + BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func); + + // Fill in the "Rethrow" BB with a call to rethrow the exception. This + // is the last instruction in the BB since at this point the runtime + // should exit this function and go to the next function. + new UnwindInst(Rethrow); + return RethrowBBMap[Func] = Rethrow; +} + +// GetSJSwitch - Return the switch statement that controls which handler +// (if any) gets called and the value returned to that handler. +LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, + BasicBlock* Rethrow) +{ + if (SwitchValMap[Func].first) return SwitchValMap[Func]; + + BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func); + + // Keep track of the preliminary basic block for some of the other + // transformations. + PrelimBBMap[Func] = LongJmpPre; + + // Grab the exception. + CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre); + + // The "decision basic block" gets the number associated with the + // setjmp call returning to switch on and the value returned by + // longjmp. + BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func); + + BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); + + // Fill in the "decision" basic block. + CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB); + CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum", + DecisionBB); + + SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB); + return SwitchValMap[Func] = SwitchValuePair(SI, LJVal); +} + +// TransformSetJmpCall - The setjmp call is a bit trickier to transform. +// We're going to convert all setjmp calls to nops. Then all "call" and +// "invoke" instructions in the function are converted to "invoke" where +// the "except" branch is used when returning from a longjmp call. +void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) +{ + BasicBlock* ABlock = Inst->getParent(); + Function* Func = ABlock->getParent(); + + // Add this setjmp to the setjmp map. 
+ const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + CastInst* BufPtr = + new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + std::vector Args = + make_vector(GetSetJmpMap(Func), BufPtr, + ConstantInt::get(Type::Int32Ty, + SetJmpIDMap[Func]++), 0); + CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst); + + // We are guaranteed that there are no values live across basic blocks + // (because we are "not in SSA form" yet), but there can still be values live + // in basic blocks. Because of this, splitting the setjmp block can cause + // values above the setjmp to not dominate uses which are after the setjmp + // call. For all of these occasions, we must spill the value to the stack. + // + std::set InstrsAfterCall; + + // The call is probably very close to the end of the basic block, for the + // common usage pattern of: 'if (setjmp(...))', so keep track of the + // instructions after the call. + for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end(); + I != E; ++I) + InstrsAfterCall.insert(I); + + for (BasicBlock::iterator II = ABlock->begin(); + II != BasicBlock::iterator(Inst); ++II) + // Loop over all of the uses of instruction. If any of them are after the + // call, "spill" the value to the stack. + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (cast(*UI)->getParent() != ABlock || + InstrsAfterCall.count(cast(*UI))) { + DemoteRegToStack(*II); + break; + } + InstrsAfterCall.clear(); + + // Change the setjmp call into a branch statement. We'll remove the + // setjmp call in a little bit. No worries. + BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst); + assert(SetJmpContBlock && "Couldn't split setjmp BB!!"); + + SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont"); + + // Add the SetJmpContBlock to the set of blocks reachable from a setjmp. + DFSBlocks.insert(SetJmpContBlock); + + // This PHI node will be in the new block created from the + // splitBasicBlock call. + PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst); + + // Coming from a call to setjmp, the return is 0. + PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock); + + // Add the case for this setjmp's number... + SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); + SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1), + SetJmpContBlock); + + // Value coming from the handling of the exception. + PHI->addIncoming(SVP.second, SVP.second->getParent()); + + // Replace all uses of this instruction with the PHI node created by + // the eradication of setjmp. + Inst->replaceAllUsesWith(PHI); + Inst->eraseFromParent(); + + ++SetJmpsTransformed; +} + +// visitCallInst - This converts all LLVM call instructions into invoke +// instructions. The except part of the invoke goes to the "LongJmpBlkPre" +// that grabs the exception and proceeds to determine if it's a longjmp +// exception or not. +void LowerSetJmp::visitCallInst(CallInst& CI) +{ + if (CI.getCalledFunction()) + if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || + CI.getCalledFunction()->isIntrinsic()) return; + + BasicBlock* OldBB = CI.getParent(); + + // If not reachable from a setjmp call, don't transform. 
+  if (!DFSBlocks.count(OldBB)) return;
+
+  BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
+  assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
+  DFSBlocks.insert(NewBB);
+  NewBB->setName("Call2Invoke");
+
+  Function* Func = OldBB->getParent();
+
+  // Construct the new "invoke" instruction.
+  TerminatorInst* Term = OldBB->getTerminator();
+  std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end());
+  InvokeInst* II =
+    InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
+                       Params.begin(), Params.end(), CI.getName(), Term);
+  II->setCallingConv(CI.getCallingConv());
+  II->setAttributes(CI.getAttributes());
+
+  // Replace the old call inst with the invoke inst and remove the call.
+  CI.replaceAllUsesWith(II);
+  CI.eraseFromParent();
+
+  // The old terminator is useless now that we have the invoke inst.
+  Term->eraseFromParent();
+  ++CallsTransformed;
+}
+
+// visitInvokeInst - Converting the "invoke" instruction is fairly
+// straightforward. The old exception part is replaced by a query asking
+// if this is a longjmp exception. If it is, then it goes to the longjmp
+// exception blocks. Otherwise, control is passed to the old exception block.
+void LowerSetJmp::visitInvokeInst(InvokeInst& II)
+{
+  if (II.getCalledFunction())
+    if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
+        II.getCalledFunction()->isIntrinsic()) return;
+
+  BasicBlock* BB = II.getParent();
+
+  // If not reachable from a setjmp call, don't transform.
+  if (!DFSBlocks.count(BB)) return;
+
+  BasicBlock* ExceptBB = II.getUnwindDest();
+
+  Function* Func = BB->getParent();
+  BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func);
+
+  // If this is a longjmp exception, then branch to the preliminary BB of
+  // the longjmp exception handling. Otherwise, go to the old exception.
+  CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+                                          NewExceptBB);
+
+  BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
+
+  II.setUnwindDest(NewExceptBB);
+  ++InvokesTransformed;
+}
+
+// visitReturnInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
+  Function* Func = RI.getParent()->getParent();
+  CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
+}
+
+// visitUnwindInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
+  Function* Func = UI.getParent()->getParent();
+  CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
+}
+
+ModulePass *llvm::createLowerSetJmpPass() {
+  return new LowerSetJmp();
+}
+
diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile
new file mode 100644
index 000000000000..5c42374139aa
--- /dev/null
+++ b/lib/Transforms/IPO/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMipo
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 000000000000..17bc2d41a4cf
--- /dev/null
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,377 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// A Function will not be analyzed if:
+// * it is overridable at runtime (except for weak linkage), or
+// * it is used by anything other than the callee parameter of a call/invoke
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found, the functions are folded. We can only fold two
+// functions when we know that the definition of one of them is not
+// overridable.
+// * fold a function marked internal by replacing all of its users.
+// * fold extern or weak functions by replacing them with a global alias
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * fold vector<intptr_t>::push_back and vector<void*>::push_back.
+//
+// These two functions have different types, but in a way that doesn't matter
+// to us. As long as we never see an S or T itself, using S* and S** is the
+// same as using a T* and T**.
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such implementations.
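// An illustration of a pair the pass above can fold (a hypothetical example,
// not code from the patch): apart from their names, these compile to
// identical IR, hash into the same bucket (same type, same block count), and
// compare equal instruction by instruction, so one body can be dropped in
// favor of the other.

static int sumA(const int *p, const int *q) { return *p + *q; }
static int sumB(const int *x, const int *y) { return *x + *y; }

// Driving it would look like any other module pass, e.g.
// PM.add(createMergeFunctionsPass()) on a PassManager, or opt -mergefunc.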
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <map>
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumMergeFails, "Number of identical function pairings not merged");
+
+namespace {
+  struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    MergeFunctions() : ModulePass((intptr_t)&ID) {}
+
+    bool runOnModule(Module &M);
+  };
+}
+
+char MergeFunctions::ID = 0;
+static RegisterPass<MergeFunctions>
+X("mergefunc", "Merge Functions");
+
+ModulePass *llvm::createMergeFunctionsPass() {
+  return new MergeFunctions();
+}
+
+static unsigned long hash(const Function *F) {
+  return F->size() ^ reinterpret_cast<unsigned long>(F->getType());
+  //return F->size() ^ F->arg_size() ^ F->getReturnType();
+}
+
+static bool compare(const Value *V, const Value *U) {
+  assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
+         "Must not compare basic blocks.");
+
+  assert(V->getType() == U->getType() &&
+         "Two of the same operation have operands of different type.");
+
+  // TODO: If the constant is an expression of F, we should accept that it's
+  // equal to the same expression in terms of G.
+  if (isa<Constant>(V))
+    return V == U;
+
+  // The caller has ensured that ValueMap[V] != U. Since Arguments are
+  // pre-loaded into the ValueMap, and Instructions are added as we go, we know
+  // that this can only be a mis-match.
+  if (isa<Instruction>(V) || isa<Argument>(V))
+    return false;
+
+  if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) {
+    const InlineAsm *IAF = cast<InlineAsm>(V);
+    const InlineAsm *IAG = cast<InlineAsm>(U);
+    return IAF->getAsmString() == IAG->getAsmString() &&
+           IAF->getConstraintString() == IAG->getConstraintString();
+  }
+
+  return false;
+}
+
+static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
+                   DenseMap<const Value *, const Value *> &ValueMap,
+                   DenseMap<const Value *, const Value *> &SpeculationMap) {
+  // Speculatively add it anyway. If it's false, we'll notice a difference
+  // later, and this won't matter.
+  ValueMap[BB1] = BB2;
+
+  BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
+  BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+
+  do {
+    if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI)))
+      return false;
+
+    if (FI->getNumOperands() != GI->getNumOperands())
+      return false;
+
+    if (ValueMap[FI] == GI) {
+      ++FI, ++GI;
+      continue;
+    }
+
+    if (ValueMap[FI] != NULL)
+      return false;
+
+    for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+      Value *OpF = FI->getOperand(i);
+      Value *OpG = GI->getOperand(i);
+
+      if (ValueMap[OpF] == OpG)
+        continue;
+
+      if (ValueMap[OpF] != NULL)
+        return false;
+
+      assert(OpF->getType() == OpG->getType() &&
+             "Two of the same operation have operands of different type.");
+
+      if (OpF->getValueID() != OpG->getValueID())
+        return false;
+
+      if (isa<PHINode>(FI)) {
+        if (SpeculationMap[OpF] == NULL)
+          SpeculationMap[OpF] = OpG;
+        else if (SpeculationMap[OpF] != OpG)
+          return false;
+        continue;
+      } else if (isa<BasicBlock>(OpF)) {
+        assert(isa<TerminatorInst>(FI) &&
+               "BasicBlock referenced by non-Terminator non-PHI");
+        // This call changes the ValueMap, hence we can't use
+        // Value *& = ValueMap[...]
+ if (!equals(cast(OpF), cast(OpG), ValueMap, + SpeculationMap)) + return false; + } else { + if (!compare(OpF, OpG)) + return false; + } + + ValueMap[OpF] = OpG; + } + + ValueMap[FI] = GI; + ++FI, ++GI; + } while (FI != FE && GI != GE); + + return FI == FE && GI == GE; +} + +static bool equals(const Function *F, const Function *G) { + // We need to recheck everything, but check the things that weren't included + // in the hash first. + + if (F->getAttributes() != G->getAttributes()) + return false; + + if (F->hasGC() != G->hasGC()) + return false; + + if (F->hasGC() && F->getGC() != G->getGC()) + return false; + + if (F->hasSection() != G->hasSection()) + return false; + + if (F->hasSection() && F->getSection() != G->getSection()) + return false; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (F->getCallingConv() != G->getCallingConv()) + return false; + + // TODO: We want to permit cases where two functions take T* and S* but + // only load or store them into T** and S**. + if (F->getType() != G->getType()) + return false; + + DenseMap ValueMap; + DenseMap SpeculationMap; + ValueMap[F] = G; + + assert(F->arg_size() == G->arg_size() && + "Identical functions have a different number of args."); + + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) + ValueMap[fi] = gi; + + if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap, + SpeculationMap)) + return false; + + for (DenseMap::iterator + I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) { + if (ValueMap[I->first] != I->second) + return false; + } + + return true; +} + +static bool fold(std::vector &FnVec, unsigned i, unsigned j) { + if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden()) + std::swap(FnVec[i], FnVec[j]); + + Function *F = FnVec[i]; + Function *G = FnVec[j]; + + if (!F->mayBeOverridden()) { + if (G->hasLocalLinkage()) { + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + G->replaceAllUsesWith(F); + G->eraseFromParent(); + ++NumFunctionsMerged; + return true; + } + + if (G->hasExternalLinkage() || G->hasWeakLinkage()) { + GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", + F, G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); + ++NumFunctionsMerged; + return true; + } + } + + if (F->hasWeakLinkage() && G->hasWeakLinkage()) { + GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "", + 0, F->getParent()); + GA_F->takeName(F); + GA_F->setVisibility(F->getVisibility()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + F->replaceAllUsesWith(GA_F); + F->setName("folded." + GA_F->getName()); + F->setLinkage(GlobalValue::ExternalLinkage); + GA_F->setAliasee(F); + + GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "", + F, G->getParent()); + GA_G->takeName(G); + GA_G->setVisibility(G->getVisibility()); + G->replaceAllUsesWith(GA_G); + G->eraseFromParent(); + + ++NumFunctionsMerged; + return true; + } + + DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n"; + + ++NumMergeFails; + return false; +} + +static bool hasAddressTaken(User *U) { + for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) { + User *Use = *I; + + // 'call (bitcast @F to ...)' happens a lot. 
+    while (isa<ConstantExpr>(Use) && Use->hasOneUse()) {
+      Use = *Use->use_begin();
+    }
+
+    if (isa<ConstantExpr>(Use)) {
+      if (hasAddressTaken(Use))
+        return true;
+    }
+
+    if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use))
+      return true;
+
+    // Make sure we aren't passing U as a parameter to the call instead of as
+    // the callee.
+    if (CallSite(cast<Instruction>(Use)).hasArgument(U))
+      return true;
+  }
+
+  return false;
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+  bool Changed = false;
+
+  std::map<unsigned long, std::vector<Function *> > FnMap;
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration() || F->isIntrinsic())
+      continue;
+
+    if (!F->hasLocalLinkage() && !F->hasExternalLinkage() &&
+        !F->hasWeakLinkage())
+      continue;
+
+    if (hasAddressTaken(F))
+      continue;
+
+    FnMap[hash(F)].push_back(F);
+  }
+
+  // TODO: instead of running in a loop, we could also fold functions in
+  // callgraph order. Constructing the callgraph probably isn't cheaper than
+  // just running in a loop.
+
+  bool LocalChanged;
+  do {
+    LocalChanged = false;
+    for (std::map<unsigned long, std::vector<Function *> >::iterator
+           I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
+      DOUT << "size: " << FnMap.size() << "\n";
+      std::vector<Function *> &FnVec = I->second;
+      DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
+
+      for (int i = 0, e = FnVec.size(); i != e; ++i) {
+        for (int j = i + 1; j != e; ++j) {
+          bool isEqual = equals(FnVec[i], FnVec[j]);
+
+          DOUT << "  " << FnVec[i]->getName()
+               << (isEqual ? " == " : " != ")
+               << FnVec[j]->getName() << "\n";
+
+          if (isEqual) {
+            if (fold(FnVec, i, j)) {
+              LocalChanged = true;
+              FnVec.erase(FnVec.begin() + j);
+              --j, --e;
+            }
+          }
+        }
+      }
+
+    }
+    Changed |= LocalChanged;
+  } while (LocalChanged);
+
+  return Changed;
+}
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
new file mode 100644
index 000000000000..0e1fdb9915ac
--- /dev/null
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -0,0 +1,191 @@
+//===-- PartialSpecialization.cpp - Specialize for common constants ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds function arguments that are often a common constant and
+// specializes a version of the called function for that constant.
+//
+// This pass simply does the cloning for functions it specializes. It depends
+// on IPSCCP and DAE to clean up the results.
+//
+// The initial heuristic favors constant arguments that are used in control
+// flow.
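+//
+// Concretely, for each interesting argument the pass builds a histogram of
+// the constants seen at direct call sites, and specializes on a constant C
+// when, with the CallsMin and ConstValPercent thresholds defined below,
+//
+//   total > CallsMin                        // enough direct calls overall
+//   && count(C) > total * ConstValPercent   // C appears often enough
+//   && count(C) < total                     // but is not the only value seen
+//
+// holds for the argument's distribution.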
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partialspecialization" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseSet.h" +#include +using namespace llvm; + +STATISTIC(numSpecialized, "Number of specialized functions created"); + +// Call must be used at least occasionally +static const int CallsMin = 5; + +// Must have 10% of calls having the same constant to specialize on +static const double ConstValPercent = .1; + +namespace { + class VISIBILITY_HIDDEN PartSpec : public ModulePass { + void scanForInterest(Function&, SmallVector&); + int scanDistribution(Function&, int, std::map&); + public : + static char ID; // Pass identification, replacement for typeid + PartSpec() : ModulePass(&ID) {} + bool runOnModule(Module &M); + }; +} + +char PartSpec::ID = 0; +static RegisterPass +X("partialspecialization", "Partial Specialization"); + +// Specialize F by replacing the arguments (keys) in replacements with the +// constants (values). Replace all calls to F with those constants with +// a call to the specialized function. Returns the specialized function +static Function* +SpecializeFunction(Function* F, + DenseMap& replacements) { + // arg numbers of deleted arguments + DenseSet deleted; + for (DenseMap::iterator + repb = replacements.begin(), repe = replacements.end(); + repb != repe; ++repb) + deleted.insert(cast(repb->first)->getArgNo()); + + Function* NF = CloneFunction(F, replacements); + NF->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(NF); + + for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); + ii != ee; ) { + Value::use_iterator i = ii; + ++ii; + if (isa(i) || isa(i)) { + CallSite CS(cast(i)); + if (CS.getCalledFunction() == F) { + + SmallVector args; + for (unsigned x = 0; x < CS.arg_size(); ++x) + if (!deleted.count(x)) + args.push_back(CS.getArgument(x)); + Value* NCall; + if (CallInst *CI = dyn_cast(i)) { + NCall = CallInst::Create(NF, args.begin(), args.end(), + CI->getName(), CI); + cast(NCall)->setTailCall(CI->isTailCall()); + cast(NCall)->setCallingConv(CI->getCallingConv()); + } else { + InvokeInst *II = cast(i); + NCall = InvokeInst::Create(NF, II->getNormalDest(), + II->getUnwindDest(), + args.begin(), args.end(), + II->getName(), II); + cast(NCall)->setCallingConv(II->getCallingConv()); + } + CS.getInstruction()->replaceAllUsesWith(NCall); + CS.getInstruction()->eraseFromParent(); + } + } + } + return NF; +} + + +bool PartSpec::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator I = M.begin(); I != M.end(); ++I) { + Function &F = *I; + if (F.isDeclaration() || F.mayBeOverridden()) continue; + SmallVector interestingArgs; + scanForInterest(F, interestingArgs); + + // Find the first interesting Argument that we can specialize on + // If there are multiple interesting Arguments, then those will be found + // when processing the cloned function. 
+ bool breakOuter = false; + for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) { + std::map distribution; + int total = scanDistribution(F, interestingArgs[x], distribution); + if (total > CallsMin) + for (std::map::iterator ii = distribution.begin(), + ee = distribution.end(); ii != ee; ++ii) + if (total > ii->second && ii->first && + ii->second > total * ConstValPercent) { + DenseMap m; + Function::arg_iterator arg = F.arg_begin(); + for (int y = 0; y < interestingArgs[x]; ++y) + ++arg; + m[&*arg] = ii->first; + SpecializeFunction(&F, m); + ++numSpecialized; + breakOuter = true; + Changed = true; + } + } + } + return Changed; +} + +/// scanForInterest - This function decides which arguments would be worth +/// specializing on. +void PartSpec::scanForInterest(Function& F, SmallVector& args) { + for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); + ii != ee; ++ii) { + for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); + ui != ue; ++ui) { + + bool interesting = false; + + if (isa(ui)) interesting = true; + else if (isa(ui)) + interesting = ui->getOperand(0) == ii; + else if (isa(ui)) + interesting = ui->getOperand(0) == ii; + else if (isa(ui)) interesting = true; + else if (isa(ui)) interesting = true; + + if (interesting) { + args.push_back(std::distance(F.arg_begin(), ii)); + break; + } + } + } +} + +/// scanDistribution - Construct a histogram of constants for arg of F at arg. +int PartSpec::scanDistribution(Function& F, int arg, + std::map& dist) { + bool hasIndirect = false; + int total = 0; + for(Value::use_iterator ii = F.use_begin(), ee = F.use_end(); + ii != ee; ++ii) + if ((isa(ii) || isa(ii)) + && ii->getOperand(0) == &F) { + ++dist[dyn_cast(ii->getOperand(arg + 1))]; + ++total; + } else + hasIndirect = true; + + // Preserve the original address taken function even if all other uses + // will be specialized. + if (hasIndirect) ++total; + return total; +} + +ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); } diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp new file mode 100644 index 000000000000..2b52f464b674 --- /dev/null +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -0,0 +1,255 @@ +//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, turning invoke instructions into calls, iff the callee cannot +// throw an exception, and marking functions 'nounwind' if they cannot throw. +// It implements this as a bottom-up traversal of the call-graph. 
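+//
+// A condensed view of the per-SCC propagation performed by runOnSCC below
+// (a sketch of the control flow, not the full logic):
+//
+//   bool SCCMightUnwind = false, SCCMightReturn = false;
+//   for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+//     Function *F = SCC[i]->getFunction();
+//     if (F == 0 || F->isDeclaration() || F->mayBeOverridden()) {
+//       SCCMightUnwind |= F == 0 || !F->doesNotThrow();
+//       SCCMightReturn |= F == 0 || !F->doesNotReturn();
+//     } // else: scan F for unwinds, returns, and calls that leave the SCC
+//   }
+//   if (!SCCMightUnwind) /* add Attribute::NoUnwind to each function */;
+//   if (!SCCMightReturn) /* add Attribute::NoReturn to each function */;
+//
+// The bottom-up order matters: callees are visited before callers, so every
+// function outside the SCC already carries its final verdict.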
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "prune-eh" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include +#include +using namespace llvm; + +STATISTIC(NumRemoved, "Number of invokes removed"); +STATISTIC(NumUnreach, "Number of noreturn calls optimized"); + +namespace { + struct VISIBILITY_HIDDEN PruneEH : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PruneEH() : CallGraphSCCPass(&ID) {} + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(const std::vector &SCC); + + bool SimplifyFunction(Function *F); + void DeleteBasicBlock(BasicBlock *BB); + }; +} + +char PruneEH::ID = 0; +static RegisterPass +X("prune-eh", "Remove unused exception handling info"); + +Pass *llvm::createPruneEHPass() { return new PruneEH(); } + + +bool PruneEH::runOnSCC(const std::vector &SCC) { + SmallPtrSet SCCNodes; + CallGraph &CG = getAnalysis(); + bool MadeChange = false; + + // Fill SCCNodes with the elements of the SCC. Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + SCCNodes.insert(SCC[i]); + + // First pass, scan all of the functions in the SCC, simplifying them + // according to what we know. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + if (Function *F = SCC[i]->getFunction()) + MadeChange |= SimplifyFunction(F); + + // Next, check to see if any callees might throw or if there are any external + // functions in this SCC: if so, we cannot prune any functions in this SCC. + // Definitions that are weak and not declared non-throwing might be + // overridden at linktime with something that throws, so assume that. + // If this SCC includes the unwind instruction, we KNOW it throws, so + // obviously the SCC might throw. + // + bool SCCMightUnwind = false, SCCMightReturn = false; + for (unsigned i = 0, e = SCC.size(); + (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (F == 0) { + SCCMightUnwind = true; + SCCMightReturn = true; + } else if (F->isDeclaration() || F->mayBeOverridden()) { + SCCMightUnwind |= !F->doesNotThrow(); + SCCMightReturn |= !F->doesNotReturn(); + } else { + bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow(); + bool CheckReturn = !SCCMightReturn && !F->doesNotReturn(); + + if (!CheckUnwind && !CheckReturn) + continue; + + // Check to see if this function performs an unwind or calls an + // unwinding function. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (CheckUnwind && isa(BB->getTerminator())) { + // Uses unwind! + SCCMightUnwind = true; + } else if (CheckReturn && isa(BB->getTerminator())) { + SCCMightReturn = true; + } + + // Invoke instructions don't allow unwinding to continue, so we are + // only interested in call instructions. + if (CheckUnwind && !SCCMightUnwind) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast(I)) { + if (CI->doesNotThrow()) { + // This call cannot throw. 
+ } else if (Function *Callee = CI->getCalledFunction()) { + CallGraphNode *CalleeNode = CG[Callee]; + // If the callee is outside our current SCC then we may + // throw because it might. + if (!SCCNodes.count(CalleeNode)) { + SCCMightUnwind = true; + break; + } + } else { + // Indirect call, it might throw. + SCCMightUnwind = true; + break; + } + } + if (SCCMightUnwind && SCCMightReturn) break; + } + } + } + + // If the SCC doesn't unwind or doesn't throw, note this fact. + if (!SCCMightUnwind || !SCCMightReturn) + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Attributes NewAttributes = Attribute::None; + + if (!SCCMightUnwind) + NewAttributes |= Attribute::NoUnwind; + if (!SCCMightReturn) + NewAttributes |= Attribute::NoReturn; + + const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes(); + const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes); + if (PAL != NPAL) { + MadeChange = true; + SCC[i]->getFunction()->setAttributes(NPAL); + } + } + + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + // Convert any invoke instructions to non-throwing functions in this node + // into call instructions with a branch. This makes the exception blocks + // dead. + if (Function *F = SCC[i]->getFunction()) + MadeChange |= SimplifyFunction(F); + } + + return MadeChange; +} + + +// SimplifyFunction - Given information about callees, simplify the specified +// function if we have invokes to non-unwinding functions or code after calls to +// no-return functions. +bool PruneEH::SimplifyFunction(Function *F) { + CallGraph &CG = getAnalysis(); + CallGraphNode *CGN = CG[F]; + + bool MadeChange = false; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (InvokeInst *II = dyn_cast(BB->getTerminator())) + if (II->doesNotThrow()) { + SmallVector Args(II->op_begin()+3, II->op_end()); + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), + Args.begin(), Args.end(), "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + + // Anything that used the value produced by the invoke instruction + // now uses the value produced by the call instruction. + II->replaceAllUsesWith(Call); + BasicBlock *UnwindBlock = II->getUnwindDest(); + UnwindBlock->removePredecessor(II->getParent()); + + // Fix up the call graph. + CGN->replaceCallSite(II, Call); + + // Insert a branch to the normal destination right before the + // invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + BB->getInstList().pop_back(); + + // If the unwind block is now dead, nuke it. + if (pred_begin(UnwindBlock) == pred_end(UnwindBlock)) + DeleteBasicBlock(UnwindBlock); // Delete the new BB. + + ++NumRemoved; + MadeChange = true; + } + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) + if (CallInst *CI = dyn_cast(I++)) + if (CI->doesNotReturn() && !isa(I)) { + // This call calls a function that cannot return. Insert an + // unreachable instruction after it and simplify the code. Do this + // by splitting the BB, adding the unreachable, then deleting the + // new BB. + BasicBlock *New = BB->splitBasicBlock(I); + + // Remove the uncond branch and add an unreachable. + BB->getInstList().pop_back(); + new UnreachableInst(BB); + + DeleteBasicBlock(New); // Delete the new BB. 
+ MadeChange = true; + ++NumUnreach; + break; + } + } + + return MadeChange; +} + +/// DeleteBasicBlock - remove the specified basic block from the program, +/// updating the callgraph to reflect any now-obsolete edges due to calls that +/// exist in the BB. +void PruneEH::DeleteBasicBlock(BasicBlock *BB) { + assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!"); + CallGraph &CG = getAnalysis(); + + CallGraphNode *CGN = CG[BB->getParent()]; + for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { + --I; + if (CallInst *CI = dyn_cast(I)) { + if (!isa(I)) + CGN->removeCallEdgeFor(CI); + } else if (InvokeInst *II = dyn_cast(I)) + CGN->removeCallEdgeFor(II); + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + + // Get the list of successors of this block. + std::vector Succs(succ_begin(BB), succ_end(BB)); + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); + + BB->eraseFromParent(); +} diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp new file mode 100644 index 000000000000..a81bbdb3c53d --- /dev/null +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -0,0 +1,251 @@ +//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RaiseAllocations pass which convert malloc and free +// calls to malloc and free instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "raiseallocs" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include +using namespace llvm; + +STATISTIC(NumRaised, "Number of allocations raised"); + +namespace { + // RaiseAllocations - Turn @malloc and @free calls into the appropriate + // instruction. + // + class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass { + Function *MallocFunc; // Functions in the module we are processing + Function *FreeFunc; // Initialized by doPassInitializationVirt + public: + static char ID; // Pass identification, replacement for typeid + RaiseAllocations() + : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {} + + // doPassInitialization - For the raise allocations pass, this finds a + // declaration for malloc and free if they exist. + // + void doInitialization(Module &M); + + // run - This method does the actual work of converting instructions over. + // + bool runOnModule(Module &M); + }; +} // end anonymous namespace + +char RaiseAllocations::ID = 0; +static RegisterPass +X("raiseallocs", "Raise allocations from calls to instructions"); + +// createRaiseAllocationsPass - The interface to this file... +ModulePass *llvm::createRaiseAllocationsPass() { + return new RaiseAllocations(); +} + + +// If the module has a symbol table, they might be referring to the malloc and +// free functions. If this is the case, grab the method pointers that the +// module is using. +// +// Lookup @malloc and @free in the symbol table, for later use. 
If they don't
+// exist, or are not external, we do not worry about converting calls to that
+// function into the appropriate instruction.
+//
+void RaiseAllocations::doInitialization(Module &M) {
+
+  // Get Malloc and free prototypes if they exist!
+  MallocFunc = M.getFunction("malloc");
+  if (MallocFunc) {
+    const FunctionType* TyWeHave = MallocFunc->getFunctionType();
+
+    // Get the expected prototype for malloc
+    const FunctionType *Malloc1Type =
+      FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+                        std::vector<const Type*>(1, Type::Int64Ty), false);
+
+    // Check to see if we got the expected malloc
+    if (TyWeHave != Malloc1Type) {
+      // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+      // This handles the common declaration of: 'void *malloc(unsigned);'
+      const FunctionType *Malloc2Type =
+        FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+                          std::vector<const Type*>(1, Type::Int32Ty), false);
+      if (TyWeHave != Malloc2Type) {
+        // Check to see if the prototype is missing, giving us
+        // sbyte*(...) * malloc
+        // This handles the common declaration of: 'void *malloc();'
+        const FunctionType *Malloc3Type =
+          FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+                            std::vector<const Type*>(), true);
+        if (TyWeHave != Malloc3Type)
+          // Give up
+          MallocFunc = 0;
+      }
+    }
+  }
+
+  FreeFunc = M.getFunction("free");
+  if (FreeFunc) {
+    const FunctionType* TyWeHave = FreeFunc->getFunctionType();
+
+    // Get the expected prototype for void free(i8*)
+    const FunctionType *Free1Type =
+      FunctionType::get(Type::VoidTy,
+        std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)),
+        false);
+
+    if (TyWeHave != Free1Type) {
+      // Check to see if the prototype was forgotten, giving us
+      // void (...) * free
+      // This handles the common forward declaration of: 'void free();'
+      const FunctionType* Free2Type =
+        FunctionType::get(Type::VoidTy, std::vector<const Type*>(), true);
+
+      if (TyWeHave != Free2Type) {
+        // One last try, check to see if we can find free as
+        // int (...)* free. This handles the case where NOTHING was declared.
+        const FunctionType* Free3Type =
+          FunctionType::get(Type::Int32Ty, std::vector<const Type*>(), true);
+
+        if (TyWeHave != Free3Type) {
+          // Give up.
+          FreeFunc = 0;
+        }
+      }
+    }
+  }
+
+  // Don't mess with locally defined versions of these functions...
+  if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0;
+  if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0;
+}
+
+// run - Transform calls into instructions...
+//
+bool RaiseAllocations::runOnModule(Module &M) {
+  // Find the malloc/free prototypes...
+  doInitialization(M);
+
+  bool Changed = false;
+
+  // First, process all of the malloc calls...
+  if (MallocFunc) {
+    std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end());
+    std::vector<Value*> EqPointers;  // Values equal to MallocFunc
+    while (!Users.empty()) {
+      User *U = Users.back();
+      Users.pop_back();
+
+      if (Instruction *I = dyn_cast<Instruction>(U)) {
+        CallSite CS = CallSite::get(I);
+        if (CS.getInstruction() && !CS.arg_empty() &&
+            (CS.getCalledFunction() == MallocFunc ||
+             std::find(EqPointers.begin(), EqPointers.end(),
                       CS.getCalledValue()) != EqPointers.end())) {
+
+          Value *Source = *CS.arg_begin();
+
+          // If no prototype was provided for malloc, we may need to cast the
+          // source size.
+ if (Source->getType() != Type::Int32Ty) + Source = + CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/, + "MallocAmtCast", I); + + MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I); + MI->takeName(I); + I->replaceAllUsesWith(MI); + + // If the old instruction was an invoke, add an unconditional branch + // before the invoke, which will become the new terminator. + if (InvokeInst *II = dyn_cast(I)) + BranchInst::Create(II->getNormalDest(), I); + + // Delete the old call site + I->eraseFromParent(); + Changed = true; + ++NumRaised; + } + } else if (GlobalValue *GV = dyn_cast(U)) { + Users.insert(Users.end(), GV->use_begin(), GV->use_end()); + EqPointers.push_back(GV); + } else if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->isCast()) { + Users.insert(Users.end(), CE->use_begin(), CE->use_end()); + EqPointers.push_back(CE); + } + } + } + } + + // Next, process all free calls... + if (FreeFunc) { + std::vector Users(FreeFunc->use_begin(), FreeFunc->use_end()); + std::vector EqPointers; // Values equal to FreeFunc + + while (!Users.empty()) { + User *U = Users.back(); + Users.pop_back(); + + if (Instruction *I = dyn_cast(U)) { + if (isa(I)) + continue; + CallSite CS = CallSite::get(I); + if (CS.getInstruction() && !CS.arg_empty() && + (CS.getCalledFunction() == FreeFunc || + std::find(EqPointers.begin(), EqPointers.end(), + CS.getCalledValue()) != EqPointers.end())) { + + // If no prototype was provided for free, we may need to cast the + // source pointer. This should be really uncommon, but it's necessary + // just in case we are dealing with weird code like this: + // free((long)ptr); + // + Value *Source = *CS.arg_begin(); + if (!isa(Source->getType())) + Source = new IntToPtrInst(Source, + PointerType::getUnqual(Type::Int8Ty), + "FreePtrCast", I); + new FreeInst(Source, I); + + // If the old instruction was an invoke, add an unconditional branch + // before the invoke, which will become the new terminator. + if (InvokeInst *II = dyn_cast(I)) + BranchInst::Create(II->getNormalDest(), I); + + // Delete the old call site + if (I->getType() != Type::VoidTy) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + Changed = true; + ++NumRaised; + } + } else if (GlobalValue *GV = dyn_cast(U)) { + Users.insert(Users.end(), GV->use_begin(), GV->use_end()); + EqPointers.push_back(GV); + } else if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->isCast()) { + Users.insert(Users.end(), CE->use_begin(), CE->use_end()); + EqPointers.push_back(CE); + } + } + } + } + + return Changed; +} diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp new file mode 100644 index 000000000000..a94d78e276c5 --- /dev/null +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -0,0 +1,72 @@ +//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for +// dead declarations and removes them. Dead declarations are declarations of +// functions for which no implementation is available (i.e., declarations for +// unused library functions). 
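+//
+// Since erasing a function invalidates the iterator pointing at it, the
+// walk below advances the iterator before erasing; the pattern is:
+//
+//   for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+//     Function *F = I++;            // step past F before possibly erasing it
+//     if (F->isDeclaration() && F->use_empty())
+//       F->eraseFromParent();       // safe: I no longer refers to F
+//   }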
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "strip-dead-prototypes" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); + +namespace { + +/// @brief Pass to remove unused function declarations. +class VISIBILITY_HIDDEN StripDeadPrototypesPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesPass() : ModulePass(&ID) { } + virtual bool runOnModule(Module &M); +}; + +} // end anonymous namespace + +char StripDeadPrototypesPass::ID = 0; +static RegisterPass +X("strip-dead-prototypes", "Strip Unused Function Prototypes"); + +bool StripDeadPrototypesPass::runOnModule(Module &M) { + bool MadeChange = false; + + // Erase dead function prototypes. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function *F = I++; + // Function must be a prototype and unused. + if (F->isDeclaration() && F->use_empty()) { + F->eraseFromParent(); + ++NumDeadPrototypes; + MadeChange = true; + } + } + + // Erase dead global var prototypes. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *GV = I++; + // Global must be a prototype and unused. + if (GV->isDeclaration() && GV->use_empty()) + GV->eraseFromParent(); + } + + // Return an indication of whether we changed anything or not. + return MadeChange; +} + +ModulePass *llvm::createStripDeadPrototypesPass() { + return new StripDeadPrototypesPass(); +} diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp new file mode 100644 index 000000000000..ab8fe5f125e8 --- /dev/null +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -0,0 +1,415 @@ +//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripSymbols transformation implements code stripping. Specifically, it +// can delete: +// +// * names for virtual registers +// * symbols for internal globals and functions +// * debug information +// +// Note that this transformation makes code much less readable, so it should +// only be used in situations where the 'strip' utility would be used, such as +// reducing code size or making it harder to reverse engineer code. 
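+//
+// When debug information must survive (the strip-nondebug variant), a name
+// is kept only if it carries the "llvm.dbg" prefix; the test used throughout
+// this file is simply:
+//
+//   if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8))
+//     V->setName("");   // the empty name removes V from the symbol table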
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN StripSymbols : public ModulePass { + bool OnlyDebugInfo; + public: + static char ID; // Pass identification, replacement for typeid + explicit StripSymbols(bool ODI = false) + : ModulePass(&ID), OnlyDebugInfo(ODI) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; + + class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripNonDebugSymbols() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; + + class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDebugDeclare() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char StripSymbols::ID = 0; +static RegisterPass +X("strip", "Strip all symbols from a module"); + +ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { + return new StripSymbols(OnlyDebugInfo); +} + +char StripNonDebugSymbols::ID = 0; +static RegisterPass +Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module"); + +ModulePass *llvm::createStripNonDebugSymbolsPass() { + return new StripNonDebugSymbols(); +} + +char StripDebugDeclare::ID = 0; +static RegisterPass +Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics"); + +ModulePass *llvm::createStripDebugDeclarePass() { + return new StripDebugDeclare(); +} + +/// OnlyUsedBy - Return true if V is only used by Usr. +static bool OnlyUsedBy(Value *V, Value *Usr) { + for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + User *U = *I; + if (U != Usr) + return false; + } + return true; +} + +static void RemoveDeadConstant(Constant *C) { + assert(C->use_empty() && "Constant is not dead!"); + SmallPtrSet Operands; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + if (isa(C->getOperand(i)->getType()) && + OnlyUsedBy(C->getOperand(i), C)) + Operands.insert(C->getOperand(i)); + if (GlobalVariable *GV = dyn_cast(C)) { + if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. + GV->eraseFromParent(); + } + else if (!isa(C)) + if (isa(C->getType())) + C->destroyConstant(); + + // If the constant referenced anything, see if we can delete it as well. + for (SmallPtrSet::iterator OI = Operands.begin(), + OE = Operands.end(); OI != OE; ++OI) + RemoveDeadConstant(*OI); +} + +// Strip the symbol table of its names. 
+// +static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { + for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) { + Value *V = VI->getValue(); + ++VI; + if (!isa(V) || cast(V)->hasLocalLinkage()) { + if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8)) + // Set name to "", removing from symbol table! + V->setName(""); + } + } +} + +// Strip the symbol table of its names. +static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { + for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) { + if (PreserveDbgInfo && strncmp(TI->first.c_str(), "llvm.dbg", 8) == 0) + ++TI; + else + ST.remove(TI++); + } +} + +/// Find values that are marked as llvm.used. +void findUsedValues(Module &M, + SmallPtrSet& llvmUsedValues) { + if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) { + llvmUsedValues.insert(LLVMUsed); + // Collect values that are preserved as per explicit request. + // llvm.used is used to list these values. + if (ConstantArray *Inits = + dyn_cast(LLVMUsed->getInitializer())) { + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { + if (GlobalValue *GV = dyn_cast(Inits->getOperand(i))) + llvmUsedValues.insert(GV); + else if (ConstantExpr *CE = + dyn_cast(Inits->getOperand(i))) + if (CE->getOpcode() == Instruction::BitCast) + if (GlobalValue *GV = dyn_cast(CE->getOperand(0))) + llvmUsedValues.insert(GV); + } + } + } +} + +/// StripSymbolNames - Strip symbol names. +bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { + + SmallPtrSet llvmUsedValues; + findUsedValues(M, llvmUsedValues); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + I->setName(""); // Internal symbols can't participate in linkage + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + I->setName(""); // Internal symbols can't participate in linkage + StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); + } + + // Remove all names from types. + StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo); + + return true; +} + +// StripDebugInfo - Strip debug info in the module if it exists. +// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and +// llvm.dbg.region.end calls, and any globals they point to if now dead. +bool StripDebugInfo(Module &M) { + + SmallPtrSet llvmUsedValues; + findUsedValues(M, llvmUsedValues); + + // Delete all dbg variables. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + GlobalVariable *GV = dyn_cast(I); + if (!GV) continue; + if (!GV->use_empty() && llvmUsedValues.count(I) == 0) { + if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) { + GV->replaceAllUsesWith(UndefValue::get(GV->getType())); + } + } + } + + Function *FuncStart = M.getFunction("llvm.dbg.func.start"); + Function *StopPoint = M.getFunction("llvm.dbg.stoppoint"); + Function *RegionStart = M.getFunction("llvm.dbg.region.start"); + Function *RegionEnd = M.getFunction("llvm.dbg.region.end"); + Function *Declare = M.getFunction("llvm.dbg.declare"); + + std::vector DeadConstants; + + // Remove all of the calls to the debugger intrinsics, and remove them from + // the module. 
+ if (FuncStart) { + while (!FuncStart->use_empty()) { + CallInst *CI = cast(FuncStart->use_back()); + Value *Arg = CI->getOperand(1); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg->use_empty()) + if (Constant *C = dyn_cast(Arg)) + DeadConstants.push_back(C); + } + FuncStart->eraseFromParent(); + } + if (StopPoint) { + while (!StopPoint->use_empty()) { + CallInst *CI = cast(StopPoint->use_back()); + Value *Arg = CI->getOperand(3); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg->use_empty()) + if (Constant *C = dyn_cast(Arg)) + DeadConstants.push_back(C); + } + StopPoint->eraseFromParent(); + } + if (RegionStart) { + while (!RegionStart->use_empty()) { + CallInst *CI = cast(RegionStart->use_back()); + Value *Arg = CI->getOperand(1); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg->use_empty()) + if (Constant *C = dyn_cast(Arg)) + DeadConstants.push_back(C); + } + RegionStart->eraseFromParent(); + } + if (RegionEnd) { + while (!RegionEnd->use_empty()) { + CallInst *CI = cast(RegionEnd->use_back()); + Value *Arg = CI->getOperand(1); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg->use_empty()) + if (Constant *C = dyn_cast(Arg)) + DeadConstants.push_back(C); + } + RegionEnd->eraseFromParent(); + } + if (Declare) { + while (!Declare->use_empty()) { + CallInst *CI = cast(Declare->use_back()); + Value *Arg1 = CI->getOperand(1); + Value *Arg2 = CI->getOperand(2); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg1->use_empty()) { + if (Constant *C = dyn_cast(Arg1)) + DeadConstants.push_back(C); + else + RecursivelyDeleteTriviallyDeadInstructions(Arg1); + } + if (Arg2->use_empty()) + if (Constant *C = dyn_cast(Arg2)) + DeadConstants.push_back(C); + } + Declare->eraseFromParent(); + } + + // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce + // but since we are removing all debug information, make them internal now. + // FIXME: Use private linkage maybe? + if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units")) + if (GlobalVariable *GV = dyn_cast(C)) + GV->setLinkage(GlobalValue::InternalLinkage); + + if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms")) + if (GlobalVariable *GV = dyn_cast(C)) + GV->setLinkage(GlobalValue::InternalLinkage); + + if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables")) + if (GlobalVariable *GV = dyn_cast(C)) + GV->setLinkage(GlobalValue::InternalLinkage); + + // Delete all dbg variables. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + GlobalVariable *GV = dyn_cast(I); + if (!GV) continue; + if (GV->use_empty() && llvmUsedValues.count(I) == 0 + && (!GV->hasSection() + || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0)) + DeadConstants.push_back(GV); + } + + if (DeadConstants.empty()) + return false; + + // Delete any internal globals that were only used by the debugger intrinsics. + while (!DeadConstants.empty()) { + Constant *C = DeadConstants.back(); + DeadConstants.pop_back(); + if (GlobalVariable *GV = dyn_cast(C)) { + if (GV->hasLocalLinkage()) + RemoveDeadConstant(GV); + } + else + RemoveDeadConstant(C); + } + + // Remove all llvm.dbg types. 
+ TypeSymbolTable &ST = M.getTypeSymbolTable(); + for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) { + if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9)) + ST.remove(TI++); + else + ++TI; + } + + return true; +} + +bool StripSymbols::runOnModule(Module &M) { + bool Changed = false; + Changed |= StripDebugInfo(M); + if (!OnlyDebugInfo) + Changed |= StripSymbolNames(M, false); + return Changed; +} + +bool StripNonDebugSymbols::runOnModule(Module &M) { + return StripSymbolNames(M, true); +} + +bool StripDebugDeclare::runOnModule(Module &M) { + + Function *Declare = M.getFunction("llvm.dbg.declare"); + std::vector DeadConstants; + + if (Declare) { + while (!Declare->use_empty()) { + CallInst *CI = cast(Declare->use_back()); + Value *Arg1 = CI->getOperand(1); + Value *Arg2 = CI->getOperand(2); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg1->use_empty()) { + if (Constant *C = dyn_cast(Arg1)) + DeadConstants.push_back(C); + else + RecursivelyDeleteTriviallyDeadInstructions(Arg1); + } + if (Arg2->use_empty()) + if (Constant *C = dyn_cast(Arg2)) + DeadConstants.push_back(C); + } + Declare->eraseFromParent(); + } + + // Delete all llvm.dbg.global_variables. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + GlobalVariable *GV = dyn_cast(I); + if (!GV) continue; + if (GV->use_empty() && GV->hasName() + && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0) + DeadConstants.push_back(GV); + } + + while (!DeadConstants.empty()) { + Constant *C = DeadConstants.back(); + DeadConstants.pop_back(); + if (GlobalVariable *GV = dyn_cast(C)) { + if (GV->hasLocalLinkage()) + RemoveDeadConstant(GV); + } + else + RemoveDeadConstant(C); + } + + return true; +} diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp new file mode 100644 index 000000000000..9f54388aa45e --- /dev/null +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -0,0 +1,351 @@ +//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass finds functions that return a struct (using a pointer to the struct +// as the first argument of the function, marked with the 'sret' attribute) and +// replaces them with a new function that simply returns each of the elements of +// that struct (using multiple return values). +// +// This pass works under a number of conditions: +// 1. The returned struct must not contain other structs +// 2. The returned struct must only be used to load values from +// 3. 
The placeholder struct passed in is the result of an alloca.
+//
+//===----------------------------------------------------------------------===//

+#define DEBUG_TYPE "sretpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumRejectedSRETUses, "Number of sret rejected due to unexpected uses");
+STATISTIC(NumSRET, "Number of sret promoted");
+namespace {
+  /// SRETPromotion - This pass removes the sret parameter and updates the
+  /// function to use multiple return values instead.
+  ///
+  struct VISIBILITY_HIDDEN SRETPromotion : public CallGraphSCCPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    static char ID; // Pass identification, replacement for typeid
+    SRETPromotion() : CallGraphSCCPass(&ID) {}
+
+  private:
+    bool PromoteReturn(CallGraphNode *CGN);
+    bool isSafeToUpdateAllCallers(Function *F);
+    Function *cloneFunctionBody(Function *F, const StructType *STy);
+    void updateCallSites(Function *F, Function *NF);
+    bool nestedStructType(const StructType *STy);
+  };
+}
+
+char SRETPromotion::ID = 0;
+static RegisterPass<SRETPromotion>
+X("sretpromotion", "Promote sret arguments to multiple ret values");
+
+Pass *llvm::createStructRetPromotionPass() {
+  return new SRETPromotion();
+}
+
+bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+  bool Changed = false;
+
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+    Changed |= PromoteReturn(SCC[i]);
+
+  return Changed;
+}
+
+/// PromoteReturn - This method promotes a function that uses a StructRet
+/// parameter into a function that uses multiple return values.
+bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
+  Function *F = CGN->getFunction();
+
+  if (!F || F->isDeclaration() || !F->hasLocalLinkage())
+    return false;
+
+  // Make sure that the function returns a struct.
+  if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
+    return false;
+
+  DOUT << "SretPromotion: Looking at sret function "
+       << F->getNameStart() << "\n";
+
+  assert(F->getReturnType() == Type::VoidTy && "Invalid function return type");
+  Function::arg_iterator AI = F->arg_begin();
+  const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
+  assert(FArgType && "Invalid sret parameter type");
+  const llvm::StructType *STy =
+    dyn_cast<StructType>(FArgType->getElementType());
+  assert(STy && "Invalid sret parameter element type");
+
+  // Check whether it is safe to perform this promotion (sketched below).
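+  // As a rough C-level picture of the rewrite (the Pair struct and make_pair
+  // names are illustrative only), a callee such as
+  //
+  //   struct Pair { int a, b; };
+  //   void make_pair(struct Pair *out /* sret */, int x) {
+  //     out->a = x; out->b = x + 1;
+  //   }
+  //
+  // becomes one that returns the aggregate directly:
+  //
+  //   struct Pair make_pair(int x) {
+  //     struct Pair p = { x, x + 1 }; return p;
+  //   }
+  //
+  // and each caller's loads through the old sret pointer become uses of the
+  // returned value (extractvalue at the IR level, as updateCallSites below
+  // arranges).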
+ if (isSafeToUpdateAllCallers(F) == false) { + DOUT << "SretPromotion: Not all callers can be updated\n"; + NumRejectedSRETUses++; + return false; + } + + DOUT << "SretPromotion: sret argument will be promoted\n"; + NumSRET++; + // [1] Replace use of sret parameter + AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv", + F->getEntryBlock().begin()); + Value *NFirstArg = F->arg_begin(); + NFirstArg->replaceAllUsesWith(TheAlloca); + + // [2] Find and replace ret instructions + for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *I = BI; + ++BI; + if (isa(I)) { + Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); + ReturnInst *NR = ReturnInst::Create(NV, I); + I->replaceAllUsesWith(NR); + I->eraseFromParent(); + } + } + + // [3] Create the new function body and insert it into the module. + Function *NF = cloneFunctionBody(F, STy); + + // [4] Update all call sites to use new function + updateCallSites(F, NF); + + F->eraseFromParent(); + getAnalysis().changeFunction(F, NF); + return true; +} + +// Check if it is ok to perform this promotion. +bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { + + if (F->use_empty()) + // No users. OK to modify signature. + return true; + + for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end(); + FnUseI != FnUseE; ++FnUseI) { + // The function is passed in as an argument to (possibly) another function, + // we can't change it! + CallSite CS = CallSite::get(*FnUseI); + Instruction *Call = CS.getInstruction(); + // The function is used by something else than a call or invoke instruction, + // we can't change it! + if (!Call || !CS.isCallee(FnUseI)) + return false; + CallSite::arg_iterator AI = CS.arg_begin(); + Value *FirstArg = *AI; + + if (!isa(FirstArg)) + return false; + + // Check FirstArg's users. + for (Value::use_iterator ArgI = FirstArg->use_begin(), + ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { + + // If FirstArg user is a CallInst that does not correspond to current + // call site then this function F is not suitable for sret promotion. + if (CallInst *CI = dyn_cast(ArgI)) { + if (CI != Call) + return false; + } + // If FirstArg user is a GEP whose all users are not LoadInst then + // this function F is not suitable for sret promotion. + else if (GetElementPtrInst *GEP = dyn_cast(ArgI)) { + // TODO : Use dom info and insert PHINodes to collect get results + // from multiple call sites for this GEP. + if (GEP->getParent() != Call->getParent()) + return false; + for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); + GEPI != GEPE; ++GEPI) + if (!isa(GEPI)) + return false; + } + // Any other FirstArg users make this function unsuitable for sret + // promotion. + else + return false; + } + } + + return true; +} + +/// cloneFunctionBody - Create a new function based on F and +/// insert it into module. Remove first argument. Use STy as +/// the return type for new function. +Function *SRETPromotion::cloneFunctionBody(Function *F, + const StructType *STy) { + + const FunctionType *FTy = F->getFunctionType(); + std::vector Params; + + // Attributes - Keep track of the parameter attributes for the arguments. + SmallVector AttributesVec; + const AttrListPtr &PAL = F->getAttributes(); + + // Add any return attributes. + if (Attributes attrs = PAL.getRetAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + + // Skip first argument. 
+ Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + ++I; + // 0th parameter attribute is reserved for return type. + // 1th parameter attribute is for first 1st sret argument. + unsigned ParamIndex = 2; + while (I != E) { + Params.push_back(I->getType()); + if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) + AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); + ++I; + ++ParamIndex; + } + + // Add any fn attributes. + if (Attributes attrs = PAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + + + FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); + Function *NF = Function::Create(NFTy, F->getLinkage()); + NF->takeName(F); + NF->copyAttributesFrom(F); + NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); + F->getParent()->getFunctionList().insert(F, NF); + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Replace arguments + I = F->arg_begin(); + E = F->arg_end(); + Function::arg_iterator NI = NF->arg_begin(); + ++I; + while (I != E) { + I->replaceAllUsesWith(NI); + NI->takeName(I); + ++I; + ++NI; + } + + return NF; +} + +/// updateCallSites - Update all sites that call F to use NF. +void SRETPromotion::updateCallSites(Function *F, Function *NF) { + CallGraph &CG = getAnalysis(); + SmallVector Args; + + // Attributes - Keep track of the parameter attributes for the arguments. + SmallVector ArgAttrsVec; + + while (!F->use_empty()) { + CallSite CS = CallSite::get(*F->use_begin()); + Instruction *Call = CS.getInstruction(); + + const AttrListPtr &PAL = F->getAttributes(); + // Add any return attributes. + if (Attributes attrs = PAL.getRetAttributes()) + ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); + + // Copy arguments, however skip first one. + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + Value *FirstCArg = *AI; + ++AI; + // 0th parameter attribute is reserved for return type. + // 1th parameter attribute is for first 1st sret argument. + unsigned ParamIndex = 2; + while (AI != AE) { + Args.push_back(*AI); + if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) + ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); + ++ParamIndex; + ++AI; + } + + // Add any function attributes. + if (Attributes attrs = PAL.getFnAttributes()) + ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); + + AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); + + // Build new call instruction. + Instruction *New; + if (InvokeInst *II = dyn_cast(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(NewPAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(NewPAL); + if (cast(Call)->isTailCall()) + cast(New)->setTailCall(); + } + Args.clear(); + ArgAttrsVec.clear(); + New->takeName(Call); + + // Update the callgraph to know that the callsite has been transformed. + CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); + + // Update all users of sret parameter to extract value using extractvalue. 
+ for (Value::use_iterator UI = FirstCArg->use_begin(), + UE = FirstCArg->use_end(); UI != UE; ) { + User *U2 = *UI++; + CallInst *C2 = dyn_cast(U2); + if (C2 && (C2 == Call)) + continue; + else if (GetElementPtrInst *UGEP = dyn_cast(U2)) { + ConstantInt *Idx = dyn_cast(UGEP->getOperand(2)); + assert (Idx && "Unexpected getelementptr index!"); + Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), + "evi", UGEP); + while(!UGEP->use_empty()) { + // isSafeToUpdateAllCallers has checked that all GEP uses are + // LoadInsts + LoadInst *L = cast(*UGEP->use_begin()); + L->replaceAllUsesWith(GR); + L->eraseFromParent(); + } + UGEP->eraseFromParent(); + } + else assert( 0 && "Unexpected sret parameter use"); + } + Call->eraseFromParent(); + } +} + +/// nestedStructType - Return true if STy includes any +/// other aggregate types +bool SRETPromotion::nestedStructType(const StructType *STy) { + unsigned Num = STy->getNumElements(); + for (unsigned i = 0; i < Num; i++) { + const Type *Ty = STy->getElementType(i); + if (!Ty->isSingleValueType() && Ty != Type::VoidTy) + return true; + } + return false; +} diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp new file mode 100644 index 000000000000..2bd9809a3961 --- /dev/null +++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp @@ -0,0 +1,126 @@ +//===- BlockProfiling.cpp - Insert counters for block profiling -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments the specified program with counters for basic block or +// function profiling. This is the most basic form of profiling, which can tell +// which blocks are hot, but cannot reliably detect hot paths through the CFG. +// Block profiling counts the number of times each basic block executes, and +// function profiling counts the number of times each function is called. +// +// Note that this implementation is very naive. Control equivalent regions of +// the CFG should not require duplicate counters, but we do put duplicate +// counters in. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +#include "llvm/Transforms/Instrumentation.h" +#include "RSProfiling.h" +#include "ProfilingUtils.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN FunctionProfiler : public RSProfilers_std { + public: + static char ID; + bool runOnModule(Module &M); + }; +} + +char FunctionProfiler::ID = 0; + +static RegisterPass +X("insert-function-profiling", + "Insert instrumentation for function profiling"); +static RegisterAnalysisGroup XG(X); + +ModulePass *llvm::createFunctionProfilerPass() { + return new FunctionProfiler(); +} + +bool FunctionProfiler::runOnModule(Module &M) { + Function *Main = M.getFunction("main"); + if (Main == 0) { + cerr << "WARNING: cannot insert function profiling into a module" + << " with no main function!\n"; + return false; // No main, no instrumentation! 
+ } + + unsigned NumFunctions = 0; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) + ++NumFunctions; + + const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions); + GlobalVariable *Counters = + new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "FuncProfCounters", &M); + + // Instrument all of the functions... + unsigned i = 0; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) + // Insert counter at the start of the function + IncrementCounterInBlock(&I->getEntryBlock(), i++, Counters); + + // Add the initialization call to main. + InsertProfilingInitCall(Main, "llvm_start_func_profiling", Counters); + return true; +} + + +namespace { + class BlockProfiler : public RSProfilers_std { + bool runOnModule(Module &M); + public: + static char ID; + }; +} + +char BlockProfiler::ID = 0; +static RegisterPass +Y("insert-block-profiling", "Insert instrumentation for block profiling"); +static RegisterAnalysisGroup YG(Y); + +ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); } + +bool BlockProfiler::runOnModule(Module &M) { + Function *Main = M.getFunction("main"); + if (Main == 0) { + cerr << "WARNING: cannot insert block profiling into a module" + << " with no main function!\n"; + return false; // No main, no instrumentation! + } + + unsigned NumBlocks = 0; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + NumBlocks += I->size(); + + const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks); + GlobalVariable *Counters = + new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "BlockProfCounters", &M); + + // Instrument all of the blocks... + unsigned i = 0; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) + // Insert counter at the start of the block + IncrementCounterInBlock(BB, i++, Counters); + + // Add the initialization call to main. + InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters); + return true; +} + diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt new file mode 100644 index 000000000000..d7c518d282f8 --- /dev/null +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMInstrumentation + BlockProfiling.cpp + EdgeProfiling.cpp + ProfilingUtils.cpp + RSProfiling.cpp + ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp new file mode 100644 index 000000000000..0831f3b7a480 --- /dev/null +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -0,0 +1,101 @@ +//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments the specified program with counters for edge profiling. +// Edge profiling can give a reasonable approximation of the hot paths through a +// program, and is used for a wide variety of program transformations. +// +// Note that this implementation is very naive. We insert a counter for *every* +// edge in the program, instead of using control flow information to prune the +// number of counters inserted. 
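+// For example, a two-way conditional branch contributes two counters, one per
+// outgoing edge; each counter update is the load/add 1/store sequence emitted
+// by IncrementCounterInBlock (ProfilingUtils.cpp), placed in the source block
+// when there is a single successor and in the destination block otherwise,
+// after any critical edge has been split.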
+// +//===----------------------------------------------------------------------===// + +#include "ProfilingUtils.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass { + bool runOnModule(Module &M); + public: + static char ID; // Pass identification, replacement for typeid + EdgeProfiler() : ModulePass(&ID) {} + }; +} + +char EdgeProfiler::ID = 0; +static RegisterPass +X("insert-edge-profiling", "Insert instrumentation for edge profiling"); + +ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } + +bool EdgeProfiler::runOnModule(Module &M) { + Function *Main = M.getFunction("main"); + if (Main == 0) { + cerr << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; + return false; // No main, no instrumentation! + } + + std::set BlocksToInstrument; + unsigned NumEdges = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + // Keep track of which blocks need to be instrumented. We don't want to + // instrument blocks that are added as the result of breaking critical + // edges! + BlocksToInstrument.insert(BB); + NumEdges += BB->getTerminator()->getNumSuccessors(); + } + + const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges); + GlobalVariable *Counters = + new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "EdgeProfCounters", &M); + + // Instrument all of the edges... + unsigned i = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks + // Okay, we have to add a counter of each outgoing edge. If the + // outgoing edge is not critical don't split it, just insert the counter + // in the source or destination of the edge. + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + // If the edge is critical, split it. + SplitCriticalEdge(TI, s, this); + + // Okay, we are guaranteed that the edge is no longer critical. If we + // only have a single successor, insert the counter in this block, + // otherwise insert it in the successor block. + if (TI->getNumSuccessors() == 1) { + // Insert counter at the start of the block + IncrementCounterInBlock(BB, i++, Counters); + } else { + // Insert counter at the start of the block + IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters); + } + } + } + + // Add the initialization call to main. + InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters); + return true; +} + diff --git a/lib/Transforms/Instrumentation/Makefile b/lib/Transforms/Instrumentation/Makefile new file mode 100644 index 000000000000..6cbc7a9cd88a --- /dev/null +++ b/lib/Transforms/Instrumentation/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/Instrumentation/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMInstrumentation +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp new file mode 100644 index 000000000000..48071f115692 --- /dev/null +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -0,0 +1,120 @@ +//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a few helper functions which are used by profile +// instrumentation code to instrument the code. This allows the profiler pass +// to worry about *what* to insert, and these functions take care of *how* to do +// it. +// +//===----------------------------------------------------------------------===// + +#include "ProfilingUtils.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" + +void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, + GlobalValue *Array) { + const Type *ArgVTy = + PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); + const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty); + Module &M = *MainFn->getParent(); + Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty, + ArgVTy, UIntPtr, Type::Int32Ty, + (Type *)0); + + // This could force argc and argv into programs that wouldn't otherwise have + // them, but instead we just pass null values in. + std::vector Args(4); + Args[0] = Constant::getNullValue(Type::Int32Ty); + Args[1] = Constant::getNullValue(ArgVTy); + + // Skip over any allocas in the entry block. + BasicBlock *Entry = MainFn->begin(); + BasicBlock::iterator InsertPos = Entry->begin(); + while (isa(InsertPos)) ++InsertPos; + + std::vector GEPIndices(2, Constant::getNullValue(Type::Int32Ty)); + unsigned NumElements = 0; + if (Array) { + Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], + GEPIndices.size()); + NumElements = + cast(Array->getType()->getElementType())->getNumElements(); + } else { + // If this profiling instrumentation doesn't have a constant array, just + // pass null. + Args[2] = ConstantPointerNull::get(UIntPtr); + } + Args[3] = ConstantInt::get(Type::Int32Ty, NumElements); + + Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), + "newargc", InsertPos); + + // If argc or argv are not available in main, just pass null values in. + Function::arg_iterator AI; + switch (MainFn->arg_size()) { + default: + case 2: + AI = MainFn->arg_begin(); ++AI; + if (AI->getType() != ArgVTy) { + Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, + false); + InitCall->setOperand(2, + CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); + } else { + InitCall->setOperand(2, AI); + } + /* FALL THROUGH */ + + case 1: + AI = MainFn->arg_begin(); + // If the program looked at argc, have it look at the return value of the + // init call instead. 
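+    // The profiling runtime may consume arguments it recognizes, so main is
+    // rewired to see the (possibly reduced) argc returned by the
+    // initialization call ("newargc" above) instead of the original value.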
+ if (AI->getType() != Type::Int32Ty) { + Instruction::CastOps opcode; + if (!AI->use_empty()) { + opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); + AI->replaceAllUsesWith( + CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos)); + } + opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true); + InitCall->setOperand(1, + CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall)); + } else { + AI->replaceAllUsesWith(InitCall); + InitCall->setOperand(1, AI); + } + + case 0: break; + } +} + +void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, + GlobalValue *CounterArray) { + // Insert the increment after any alloca or PHI instructions... + BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); + while (isa(InsertPos)) + ++InsertPos; + + // Create the getelementptr constant expression + std::vector Indices(2); + Indices[0] = Constant::getNullValue(Type::Int32Ty); + Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); + Constant *ElementPtr = + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); + + // Load, increment and store the value back. + Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); + Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal, + ConstantInt::get(Type::Int32Ty, 1), + "NewFuncCounter", InsertPos); + new StoreInst(NewVal, ElementPtr, InsertPos); +} diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h new file mode 100644 index 000000000000..94efffec8a3d --- /dev/null +++ b/lib/Transforms/Instrumentation/ProfilingUtils.h @@ -0,0 +1,31 @@ +//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a few helper functions which are used by profile +// instrumentation code to instrument the code. This allows the profiler pass +// to worry about *what* to insert, and these functions take care of *how* to do +// it. +// +//===----------------------------------------------------------------------===// + +#ifndef PROFILINGUTILS_H +#define PROFILINGUTILS_H + +namespace llvm { + class Function; + class GlobalValue; + class BasicBlock; + + void InsertProfilingInitCall(Function *MainFn, const char *FnName, + GlobalValue *Arr = 0); + void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, + GlobalValue *CounterArray); +} + +#endif diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp new file mode 100644 index 000000000000..c6cf4dfd6ebf --- /dev/null +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -0,0 +1,653 @@ +//===- RSProfiling.cpp - Various profiling using random sampling ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// These passes implement a random sampling based profiling. Different methods +// of choosing when to sample are supported, as well as different types of +// profiling. This is done as two passes. 
The first is a sequence of profiling
+// passes which insert profiling into the program and remember what they
+// inserted.
+//
+// The second stage duplicates all instructions in a function, ignoring the
+// profiling code, then connects the two versions together at the entry and at
+// backedges.  At each connection point a choice is made as to whether to jump
+// to the profiled code (take a sample) or execute the unprofiled code.
+//
+// It is highly recommended that after this pass one runs mem2reg and adce
+// (instcombine, load-vn, gdce and dse are also good to run afterwards).
+//
+// This design is intended to make the profiling passes independent of the RS
+// framework, but any profiling pass that implements the RSProfiling interface
+// is compatible with the RS framework (and thus can be sampled).
+//
+// TODO: obviously the block and function profiling are almost identical to the
+// existing ones, so they can be unified (especially since these passes are
+// valid without the RS framework).
+// TODO: fix the choice code so that the sampling frequency is not hard-coded.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "RSProfiling.h"
+#include <set>
+#include <map>
+#include <queue>
+using namespace llvm;
+
+namespace {
+  enum RandomMeth {
+    GBV, GBVO, HOSTCC
+  };
+}
+
+static cl::opt<RandomMeth> RandomMethod("profile-randomness",
+    cl::desc("How to randomly choose to profile:"),
+    cl::values(
+               clEnumValN(GBV, "global", "global counter"),
+               clEnumValN(GBVO, "ra_global",
+                          "register allocated global counter"),
+               clEnumValN(HOSTCC, "rdcc", "cycle counter"),
+               clEnumValEnd));
+
+namespace {
+  /// NullProfilerRS - The basic profiler that does nothing.  It is the default
+  /// profiler and thus terminates RSProfiler chains.  It is useful for
+  /// measuring framework overhead.
+  class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    bool isProfiling(Value* v) {
+      return false;
+    }
+    bool runOnModule(Module &M) {
+      return false;
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
+static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
+                                       "Measure profiling framework overhead");
+static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
+
+namespace {
+  /// Chooser - Something that chooses when to make a sample of the profiled
+  /// code.
+  class VISIBILITY_HIDDEN Chooser {
+  public:
+    /// ProcessChoicePoint - Called for each basic block inserted to choose
+    /// between the normal and the sample code.
+    virtual void ProcessChoicePoint(BasicBlock*) = 0;
+    /// PrepFunction - Called once per function before other work is done.
+    /// This gives the opportunity to insert new allocas and such.
+    virtual void PrepFunction(Function*) = 0;
+    virtual ~Chooser() {}
+  };
+
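As an aside (editorial, not part of the patch): the countdown policy that the
choosers below lower into IR is easy to see in miniature in plain C++. In this
sketch the driver loop and names are illustrative, but the compare-to-zero,
decrement and reset sequence and the (1 << 14) - 1 reset value mirror
GlobalRandomCounter::ProcessChoicePoint and ProfilerRS::doInitialization in
this file.

    #include <cstdio>

    // Illustrative constants: the pass keeps this in an i32 global named
    // "RandomSteeringCounter" and resets it to (1 << 14) - 1.
    static const unsigned ResetValue = (1 << 14) - 1;
    static unsigned Counter = ResetValue;

    // Mirrors ProcessChoicePoint: test for zero, decrement, reset on a hit.
    // Returns true when the instrumented ("sample") version should run.
    static bool TakeSample() {
      bool Hit = (Counter == 0); // icmp eq %counter, 0
      Counter -= 1;              // sub 1 and store back
      if (Hit)
        Counter = ResetValue;    // the inserted "reset" block
      return Hit;
    }

    int main() {
      unsigned Samples = 0;
      for (unsigned i = 0; i != 1000000; ++i)
        if (TakeSample())
          ++Samples;
      std::printf("sampled %u of 1000000 checks\n", Samples); // expect ~61
      return 0;
    }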
+  //Things that implement sampling policies.
+  //A global value that is read-mod-stored to choose when to sample; a sample
+  //is taken when the global counter hits 0.
+  class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
+    GlobalVariable* Counter;
+    Value* ResetValue;
+    const Type* T;
+  public:
+    GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+    virtual ~GlobalRandomCounter();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  //Same as GlobalRandomCounter, but allow register allocation of the global
+  //counter.
+  class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
+    GlobalVariable* Counter;
+    Value* ResetValue;
+    AllocaInst* AI;
+    const Type* T;
+  public:
+    GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+    virtual ~GlobalRandomCounterOpt();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  //Use the cycle counter intrinsic as a source of pseudo randomness when
+  //deciding when to sample.
+  class VISIBILITY_HIDDEN CycleCounter : public Chooser {
+    uint64_t rm;
+    Constant *F;
+  public:
+    CycleCounter(Module& m, uint64_t resetmask);
+    virtual ~CycleCounter();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  /// ProfilerRS - Insert the random sampling framework.
+  struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    ProfilerRS() : FunctionPass(&ID) {}
+
+    std::map<Value*, Value*> TransCache;
+    std::set<BasicBlock*> ChoicePoints;
+    Chooser* c;
+
+    //Translate and duplicate values for the new, profile-free version of the
+    //code.
+    Value* Translate(Value* v);
+    //Duplicate an entire function (without profiling).
+    void Duplicate(Function& F, RSProfilers& LI);
+    //Called once for each backedge; handles the insertion of choice points
+    //and the interconnection of the two versions of the code.
+    void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
+    bool runOnFunction(Function& F);
+    bool doInitialization(Module &M);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  };
+}
+
+static RegisterPass<ProfilerRS>
+X("insert-rs-profiling-framework",
+  "Insert random sampling instrumentation framework");
+
+char RSProfilers::ID = 0;
+char NullProfilerRS::ID = 0;
+char ProfilerRS::ID = 0;
+
+//Local utilities.
+static void ReplacePhiPred(BasicBlock* btarget,
+                           BasicBlock* bold, BasicBlock* bnew);
+
+static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
+
+template<class T>
+static void recBackEdge(BasicBlock* bb, T& BackEdges,
+                        std::map<BasicBlock*, int>& color,
+                        std::map<BasicBlock*, int>& depth,
+                        std::map<BasicBlock*, int>& finish,
+                        int& time);
+
+//Find the back edges and where they go to.
+template<class T>
+static void getBackEdges(Function& F, T& BackEdges);
+
+
+///////////////////////////////////////
+// Methods of choosing when to profile
+///////////////////////////////////////
+
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+                                         uint64_t resetval) : T(t) {
+  ConstantInt* Init = ConstantInt::get(T, resetval);
+  ResetValue = Init;
+  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
+                               Init, "RandomSteeringCounter", &M);
+}
+
+GlobalRandomCounter::~GlobalRandomCounter() {}
+
+void GlobalRandomCounter::PrepFunction(Function* F) {}
+
+void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
+  BranchInst* t = cast<BranchInst>(bb->getTerminator());
+
+  //decrement counter
+  LoadInst* l = new LoadInst(Counter, "counter", t);
+
+  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
+                             "countercc", t);
+
+  Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
"counternew", t); + new StoreInst(nv, Counter, t); + t->setCondition(s); + + //reset counter + BasicBlock* oldnext = t->getSuccessor(0); + BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + oldnext); + TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); + t->setSuccessor(0, resetblock); + new StoreInst(ResetValue, Counter, t2); + ReplacePhiPred(oldnext, bb, resetblock); +} + +GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t, + uint64_t resetval) + : AI(0), T(t) { + ConstantInt* Init = ConstantInt::get(T, resetval); + ResetValue = Init; + Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, + Init, "RandomSteeringCounter", &M); +} + +GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} + +void GlobalRandomCounterOpt::PrepFunction(Function* F) { + //make a local temporary to cache the global + BasicBlock& bb = F->getEntryBlock(); + BasicBlock::iterator InsertPt = bb.begin(); + AI = new AllocaInst(T, 0, "localcounter", InsertPt); + LoadInst* l = new LoadInst(Counter, "counterload", InsertPt); + new StoreInst(l, AI, InsertPt); + + //modify all functions and return values to restore the local variable to/from + //the global variable + for(Function::iterator fib = F->begin(), fie = F->end(); + fib != fie; ++fib) + for(BasicBlock::iterator bib = fib->begin(), bie = fib->end(); + bib != bie; ++bib) + if (isa(bib)) { + LoadInst* l = new LoadInst(AI, "counter", bib); + new StoreInst(l, Counter, bib); + l = new LoadInst(Counter, "counter", ++bib); + new StoreInst(l, AI, bib--); + } else if (isa(bib)) { + LoadInst* l = new LoadInst(AI, "counter", bib); + new StoreInst(l, Counter, bib); + + BasicBlock* bb = cast(bib)->getNormalDest(); + BasicBlock::iterator i = bb->getFirstNonPHI(); + l = new LoadInst(Counter, "counter", i); + + bb = cast(bib)->getUnwindDest(); + i = bb->getFirstNonPHI(); + l = new LoadInst(Counter, "counter", i); + new StoreInst(l, AI, i); + } else if (isa(&*bib) || isa(&*bib)) { + LoadInst* l = new LoadInst(AI, "counter", bib); + new StoreInst(l, Counter, bib); + } +} + +void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { + BranchInst* t = cast(bb->getTerminator()); + + //decrement counter + LoadInst* l = new LoadInst(AI, "counter", t); + + ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), + "countercc", t); + + Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), + "counternew", t); + new StoreInst(nv, AI, t); + t->setCondition(s); + + //reset counter + BasicBlock* oldnext = t->getSuccessor(0); + BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + oldnext); + TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); + t->setSuccessor(0, resetblock); + new StoreInst(ResetValue, AI, t2); + ReplacePhiPred(oldnext, bb, resetblock); +} + + +CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) { + F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter); +} + +CycleCounter::~CycleCounter() {} + +void CycleCounter::PrepFunction(Function* F) {} + +void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { + BranchInst* t = cast(bb->getTerminator()); + + CallInst* c = CallInst::Create(F, "rdcc", t); + BinaryOperator* b = + BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm), + "mrdcc", t); + + ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b, + ConstantInt::get(Type::Int64Ty, 0), + "mrdccc", t); + + t->setCondition(s); +} + +/////////////////////////////////////// +// Profiling: 
+/////////////////////////////////////// +bool RSProfilers_std::isProfiling(Value* v) { + if (profcode.find(v) != profcode.end()) + return true; + //else + RSProfilers& LI = getAnalysis(); + return LI.isProfiling(v); +} + +void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, + GlobalValue *CounterArray) { + // Insert the increment after any alloca or PHI instructions... + BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); + while (isa(InsertPos)) + ++InsertPos; + + // Create the getelementptr constant expression + std::vector Indices(2); + Indices[0] = Constant::getNullValue(Type::Int32Ty); + Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); + Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, + &Indices[0], 2); + + // Load, increment and store the value back. + Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos); + profcode.insert(OldVal); + Value *NewVal = BinaryOperator::CreateAdd(OldVal, + ConstantInt::get(Type::Int32Ty, 1), + "NewCounter", InsertPos); + profcode.insert(NewVal); + profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); +} + +void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const { + //grab any outstanding profiler, or get the null one + AU.addRequired(); +} + +/////////////////////////////////////// +// RS Framework +/////////////////////////////////////// + +Value* ProfilerRS::Translate(Value* v) { + if(TransCache[v]) + return TransCache[v]; + + if (BasicBlock* bb = dyn_cast(v)) { + if (bb == &bb->getParent()->getEntryBlock()) + TransCache[bb] = bb; //don't translate entry block + else + TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(), + bb->getParent(), NULL); + return TransCache[bb]; + } else if (Instruction* i = dyn_cast(v)) { + //we have already translated this + //do not translate entry block allocas + if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) { + TransCache[i] = i; + return i; + } else { + //translate this + Instruction* i2 = i->clone(); + if (i->hasName()) + i2->setName("dup_" + i->getName()); + TransCache[i] = i2; + //NumNewInst++; + for (unsigned x = 0; x < i2->getNumOperands(); ++x) + i2->setOperand(x, Translate(i2->getOperand(x))); + return i2; + } + } else if (isa(v) || isa(v) || isa(v)) { + TransCache[v] = v; + return v; + } + assert(0 && "Value not handled"); + return 0; +} + +void ProfilerRS::Duplicate(Function& F, RSProfilers& LI) +{ + //perform a breadth first search, building up a duplicate of the code + std::queue worklist; + std::set seen; + + //This loop ensures proper BB order, to help performance + for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib) + worklist.push(fib); + while (!worklist.empty()) { + Translate(worklist.front()); + worklist.pop(); + } + + //remember than reg2mem created a new entry block we don't want to duplicate + worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0)); + seen.insert(&F.getEntryBlock()); + + while (!worklist.empty()) { + BasicBlock* bb = worklist.front(); + worklist.pop(); + if(seen.find(bb) == seen.end()) { + BasicBlock* bbtarget = cast(Translate(bb)); + BasicBlock::InstListType& instlist = bbtarget->getInstList(); + for (BasicBlock::iterator iib = bb->begin(), iie = bb->end(); + iib != iie; ++iib) { + //NumOldInst++; + if (!LI.isProfiling(&*iib)) { + Instruction* i = cast(Translate(iib)); + instlist.insert(bbtarget->end(), i); + } + } + //updated search state; + seen.insert(bb); + TerminatorInst* ti = bb->getTerminator(); + for (unsigned x = 0; x < 
ti->getNumSuccessors(); ++x) { + BasicBlock* bbs = ti->getSuccessor(x); + if (seen.find(bbs) == seen.end()) { + worklist.push(bbs); + } + } + } + } +} + +void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) { + //given a backedge from B -> A, and translations A' and B', + //a: insert C and C' + //b: add branches in C to A and A' and in C' to A and A' + //c: mod terminators@B, replace A with C + //d: mod terminators@B', replace A' with C' + //e: mod phis@A for pred B to be pred C + // if multiple entries, simplify to one + //f: mod phis@A' for pred B' to be pred C' + // if multiple entries, simplify to one + //g: for all phis@A with pred C using x + // add in edge from C' using x' + // add in edge from C using x in A' + + //a: + Function::iterator BBN = src; ++BBN; + BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN); + //ChoicePoints.insert(bbC); + BBN = cast(Translate(src)); + BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN); + ChoicePoints.insert(bbCp); + + //b: + BranchInst::Create(cast(Translate(dst)), bbC); + BranchInst::Create(dst, cast(Translate(dst)), + ConstantInt::get(Type::Int1Ty, true), bbCp); + //c: + { + TerminatorInst* iB = src->getTerminator(); + for (unsigned x = 0; x < iB->getNumSuccessors(); ++x) + if (iB->getSuccessor(x) == dst) + iB->setSuccessor(x, bbC); + } + //d: + { + TerminatorInst* iBp = cast(Translate(src->getTerminator())); + for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x) + if (iBp->getSuccessor(x) == cast(Translate(dst))) + iBp->setSuccessor(x, bbCp); + } + //e: + ReplacePhiPred(dst, src, bbC); + //src could be a switch, in which case we are replacing several edges with one + //thus collapse those edges int the Phi + CollapsePhi(dst, bbC); + //f: + ReplacePhiPred(cast(Translate(dst)), + cast(Translate(src)),bbCp); + CollapsePhi(cast(Translate(dst)), bbCp); + //g: + for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie; + ++ib) + if (PHINode* phi = dyn_cast(&*ib)) { + for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) + if(bbC == phi->getIncomingBlock(x)) { + phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp); + cast(Translate(phi))->addIncoming(phi->getIncomingValue(x), + bbC); + } + phi->removeIncomingValue(bbC); + } +} + +bool ProfilerRS::runOnFunction(Function& F) { + if (!F.isDeclaration()) { + std::set > BackEdges; + RSProfilers& LI = getAnalysis(); + + getBackEdges(F, BackEdges); + Duplicate(F, LI); + //assume that stuff worked. now connect the duplicated basic blocks + //with the originals in such a way as to preserve ssa. yuk! 
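+    // Each backedge gets a pair of choice points, one in each version of the
+    // code, so control can switch between the instrumented and the duplicate
+    // copies at loop boundaries as well as at the function entry.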
+ for (std::set >::iterator + ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib) + ProcessBackEdge(ib->first, ib->second, F); + + //oh, and add the edge from the reg2mem created entry node to the + //duplicated second node + TerminatorInst* T = F.getEntryBlock().getTerminator(); + ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0), + cast( + Translate(T->getSuccessor(0))), + ConstantInt::get(Type::Int1Ty, + true))); + + //do whatever is needed now that the function is duplicated + c->PrepFunction(&F); + + //add entry node to choice points + ChoicePoints.insert(&F.getEntryBlock()); + + for (std::set::iterator + ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii) + c->ProcessChoicePoint(*ii); + + ChoicePoints.clear(); + TransCache.clear(); + + return true; + } + return false; +} + +bool ProfilerRS::doInitialization(Module &M) { + switch (RandomMethod) { + case GBV: + c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1); + break; + case GBVO: + c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1); + break; + case HOSTCC: + c = new CycleCounter(M, (1 << 14) - 1); + break; + }; + return true; +} + +void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequiredID(DemoteRegisterToMemoryID); +} + +/////////////////////////////////////// +// Utilities: +/////////////////////////////////////// +static void ReplacePhiPred(BasicBlock* btarget, + BasicBlock* bold, BasicBlock* bnew) { + for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); + ib != ie; ++ib) + if (PHINode* phi = dyn_cast(&*ib)) { + for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) + if(bold == phi->getIncomingBlock(x)) + phi->setIncomingBlock(x, bnew); + } +} + +static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) { + for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); + ib != ie; ++ib) + if (PHINode* phi = dyn_cast(&*ib)) { + std::map counter; + for(unsigned i = 0; i < phi->getNumIncomingValues(); ) { + if (counter[phi->getIncomingBlock(i)]) { + assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]); + phi->removeIncomingValue(i, false); + } else { + counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i); + ++i; + } + } + } +} + +template +static void recBackEdge(BasicBlock* bb, T& BackEdges, + std::map& color, + std::map& depth, + std::map& finish, + int& time) +{ + color[bb] = 1; + ++time; + depth[bb] = time; + TerminatorInst* t= bb->getTerminator(); + for(unsigned i = 0; i < t->getNumSuccessors(); ++i) { + BasicBlock* bbnew = t->getSuccessor(i); + if (color[bbnew] == 0) + recBackEdge(bbnew, BackEdges, color, depth, finish, time); + else if (color[bbnew] == 1) { + BackEdges.insert(std::make_pair(bb, bbnew)); + //NumBackEdges++; + } + } + color[bb] = 2; + ++time; + finish[bb] = time; +} + + + +//find the back edges and where they go to +template +static void getBackEdges(Function& F, T& BackEdges) { + std::map color; + std::map depth; + std::map finish; + int time = 0; + recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); + DOUT << F.getName() << " " << BackEdges.size() << "\n"; +} + + +//Creation functions +ModulePass* llvm::createNullProfilerRSPass() { + return new NullProfilerRS(); +} + +FunctionPass* llvm::createRSProfilingPass() { + return new ProfilerRS(); +} diff --git a/lib/Transforms/Instrumentation/RSProfiling.h b/lib/Transforms/Instrumentation/RSProfiling.h new file mode 100644 index 000000000000..8bbe7c7b28fe --- /dev/null +++ 
b/lib/Transforms/Instrumentation/RSProfiling.h
@@ -0,0 +1,31 @@
+//===- RSProfiling.h - Various profiling using random sampling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// See notes in RSProfiling.cpp
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/RSProfiling.h"
+#include <set>
+
+namespace llvm {
+  /// RSProfilers_std - a simple support class for profilers that handles most
+  /// of the work of chaining and tracking inserted code.
+  struct RSProfilers_std : public RSProfilers {
+    static char ID;
+    std::set<Value*> profcode;
+    // Look up values in profcode.
+    virtual bool isProfiling(Value* v);
+    // Handles required chaining.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    // Places counter updates in basic blocks and records added instructions
+    // in profcode.
+    void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+                                 GlobalValue *CounterArray);
+  };
+}
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
new file mode 100644
index 000000000000..5fe1eeb5c752
--- /dev/null
+++ b/lib/Transforms/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Transforms/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
+
+include $(LEVEL)/Makefile.config
+
+# No support for plugins on windows targets
+ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+  PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 000000000000..9c55f664ebbd
--- /dev/null
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,98 @@
+//===- ADCE.cpp - Code to perform aggressive dead code elimination -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass.  This pass
+// optimistically assumes that all instructions are dead until proven
+// otherwise, allowing it to eliminate dead computations that other DCE passes
+// do not catch, particularly involving loop computations.
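+//
+// The algorithm is a backwards reachability computation over the SSA graph:
+// a worklist is seeded with the trivially live instructions (terminators,
+// debug intrinsics and anything that may have side effects), every operand of
+// a live instruction is marked live in turn, and whatever is never reached is
+// deleted.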
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "adce" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace { + struct VISIBILITY_HIDDEN ADCE : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ADCE() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function& F); + + virtual void getAnalysisUsage(AnalysisUsage& AU) const { + AU.setPreservesCFG(); + } + + }; +} + +char ADCE::ID = 0; +static RegisterPass X("adce", "Aggressive Dead Code Elimination"); + +bool ADCE::runOnFunction(Function& F) { + SmallPtrSet alive; + SmallVector worklist; + + // Collect the set of "root" instructions that are known live. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isa(I.getInstructionIterator()) || + isa(I.getInstructionIterator()) || + I->mayHaveSideEffects()) { + alive.insert(I.getInstructionIterator()); + worklist.push_back(I.getInstructionIterator()); + } + + // Propagate liveness backwards to operands. + while (!worklist.empty()) { + Instruction* curr = worklist.back(); + worklist.pop_back(); + + for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end(); + OI != OE; ++OI) + if (Instruction* Inst = dyn_cast(OI)) + if (alive.insert(Inst)) + worklist.push_back(Inst); + } + + // The inverse of the live set is the dead set. These are those instructions + // which have no side effects and do not influence the control flow or return + // value of the function, and may therefore be deleted safely. + // NOTE: We reuse the worklist vector here for memory efficiency. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (!alive.count(I.getInstructionIterator())) { + worklist.push_back(I.getInstructionIterator()); + I->dropAllReferences(); + } + + for (SmallVector::iterator I = worklist.begin(), + E = worklist.end(); I != E; ++I) { + NumRemoved++; + (*I)->eraseFromParent(); + } + + return !worklist.empty(); +} + +FunctionPass *llvm::createAggressiveDCEPass() { + return new ADCE(); +} diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp new file mode 100644 index 000000000000..fb9b88005b6a --- /dev/null +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -0,0 +1,148 @@ +//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a very simple profile guided basic block placement +// algorithm. The idea is to put frequently executed blocks together at the +// start of the function, and hopefully increase the number of fall-through +// conditional branches. 
If there is no profile information for a particular +// function, this pass basically orders blocks in depth-first order +// +// The algorithm implemented here is basically "Algo1" from "Profile Guided Code +// Positioning" by Pettis and Hansen, except that it uses basic block counts +// instead of edge counts. This should be improved in many ways, but is very +// simple for now. +// +// Basically we "place" the entry block, then loop over all successors in a DFO, +// placing the most frequently executed successor until we run out of blocks. I +// told you this was _extremely_ simplistic. :) This is also much slower than it +// could be. When it becomes important, this pass will be rewritten to use a +// better algorithm, and then we can worry about efficiency. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Scalar.h" +#include +using namespace llvm; + +STATISTIC(NumMoved, "Number of basic blocks moved"); + +namespace { + struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BlockPlacement() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + //AU.addPreserved(); // Does this work? + } + private: + /// PI - The profile information that is guiding us. + /// + ProfileInfo *PI; + + /// NumMovedBlocks - Every time we move a block, increment this counter. + /// + unsigned NumMovedBlocks; + + /// PlacedBlocks - Every time we place a block, remember it so we don't get + /// into infinite loops. + std::set PlacedBlocks; + + /// InsertPos - This an iterator to the next place we want to insert a + /// block. + Function::iterator InsertPos; + + /// PlaceBlocks - Recursively place the specified blocks and any unplaced + /// successors. + void PlaceBlocks(BasicBlock *BB); + }; +} + +char BlockPlacement::ID = 0; +static RegisterPass +X("block-placement", "Profile Guided Basic Block Placement"); + +FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } + +bool BlockPlacement::runOnFunction(Function &F) { + PI = &getAnalysis(); + + NumMovedBlocks = 0; + InsertPos = F.begin(); + + // Recursively place all blocks. + PlaceBlocks(F.begin()); + + PlacedBlocks.clear(); + NumMoved += NumMovedBlocks; + return NumMovedBlocks != 0; +} + + +/// PlaceBlocks - Recursively place the specified blocks and any unplaced +/// successors. +void BlockPlacement::PlaceBlocks(BasicBlock *BB) { + assert(!PlacedBlocks.count(BB) && "Already placed this block!"); + PlacedBlocks.insert(BB); + + // Place the specified block. + if (&*InsertPos != BB) { + // Use splice to move the block into the right place. This avoids having to + // remove the block from the function then readd it, which causes a bunch of + // symbol table traffic that is entirely pointless. + Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList(); + Blocks.splice(InsertPos, Blocks, BB); + + ++NumMovedBlocks; + } else { + // This block is already in the right place, we don't have to do anything. + ++InsertPos; + } + + // Keep placing successors until we run out of ones to place. 
Note that this
+  // loop is very inefficient (N^2) for blocks with many successors, like
+  // switch statements.  FIXME!
+  while (1) {
+    // Okay, now place any unplaced successors.
+    succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+    // Scan for the first unplaced successor.
+    for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+      /*empty*/;
+    if (SI == E) return;  // No more successors to place.
+
+    unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+    BasicBlock *MaxSuccessor = *SI;
+
+    // Scan for more frequently executed successors.
+    for (; SI != E; ++SI)
+      if (!PlacedBlocks.count(*SI)) {
+        unsigned Count = PI->getExecutionCount(*SI);
+        if (Count > MaxExecutionCount ||
+            // Prefer not to disturb the code.
+            (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+          MaxExecutionCount = Count;
+          MaxSuccessor = *SI;
+        }
+      }
+
+    // Now that we picked the maximally executed successor, place it.
+    PlaceBlocks(MaxSuccessor);
+  }
+}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
new file mode 100644
index 000000000000..7a7c48b16155
--- /dev/null
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -0,0 +1,33 @@
+add_llvm_library(LLVMScalarOpts
+  ADCE.cpp
+  BasicBlockPlacement.cpp
+  CodeGenPrepare.cpp
+  CondPropagate.cpp
+  ConstantProp.cpp
+  DCE.cpp
+  DeadStoreElimination.cpp
+  GVN.cpp
+  GVNPRE.cpp
+  IndVarSimplify.cpp
+  InstructionCombining.cpp
+  JumpThreading.cpp
+  LICM.cpp
+  LoopDeletion.cpp
+  LoopIndexSplit.cpp
+  LoopRotation.cpp
+  LoopStrengthReduce.cpp
+  LoopUnroll.cpp
+  LoopUnswitch.cpp
+  MemCpyOptimizer.cpp
+  PredicateSimplifier.cpp
+  Reassociate.cpp
+  Reg2Mem.cpp
+  SCCP.cpp
+  Scalar.cpp
+  ScalarReplAggregates.cpp
+  SimplifyCFGPass.cpp
+  SimplifyHalfPowrLibCalls.cpp
+  SimplifyLibCalls.cpp
+  TailDuplication.cpp
+  TailRecursionElimination.cpp
+  )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..342b1e563d0e
--- /dev/null
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,873 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation.  This works around limitations in its
+// basic-block-at-a-time approach.  It should eventually be removed.
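+//
+// For example, OptimizeMemoryInst below sinks address computations into the
+// block of the load or store that uses them, so that instruction selection
+// can fold them into the memory operand's addressing mode.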
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegenprepare" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +static cl::opt FactorCommonPreds("split-critical-paths-tweak", + cl::init(false), cl::Hidden); + +namespace { + class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetLowering *TLI; + + /// BackEdges - Keep a set of all the loop back edges. + /// + SmallSet, 8> BackEdges; + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenPrepare(const TargetLowering *tli = 0) + : FunctionPass(&ID), TLI(tli) {} + bool runOnFunction(Function &F); + + private: + bool EliminateMostlyEmptyBlocks(Function &F); + bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void EliminateMostlyEmptyBlock(BasicBlock *BB); + bool OptimizeBlock(BasicBlock &BB); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy, + DenseMap &SunkAddrs); + bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, + DenseMap &SunkAddrs); + bool OptimizeExtUses(Instruction *I); + void findLoopBackEdges(const Function &F); + }; +} + +char CodeGenPrepare::ID = 0; +static RegisterPass X("codegenprepare", + "Optimize for code generation"); + +FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { + return new CodeGenPrepare(TLI); +} + +/// findLoopBackEdges - Do a DFS walk to find loop back edges. +/// +void CodeGenPrepare::findLoopBackEdges(const Function &F) { + SmallVector, 32> Edges; + FindFunctionBackedges(F, Edges); + + BackEdges.insert(Edges.begin(), Edges.end()); +} + + +bool CodeGenPrepare::runOnFunction(Function &F) { + bool EverMadeChange = false; + + // First pass, eliminate blocks that contain only PHI nodes and an + // unconditional branch. + EverMadeChange |= EliminateMostlyEmptyBlocks(F); + + // Now find loop back edges. + findLoopBackEdges(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= OptimizeBlock(*BB); + EverMadeChange |= MadeChange; + } + return EverMadeChange; +} + +/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, +/// debug info directives, and an unconditional branch. Passes before isel +/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for +/// isel. Start by eliminating these blocks so we can split them the way we +/// want them. 
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { + bool MadeChange = false; + // Note that this intentionally skips the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + continue; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. + BasicBlock::iterator BBI = BI; + if (BBI != BB->begin()) { + --BBI; + while (isa(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa(BBI) && !isa(BBI)) + continue; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + continue; + + if (!CanMergeBlocks(BB, DestBB)) + continue; + + EliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a +/// single uncond branch between them, and BB contains no other non-phi +/// instructions. +bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, + const BasicBlock *DestBB) const { + // We only want to eliminate blocks whose phi nodes are used by phi nodes in + // the successor. If there are more complex condition (e.g. preheaders), + // don't mess around with them. + BasicBlock::const_iterator BBI = BB->begin(); + while (const PHINode *PN = dyn_cast(BBI++)) { + for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + const Instruction *User = cast(*UI); + if (User->getParent() != DestBB || !isa(User)) + return false; + // If User is inside DestBB block and it is a PHINode then check + // incoming value. If incoming value is not from BB then this is + // a complex condition (e.g. preheaders) we want to avoid here. + if (User->getParent() == DestBB) { + if (const PHINode *UPN = dyn_cast(User)) + for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { + Instruction *Insn = dyn_cast(UPN->getIncomingValue(I)); + if (Insn && Insn->getParent() == BB && + Insn->getParent() != UPN->getIncomingBlock(I)) + return false; + } + } + } + } + + // If BB and DestBB contain any common predecessors, then the phi nodes in BB + // and DestBB may have conflicting incoming values for the block. If so, we + // can't merge the block. + const PHINode *DestBBPN = dyn_cast(DestBB->begin()); + if (!DestBBPN) return true; // no conflict. + + // Collect the preds of BB. + SmallPtrSet BBPreds; + if (const PHINode *BBPN = dyn_cast(BB->begin())) { + // It is faster to get preds from a PHI than with pred_iterator. + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + BBPreds.insert(BBPN->getIncomingBlock(i)); + } else { + BBPreds.insert(pred_begin(BB), pred_end(BB)); + } + + // Walk the preds of DestBB. + for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = DestBBPN->getIncomingBlock(i); + if (BBPreds.count(Pred)) { // Common predecessor? + BBI = DestBB->begin(); + while (const PHINode *PN = dyn_cast(BBI++)) { + const Value *V1 = PN->getIncomingValueForBlock(Pred); + const Value *V2 = PN->getIncomingValueForBlock(BB); + + // If V2 is a phi node in BB, look up what the mapped value will be. + if (const PHINode *V2PN = dyn_cast(V2)) + if (V2PN->getParent() == BB) + V2 = V2PN->getIncomingValueForBlock(Pred); + + // If there is a conflict, bail out. 
+ if (V1 != V2) return false; + } + } + } + + return true; +} + + +/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and +/// an unconditional branch in it. +void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { + BranchInst *BI = cast(BB->getTerminator()); + BasicBlock *DestBB = BI->getSuccessor(0); + + DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; + + // If the destination block has a single pred, then this is a trivial edge, + // just collapse it. + if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { + if (SinglePred != DestBB) { + // Remember if SinglePred was the entry block of the function. If so, we + // will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(DestBB); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; + return; + } + } + + // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB + // to handle the new incoming edges it is about to have. + PHINode *PN; + for (BasicBlock::iterator BBI = DestBB->begin(); + (PN = dyn_cast(BBI)); ++BBI) { + // Remove the incoming value for BB, and remember it. + Value *InVal = PN->removeIncomingValue(BB, false); + + // Two options: either the InVal is a phi node defined in BB or it is some + // value that dominates BB. + PHINode *InValPhi = dyn_cast(InVal); + if (InValPhi && InValPhi->getParent() == BB) { + // Add all of the input values of the input PHI as inputs of this phi. + for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InValPhi->getIncomingValue(i), + InValPhi->getIncomingBlock(i)); + } else { + // Otherwise, add one instance of the dominating value for each edge that + // we will be adding. + if (PHINode *BBPN = dyn_cast(BB->begin())) { + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + PN->addIncoming(InVal, *PI); + } + } + } + + // The PHIs are now updated, change everything that refers to BB to use + // DestBB and remove BB. + BB->replaceAllUsesWith(DestBB); + BB->eraseFromParent(); + + DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; +} + + +/// SplitEdgeNicely - Split the critical edge from TI to its specified +/// successor if it will improve codegen. We only do this if the successor has +/// phi nodes (otherwise critical edges are ok). If there is already another +/// predecessor of the succ that is empty (and thus has no phi nodes), use it +/// instead of introducing a new block. +static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, + SmallSet, 8> &BackEdges, + Pass *P) { + BasicBlock *TIBB = TI->getParent(); + BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(isa(Dest->begin()) && + "This should only be called if Dest has a PHI!"); + + // Do not split edges to EH landing pads. + if (InvokeInst *Invoke = dyn_cast(TI)) { + if (Invoke->getSuccessor(1) == Dest) + return; + } + + // As a hack, never split backedges of loops. Even though the copy for any + // PHIs inserted on the backedge would be dead for exits from the loop, we + // assume that the cost of *splitting* the backedge would be too high. 
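+  // (Splitting would put an extra block and branch on the loop's hot path at
+  // every iteration, which we assume costs more than the dead copies save.)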
+ if (BackEdges.count(std::make_pair(TIBB, Dest))) + return; + + if (!FactorCommonPreds) { + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector TIPHIValues; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { + BasicBlock *Pred = *PI; + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional()) + continue; + // Must be empty other than the branch and debug info. + BasicBlock::iterator I = Pred->begin(); + while (isa(I)) + I++; + if (dyn_cast(I) != PredBr) + continue; + // Cannot be the entry block; its label does not get emitted. + if (Pred == &(Dest->getParent()->getEntryBlock())) + continue; + + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same effect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) + TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); + + // If the PHI entry doesn't work, we can't use this pred. + if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) { + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, Pred); + return; + } + } + + SplitCriticalEdge(TI, SuccNum, P, true); + return; + } + + PHINode *PN; + SmallVector TIPHIValues; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast(I)); ++I) + TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); + + SmallVector IdenticalPreds; + for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { + BasicBlock *Pred = *PI; + if (BackEdges.count(std::make_pair(Pred, Dest))) + continue; + if (PI == TIBB) + IdenticalPreds.push_back(Pred); + else { + bool Identical = true; + unsigned PHINo = 0; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast(I)); ++I, ++PHINo) + if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { + Identical = false; + break; + } + if (Identical) + IdenticalPreds.push_back(Pred); + } + } + + assert(!IdenticalPreds.empty()); + SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), + ".critedge", P); +} + + +/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop +/// copy (e.g. it's casting from one pointer type to another, int->uint, or +/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ + // If this is a noop copy, + MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(CI->getType()); + + // This is an fp<->int conversion? + if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. 
This helps us consider truncates on PPC as noop copies when they + // are. + if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) + SrcVT = TLI.getTypeToTransformTo(SrcVT); + if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) + DstVT = TLI.getTypeToTransformTo(DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCasts - Only insert a cast in each block once. + DenseMap InsertedCasts; + + bool MadeChange = false; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + + // Figure out which BB this cast is used in. For PHI's this is the + // appropriate predecessor block. + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast(User)) { + UserBB = PN->getIncomingBlock(UI); + } + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // If this user is in the same block as the cast, don't change the cast. + if (UserBB == DefBB) continue; + + // If we have already inserted a cast into this block, use it. + CastInst *&InsertedCast = InsertedCasts[UserBB]; + + if (!InsertedCast) { + BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); + + InsertedCast = + CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", + InsertPt); + MadeChange = true; + } + + // Replace a use of the cast with a use of the new cast. + TheUse = InsertedCast; + } + + // If we removed all uses, nuke the cast. + if (CI->use_empty()) { + CI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// the number of virtual registers that must be created and coalesced. This is +/// a clear win except on targets with multiple condition code registers +/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// +/// Return true if any changes are made. +static bool OptimizeCmpExpression(CmpInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCmp - Only insert a cmp in each block once. + DenseMap InsertedCmps; + + bool MadeChange = false; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa(User)) + continue; + + // Figure out which BB this cmp is used in. + BasicBlock *UserBB = User->getParent(); + + // If this user is in the same block as the cmp, don't change the cmp. + if (UserBB == DefBB) continue; + + // If we have already inserted a cmp into this block, use it. + CmpInst *&InsertedCmp = InsertedCmps[UserBB]; + + if (!InsertedCmp) { + BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); + + InsertedCmp = + CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), + CI->getOperand(1), "", InsertPt); + MadeChange = true; + } + + // Replace a use of the cmp with a use of the new cmp. + TheUse = InsertedCmp; + } + + // If we removed all uses, nuke the cmp. 
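// --- Editorial sketch: not part of the original patch. ---
// The cast- and cmp-sinking routines above share one pattern: walk the uses
// of a value, and for each use in a different block materialize at most one
// local copy per block, then point the use at that copy.  A stand-alone model
// of that memoization, with int block ids and a hypothetical makeLocalCopy()
// standing in for CastInst/CmpInst creation.
#include <map>
#include <vector>

struct ToyUse { int Block; int Def; };  // a use of value Def in block Block

static int NextCopyId = 1000;
int makeLocalCopy(int /*Def*/, int /*Block*/) { return ++NextCopyId; }

// Redirect every use outside DefBlock to a per-block copy; returns whether
// anything changed, like OptimizeNoopCopyExpression/OptimizeCmpExpression.
bool sinkIntoUserBlocks(int Def, int DefBlock, std::vector<ToyUse> &Uses) {
  std::map<int, int> InsertedCopies;  // block -> the one copy in that block
  bool MadeChange = false;
  for (size_t i = 0; i != Uses.size(); ++i) {
    if (Uses[i].Block == DefBlock)
      continue;                       // same block: leave the use alone
    int &Copy = InsertedCopies[Uses[i].Block];
    if (!Copy) {                      // first use in this block: insert once
      Copy = makeLocalCopy(Def, Uses[i].Block);
      MadeChange = true;
    }
    Uses[i].Def = Copy;               // rewrite the use to the local copy
  }
  return MadeChange;
}
// --- End editorial sketch. ---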
+ if (CI->use_empty()) + CI->eraseFromParent(); + + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Memory Optimization +//===----------------------------------------------------------------------===// + +/// IsNonLocalValue - Return true if the specified values are defined in a +/// different basic block than BB. +static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + if (Instruction *I = dyn_cast(V)) + return I->getParent() != BB; + return false; +} + +/// OptimizeMemoryInst - Load and Store Instructions have often have +/// addressing modes that can do significant amounts of computation. As such, +/// instruction selection will try to get the load or store to do as much +/// computation as possible for the program. The problem is that isel can only +/// see within a single block. As such, we sink as much legal addressing mode +/// stuff into the block as possible. +/// +/// This method is used to optimize both load/store and inline asms with memory +/// operands. +bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + const Type *AccessTy, + DenseMap &SunkAddrs) { + // Figure out what addressing mode will be built up for this operation. + SmallVector AddrModeInsts; + ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst, + AddrModeInsts, *TLI); + + // Check to see if any of the instructions supersumed by this addr mode are + // non-local to I's BB. + bool AnyNonLocal = false; + for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { + if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { + AnyNonLocal = true; + break; + } + } + + // If all the instructions matched are already in this BB, don't do anything. + if (!AnyNonLocal) { + DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n"); + return false; + } + + // Insert this computation right after this user. Since our caller is + // scanning from the top of the BB to the bottom, reuse of the expr are + // guaranteed to happen later. + BasicBlock::iterator InsertPt = MemoryInst; + + // Now that we determined the addressing expression we want to use and know + // that we have to sink it into this block. Check to see if we have already + // done this for some other load/store instr in this block. If so, reuse the + // computation. + Value *&SunkAddr = SunkAddrs[Addr]; + if (SunkAddr) { + DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); + } else { + DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(); + + Value *Result = 0; + // Start with the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (isa(V->getType())) { + V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + } else if (cast(IntPtrTy)->getBitWidth() < + cast(V->getType())->getBitWidth()) { + V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt); + } else { + V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); + } + if (AddrMode.Scale != 1) + V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, + AddrMode.Scale), + "sunkaddr", InsertPt); + Result = V; + } + + // Add in the base register. 
+ if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + if (Result) + Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + else + Result = V; + } + + // Add in the BaseGV if present. + if (AddrMode.BaseGV) { + Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr", + InsertPt); + if (Result) + Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + else + Result = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (Result) + Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + else + Result = V; + } + + if (Result == 0) + SunkAddr = Constant::getNullValue(Addr->getType()); + else + SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); + } + + MemoryInst->replaceUsesOfWith(Addr, SunkAddr); + + if (Addr->use_empty()) + RecursivelyDeleteTriviallyDeadInstructions(Addr); + return true; +} + +/// OptimizeInlineAsmInst - If there are any memory operands, use +/// OptimizeMemoryInst to sink their address computing into the block when +/// possible / profitable. +bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, + DenseMap &SunkAddrs) { + bool MadeChange = false; + InlineAsm *IA = cast(CS.getCalledValue()); + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector + ConstraintInfos = IA->ParseConstraints(); + + /// ConstraintOperands - Information about all of the constraints. + std::vector ConstraintOperands; + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands. + push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i])); + TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + if (OpInfo.isIndirect) + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // Compute the constraint code and ConstraintType to use. + TLI->ComputeConstraintToUse(OpInfo, SDValue(), + OpInfo.ConstraintType == TargetLowering::C_Memory); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.isIndirect) { + Value *OpVal = OpInfo.CallOperandVal; + MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs); + } + } + + return MadeChange; +} + +bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { + BasicBlock *DefBB = I->getParent(); + + // If both result of the {s|z}xt and its source are live out, rewrite all + // other uses of the source with result of extension. + Value *Src = I->getOperand(0); + if (Src->hasOneUse()) + return false; + + // Only do this xform if truncating is free. + if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) + return false; + + // Only safe to perform the optimization if the source is also defined in + // this block. + if (!isa(Src) || DefBB != cast(Src)->getParent()) + return false; + + bool DefIsLiveOut = false; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + + // Figure out which BB this ext is used in. 
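// --- Editorial sketch: not part of the original patch. ---
// The ext-use rewrite in progress here relies on truncation being free and on
// the identity trunc(zext(x)) == x: if both x and zext(x) are live out of the
// defining block, other blocks can use a truncate of the extension result
// instead of keeping x itself alive.  A stand-alone illustration:
#include <cassert>
#include <cstdint>

void truncOfExtIsIdentity() {
  uint16_t X = 0xBEEF;
  uint32_t Ext = static_cast<uint32_t>(X);     // zext i16 -> i32
  uint16_t Back = static_cast<uint16_t>(Ext);  // trunc i32 -> i16
  assert(Back == X);  // so out-of-block uses of X can become trunc(Ext)
}
// --- End editorial sketch. ---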
+ BasicBlock *UserBB = User->getParent(); + if (UserBB == DefBB) continue; + DefIsLiveOut = true; + break; + } + if (!DefIsLiveOut) + return false; + + // Make sure non of the uses are PHI nodes. + for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + BasicBlock *UserBB = User->getParent(); + if (UserBB == DefBB) continue; + // Be conservative. We don't want this xform to end up introducing + // reloads just before load / store instructions. + if (isa(User) || isa(User) || isa(User)) + return false; + } + + // InsertedTruncs - Only insert one trunc in each block once. + DenseMap InsertedTruncs; + + bool MadeChange = false; + for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); + UI != E; ++UI) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = User->getParent(); + if (UserBB == DefBB) continue; + + // Both src and def are live in this block. Rewrite the use. + Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; + + if (!InsertedTrunc) { + BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); + + InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); + } + + // Replace a use of the {s|z}ext source with a use of the result. + TheUse = InsertedTrunc; + + MadeChange = true; + } + + return MadeChange; +} + +// In this pass we look for GEP and cast instructions that are used +// across basic blocks and rewrite them to improve basic-block-at-a-time +// selection. +bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { + bool MadeChange = false; + + // Split all critical edges where the dest block has a PHI. + TerminatorInst *BBTI = BB.getTerminator(); + if (BBTI->getNumSuccessors() > 1) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } + } + + // Keep track of non-local addresses that have been sunk into this block. + // This allows us to avoid inserting duplicate code for blocks with multiple + // load/stores of the same address. + DenseMap SunkAddrs; + + for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) { + Instruction *I = BBI++; + + if (CastInst *CI = dyn_cast(I)) { + // If the source of the cast is a constant, then this should have + // already been constant folded. The only reason NOT to constant fold + // it is if something (e.g. LSR) was careful to place the constant + // evaluation in a block other than then one that uses it (e.g. to hoist + // the address of globals out of a loop). If this is the case, we don't + // want to forward-subst the cast. 
+ if (isa(CI->getOperand(0))) + continue; + + bool Change = false; + if (TLI) { + Change = OptimizeNoopCopyExpression(CI, *TLI); + MadeChange |= Change; + } + + if (!Change && (isa(I) || isa(I))) + MadeChange |= OptimizeExtUses(I); + } else if (CmpInst *CI = dyn_cast(I)) { + MadeChange |= OptimizeCmpExpression(CI); + } else if (LoadInst *LI = dyn_cast(I)) { + if (TLI) + MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), + SunkAddrs); + } else if (StoreInst *SI = dyn_cast(I)) { + if (TLI) + MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType(), + SunkAddrs); + } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { + if (GEPI->hasAllZeroIndices()) { + /// The GEP operand must be a pointer, so must its result -> BitCast + Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NC); + GEPI->eraseFromParent(); + MadeChange = true; + BBI = NC; + } + } else if (CallInst *CI = dyn_cast(I)) { + // If we found an inline asm expession, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (TLI && isa(CI->getCalledValue())) + if (const TargetAsmInfo *TAI = + TLI->getTargetMachine().getTargetAsmInfo()) { + if (TAI->ExpandInlineAsm(CI)) { + BBI = BB.begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + } else + // Sink address computing for memory operands into the block. + MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); + } + } + } + + return MadeChange; +} diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp new file mode 100644 index 000000000000..c85d0317d65f --- /dev/null +++ b/lib/Transforms/Scalar/CondPropagate.cpp @@ -0,0 +1,295 @@ +//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass propagates information about conditional expressions through the +// program, allowing it to eliminate conditional branches in some cases. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "condprop" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +using namespace llvm; + +STATISTIC(NumBrThread, "Number of CFG edges threaded through branches"); +STATISTIC(NumSwThread, "Number of CFG edges threaded through switches"); + +namespace { + struct VISIBILITY_HIDDEN CondProp : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CondProp() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + //AU.addRequired(); + } + + private: + bool MadeChange; + SmallVector DeadBlocks; + void SimplifyBlock(BasicBlock *BB); + void SimplifyPredecessors(BranchInst *BI); + void SimplifyPredecessors(SwitchInst *SI); + void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB); + bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI); + }; +} + +char CondProp::ID = 0; +static RegisterPass X("condprop", "Conditional Propagation"); + +FunctionPass *llvm::createCondPropagationPass() { + return new CondProp(); +} + +bool CondProp::runOnFunction(Function &F) { + bool EverMadeChange = false; + DeadBlocks.clear(); + + // While we are simplifying blocks, keep iterating. + do { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E;) + SimplifyBlock(BB++); + EverMadeChange = EverMadeChange || MadeChange; + } while (MadeChange); + + if (EverMadeChange) { + while (!DeadBlocks.empty()) { + BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back(); + DeleteDeadBlock(BB); + } + } + return EverMadeChange; +} + +void CondProp::SimplifyBlock(BasicBlock *BB) { + if (BranchInst *BI = dyn_cast(BB->getTerminator())) { + // If this is a conditional branch based on a phi node that is defined in + // this block, see if we can simplify predecessors of this block. + if (BI->isConditional() && isa(BI->getCondition()) && + cast(BI->getCondition())->getParent() == BB) + SimplifyPredecessors(BI); + + } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { + if (isa(SI->getCondition()) && + cast(SI->getCondition())->getParent() == BB) + SimplifyPredecessors(SI); + } + + // If possible, simplify the terminator of this block. + if (ConstantFoldTerminator(BB)) + MadeChange = true; + + // If this block ends with an unconditional branch and the only successor has + // only this block as a predecessor, merge the two blocks together. + if (BranchInst *BI = dyn_cast(BB->getTerminator())) + if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() && + BB != BI->getSuccessor(0)) { + BasicBlock *Succ = BI->getSuccessor(0); + + // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate + // them. + FoldSingleEntryPHINodes(Succ); + + // Remove BI. + BI->eraseFromParent(); + + // Move over all of the instructions. + BB->getInstList().splice(BB->end(), Succ->getInstList()); + + // Any phi nodes that had entries for Succ now have entries from BB. 
+      Succ->replaceAllUsesWith(BB);
+
+      // Succ is now dead, but we cannot delete it without potentially
+      // invalidating iterators elsewhere.  Just insert an unreachable
+      // instruction in it and delete this block later on.
+      new UnreachableInst(Succ);
+      DeadBlocks.push_back(Succ);
+      MadeChange = true;
+    }
+}
+
+// SimplifyPredecessors(branches) - We know that BI is a conditional branch
+// based on a PHI node defined in this block.  If the phi node contains
+// constant operands, then the blocks corresponding to those operands can be
+// modified to jump directly to the destination instead of going through this
+// block.
+void CondProp::SimplifyPredecessors(BranchInst *BI) {
+  // TODO: We currently only handle the most trivial case, where the PHI node
+  // has one use (the branch), and is the only instruction besides the branch
+  // and dbg intrinsics in the block.
+  PHINode *PN = cast<PHINode>(BI->getCondition());
+
+  if (PN->getNumIncomingValues() == 1) {
+    // Eliminate single-entry PHI nodes.
+    FoldSingleEntryPHINodes(PN->getParent());
+    return;
+  }
+
+  if (!PN->hasOneUse()) return;
+
+  BasicBlock *BB = BI->getParent();
+  if (&*BB->begin() != PN)
+    return;
+  BasicBlock::iterator BBI = BB->begin();
+  BasicBlock::iterator BBE = BB->end();
+  while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
+  if (&*BBI != BI)
+    return;
+
+  // Ok, we have this really simple case, walk the PHI operands, looking for
+  // constants.  Walk from the end to remove operands from the end when
+  // possible, and to avoid invalidating "i".
+  for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
+    Value *InVal = PN->getIncomingValue(i-1);
+    if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
+      continue;
+
+    ++NumBrThread;
+
+    // If there were two predecessors before this simplification, or if the
+    // PHI node contained all the same value except for the one we just
+    // substituted, the PHI node may be deleted.  Don't iterate through it the
+    // last time.
+    if (BI->getCondition() != PN) return;
+  }
+}
+
+// SimplifyPredecessors(switch) - We know that SI is a switch based on a PHI
+// node defined in this block.  If the phi node contains constant operands,
+// then the blocks corresponding to those operands can be modified to jump
+// directly to the destination instead of going through this block.
+void CondProp::SimplifyPredecessors(SwitchInst *SI) {
+  // TODO: We currently only handle the most trivial case, where the PHI node
+  // has one use (the branch), and is the only instruction besides the branch
+  // and dbg intrinsics in the block.
+  PHINode *PN = cast<PHINode>(SI->getCondition());
+  if (!PN->hasOneUse()) return;
+
+  BasicBlock *BB = SI->getParent();
+  if (&*BB->begin() != PN)
+    return;
+  BasicBlock::iterator BBI = BB->begin();
+  BasicBlock::iterator BBE = BB->end();
+  while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
+  if (&*BBI != SI)
+    return;
+
+  bool RemovedPreds = false;
+
+  // Ok, we have this really simple case, walk the PHI operands, looking for
+  // constants.  Walk from the end to remove operands from the end when
+  // possible, and to avoid invalidating "i".
+  for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
+      // If we have a constant, forward the edge from its current to its
+      // ultimate destination.
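// --- Editorial sketch: not part of the original patch. ---
// The threading CondProp performs, modeled on a toy CFG: block ids are ints,
// BB ends in a conditional branch on a PHI, and any predecessor that feeds
// the PHI a constant can branch straight to the final destination, bypassing
// BB entirely (the RevectorBlockTo step).  Names here are hypothetical.
#include <map>

// Successors of BB's conditional branch.
struct ToyCondBr { int TrueDest; int FalseDest; };

// PHI incoming values: predecessor block -> constant condition, if known.
typedef std::map<int, bool> ToyPHIConsts;

// For each predecessor with a known constant, compute the block it should
// jump to directly instead of going through BB.
std::map<int, int> threadPredecessors(const ToyPHIConsts &Incoming,
                                      const ToyCondBr &Br) {
  std::map<int, int> NewTarget;  // predecessor -> retargeted successor
  for (ToyPHIConsts::const_iterator I = Incoming.begin(), E = Incoming.end();
       I != E; ++I)
    NewTarget[I->first] = I->second ? Br.TrueDest : Br.FalseDest;
  return NewTarget;
}
// --- End editorial sketch. ---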
+ unsigned DestCase = SI->findCaseValue(CI); + RevectorBlockTo(PN->getIncomingBlock(i-1), + SI->getSuccessor(DestCase)); + ++NumSwThread; + RemovedPreds = true; + + // If there were two predecessors before this simplification, or if the + // PHI node contained all the same value except for the one we just + // substituted, the PHI node may be deleted. Don't iterate through it the + // last time. + if (SI->getCondition() != PN) return; + } +} + + +// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to +// the ToBB block, which is one of the successors of its current successor. +void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) { + BranchInst *FromBr = cast(FromBB->getTerminator()); + assert(FromBr->isUnconditional() && "FromBB should end with uncond br!"); + + // Get the old block we are threading through. + BasicBlock *OldSucc = FromBr->getSuccessor(0); + + // OldSucc had multiple successors. If ToBB has multiple predecessors, then + // the edge between them would be critical, which we already took care of. + // If ToBB has single operand PHI node then take care of it here. + FoldSingleEntryPHINodes(ToBB); + + // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. + OldSucc->removePredecessor(FromBB); + + // Change FromBr to branch to the new destination. + FromBr->setSuccessor(0, ToBB); + + MadeChange = true; +} + +bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){ + BranchInst *FromBr = cast(FromBB->getTerminator()); + if (!FromBr->isUnconditional()) + return false; + + // Get the old block we are threading through. + BasicBlock *OldSucc = FromBr->getSuccessor(0); + + // If the condition is a constant, simply revector the unconditional branch at + // the end of FromBB to one of the successors of its current successor. + if (ConstantInt *CB = dyn_cast(Cond)) { + BasicBlock *ToBB = BI->getSuccessor(CB->isZero()); + + // OldSucc had multiple successors. If ToBB has multiple predecessors, then + // the edge between them would be critical, which we already took care of. + // If ToBB has single operand PHI node then take care of it here. + FoldSingleEntryPHINodes(ToBB); + + // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. + OldSucc->removePredecessor(FromBB); + + // Change FromBr to branch to the new destination. + FromBr->setSuccessor(0, ToBB); + } else { + BasicBlock *Succ0 = BI->getSuccessor(0); + // Do not perform transform if the new destination has PHI nodes. The + // transform will add new preds to the PHI's. + if (isa(Succ0->begin())) + return false; + + BasicBlock *Succ1 = BI->getSuccessor(1); + if (isa(Succ1->begin())) + return false; + + // Insert the new conditional branch. + BranchInst::Create(Succ0, Succ1, Cond, FromBr); + + FoldSingleEntryPHINodes(Succ0); + FoldSingleEntryPHINodes(Succ1); + + // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. + OldSucc->removePredecessor(FromBB); + + // Delete the old branch. + FromBr->eraseFromParent(); + } + + MadeChange = true; + return true; +} diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp new file mode 100644 index 000000000000..b933488cf636 --- /dev/null +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -0,0 +1,90 @@ +//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+//   * Converts instructions like "add int 1, 2" into 3
+//
+// Notice that:
+//   * This pass has a habit of making definitions be dead.  It is a good idea
+//     to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+  struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    ConstantPropagation() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+}
+
+char ConstantPropagation::ID = 0;
+static RegisterPass<ConstantPropagation>
+X("constprop", "Simple constant propagation");
+
+FunctionPass *llvm::createConstantPropagationPass() {
+  return new ConstantPropagation();
+}
+
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+  // Initialize the worklist to all of the instructions ready to process...
+  std::set<Instruction*> WorkList;
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+    WorkList.insert(&*i);
+  }
+  bool Changed = false;
+
+  while (!WorkList.empty()) {
+    Instruction *I = *WorkList.begin();
+    WorkList.erase(WorkList.begin());    // Get an element from the worklist...
+
+    if (!I->use_empty())                 // Don't muck with dead instructions...
+      if (Constant *C = ConstantFoldInstruction(I)) {
+        // Add all of the users of this instruction to the worklist, they might
+        // be constant propagatable now...
+        for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+             UI != UE; ++UI)
+          WorkList.insert(cast<Instruction>(*UI));
+
+        // Replace all of the uses of a variable with uses of the constant.
+        I->replaceAllUsesWith(C);
+
+        // Remove the dead instruction.
+        WorkList.erase(I);
+        I->eraseFromParent();
+
+        // We made a change to the function...
+        Changed = true;
+        ++NumInstKilled;
+      }
+  }
+  return Changed;
+}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 000000000000..8bb504c09c6e
--- /dev/null
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,133 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead.  Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
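// --- Editorial sketch: not part of the original patch. ---
// The constprop pass above and the DCE pass below share one worklist
// discipline: take an item, simplify it, and re-enqueue anything whose status
// may have changed.  A stand-alone model of the constant-propagation
// direction, folding (op, lhs, rhs) nodes over a toy expression graph:
#include <map>
#include <set>
#include <vector>

struct ToyExpr {
  char Op;                 // '+' or 'c' (already-constant leaf)
  int  LHS, RHS;           // operand node ids ('c' ignores these)
  int  Val;                // value, meaningful once folded to 'c'
  std::vector<int> Users;  // nodes that consume this one
};

void foldConstants(std::map<int, ToyExpr> &G) {
  std::set<int> WorkList;
  for (std::map<int, ToyExpr>::iterator I = G.begin(); I != G.end(); ++I)
    WorkList.insert(I->first);
  while (!WorkList.empty()) {
    int N = *WorkList.begin();
    WorkList.erase(WorkList.begin());
    ToyExpr &E = G[N];
    if (E.Op == '+' && G[E.LHS].Op == 'c' && G[E.RHS].Op == 'c') {
      E.Val = G[E.LHS].Val + G[E.RHS].Val;  // "add int 1, 2" -> 3
      E.Op = 'c';
      // Users may be foldable now; put them back on the worklist.
      for (size_t i = 0; i != E.Users.size(); ++i)
        WorkList.insert(E.Users[i]);
    }
  }
}
// --- End editorial sketch. ---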
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dce" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Instruction.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/ADT/Statistic.h" +#include +using namespace llvm; + +STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); +STATISTIC(DCEEliminated, "Number of insts removed"); + +namespace { + //===--------------------------------------------------------------------===// + // DeadInstElimination pass implementation + // + struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass { + static char ID; // Pass identification, replacement for typeid + DeadInstElimination() : BasicBlockPass(&ID) {} + virtual bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { + Instruction *Inst = DI++; + if (isInstructionTriviallyDead(Inst)) { + Inst->eraseFromParent(); + Changed = true; + ++DIEEliminated; + } + } + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; +} + +char DeadInstElimination::ID = 0; +static RegisterPass +X("die", "Dead Instruction Elimination"); + +Pass *llvm::createDeadInstEliminationPass() { + return new DeadInstElimination(); +} + + +namespace { + //===--------------------------------------------------------------------===// + // DeadCodeElimination pass implementation + // + struct DCE : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DCE() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; +} + +char DCE::ID = 0; +static RegisterPass Y("dce", "Dead Code Elimination"); + +bool DCE::runOnFunction(Function &F) { + // Start out with all of the instructions in the worklist... + std::vector WorkList; + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) + WorkList.push_back(&*i); + + // Loop over the worklist finding instructions that are dead. If they are + // dead make them drop all of their uses, making other instructions + // potentially dead, and work until the worklist is empty. + // + bool MadeChange = false; + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + if (isInstructionTriviallyDead(I)) { // If the instruction is dead. + // Loop over all of the values that the instruction uses, if there are + // instructions being used, add them to the worklist, because they might + // go dead after this one is removed. + // + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *Used = dyn_cast(*OI)) + WorkList.push_back(Used); + + // Remove the instruction. + I->eraseFromParent(); + + // Remove the instruction from the worklist if it still exists in it. 
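// --- Editorial sketch: not part of the original patch. ---
// The DCE loop above works backwards through def-use edges: deleting a dead
// node may make its operands dead, so they go back on the worklist.  A
// stand-alone model with explicit use counts over a toy graph:
#include <map>
#include <vector>

struct ToyNode {
  int  UseCount;            // how many live nodes consume this one
  bool HasSideEffects;      // never delete stores, calls, terminators, etc.
  std::vector<int> Operands;
};

void eliminateDeadNodes(std::map<int, ToyNode> &G) {
  std::vector<int> WorkList;
  for (std::map<int, ToyNode>::iterator I = G.begin(); I != G.end(); ++I)
    WorkList.push_back(I->first);
  while (!WorkList.empty()) {
    int N = WorkList.back();
    WorkList.pop_back();
    std::map<int, ToyNode>::iterator It = G.find(N);
    if (It == G.end())
      continue;             // already erased by an earlier cascade
    if (It->second.UseCount != 0 || It->second.HasSideEffects)
      continue;             // not trivially dead
    // Dropping N may make its operands dead; re-check them later.
    for (size_t i = 0; i != It->second.Operands.size(); ++i) {
      int Op = It->second.Operands[i];
      if (G.count(Op) && --G[Op].UseCount == 0)
        WorkList.push_back(Op);
    }
    G.erase(It);
  }
}
// --- End editorial sketch. ---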
+ for (std::vector::iterator WI = WorkList.begin(); + WI != WorkList.end(); ) { + if (*WI == I) + WI = WorkList.erase(WI); + else + ++WI; + } + + MadeChange = true; + ++DCEEliminated; + } + } + return MadeChange; +} + +FunctionPass *llvm::createDeadCodeEliminationPass() { + return new DCE(); +} + diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp new file mode 100644 index 000000000000..b923c92bd300 --- /dev/null +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -0,0 +1,461 @@ +//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a trivial dead store elimination that only considers +// basic-block local redundant stores. +// +// FIXME: This should eventually be extended to be a post-dominator tree +// traversal. Doing so would be pretty trivial. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dse" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumFastStores, "Number of stores deleted"); +STATISTIC(NumFastOther , "Number of other instrs removed"); + +namespace { + struct VISIBILITY_HIDDEN DSE : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DSE() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + Changed |= runOnBasicBlock(*I); + return Changed; + } + + bool runOnBasicBlock(BasicBlock &BB); + bool handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep); + bool handleEndBlock(BasicBlock &BB); + bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize, + BasicBlock::iterator& BBI, + SmallPtrSet& deadPointers); + void DeleteDeadInstruction(Instruction *I, + SmallPtrSet *deadPointers = 0); + + + // getAnalysisUsage - We require post dominance frontiers (aka Control + // Dependence Graph) + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + } + }; +} + +char DSE::ID = 0; +static RegisterPass X("dse", "Dead Store Elimination"); + +FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } + +bool DSE::runOnBasicBlock(BasicBlock &BB) { + MemoryDependenceAnalysis& MD = getAnalysis(); + TargetData &TD = getAnalysis(); + + bool MadeChange = false; + + // Do a top-down walk on the BB + for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { + Instruction *Inst = BBI++; + + // If we find a store or a free, get it's memory dependence. 
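// --- Editorial sketch: not part of the original patch. ---
// The core store->store case handled below, modeled without MemDep: walking a
// block in order, a store is shadowed (dead) if the next store to the same
// address writes at least as many bytes, with no intervening read of that
// address.  The toy model assumes distinct addresses never alias and ignores
// partial overlap; the real pass delegates those questions to AliasAnalysis.
#include <cstdint>
#include <vector>

struct ToyMemOp {
  enum Kind { Store, Load } K;
  uintptr_t Addr;
  uint64_t  Size;
  bool      Dead;
};

void shadowedStores(std::vector<ToyMemOp> &Block) {
  for (size_t i = 0; i != Block.size(); ++i) {
    if (Block[i].K != ToyMemOp::Store)
      continue;
    for (size_t j = i + 1; j != Block.size(); ++j) {
      if (Block[j].Addr != Block[i].Addr)
        continue;                 // unrelated location (no aliasing here)
      if (Block[j].K == ToyMemOp::Load)
        break;                    // the old value is observed: keep the store
      if (Block[j].Size >= Block[i].Size)
        Block[i].Dead = true;     // fully overwritten before any read
      break;
    }
  }
}
// --- End editorial sketch. ---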
+ if (!isa(Inst) && !isa(Inst)) + continue; + + // Don't molest volatile stores or do queries that will return "clobber". + if (StoreInst *SI = dyn_cast(Inst)) + if (SI->isVolatile()) + continue; + + MemDepResult InstDep = MD.getDependency(Inst); + + // Ignore non-local stores. + // FIXME: cross-block DSE would be fun. :) + if (InstDep.isNonLocal()) continue; + + // Handle frees whose dependencies are non-trivial. + if (FreeInst *FI = dyn_cast(Inst)) { + MadeChange |= handleFreeWithNonTrivialDependency(FI, InstDep); + continue; + } + + StoreInst *SI = cast(Inst); + + // If not a definite must-alias dependency, ignore it. + if (!InstDep.isDef()) + continue; + + // If this is a store-store dependence, then the previous store is dead so + // long as this store is at least as big as it. + if (StoreInst *DepStore = dyn_cast(InstDep.getInst())) + if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <= + TD.getTypeStoreSize(SI->getOperand(0)->getType())) { + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepStore); + NumFastStores++; + MadeChange = true; + + if (BBI != BB.begin()) + --BBI; + continue; + } + + // If we're storing the same value back to a pointer that we just + // loaded from, then the store can be removed. + if (LoadInst *DepLoad = dyn_cast(InstDep.getInst())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + SI->getOperand(0) == DepLoad) { + DeleteDeadInstruction(SI); + if (BBI != BB.begin()) + --BBI; + NumFastStores++; + MadeChange = true; + continue; + } + } + } + + // If this block ends in a return, unwind, or unreachable, all allocas are + // dead at its end, which means stores to them are also dead. + if (BB.getTerminator()->getNumSuccessors() == 0) + MadeChange |= handleEndBlock(BB); + + return MadeChange; +} + +/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose +/// dependency is a store to a field of that structure. +bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) { + AliasAnalysis &AA = getAnalysis(); + + StoreInst *Dependency = dyn_cast_or_null(Dep.getInst()); + if (!Dependency || Dependency->isVolatile()) + return false; + + Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject(); + + // Check for aliasing. + if (AA.alias(F->getPointerOperand(), 1, DepPointer, 1) != + AliasAnalysis::MustAlias) + return false; + + // DCE instructions only used to calculate that store + DeleteDeadInstruction(Dependency); + NumFastStores++; + return true; +} + +/// handleEndBlock - Remove dead stores to stack-allocated locations in the +/// function end block. Ex: +/// %A = alloca i32 +/// ... +/// store i32 1, i32* %A +/// ret void +bool DSE::handleEndBlock(BasicBlock &BB) { + TargetData &TD = getAnalysis(); + AliasAnalysis &AA = getAnalysis(); + + bool MadeChange = false; + + // Pointers alloca'd in this function are dead in the end block + SmallPtrSet deadPointers; + + // Find all of the alloca'd pointers in the entry block. + BasicBlock *Entry = BB.getParent()->begin(); + for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) + deadPointers.insert(AI); + + // Treat byval arguments the same, stores to them are dead at the end of the + // function. 
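// --- Editorial sketch: not part of the original patch. ---
// handleEndBlock in miniature: in a block that ends the function, a store to
// a local allocation is dead unless the location is read later in the block.
// Scanning backwards from the return with a "still dead" set (the real pass
// must also handle escapes through calls, which this model omits):
#include <set>
#include <vector>

struct ToyEndOp {
  enum Kind { Store, Load } K;
  int  Local;    // id of the local (alloca/byval) it touches
  bool Dead;
};

void deadStoresAtEnd(std::vector<ToyEndOp> &Block, std::set<int> DeadLocals) {
  // Walk backwards: nothing after the function's end can read a local.
  for (size_t i = Block.size(); i != 0; --i) {
    ToyEndOp &Op = Block[i - 1];
    if (Op.K == ToyEndOp::Store) {
      if (DeadLocals.count(Op.Local))
        Op.Dead = true;            // no later read of this local: remove it
    } else {
      DeadLocals.erase(Op.Local);  // a read makes the local "undead"
    }
  }
}
// --- End editorial sketch. ---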
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(), + AE = BB.getParent()->arg_end(); AI != AE; ++AI) + if (AI->hasByValAttr()) + deadPointers.insert(AI); + + // Scan the basic block backwards + for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ + --BBI; + + // If we find a store whose pointer is dead. + if (StoreInst* S = dyn_cast(BBI)) { + if (!S->isVolatile()) { + // See through pointer-to-pointer bitcasts + Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject(); + + // Alloca'd pointers or byval arguments (which are functionally like + // alloca's) are valid candidates for removal. + if (deadPointers.count(pointerOperand)) { + // DCE instructions only used to calculate that store. + BBI++; + DeleteDeadInstruction(S, &deadPointers); + NumFastStores++; + MadeChange = true; + } + } + + continue; + } + + // We can also remove memcpy's to local variables at the end of a function. + if (MemCpyInst *M = dyn_cast(BBI)) { + Value *dest = M->getDest()->getUnderlyingObject(); + + if (deadPointers.count(dest)) { + BBI++; + DeleteDeadInstruction(M, &deadPointers); + NumFastOther++; + MadeChange = true; + continue; + } + + // Because a memcpy is also a load, we can't skip it if we didn't remove + // it. + } + + Value* killPointer = 0; + uint64_t killPointerSize = ~0UL; + + // If we encounter a use of the pointer, it is no longer considered dead + if (LoadInst *L = dyn_cast(BBI)) { + // However, if this load is unused and not volatile, we can go ahead and + // remove it, and not have to worry about it making our pointer undead! + if (L->use_empty() && !L->isVolatile()) { + BBI++; + DeleteDeadInstruction(L, &deadPointers); + NumFastOther++; + MadeChange = true; + continue; + } + + killPointer = L->getPointerOperand(); + } else if (VAArgInst* V = dyn_cast(BBI)) { + killPointer = V->getOperand(0); + } else if (isa(BBI) && + isa(cast(BBI)->getLength())) { + killPointer = cast(BBI)->getSource(); + killPointerSize = cast( + cast(BBI)->getLength())->getZExtValue(); + } else if (AllocaInst* A = dyn_cast(BBI)) { + deadPointers.erase(A); + + // Dead alloca's can be DCE'd when we reach them + if (A->use_empty()) { + BBI++; + DeleteDeadInstruction(A, &deadPointers); + NumFastOther++; + MadeChange = true; + } + + continue; + } else if (CallSite::get(BBI).getInstruction() != 0) { + // If this call does not access memory, it can't + // be undeadifying any of our pointers. + CallSite CS = CallSite::get(BBI); + if (AA.doesNotAccessMemory(CS)) + continue; + + unsigned modRef = 0; + unsigned other = 0; + + // Remove any pointers made undead by the call from the dead set + std::vector dead; + for (SmallPtrSet::iterator I = deadPointers.begin(), + E = deadPointers.end(); I != E; ++I) { + // HACK: if we detect that our AA is imprecise, it's not + // worth it to scan the rest of the deadPointers set. Just + // assume that the AA will return ModRef for everything, and + // go ahead and bail. 
+ if (modRef >= 16 && other == 0) { + deadPointers.clear(); + return MadeChange; + } + + // Get size information for the alloca + unsigned pointerSize = ~0U; + if (AllocaInst* A = dyn_cast(*I)) { + if (ConstantInt* C = dyn_cast(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD.getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast( + cast(*I)->getType()); + pointerSize = TD.getTypeAllocSize(PT->getElementType()); + } + + // See if the call site touches it + AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize); + + if (A == AliasAnalysis::ModRef) + modRef++; + else + other++; + + if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) + dead.push_back(*I); + } + + for (std::vector::iterator I = dead.begin(), E = dead.end(); + I != E; ++I) + deadPointers.erase(*I); + + continue; + } else if (isInstructionTriviallyDead(BBI)) { + // For any non-memory-affecting non-terminators, DCE them as we reach them + Instruction *Inst = BBI; + BBI++; + DeleteDeadInstruction(Inst, &deadPointers); + NumFastOther++; + MadeChange = true; + continue; + } + + if (!killPointer) + continue; + + killPointer = killPointer->getUnderlyingObject(); + + // Deal with undead pointers + MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI, + deadPointers); + } + + return MadeChange; +} + +/// RemoveUndeadPointers - check for uses of a pointer that make it +/// undead when scanning for dead stores to alloca's. +bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, + BasicBlock::iterator &BBI, + SmallPtrSet& deadPointers) { + TargetData &TD = getAnalysis(); + AliasAnalysis &AA = getAnalysis(); + + // If the kill pointer can be easily reduced to an alloca, + // don't bother doing extraneous AA queries. + if (deadPointers.count(killPointer)) { + deadPointers.erase(killPointer); + return false; + } + + // A global can't be in the dead pointer set. + if (isa(killPointer)) + return false; + + bool MadeChange = false; + + SmallVector undead; + + for (SmallPtrSet::iterator I = deadPointers.begin(), + E = deadPointers.end(); I != E; ++I) { + // Get size information for the alloca. + unsigned pointerSize = ~0U; + if (AllocaInst* A = dyn_cast(*I)) { + if (ConstantInt* C = dyn_cast(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD.getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast(cast(*I)->getType()); + pointerSize = TD.getTypeAllocSize(PT->getElementType()); + } + + // See if this pointer could alias it + AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize, + killPointer, killPointerSize); + + // If it must-alias and a store, we can delete it + if (isa(BBI) && A == AliasAnalysis::MustAlias) { + StoreInst* S = cast(BBI); + + // Remove it! + BBI++; + DeleteDeadInstruction(S, &deadPointers); + NumFastStores++; + MadeChange = true; + + continue; + + // Otherwise, it is undead + } else if (A != AliasAnalysis::NoAlias) + undead.push_back(*I); + } + + for (SmallVector::iterator I = undead.begin(), E = undead.end(); + I != E; ++I) + deadPointers.erase(*I); + + return MadeChange; +} + +/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. +/// +/// If ValueSet is non-null, remove any deleted instructions from it as well. 
+/// +void DSE::DeleteDeadInstruction(Instruction *I, + SmallPtrSet *ValueSet) { + SmallVector NowDeadInsts; + + NowDeadInsts.push_back(I); + --NumFastOther; + + // Before we touch this instruction, remove it from memdep! + MemoryDependenceAnalysis &MDA = getAnalysis(); + while (!NowDeadInsts.empty()) { + Instruction *DeadInst = NowDeadInsts.back(); + NowDeadInsts.pop_back(); + + ++NumFastOther; + + // This instruction is dead, zap it, in stages. Start by removing it from + // MemDep, which needs to know the operands and needs it to be in the + // function. + MDA.removeInstruction(DeadInst); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, 0); + + // If this operand just became dead, add it to the NowDeadInsts list. + if (!Op->use_empty()) continue; + + if (Instruction *OpI = dyn_cast(Op)) + if (isInstructionTriviallyDead(OpI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + + if (ValueSet) ValueSet->erase(DeadInst); + } +} diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp new file mode 100644 index 000000000000..733dfa97a154 --- /dev/null +++ b/lib/Transforms/Scalar/GVN.cpp @@ -0,0 +1,1738 @@ +//===- GVN.cpp - Eliminate redundant values and loads ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global value numbering to eliminate fully redundant +// instructions. It also performs simple dead load elimination. +// +// Note that this pass does the value numbering itself; it does not use the +// ValueNumbering analysis passes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gvn" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +using namespace llvm; + +STATISTIC(NumGVNInstr, "Number of instructions deleted"); +STATISTIC(NumGVNLoad, "Number of loads deleted"); +STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); +STATISTIC(NumGVNBlocks, "Number of blocks merged"); +STATISTIC(NumPRELoad, "Number of loads PRE'd"); + +static cl::opt EnablePRE("enable-pre", + cl::init(true), cl::Hidden); +cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); + +//===----------------------------------------------------------------------===// +// ValueTable Class +//===----------------------------------------------------------------------===// + +/// This class holds the mapping between values and value numbers. It is used +/// as an efficient mechanism to determine the expression-wise equivalence of +/// two values. 
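// --- Editorial sketch: not part of the original patch. ---
// Value numbering in miniature: hash-cons (opcode, operand-number) tuples so
// that structurally identical expressions receive the same small integer.
// Two values with equal numbers are expression-wise equivalent, which is
// what the ValueTable below provides for real IR.  Names are hypothetical.
#include <map>

struct ToyExprKey {
  int      Opcode;
  unsigned LHS, RHS;   // operand *value numbers*, not the operands themselves
  bool operator<(const ToyExprKey &O) const {
    if (Opcode != O.Opcode) return Opcode < O.Opcode;
    if (LHS != O.LHS) return LHS < O.LHS;
    return RHS < O.RHS;
  }
};

class ToyValueTable {
  std::map<ToyExprKey, unsigned> ExprNumbering;
  unsigned NextValueNumber;
public:
  ToyValueTable() : NextValueNumber(1) {}
  // Returns the existing number for an equivalent expression, or assigns a
  // fresh one -- the heart of lookup_or_add below.
  unsigned lookupOrAdd(int Opcode, unsigned LHS, unsigned RHS) {
    ToyExprKey Key = { Opcode, LHS, RHS };
    std::map<ToyExprKey, unsigned>::iterator I = ExprNumbering.find(Key);
    if (I != ExprNumbering.end())
      return I->second;
    return ExprNumbering[Key] = NextValueNumber++;
  }
};
// Usage: calling lookupOrAdd('+', a, b) twice with the same a and b yields
// the same number, so the second add is redundant and GVN can reuse the first.
// --- End editorial sketch. ---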
+namespace { + struct VISIBILITY_HIDDEN Expression { + enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, + SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, + FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, + PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, + EMPTY, TOMBSTONE }; + + ExpressionOpcode opcode; + const Type* type; + uint32_t firstVN; + uint32_t secondVN; + uint32_t thirdVN; + SmallVector varargs; + Value* function; + + Expression() { } + Expression(ExpressionOpcode o) : opcode(o) { } + + bool operator==(const Expression &other) const { + if (opcode != other.opcode) + return false; + else if (opcode == EMPTY || opcode == TOMBSTONE) + return true; + else if (type != other.type) + return false; + else if (function != other.function) + return false; + else if (firstVN != other.firstVN) + return false; + else if (secondVN != other.secondVN) + return false; + else if (thirdVN != other.thirdVN) + return false; + else { + if (varargs.size() != other.varargs.size()) + return false; + + for (size_t i = 0; i < varargs.size(); ++i) + if (varargs[i] != other.varargs[i]) + return false; + + return true; + } + } + + bool operator!=(const Expression &other) const { + return !(*this == other); + } + }; + + class VISIBILITY_HIDDEN ValueTable { + private: + DenseMap valueNumbering; + DenseMap expressionNumbering; + AliasAnalysis* AA; + MemoryDependenceAnalysis* MD; + DominatorTree* DT; + + uint32_t nextValueNumber; + + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); + Expression::ExpressionOpcode getOpcode(CmpInst* C); + Expression::ExpressionOpcode getOpcode(CastInst* C); + Expression create_expression(BinaryOperator* BO); + Expression create_expression(CmpInst* C); + Expression create_expression(ShuffleVectorInst* V); + Expression create_expression(ExtractElementInst* C); + Expression create_expression(InsertElementInst* V); + Expression create_expression(SelectInst* V); + Expression create_expression(CastInst* C); + Expression create_expression(GetElementPtrInst* G); + Expression create_expression(CallInst* C); + Expression create_expression(Constant* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value* V); + uint32_t lookup(Value* V) const; + void add(Value* V, uint32_t num); + void clear(); + void erase(Value* v); + unsigned size(); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; + }; +} + +namespace llvm { +template <> struct DenseMapInfo { + static inline Expression getEmptyKey() { + return Expression(Expression::EMPTY); + } + + static inline Expression getTombstoneKey() { + return Expression(Expression::TOMBSTONE); + } + + static unsigned getHashValue(const Expression e) { + unsigned hash = e.opcode; + + hash = e.firstVN + hash * 37; + hash = e.secondVN + hash * 37; + hash = e.thirdVN + hash * 37; + + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ + (unsigned)((uintptr_t)e.type >> 9)) + + hash * 37; + + for (SmallVector::const_iterator I = e.varargs.begin(), + E = e.varargs.end(); I != E; 
++I) + hash = *I + hash * 37; + + hash = ((unsigned)((uintptr_t)e.function >> 4) ^ + (unsigned)((uintptr_t)e.function >> 9)) + + hash * 37; + + return hash; + } + static bool isEqual(const Expression &LHS, const Expression &RHS) { + return LHS == RHS; + } + static bool isPod() { return true; } +}; +} + +//===----------------------------------------------------------------------===// +// ValueTable Internal Functions +//===----------------------------------------------------------------------===// +Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { + switch(BO->getOpcode()) { + default: // THIS SHOULD NEVER HAPPEN + assert(0 && "Binary operator with unknown opcode?"); + case Instruction::Add: return Expression::ADD; + case Instruction::Sub: return Expression::SUB; + case Instruction::Mul: return Expression::MUL; + case Instruction::UDiv: return Expression::UDIV; + case Instruction::SDiv: return Expression::SDIV; + case Instruction::FDiv: return Expression::FDIV; + case Instruction::URem: return Expression::UREM; + case Instruction::SRem: return Expression::SREM; + case Instruction::FRem: return Expression::FREM; + case Instruction::Shl: return Expression::SHL; + case Instruction::LShr: return Expression::LSHR; + case Instruction::AShr: return Expression::ASHR; + case Instruction::And: return Expression::AND; + case Instruction::Or: return Expression::OR; + case Instruction::Xor: return Expression::XOR; + } +} + +Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { + if (isa(C) || isa(C)) { + switch (C->getPredicate()) { + default: // THIS SHOULD NEVER HAPPEN + assert(0 && "Comparison with unknown predicate?"); + case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; + case ICmpInst::ICMP_NE: return Expression::ICMPNE; + case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; + case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; + case ICmpInst::ICMP_ULT: return Expression::ICMPULT; + case ICmpInst::ICMP_ULE: return Expression::ICMPULE; + case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; + case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; + case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; + case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; + } + } + assert((isa(C) || isa(C)) && "Unknown compare"); + switch (C->getPredicate()) { + default: // THIS SHOULD NEVER HAPPEN + assert(0 && "Comparison with unknown predicate?"); + case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } +} + +Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { + switch(C->getOpcode()) { + default: // THIS SHOULD NEVER HAPPEN + assert(0 && "Cast operator with unknown opcode?"); + case Instruction::Trunc: return Expression::TRUNC; + case Instruction::ZExt: return Expression::ZEXT; + case Instruction::SExt: return Expression::SEXT; + case Instruction::FPToUI: return 
Expression::FPTOUI; + case Instruction::FPToSI: return Expression::FPTOSI; + case Instruction::UIToFP: return Expression::UITOFP; + case Instruction::SIToFP: return Expression::SITOFP; + case Instruction::FPTrunc: return Expression::FPTRUNC; + case Instruction::FPExt: return Expression::FPEXT; + case Instruction::PtrToInt: return Expression::PTRTOINT; + case Instruction::IntToPtr: return Expression::INTTOPTR; + case Instruction::BitCast: return Expression::BITCAST; + } +} + +Expression ValueTable::create_expression(CallInst* C) { + Expression e; + + e.type = C->getType(); + e.firstVN = 0; + e.secondVN = 0; + e.thirdVN = 0; + e.function = C->getCalledFunction(); + e.opcode = Expression::CALL; + + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); + I != E; ++I) + e.varargs.push_back(lookup_or_add(*I)); + + return e; +} + +Expression ValueTable::create_expression(BinaryOperator* BO) { + Expression e; + + e.firstVN = lookup_or_add(BO->getOperand(0)); + e.secondVN = lookup_or_add(BO->getOperand(1)); + e.thirdVN = 0; + e.function = 0; + e.type = BO->getType(); + e.opcode = getOpcode(BO); + + return e; +} + +Expression ValueTable::create_expression(CmpInst* C) { + Expression e; + + e.firstVN = lookup_or_add(C->getOperand(0)); + e.secondVN = lookup_or_add(C->getOperand(1)); + e.thirdVN = 0; + e.function = 0; + e.type = C->getType(); + e.opcode = getOpcode(C); + + return e; +} + +Expression ValueTable::create_expression(CastInst* C) { + Expression e; + + e.firstVN = lookup_or_add(C->getOperand(0)); + e.secondVN = 0; + e.thirdVN = 0; + e.function = 0; + e.type = C->getType(); + e.opcode = getOpcode(C); + + return e; +} + +Expression ValueTable::create_expression(ShuffleVectorInst* S) { + Expression e; + + e.firstVN = lookup_or_add(S->getOperand(0)); + e.secondVN = lookup_or_add(S->getOperand(1)); + e.thirdVN = lookup_or_add(S->getOperand(2)); + e.function = 0; + e.type = S->getType(); + e.opcode = Expression::SHUFFLE; + + return e; +} + +Expression ValueTable::create_expression(ExtractElementInst* E) { + Expression e; + + e.firstVN = lookup_or_add(E->getOperand(0)); + e.secondVN = lookup_or_add(E->getOperand(1)); + e.thirdVN = 0; + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACT; + + return e; +} + +Expression ValueTable::create_expression(InsertElementInst* I) { + Expression e; + + e.firstVN = lookup_or_add(I->getOperand(0)); + e.secondVN = lookup_or_add(I->getOperand(1)); + e.thirdVN = lookup_or_add(I->getOperand(2)); + e.function = 0; + e.type = I->getType(); + e.opcode = Expression::INSERT; + + return e; +} + +Expression ValueTable::create_expression(SelectInst* I) { + Expression e; + + e.firstVN = lookup_or_add(I->getCondition()); + e.secondVN = lookup_or_add(I->getTrueValue()); + e.thirdVN = lookup_or_add(I->getFalseValue()); + e.function = 0; + e.type = I->getType(); + e.opcode = Expression::SELECT; + + return e; +} + +Expression ValueTable::create_expression(GetElementPtrInst* G) { + Expression e; + + e.firstVN = lookup_or_add(G->getPointerOperand()); + e.secondVN = 0; + e.thirdVN = 0; + e.function = 0; + e.type = G->getType(); + e.opcode = Expression::GEP; + + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); + I != E; ++I) + e.varargs.push_back(lookup_or_add(*I)); + + return e; +} + +//===----------------------------------------------------------------------===// +// ValueTable External Functions +//===----------------------------------------------------------------------===// + +/// add - Insert a value into the table with a 
specified value number. +void ValueTable::add(Value* V, uint32_t num) { + valueNumbering.insert(std::make_pair(V, num)); +} + +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. +uint32_t ValueTable::lookup_or_add(Value* V) { + DenseMap::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; + + if (CallInst* C = dyn_cast(V)) { + if (AA->doesNotAccessMemory(C)) { + Expression e = create_expression(C); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (AA->onlyReadsMemory(C)) { + Expression e = create_expression(C); + + if (expressionNumbering.find(e) == expressionNumbering.end()) { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + + MemDepResult local_dep = MD->getDependency(C); + + if (!local_dep.isDef() && !local_dep.isNonLocal()) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + + if (local_dep.isDef()) { + CallInst* local_cdep = cast(local_dep.getInst()); + + if (local_cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + } + + uint32_t v = lookup_or_add(local_cdep); + valueNumbering.insert(std::make_pair(V, v)); + return v; + } + + // Non-local case. + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(CallSite(C)); + // FIXME: call/call dependencies for readonly calls should return def, not + // clobber! Move the checking logic to MemDep! + CallInst* cdep = 0; + + // Check to see if we have a single dominating call instruction that is + // identical to C. + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + // Ignore non-local dependencies. + if (I->second.isNonLocal()) + continue; + + // We don't handle non-depedencies. If we already have a call, reject + // instruction dependencies. + if (I->second.isClobber() || cdep != 0) { + cdep = 0; + break; + } + + CallInst *NonLocalDepCall = dyn_cast(I->second.getInst()); + // FIXME: All duplicated with non-local case. 
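// Aside: a minimal standalone model of the rule this loop enforces -- a
// readonly call may reuse an existing value number only if every non-local
// dependency resolves to one identical defining call (which must also
// dominate the use) and no clobber is seen.  Plain std containers stand in
// for the LLVM types; all names below are illustrative, not LLVM API.
#include <vector>

namespace vn_sketch {

enum class DepKind { Def, Clobber, NonLocal };

struct Dep {
  DepKind kind;
  int inst;   // id of the dependency instruction (a call when kind == Def)
};

// Returns the id of the single defining call all blocks agree on, or -1.
int singleDominatingCall(const std::vector<Dep>& deps) {
  int cdep = -1;
  for (const Dep& d : deps) {
    if (d.kind == DepKind::NonLocal)
      continue;                          // ignored, like the code above
    if (d.kind == DepKind::Clobber || cdep != -1)
      return -1;                         // clobbered, or a second candidate
    cdep = d.inst;
  }
  return cdep;                           // caller still checks dominance
}

} // namespace vn_sketch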
+ if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + cdep = NonLocalDepCall; + continue; + } + + cdep = 0; + break; + } + + if (!cdep) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + + if (cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + } + + uint32_t v = lookup_or_add(cdep); + valueNumbering.insert(std::make_pair(V, v)); + return v; + + } else { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } + } else if (BinaryOperator* BO = dyn_cast(V)) { + Expression e = create_expression(BO); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (CmpInst* C = dyn_cast(V)) { + Expression e = create_expression(C); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (ShuffleVectorInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (ExtractElementInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (InsertElementInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (SelectInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (CastInst* U = dyn_cast(V)) { + Expression e = 
create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (GetElementPtrInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; + } +} + +/// lookup - Returns the value number of the specified value. Fails if +/// the value has not yet been numbered. +uint32_t ValueTable::lookup(Value* V) const { + DenseMap::iterator VI = valueNumbering.find(V); + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; +} + +/// clear - Remove all entries from the ValueTable +void ValueTable::clear() { + valueNumbering.clear(); + expressionNumbering.clear(); + nextValueNumber = 1; +} + +/// erase - Remove a value from the value numbering +void ValueTable::erase(Value* V) { + valueNumbering.erase(V); +} + +/// verifyRemoved - Verify that the value is removed from all internal data +/// structures. +void ValueTable::verifyRemoved(const Value *V) const { + for (DenseMap::iterator + I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { + assert(I->first != V && "Inst still occurs in value numbering map!"); + } +} + +//===----------------------------------------------------------------------===// +// GVN Pass +//===----------------------------------------------------------------------===// + +namespace { + struct VISIBILITY_HIDDEN ValueNumberScope { + ValueNumberScope* parent; + DenseMap table; + + ValueNumberScope(ValueNumberScope* p) : parent(p) { } + }; +} + +namespace { + + class VISIBILITY_HIDDEN GVN : public FunctionPass { + bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + GVN() : FunctionPass(&ID) { } + + private: + MemoryDependenceAnalysis *MD; + DominatorTree *DT; + + ValueTable VN; + DenseMap localAvail; + + typedef DenseMap > PhiMapType; + PhiMapType phiMap; + + + // This transformation requires dominator postdominator info + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + + AU.addPreserved(); + AU.addPreserved(); + } + + // Helper fuctions + // FIXME: eliminate or document these better + bool processLoad(LoadInst* L, + SmallVectorImpl &toErase); + bool processInstruction(Instruction* I, + SmallVectorImpl &toErase); + bool processNonLocalLoad(LoadInst* L, + SmallVectorImpl &toErase); + bool processBlock(BasicBlock* BB); + Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, + DenseMap &Phis, + bool top_level = false); + void dump(DenseMap& d); + bool iterateOnFunction(Function &F); + Value* CollapsePhi(PHINode* p); + bool isSafeReplacement(PHINode* p, Instruction* inst); + bool performPRE(Function& F); + Value* lookupNumber(BasicBlock* BB, uint32_t num); + bool mergeBlockIntoPredecessor(BasicBlock* BB); + Value* AttemptRedundancyElimination(Instruction* orig, 
unsigned valno); + void cleanupGlobalSets(); + void verifyRemoved(const Instruction *I) const; + }; + + char GVN::ID = 0; +} + +// createGVNPass - The public interface to this file... +FunctionPass *llvm::createGVNPass() { return new GVN(); } + +static RegisterPass X("gvn", + "Global Value Numbering"); + +void GVN::dump(DenseMap& d) { + printf("{\n"); + for (DenseMap::iterator I = d.begin(), + E = d.end(); I != E; ++I) { + printf("%d\n", I->first); + I->second->dump(); + } + printf("}\n"); +} + +Value* GVN::CollapsePhi(PHINode* p) { + Value* constVal = p->hasConstantValue(); + if (!constVal) return 0; + + Instruction* inst = dyn_cast(constVal); + if (!inst) + return constVal; + + if (DT->dominates(inst, p)) + if (isSafeReplacement(p, inst)) + return inst; + return 0; +} + +bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { + if (!isa(inst)) + return true; + + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); + UI != E; ++UI) + if (PHINode* use_phi = dyn_cast(UI)) + if (use_phi->getParent() == inst->getParent()) + return false; + + return true; +} + +/// GetValueForBlock - Get the value to use within the specified basic block. +/// available values are in Phis. +Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, + DenseMap &Phis, + bool top_level) { + + // If we have already computed this value, return the previously computed val. + DenseMap::iterator V = Phis.find(BB); + if (V != Phis.end() && !top_level) return V->second; + + // If the block is unreachable, just return undef, since this path + // can't actually occur at runtime. + if (!DT->isReachableFromEntry(BB)) + return Phis[BB] = UndefValue::get(orig->getType()); + + if (BasicBlock *Pred = BB->getSinglePredecessor()) { + Value *ret = GetValueForBlock(Pred, orig, Phis); + Phis[BB] = ret; + return ret; + } + + // Get the number of predecessors of this block so we can reserve space later. + // If there is already a PHI in it, use the #preds from it, otherwise count. + // Getting it from the PHI is constant time. + unsigned NumPreds; + if (PHINode *ExistingPN = dyn_cast(BB->begin())) + NumPreds = ExistingPN->getNumIncomingValues(); + else + NumPreds = std::distance(pred_begin(BB), pred_end(BB)); + + // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so + // now, then get values to fill in the incoming values for the PHI. + PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", + BB->begin()); + PN->reserveOperandSpace(NumPreds); + + Phis.insert(std::make_pair(BB, PN)); + + // Fill in the incoming values for the block. 
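// Aside: GetValueForBlock is essentially on-demand SSA construction: memoize
// per-block answers, recurse through single predecessors, and place a phi at
// any merge point (registering it before recursing so cycles terminate).  A
// compact standalone model over a toy CFG; names are illustrative and
// unreachable blocks are ignored here.
#include <map>
#include <string>
#include <vector>

namespace ssa_sketch {

struct Block {
  std::vector<Block*> preds;
  std::string def;                 // non-empty if the value is defined here
};

std::map<Block*, std::string> memo;

std::string valueFor(Block* b) {
  std::map<Block*, std::string>::iterator it = memo.find(b);
  if (it != memo.end()) return it->second;

  if (!b->def.empty()) return memo[b] = b->def;
  if (b->preds.size() == 1) return memo[b] = valueFor(b->preds[0]);

  // Merge point: record the phi first, then compute its incoming values.
  static int counter = 0;
  std::string phi = "phi." + std::to_string(counter++);
  memo[b] = phi;
  for (Block* p : b->preds)
    valueFor(p);                   // one incoming value per predecessor
  return phi;
}

} // namespace ssa_sketch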
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + Value* val = GetValueForBlock(*PI, orig, Phis); + PN->addIncoming(val, *PI); + } + + VN.getAliasAnalysis()->copyValue(orig, PN); + + // Attempt to collapse PHI nodes that are trivially redundant + Value* v = CollapsePhi(PN); + if (!v) { + // Cache our phi construction results + if (LoadInst* L = dyn_cast(orig)) + phiMap[L->getPointerOperand()].insert(PN); + else + phiMap[orig].insert(PN); + + return PN; + } + + PN->replaceAllUsesWith(v); + if (isa(v->getType())) + MD->invalidateCachedPointerInfo(v); + + for (DenseMap::iterator I = Phis.begin(), + E = Phis.end(); I != E; ++I) + if (I->second == PN) + I->second = v; + + DEBUG(cerr << "GVN removed: " << *PN); + MD->removeInstruction(PN); + PN->eraseFromParent(); + DEBUG(verifyRemoved(PN)); + + Phis[BB] = v; + return v; +} + +/// IsValueFullyAvailableInBlock - Return true if we can prove that the value +/// we're analyzing is fully available in the specified block. As we go, keep +/// track of which blocks we know are fully alive in FullyAvailableBlocks. This +/// map is actually a tri-state map with the following values: +/// 0) we know the block *is not* fully available. +/// 1) we know the block *is* fully available. +/// 2) we do not know whether the block is fully available or not, but we are +/// currently speculating that it will be. +/// 3) we are speculating for this block and have used that to speculate for +/// other blocks. +static bool IsValueFullyAvailableInBlock(BasicBlock *BB, + DenseMap &FullyAvailableBlocks) { + // Optimistically assume that the block is fully available and check to see + // if we already know about this block in one lookup. + std::pair::iterator, char> IV = + FullyAvailableBlocks.insert(std::make_pair(BB, 2)); + + // If the entry already existed for this block, return the precomputed value. + if (!IV.second) { + // If this is a speculative "available" value, mark it as being used for + // speculation of other blocks. + if (IV.first->second == 2) + IV.first->second = 3; + return IV.first->second != 0; + } + + // Otherwise, see if it is fully available in all predecessors. + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + + // If this block has no predecessors, it isn't live-in here. + if (PI == PE) + goto SpeculationFailure; + + for (; PI != PE; ++PI) + // If the value isn't fully available in one of our predecessors, then it + // isn't fully available in this block either. Undo our previous + // optimistic assumption and bail out. + if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + goto SpeculationFailure; + + return true; + +// SpeculationFailure - If we get here, we found out that this is not, after +// all, a fully-available block. We have a problem if we speculated on this and +// used the speculation to mark other blocks as available. +SpeculationFailure: + char &BBVal = FullyAvailableBlocks[BB]; + + // If we didn't speculate on this, just return with it set to false. + if (BBVal == 2) { + BBVal = 0; + return false; + } + + // If we did speculate on this value, we could have blocks set to 1 that are + // incorrect. Walk the (transitive) successors of this block and mark them as + // 0 if set to one. + SmallVector BBWorklist; + BBWorklist.push_back(BB); + + while (!BBWorklist.empty()) { + BasicBlock *Entry = BBWorklist.pop_back_val(); + // Note that this sets blocks to 0 (unavailable) if they happen to not + // already be in FullyAvailableBlocks. This is safe. 
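// Aside: the tri-state bookkeeping above, in miniature.  Blocks are inserted
// optimistically as "speculated" (2); a hit on a speculated entry upgrades it
// to "speculation used" (3), which is what obliges the failure path to walk
// successors and undo.  This standalone model keeps only the local rollback;
// names and types are illustrative.
#include <map>
#include <vector>

namespace avail_sketch {

struct Block { std::vector<Block*> preds; };

// 0 = unavailable, 1 = available, 2 = speculated, 3 = speculation used.
bool fullyAvailable(Block* bb, std::map<Block*, char>& state) {
  std::pair<std::map<Block*, char>::iterator, bool> iv =
      state.insert(std::make_pair(bb, (char)2));   // optimistic default
  if (!iv.second) {
    if (iv.first->second == 2)
      iv.first->second = 3;          // someone now depends on the guess
    return iv.first->second != 0;
  }
  bool ok = !bb->preds.empty();      // no predecessors: not live-in
  for (Block* p : bb->preds)
    if (!fullyAvailable(p, state)) { ok = false; break; }
  if (!ok)
    state[bb] = 0;                   // the real code also clears successors
  return ok;
}

} // namespace avail_sketch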
+ char &EntryVal = FullyAvailableBlocks[Entry]; + if (EntryVal == 0) continue; // Already unavailable. + + // Mark as unavailable. + EntryVal = 0; + + for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) + BBWorklist.push_back(*I); + } + + return false; +} + +/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// non-local by performing PHI construction. +bool GVN::processNonLocalLoad(LoadInst *LI, + SmallVectorImpl &toErase) { + // Find the non-local dependencies of the load. + SmallVector Deps; + MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), + Deps); + //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); + + // If we had to process more than one hundred blocks to find the + // dependencies, this load isn't worth worrying about. Optimizing + // it will be too expensive. + if (Deps.size() > 100) + return false; + + // If we had a phi translation failure, we'll have a single entry which is a + // clobber in the current block. Reject this early. + if (Deps.size() == 1 && Deps[0].second.isClobber()) + return false; + + // Filter out useless results (non-locals, etc). Keep track of the blocks + // where we have a value available in repl, also keep track of whether we see + // dependencies that produce an unknown value for the load (such as a call + // that could potentially clobber the load). + SmallVector, 16> ValuesPerBlock; + SmallVector UnavailableBlocks; + + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + BasicBlock *DepBB = Deps[i].first; + MemDepResult DepInfo = Deps[i].second; + + if (DepInfo.isClobber()) { + UnavailableBlocks.push_back(DepBB); + continue; + } + + Instruction *DepInst = DepInfo.getInst(); + + // Loading the allocation -> undef. + if (isa(DepInst)) { + ValuesPerBlock.push_back(std::make_pair(DepBB, + UndefValue::get(LI->getType()))); + continue; + } + + if (StoreInst* S = dyn_cast(DepInst)) { + // Reject loads and stores that are to the same address but are of + // different types. + // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because + // of bitfield access, it would be interesting to optimize for it at some + // point. + if (S->getOperand(0)->getType() != LI->getType()) { + UnavailableBlocks.push_back(DepBB); + continue; + } + + ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); + + } else if (LoadInst* LD = dyn_cast(DepInst)) { + if (LD->getType() != LI->getType()) { + UnavailableBlocks.push_back(DepBB); + continue; + } + ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); + } else { + UnavailableBlocks.push_back(DepBB); + continue; + } + } + + // If we have no predecessors that produce a known value for this load, exit + // early. + if (ValuesPerBlock.empty()) return false; + + // If all of the instructions we depend on produce a known value for this + // load, then it is fully redundant and we can use PHI insertion to compute + // its value. Insert PHIs and remove the fully redundant value now. + if (UnavailableBlocks.empty()) { + // Use cached PHI construction information from previous runs + SmallPtrSet &p = phiMap[LI->getPointerOperand()]; + // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 
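// Aside: the classification a few lines up as a tiny standalone table: each
// dependency either yields a known value for its block (store -> the stored
// operand, load -> the prior load, allocation -> undef) or marks the block
// unavailable.  Illustrative types only; the bool stands in for the
// same-type checks above.
#include <string>

namespace loadclass_sketch {

enum class DepInstKind { Store, Load, Alloc, Other };

struct Classified {
  bool available;
  std::string value;                 // meaningful only when available
};

Classified classify(DepInstKind k, bool typesMatch) {
  switch (k) {
  case DepInstKind::Store:
    return typesMatch ? Classified{true, "stored-operand"}
                      : Classified{false, ""};
  case DepInstKind::Load:
    return typesMatch ? Classified{true, "prior-load"}
                      : Classified{false, ""};
  case DepInstKind::Alloc:
    return Classified{true, "undef"}; // fresh memory reads back as undef
  default:
    return Classified{false, ""};
  }
}

} // namespace loadclass_sketch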
+ for (SmallPtrSet::iterator I = p.begin(), E = p.end(); + I != E; ++I) { + if ((*I)->getParent() == LI->getParent()) { + DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); + LI->replaceAllUsesWith(*I); + if (isa((*I)->getType())) + MD->invalidateCachedPointerInfo(*I); + toErase.push_back(LI); + NumGVNLoad++; + return true; + } + + ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); + } + + DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); + + DenseMap BlockReplValues; + BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); + // Perform PHI construction. + Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); + LI->replaceAllUsesWith(v); + + if (isa(v)) + v->takeName(LI); + if (isa(v->getType())) + MD->invalidateCachedPointerInfo(v); + toErase.push_back(LI); + NumGVNLoad++; + return true; + } + + if (!EnablePRE || !EnableLoadPRE) + return false; + + // Okay, we have *some* definitions of the value. This means that the value + // is available in some of our (transitive) predecessors. Lets think about + // doing PRE of this load. This will involve inserting a new load into the + // predecessor when it's not available. We could do this in general, but + // prefer to not increase code size. As such, we only do this when we know + // that we only have to insert *one* load (which means we're basically moving + // the load, not inserting a new one). + + SmallPtrSet Blockers; + for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) + Blockers.insert(UnavailableBlocks[i]); + + // Lets find first basic block with more than one predecessor. Walk backwards + // through predecessors if needed. + BasicBlock *LoadBB = LI->getParent(); + BasicBlock *TmpBB = LoadBB; + + bool isSinglePred = false; + while (TmpBB->getSinglePredecessor()) { + isSinglePred = true; + TmpBB = TmpBB->getSinglePredecessor(); + if (!TmpBB) // If haven't found any, bail now. + return false; + if (TmpBB == LoadBB) // Infinite (unreachable) loop. + return false; + if (Blockers.count(TmpBB)) + return false; + } + + assert(TmpBB); + LoadBB = TmpBB; + + // If we have a repl set with LI itself in it, this means we have a loop where + // at least one of the values is LI. Since this means that we won't be able + // to eliminate LI even if we insert uses in the other predecessors, we will + // end up increasing code size. Reject this by scanning for LI. + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (ValuesPerBlock[i].second == LI) + return false; + + if (isSinglePred) { + bool isHot = false; + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (Instruction *I = dyn_cast(ValuesPerBlock[i].second)) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + + // We are interested only in "hot" instructions. We don't want to do any + // mis-optimizations here. + if (!isHot) + return false; + } + + // Okay, we have some hope :). Check to see if the loaded value is fully + // available in all but one predecessor. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. 
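// Aside: the scan below reduced to its core: load PRE is only attempted when
// exactly one predecessor lacks the value, so the inserted load replaces the
// old one instead of growing the code.  Standalone model; names illustrative.
#include <map>
#include <vector>

namespace loadpre_sketch {

struct Block { std::vector<Block*> preds; };

// Returns the single predecessor missing the value, or null when none or
// more than one predecessor would need a new load.
Block* singleUnavailablePred(Block* loadBB,
                             const std::map<Block*, bool>& avail) {
  Block* missing = 0;
  for (Block* p : loadBB->preds) {
    std::map<Block*, bool>::const_iterator it = avail.find(p);
    if (it != avail.end() && it->second)
      continue;                      // value already available on this edge
    if (missing && missing != p)
      return 0;                      // two gaps: PRE would add code
    missing = p;
  }
  return missing;
}

} // namespace loadpre_sketch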
+ BasicBlock *UnavailablePred = 0; + + DenseMap FullyAvailableBlocks; + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + FullyAvailableBlocks[ValuesPerBlock[i].first] = true; + for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) + FullyAvailableBlocks[UnavailableBlocks[i]] = false; + + for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); + PI != E; ++PI) { + if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + continue; + + // If this load is not available in multiple predecessors, reject it. + if (UnavailablePred && UnavailablePred != *PI) + return false; + UnavailablePred = *PI; + } + + assert(UnavailablePred != 0 && + "Fully available value should be eliminated above!"); + + // If the loaded pointer is PHI node defined in this block, do PHI translation + // to get its value in the predecessor. + Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); + + // Make sure the value is live in the predecessor. If it was defined by a + // non-PHI instruction in this block, we don't know how to recompute it above. + if (Instruction *LPInst = dyn_cast(LoadPtr)) + if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { + DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " + << *LPInst << *LI << "\n"); + return false; + } + + // We don't currently handle critical edges :( + if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { + DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << UnavailablePred->getName() << "': " << *LI); + return false; + } + + // Okay, we can eliminate this load by inserting a reload in the predecessor + // and using PHI construction to get the value in the other predecessors, do + // it. + DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); + + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + SmallPtrSet &p = phiMap[LI->getPointerOperand()]; + for (SmallPtrSet::iterator I = p.begin(), E = p.end(); + I != E; ++I) + ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); + + DenseMap BlockReplValues; + BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); + BlockReplValues[UnavailablePred] = NewLoad; + + // Perform PHI construction. + Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); + LI->replaceAllUsesWith(v); + if (isa(v)) + v->takeName(LI); + if (isa(v->getType())) + MD->invalidateCachedPointerInfo(v); + toErase.push_back(LI); + NumPRELoad++; + return true; +} + +/// processLoad - Attempt to eliminate a load, first by eliminating it +/// locally, and then attempting non-local elimination if that fails. +bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { + if (L->isVolatile()) + return false; + + Value* pointer = L->getPointerOperand(); + + // ... to a pointer that has been loaded from before... + MemDepResult dep = MD->getDependency(L); + + // If the value isn't available, don't do anything! + if (dep.isClobber()) { + DEBUG( + // fast print dep, using operator<< on instruction would be too slow + DOUT << "GVN: load "; + WriteAsOperand(*DOUT.stream(), L); + Instruction *I = dep.getInst(); + DOUT << " is clobbered by " << *I; + ); + return false; + } + + // If it is defined in another block, try harder. + if (dep.isNonLocal()) + return processNonLocalLoad(L, toErase); + + Instruction *DepInst = dep.getInst(); + if (StoreInst *DepSI = dyn_cast(DepInst)) { + // Only forward substitute stores to loads of the same type. 
+ // FIXME: Could do better! + if (DepSI->getPointerOperand()->getType() != pointer->getType()) + return false; + + // Remove it! + L->replaceAllUsesWith(DepSI->getOperand(0)); + if (isa(DepSI->getOperand(0)->getType())) + MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + + if (LoadInst *DepLI = dyn_cast(DepInst)) { + // Only forward substitute stores to loads of the same type. + // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. + if (DepLI->getType() != L->getType()) + return false; + + // Remove it! + L->replaceAllUsesWith(DepLI); + if (isa(DepLI->getType())) + MD->invalidateCachedPointerInfo(DepLI); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + + // If this load really doesn't depend on anything, then we must be loading an + // undef value. This can happen when loading for a fresh allocation with no + // intervening stores, for example. + if (isa(DepInst)) { + L->replaceAllUsesWith(UndefValue::get(L->getType())); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + + return false; +} + +Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { + DenseMap::iterator I = localAvail.find(BB); + if (I == localAvail.end()) + return 0; + + ValueNumberScope* locals = I->second; + + while (locals) { + DenseMap::iterator I = locals->table.find(num); + if (I != locals->table.end()) + return I->second; + else + locals = locals->parent; + } + + return 0; +} + +/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination +/// by inheritance from the dominator fails, see if we can perform phi +/// construction to eliminate the redundancy. +Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { + BasicBlock* BaseBlock = orig->getParent(); + + SmallPtrSet Visited; + SmallVector Stack; + Stack.push_back(BaseBlock); + + DenseMap Results; + + // Walk backwards through our predecessors, looking for instances of the + // value number we're looking for. Instances are recorded in the Results + // map, which is then used to perform phi construction. + while (!Stack.empty()) { + BasicBlock* Current = Stack.back(); + Stack.pop_back(); + + // If we've walked all the way to a proper dominator, then give up. Cases + // where the instance is in the dominator will have been caught by the fast + // path, and any cases that require phi construction further than this are + // probably not worth it anyways. Note that this is a SIGNIFICANT compile + // time improvement. + if (DT->properlyDominates(Current, orig->getParent())) return 0; + + DenseMap::iterator LA = + localAvail.find(Current); + if (LA == localAvail.end()) return 0; + DenseMap::iterator V = LA->second->table.find(valno); + + if (V != LA->second->table.end()) { + // Found an instance, record it. + Results.insert(std::make_pair(Current, V->second)); + continue; + } + + // If we reach the beginning of the function, then give up. + if (pred_begin(Current) == pred_end(Current)) + return 0; + + for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); + PI != PE; ++PI) + if (Visited.insert(*PI)) + Stack.push_back(*PI); + } + + // If we didn't find instances, give up. Otherwise, perform phi construction. 
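// Aside: the backwards walk above in outline: flood predecessors from the
// use, record each block that locally exposes the wanted value number, and
// cut the search at proper dominators of the use (the fast path has already
// handled those).  Standalone model; names illustrative.
#include <map>
#include <set>
#include <vector>

namespace redwalk_sketch {

struct Block {
  std::vector<Block*> preds;
  bool hasValue;                  // value number locally available here?
  bool properlyDominatesUse;      // stand-in for DT->properlyDominates
};

// Collects blocks where the value was found; empty means give up.
std::map<Block*, bool> findInstances(Block* from) {
  std::map<Block*, bool> results;
  std::set<Block*> visited;
  std::vector<Block*> stack(1, from);
  visited.insert(from);
  while (!stack.empty()) {
    Block* cur = stack.back(); stack.pop_back();
    if (cur->properlyDominatesUse) return std::map<Block*, bool>();
    if (cur->hasValue) { results[cur] = true; continue; }
    if (cur->preds.empty()) return std::map<Block*, bool>();
    for (Block* p : cur->preds)
      if (visited.insert(p).second)
        stack.push_back(p);
  }
  return results;
}

} // namespace redwalk_sketch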
+ if (Results.size() == 0) + return 0; + else + return GetValueForBlock(BaseBlock, orig, Results, true); +} + +/// processInstruction - When calculating availability, handle an instruction +/// by inserting it into the appropriate sets +bool GVN::processInstruction(Instruction *I, + SmallVectorImpl &toErase) { + if (LoadInst* L = dyn_cast(I)) { + bool changed = processLoad(L, toErase); + + if (!changed) { + unsigned num = VN.lookup_or_add(L); + localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); + } + + return changed; + } + + uint32_t nextNum = VN.getNextUnusedValueNumber(); + unsigned num = VN.lookup_or_add(I); + + if (BranchInst* BI = dyn_cast(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + + if (!BI->isConditional() || isa(BI->getCondition())) + return false; + + Value* branchCond = BI->getCondition(); + uint32_t condVN = VN.lookup_or_add(branchCond); + + BasicBlock* trueSucc = BI->getSuccessor(0); + BasicBlock* falseSucc = BI->getSuccessor(1); + + if (trueSucc->getSinglePredecessor()) + localAvail[trueSucc]->table[condVN] = ConstantInt::getTrue(); + if (falseSucc->getSinglePredecessor()) + localAvail[falseSucc]->table[condVN] = ConstantInt::getFalse(); + + return false; + + // Allocations are always uniquely numbered, so we can save time and memory + // by fast failing them. + } else if (isa(I) || isa(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + return false; + } + + // Collapse PHI nodes + if (PHINode* p = dyn_cast(I)) { + Value* constVal = CollapsePhi(p); + + if (constVal) { + for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); + PI != PE; ++PI) + PI->second.erase(p); + + p->replaceAllUsesWith(constVal); + if (isa(constVal->getType())) + MD->invalidateCachedPointerInfo(constVal); + VN.erase(p); + + toErase.push_back(p); + } else { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + } + + // If the number we were assigned was a brand new VN, then we don't + // need to do a lookup to see if the number already exists + // somewhere in the domtree: it can't! + } else if (num == nextNum) { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + + // Perform fast-path value-number based elimination of values inherited from + // dominators. + } else if (Value* repl = lookupNumber(I->getParent(), num)) { + // Remove it! + VN.erase(I); + I->replaceAllUsesWith(repl); + if (isa(repl->getType())) + MD->invalidateCachedPointerInfo(repl); + toErase.push_back(I); + return true; + +#if 0 + // Perform slow-pathvalue-number based elimination with phi construction. + } else if (Value* repl = AttemptRedundancyElimination(I, num)) { + // Remove it! + VN.erase(I); + I->replaceAllUsesWith(repl); + if (isa(repl->getType())) + MD->invalidateCachedPointerInfo(repl); + toErase.push_back(I); + return true; +#endif + } else { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + } + + return false; +} + +/// runOnFunction - This is the main transformation entry point for a function. +bool GVN::runOnFunction(Function& F) { + MD = &getAnalysis(); + DT = &getAnalysis(); + VN.setAliasAnalysis(&getAnalysis()); + VN.setMemDep(MD); + VN.setDomTree(DT); + + bool changed = false; + bool shouldContinue = true; + + // Merge unconditional branches, allowing PRE to catch more + // optimization opportunities. 
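// Aside: the driver below follows a common fixpoint shape: simplify the CFG,
// rerun the core transform until it stops changing anything, then loop PRE
// the same way.  A minimal standalone skeleton; the callbacks are
// illustrative placeholders for iterateOnFunction and performPRE.
#include <functional>

namespace driver_sketch {

// Runs `pass` until it reports no change; returns whether anything changed.
bool runToFixpoint(const std::function<bool()>& pass) {
  bool changed = false;
  while (pass())
    changed = true;
  return changed;
}

bool runPipeline(const std::function<bool()>& gvnIteration,
                 const std::function<bool()>& pre) {
  bool changed = runToFixpoint(gvnIteration);
  // A FIXME below notes PRE can expose more work for GVN; this skeleton,
  // like the code, stops after the PRE fixpoint.
  changed |= runToFixpoint(pre);
  return changed;
}

} // namespace driver_sketch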
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+    BasicBlock* BB = FI;
+    ++FI;
+    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+    if (removedBlock) NumGVNBlocks++;
+
+    changed |= removedBlock;
+  }
+
+  unsigned Iteration = 0;
+
+  while (shouldContinue) {
+    DEBUG(cerr << "GVN iteration: " << Iteration << "\n");
+    shouldContinue = iterateOnFunction(F);
+    changed |= shouldContinue;
+    ++Iteration;
+  }
+
+  if (EnablePRE) {
+    bool PREChanged = true;
+    while (PREChanged) {
+      PREChanged = performPRE(F);
+      changed |= PREChanged;
+    }
+  }
+  // FIXME: Should perform GVN again after PRE does something.  PRE can move
+  // computations into blocks where they become fully redundant.  Note that
+  // we can't do this until PRE's critical edge splitting updates memdep.
+  // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+  cleanupGlobalSets();
+
+  return changed;
+}
+
+
+bool GVN::processBlock(BasicBlock* BB) {
+  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
+  // incrementing BI before processing an instruction).
+  SmallVector<Instruction*, 4> toErase;
+  bool changed_function = false;
+
+  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+       BI != BE;) {
+    changed_function |= processInstruction(BI, toErase);
+    if (toErase.empty()) {
+      ++BI;
+      continue;
+    }
+
+    // If we need some instructions deleted, do it now.
+    NumGVNInstr += toErase.size();
+
+    // Avoid iterator invalidation.
+    bool AtStart = BI == BB->begin();
+    if (!AtStart)
+      --BI;
+
+    for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
+         E = toErase.end(); I != E; ++I) {
+      DEBUG(cerr << "GVN removed: " << **I);
+      MD->removeInstruction(*I);
+      (*I)->eraseFromParent();
+      DEBUG(verifyRemoved(*I));
+    }
+    toErase.clear();
+
+    if (AtStart)
+      BI = BB->begin();
+    else
+      ++BI;
+  }
+
+  return changed_function;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function& F) {
+  bool Changed = false;
+  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+  DenseMap<BasicBlock*, Value*> predMap;
+  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+    BasicBlock* CurrentBlock = *DI;
+
+    // Nothing to PRE in the entry block.
+    if (CurrentBlock == &F.getEntryBlock()) continue;
+
+    for (BasicBlock::iterator BI = CurrentBlock->begin(),
+         BE = CurrentBlock->end(); BI != BE; ) {
+      Instruction *CurInst = BI++;
+
+      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
+          isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+          isa<DbgInfoIntrinsic>(CurInst))
+        continue;
+
+      uint32_t valno = VN.lookup(CurInst);
+
+      // Look for the predecessors for PRE opportunities.  We're
+      // only trying to solve the basic diamond case, where
+      // a value is computed in the successor and one predecessor,
+      // but not the other.  We also explicitly disallow cases
+      // where the successor is its own predecessor, because they're
+      // more complicated to get right.
+      unsigned numWith = 0;
+      unsigned numWithout = 0;
+      BasicBlock* PREPred = 0;
+      predMap.clear();
+
+      for (pred_iterator PI = pred_begin(CurrentBlock),
+           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        // We're not interested in PRE where the block is its
+        // own predecessor, or in blocks with predecessors
+        // that are not reachable.
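// Aside: the predecessor census below, factored out: PRE fires only when the
// value is missing from exactly one predecessor (numWithout == 1) and at
// least one predecessor supplies it.  Standalone simplification (it omits the
// value-is-the-instruction-itself rejection); names illustrative.
#include <map>
#include <vector>

namespace precensus_sketch {

struct Block { std::vector<Block*> preds; };

struct Census {
  unsigned with;
  unsigned without;
  Block* missingPred;
  Census() : with(0), without(0), missingPred(0) {}
};

Census takeCensus(Block* bb, const std::map<Block*, bool>& availIn) {
  Census c;
  for (Block* p : bb->preds) {
    if (p == bb) { c.without = 2; break; }   // self-loop: never profitable
    std::map<Block*, bool>::const_iterator it = availIn.find(p);
    if (it == availIn.end() || !it->second) {
      c.missingPred = p;
      ++c.without;
    } else {
      ++c.with;
    }
  }
  return c;                 // profitable iff c.with >= 1 && c.without == 1
}

} // namespace precensus_sketch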
+        if (*PI == CurrentBlock) {
+          numWithout = 2;
+          break;
+        } else if (!localAvail.count(*PI))  {
+          numWithout = 2;
+          break;
+        }
+
+        DenseMap<uint32_t, Value*>::iterator predV =
+                                            localAvail[*PI]->table.find(valno);
+        if (predV == localAvail[*PI]->table.end()) {
+          PREPred = *PI;
+          numWithout++;
+        } else if (predV->second == CurInst) {
+          numWithout = 2;
+        } else {
+          predMap[*PI] = predV->second;
+          numWith++;
+        }
+      }
+
+      // Don't do PRE when it might increase code size, i.e. when
+      // we would need to insert instructions in more than one pred.
+      if (numWithout != 1 || numWith == 0)
+        continue;
+
+      // We can't do PRE safely on a critical edge, so instead we schedule
+      // the edge to be split and perform the PRE the next time we iterate
+      // on the function.
+      unsigned succNum = 0;
+      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
+           i != e; ++i)
+        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
+          succNum = i;
+          break;
+        }
+
+      if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
+        toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+        continue;
+      }
+
+      // Instantiate the expression in the predecessor that lacked it.
+      // Because we are going top-down through the block, all value numbers
+      // will be available in the predecessor by the time we need them.  Any
+      // that weren't originally present will have been instantiated earlier
+      // in this loop.
+      Instruction* PREInstr = CurInst->clone();
+      bool success = true;
+      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+        Value *Op = PREInstr->getOperand(i);
+        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+          continue;
+
+        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+          PREInstr->setOperand(i, V);
+        } else {
+          success = false;
+          break;
+        }
+      }
+
+      // Fail out if we encounter an operand that is not available in
+      // the PRE predecessor.  This is typically because of loads which
+      // are not value numbered precisely.
+      if (!success) {
+        DEBUG(verifyRemoved(PREInstr));
+        delete PREInstr;
+        continue;
+      }
+
+      PREInstr->insertBefore(PREPred->getTerminator());
+      PREInstr->setName(CurInst->getName() + ".pre");
+      predMap[PREPred] = PREInstr;
+      VN.add(PREInstr, valno);
+      NumGVNPRE++;
+
+      // Update the availability map to include the new instruction.
+      localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
+
+      // Create a PHI to make the value available in this block.
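// Aside: the join step that follows, in isolation: once every predecessor has
// a copy of the value (predMap is total over the preds by now), one phi at
// the block entry stitches them together and replaces the original
// instruction.  Standalone model; names illustrative.
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace prejoin_sketch {

struct Block { std::vector<Block*> preds; };

struct Phi {
  std::vector<std::pair<Block*, std::string> > incoming;
};

// Precondition: predMap has one entry per predecessor of bb.
Phi buildJoinPhi(Block* bb, const std::map<Block*, std::string>& predMap) {
  Phi phi;
  for (Block* p : bb->preds)
    phi.incoming.push_back(std::make_pair(p, predMap.at(p)));
  return phi;               // the caller then rewrites all uses to the phi
}

} // namespace prejoin_sketch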
+ PHINode* Phi = PHINode::Create(CurInst->getType(), + CurInst->getName() + ".pre-phi", + CurrentBlock->begin()); + for (pred_iterator PI = pred_begin(CurrentBlock), + PE = pred_end(CurrentBlock); PI != PE; ++PI) + Phi->addIncoming(predMap[*PI], *PI); + + VN.add(Phi, valno); + localAvail[CurrentBlock]->table[valno] = Phi; + + CurInst->replaceAllUsesWith(Phi); + if (isa(Phi->getType())) + MD->invalidateCachedPointerInfo(Phi); + VN.erase(CurInst); + + DEBUG(cerr << "GVN PRE removed: " << *CurInst); + MD->removeInstruction(CurInst); + CurInst->eraseFromParent(); + DEBUG(verifyRemoved(CurInst)); + Changed = true; + } + } + + for (SmallVector, 4>::iterator + I = toSplit.begin(), E = toSplit.end(); I != E; ++I) + SplitCriticalEdge(I->first, I->second, this); + + return Changed || toSplit.size(); +} + +/// iterateOnFunction - Executes one iteration of GVN +bool GVN::iterateOnFunction(Function &F) { + cleanupGlobalSets(); + + for (df_iterator DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + if (DI->getIDom()) + localAvail[DI->getBlock()] = + new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); + else + localAvail[DI->getBlock()] = new ValueNumberScope(0); + } + + // Top-down walk of the dominator tree + bool changed = false; +#if 0 + // Needed for value numbering with phi construction to work. + ReversePostOrderTraversal RPOT(&F); + for (ReversePostOrderTraversal::rpo_iterator RI = RPOT.begin(), + RE = RPOT.end(); RI != RE; ++RI) + changed |= processBlock(*RI); +#else + for (df_iterator DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) + changed |= processBlock(DI->getBlock()); +#endif + + return changed; +} + +void GVN::cleanupGlobalSets() { + VN.clear(); + phiMap.clear(); + + for (DenseMap::iterator + I = localAvail.begin(), E = localAvail.end(); I != E; ++I) + delete I->second; + localAvail.clear(); +} + +/// verifyRemoved - Verify that the specified instruction does not occur in our +/// internal data structures. +void GVN::verifyRemoved(const Instruction *Inst) const { + VN.verifyRemoved(Inst); + + // Walk through the PHI map to make sure the instruction isn't hiding in there + // somewhere. + for (PhiMapType::iterator + I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { + assert(I->first != Inst && "Inst is still a key in PHI map!"); + + for (SmallPtrSet::iterator + II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { + assert(*II != Inst && "Inst is still a value in PHI map!"); + } + } + + // Walk through the value number scope to make sure the instruction isn't + // ferreted away in it. + for (DenseMap::iterator + I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { + const ValueNumberScope *VNS = I->second; + + while (VNS) { + for (DenseMap::iterator + II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { + assert(II->second != Inst && "Inst still in value numbering scope!"); + } + + VNS = VNS->parent; + } + } +} diff --git a/lib/Transforms/Scalar/GVNPRE.cpp b/lib/Transforms/Scalar/GVNPRE.cpp new file mode 100644 index 000000000000..e3b09379a22d --- /dev/null +++ b/lib/Transforms/Scalar/GVNPRE.cpp @@ -0,0 +1,1885 @@ +//===- GVNPRE.cpp - Eliminate redundant values and expressions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a hybrid of global value numbering and partial redundancy
+// elimination, known as GVN-PRE. It performs partial redundancy elimination on
+// values, rather than lexical expressions, allowing a more comprehensive view
+// of the optimization. It replaces redundant values with uses of earlier
+// occurrences of the same value. While this is beneficial in that it eliminates
+// unneeded computation, it also increases register pressure by creating large
+// live ranges, and should be used with caution on platforms that are very
+// sensitive to register pressure.
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvnpre"
+#include "llvm/Value.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include
+#include
+#include
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//                         ValueTable Class
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class holds the mapping between values and value numbers.  It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
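// Aside: the Expression struct below works as a hash-map key because equality
// is structural and the hash (in the DenseMapInfo specialization further
// down) folds every field in with a small odd multiplier.  The same shape
// with std::unordered_map; names illustrative.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

namespace exprkey_sketch {

struct Expr {
  int opcode;
  std::vector<uint32_t> operands;    // value numbers of the operands
  bool operator==(const Expr& o) const {
    return opcode == o.opcode && operands == o.operands;
  }
};

struct ExprHash {
  std::size_t operator()(const Expr& e) const {
    std::size_t h = static_cast<std::size_t>(e.opcode);
    for (uint32_t v : e.operands)
      h = v + h * 37;                // same fold as the hash below
    return h;
  }
};

// Expression -> value number, the core of a value-numbering table.
typedef std::unordered_map<Expr, uint32_t, ExprHash> ExprNumbering;

} // namespace exprkey_sketch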
+ +struct Expression { + enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, + SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, + FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, + PTRTOINT, INTTOPTR, BITCAST, GEP, EMPTY, + TOMBSTONE }; + + ExpressionOpcode opcode; + const Type* type; + uint32_t firstVN; + uint32_t secondVN; + uint32_t thirdVN; + SmallVector varargs; + + Expression() { } + explicit Expression(ExpressionOpcode o) : opcode(o) { } + + bool operator==(const Expression &other) const { + if (opcode != other.opcode) + return false; + else if (opcode == EMPTY || opcode == TOMBSTONE) + return true; + else if (type != other.type) + return false; + else if (firstVN != other.firstVN) + return false; + else if (secondVN != other.secondVN) + return false; + else if (thirdVN != other.thirdVN) + return false; + else { + if (varargs.size() != other.varargs.size()) + return false; + + for (size_t i = 0; i < varargs.size(); ++i) + if (varargs[i] != other.varargs[i]) + return false; + + return true; + } + } + + bool operator!=(const Expression &other) const { + if (opcode != other.opcode) + return true; + else if (opcode == EMPTY || opcode == TOMBSTONE) + return false; + else if (type != other.type) + return true; + else if (firstVN != other.firstVN) + return true; + else if (secondVN != other.secondVN) + return true; + else if (thirdVN != other.thirdVN) + return true; + else { + if (varargs.size() != other.varargs.size()) + return true; + + for (size_t i = 0; i < varargs.size(); ++i) + if (varargs[i] != other.varargs[i]) + return true; + + return false; + } + } +}; + +} + +namespace { + class VISIBILITY_HIDDEN ValueTable { + private: + DenseMap valueNumbering; + DenseMap expressionNumbering; + + uint32_t nextValueNumber; + + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); + Expression::ExpressionOpcode getOpcode(CmpInst* C); + Expression::ExpressionOpcode getOpcode(CastInst* C); + Expression create_expression(BinaryOperator* BO); + Expression create_expression(CmpInst* C); + Expression create_expression(ShuffleVectorInst* V); + Expression create_expression(ExtractElementInst* C); + Expression create_expression(InsertElementInst* V); + Expression create_expression(SelectInst* V); + Expression create_expression(CastInst* C); + Expression create_expression(GetElementPtrInst* G); + public: + ValueTable() { nextValueNumber = 1; } + uint32_t lookup_or_add(Value* V); + uint32_t lookup(Value* V) const; + void add(Value* V, uint32_t num); + void clear(); + void erase(Value* v); + unsigned size(); + }; +} + +namespace llvm { +template <> struct DenseMapInfo { + static inline Expression getEmptyKey() { + return Expression(Expression::EMPTY); + } + + static inline Expression getTombstoneKey() { + return Expression(Expression::TOMBSTONE); + } + + static unsigned getHashValue(const Expression e) { + unsigned hash = e.opcode; + + hash = e.firstVN + hash * 37; + hash = e.secondVN + hash * 37; + hash = e.thirdVN + hash * 37; + + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ + (unsigned)((uintptr_t)e.type >> 9)) + + hash * 37; + + for (SmallVector::const_iterator I = e.varargs.begin(), + E = e.varargs.end(); I != E; ++I) + hash = *I + hash * 37; + + return hash; + } + static bool isEqual(const Expression &LHS, const 
Expression &RHS) { + return LHS == RHS; + } + static bool isPod() { return true; } +}; +} + +//===----------------------------------------------------------------------===// +// ValueTable Internal Functions +//===----------------------------------------------------------------------===// +Expression::ExpressionOpcode + ValueTable::getOpcode(BinaryOperator* BO) { + switch(BO->getOpcode()) { + case Instruction::Add: + return Expression::ADD; + case Instruction::Sub: + return Expression::SUB; + case Instruction::Mul: + return Expression::MUL; + case Instruction::UDiv: + return Expression::UDIV; + case Instruction::SDiv: + return Expression::SDIV; + case Instruction::FDiv: + return Expression::FDIV; + case Instruction::URem: + return Expression::UREM; + case Instruction::SRem: + return Expression::SREM; + case Instruction::FRem: + return Expression::FREM; + case Instruction::Shl: + return Expression::SHL; + case Instruction::LShr: + return Expression::LSHR; + case Instruction::AShr: + return Expression::ASHR; + case Instruction::And: + return Expression::AND; + case Instruction::Or: + return Expression::OR; + case Instruction::Xor: + return Expression::XOR; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Binary operator with unknown opcode?"); + return Expression::ADD; + } +} + +Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { + if (C->getOpcode() == Instruction::ICmp) { + switch (C->getPredicate()) { + case ICmpInst::ICMP_EQ: + return Expression::ICMPEQ; + case ICmpInst::ICMP_NE: + return Expression::ICMPNE; + case ICmpInst::ICMP_UGT: + return Expression::ICMPUGT; + case ICmpInst::ICMP_UGE: + return Expression::ICMPUGE; + case ICmpInst::ICMP_ULT: + return Expression::ICMPULT; + case ICmpInst::ICMP_ULE: + return Expression::ICMPULE; + case ICmpInst::ICMP_SGT: + return Expression::ICMPSGT; + case ICmpInst::ICMP_SGE: + return Expression::ICMPSGE; + case ICmpInst::ICMP_SLT: + return Expression::ICMPSLT; + case ICmpInst::ICMP_SLE: + return Expression::ICMPSLE; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Comparison with unknown predicate?"); + return Expression::ICMPEQ; + } + } else { + switch (C->getPredicate()) { + case FCmpInst::FCMP_OEQ: + return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: + return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: + return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: + return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: + return Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: + return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: + return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: + return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: + return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: + return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: + return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: + return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: + return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: + return Expression::FCMPUNE; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Comparison with unknown predicate?"); + return Expression::FCMPOEQ; + } + } +} + +Expression::ExpressionOpcode + ValueTable::getOpcode(CastInst* C) { + switch(C->getOpcode()) { + case Instruction::Trunc: + return Expression::TRUNC; + case Instruction::ZExt: + return Expression::ZEXT; + case Instruction::SExt: + return Expression::SEXT; + case Instruction::FPToUI: + return Expression::FPTOUI; + case Instruction::FPToSI: + return Expression::FPTOSI; + case Instruction::UIToFP: + return Expression::UITOFP; + 
case Instruction::SIToFP: + return Expression::SITOFP; + case Instruction::FPTrunc: + return Expression::FPTRUNC; + case Instruction::FPExt: + return Expression::FPEXT; + case Instruction::PtrToInt: + return Expression::PTRTOINT; + case Instruction::IntToPtr: + return Expression::INTTOPTR; + case Instruction::BitCast: + return Expression::BITCAST; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Cast operator with unknown opcode?"); + return Expression::BITCAST; + } +} + +Expression ValueTable::create_expression(BinaryOperator* BO) { + Expression e; + + e.firstVN = lookup_or_add(BO->getOperand(0)); + e.secondVN = lookup_or_add(BO->getOperand(1)); + e.thirdVN = 0; + e.type = BO->getType(); + e.opcode = getOpcode(BO); + + return e; +} + +Expression ValueTable::create_expression(CmpInst* C) { + Expression e; + + e.firstVN = lookup_or_add(C->getOperand(0)); + e.secondVN = lookup_or_add(C->getOperand(1)); + e.thirdVN = 0; + e.type = C->getType(); + e.opcode = getOpcode(C); + + return e; +} + +Expression ValueTable::create_expression(CastInst* C) { + Expression e; + + e.firstVN = lookup_or_add(C->getOperand(0)); + e.secondVN = 0; + e.thirdVN = 0; + e.type = C->getType(); + e.opcode = getOpcode(C); + + return e; +} + +Expression ValueTable::create_expression(ShuffleVectorInst* S) { + Expression e; + + e.firstVN = lookup_or_add(S->getOperand(0)); + e.secondVN = lookup_or_add(S->getOperand(1)); + e.thirdVN = lookup_or_add(S->getOperand(2)); + e.type = S->getType(); + e.opcode = Expression::SHUFFLE; + + return e; +} + +Expression ValueTable::create_expression(ExtractElementInst* E) { + Expression e; + + e.firstVN = lookup_or_add(E->getOperand(0)); + e.secondVN = lookup_or_add(E->getOperand(1)); + e.thirdVN = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACT; + + return e; +} + +Expression ValueTable::create_expression(InsertElementInst* I) { + Expression e; + + e.firstVN = lookup_or_add(I->getOperand(0)); + e.secondVN = lookup_or_add(I->getOperand(1)); + e.thirdVN = lookup_or_add(I->getOperand(2)); + e.type = I->getType(); + e.opcode = Expression::INSERT; + + return e; +} + +Expression ValueTable::create_expression(SelectInst* I) { + Expression e; + + e.firstVN = lookup_or_add(I->getCondition()); + e.secondVN = lookup_or_add(I->getTrueValue()); + e.thirdVN = lookup_or_add(I->getFalseValue()); + e.type = I->getType(); + e.opcode = Expression::SELECT; + + return e; +} + +Expression ValueTable::create_expression(GetElementPtrInst* G) { + Expression e; + + e.firstVN = lookup_or_add(G->getPointerOperand()); + e.secondVN = 0; + e.thirdVN = 0; + e.type = G->getType(); + e.opcode = Expression::GEP; + + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); + I != E; ++I) + e.varargs.push_back(lookup_or_add(*I)); + + return e; +} + +//===----------------------------------------------------------------------===// +// ValueTable External Functions +//===----------------------------------------------------------------------===// + +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. 
+uint32_t ValueTable::lookup_or_add(Value* V) { + DenseMap::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; + + + if (BinaryOperator* BO = dyn_cast(V)) { + Expression e = create_expression(BO); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (CmpInst* C = dyn_cast(V)) { + Expression e = create_expression(C); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (ShuffleVectorInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (ExtractElementInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (InsertElementInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (SelectInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (CastInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (GetElementPtrInst* U = dyn_cast(V)) { + Expression e = create_expression(U); + + DenseMap::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + 
+      valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+      return nextValueNumber++;
+    }
+  } else {
+    valueNumbering.insert(std::make_pair(V, nextValueNumber));
+    return nextValueNumber++;
+  }
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value* V) const {
+  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+  if (VI != valueNumbering.end())
+    return VI->second;
+  else
+    assert(0 && "Value not numbered?");
+
+  return 0;
+}
+
+/// add - Add the specified value with the given value number, removing
+/// its old number, if any
+void ValueTable::add(Value* V, uint32_t num) {
+  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+  if (VI != valueNumbering.end())
+    valueNumbering.erase(VI);
+  valueNumbering.insert(std::make_pair(V, num));
+}
+
+/// clear - Remove all entries from the ValueTable
+void ValueTable::clear() {
+  valueNumbering.clear();
+  expressionNumbering.clear();
+  nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering
+void ValueTable::erase(Value* V) {
+  valueNumbering.erase(V);
+}
+
+/// size - Return the number of assigned value numbers
+unsigned ValueTable::size() {
+  // NOTE: zero is never assigned
+  return nextValueNumber;
+}
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//                       ValueNumberedSet Class
+//===----------------------------------------------------------------------===//
+
+class ValueNumberedSet {
+  private:
+    SmallPtrSet<Value*, 8> contents;
+    BitVector numbers;
+  public:
+    ValueNumberedSet() { numbers.resize(1); }
+    ValueNumberedSet(const ValueNumberedSet& other) {
+      numbers = other.numbers;
+      contents = other.contents;
+    }
+
+    typedef SmallPtrSet<Value*, 8>::iterator iterator;
+
+    iterator begin() { return contents.begin(); }
+    iterator end() { return contents.end(); }
+
+    bool insert(Value* v) { return contents.insert(v); }
+    void insert(iterator I, iterator E) { contents.insert(I, E); }
+    void erase(Value* v) { contents.erase(v); }
+    unsigned count(Value* v) { return contents.count(v); }
+    size_t size() { return contents.size(); }
+
+    void set(unsigned i) {
+      if (i >= numbers.size())
+        numbers.resize(i+1);
+
+      numbers.set(i);
+    }
+
+    void operator=(const ValueNumberedSet& other) {
+      contents = other.contents;
+      numbers = other.numbers;
+    }
+
+    void reset(unsigned i) {
+      if (i < numbers.size())
+        numbers.reset(i);
+    }
+
+    bool test(unsigned i) {
+      if (i >= numbers.size())
+        return false;
+
+      return numbers.test(i);
+    }
+
+    void clear() {
+      contents.clear();
+      numbers.clear();
+    }
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+//                         GVNPRE Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+  class VISIBILITY_HIDDEN GVNPRE : public FunctionPass {
+    bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    GVNPRE() : FunctionPass(&ID) {}
+
+  private:
+    ValueTable VN;
+    SmallVector<Instruction*, 8> createdExpressions;
+
+    DenseMap<BasicBlock*, ValueNumberedSet> availableOut;
+    DenseMap<BasicBlock*, ValueNumberedSet> anticipatedIn;
+    DenseMap<BasicBlock*, ValueNumberedSet> generatedPhis;
+
+    // This transformation requires dominator and postdominator info.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequiredID(BreakCriticalEdgesID);
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<PostDominatorTree>();
+    }
+
+    // Helper functions
+    // FIXME: eliminate or document these better
+    void dump(ValueNumberedSet& s) const;
+    void clean(ValueNumberedSet& set);
+    Value* find_leader(ValueNumberedSet& vals, uint32_t v);
+    Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ);
+    void phi_translate_set(ValueNumberedSet& anticIn, BasicBlock* pred,
+                           BasicBlock* succ, ValueNumberedSet& out);
+
+    void topo_sort(ValueNumberedSet& set,
+                   SmallVector<Value*, 8>& vec);
+
+    void cleanup();
+    bool elimination();
+
+    void val_insert(ValueNumberedSet& s, Value* v);
+    void val_replace(ValueNumberedSet& s, Value* v);
+    bool dependsOnInvoke(Value* V);
+    void buildsets_availout(BasicBlock::iterator I,
+                            ValueNumberedSet& currAvail,
+                            ValueNumberedSet& currPhis,
+                            ValueNumberedSet& currExps,
+                            SmallPtrSet<Instruction*, 16>& currTemps);
+    bool buildsets_anticout(BasicBlock* BB,
+                            ValueNumberedSet& anticOut,
+                            SmallPtrSet<BasicBlock*, 8>& visited);
+    unsigned buildsets_anticin(BasicBlock* BB,
+                               ValueNumberedSet& anticOut,
+                               ValueNumberedSet& currExps,
+                               SmallPtrSet<Instruction*, 16>& currTemps,
+                               SmallPtrSet<BasicBlock*, 8>& visited);
+    void buildsets(Function& F);
+
+    void insertion_pre(Value* e, BasicBlock* BB,
+                       DenseMap<BasicBlock*, Value*>& avail,
+                       std::map<BasicBlock*, ValueNumberedSet>& new_set);
+    unsigned insertion_mergepoint(SmallVector<Value*, 8>& workList,
+                                  df_iterator<DomTreeNode*>& D,
+                                  std::map<BasicBlock*, ValueNumberedSet>& new_set);
+    bool insertion(Function& F);
+
+  };
+
+  char GVNPRE::ID = 0;
+
+}
+
+// createGVNPREPass - The public interface to this file...
+FunctionPass *llvm::createGVNPREPass() { return new GVNPRE(); }
+
+static RegisterPass<GVNPRE> X("gvnpre",
+                  "Global Value Numbering/Partial Redundancy Elimination");
+
+STATISTIC(NumInsertedVals, "Number of values inserted");
+STATISTIC(NumInsertedPhis, "Number of PHI nodes inserted");
+STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
+
+/// find_leader - Given a set and a value number, return the first
+/// element of the set with that value number, or 0 if no such element
+/// is present
+Value* GVNPRE::find_leader(ValueNumberedSet& vals, uint32_t v) {
+  if (!vals.test(v))
+    return 0;
+
+  for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
+       I != E; ++I)
+    if (v == VN.lookup(*I))
+      return *I;
+
+  assert(0 && "No leader found, but present bit is set?");
+  return 0;
+}
+
+/// val_insert - Insert a value into a set only if there is not a value
+/// with the same value number already in the set
+void GVNPRE::val_insert(ValueNumberedSet& s, Value* v) {
+  uint32_t num = VN.lookup(v);
+  if (!s.test(num))
+    s.insert(v);
+}
+
+/// val_replace - Insert a value into a set, replacing any values already in
+/// the set that have the same value number
+void GVNPRE::val_replace(ValueNumberedSet& s, Value* v) {
+  if (s.count(v)) return;
+
+  uint32_t num = VN.lookup(v);
+  Value* leader = find_leader(s, num);
+  if (leader != 0)
+    s.erase(leader);
+  s.insert(v);
+  s.set(num);
+}
+
+/// phi_translate - Given a value, its parent block, and a predecessor of its
+/// parent, translate the value into a form legal for the predecessor block.
+/// This means translating its operands (and recursively, their operands)
+/// through any phi nodes in the parent into values available in the
+/// predecessor.
+Value* GVNPRE::phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) {
+  if (V == 0)
+    return 0;
+
+  // Unary Operations
+  if (CastInst* U = dyn_cast<CastInst>(V)) {
+    Value* newOp1 = 0;
+    if (isa<Instruction>(U->getOperand(0)))
+      newOp1 = phi_translate(U->getOperand(0), pred, succ);
+    else
+      newOp1 = U->getOperand(0);
+
+    if (newOp1 == 0)
+      return 0;
+
+    if (newOp1 != U->getOperand(0)) {
+      Instruction* newVal = 0;
+      if (CastInst* C = dyn_cast<CastInst>(U))
+        newVal = CastInst::Create(C->getOpcode(),
+                                  newOp1, C->getType(),
+                                  C->getName()+".expr");
+
+      uint32_t v = VN.lookup_or_add(newVal);
+
+      Value* leader = find_leader(availableOut[pred], v);
+      if (leader == 0) {
+        createdExpressions.push_back(newVal);
+        return newVal;
+      } else {
+        VN.erase(newVal);
+        delete newVal;
+        return leader;
+      }
+    }
+
+  // Binary Operations
+  } else if (isa<BinaryOperator>(V) || isa<CmpInst>(V) ||
+             isa<ExtractElementInst>(V)) {
+    User* U = cast<User>(V);
+
+    Value* newOp1 = 0;
+    if (isa<Instruction>(U->getOperand(0)))
+      newOp1 = phi_translate(U->getOperand(0), pred, succ);
+    else
+      newOp1 = U->getOperand(0);
+
+    if (newOp1 == 0)
+      return 0;
+
+    Value* newOp2 = 0;
+    if (isa<Instruction>(U->getOperand(1)))
+      newOp2 = phi_translate(U->getOperand(1), pred, succ);
+    else
+      newOp2 = U->getOperand(1);
+
+    if (newOp2 == 0)
+      return 0;
+
+    if (newOp1 != U->getOperand(0) || newOp2 != U->getOperand(1)) {
+      Instruction* newVal = 0;
+      if (BinaryOperator* BO = dyn_cast<BinaryOperator>(U))
+        newVal = BinaryOperator::Create(BO->getOpcode(),
+                                        newOp1, newOp2,
+                                        BO->getName()+".expr");
+      else if (CmpInst* C = dyn_cast<CmpInst>(U))
+        newVal = CmpInst::Create(C->getOpcode(),
+                                 C->getPredicate(),
+                                 newOp1, newOp2,
+                                 C->getName()+".expr");
+      else if (ExtractElementInst* E = dyn_cast<ExtractElementInst>(U))
+        newVal = new ExtractElementInst(newOp1, newOp2, E->getName()+".expr");
+
+      uint32_t v = VN.lookup_or_add(newVal);
+
+      Value* leader = find_leader(availableOut[pred], v);
+      if (leader == 0) {
+        createdExpressions.push_back(newVal);
+        return newVal;
+      } else {
+        VN.erase(newVal);
+        delete newVal;
+        return leader;
+      }
+    }
+
+  // Ternary Operations
+  } else if (isa<ShuffleVectorInst>(V) || isa<InsertElementInst>(V) ||
+             isa<SelectInst>(V)) {
+    User* U = cast<User>(V);
+
+    Value* newOp1 = 0;
+    if (isa<Instruction>(U->getOperand(0)))
+      newOp1 = phi_translate(U->getOperand(0), pred, succ);
+    else
+      newOp1 = U->getOperand(0);
+
+    if (newOp1 == 0)
+      return 0;
+
+    Value* newOp2 = 0;
+    if (isa<Instruction>(U->getOperand(1)))
+      newOp2 = phi_translate(U->getOperand(1), pred, succ);
+    else
+      newOp2 = U->getOperand(1);
+
+    if (newOp2 == 0)
+      return 0;
+
+    Value* newOp3 = 0;
+    if (isa<Instruction>(U->getOperand(2)))
+      newOp3 = phi_translate(U->getOperand(2), pred, succ);
+    else
+      newOp3 = U->getOperand(2);
+
+    if (newOp3 == 0)
+      return 0;
+
+    if (newOp1 != U->getOperand(0) ||
+        newOp2 != U->getOperand(1) ||
+        newOp3 != U->getOperand(2)) {
+      Instruction* newVal = 0;
+      if (ShuffleVectorInst* S = dyn_cast<ShuffleVectorInst>(U))
+        newVal = new ShuffleVectorInst(newOp1, newOp2, newOp3,
+                                       S->getName() + ".expr");
+      else if (InsertElementInst* I = dyn_cast<InsertElementInst>(U))
+        newVal = InsertElementInst::Create(newOp1, newOp2, newOp3,
+                                           I->getName() + ".expr");
+      else if (SelectInst* I = dyn_cast<SelectInst>(U))
+        newVal = SelectInst::Create(newOp1, newOp2, newOp3,
+                                    I->getName() + ".expr");
+
+      uint32_t v = VN.lookup_or_add(newVal);
+
+      Value* leader = find_leader(availableOut[pred], v);
+      if (leader == 0) {
+        createdExpressions.push_back(newVal);
+        return newVal;
+      } else {
+        VN.erase(newVal);
+        delete newVal;
+        return leader;
+      }
+    }
+
+  // Varargs operators
+  } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
+    Value* newOp1 = 0;
+    if (isa<Instruction>(U->getPointerOperand()))
+      newOp1 = phi_translate(U->getPointerOperand(), pred, succ);
+    else
+      newOp1 = U->getPointerOperand();
+
+    if (newOp1 == 0)
+      return 0;
+
+    bool changed_idx = false;
+    SmallVector<Value*, 4> newIdx;
+    for (GetElementPtrInst::op_iterator I = U->idx_begin(), E = U->idx_end();
+         I != E; ++I)
+      if (isa<Instruction>(*I)) {
+        Value* newVal = phi_translate(*I, pred, succ);
+        newIdx.push_back(newVal);
+        if (newVal != *I)
+          changed_idx = true;
+      } else {
+        newIdx.push_back(*I);
+      }
+
+    if (newOp1 != U->getPointerOperand() || changed_idx) {
+      Instruction* newVal =
+          GetElementPtrInst::Create(newOp1,
+                                    newIdx.begin(), newIdx.end(),
+                                    U->getName()+".expr");
+
+      uint32_t v = VN.lookup_or_add(newVal);
+
+      Value* leader = find_leader(availableOut[pred], v);
+      if (leader == 0) {
+        createdExpressions.push_back(newVal);
+        return newVal;
+      } else {
+        VN.erase(newVal);
+        delete newVal;
+        return leader;
+      }
+    }
+
+  // PHI Nodes
+  } else if (PHINode* P = dyn_cast<PHINode>(V)) {
+    if (P->getParent() == succ)
+      return P->getIncomingValueForBlock(pred);
+  }
+
+  return V;
+}
+
+/// phi_translate_set - Perform phi translation on every element of a set
+void GVNPRE::phi_translate_set(ValueNumberedSet& anticIn,
+                               BasicBlock* pred, BasicBlock* succ,
+                               ValueNumberedSet& out) {
+  for (ValueNumberedSet::iterator I = anticIn.begin(),
+       E = anticIn.end(); I != E; ++I) {
+    Value* V = phi_translate(*I, pred, succ);
+    if (V != 0 && !out.test(VN.lookup_or_add(V))) {
+      out.insert(V);
+      out.set(VN.lookup(V));
+    }
+  }
+}
+
+/// dependsOnInvoke - Test if a value has a phi node as an operand, any of
+/// whose inputs is an invoke instruction. If this is true, we cannot safely
+/// PRE the instruction or anything that depends on it.
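To see phi_translate above in isolation: it rebuilds an expression in a predecessor by replacing each phi-defined operand with that phi's incoming value for the predecessor. A toy model, illustrative only (strings stand in for Value*, and there is no recursion or leader lookup):

#include <iostream>
#include <map>
#include <string>

// A phi node modeled as: result name -> { predecessor block -> incoming value }.
static std::map<std::string, std::map<std::string, std::string>> phis;

// phi_translate analogue for one operand: if the operand is a phi defined
// in the successor block, return its incoming value for `pred`.
std::string translate(const std::string &op, const std::string &pred) {
  auto it = phis.find(op);
  return it == phis.end() ? op : it->second.at(pred);
}

int main() {
  // In block "join": a = phi [a0, "left"], [a1, "right"]
  phis["a"] = {{"left", "a0"}, {"right", "a1"}};
  // The expression "a + b" in "join", translated into each predecessor:
  std::cout << translate("a", "left")  << " + " << translate("b", "left")  << "\n"; // a0 + b
  std::cout << translate("a", "right") << " + " << translate("b", "right") << "\n"; // a1 + b
}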
+bool GVNPRE::dependsOnInvoke(Value* V) { + if (PHINode* p = dyn_cast(V)) { + for (PHINode::op_iterator I = p->op_begin(), E = p->op_end(); I != E; ++I) + if (isa(*I)) + return true; + return false; + } else { + return false; + } +} + +/// clean - Remove all non-opaque values from the set whose operands are not +/// themselves in the set, as well as all values that depend on invokes (see +/// above) +void GVNPRE::clean(ValueNumberedSet& set) { + SmallVector worklist; + worklist.reserve(set.size()); + topo_sort(set, worklist); + + for (unsigned i = 0; i < worklist.size(); ++i) { + Value* v = worklist[i]; + + // Handle unary ops + if (CastInst* U = dyn_cast(v)) { + bool lhsValid = !isa(U->getOperand(0)); + lhsValid |= set.test(VN.lookup(U->getOperand(0))); + if (lhsValid) + lhsValid = !dependsOnInvoke(U->getOperand(0)); + + if (!lhsValid) { + set.erase(U); + set.reset(VN.lookup(U)); + } + + // Handle binary ops + } else if (isa(v) || isa(v) || + isa(v)) { + User* U = cast(v); + + bool lhsValid = !isa(U->getOperand(0)); + lhsValid |= set.test(VN.lookup(U->getOperand(0))); + if (lhsValid) + lhsValid = !dependsOnInvoke(U->getOperand(0)); + + bool rhsValid = !isa(U->getOperand(1)); + rhsValid |= set.test(VN.lookup(U->getOperand(1))); + if (rhsValid) + rhsValid = !dependsOnInvoke(U->getOperand(1)); + + if (!lhsValid || !rhsValid) { + set.erase(U); + set.reset(VN.lookup(U)); + } + + // Handle ternary ops + } else if (isa(v) || isa(v) || + isa(v)) { + User* U = cast(v); + + bool lhsValid = !isa(U->getOperand(0)); + lhsValid |= set.test(VN.lookup(U->getOperand(0))); + if (lhsValid) + lhsValid = !dependsOnInvoke(U->getOperand(0)); + + bool rhsValid = !isa(U->getOperand(1)); + rhsValid |= set.test(VN.lookup(U->getOperand(1))); + if (rhsValid) + rhsValid = !dependsOnInvoke(U->getOperand(1)); + + bool thirdValid = !isa(U->getOperand(2)); + thirdValid |= set.test(VN.lookup(U->getOperand(2))); + if (thirdValid) + thirdValid = !dependsOnInvoke(U->getOperand(2)); + + if (!lhsValid || !rhsValid || !thirdValid) { + set.erase(U); + set.reset(VN.lookup(U)); + } + + // Handle varargs ops + } else if (GetElementPtrInst* U = dyn_cast(v)) { + bool ptrValid = !isa(U->getPointerOperand()); + ptrValid |= set.test(VN.lookup(U->getPointerOperand())); + if (ptrValid) + ptrValid = !dependsOnInvoke(U->getPointerOperand()); + + bool varValid = true; + for (GetElementPtrInst::op_iterator I = U->idx_begin(), E = U->idx_end(); + I != E; ++I) + if (varValid) { + varValid &= !isa(*I) || set.test(VN.lookup(*I)); + varValid &= !dependsOnInvoke(*I); + } + + if (!ptrValid || !varValid) { + set.erase(U); + set.reset(VN.lookup(U)); + } + } + } +} + +/// topo_sort - Given a set of values, sort them by topological +/// order into the provided vector. 
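The walk implemented below visits each value's operands before the value itself, using an explicit stack instead of recursion. The same idea on a toy dependency map, assuming (as the real code may) that dependencies within the set are acyclic; all names here are illustrative:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Operand (dependency) edges within the set: user -> operands.
  std::map<std::string, std::vector<std::string>> deps = {
      {"c", {"a", "b"}}, {"d", {"c"}}, {"a", {}}, {"b", {}}};

  std::set<std::string> visited;
  std::vector<std::string> order;  // operands end up before their users
  std::vector<std::string> stack;

  for (const auto &entry : deps) {
    stack.push_back(entry.first);
    while (!stack.empty()) {
      std::string v = stack.back();
      bool pushed = false;
      for (const std::string &op : deps[v])   // visit operands first
        if (!visited.count(op)) { stack.push_back(op); pushed = true; }
      if (!pushed) {                          // all operands done: emit v
        if (visited.insert(v).second) order.push_back(v);
        stack.pop_back();
      }
    }
  }
  for (const std::string &v : order) std::cout << v << " ";  // a b c d
  std::cout << "\n";
}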
+void GVNPRE::topo_sort(ValueNumberedSet& set, SmallVector& vec) { + SmallPtrSet visited; + SmallVector stack; + for (ValueNumberedSet::iterator I = set.begin(), E = set.end(); + I != E; ++I) { + if (visited.count(*I) == 0) + stack.push_back(*I); + + while (!stack.empty()) { + Value* e = stack.back(); + + // Handle unary ops + if (CastInst* U = dyn_cast(e)) { + Value* l = find_leader(set, VN.lookup(U->getOperand(0))); + + if (l != 0 && isa(l) && + visited.count(l) == 0) + stack.push_back(l); + else { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); + } + + // Handle binary ops + } else if (isa(e) || isa(e) || + isa(e)) { + User* U = cast(e); + Value* l = find_leader(set, VN.lookup(U->getOperand(0))); + Value* r = find_leader(set, VN.lookup(U->getOperand(1))); + + if (l != 0 && isa(l) && + visited.count(l) == 0) + stack.push_back(l); + else if (r != 0 && isa(r) && + visited.count(r) == 0) + stack.push_back(r); + else { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); + } + + // Handle ternary ops + } else if (isa(e) || isa(e) || + isa(e)) { + User* U = cast(e); + Value* l = find_leader(set, VN.lookup(U->getOperand(0))); + Value* r = find_leader(set, VN.lookup(U->getOperand(1))); + Value* m = find_leader(set, VN.lookup(U->getOperand(2))); + + if (l != 0 && isa(l) && + visited.count(l) == 0) + stack.push_back(l); + else if (r != 0 && isa(r) && + visited.count(r) == 0) + stack.push_back(r); + else if (m != 0 && isa(m) && + visited.count(m) == 0) + stack.push_back(m); + else { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); + } + + // Handle vararg ops + } else if (GetElementPtrInst* U = dyn_cast(e)) { + Value* p = find_leader(set, VN.lookup(U->getPointerOperand())); + + if (p != 0 && isa(p) && + visited.count(p) == 0) + stack.push_back(p); + else { + bool push_va = false; + for (GetElementPtrInst::op_iterator I = U->idx_begin(), + E = U->idx_end(); I != E; ++I) { + Value * v = find_leader(set, VN.lookup(*I)); + if (v != 0 && isa(v) && visited.count(v) == 0) { + stack.push_back(v); + push_va = true; + } + } + + if (!push_va) { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); + } + } + + // Handle opaque ops + } else { + visited.insert(e); + vec.push_back(e); + stack.pop_back(); + } + } + + stack.clear(); + } +} + +/// dump - Dump a set of values to standard error +void GVNPRE::dump(ValueNumberedSet& s) const { + DOUT << "{ "; + for (ValueNumberedSet::iterator I = s.begin(), E = s.end(); + I != E; ++I) { + DOUT << "" << VN.lookup(*I) << ": "; + DEBUG((*I)->dump()); + } + DOUT << "}\n\n"; +} + +/// elimination - Phase 3 of the main algorithm. Perform full redundancy +/// elimination by walking the dominator tree and removing any instruction that +/// is dominated by another instruction with the same value number. 
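The elimination phase below reduces to one rule: walking blocks in dominance order, the first definition seen for a value number becomes the leader, and any later instruction with the same number is replaced by it. A list-based model, illustrative only (assumes the instructions are already given in dominance order):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (name, value number) pairs in dominance order, e.g. from a dom-tree walk.
  std::vector<std::pair<std::string, uint32_t>> insts = {
      {"x", 3}, {"y", 4}, {"z", 3}};  // z recomputes x's expression

  std::map<uint32_t, std::string> leader;  // first dominating definition
  for (auto &[name, vn] : insts) {
    auto [it, fresh] = leader.emplace(vn, name);
    if (!fresh)  // a dominating leader exists: z's uses become uses of x
      std::cout << "replace " << name << " with " << it->second << "\n";
  }
}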
+bool GVNPRE::elimination() { + bool changed_function = false; + + SmallVector, 8> replace; + SmallVector erase; + + DominatorTree& DT = getAnalysis(); + + for (df_iterator DI = df_begin(DT.getRootNode()), + E = df_end(DT.getRootNode()); DI != E; ++DI) { + BasicBlock* BB = DI->getBlock(); + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + + if (isa(BI) || isa(BI) || + isa(BI) || isa(BI) || + isa(BI) || isa(BI) || + isa(BI) || isa(BI)) { + + if (availableOut[BB].test(VN.lookup(BI)) && + !availableOut[BB].count(BI)) { + Value *leader = find_leader(availableOut[BB], VN.lookup(BI)); + if (Instruction* Instr = dyn_cast(leader)) + if (Instr->getParent() != 0 && Instr != BI) { + replace.push_back(std::make_pair(BI, leader)); + erase.push_back(BI); + ++NumEliminated; + } + } + } + } + } + + while (!replace.empty()) { + std::pair rep = replace.back(); + replace.pop_back(); + rep.first->replaceAllUsesWith(rep.second); + changed_function = true; + } + + for (SmallVector::iterator I = erase.begin(), + E = erase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return changed_function; +} + +/// cleanup - Delete any extraneous values that were created to represent +/// expressions without leaders. +void GVNPRE::cleanup() { + while (!createdExpressions.empty()) { + Instruction* I = createdExpressions.back(); + createdExpressions.pop_back(); + + delete I; + } +} + +/// buildsets_availout - When calculating availability, handle an instruction +/// by inserting it into the appropriate sets +void GVNPRE::buildsets_availout(BasicBlock::iterator I, + ValueNumberedSet& currAvail, + ValueNumberedSet& currPhis, + ValueNumberedSet& currExps, + SmallPtrSet& currTemps) { + // Handle PHI nodes + if (PHINode* p = dyn_cast(I)) { + unsigned num = VN.lookup_or_add(p); + + currPhis.insert(p); + currPhis.set(num); + + // Handle unary ops + } else if (CastInst* U = dyn_cast(I)) { + Value* leftValue = U->getOperand(0); + + unsigned num = VN.lookup_or_add(U); + + if (isa(leftValue)) + if (!currExps.test(VN.lookup(leftValue))) { + currExps.insert(leftValue); + currExps.set(VN.lookup(leftValue)); + } + + if (!currExps.test(num)) { + currExps.insert(U); + currExps.set(num); + } + + // Handle binary ops + } else if (isa(I) || isa(I) || + isa(I)) { + User* U = cast(I); + Value* leftValue = U->getOperand(0); + Value* rightValue = U->getOperand(1); + + unsigned num = VN.lookup_or_add(U); + + if (isa(leftValue)) + if (!currExps.test(VN.lookup(leftValue))) { + currExps.insert(leftValue); + currExps.set(VN.lookup(leftValue)); + } + + if (isa(rightValue)) + if (!currExps.test(VN.lookup(rightValue))) { + currExps.insert(rightValue); + currExps.set(VN.lookup(rightValue)); + } + + if (!currExps.test(num)) { + currExps.insert(U); + currExps.set(num); + } + + // Handle ternary ops + } else if (isa(I) || isa(I) || + isa(I)) { + User* U = cast(I); + Value* leftValue = U->getOperand(0); + Value* rightValue = U->getOperand(1); + Value* thirdValue = U->getOperand(2); + + VN.lookup_or_add(U); + + unsigned num = VN.lookup_or_add(U); + + if (isa(leftValue)) + if (!currExps.test(VN.lookup(leftValue))) { + currExps.insert(leftValue); + currExps.set(VN.lookup(leftValue)); + } + if (isa(rightValue)) + if (!currExps.test(VN.lookup(rightValue))) { + currExps.insert(rightValue); + currExps.set(VN.lookup(rightValue)); + } + if (isa(thirdValue)) + if (!currExps.test(VN.lookup(thirdValue))) { + currExps.insert(thirdValue); + currExps.set(VN.lookup(thirdValue)); + } + + if (!currExps.test(num)) { + currExps.insert(U); + 
currExps.set(num); + } + + // Handle vararg ops + } else if (GetElementPtrInst* U = dyn_cast(I)) { + Value* ptrValue = U->getPointerOperand(); + + VN.lookup_or_add(U); + + unsigned num = VN.lookup_or_add(U); + + if (isa(ptrValue)) + if (!currExps.test(VN.lookup(ptrValue))) { + currExps.insert(ptrValue); + currExps.set(VN.lookup(ptrValue)); + } + + for (GetElementPtrInst::op_iterator OI = U->idx_begin(), OE = U->idx_end(); + OI != OE; ++OI) + if (isa(*OI) && !currExps.test(VN.lookup(*OI))) { + currExps.insert(*OI); + currExps.set(VN.lookup(*OI)); + } + + if (!currExps.test(VN.lookup(U))) { + currExps.insert(U); + currExps.set(num); + } + + // Handle opaque ops + } else if (!I->isTerminator()){ + VN.lookup_or_add(I); + + currTemps.insert(I); + } + + if (!I->isTerminator()) + if (!currAvail.test(VN.lookup(I))) { + currAvail.insert(I); + currAvail.set(VN.lookup(I)); + } +} + +/// buildsets_anticout - When walking the postdom tree, calculate the ANTIC_OUT +/// set as a function of the ANTIC_IN set of the block's predecessors +bool GVNPRE::buildsets_anticout(BasicBlock* BB, + ValueNumberedSet& anticOut, + SmallPtrSet& visited) { + if (BB->getTerminator()->getNumSuccessors() == 1) { + if (BB->getTerminator()->getSuccessor(0) != BB && + visited.count(BB->getTerminator()->getSuccessor(0)) == 0) { + return true; + } + else { + phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], + BB, BB->getTerminator()->getSuccessor(0), anticOut); + } + } else if (BB->getTerminator()->getNumSuccessors() > 1) { + BasicBlock* first = BB->getTerminator()->getSuccessor(0); + for (ValueNumberedSet::iterator I = anticipatedIn[first].begin(), + E = anticipatedIn[first].end(); I != E; ++I) { + anticOut.insert(*I); + anticOut.set(VN.lookup(*I)); + } + + for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) { + BasicBlock* currSucc = BB->getTerminator()->getSuccessor(i); + ValueNumberedSet& succAnticIn = anticipatedIn[currSucc]; + + SmallVector temp; + + for (ValueNumberedSet::iterator I = anticOut.begin(), + E = anticOut.end(); I != E; ++I) + if (!succAnticIn.test(VN.lookup(*I))) + temp.push_back(*I); + + for (SmallVector::iterator I = temp.begin(), E = temp.end(); + I != E; ++I) { + anticOut.erase(*I); + anticOut.reset(VN.lookup(*I)); + } + } + } + + return false; +} + +/// buildsets_anticin - Walk the postdom tree, calculating ANTIC_OUT for +/// each block. ANTIC_IN is then a function of ANTIC_OUT and the GEN +/// sets populated in buildsets_availout +unsigned GVNPRE::buildsets_anticin(BasicBlock* BB, + ValueNumberedSet& anticOut, + ValueNumberedSet& currExps, + SmallPtrSet& currTemps, + SmallPtrSet& visited) { + ValueNumberedSet& anticIn = anticipatedIn[BB]; + unsigned old = anticIn.size(); + + bool defer = buildsets_anticout(BB, anticOut, visited); + if (defer) + return 0; + + anticIn.clear(); + + for (ValueNumberedSet::iterator I = anticOut.begin(), + E = anticOut.end(); I != E; ++I) { + anticIn.insert(*I); + anticIn.set(VN.lookup(*I)); + } + for (ValueNumberedSet::iterator I = currExps.begin(), + E = currExps.end(); I != E; ++I) { + if (!anticIn.test(VN.lookup(*I))) { + anticIn.insert(*I); + anticIn.set(VN.lookup(*I)); + } + } + + for (SmallPtrSet::iterator I = currTemps.begin(), + E = currTemps.end(); I != E; ++I) { + anticIn.erase(*I); + anticIn.reset(VN.lookup(*I)); + } + + clean(anticIn); + anticOut.clear(); + + if (old != anticIn.size()) + return 2; + else + return 1; +} + +/// buildsets - Phase 1 of the main algorithm. Construct the AVAIL_OUT +/// and the ANTIC_IN sets. 
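The multi-successor case of buildsets_anticout above is a set intersection: a value is anticipated out of a block only if every successor anticipates it. The same step with plain STL sets, illustrative only:

#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>

int main() {
  // ANTIC_IN of each successor of the block.
  std::vector<std::set<std::string>> succAnticIn = {
      {"a+b", "c*d"}, {"a+b", "e-f"}};

  // Start from the first successor, then erase anything a later
  // successor does not anticipate.
  std::set<std::string> anticOut = succAnticIn[0];
  for (size_t i = 1; i < succAnticIn.size(); ++i)
    for (auto it = anticOut.begin(); it != anticOut.end();)
      it = succAnticIn[i].count(*it) ? std::next(it) : anticOut.erase(it);

  for (const auto &e : anticOut) std::cout << e << "\n";  // a+b
}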
+void GVNPRE::buildsets(Function& F) { + DenseMap generatedExpressions; + DenseMap > generatedTemporaries; + + DominatorTree &DT = getAnalysis(); + + // Phase 1, Part 1: calculate AVAIL_OUT + + // Top-down walk of the dominator tree + for (df_iterator DI = df_begin(DT.getRootNode()), + E = df_end(DT.getRootNode()); DI != E; ++DI) { + + // Get the sets to update for this block + ValueNumberedSet& currExps = generatedExpressions[DI->getBlock()]; + ValueNumberedSet& currPhis = generatedPhis[DI->getBlock()]; + SmallPtrSet& currTemps = generatedTemporaries[DI->getBlock()]; + ValueNumberedSet& currAvail = availableOut[DI->getBlock()]; + + BasicBlock* BB = DI->getBlock(); + + // A block inherits AVAIL_OUT from its dominator + if (DI->getIDom() != 0) + currAvail = availableOut[DI->getIDom()->getBlock()]; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) + buildsets_availout(BI, currAvail, currPhis, currExps, + currTemps); + + } + + // Phase 1, Part 2: calculate ANTIC_IN + + SmallPtrSet visited; + SmallPtrSet block_changed; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + block_changed.insert(FI); + + bool changed = true; + unsigned iterations = 0; + + while (changed) { + changed = false; + ValueNumberedSet anticOut; + + // Postorder walk of the CFG + for (po_iterator BBI = po_begin(&F.getEntryBlock()), + BBE = po_end(&F.getEntryBlock()); BBI != BBE; ++BBI) { + BasicBlock* BB = *BBI; + + if (block_changed.count(BB) != 0) { + unsigned ret = buildsets_anticin(BB, anticOut,generatedExpressions[BB], + generatedTemporaries[BB], visited); + + if (ret == 0) { + changed = true; + continue; + } else { + visited.insert(BB); + + if (ret == 2) + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + block_changed.insert(*PI); + } + else + block_changed.erase(BB); + + changed |= (ret == 2); + } + } + } + + iterations++; + } +} + +/// insertion_pre - When a partial redundancy has been identified, eliminate it +/// by inserting appropriate values into the predecessors and a phi node in +/// the main block +void GVNPRE::insertion_pre(Value* e, BasicBlock* BB, + DenseMap& avail, + std::map& new_sets) { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + Value* e2 = avail[*PI]; + if (!availableOut[*PI].test(VN.lookup(e2))) { + User* U = cast(e2); + + Value* s1 = 0; + if (isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0)) || + isa(U->getOperand(0))) + s1 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(0))); + else + s1 = U->getOperand(0); + + Value* s2 = 0; + + if (isa(U) || + isa(U) || + isa(U) || + isa(U) || + isa(U) || + isa(U)) { + if (isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1)) || + isa(U->getOperand(1))) { + s2 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(1))); + } else { + s2 = U->getOperand(1); + } + } + + // Ternary Operators + Value* s3 = 0; + if (isa(U) || + isa(U) || + isa(U)) { + if (isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2)) || + isa(U->getOperand(2))) { + s3 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(2))); + } else { + s3 = U->getOperand(2); + } + } + 
+ // Vararg operators + SmallVector sVarargs; + if (GetElementPtrInst* G = dyn_cast(U)) { + for (GetElementPtrInst::op_iterator OI = G->idx_begin(), + OE = G->idx_end(); OI != OE; ++OI) { + if (isa(*OI) || + isa(*OI) || + isa(*OI) || + isa(*OI) || + isa(*OI) || + isa(*OI) || + isa(*OI) || + isa(*OI)) { + sVarargs.push_back(find_leader(availableOut[*PI], + VN.lookup(*OI))); + } else { + sVarargs.push_back(*OI); + } + } + } + + Value* newVal = 0; + if (BinaryOperator* BO = dyn_cast(U)) + newVal = BinaryOperator::Create(BO->getOpcode(), s1, s2, + BO->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (CmpInst* C = dyn_cast(U)) + newVal = CmpInst::Create(C->getOpcode(), C->getPredicate(), s1, s2, + C->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (ShuffleVectorInst* S = dyn_cast(U)) + newVal = new ShuffleVectorInst(s1, s2, s3, S->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (InsertElementInst* S = dyn_cast(U)) + newVal = InsertElementInst::Create(s1, s2, s3, S->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (ExtractElementInst* S = dyn_cast(U)) + newVal = new ExtractElementInst(s1, s2, S->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (SelectInst* S = dyn_cast(U)) + newVal = SelectInst::Create(s1, s2, s3, S->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (CastInst* C = dyn_cast(U)) + newVal = CastInst::Create(C->getOpcode(), s1, C->getType(), + C->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (GetElementPtrInst* G = dyn_cast(U)) + newVal = GetElementPtrInst::Create(s1, sVarargs.begin(), sVarargs.end(), + G->getName()+".gvnpre", + (*PI)->getTerminator()); + + VN.add(newVal, VN.lookup(U)); + + ValueNumberedSet& predAvail = availableOut[*PI]; + val_replace(predAvail, newVal); + val_replace(new_sets[*PI], newVal); + predAvail.set(VN.lookup(newVal)); + + DenseMap::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, newVal)); + + ++NumInsertedVals; + } + } + + PHINode* p = 0; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (p == 0) + p = PHINode::Create(avail[*PI]->getType(), "gvnpre-join", BB->begin()); + + p->addIncoming(avail[*PI], *PI); + } + + VN.add(p, VN.lookup(e)); + val_replace(availableOut[BB], p); + availableOut[BB].set(VN.lookup(e)); + generatedPhis[BB].insert(p); + generatedPhis[BB].set(VN.lookup(e)); + new_sets[BB].insert(p); + new_sets[BB].set(VN.lookup(e)); + + ++NumInsertedPhis; +} + +/// insertion_mergepoint - When walking the dom tree, check at each merge +/// block for the possibility of a partial redundancy. 
If present, eliminate it +unsigned GVNPRE::insertion_mergepoint(SmallVector& workList, + df_iterator& D, + std::map& new_sets) { + bool changed_function = false; + bool new_stuff = false; + + BasicBlock* BB = D->getBlock(); + for (unsigned i = 0; i < workList.size(); ++i) { + Value* e = workList[i]; + + if (isa(e) || isa(e) || + isa(e) || isa(e) || + isa(e) || isa(e) || isa(e) || + isa(e)) { + if (availableOut[D->getIDom()->getBlock()].test(VN.lookup(e))) + continue; + + DenseMap avail; + bool by_some = false; + bool all_same = true; + Value * first_s = 0; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; + ++PI) { + Value *e2 = phi_translate(e, *PI, BB); + Value *e3 = find_leader(availableOut[*PI], VN.lookup(e2)); + + if (e3 == 0) { + DenseMap::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, e2)); + all_same = false; + } else { + DenseMap::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, e3)); + + by_some = true; + if (first_s == 0) + first_s = e3; + else if (first_s != e3) + all_same = false; + } + } + + if (by_some && !all_same && + !generatedPhis[BB].test(VN.lookup(e))) { + insertion_pre(e, BB, avail, new_sets); + + changed_function = true; + new_stuff = true; + } + } + } + + unsigned retval = 0; + if (changed_function) + retval += 1; + if (new_stuff) + retval += 2; + + return retval; +} + +/// insert - Phase 2 of the main algorithm. Walk the dominator tree looking for +/// merge points. When one is found, check for a partial redundancy. If one is +/// present, eliminate it. Repeat this walk until no changes are made. +bool GVNPRE::insertion(Function& F) { + bool changed_function = false; + + DominatorTree &DT = getAnalysis(); + + std::map new_sets; + bool new_stuff = true; + while (new_stuff) { + new_stuff = false; + for (df_iterator DI = df_begin(DT.getRootNode()), + E = df_end(DT.getRootNode()); DI != E; ++DI) { + BasicBlock* BB = DI->getBlock(); + + if (BB == 0) + continue; + + ValueNumberedSet& availOut = availableOut[BB]; + ValueNumberedSet& anticIn = anticipatedIn[BB]; + + // Replace leaders with leaders inherited from dominator + if (DI->getIDom() != 0) { + ValueNumberedSet& dom_set = new_sets[DI->getIDom()->getBlock()]; + for (ValueNumberedSet::iterator I = dom_set.begin(), + E = dom_set.end(); I != E; ++I) { + val_replace(new_sets[BB], *I); + val_replace(availOut, *I); + } + } + + // If there is more than one predecessor... + if (pred_begin(BB) != pred_end(BB) && ++pred_begin(BB) != pred_end(BB)) { + SmallVector workList; + workList.reserve(anticIn.size()); + topo_sort(anticIn, workList); + + unsigned result = insertion_mergepoint(workList, DI, new_sets); + if (result & 1) + changed_function = true; + if (result & 2) + new_stuff = true; + } + } + } + + return changed_function; +} + +// GVNPRE::runOnFunction - This is the main transformation entry point for a +// function. 
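Taken together, the insertion phase above fires exactly when an anticipated expression has a leader in some predecessors but not all of them, and the available leaders are not already one and the same value. That predicate in isolation, with illustrative names (an empty string marks "not available in this predecessor"):

#include <iostream>
#include <map>
#include <string>

int main() {
  // Leader for the expression's value number in each predecessor.
  std::map<std::string, std::string> avail = {{"pred1", "t1"}, {"pred2", ""}};

  bool by_some = false, all_same = true;
  std::string first;
  for (auto &[pred, leader] : avail) {
    if (leader.empty()) { all_same = false; continue; }
    by_some = true;
    if (first.empty()) first = leader;
    else if (first != leader) all_same = false;
  }

  // Partially redundant: compute it in the missing preds, join with a phi.
  if (by_some && !all_same)
    std::cout << "insert computation in missing preds, add phi\n";
}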
+// +bool GVNPRE::runOnFunction(Function &F) { + // Clean out global sets from any previous functions + VN.clear(); + createdExpressions.clear(); + availableOut.clear(); + anticipatedIn.clear(); + generatedPhis.clear(); + + bool changed_function = false; + + // Phase 1: BuildSets + // This phase calculates the AVAIL_OUT and ANTIC_IN sets + buildsets(F); + + // Phase 2: Insert + // This phase inserts values to make partially redundant values + // fully redundant + changed_function |= insertion(F); + + // Phase 3: Eliminate + // This phase performs trivial full redundancy elimination + changed_function |= elimination(); + + // Phase 4: Cleanup + // This phase cleans up values that were created solely + // as leaders for expressions + cleanup(); + + return changed_function; +} diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp new file mode 100644 index 000000000000..ca7aa7bd30db --- /dev/null +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -0,0 +1,880 @@ +//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation analyzes and transforms the induction variables (and +// computations derived from them) into simpler forms suitable for subsequent +// analysis and transformation. +// +// This transformation makes the following changes to each loop with an +// identifiable induction variable: +// 1. All loops are transformed to have a SINGLE canonical induction variable +// which starts at zero and steps by one. +// 2. The canonical induction variable is guaranteed to be the first PHI node +// in the loop header block. +// 3. Any pointer arithmetic recurrences are raised to use array subscripts. +// +// If the trip count of a loop is computable, this pass also makes the following +// changes: +// 1. The exit condition for the loop is canonicalized to compare the +// induction value against the exit value. This turns loops like: +// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' +// 2. Any use outside of the loop of an expression derived from the indvar +// is changed to compute the derived value outside of the loop, eliminating +// the dependence on the exit value of the induction variable. If the only +// purpose of the loop is to compute the exit value of some derived +// expression, this transformation will make the loop dead. +// +// This transformation should be followed by strength reduction after all of the +// desired loop transformations have been performed. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +STATISTIC(NumRemoved , "Number of aux indvars removed"); +STATISTIC(NumInserted, "Number of canonical indvars added"); +STATISTIC(NumReplaced, "Number of exit values replaced"); +STATISTIC(NumLFTR , "Number of loop exit tests replaced"); + +namespace { + class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { + IVUsers *IU; + LoopInfo *LI; + ScalarEvolution *SE; + bool Changed; + public: + + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() : LoopPass(&ID) {} + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LCSSAID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved(); + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); + } + + private: + + void RewriteNonIntegerIVs(Loop *L); + + ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, + Value *IndVar, + BasicBlock *ExitingBlock, + BranchInst *BI, + SCEVExpander &Rewriter); + void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount); + + void RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter); + + void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter); + + void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter); + + void HandleFloatingPointIV(Loop *L, PHINode *PH); + }; +} + +char IndVarSimplify::ID = 0; +static RegisterPass +X("indvars", "Canonicalize Induction Variables"); + +Pass *llvm::createIndVarSimplifyPass() { + return new IndVarSimplify(); +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, + SCEVHandle BackedgeTakenCount, + Value *IndVar, + BasicBlock *ExitingBlock, + BranchInst *BI, + SCEVExpander &Rewriter) { + // If the exiting block is not the same as the backedge block, we must compare + // against the preincremented value, otherwise we prefer to compare against + // the post-incremented value. + Value *CmpIndVar; + SCEVHandle RHS = BackedgeTakenCount; + if (ExitingBlock == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // If this addition may overflow, we have to be more pessimistic and + // cast the induction variable before doing the add. 
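+  // To make the overflow concern concrete: if the backedge-taken count is
+  // the i8 value 255, then 255 + 1 wraps to 0 in eight bits, and an exit
+  // test of "iv != 0" would fire on the first iteration instead of after
+  // 256 trips. Widening first (zext i8 255 to i32, then adding 1) yields
+  // the correct trip count of 256.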
+    SCEVHandle Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+    SCEVHandle N =
+      SE->getAddExpr(BackedgeTakenCount,
+                     SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
+    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+        SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+      // No overflow. Cast the sum.
+      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Potential overflow. Cast before doing the add.
+      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                        IndVar->getType());
+      RHS = SE->getAddExpr(RHS,
+                           SE->getIntegerSCEV(1, IndVar->getType()));
+    }
+
+    // The BackedgeTaken expression contains the number of times that the
+    // backedge branches to the loop header. This is one less than the
+    // number of times the loop executes, so use the incremented indvar.
+    CmpIndVar = L->getCanonicalInductionVariableIncrement();
+  } else {
+    // We have to use the preincremented value...
+    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                      IndVar->getType());
+    CmpIndVar = IndVar;
+  }
+
+  // Expand the code for the iteration count into the preheader of the loop.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
+                                          Preheader->getTerminator());
+
+  // Insert a new icmp_ne or icmp_eq instruction before the branch.
+  ICmpInst::Predicate Opcode;
+  if (L->contains(BI->getSuccessor(0)))
+    Opcode = ICmpInst::ICMP_NE;
+  else
+    Opcode = ICmpInst::ICMP_EQ;
+
+  DOUT << "INDVARS: Rewriting loop exit condition to:\n"
+       << " LHS:" << *CmpIndVar // includes a newline
+       << " op:\t"
+       << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+       << " RHS:\t" << *RHS << "\n";
+
+  ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+
+  Instruction *OrigCond = cast<Instruction>(BI->getCondition());
+  // It's tempting to use replaceAllUsesWith here to fully replace the old
+  // comparison, but that's not immediately safe, since users of the old
+  // comparison may not be dominated by the new comparison. Instead, just
+  // update the branch to use the new comparison; in the common case this
+  // will make the old comparison dead.
+  BI->setCondition(Cond);
+  RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+
+  ++NumLFTR;
+  Changed = true;
+  return Cond;
+}
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
+void IndVarSimplify::RewriteLoopExitValues(Loop *L,
+                                           const SCEV *BackedgeTakenCount) {
+  // Verify that the input to the pass is already in LCSSA form.
+  assert(L->isLCSSAForm());
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+
+  // Scan all of the instructions in the loop, looking at those that have
+  // extra-loop users and which are recurrences.
+  SCEVExpander Rewriter(*SE);
+
+  // We insert the code into the preheader of the loop if the loop contains
+  // multiple exit blocks, or in the exit block if there is exactly one.
+ BasicBlock *BlockToInsertInto; + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() == 1) + BlockToInsertInto = ExitBlocks[0]; + else + BlockToInsertInto = Preheader; + BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI(); + + std::map ExitValues; + + // Find all values that are computed inside the loop, but used outside of it. + // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan + // the exit blocks of the loop to find them. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBB = ExitBlocks[i]; + + // If there are no PHI nodes in this exit block, then no values defined + // inside the loop are used on this path, skip it. + PHINode *PN = dyn_cast(ExitBB->begin()); + if (!PN) continue; + + unsigned NumPreds = PN->getNumIncomingValues(); + + // Iterate over all of the PHI nodes. + BasicBlock::iterator BBI = ExitBB->begin(); + while ((PN = dyn_cast(BBI++))) { + if (PN->use_empty()) + continue; // dead use, don't replace it + // Iterate over all of the values in all the PHI nodes. + for (unsigned i = 0; i != NumPreds; ++i) { + // If the value being merged in is not integer or is not defined + // in the loop, skip it. + Value *InVal = PN->getIncomingValue(i); + if (!isa(InVal) || + // SCEV only supports integer expressions for now. + (!isa(InVal->getType()) && + !isa(InVal->getType()))) + continue; + + // If this pred is for a subloop, not L itself, skip it. + if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) + continue; // The Block is in a subloop, skip it. + + // Check that InVal is defined in the loop. + Instruction *Inst = cast(InVal); + if (!L->contains(Inst->getParent())) + continue; + + // Okay, this instruction has a user outside of the current loop + // and varies predictably *inside* the loop. Evaluate the value it + // contains when the loop exits, if possible. + SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + if (!ExitValue->isLoopInvariant(L)) + continue; + + Changed = true; + ++NumReplaced; + + // See if we already computed the exit value for the instruction, if so, + // just reuse it. + Value *&ExitVal = ExitValues[Inst]; + if (!ExitVal) + ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), InsertPt); + + DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal + << " LoopVal = " << *Inst << "\n"; + + PN->setIncomingValue(i, ExitVal); + + // If this instruction is dead now, delete it. + RecursivelyDeleteTriviallyDeadInstructions(Inst); + + // See if this is a single-entry LCSSA PHI node. If so, we can (and + // have to) remove + // the PHI entirely. This is safe, because the NewVal won't be variant + // in the loop, so we don't need an LCSSA phi node anymore. + if (NumPreds == 1) { + PN->replaceAllUsesWith(ExitVal); + RecursivelyDeleteTriviallyDeadInstructions(PN); + break; + } + } + } + } +} + +void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { + // First step. Check to see if there are any floating-point recurrences. + // If there are, change them into integer recurrences, permitting analysis by + // the SCEV routines. + // + BasicBlock *Header = L->getHeader(); + + SmallVector PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null(PHIs[i])) + HandleFloatingPointIV(L, PN); + + // If the loop previously had floating-point IV, ScalarEvolution + // may not have been able to compute a trip count. 
Now that we've done some re-writing, the trip count may be computable.
+  if (Changed)
+    SE->forgetLoopBackedgeTakenCount(L);
+}
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  IU = &getAnalysis<IVUsers>();
+  LI = &getAnalysis<LoopInfo>();
+  SE = &getAnalysis<ScalarEvolution>();
+  Changed = false;
+
+  // If there are any floating-point recurrences, attempt to
+  // transform them to use integer recurrences.
+  RewriteNonIntegerIVs(L);
+
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
+  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+  // Check to see if this loop has a computable loop-invariant execution
+  // count. If so, this means that we can compute the final value of any
+  // expressions that are recurrent in the loop, and substitute the exit
+  // values from the loop into any instructions outside of the loop that
+  // use the final values of the current expressions.
+  if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    RewriteLoopExitValues(L, BackedgeTakenCount);
+
+  // Compute the type of the largest recurrence expression, and decide whether
+  // a canonical induction variable should be inserted.
+  const Type *LargestType = 0;
+  bool NeedCannIV = false;
+  if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+    LargestType = BackedgeTakenCount->getType();
+    LargestType = SE->getEffectiveSCEVType(LargestType);
+    // If we have a known trip count and a single exit block, we'll be
+    // rewriting the loop exit test condition below, which requires a
+    // canonical induction variable.
+    if (ExitingBlock)
+      NeedCannIV = true;
+  }
+  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+    SCEVHandle Stride = IU->StrideOrder[i];
+    const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
+    if (!LargestType ||
+        SE->getTypeSizeInBits(Ty) >
+          SE->getTypeSizeInBits(LargestType))
+      LargestType = Ty;
+
+    std::map<SCEVHandle, IVUsersOfOneStride*>::iterator SI =
+      IU->IVUsesByStride.find(IU->StrideOrder[i]);
+    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+
+    if (!SI->second->Users.empty())
+      NeedCannIV = true;
+  }
+
+  // Create a rewriter object which we'll use to transform the code with.
+  SCEVExpander Rewriter(*SE);
+
+  // Now that we know the largest of the induction variable expressions
+  // in this loop, insert a canonical induction variable of the largest size.
+  Value *IndVar = 0;
+  if (NeedCannIV) {
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
+    ++NumInserted;
+    Changed = true;
+    DOUT << "INDVARS: New CanIV: " << *IndVar;
+  }
+
+  // If we have a trip count expression, rewrite the loop's exit condition
+  // using it. We can currently only handle loops with a single exit.
+  ICmpInst *NewICmp = 0;
+  if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) {
+    assert(NeedCannIV &&
+           "LinearFunctionTestReplace requires a canonical induction variable");
+    // Can't rewrite non-branch yet.
+    if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+      NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+                                          ExitingBlock, BI, Rewriter);
+  }
+
+  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
+
+  // Rewrite IV-derived expressions. Clears the rewriter cache.
+  RewriteIVExpressions(L, LargestType, Rewriter);
+
+  // The Rewriter may only be used for isInsertedInstruction queries from this
+  // point on.
+
+  // Loop-invariant instructions in the preheader that aren't used in the
+  // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L, Rewriter); + + // Reorder instructions to avoid use-before-def conditions. + FixUsesBeforeDefs(L, Rewriter); + + // For completeness, inform IVUsers of the IV use in the newly-created + // loop exit test instruction. + if (NewICmp) + IU->AddUsersIfInteresting(cast(NewICmp->getOperand(0))); + + // Clean up dead instructions. + DeleteDeadPHIs(L->getHeader()); + // Check a post-condition. + assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!"); + return Changed; +} + +void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter) { + SmallVector DeadInsts; + + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. + // + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + SCEVHandle Stride = IU->StrideOrder[i]; + + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + ilist &List = SI->second->Users; + for (ilist::iterator UI = List.begin(), + E = List.end(); UI != E; ++UI) { + SCEVHandle Offset = UI->getOffset(); + Value *Op = UI->getOperandValToReplace(); + Instruction *User = UI->getUser(); + bool isSigned = UI->isSigned(); + + // Compute the final addrec to expand into code. + SCEVHandle AR = IU->getReplacementExpr(*UI); + + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!Stride->isLoopInvariant(L) && + !isa(AR) && + L->contains(User->getParent())) + continue; + + Value *NewVal = 0; + if (AR->isLoopInvariant(L)) { + BasicBlock::iterator I = Rewriter.getInsertionPoint(); + // Expand loop-invariant values in the loop preheader. They will + // be sunk to the exit block later, if possible. + NewVal = + Rewriter.expandCodeFor(AR, LargestType, + L->getLoopPreheader()->getTerminator()); + Rewriter.setInsertionPoint(I); + ++NumReplaced; + } else { + const Type *IVTy = Offset->getType(); + const Type *UseTy = Op->getType(); + + // Promote the Offset and Stride up to the canonical induction + // variable's bit width. + SCEVHandle PromotedOffset = Offset; + SCEVHandle PromotedStride = Stride; + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) { + // It doesn't matter for correctness whether zero or sign extension + // is used here, since the value is truncated away below, but if the + // value is signed, sign extension is more likely to be folded. + if (isSigned) { + PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType); + PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType); + } else { + PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType); + // If the stride is obviously negative, use sign extension to + // produce things like x-1 instead of x+255. 
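+          // Concretely: a stride of -1 is the i8 bit pattern 0xFF.
+          // Zero-extending it to i32 gives 255, so the recurrence would
+          // step by x + 255; sign-extending gives -1, producing x - 1.
+          // Both agree in the low 8 bits after truncation, but the
+          // sign-extended form folds better.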
+ if (isa(PromotedStride) && + cast(PromotedStride) + ->getValue()->getValue().isNegative()) + PromotedStride = SE->getSignExtendExpr(PromotedStride, + LargestType); + else + PromotedStride = SE->getZeroExtendExpr(PromotedStride, + LargestType); + } + } + + // Create the SCEV representing the offset from the canonical + // induction variable, still in the canonical induction variable's + // type, so that all expanded arithmetic is done in the same type. + SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType), + PromotedStride, L); + // Add the PromotedOffset as a separate step, because it may not be + // loop-invariant. + NewAR = SE->getAddExpr(NewAR, PromotedOffset); + + // Expand the addrec into instructions. + Value *V = Rewriter.expandCodeFor(NewAR); + + // Insert an explicit cast if necessary to truncate the value + // down to the original stride type. This is done outside of + // SCEVExpander because in SCEV expressions, a truncate of an + // addrec is always folded. + if (LargestType != IVTy) { + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) + NewAR = SE->getTruncateExpr(NewAR, IVTy); + if (Rewriter.isInsertedExpression(NewAR)) + V = Rewriter.expandCodeFor(NewAR); + else { + V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false, + IVTy, false), + V, IVTy); + assert(!isa(V) && !isa(V) && + "LargestType wasn't actually the largest type!"); + // Force the rewriter to use this trunc whenever this addrec + // appears so that it doesn't insert new phi nodes or + // arithmetic in a different type. + Rewriter.addInsertedValue(V, NewAR); + } + } + + DOUT << "INDVARS: Made offset-and-trunc IV for offset " + << *IVTy << " " << *Offset << ": "; + DEBUG(WriteAsOperand(*DOUT, V, false)); + DOUT << "\n"; + + // Now expand it into actual Instructions and patch it into place. + NewVal = Rewriter.expandCodeFor(AR, UseTy); + } + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op + << " into = " << *NewVal << "\n"; + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } + } + + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) { + Instruction *Inst = dyn_cast_or_null(DeadInsts.pop_back_val()); + if (Inst) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } +} + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + Instruction *NonPHI = ExitBlock->getFirstNonPHI(); + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. Only + // consider those new instructions. + if (!Rewriter.isInsertedInstruction(I)) + break; + // Determine if there is a use in or before the loop (direct or + // otherwise). 
+    bool UsedInLoop = false;
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+         UI != UE; ++UI) {
+      BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(UI)) {
+        unsigned i =
+          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+        UseBB = P->getIncomingBlock(i);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoop = true;
+        break;
+      }
+    }
+    // If there is, the def must remain in the preheader.
+    if (UsedInLoop)
+      continue;
+    // Otherwise, sink it to the exit block.
+    Instruction *ToMove = I;
+    bool Done = false;
+    if (I != Preheader->begin())
+      --I;
+    else
+      Done = true;
+    ToMove->moveBefore(NonPHI);
+    if (Done)
+      break;
+  }
+}
+
+/// Re-schedule the inserted instructions to put defs before uses.  This
+/// fixes problems that arise when SCEV expressions contain loop-variant
+/// values unrelated to the induction variable which are defined inside the
+/// loop.  FIXME: It would be better to insert instructions in the right
+/// place so that this step isn't needed.
+void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
+  // Visit all the blocks in the loop in pre-order dom-tree dfs order.
+  DominatorTree *DT = &getAnalysis<DominatorTree>();
+  std::map<Instruction*, unsigned> NumPredsLeft;
+  SmallVector<DomTreeNode*, 16> Worklist;
+  Worklist.push_back(DT->getNode(L->getHeader()));
+  do {
+    DomTreeNode *Node = Worklist.pop_back_val();
+    for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
+      if (L->contains((*I)->getBlock()))
+        Worklist.push_back(*I);
+    BasicBlock *BB = Node->getBlock();
+    // Visit all the instructions in the block top down.
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // Count the number of operands that aren't properly dominating.
+      unsigned NumPreds = 0;
+      if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
+        for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
+             OI != OE; ++OI)
+          if (Instruction *Inst = dyn_cast<Instruction>(OI))
+            if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
+              ++NumPreds;
+      NumPredsLeft[I] = NumPreds;
+      // Notify uses of the position of this instruction, and move the
+      // users (and their dependents, recursively) into place after this
+      // instruction if it is their last outstanding operand.
+      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+           UI != UE; ++UI) {
+        Instruction *Inst = cast<Instruction>(UI);
+        std::map<Instruction*, unsigned>::iterator Z = NumPredsLeft.find(Inst);
+        if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
+          SmallVector<Instruction*, 4> UseWorkList;
+          UseWorkList.push_back(Inst);
+          BasicBlock::iterator InsertPt = I;
+          if (InvokeInst *II = dyn_cast<InvokeInst>(InsertPt))
+            InsertPt = II->getNormalDest()->begin();
+          else
+            ++InsertPt;
+          while (isa<PHINode>(InsertPt)) ++InsertPt;
+          do {
+            Instruction *Use = UseWorkList.pop_back_val();
+            Use->moveBefore(InsertPt);
+            NumPredsLeft.erase(Use);
+            for (Value::use_iterator IUI = Use->use_begin(),
+                 IUE = Use->use_end(); IUI != IUE; ++IUI) {
+              Instruction *IUIInst = cast<Instruction>(IUI);
+              if (L->contains(IUIInst->getParent()) &&
+                  Rewriter.isInsertedInstruction(IUIInst) &&
+                  !isa<PHINode>(IUIInst))
+                UseWorkList.push_back(IUIInst);
+            }
+          } while (!UseWorkList.empty());
+        }
+      }
+    }
+  } while (!Worklist.empty());
+}
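FixUsesBeforeDefs above is, at heart, a dependence-counting topological move: each inserted instruction carries a count of its not-yet-placed in-loop operands, and it is moved into place only when that count drops to zero. A minimal standalone sketch of the same idea on a toy dependency graph, using ordinary containers rather than LLVM IR (all names below are illustrative, not LLVM API):

  #include <cassert>
  #include <cstddef>
  #include <map>
  #include <vector>

  // Toy sketch, not LLVM API: a node depends on its Operands and feeds
  // its Users.
  struct Node {
    std::vector<Node*> Operands; // defs this node depends on
    std::vector<Node*> Users;    // nodes that depend on this one
  };

  // Emit nodes in an order where every def precedes all of its uses.
  std::vector<Node*> Schedule(const std::vector<Node*> &Nodes) {
    std::map<Node*, unsigned> NumPredsLeft;
    std::vector<Node*> Ready, Order;
    for (size_t i = 0; i != Nodes.size(); ++i) {
      NumPredsLeft[Nodes[i]] = unsigned(Nodes[i]->Operands.size());
      if (Nodes[i]->Operands.empty())
        Ready.push_back(Nodes[i]);
    }
    while (!Ready.empty()) {
      Node *N = Ready.back();
      Ready.pop_back();
      Order.push_back(N);
      // A user is placed once its last outstanding operand has landed,
      // mirroring the --Z->second == 0 test above.
      for (size_t i = 0; i != N->Users.size(); ++i)
        if (--NumPredsLeft[N->Users[i]] == 0)
          Ready.push_back(N->Users[i]);
    }
    assert(Order.size() == Nodes.size() && "cycle in dependence graph");
    return Order;
  }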
+
+/// Return true if it is OK to use SIToFPInst for an induction variable
+/// with given initial and exit values.
+static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
+                          uint64_t intIV, uint64_t intEV) {
+
+  if (InitV.getValueAPF().isNegative() || ExitV.getValueAPF().isNegative())
+    return true;
+
+  // If the iteration range can be handled by SIToFPInst then use it.
+  APInt Max = APInt::getSignedMaxValue(32);
+  if (Max.getZExtValue() > static_cast<uint64_t>(abs64(intEV - intIV)))
+    return true;
+
+  return false;
+}
+
+/// convertToInt - Convert APF to an integer, if possible.
+static bool convertToInt(const APFloat &APF, uint64_t *intVal) {
+
+  bool isExact = false;
+  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
+    return false;
+  if (APF.convertToInteger(intVal, 32, APF.isNegative(),
+                           APFloat::rmTowardZero, &isExact)
+      != APFloat::opOK)
+    return false;
+  if (!isExact)
+    return false;
+  return true;
+
+}
+
+/// HandleFloatingPointIV - If the loop has a floating-point induction
+/// variable then insert a corresponding integer induction variable if
+/// possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+///   bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+///   bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
+
+  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
+  unsigned BackEdge     = IncomingEdge^1;
+
+  // Check incoming value.
+  ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
+  if (!InitValue) return;
+  uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
+    return;
+
+  // Check the IV increment.  Reject this PH if the increment operation is
+  // not an add or if the increment value cannot be represented by an integer.
+  BinaryOperator *Incr =
+    dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
+  if (!Incr) return;
+  if (Incr->getOpcode() != Instruction::Add) return;
+  ConstantFP *IncrValue = NULL;
+  unsigned IncrVIndex = 1;
+  if (Incr->getOperand(1) == PH)
+    IncrVIndex = 0;
+  IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
+  if (!IncrValue) return;
+  uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
+    return;
+
+  // Check Incr uses.  One user is PH and the other user is the exit
+  // condition used by the conditional terminator.
+  Value::use_iterator IncrUse = Incr->use_begin();
+  Instruction *U1 = cast<Instruction>(IncrUse++);
+  if (IncrUse == Incr->use_end()) return;
+  Instruction *U2 = cast<Instruction>(IncrUse++);
+  if (IncrUse != Incr->use_end()) return;
+
+  // Find the exit condition.
+  FCmpInst *EC = dyn_cast<FCmpInst>(U1);
+  if (!EC)
+    EC = dyn_cast<FCmpInst>(U2);
+  if (!EC) return;
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) {
+    if (!BI->isConditional()) return;
+    if (BI->getCondition() != EC) return;
+  }
+
+  // Find the exit value.  If the exit value cannot be represented as an
+  // integer then do not handle this floating point PH.
+  ConstantFP *EV = NULL;
+  unsigned EVIndex = 1;
+  if (EC->getOperand(1) == Incr)
+    EVIndex = 0;
+  EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
+  if (!EV) return;
+  uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits();
+  if (!convertToInt(EV->getValueAPF(), &intEV))
+    return;
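+  // Note that both bounds have now been shown, via convertToInt, to be
+  // floating-point constants that are exactly small integers, and the
+  // induction variable follows an integer recurrence, so no NaN can reach
+  // the exit comparison.  That is what lets the mapping below collapse each
+  // ordered/unordered FCmp predicate pair into a single integer predicate.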
+
+  // Find the new predicate for the integer comparison.
+  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+  switch (EC->getPredicate()) {
+  case CmpInst::FCMP_OEQ:
+  case CmpInst::FCMP_UEQ:
+    NewPred = CmpInst::ICMP_EQ;
+    break;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT:
+    NewPred = CmpInst::ICMP_UGT;
+    break;
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE:
+    NewPred = CmpInst::ICMP_UGE;
+    break;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT:
+    NewPred = CmpInst::ICMP_ULT;
+    break;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE:
+    NewPred = CmpInst::ICMP_ULE;
+    break;
+  default:
+    break;
+  }
+  if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
+
+  // Insert new integer induction variable.
+  PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
+                                    PH->getName()+".int", PH);
+  NewPHI->addIncoming(ConstantInt::get(Type::Int32Ty, newInitValue),
+                      PH->getIncomingBlock(IncomingEdge));
+
+  Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
+                                            ConstantInt::get(Type::Int32Ty,
+                                                             newIncrValue),
+                                            Incr->getName()+".int", Incr);
+  NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
+
+  // The back edge is edge 1 of NewPHI, whatever it may have been in the
+  // original PHI.
+  ConstantInt *NewEV = ConstantInt::get(Type::Int32Ty, intEV);
+  Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
+  Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
+  ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
+                                 EC->getParent()->getTerminator());
+
+  // In the following deletions, PH may become dead and may be deleted.
+  // Use a WeakVH to observe whether this happens.
+  WeakVH WeakPH = PH;
+
+  // Delete the old, floating point, exit comparison instruction.
+  NewEC->takeName(EC);
+  EC->replaceAllUsesWith(NewEC);
+  RecursivelyDeleteTriviallyDeadInstructions(EC);
+
+  // Delete the old, floating point, increment instruction.
+  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+  RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+  // Replace the floating induction variable, if it isn't already deleted.
+  // Give SIToFPInst preference over UIToFPInst because it is faster on
+  // platforms that are widely used.
+  if (WeakPH && !PH->use_empty()) {
+    if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
+      SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+                                        PH->getParent()->getFirstNonPHI());
+      PH->replaceAllUsesWith(Conv);
+    } else {
+      UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+                                        PH->getParent()->getFirstNonPHI());
+      PH->replaceAllUsesWith(Conv);
+    }
+    RecursivelyDeleteTriviallyDeadInstructions(PH);
+  }
+
+  // Add a new IVUsers entry for the newly-created integer PHI.
+  IU->AddUsersIfInteresting(NewPHI);
+}
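The floating-point rewrite above hinges on convertToInt: the initial value, the step, and the exit bound may be replaced by i32 arithmetic only when each of them converts to an integer exactly. A minimal standalone illustration of that exactness requirement in plain C++ (ExactlyInt32 is a hypothetical helper written for this note, not an LLVM API):

  #include <cassert>
  #include <cmath>
  #include <stdint.h>

  // Returns true iff D is exactly representable as a 32-bit integer,
  // writing the value to *Out.  Like convertToInt above, the conversion
  // must be exact (no fractional part) and must fit the narrow type.
  bool ExactlyInt32(double D, int32_t *Out) {
    if (std::fabs(D) != std::floor(std::fabs(D)))
      return false;                                  // inexact (or NaN)
    if (D < -2147483648.0 || D > 2147483647.0)
      return false;                                  // out of range
    *Out = static_cast<int32_t>(D);
    return true;
  }

  int main() {
    int32_t V;
    assert(ExactlyInt32(10000.0, &V) && V == 10000); // a usable bound
    assert(!ExactlyInt32(0.5, &V));                  // fractional: reject
    assert(!ExactlyInt32(1e18, &V));                 // too wide: reject
    return 0;
  }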
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
new file mode 100644
index 000000000000..e6f854f1a56b
--- /dev/null
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -0,0 +1,12919 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions.  This pass does not modify the CFG.  This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+//    %Y = add i32 %X, 1
+//    %Z = add i32 %Y, 1
+// into:
+//    %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+//    1. If a binary operator has a constant operand, it is moved to the RHS
+//    2. Bitwise operators with constant operands are always grouped so that
+//       shifts are performed first, then or's, then and's, then xor's.
+//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+//    4. All cmp instructions on boolean values are replaced with logical ops
+//    5. add X, X is represented as (X*2) => (X << 1)
+//    6. Multiplies with a power-of-two constant argument are transformed into
+//       shifts.
+//   ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <climits>
+#include <sstream>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+
+namespace {
+  class VISIBILITY_HIDDEN InstCombiner
+    : public FunctionPass,
+      public InstVisitor<InstCombiner, Instruction*> {
+    // Worklist of all of the instructions that need to be simplified.
+    SmallVector<Instruction*, 256> Worklist;
+    DenseMap<Instruction*, unsigned> WorklistMap;
+    TargetData *TD;
+    bool MustPreserveLCSSA;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass(&ID) {}
+
+    /// AddToWorkList - Add the specified instruction to the worklist if it
+    /// isn't already in it.
+    void AddToWorkList(Instruction *I) {
+      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
+        Worklist.push_back(I);
+    }
+
+    // RemoveFromWorkList - remove I from the worklist if it exists.
+    void RemoveFromWorkList(Instruction *I) {
+      DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+      if (It == WorklistMap.end()) return; // Not in worklist.
+
+      // Don't bother moving everything down, just null out the slot.
+      Worklist[It->second] = 0;
+
+      WorklistMap.erase(It);
+    }
+
+    Instruction *RemoveOneFromWorkList() {
+      Instruction *I = Worklist.back();
+      Worklist.pop_back();
+      WorklistMap.erase(I);
+      return I;
+    }
+
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work lists because they might get more simplified
+    /// now.
+    ///
+    void AddUsersToWorkList(Value &I) {
+      for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+           UI != UE; ++UI)
+        AddToWorkList(cast<Instruction>(*UI));
+    }
+
+    /// AddUsesToWorkList - When an instruction is simplified, add operands to
+    /// the work lists because they might get more simplified now.
+    ///
+    void AddUsesToWorkList(Instruction &I) {
+      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+        if (Instruction *Op = dyn_cast<Instruction>(*i))
+          AddToWorkList(Op);
+    }
+
+    /// AddSoonDeadInstToWorklist - The specified instruction is about to become
+    /// dead.  Add all of its operands to the worklist, turning them into
+    /// undef's to reduce the number of uses of those instructions.
+    ///
+    /// Return the specified operand before it is turned into an undef.
+    ///
+    Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
+      Value *R = I.getOperand(op);
+
+      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+        if (Instruction *Op = dyn_cast<Instruction>(*i)) {
+          AddToWorkList(Op);
+          // Set the operand to undef to drop the use.
+          *i = UndefValue::get(Op->getType());
+        }
+
+      return R;
+    }
+
+  public:
+    virtual bool runOnFunction(Function &F);
+
+    bool DoOneIteration(Function &F, unsigned ItNum);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+      AU.addPreservedID(LCSSAID);
+      AU.setPreservesCFG();
+    }
+
+    TargetData &getTargetData() const { return *TD; }
+
+    // Visitation implementation - Implement instruction combining for different
+    // instruction types.  The semantics are as follows:
+    // Return Value:
+    //    null        - No change was made
+    //     I          - Change was made, I is still valid, I may be dead though
+    //   otherwise    - Change was made, replace I with returned instruction
+    //
+    Instruction *visitAdd(BinaryOperator &I);
+    Instruction *visitSub(BinaryOperator &I);
+    Instruction *visitMul(BinaryOperator &I);
+    Instruction *visitURem(BinaryOperator &I);
+    Instruction *visitSRem(BinaryOperator &I);
+    Instruction *visitFRem(BinaryOperator &I);
+    bool SimplifyDivRemOfSelect(BinaryOperator &I);
+    Instruction *commonRemTransforms(BinaryOperator &I);
+    Instruction *commonIRemTransforms(BinaryOperator &I);
+    Instruction *commonDivTransforms(BinaryOperator &I);
+    Instruction *commonIDivTransforms(BinaryOperator &I);
+    Instruction *visitUDiv(BinaryOperator &I);
+    Instruction *visitSDiv(BinaryOperator &I);
+    Instruction *visitFDiv(BinaryOperator &I);
+    Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *visitAnd(BinaryOperator &I);
+    Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+                                     Value *A, Value *B, Value *C);
+    Instruction *visitOr (BinaryOperator &I);
+    Instruction *visitXor(BinaryOperator &I);
+    Instruction *visitShl(BinaryOperator &I);
+    Instruction *visitAShr(BinaryOperator &I);
+    Instruction *visitLShr(BinaryOperator &I);
+    Instruction *commonShiftTransforms(BinaryOperator &I);
+    Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+                                      Constant *RHSC);
+    Instruction *visitFCmpInst(FCmpInst &I);
+    Instruction *visitICmpInst(ICmpInst &I);
+    Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+    Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+                                                Instruction *LHS,
+                                                ConstantInt *RHS);
+    Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                ConstantInt *DivRHS);
+
+    Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
+                             ICmpInst::Predicate Cond, Instruction &I);
+    Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+                                     BinaryOperator &I);
+    Instruction *commonCastTransforms(CastInst &CI);
+    Instruction *commonIntCastTransforms(CastInst &CI);
+    Instruction *commonPointerCastTransforms(CastInst &CI);
+    Instruction *visitTrunc(TruncInst &CI);
+    Instruction *visitZExt(ZExtInst &CI);
+    Instruction *visitSExt(SExtInst &CI);
+    Instruction *visitFPTrunc(FPTruncInst &CI);
+    Instruction *visitFPExt(CastInst &CI);
+    Instruction *visitFPToUI(FPToUIInst &FI);
+    Instruction *visitFPToSI(FPToSIInst &FI);
+    Instruction *visitUIToFP(CastInst &CI);
+    Instruction *visitSIToFP(CastInst &CI);
+    Instruction *visitPtrToInt(PtrToIntInst &CI);
+    Instruction *visitIntToPtr(IntToPtrInst &CI);
+    Instruction *visitBitCast(BitCastInst &CI);
+    Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+                                Instruction *FI);
+    Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+    Instruction *visitSelectInst(SelectInst &SI);
+    Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+    Instruction *visitCallInst(CallInst &CI);
+    Instruction *visitInvokeInst(InvokeInst &II);
+    Instruction *visitPHINode(PHINode &PN);
+    Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+    Instruction *visitAllocationInst(AllocationInst &AI);
+    Instruction *visitFreeInst(FreeInst &FI);
+    Instruction *visitLoadInst(LoadInst &LI);
+    Instruction *visitStoreInst(StoreInst &SI);
+    Instruction *visitBranchInst(BranchInst &BI);
+    Instruction *visitSwitchInst(SwitchInst &SI);
+    Instruction *visitInsertElementInst(InsertElementInst &IE);
+    Instruction *visitExtractElementInst(ExtractElementInst &EI);
+    Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+    Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+    // visitInstruction - Specify what to return for unhandled instructions...
+    Instruction *visitInstruction(Instruction &I) { return 0; }
+
+  private:
+    Instruction *visitCallSite(CallSite CS);
+    bool transformConstExprCastCall(CallSite CS);
+    Instruction *transformCallThroughTrampoline(CallSite CS);
+    Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                   bool DoXform = true);
+    bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+    DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
+
+
+  public:
+    // InsertNewInstBefore - insert an instruction New before instruction Old
+    // in the program.  Add the new instruction to the worklist.
+    //
+    Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+      assert(New && New->getParent() == 0 &&
+             "New instruction already inserted into a basic block!");
+      BasicBlock *BB = Old.getParent();
+      BB->getInstList().insert(&Old, New);  // Insert inst
+      AddToWorkList(New);
+      return New;
+    }
+
+    /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
+    /// This also adds the cast to the worklist.  Finally, this returns the
+    /// cast.
+    Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
+                            Instruction &Pos) {
+      if (V->getType() == Ty) return V;
+
+      if (Constant *CV = dyn_cast<Constant>(V))
+        return ConstantExpr::getCast(opc, CV, Ty);
+
+      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
+      AddToWorkList(C);
+      return C;
+    }
+
+    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
+      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
+    }
+
+
+    // ReplaceInstUsesWith - This method is to be used when an instruction is
+    // found to be dead, replaceable with another preexisting expression.
Here + // we add all uses of I to the worklist, replace all uses of I with the new + // value, then return I, so that the inst combiner will know that I was + // modified. + // + Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + AddUsersToWorkList(I); // Add all modified instrs to worklist + if (&I != V) { + I.replaceAllUsesWith(V); + return &I; + } else { + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + I.replaceAllUsesWith(UndefValue::get(I.getType())); + return &I; + } + } + + // EraseInstFromFunction - When dealing with an instruction that has side + // effects or produces a void value, we can't rely on DCE to delete the + // instruction. Instead, visit methods should return the value returned by + // this function. + Instruction *EraseInstFromFunction(Instruction &I) { + assert(I.use_empty() && "Cannot erase instruction that is used!"); + AddUsesToWorkList(I); + RemoveFromWorkList(&I); + I.eraseFromParent(); + return 0; // Don't do anything with FI + } + + void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0) const { + return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + } + + bool MaskedValueIsZero(Value *V, const APInt &Mask, + unsigned Depth = 0) const { + return llvm::MaskedValueIsZero(V, Mask, TD, Depth); + } + unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { + return llvm::ComputeNumSignBits(Op, TD, Depth); + } + + private: + + /// SimplifyCommutative - This performs a few simplifications for + /// commutative operators. + bool SimplifyCommutative(BinaryOperator &I); + + /// SimplifyCompare - This reorders the operands of a CmpInst to get them in + /// most-complex to least-complex order. + bool SimplifyCompare(CmpInst &I); + + /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value + /// based on the demanded bits. + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth=0); + + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that + /// SimplifyDemandedBits knows about. See if the instruction has any + /// properties that allow us to simplify its operands. + bool SimplifyDemandedInstructionBits(Instruction &Inst); + + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt& UndefElts, unsigned Depth = 0); + + // FoldOpIntoPhi - Given a binary operator or cast instruction which has a + // PHI node as operand #0, see if we can fold the instruction into the PHI + // (which is only possible if all operands to the PHI are constants). + Instruction *FoldOpIntoPhi(Instruction &I); + + // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" + // operator and they all are only used by the PHI, PHI together their + // inputs, and do the operation once, to the result of the PHI. 
+    Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+    Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+    Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+
+
+    Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+                          ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+    Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+                              bool isSub, Instruction &I);
+    Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                                 bool isSigned, bool Inside, Instruction &IB);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
+    Instruction *MatchBSwap(BinaryOperator &I);
+    bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+    Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+    Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+    Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+
+    bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+                                    unsigned CastOpc, int &NumCastsRemoved);
+    unsigned GetOrEnforceKnownAlignment(Value *V,
+                                        unsigned PrefAlign = 0);
+
+  };
+}
+
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
+// getComplexity:  Assign a complexity or rank value to LLVM Values...
+//   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static unsigned getComplexity(Value *V) {
+  if (isa<Instruction>(V)) {
+    if (BinaryOperator::isNeg(V) || BinaryOperator::isNot(V))
+      return 3;
+    return 4;
+  }
+  if (isa<Argument>(V)) return 3;
+  return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+// isOnlyUse - Return true if this instruction will be deleted if we stop using
+// it.
+static bool isOnlyUse(Value *V) {
+  return V->hasOneUse() || isa<Constant>(V);
+}
+
+// getPromotedType - Return the specified type promoted as it would be to pass
+// through a va_arg area...
+static const Type *getPromotedType(const Type *Ty) {
+  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+    if (ITy->getBitWidth() < 32)
+      return Type::Int32Ty;
+  }
+  return Ty;
+}
+
+/// getBitCastOperand - If the specified operand is a CastInst, a constant
+/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
+/// operand value, otherwise return null.
+static Value *getBitCastOperand(Value *V) {
+  if (BitCastInst *I = dyn_cast<BitCastInst>(V))
+    // BitCastInst?
+    return I->getOperand(0);
+  else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+    // GetElementPtrInst?
+    if (GEP->hasAllZeroIndices())
+      return GEP->getOperand(0);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() == Instruction::BitCast)
+      // BitCast ConstantExp?
+      return CE->getOperand(0);
+    else if (CE->getOpcode() == Instruction::GetElementPtr) {
+      // GetElementPtr ConstantExp?
+      for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end();
+           I != E; ++I) {
+        ConstantInt *CI = dyn_cast<ConstantInt>(I);
+        if (!CI || !CI->isZero())
+          // Any non-zero indices? Not cast-like.
+          return 0;
+      }
+      // All-zero indices? This is just like casting.
+      return CE->getOperand(0);
+    }
+  }
+  return 0;
+}
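isEliminableCastPair, wrapped just below, decides when two casts in sequence collapse into a single operation (or into nothing at all). The underlying algebra is easy to check with ordinary integer types; a small self-contained sketch in plain C++ arithmetic (not LLVM API):

  #include <cassert>
  #include <stdint.h>

  int main() {
    // zext i8 -> i16 followed by zext i16 -> i32 is one zext i8 -> i32.
    uint8_t X = 0xAB;
    assert((uint32_t)(uint16_t)X == (uint32_t)X);

    // trunc i32 -> i8 followed by zext i8 -> i32 is just an AND mask,
    // so the pair need not survive as two cast instructions.
    uint32_t Y = 0x12345678;
    assert((uint32_t)(uint8_t)Y == (Y & 0xFF));

    // sext i8 -> i32 followed by trunc i32 -> i16 collapses to a single
    // sext i8 -> i16: the middle bits are all copies of the sign bit.
    int8_t N = -5;
    assert((uint16_t)(int32_t)N == (uint16_t)(int16_t)N);
    return 0;
  }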
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+  const CastInst *CI, ///< The first cast instruction
+  unsigned opcode,    ///< The opcode of the second cast instruction
+  const Type *DstTy,  ///< The target type for the second cast instruction
+  TargetData *TD      ///< The target data for pointer size
+) {
+
+  const Type *SrcTy = CI->getOperand(0)->getType();   // A from above
+  const Type *MidTy = CI->getType();                  // B from above
+
+  // Get the opcodes of the two Cast instructions
+  Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+  Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+                                                DstTy, TD->getIntPtrType());
+
+  // We don't want to form an inttoptr or ptrtoint that converts to an integer
+  // type that differs from the pointer size.
+  if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) ||
+      (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType()))
+    Res = 0;
+
+  return Instruction::CastOps(Res);
+}
+
+/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
+/// in any code being generated.  It does not require codegen if V is simple
+/// enough or if the cast can be folded into other casts.
+static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
+                              const Type *Ty, TargetData *TD) {
+  if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+  // If this is another cast that can be eliminated, it isn't codegen either.
+  if (const CastInst *CI = dyn_cast<CastInst>(V))
+    if (isEliminableCastPair(CI, opcode, Ty, TD))
+      return false;
+  return true;
+}
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+//  1. Order operands such that they are listed from right (least complex) to
+//     left (most complex).  This puts constants before unary operators before
+//     binary operators.
+//
+//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
+bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+  bool Changed = false;
+  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
+    Changed = !I.swapOperands();
+
+  if (!I.isAssociative()) return Changed;
+  Instruction::BinaryOps Opcode = I.getOpcode();
+  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
+    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
+      if (isa<Constant>(I.getOperand(1))) {
+        Constant *Folded = ConstantExpr::get(I.getOpcode(),
+                                             cast<Constant>(I.getOperand(1)),
+                                             cast<Constant>(Op->getOperand(1)));
+        I.setOperand(0, Op->getOperand(0));
+        I.setOperand(1, Folded);
+        return true;
+      } else if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
+        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
+            isOnlyUse(Op) && isOnlyUse(Op1)) {
+          Constant *C1 = cast<Constant>(Op->getOperand(1));
+          Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
+          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
+                                                    Op1->getOperand(0),
+                                                    Op1->getName(), &I);
+          AddToWorkList(New);
+          I.setOperand(0, New);
+          I.setOperand(1, Folded);
+          return true;
+        }
+    }
+  return Changed;
+}
+
+/// SimplifyCompare - For a CmpInst this function just orders the operands
+/// so that they are listed from right (least complex) to left (most complex).
+/// This puts constants before unary operators before binary operators.
+bool InstCombiner::SimplifyCompare(CmpInst &I) { + if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1))) + return false; + I.swapOperands(); + // Compare instructions are not associative so there's nothing else we can do. + return true; +} + +// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction +// if the LHS is a constant zero (which is the 'negate' form). +// +static inline Value *dyn_castNegVal(Value *V) { + if (BinaryOperator::isNeg(V)) + return BinaryOperator::getNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. + if (ConstantInt *C = dyn_cast(V)) + return ConstantExpr::getNeg(C); + + if (ConstantVector *C = dyn_cast(V)) + if (C->getType()->getElementType()->isInteger()) + return ConstantExpr::getNeg(C); + + return 0; +} + +static inline Value *dyn_castNotVal(Value *V) { + if (BinaryOperator::isNot(V)) + return BinaryOperator::getNotArgument(V); + + // Constants can be considered to be not'ed values... + if (ConstantInt *C = dyn_cast(V)) + return ConstantInt::get(~C->getValue()); + return 0; +} + +// dyn_castFoldableMul - If this value is a multiply that can be folded into +// other computations (because it has a constant operand), return the +// non-constant operand of the multiply, and set CST to point to the multiplier. +// Otherwise, return null. +// +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { + if (V->hasOneUse() && V->getType()->isInteger()) + if (Instruction *I = dyn_cast(V)) { + if (I->getOpcode() == Instruction::Mul) + if ((CST = dyn_cast(I->getOperand(1)))) + return I->getOperand(0); + if (I->getOpcode() == Instruction::Shl) + if ((CST = dyn_cast(I->getOperand(1)))) { + // The multiplier is really 1 << CST. + uint32_t BitWidth = cast(V->getType())->getBitWidth(); + uint32_t CSTVal = CST->getLimitedValue(BitWidth); + CST = ConstantInt::get(APInt(BitWidth, 1).shl(CSTVal)); + return I->getOperand(0); + } + } + return 0; +} + +/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant +/// expression, return it. +static User *dyn_castGetElementPtr(Value *V) { + if (isa(V)) return cast(V); + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::GetElementPtr) + return cast(V); + return false; +} + +/// getOpcode - If this is an Instruction or a ConstantExpr, return the +/// opcode value. Otherwise return UserOp1. +static unsigned getOpcode(const Value *V) { + if (const Instruction *I = dyn_cast(V)) + return I->getOpcode(); + if (const ConstantExpr *CE = dyn_cast(V)) + return CE->getOpcode(); + // Use UserOp1 to mean there's no opcode. 
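+  // (UserOp1 never appears as the opcode of a real Instruction or
+  // ConstantExpr, so it cannot collide with a genuine opcode when used as
+  // a sentinel here.)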
+ return Instruction::UserOp1; +} + +/// AddOne - Add one to a ConstantInt +static ConstantInt *AddOne(ConstantInt *C) { + APInt Val(C->getValue()); + return ConstantInt::get(++Val); +} +/// SubOne - Subtract one from a ConstantInt +static ConstantInt *SubOne(ConstantInt *C) { + APInt Val(C->getValue()); + return ConstantInt::get(--Val); +} +/// Add - Add two ConstantInts together +static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() + C2->getValue()); +} +/// And - Bitwise AND two ConstantInts together +static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() & C2->getValue()); +} +/// Subtract - Subtract one ConstantInt from another +static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() - C2->getValue()); +} +/// Multiply - Multiply two ConstantInts together +static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() * C2->getValue()); +} +/// MultiplyOverflows - True if the multiply can not be expressed in an int +/// this size. +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { + uint32_t W = C1->getBitWidth(); + APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); + if (sign) { + LHSExt.sext(W * 2); + RHSExt.sext(W * 2); + } else { + LHSExt.zext(W * 2); + RHSExt.zext(W * 2); + } + + APInt MulExt = LHSExt * RHSExt; + + if (sign) { + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); + APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); + return MulExt.slt(Min) || MulExt.sgt(Max); + } else + return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); +} + + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. +static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, + APInt Demanded) { + assert(I && "No instruction?"); + assert(OpNo < I->getNumOperands() && "Operand index too large"); + + // If the operand is not a constant integer, nothing to do. + ConstantInt *OpC = dyn_cast(I->getOperand(OpNo)); + if (!OpC) return false; + + // If there are no bits set that aren't demanded, nothing to do. + Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + if ((~Demanded & OpC->getValue()) == 0) + return false; + + // This instruction is producing bits that are not demanded. Shrink the RHS. + Demanded &= OpC->getValue(); + I->setOperand(OpNo, ConstantInt::get(Demanded)); + return true; +} + +// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +// set of known zero and one bits, compute the maximum and minimum values that +// could have the specified known zero and known one bits, returning them in +// min/max. +static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, + const APInt& KnownOne, + APInt& Min, APInt& Max) { + assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && + KnownZero.getBitWidth() == Min.getBitWidth() && + KnownZero.getBitWidth() == Max.getBitWidth() && + "KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); + + // The minimum value is when all unknown bits are zeros, EXCEPT for the sign + // bit if it is unknown. 
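+  // For example, with 4-bit values, KnownZero = 0100 and KnownOne = 0001
+  // leave bits 3 and 1 unknown; the candidates are 0001, 0011, 1001, 1011,
+  // i.e. 1, 3, -7, -5.  The minimum, 1001, takes the unknown sign bit as
+  // one, and the maximum, 0011, takes it as zero, as computed below.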
+ Min = KnownOne; + Max = KnownOne|UnknownBits; + + if (UnknownBits.isNegative()) { // Sign bit is unknown + Min.set(Min.getBitWidth()-1); + Max.clear(Max.getBitWidth()-1); + } +} + +// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and +// a set of known zero and one bits, compute the maximum and minimum values that +// could have the specified known zero and known one bits, returning them in +// min/max. +static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, + const APInt &KnownOne, + APInt &Min, APInt &Max) { + assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && + KnownZero.getBitWidth() == Min.getBitWidth() && + KnownZero.getBitWidth() == Max.getBitWidth() && + "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); + + // The minimum value is when the unknown bits are all zeros. + Min = KnownOne; + // The maximum value is when the unknown bits are all ones. + Max = KnownOne|UnknownBits; +} + +/// SimplifyDemandedInstructionBits - Inst is an integer instruction that +/// SimplifyDemandedBits knows about. See if the instruction has any +/// properties that allow us to simplify its operands. +bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { + unsigned BitWidth = cast(Inst.getType())->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); + + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, + KnownZero, KnownOne, 0); + if (V == 0) return false; + if (V == &Inst) return true; + ReplaceInstUsesWith(Inst, V); + return true; +} + +/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the +/// specified instruction operand if possible, updating it in place. It returns +/// true if it made any change and false otherwise. +bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, + KnownZero, KnownOne, Depth); + if (NewVal == 0) return false; + U.set(NewVal); + return true; +} + + +/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler +/// value based on the demanded bits. When this function is called, it is known +/// that only the bits set in DemandedMask of the result of V are ever used +/// downstream. Consequently, depending on the mask and V, it may be possible +/// to replace V with a constant or one of its operands. In such cases, this +/// function does the replacement and returns true. In all other cases, it +/// returns false after analyzing the expression and setting KnownOne and known +/// to be one in the expression. KnownZero contains all the bits that are known +/// to be zero in the expression. These are provided to potentially allow the +/// caller (which might recursively be SimplifyDemandedBits itself) to simplify +/// the expression. KnownOne and KnownZero always follow the invariant that +/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that +/// the bits in KnownOne and KnownZero may only be accurate for those bits set +/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero +/// and KnownOne must all be the same. +/// +/// This returns null if it did not change anything and it permits no +/// simplification. This returns V itself if it did some simplification of V's +/// operands based on the information about what bits are demanded. 
This returns +/// some other non-null value if it found out that V is equal to another value +/// in the context where the specified bits are demanded, but not for all users. +Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + assert(V != 0 && "Null pointer of Value???"); + assert(Depth <= 6 && "Limit Search Depth"); + uint32_t BitWidth = DemandedMask.getBitWidth(); + const Type *VTy = V->getType(); + assert((TD || !isa(VTy)) && + "SimplifyDemandedBits needs to know bit widths!"); + assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) && + (!isa(VTy) || + VTy->getPrimitiveSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne \ + must have same BitWidth"); + if (ConstantInt *CI = dyn_cast(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & DemandedMask; + KnownZero = ~KnownOne & DemandedMask; + return 0; + } + if (isa(V)) { + // We know all of the bits for a constant! + KnownOne.clear(); + KnownZero = DemandedMask; + return 0; + } + + KnownZero.clear(); + KnownOne.clear(); + if (DemandedMask == 0) { // Not demanding any bits from V. + if (isa(V)) + return 0; + return UndefValue::get(VTy); + } + + if (Depth == 6) // Limit search depth. + return 0; + + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; + + Instruction *I = dyn_cast(V); + if (!I) { + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + return 0; // Only analyze instructions. + } + + // If there are multiple uses of this value and we aren't at the root, then + // we can't do any simplifications of the operands, because DemandedMask + // only reflects the bits demanded by *one* of the users. + if (Depth != 0 && !I->hasOneUse()) { + // Despite the fact that we can't simplify this instruction in all User's + // context, we can at least compute the knownzero/knownone bits, and we can + // do simplifications that apply to *just* the one user if we know that + // this instruction has a simpler value in that context. + if (I->getOpcode() == Instruction::And) { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + } else if (I->getOpcode() == Instruction::Or) { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. 
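+      // (For instance, if only the low byte of (X | 0xFF00) is demanded,
+      // the constant contributes known-zero bits across the whole demanded
+      // range, so the 'or' simplifies to X in this context.)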
+ ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + } + + // Compute the KnownZero/KnownOne bits to simplify things downstream. + ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + return 0; + } + + // If this is the root being simplified, allow it to have multiple uses, + // just set the DemandedMask to all bits so that we can try to simplify the + // operands. This allows visitTruncInst (for example) to simplify the + // operand of a trunc without duplicating all the logic below. + if (Depth == 0 && !V->hasOneUse()) + DemandedMask = APInt::getAllOnesValue(BitWidth); + + switch (I->getOpcode()) { + default: + ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + case Instruction::And: + // If either the LHS or the RHS are Zero, the result is zero. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + return I; + + // Output known-1 bits are only known if set in both the LHS & RHS. + RHSKnownOne &= LHSKnownOne; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + RHSKnownZero |= LHSKnownZero; + break; + case Instruction::Or: + // If either the LHS or the RHS are One, the result is One. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. 
+ // These bits cannot contribute to the result of the 'or'. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + RHSKnownZero &= LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + RHSKnownOne |= LHSKnownOne; + break; + case Instruction::Xor: { + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | + (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | + (RHSKnownOne & LHSKnownZero); + + // If all of the demanded bits are known to be zero on one side or the + // other, turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstBefore(Or, *I); + } + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + // all known + if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { + Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask); + Instruction *And = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + return InsertNewInstBefore(And, *I); + } + } + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 
+ if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + RHSKnownZero = KnownZeroOut; + RHSKnownOne = KnownOneOut; + break; + } + case Instruction::Select: + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(I, 1, DemandedMask) || + ShrinkDemandedConstant(I, 2, DemandedMask)) + return I; + + // Only known if known in both the LHS and RHS. + RHSKnownOne &= LHSKnownOne; + RHSKnownZero &= LHSKnownZero; + break; + case Instruction::Trunc: { + unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + DemandedMask.zext(truncBf); + RHSKnownZero.zext(truncBf); + RHSKnownOne.zext(truncBf); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + DemandedMask.trunc(BitWidth); + RHSKnownZero.trunc(BitWidth); + RHSKnownOne.trunc(BitWidth); + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + break; + } + case Instruction::BitCast: + if (!I->getOperand(0)->getType()->isInteger()) + return false; // vector->int or fp->int? + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + break; + case Instruction::ZExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + + DemandedMask.trunc(SrcBitWidth); + RHSKnownZero.trunc(SrcBitWidth); + RHSKnownOne.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + DemandedMask.zext(BitWidth); + RHSKnownZero.zext(BitWidth); + RHSKnownOne.zext(BitWidth); + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + // The top bits are known to be zero. + RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + + APInt InputDemandedBits = DemandedMask & + APInt::getLowBitsSet(BitWidth, SrcBitWidth); + + APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. + if ((NewBits & DemandedMask) != 0) + InputDemandedBits.set(SrcBitWidth-1); + + InputDemandedBits.trunc(SrcBitWidth); + RHSKnownZero.trunc(SrcBitWidth); + RHSKnownOne.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + InputDemandedBits.zext(BitWidth); + RHSKnownZero.zext(BitWidth); + RHSKnownOne.zext(BitWidth); + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, or if the NewBits are not demanded + // convert this into a zero extension. 
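+    // (If none of the extended bits are demanded, the copies of the sign
+    // bit are irrelevant; and if the sign bit is known zero, sign and zero
+    // extension produce identical bits.  Either way a zext computes the
+    // same demanded bits.)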
+ if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + // Convert to ZExt cast + CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); + return InsertNewInstBefore(NewCast, *I); + } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set + RHSKnownOne |= NewBits; + } + break; + } + case Instruction::Add: { + // Figure out what the input bits are. If the top bits of the and result + // are not demanded, then the add doesn't demand them from its input + // either. + unsigned NLZ = DemandedMask.countLeadingZeros(); + + // If there is a constant on the RHS, there are a variety of xformations + // we can do. + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + // If null, this should be simplified elsewhere. Some of the xforms here + // won't work if the RHS is zero. + if (RHS->isZero()) + break; + + // If the top bit of the output is demanded, demand everything from the + // input. Otherwise, we demand all the input bits except NLZ top bits. + APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); + + // Find information about known zero/one bits in the input. + if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + + // If the RHS of the add has bits set that can't affect the input, reduce + // the constant. + if (ShrinkDemandedConstant(I, 1, InDemandedBits)) + return I; + + // Avoid excess work. + if (LHSKnownZero == 0 && LHSKnownOne == 0) + break; + + // Turn it into OR if input bits are zero. + if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstBefore(Or, *I); + } + + // We can say something about the output known-zero and known-one bits, + // depending on potential carries from the input constant and the + // unknowns. For example if the LHS is known to have at most the 0x0F0F0 + // bits set and the RHS constant is 0x01001, then we know we have a known + // one mask of 0x00001 and a known zero mask of 0xE0F0E. + + // To compute this, we first compute the potential carry bits. These are + // the bits which may be modified. I'm not aware of a better way to do + // this scan. + const APInt &RHSVal = RHS->getValue(); + APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); + + // Now that we know which bits have carries, compute the known-1/0 sets. + + // Bits are known one if they are known zero in one operand and one in the + // other, and there is no input carry. + RHSKnownOne = ((LHSKnownZero & RHSVal) | + (LHSKnownOne & ~RHSVal)) & ~CarryBits; + + // Bits are known zero if they are known zero in both operands and there + // is no input carry. + RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; + } else { + // If the high-bits of this ADD are not demanded, then it does not demand + // the high bits of its LHS or RHS. + if (DemandedMask[BitWidth-1] == 0) { + // Right fill the mask of bits for this ADD to demand the most + // significant bit and all those below it. + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + } + } + break; + } + case Instruction::Sub: + // If the high-bits of this SUB are not demanded, then it does not demand + // the high bits of its LHS or RHS. 
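+    // (Carries and borrows propagate only from low bits toward high bits,
+    // so the low bits of a sum or difference never depend on the high bits
+    // of the operands.)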
+ if (DemandedMask[BitWidth-1] == 0) { + // Right fill the mask of bits for this SUB to demand the most + // significant bit and all those below it. + uint32_t NLZ = DemandedMask.countLeadingZeros(); + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // the known zeros and ones. + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + case Instruction::Shl: + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + RHSKnownZero <<= ShiftAmt; + RHSKnownOne <<= ShiftAmt; + // low bits known zero. + if (ShiftAmt) + RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::LShr: + // For a logical shift right + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); + RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + if (ShiftAmt) { + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + RHSKnownZero |= HighBits; // high bits known zero. + } + } + break; + case Instruction::AShr: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) { + // Perform the logical shift right. + Instruction *NewVal = BinaryOperator::CreateLShr( + I->getOperand(0), I->getOperand(1), I->getName()); + return InsertNewInstBefore(NewVal, *I); + } + + // If the sign bit is the only bit demanded by this ashr, then there is no + // need to do it, the shift doesn't change the high bit. + if (DemandedMask.isSignBit()) + return I->getOperand(0); + + if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Signed shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + // If any of the "high bits" are demanded, we should set the sign bit as + // demanded. + if (DemandedMask.countLeadingZeros() <= ShiftAmt) + DemandedMaskIn.set(BitWidth-1); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); + RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + + // Handle the sign bits. 
+ APInt SignBit(APInt::getSignBit(BitWidth)); + // Adjust to where it is now in the mask. + SignBit = APIntOps::lshr(SignBit, ShiftAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || + (HighBits & ~DemandedMask) == HighBits) { + // Perform the logical shift right. + Instruction *NewVal = BinaryOperator::CreateLShr( + I->getOperand(0), SA, I->getName()); + return InsertNewInstBefore(NewVal, *I); + } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. + RHSKnownOne |= HighBits; + } + } + break; + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { + APInt RA = Rem->getValue().abs(); + if (RA.isPowerOf2()) { + if (DemandedMask.ule(RA)) // srem won't affect demanded bits + return I->getOperand(0); + + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + + if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) + LHSKnownZero |= ~LowBits; + + KnownZero |= LHSKnownZero & DemandedMask; + + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + } + } + break; + case Instruction::URem: { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, + KnownZero2, KnownOne2, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), AllOnes, + KnownZero2, KnownOne2, Depth+1)) + return I; + + unsigned Leaders = KnownZero2.countLeadingOnes(); + Leaders = std::max(Leaders, + KnownZero2.countLeadingOnes()); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: { + // If the only bits demanded come from one byte of the bswap result, + // just shift the input byte into position to eliminate the bswap. + unsigned NLZ = DemandedMask.countLeadingZeros(); + unsigned NTZ = DemandedMask.countTrailingZeros(); + + // Round NTZ down to the next byte. If we have 11 trailing zeros, then + // we need all the bits down to bit 8. Likewise, round NLZ. If we + // have 14 leading zeros, round to 8. + NLZ &= ~7; + NTZ &= ~7; + // If we need exactly one byte, we can do this transformation. + if (BitWidth-NLZ-NTZ == 8) { + unsigned ResultBit = NTZ; + unsigned InputBit = BitWidth-NTZ-8; + + // Replace this with either a left or right shift to get the byte into + // the right place. + Instruction *NewVal; + if (InputBit > ResultBit) + NewVal = BinaryOperator::CreateLShr(I->getOperand(1), + ConstantInt::get(I->getType(), InputBit-ResultBit)); + else + NewVal = BinaryOperator::CreateShl(I->getOperand(1), + ConstantInt::get(I->getType(), ResultBit-InputBit)); + NewVal->takeName(I); + return InsertNewInstBefore(NewVal, *I); + } + + // TODO: Could compute known zero/one bits based on the input. + break; + } + } + } + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + } + + // If the client is only demanding bits that we know, return the known + // constant. 
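An aside on the bswap case above: when the demanded bits cover exactly one aligned byte of the result, the whole bswap collapses to a single shift of the input. A minimal standalone check, with the byte swap modeled by hand rather than via any intrinsic:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t X) {      // hand-rolled byte swap
      return (X >> 24) | ((X >> 8) & 0xFF00u) |
             ((X << 8) & 0xFF0000u) | (X << 24);
    }

    int main() {
      // Demand only byte 1 (mask 0xFF00): NTZ rounds to 8, NLZ to 16, so one
      // byte is live; InputBit = 32-8-8 = 16, ResultBit = 8, i.e. lshr by 8.
      for (uint32_t x = 0; x < 0x01000000u; x += 65537)
        assert((bswap32(x) & 0xFF00u) == ((x >> 8) & 0xFF00u));
      return 0;
    }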
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + Constant *C = ConstantInt::get(RHSKnownOne); + if (isa(V->getType())) + C = ConstantExpr::getIntToPtr(C, V->getType()); + return C; + } + return false; +} + + +/// SimplifyDemandedVectorElts - The specified value produces a vector with +/// any number of elements. DemandedElts contains the set of elements that are +/// actually used by the caller. This method analyzes which elements of the +/// operand are undef and returns that information in UndefElts. +/// +/// If the information about demanded elements can be used to simplify the +/// operation, the operation is simplified, then the resultant value is +/// returned. This returns null if no change was made. +Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt& UndefElts, + unsigned Depth) { + unsigned VWidth = cast(V->getType())->getNumElements(); + APInt EltMask(APInt::getAllOnesValue(VWidth)); + assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); + + if (isa(V)) { + // If the entire vector is undefined, just return this info. + UndefElts = EltMask; + return 0; + } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. + UndefElts = EltMask; + return UndefValue::get(V->getType()); + } + + UndefElts = 0; + if (ConstantVector *CP = dyn_cast(V)) { + const Type *EltTy = cast(V->getType())->getElementType(); + Constant *Undef = UndefValue::get(EltTy); + + std::vector Elts; + for (unsigned i = 0; i != VWidth; ++i) + if (!DemandedElts[i]) { // If not demanded, set to undef. + Elts.push_back(Undef); + UndefElts.set(i); + } else if (isa(CP->getOperand(i))) { // Already undef. + Elts.push_back(Undef); + UndefElts.set(i); + } else { // Otherwise, defined. + Elts.push_back(CP->getOperand(i)); + } + + // If we changed the constant, return it. + Constant *NewCP = ConstantVector::get(Elts); + return NewCP != CP ? NewCP : 0; + } else if (isa(V)) { + // Simplify the CAZ to a ConstantVector where the non-demanded elements are + // set to undef. + + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. + if (DemandedElts == ((1ULL << VWidth) -1)) + return 0; + + const Type *EltTy = cast(V->getType())->getElementType(); + Constant *Zero = Constant::getNullValue(EltTy); + Constant *Undef = UndefValue::get(EltTy); + std::vector Elts; + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elt = DemandedElts[i] ? Zero : Undef; + Elts.push_back(Elt); + } + UndefElts = DemandedElts ^ EltMask; + return ConstantVector::get(Elts); + } + + // Limit search depth. + if (Depth == 10) + return 0; + + // If multiple users are using the root value, procede with + // simplification conservatively assuming that all elements + // are needed. + if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. + // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) + // TODO: Just compute the UndefElts information recursively. + return 0; + + // Conservatively assume that all elements are needed. + DemandedElts = EltMask; + } + + Instruction *I = dyn_cast(V); + if (!I) return 0; // Only analyze instructions. + + bool MadeChange = false; + APInt UndefElts2(VWidth, 0); + Value *TmpV; + switch (I->getOpcode()) { + default: break; + + case Instruction::InsertElement: { + // If this is a variable index, we don't know which element it overwrites. + // demand exactly the same input as we produce. 
+ ConstantInt *Idx = dyn_cast(I->getOperand(2)); + if (Idx == 0) { + // Note that we can't propagate undef elt info, because we don't know + // which elt is getting updated. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; + } + + // If this is inserting an element that isn't demanded, remove this + // insertelement. + unsigned IdxNo = Idx->getZExtValue(); + if (IdxNo >= VWidth || !DemandedElts[IdxNo]) + return AddSoonDeadInstToWorklist(*I, 0); + + // Otherwise, the element inserted overwrites whatever was there, so the + // input demanded set is simpler than the output set. + APInt DemandedElts2 = DemandedElts; + DemandedElts2.clear(IdxNo); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + // The inserted element is defined. + UndefElts.clear(IdxNo); + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast(I); + uint64_t LHSVWidth = + cast(Shuffle->getOperand(0)->getType())->getNumElements(); + APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); + for (unsigned i = 0; i < VWidth; i++) { + if (DemandedElts[i]) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal != -1u) { + assert(MaskVal < LHSVWidth * 2 && + "shufflevector mask index out of range!"); + if (MaskVal < LHSVWidth) + LeftDemanded.set(MaskVal); + else + RightDemanded.set(MaskVal - LHSVWidth); + } + } + } + + APInt UndefElts4(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, + UndefElts4, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + APInt UndefElts3(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, + UndefElts3, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + bool NewUndefElts = false; + for (unsigned i = 0; i < VWidth; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u) { + UndefElts.set(i); + } else if (MaskVal < LHSVWidth) { + if (UndefElts4[MaskVal]) { + NewUndefElts = true; + UndefElts.set(i); + } + } else { + if (UndefElts3[MaskVal - LHSVWidth]) { + NewUndefElts = true; + UndefElts.set(i); + } + } + } + + if (NewUndefElts) { + // Add additional discovered undefs. + std::vector Elts; + for (unsigned i = 0; i < VWidth; ++i) { + if (UndefElts[i]) + Elts.push_back(UndefValue::get(Type::Int32Ty)); + else + Elts.push_back(ConstantInt::get(Type::Int32Ty, + Shuffle->getMaskValue(i))); + } + I->setOperand(2, ConstantVector::get(Elts)); + MadeChange = true; + } + break; + } + case Instruction::BitCast: { + // Vector->vector casts only. + const VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); + if (!VTy) break; + unsigned InVWidth = VTy->getNumElements(); + APInt InputDemandedElts(InVWidth, 0); + unsigned Ratio; + + if (VWidth == InVWidth) { + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same + // elements as are demanded of us. + Ratio = 1; + InputDemandedElts = DemandedElts; + } else if (VWidth > InVWidth) { + // Untested so far. + break; + + // If there are more elements in the result than there are in the source, + // then an input element is live if any of the corresponding output + // elements are live. + Ratio = VWidth/InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + if (DemandedElts[OutIdx]) + InputDemandedElts.set(OutIdx/Ratio); + } + } else { + // Untested so far. 
+ break; + + // If there are more elements in the source than there are in the result, + // then an input element is live if the corresponding output element is + // live. + Ratio = InVWidth/VWidth; + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (DemandedElts[InIdx/Ratio]) + InputDemandedElts.set(InIdx); + } + + // div/rem demand all inputs, because they don't want divide by zero. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, + UndefElts2, Depth+1); + if (TmpV) { + I->setOperand(0, TmpV); + MadeChange = true; + } + + UndefElts = UndefElts2; + if (VWidth > InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the result than there are in the source, + // then an output element is undef if the corresponding input element is + // undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) + if (UndefElts2[OutIdx/Ratio]) + UndefElts.set(OutIdx); + } else if (VWidth < InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the source than there are in the result, + // then a result element is undef if all of the corresponding input + // elements are undef. + UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (!UndefElts2[InIdx]) // Not undef? + UndefElts.clear(InIdx/Ratio); // Clear undef bit. + } + break; + } + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // div/rem demand all inputs, because they don't want divide by zero. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. Consider things + // like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + break; + + case Instruction::Call: { + IntrinsicInst *II = dyn_cast(I); + if (!II) break; + switch (II->getIntrinsicID()) { + default: break; + + // Binary vector operations that work column-wise. A dest element is a + // function of the corresponding input elements from the two inputs. + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + case Intrinsic::x86_sse2_min_sd: + case Intrinsic::x86_sse2_max_sd: + TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } + + // If only the low elt is demanded and this is a scalarizable intrinsic, + // scalarize it now. + if (DemandedElts == 1) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + // TODO: Lower MIN/MAX/ABS/etc + Value *LHS = II->getOperand(1); + Value *RHS = II->getOperand(2); + // Extract the element as scalars. 
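An aside on the scalarization performed just below: the *_ss/_sd intrinsics define only lane 0 of their result from both inputs, so when lane 0 is the only demanded element the vector op is equivalent to one scalar op. A plain-C++ model of that claim (the lane behaviour is an assumption written out by hand here, not taken from the SSE intrinsic headers):

    // Illustrative model only; lane semantics hand-modeled, not from headers.
    #include <cassert>

    // Model of x86_sse_sub_ss: lane 0 is a[0]-b[0]; lanes 1..3 pass a through.
    static void sub_ss(const float a[4], const float b[4], float out[4]) {
      out[0] = a[0] - b[0];
      for (int i = 1; i < 4; ++i) out[i] = a[i];
    }

    int main() {
      float a[4] = {8, 2, 3, 4}, b[4] = {5, 9, 9, 9}, r[4];
      sub_ss(a, b, r);
      // With only lane 0 demanded, the whole op is one scalar subtract that
      // gets re-inserted into lane 0, which is what the code below emits.
      assert(r[0] == a[0] - b[0]);
      return 0;
    }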
+        LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U, "tmp"), *II);
+        RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U, "tmp"), *II);
+
+        switch (II->getIntrinsicID()) {
+        default: assert(0 && "Case stmts out of sync!");
+        case Intrinsic::x86_sse_sub_ss:
+        case Intrinsic::x86_sse2_sub_sd:
+          TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
+                                                       II->getName()), *II);
+          break;
+        case Intrinsic::x86_sse_mul_ss:
+        case Intrinsic::x86_sse2_mul_sd:
+          TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
+                                                       II->getName()), *II);
+          break;
+        }
+
+        Instruction *New =
+          InsertElementInst::Create(UndefValue::get(II->getType()), TmpV, 0U,
+                                    II->getName());
+        InsertNewInstBefore(New, *II);
+        AddSoonDeadInstToWorklist(*II, 0);
+        return New;
+        }
+      }
+
+      // Output elements are undefined if both are undefined.  Consider things
+      // like undef&0.  The result is known zero, not undef.
+      UndefElts &= UndefElts2;
+      break;
+    }
+    break;
+  }
+  }
+  return MadeChange ? I : 0;
+}
+
+
+/// AssociativeOpt - Perform an optimization on an associative operator.  This
+/// function is designed to check a chain of associative operators for a
+/// potential to apply a certain optimization.  Since the optimization may be
+/// applicable if the expression was reassociated, this checks the chain, then
+/// reassociates the expression as necessary to expose the optimization
+/// opportunity.  This makes use of a special Functor, which must define
+/// 'shouldApply' and 'apply' methods.
+///
+template<typename Functor>
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+  unsigned Opcode = Root.getOpcode();
+  Value *LHS = Root.getOperand(0);
+
+  // Quick check, see if the immediate LHS matches...
+  if (F.shouldApply(LHS))
+    return F.apply(Root);
+
+  // Otherwise, if the LHS is not of the same opcode as the root, return.
+  Instruction *LHSI = dyn_cast<Instruction>(LHS);
+  while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) {
+    // Should we apply this transform to the RHS?
+    bool ShouldApply = F.shouldApply(LHSI->getOperand(1));
+
+    // If not to the RHS, check to see if we should apply to the LHS...
+    if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) {
+      cast<BinaryOperator>(LHSI)->swapOperands();   // Make the LHS the RHS
+      ShouldApply = true;
+    }
+
+    // If the functor wants to apply the optimization to the RHS of LHSI,
+    // reassociate the expression from ((? op A) op B) to (? op (A op B))
+    if (ShouldApply) {
+      // Now all of the instructions are in the current basic block, go ahead
+      // and perform the reassociation.
+      Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
+
+      // First move the selected RHS to the LHS of the root...
+      Root.setOperand(0, LHSI->getOperand(1));
+
+      // Make what used to be the LHS of the root be the user of the root...
+      Value *ExtraOperand = TmpLHSI->getOperand(1);
+      if (&Root == TmpLHSI) {
+        Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
+        return 0;
+      }
+      Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
+      TmpLHSI->setOperand(1, &Root);             // TmpLHSI now uses the root
+      BasicBlock::iterator ARI = &Root; ++ARI;
+      TmpLHSI->moveBefore(ARI);                  // Move TmpLHSI to after Root
+      ARI = Root;
+
+      // Now propagate the ExtraOperand down the chain of instructions until we
+      // get to LHSI.
+      while (TmpLHSI != LHSI) {
+        Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
+        // Move the instruction to immediately before the chain we are
+        // constructing to avoid breaking dominance properties.
+ NextLHSI->moveBefore(ARI); + ARI = NextLHSI; + + Value *NextOp = NextLHSI->getOperand(1); + NextLHSI->setOperand(1, ExtraOperand); + TmpLHSI = NextLHSI; + ExtraOperand = NextOp; + } + + // Now that the instructions are reassociated, have the functor perform + // the transformation... + return F.apply(Root); + } + + LHSI = dyn_cast(LHSI->getOperand(0)); + } + return 0; +} + +namespace { + +// AddRHS - Implements: X + X --> X << 1 +struct AddRHS { + Value *RHS; + AddRHS(Value *rhs) : RHS(rhs) {} + bool shouldApply(Value *LHS) const { return LHS == RHS; } + Instruction *apply(BinaryOperator &Add) const { + return BinaryOperator::CreateShl(Add.getOperand(0), + ConstantInt::get(Add.getType(), 1)); + } +}; + +// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2) +// iff C1&C2 == 0 +struct AddMaskingAnd { + Constant *C2; + AddMaskingAnd(Constant *c) : C2(c) {} + bool shouldApply(Value *LHS) const { + ConstantInt *C1; + return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && + ConstantExpr::getAnd(C1, C2)->isNullValue(); + } + Instruction *apply(BinaryOperator &Add) const { + return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); + } +}; + +} + +static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, + InstCombiner *IC) { + if (CastInst *CI = dyn_cast(&I)) { + return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I); + } + + // Figure out if the constant is the left or the right argument. + bool ConstIsRHS = isa(I.getOperand(1)); + Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); + + if (Constant *SOC = dyn_cast(SO)) { + if (ConstIsRHS) + return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); + return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); + } + + Value *Op0 = SO, *Op1 = ConstOperand; + if (!ConstIsRHS) + std::swap(Op0, Op1); + Instruction *New; + if (BinaryOperator *BO = dyn_cast(&I)) + New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op"); + else if (CmpInst *CI = dyn_cast(&I)) + New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + else { + assert(0 && "Unknown binary instruction type!"); + abort(); + } + return IC->InsertNewInstBefore(New, I); +} + +// FoldOpIntoSelect - Given an instruction with a select as one operand and a +// constant as the other operand, try to fold the binary operator into the +// select arguments. This also works for Cast instructions, which obviously do +// not have a second operand. +static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, + InstCombiner *IC) { + // Don't modify shared select instructions + if (!SI->hasOneUse()) return 0; + Value *TV = SI->getOperand(1); + Value *FV = SI->getOperand(2); + + if (isa(TV) || isa(FV)) { + // Bool selects with constant operands can be folded to logical ops. + if (SI->getType() == Type::Int1Ty) return 0; + + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); + Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); + + return SelectInst::Create(SI->getCondition(), SelectTrueVal, + SelectFalseVal); + } + return 0; +} + + +/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI +/// node as operand #0, see if we can fold the instruction into the PHI (which +/// is only possible if all operands to the PHI are constants). 
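An aside on the AddMaskingAnd functor defined above: with disjoint masks no bit column can receive two ones, so no carry is possible and the add equals an or. A standalone check, illustrative only:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 0x0F0Fu, C2 = 0xF0F0u;  // C1 & C2 == 0
      for (uint32_t a = 0; a < 0x10000u; a += 257)
        for (uint32_t b = 0; b < 0x10000u; b += 263)
          // Disjoint masks: no column gets two ones, so no carry can occur.
          assert(((a & C1) + (b & C2)) == ((a & C1) | (b & C2)));
      return 0;
    }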
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { + PHINode *PN = cast(I.getOperand(0)); + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (!PN->hasOneUse() || NumPHIValues == 0) return 0; + + // Check to see if all of the operands of the PHI are constants. If there is + // one non-constant value, remember the BB it is. If there is more than one + // or if *it* is a PHI, bail out. + BasicBlock *NonConstBB = 0; + for (unsigned i = 0; i != NumPHIValues; ++i) + if (!isa(PN->getIncomingValue(i))) { + if (NonConstBB) return 0; // More than one non-const value. + if (isa(PN->getIncomingValue(i))) return 0; // Itself a phi. + NonConstBB = PN->getIncomingBlock(i); + + // If the incoming non-constant value is in I's block, we have an infinite + // loop. + if (NonConstBB == I.getParent()) + return 0; + } + + // If there is exactly one non-constant value, we can insert a copy of the + // operation in that block. However, if this is a critical edge, we would be + // inserting the computation one some other paths (e.g. inside a loop). Only + // do this if the pred block is unconditionally branching into the phi block. + if (NonConstBB) { + BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); + if (!BI || !BI->isUnconditional()) return 0; + } + + // Okay, we can do the transformation: create the new PHI node. + PHINode *NewPN = PHINode::Create(I.getType(), ""); + NewPN->reserveOperandSpace(PN->getNumOperands()/2); + InsertNewInstBefore(NewPN, *PN); + NewPN->takeName(PN); + + // Next, add all of the operands to the PHI. + if (I.getNumOperands() == 2) { + Constant *C = cast(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + if (CmpInst *CI = dyn_cast(&I)) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else + InV = ConstantExpr::get(I.getOpcode(), InC, C); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + if (BinaryOperator *BO = dyn_cast(&I)) + InV = BinaryOperator::Create(BO->getOpcode(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else if (CmpInst *CI = dyn_cast(&I)) + InV = CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else + assert(0 && "Unknown binop!"); + + AddToWorkList(cast(InV)); + } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } else { + CastInst *CI = cast(&I); + const Type *RetTy = CI->getType(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp", + NonConstBB->getTerminator()); + AddToWorkList(cast(InV)); + } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } + return ReplaceInstUsesWith(I, NewPN); +} + + +/// WillNotOverflowSignedAdd - Return true if we can prove that: +/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) +/// This basically requires proving that the add in the original type would not +/// overflow to change the sign bit or have a carry out. +bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here are some simple + // ones. 
+ + // Add has the property that adding any two 2's complement numbers can only + // have one carry bit which can change a sign. As such, if LHS and RHS each + // have at least two sign bits, we know that the addition of the two values will + // sign extend fine. + if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) + return true; + + + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. + + return false; +} + + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Constant *RHSC = dyn_cast(RHS)) { + // X + undef -> undef + if (isa(RHS)) + return ReplaceInstUsesWith(I, RHS); + + // X + 0 --> X + if (!I.getType()->isFPOrFPVector()) { // NOTE: -0 + +0 = +0. + if (RHSC->isNullValue()) + return ReplaceInstUsesWith(I, LHS); + } else if (ConstantFP *CFP = dyn_cast(RHSC)) { + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) + return ReplaceInstUsesWith(I, LHS); + } + + if (ConstantInt *CI = dyn_cast(RHSC)) { + // X + (signbit) --> X ^ signbit + const APInt& Val = CI->getValue(); + uint32_t BitWidth = Val.getBitWidth(); + if (Val == APInt::getSignBit(BitWidth)) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (!isa(I.getType()) && SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(i1) - 1 -> select i1, 0, -1 + if (ZExtInst *ZI = dyn_cast(LHS)) + if (CI->isAllOnesValue() && + ZI->getOperand(0)->getType() == Type::Int1Ty) + return SelectInst::Create(ZI->getOperand(0), + Constant::getNullValue(I.getType()), + ConstantInt::getAllOnesValue(I.getType())); + } + + if (isa(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + ConstantInt *XorRHS = 0; + Value *XorLHS = 0; + if (isa(RHSC) && + match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits(); + const APInt& RHSVal = cast(RHSC)->getValue(); + + uint32_t Size = TySizeBits / 2; + APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); + APInt CFF80Val(-C0080Val); + do { + if (TySizeBits > Size) { + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. + if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || + (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { + // This is a sign extend if the top bits are known zero. + if (!MaskedValueIsZero(XorLHS, + APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) + Size = 0; // Not a sign ext, but can't be any others either. + break; + } + } + Size >>= 1; + C0080Val = APIntOps::lshr(C0080Val, Size); + CFF80Val = APIntOps::ashr(CFF80Val, Size); + } while (Size >= 1); + + // FIXME: This shouldn't be necessary. When the backends can handle types + // with funny bit widths then this switch statement should be removed. It + // is just here to get the size of the "middle" type back up to something + // that the back ends can handle. 
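An aside on the xor/add pattern scanned for above: flipping the sign bit of a value known to fit in the low byte and then adding 0xFF80 is exactly the 8-to-16-bit sign extension the code recovers. A standalone check, illustrative only:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 0x100u; ++x) {
        // ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), evaluated in 16 bits...
        uint16_t Folded = (uint16_t)(((x & 0xFFu) ^ 0x80u) + 0xFF80u);
        // ...equals the 8-to-16-bit sign extension of the low byte.
        uint16_t Sext = (x & 0x80u) ? (uint16_t)(0xFF00u | x) : (uint16_t)x;
        assert(Folded == Sext);
      }
      return 0;
    }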
+ const Type *MiddleType = 0; + switch (Size) { + default: break; + case 32: MiddleType = Type::Int32Ty; break; + case 16: MiddleType = Type::Int16Ty; break; + case 8: MiddleType = Type::Int8Ty; break; + } + if (MiddleType) { + Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext"); + InsertNewInstBefore(NewTrunc, I); + return new SExtInst(NewTrunc, I.getType(), I.getName()); + } + } + } + + if (I.getType() == Type::Int1Ty) + return BinaryOperator::CreateXor(LHS, RHS); + + // X + X --> X << 1 + if (I.getType()->isInteger()) { + if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result; + + if (Instruction *RHSI = dyn_cast(RHS)) { + if (RHSI->getOpcode() == Instruction::Sub) + if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B + return ReplaceInstUsesWith(I, RHSI->getOperand(0)); + } + if (Instruction *LHSI = dyn_cast(LHS)) { + if (LHSI->getOpcode() == Instruction::Sub) + if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B + return ReplaceInstUsesWith(I, LHSI->getOperand(0)); + } + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castNegVal(LHS)) { + if (LHS->getType()->isIntOrIntVector()) { + if (Value *RHSV = dyn_castNegVal(RHS)) { + Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum"); + InsertNewInstBefore(NewAdd, I); + return BinaryOperator::CreateNeg(NewAdd); + } + } + + return BinaryOperator::CreateSub(RHS, LHSV); + } + + // A + -B --> A - B + if (!isa(RHS)) + if (Value *V = dyn_castNegVal(RHS)) + return BinaryOperator::CreateSub(LHS, V); + + + ConstantInt *C2; + if (Value *X = dyn_castFoldableMul(LHS, C2)) { + if (X == RHS) // X*C + X --> X * (C+1) + return BinaryOperator::CreateMul(RHS, AddOne(C2)); + + // X*C1 + X*C2 --> X * (C1+C2) + ConstantInt *C1; + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, Add(C1, C2)); + } + + // X + X*C --> X * (C+1) + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); + + // X + ~X --> -1 since ~X = -X-1 + if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + + // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 + if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) + if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) + return R; + + // A+B --> A|B iff A and B have no bits set in common. + if (const IntegerType *IT = dyn_cast(I.getType())) { + APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + if (LHSKnownZero != 0) { + APInt RHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // No bits in common -> bitwise or. 
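An aside on the no-common-bits test completed just below: if every bit position is known zero in at least one operand, no column can carry, so the add is a disjoint or. A standalone check, using (a & b) == 0 as the runtime analogue of the known-zero condition:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a = 0; a < 256; ++a)
        for (uint32_t b = 0; b < 256; ++b)
          if ((a & b) == 0)           // runtime analogue of the known-zero test
            assert(a + b == (a | b)); // no column carries, so '+' is '|'
      return 0;
    }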
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateOr(LHS, RHS); + } + } + + // W*X + Y*Z --> W * (X+Z) iff W == Y + if (I.getType()->isIntOrIntVector()) { + Value *W, *X, *Y, *Z; + if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && + match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { + if (W != Y) { + if (W == Z) { + std::swap(Y, Z); + } else if (Y == X) { + std::swap(W, X); + } else if (X == Z) { + std::swap(Y, Z); + std::swap(W, X); + } + } + + if (W == Y) { + Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z, + LHS->getName()), I); + return BinaryOperator::CreateMul(W, NewAdd); + } + } + } + + if (ConstantInt *CRHS = dyn_cast(RHS)) { + Value *X = 0; + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::CreateSub(SubOne(CRHS), X); + + // (X & FF00) + xx00 -> (X+xx00) & FF00 + if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = And(CRHS, C2); + if (Anded == CRHS) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt& AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS, + LHS->getName()), I); + return BinaryOperator::CreateAnd(NewAdd, C2); + } + } + } + + // Try to fold constant add into select arguments. + if (SelectInst *SI = dyn_cast(LHS)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + } + + // add (cast *A to intptrtype) B -> + // cast (GEP (cast *A to sbyte*) B) --> intptrtype + { + CastInst *CI = dyn_cast(LHS); + Value *Other = RHS; + if (!CI) { + CI = dyn_cast(RHS); + Other = LHS; + } + if (CI && CI->getType()->isSized() && + (CI->getType()->getPrimitiveSizeInBits() == + TD->getIntPtrType()->getPrimitiveSizeInBits()) + && isa(CI->getOperand(0)->getType())) { + unsigned AS = + cast(CI->getOperand(0)->getType())->getAddressSpace(); + Value *I2 = InsertBitCastBefore(CI->getOperand(0), + PointerType::get(Type::Int8Ty, AS), I); + I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I); + return new PtrToIntInst(I2, CI->getType()); + } + } + + // add (select X 0 (sub n A)) A --> select X A n + { + SelectInst *SI = dyn_cast(LHS); + Value *A = RHS; + if (!SI) { + SI = dyn_cast(RHS); + A = LHS; + } + if (SI && SI->hasOneUse()) { + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + Value *N; + + // Can we fold the add into the argument of the select? + // We check both true and false select arguments for a matching subtract. + if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the true select value. + return SelectInst::Create(SI->getCondition(), N, A); + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the false select value. + return SelectInst::Create(SI->getCondition(), A, N); + } + } + + // Check for X+0.0. Simplify it to X if we know X is not -0.0. 
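An aside on the (X & FF00) + xx00 rewrite above: once the and-mask covers every bit from the added constant's lowest set bit upward, any carry stays inside the masked region, so the add commutes with the mask. An exhaustive 16-bit check, illustrative only:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t Mask = 0xFF00, C = 0x0100; // C lies inside Mask; Mask
                                                // covers C's lowest set bit up
      for (uint32_t x = 0; x <= 0xFFFFu; ++x) {
        uint16_t X = (uint16_t)x;
        assert((uint16_t)((X & Mask) + C) == (uint16_t)((X + C) & Mask));
      }
      return 0;
    }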
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+      return ReplaceInstUsesWith(I, LHS);
+
+  // Check for (add (sext x), y), see if we can merge this into an
+  // integer add followed by a sext.
+  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+    // (add (sext x), cst) --> (sext (add x, cst'))
+    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+      Constant *CI =
+        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+      if (LHSConv->hasOneUse() &&
+          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+        // Insert the new, smaller add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+                                                        CI, "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SExtInst(NewAdd, I.getType());
+      }
+    }
+
+    // (add (sext x), (sext y)) --> (sext (add int x, y))
+    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+      // single use (so we don't increase the number of sexts), and if the
+      // integer add will not overflow.
+      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+                                   RHSConv->getOperand(0))) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+                                                        RHSConv->getOperand(0),
+                                                        "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SExtInst(NewAdd, I.getType());
+      }
+    }
+  }
+
+  // Check for (add double (sitofp x), y), see if we can merge this into an
+  // integer add followed by a promotion.
+  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+    // ... if the constant fits in the integer value.  This is useful for things
+    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+    // requires a constant pool load, and generally allows the add to be better
+    // instcombined.
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+      Constant *CI =
+        ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+      if (LHSConv->hasOneUse() &&
+          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+                                                        CI, "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SIToFPInst(NewAdd, I.getType());
+      }
+    }
+
+    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+      // single use (so we don't increase the number of int->fp conversions),
+      // and if the integer add will not overflow.
+      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+                                   RHSConv->getOperand(0))) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+                                                        RHSConv->getOperand(0),
+                                                        "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SIToFPInst(NewAdd, I.getType());
+      }
+    }
+  }
+
+  return Changed ?
&I : 0; +} + +Instruction *InstCombiner::visitSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Op0 == Op1 && // sub X, X -> 0 + !I.getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // If this is a 'B = x-(-A)', change to B = x+A... + if (Value *V = dyn_castNegVal(Op1)) + return BinaryOperator::CreateAdd(Op0, V); + + if (isa(Op0)) + return ReplaceInstUsesWith(I, Op0); // undef - X -> undef + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); // X - undef -> undef + + if (ConstantInt *C = dyn_cast(Op0)) { + // Replace (-1 - A) with (~A)... + if (C->isAllOnesValue()) + return BinaryOperator::CreateNot(Op1); + + // C - ~X == X + (1+C) + Value *X = 0; + if (match(Op1, m_Not(m_Value(X)))) + return BinaryOperator::CreateAdd(X, AddOne(C)); + + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (C->isZero()) { + if (BinaryOperator *SI = dyn_cast(Op1)) { + if (SI->getOpcode() == Instruction::LShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert AShr. + return BinaryOperator::Create(Instruction::AShr, + SI->getOperand(0), CU, SI->getName()); + } + } + } + else if (SI->getOpcode() == Instruction::AShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert LShr. + return BinaryOperator::CreateLShr( + SI->getOperand(0), CU, SI->getName()); + } + } + } + } + } + + // Try to fold constant sub into select arguments. + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + } + + if (I.getType() == Type::Int1Ty) + return BinaryOperator::CreateXor(Op0, Op1); + + if (BinaryOperator *Op1I = dyn_cast(Op1)) { + if (Op1I->getOpcode() == Instruction::Add && + !Op0->getType()->isFPOrFPVector()) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName()); + else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { + if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) + // C1-(X+C2) --> (C1-C2)-X + return BinaryOperator::CreateSub(Subtract(CI1, CI2), + Op1I->getOperand(0)); + } + } + + if (Op1I->hasOneUse()) { + // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression + // is not used by anyone else... + // + if (Op1I->getOpcode() == Instruction::Sub && + !Op1I->getType()->isFPOrFPVector()) { + // Swap the two operands of the subexpr... + Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); + Op1I->setOperand(0, IIOp1); + Op1I->setOperand(1, IIOp0); + + // Create the new top level add instruction... + return BinaryOperator::CreateAdd(Op0, Op1); + } + + // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... 
+ // + if (Op1I->getOpcode() == Instruction::And && + (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { + Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); + + Value *NewNot = + InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I); + return BinaryOperator::CreateAnd(Op0, NewNot); + } + + // 0 - (X sdiv C) -> (X sdiv -C) + if (Op1I->getOpcode() == Instruction::SDiv) + if (ConstantInt *CSI = dyn_cast(Op0)) + if (CSI->isZero()) + if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) + return BinaryOperator::CreateSDiv(Op1I->getOperand(0), + ConstantExpr::getNeg(DivRHS)); + + // X - X*C --> X * (1-C) + ConstantInt *C2 = 0; + if (dyn_castFoldableMul(Op1I, C2) == Op0) { + Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2); + return BinaryOperator::CreateMul(Op0, CP1); + } + } + } + + if (!Op0->getType()->isFPOrFPVector()) + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + if (Op0I->getOpcode() == Instruction::Add) { + if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(1)); + else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(0)); + } else if (Op0I->getOpcode() == Instruction::Sub) { + if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y + return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); + } + } + + ConstantInt *C1; + if (Value *X = dyn_castFoldableMul(Op0, C1)) { + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::CreateMul(Op1, SubOne(C1)); + + ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, Subtract(C1, C2)); + } + return 0; +} + +/// isSignBitCheck - Given an exploded icmp instruction, return true if the +/// comparison only checks the sign bit. If it only checks the sign bit, set +/// TrueIfSigned if the result of the comparison is true when the input value is +/// signed. +static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, + bool &TrueIfSigned) { + switch (pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS->isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 + TrueIfSigned = true; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == high-bit-mask - 1 + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS->getValue().isSignBit(); + default: + return false; + } +} + +Instruction *InstCombiner::visitMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0); + + if (isa(I.getOperand(1))) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // Simplify mul instructions with a constant RHS... 
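An aside on isSignBitCheck above: each accepted predicate/constant pair is just a different spelling of "the sign bit of the LHS is set". A standalone spot check, illustrative only:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Tests[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN};
      for (int32_t x : Tests) {
        bool Signed = x < 0;                             // the sign bit of x
        assert((x <= -1) == Signed);                     // ICMP_SLE vs -1
        assert((x > -1) == !Signed);                     // ICMP_SGT vs -1
        assert(((uint32_t)x > 0x7FFFFFFFu) == Signed);   // ICMP_UGT vs max-1's form
        assert(((uint32_t)x >= 0x80000000u) == Signed);  // ICMP_UGE vs signbit
      }
      return 0;
    }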
+ if (Constant *Op1 = dyn_cast(I.getOperand(1))) { + if (ConstantInt *CI = dyn_cast(Op1)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + if (CI->isZero()) + return ReplaceInstUsesWith(I, Op1); // X * 0 == 0 + if (CI->equalsInt(1)) // X * 1 == X + return ReplaceInstUsesWith(I, Op0); + if (CI->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + const APInt& Val = cast(CI)->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + return BinaryOperator::CreateShl(Op0, + ConstantInt::get(Op0->getType(), Val.logBase2())); + } + } else if (ConstantFP *Op1F = dyn_cast(Op1)) { + if (Op1F->isNullValue()) + return ReplaceInstUsesWith(I, Op1); + + // "In IEEE floating point, x*1 is not equivalent to x for nans. However, + // ANSI says we can drop signals, so we can do this anyway." (from GCC) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa(Op1->getType())) { + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); + + if (ConstantVector *Op1V = dyn_cast(Op1)) { + if (Op1V->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantFP *F = dyn_cast(Splat)) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); + if (ConstantInt *CI = dyn_cast(Splat)) + if (CI->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + } + } + } + + if (BinaryOperator *Op0I = dyn_cast(Op0)) + if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && + isa(Op0I->getOperand(1)) && isa(Op1)) { + // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. + Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0), + Op1, "tmp"); + InsertNewInstBefore(Add, I); + Value *C1C2 = ConstantExpr::getMul(Op1, + cast(Op0I->getOperand(1))); + return BinaryOperator::CreateAdd(Add, C1C2); + + } + + // Try to fold constant mul into select arguments. 
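An aside on the BoolCast transform a little further below: multiplying by zext(x s< 0) is the same as anding with the all-signbits value x ashr 31. A standalone check, which assumes arithmetic right shift on a negative signed int (true on mainstream targets, implementation-defined in older C++):

    // Illustrative check only; not part of the imported LLVM sources.
    // Assumes '>>' on a negative int32_t is an arithmetic shift.
    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Xs[] = {0, 1, -1, 123456, -123456, INT32_MAX, INT32_MIN};
      const uint32_t Y = 0xDEADBEEFu;
      for (int32_t X : Xs) {
        uint32_t Mul  = (uint32_t)(X < 0) * Y;   // zext(icmp slt X, 0) * Y
        uint32_t Mask = (uint32_t)(X >> 31) & Y; // (X ashr 31) & Y
        assert(Mul == Mask);
      }
      return 0;
    }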
+ if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(I.getOperand(1))) + return BinaryOperator::CreateMul(Op0v, Op1v); + + // (X / Y) * Y = X - (X % Y) + // (X / Y) * -Y = (X % Y) - X + { + Value *Op1 = I.getOperand(1); + BinaryOperator *BO = dyn_cast(Op0); + if (!BO || + (BO->getOpcode() != Instruction::UDiv && + BO->getOpcode() != Instruction::SDiv)) { + Op1 = Op0; + BO = dyn_cast(I.getOperand(1)); + } + Value *Neg = dyn_castNegVal(Op1); + if (BO && BO->hasOneUse() && + (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) && + (BO->getOpcode() == Instruction::UDiv || + BO->getOpcode() == Instruction::SDiv)) { + Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); + + Instruction *Rem; + if (BO->getOpcode() == Instruction::UDiv) + Rem = BinaryOperator::CreateURem(Op0BO, Op1BO); + else + Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO); + + InsertNewInstBefore(Rem, I); + Rem->takeName(BO); + + if (Op1BO == Op1) + return BinaryOperator::CreateSub(Op0BO, Rem); + else + return BinaryOperator::CreateSub(Rem, Op0BO); + } + } + + if (I.getType() == Type::Int1Ty) + return BinaryOperator::CreateAnd(Op0, I.getOperand(1)); + + // If one of the operands of the multiply is a cast from a boolean value, then + // we know the bool is either zero or one, so this is a 'masking' multiply. + // See if we can simplify things based on how the boolean was originally + // formed. + CastInst *BoolCast = 0; + if (ZExtInst *CI = dyn_cast(Op0)) + if (CI->getOperand(0)->getType() == Type::Int1Ty) + BoolCast = CI; + if (!BoolCast) + if (ZExtInst *CI = dyn_cast(I.getOperand(1))) + if (CI->getOperand(0)->getType() == Type::Int1Ty) + BoolCast = CI; + if (BoolCast) { + if (ICmpInst *SCI = dyn_cast(BoolCast->getOperand(0))) { + Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); + const Type *SCOpTy = SCIOp0->getType(); + bool TIS = false; + + // If the icmp is true iff the sign bit of X is set, then convert this + // multiply into a shift/and combination. + if (isa(SCIOp1) && + isSignBitCheck(SCI->getPredicate(), cast(SCIOp1), TIS) && + TIS) { + // Shift the X value right to turn it into "all signbits". + Constant *Amt = ConstantInt::get(SCIOp0->getType(), + SCOpTy->getPrimitiveSizeInBits()-1); + Value *V = + InsertNewInstBefore( + BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt, + BoolCast->getOperand(0)->getName()+ + ".mask"), I); + + // If the multiply type is not the same as the source type, sign extend + // or truncate to the multiply type. + if (I.getType() != V->getType()) { + uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits(); + uint32_t DstBits = I.getType()->getPrimitiveSizeInBits(); + Instruction::CastOps opcode = + (SrcBits == DstBits ? Instruction::BitCast : + (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc)); + V = InsertCastBefore(opcode, V, I.getType(), I); + } + + Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0; + return BinaryOperator::CreateAnd(V, OtherOp); + } + } + } + + return Changed ? &I : 0; +} + +/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select +/// instruction. +bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { + SelectInst *SI = cast(I.getOperand(1)); + + // div/rem X, (Cond ? 
0 : Y) -> div/rem X, Y + int NonNullOperand = -1; + if (Constant *ST = dyn_cast(SI->getOperand(1))) + if (ST->isNullValue()) + NonNullOperand = 2; + // div/rem X, (Cond ? Y : 0) -> div/rem X, Y + if (Constant *ST = dyn_cast(SI->getOperand(2))) + if (ST->isNullValue()) + NonNullOperand = 1; + + if (NonNullOperand == -1) + return false; + + Value *SelectCond = SI->getOperand(0); + + // Change the div/rem to use 'Y' instead of the select. + I.setOperand(1, SI->getOperand(NonNullOperand)); + + // Okay, we know we replace the operand of the div/rem with 'Y' with no + // problem. However, the select, or the condition of the select may have + // multiple uses. Based on our knowledge that the operand must be non-zero, + // propagate the known value for the select into other uses of it, and + // propagate a known value of the condition into its other users. + + // If the select and condition only have a single use, don't bother with this, + // early exit. + if (SI->use_empty() && SelectCond->hasOneUse()) + return true; + + // Scan the current block backward, looking for other uses of SI. + BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); + + while (BBI != BBFront) { + --BBI; + // If we found a call to a function, we can't assume it will return, so + // information from below it cannot be propagated above it. + if (isa(BBI) && !isa(BBI)) + break; + + // Replace uses of the select or its condition with the known values. + for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); + I != E; ++I) { + if (*I == SI) { + *I = SI->getOperand(NonNullOperand); + AddToWorkList(BBI); + } else if (*I == SelectCond) { + *I = NonNullOperand == 1 ? ConstantInt::getTrue() : + ConstantInt::getFalse(); + AddToWorkList(BBI); + } + } + + // If we past the instruction, quit looking for it. + if (&*BBI == SI) + SI = 0; + if (&*BBI == SelectCond) + SelectCond = 0; + + // If we ran out of things to eliminate, break out of the loop. + if (SelectCond == 0 && SI == 0) + break; + + } + return true; +} + + +/// This function implements the transforms on div instructions that work +/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is +/// used by the visitors to those instructions. +/// @brief Transforms common to all three div instructions +Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // undef / X -> 0 for integer. + // undef / X -> undef for FP (the undef could be a snan). + if (isa(Op0)) { + if (Op0->getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Op0); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + + // X / undef -> undef + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); + + return 0; +} + +/// This function implements the transforms common to both integer division +/// instructions (udiv and sdiv). It is called by the visitors to those integer +/// division instructions. 
+/// @brief Common integer divide transforms +Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // (sdiv X, X) --> 1 (udiv X, X) --> 1 + if (Op0 == Op1) { + if (const VectorType *Ty = dyn_cast(I.getType())) { + ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1); + std::vector Elts(Ty->getNumElements(), CI); + return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); + } + + ConstantInt *CI = ConstantInt::get(I.getType(), 1); + return ReplaceInstUsesWith(I, CI); + } + + if (Instruction *Common = commonDivTransforms(I)) + return Common; + + // Handle cases involving: [su]div X, (select Cond, Y, Z) + // This does not apply for fdiv. + if (isa(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast(Op1)) { + // div X, 1 == X + if (RHS->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + + // (X / C1) / C2 -> X / (C1*C2) + if (Instruction *LHS = dyn_cast(Op0)) + if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) + if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { + if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + else + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + Multiply(RHS, LHSRHS)); + } + + if (!RHS->isZero()) { // avoid X udiv 0 + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + } + + // 0 / X == 0, we don't need to preserve faults! + if (ConstantInt *LHS = dyn_cast(Op0)) + if (LHS->equalsInt(0)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // It can't be division by zero, hence it must be division by one. + if (I.getType() == Type::Int1Ty) + return ReplaceInstUsesWith(I, Op0); + + if (ConstantVector *Op1V = dyn_cast(Op1)) { + if (ConstantInt *X = cast_or_null(Op1V->getSplatValue())) + // div X, 1 == X + if (X->isOne()) + return ReplaceInstUsesWith(I, Op0); + } + + return 0; +} + +Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + if (ConstantInt *C = dyn_cast(Op1)) { + // X udiv C^2 -> X >> C + // Check to see if this is an unsigned division with an exact power of 2, + // if so, convert to a right shift. 
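An aside on the two udiv rewrites handled next: dividing an unsigned value by a constant power of two, or by a power of two shifted left by a variable amount, is a plain right shift. A standalone check, illustrative only:

    // Illustrative check only; not part of the imported LLVM sources.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1000000u; x += 97) {
        assert(x / 8u == x >> 3);               // X udiv 8  -->  X lshr 3
        for (uint32_t n = 0; n < 20; ++n)       // X udiv (8 << N) --> X lshr (N+3)
          assert(x / (8u << n) == x >> (n + 3));
      }
      return 0;
    }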
+    if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
+      return BinaryOperator::CreateLShr(Op0,
+               ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+
+    // X udiv C, where C >= signbit
+    if (C->getValue().isNegative()) {
+      Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C),
+                                      I);
+      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+                                ConstantInt::get(I.getType(), 1));
+    }
+  }
+
+  // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
+  if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
+    if (RHSI->getOpcode() == Instruction::Shl &&
+        isa<ConstantInt>(RHSI->getOperand(0))) {
+      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
+      if (C1.isPowerOf2()) {
+        Value *N = RHSI->getOperand(1);
+        const Type *NTy = N->getType();
+        if (uint32_t C2 = C1.logBase2()) {
+          Constant *C2V = ConstantInt::get(NTy, C2);
+          N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
+        }
+        return BinaryOperator::CreateLShr(Op0, N);
+      }
+    }
+  }
+
+  // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
+  // where C1 and C2 are powers of two.
+  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+        const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
+        if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
+          // Compute the shift amounts
+          uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
+          // Construct the "on true" case of the select
+          Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+          Instruction *TSI = BinaryOperator::CreateLShr(
+                                                 Op0, TC, SI->getName()+".t");
+          TSI = InsertNewInstBefore(TSI, I);
+
+          // Construct the "on false" case of the select
+          Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+          Instruction *FSI = BinaryOperator::CreateLShr(
+                                                 Op0, FC, SI->getName()+".f");
+          FSI = InsertNewInstBefore(FSI, I);
+
+          // Construct the select instruction and return it.
+          return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
+        }
+      }
+  return 0;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Handle the integer div common cases
+  if (Instruction *Common = commonIDivTransforms(I))
+    return Common;
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // sdiv X, -1 == -X
+    if (RHS->isAllOnesValue())
+      return BinaryOperator::CreateNeg(Op0);
+  }
+
+  // If the sign bits of both operands are zero (i.e. we can prove they are
+  // unsigned inputs), turn this into a udiv.
+  if (I.getType()->isInteger()) {
+    APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+      // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+      return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+    }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+  return commonDivTransforms(I);
+}
+
+/// This function implements the transforms on rem instructions that work
+/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
+/// is used by the visitors to those instructions.
+/// @brief Transforms common to all three rem instructions
+Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (isa<UndefValue>(Op0)) {              // undef % X -> 0
+    if (I.getType()->isFPOrFPVector())
+      return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN)
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+  if (isa<UndefValue>(Op1))
+    return ReplaceInstUsesWith(I, Op1);    // X % undef -> undef
+
+  // Handle cases involving: rem X, (select Cond, Y, Z)
+  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+    return &I;
+
+  return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Instruction *common = commonRemTransforms(I))
+    return common;
+
+  // 0 % X == 0 for integer, we don't need to preserve faults!
+  if (Constant *LHS = dyn_cast<Constant>(Op0))
+    if (LHS->isNullValue())
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // X % 0 == undef, we don't need to preserve faults!
+    if (RHS->equalsInt(0))
+      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
+    if (RHS->equalsInt(1))  // X % 1 == 0
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+    if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+      if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+        if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+          return R;
+      } else if (isa<PHINode>(Op0I)) {
+        if (Instruction *NV = FoldOpIntoPhi(I))
+          return NV;
+      }
+
+      // See if we can fold away this rem instruction.
+      if (SimplifyDemandedInstructionBits(I))
+        return &I;
+    }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Instruction *common = commonIRemTransforms(I))
+    return common;
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // X urem C^2 -> X and C
+    // Check to see if this is an unsigned remainder with an exact power of 2,
+    // if so, convert to a bitwise and.
+    if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
+      if (C->getValue().isPowerOf2())
+        return BinaryOperator::CreateAnd(Op0, SubOne(C));
+  }
+
+  if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
+    // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+    if (RHSI->getOpcode() == Instruction::Shl &&
+        isa<ConstantInt>(RHSI->getOperand(0))) {
+      if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
+        Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
+        Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
+                                                                   "tmp"), I);
+        return BinaryOperator::CreateAnd(Op0, Add);
+      }
+    }
+  }
+
+  // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2)
+  // where C1&C2 are powers of two.
+  if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
+    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+        // STO == 0 and SFO == 0 handled above.
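+        // For example:
+        //   urem i32 %x, (select i1 %c, i32 4, i32 8)
+        //     -->  select i1 %c, (and i32 %x, 3), (and i32 %x, 7)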
+        if ((STO->getValue().isPowerOf2()) &&
+            (SFO->getValue().isPowerOf2())) {
+          Value *TrueAnd = InsertNewInstBefore(
+            BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+          Value *FalseAnd = InsertNewInstBefore(
+            BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+          return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
+        }
+      }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Handle the integer rem common cases
+  if (Instruction *common = commonIRemTransforms(I))
+    return common;
+
+  if (Value *RHSNeg = dyn_castNegVal(Op1))
+    if (!isa<Constant>(RHSNeg) ||
+        (isa<ConstantInt>(RHSNeg) &&
+         cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+      // X % -Y -> X % Y
+      AddUsesToWorkList(I);
+      I.setOperand(1, RHSNeg);
+      return &I;
+    }
+
+  // If the sign bits of both operands are zero (i.e. we can prove they are
+  // unsigned inputs), turn this into a urem.
+  if (I.getType()->isInteger()) {
+    APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+      // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+      return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+    }
+  }
+
+  // If it's a constant vector, flip any negative values positive.
+  if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+    unsigned VWidth = RHSV->getNumOperands();
+
+    bool hasNegative = false;
+    for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+      if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+        if (RHS->getValue().isNegative())
+          hasNegative = true;
+
+    if (hasNegative) {
+      std::vector<Constant*> Elts(VWidth);
+      for (unsigned i = 0; i != VWidth; ++i) {
+        if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+          if (RHS->getValue().isNegative())
+            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+          else
+            Elts[i] = RHS;
+        }
+      }
+
+      Constant *NewRHSV = ConstantVector::get(Elts);
+      if (NewRHSV != RHSV) {
+        AddUsesToWorkList(I);
+        I.setOperand(1, NewRHSV);
+        return &I;
+      }
+    }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+  return commonRemTransforms(I);
+}
+
+// isOneBitSet - Return true if there is exactly one bit set in the specified
+// constant.
+static bool isOneBitSet(const ConstantInt *CI) {
+  return CI->getValue().isPowerOf2();
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+  return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+///      (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign.
Is illegal to do: (A u< B) | (A s> B) +/// +/// Three bits are used to represent the condition, as follows: +/// 0 A > B +/// 1 A == B +/// 2 A < B +/// +/// <=> Value Definition +/// 000 0 Always false +/// 001 1 A > B +/// 010 2 A == B +/// 011 3 A >= B +/// 100 4 A < B +/// 101 5 A != B +/// 110 6 A <= B +/// 111 7 Always true +/// +static unsigned getICmpCode(const ICmpInst *ICI) { + switch (ICI->getPredicate()) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + assert(0 && "Invalid ICmp predicate!"); + return 0; + } +} + +/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp +/// predicate into a three bit mask. It also returns whether it is an ordered +/// predicate by reference. +static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { + isOrdered = false; + switch (CC) { + case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 + case FCmpInst::FCMP_UNO: return 0; // 000 + case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 + case FCmpInst::FCMP_UGT: return 1; // 001 + case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 + case FCmpInst::FCMP_UEQ: return 2; // 010 + case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 + case FCmpInst::FCMP_UGE: return 3; // 011 + case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 + case FCmpInst::FCMP_ULT: return 4; // 100 + case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 + case FCmpInst::FCMP_UNE: return 5; // 101 + case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 + case FCmpInst::FCMP_ULE: return 6; // 110 + // True -> 7 + default: + // Not expecting FCMP_FALSE and FCMP_TRUE; + assert(0 && "Unexpected FCmp predicate!"); + return 0; + } +} + +/// getICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or a brand +/// new ICmp instruction. The sign is passed in to determine which kind +/// of predicate to use in the new icmp instruction. +static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { + switch (code) { + default: assert(0 && "Illegal ICmp code!"); + case 0: return ConstantInt::getFalse(); + case 1: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); + case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); + case 3: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); + case 4: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); + case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); + case 6: + if (sign) + return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); + else + return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(); + } +} + +/// getFCmpValue - This is the complement of getFCmpCode, which turns an +/// opcode and two operands into either a FCmp instruction. isordered is passed +/// in to determine which kind of predicate to use in the new fcmp instruction. 
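+/// For example, code 3 (011, "A >= B") yields "fcmp oge LHS, RHS" when
+/// isordered is true and "fcmp uge LHS, RHS" otherwise.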
+static Value *getFCmpValue(bool isordered, unsigned code, + Value *LHS, Value *RHS) { + switch (code) { + default: assert(0 && "Illegal FCmp code!"); + case 0: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); + case 1: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); + case 2: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); + case 3: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); + case 4: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); + case 5: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); + case 6: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(); + } +} + +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (ICmpInst::isSignedPredicate(p1) == ICmpInst::isSignedPredicate(p2)) || + (ICmpInst::isSignedPredicate(p1) && ICmpInst::isEquality(p2)) || + (ICmpInst::isSignedPredicate(p2) && ICmpInst::isEquality(p1)); +} + +namespace { +// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) +struct FoldICmpLogical { + InstCombiner &IC; + Value *LHS, *RHS; + ICmpInst::Predicate pred; + FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) + : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), + pred(ICI->getPredicate()) {} + bool shouldApply(Value *V) const { + if (ICmpInst *ICI = dyn_cast(V)) + if (PredicatesFoldable(pred, ICI->getPredicate())) + return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || + (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); + return false; + } + Instruction *apply(Instruction &Log) const { + ICmpInst *ICI = cast(Log.getOperand(0)); + if (ICI->getOperand(0) != LHS) { + assert(ICI->getOperand(1) == LHS); + ICI->swapOperands(); // Swap the LHS and RHS of the ICmp + } + + ICmpInst *RHSICI = cast(Log.getOperand(1)); + unsigned LHSCode = getICmpCode(ICI); + unsigned RHSCode = getICmpCode(RHSICI); + unsigned Code; + switch (Log.getOpcode()) { + case Instruction::And: Code = LHSCode & RHSCode; break; + case Instruction::Or: Code = LHSCode | RHSCode; break; + case Instruction::Xor: Code = LHSCode ^ RHSCode; break; + default: assert(0 && "Illegal logical opcode!"); return 0; + } + + bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || + ICmpInst::isSignedPredicate(ICI->getPredicate()); + + Value *RV = getICmpValue(isSigned, Code, LHS, RHS); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value... + return IC.ReplaceInstUsesWith(Log, RV); + } +}; +} // end anonymous namespace + +// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where +// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is +// guaranteed to be a binary operator. 
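+// For example, with Op = (X ^ 12), OpRHS = 12, and AndRHS = 10, the Xor case
+// below rewrites ((X ^ 12) & 10) into ((X & 10) ^ 8), since 12 & 10 == 8.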
+Instruction *InstCombiner::OptAndOp(Instruction *Op, + ConstantInt *OpRHS, + ConstantInt *AndRHS, + BinaryOperator &TheAnd) { + Value *X = Op->getOperand(0); + Constant *Together = 0; + if (!Op->isShift()) + Together = And(AndRHS, OpRHS); + + switch (Op->getOpcode()) { + case Instruction::Xor: + if (Op->hasOneUse()) { + // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) + Instruction *And = BinaryOperator::CreateAnd(X, AndRHS); + InsertNewInstBefore(And, TheAnd); + And->takeName(Op); + return BinaryOperator::CreateXor(And, Together); + } + break; + case Instruction::Or: + if (Together == AndRHS) // (X | C) & C --> C + return ReplaceInstUsesWith(TheAnd, AndRHS); + + if (Op->hasOneUse() && Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Instruction *Or = BinaryOperator::CreateOr(X, Together); + InsertNewInstBefore(Or, TheAnd); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + break; + case Instruction::Add: + if (Op->hasOneUse()) { + // Adding a one to a single bit bit-field should be turned into an XOR + // of the bit. First thing to check is to see if this AND is with a + // single bit constant. + const APInt& AndRHSV = cast(AndRHS)->getValue(); + + // If there is only one bit set... + if (isOneBitSet(cast(AndRHS))) { + // Ok, at this point, we know that we are masking the result of the + // ADD down to exactly one bit. If the constant we are adding has + // no bits set below this bit, then we can eliminate the ADD. + const APInt& AddRHS = cast(OpRHS)->getValue(); + + // Check to see if any bits below the one bit set in AndRHSV are set. + if ((AddRHS & (AndRHSV-1)) == 0) { + // If not, the only thing that can effect the output of the AND is + // the bit specified by AndRHSV. If that bit is set, the effect of + // the XOR is to toggle the bit. If it is clear, then the ADD has + // no effect. + if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + TheAnd.setOperand(0, X); + return &TheAnd; + } else { + // Pull the XOR out of the AND. + Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS); + InsertNewInstBefore(NewAnd, TheAnd); + NewAnd->takeName(Op); + return BinaryOperator::CreateXor(NewAnd, AndRHS); + } + } + } + } + break; + + case Instruction::Shl: { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); + ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShlMask); + + if (CI->getValue() == ShlMask) { + // Masking out bits that the shift already masks + return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. + } else if (CI != AndRHS) { // Reducing bits set in and. + TheAnd.setOperand(1, CI); + return &TheAnd; + } + break; + } + case Instruction::LShr: + { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! This only applies to + // unsigned shifts, because a signed shr may bring in set bits! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShrMask); + + if (CI->getValue() == ShrMask) { + // Masking out bits that the shift already masks. 
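+        // For example, on i32, ((X u>> 4) & 0x0FFFFFFF) is just (X u>> 4):
+        // the shift has already cleared the top four bits.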
+        return ReplaceInstUsesWith(TheAnd, Op);
+      } else if (CI != AndRHS) {
+        TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
+        return &TheAnd;
+      }
+      break;
+    }
+  case Instruction::AShr:
+    // Signed shr.
+    // See if this is shifting in some sign extension, then masking it out
+    // with an and.
+    if (Op->hasOneUse()) {
+      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+      Constant *C = ConstantInt::get(AndRHS->getValue() & ShrMask);
+      if (C == AndRHS) {          // Masking out bits shifted in.
+        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+        // Make the argument unsigned.
+        Value *ShVal = Op->getOperand(0);
+        ShVal = InsertNewInstBefore(
+            BinaryOperator::CreateLShr(ShVal, OpRHS,
+                                       Op->getName()), TheAnd);
+        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
+/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned
+/// indicates whether to treat the V, Lo and HI as signed or not. IB is the
+/// location to insert new instructions.
+Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                                           bool isSigned, bool Inside,
+                                           Instruction &IB) {
+  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+         "Lo is not <= Hi in range emission code!");
+
+  if (Inside) {
+    if (Lo == Hi)  // Trivially false.
+      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+
+    // V >= Min && V < Hi --> V < Hi
+    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+      ICmpInst::Predicate pred = (isSigned ?
+        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+      return new ICmpInst(pred, V, Hi);
+    }
+
+    // Emit V-Lo <u Hi-Lo
+    Constant *NegLo = ConstantExpr::getNeg(Lo);
+    Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
+    InsertNewInstBefore(Add, IB);
+    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+  }
+
+  if (Lo == Hi)  // Trivially true.
+    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
+
+  // V < Min || V >= Hi -> V > Hi-1
+  Hi = SubOne(cast<ConstantInt>(Hi));
+  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+    ICmpInst::Predicate pred = (isSigned ?
+        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+    return new ICmpInst(pred, V, Hi);
+  }
+
+  // Emit V-Lo >u Hi-1-Lo
+  // Note that Hi has already had one subtracted from it, above.
+  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+  Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
+  InsertNewInstBefore(Add, IB);
+  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
+}
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+  const APInt& V = Val->getValue();
+  uint32_t BitWidth = Val->getType()->getBitWidth();
+  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+  // look for the first zero bit after the run of ones
+  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+  // look for the first non-zero bit
+  ME = V.getActiveBits();
+  return true;
+}
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub.
If we can fold one of +/// the following xforms: +/// +/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask +/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// +/// return (A +/- B). +/// +Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, + ConstantInt *Mask, bool isSub, + Instruction &I) { + Instruction *LHSI = dyn_cast(LHS); + if (!LHSI || LHSI->getNumOperands() != 2 || + !isa(LHSI->getOperand(1))) return 0; + + ConstantInt *N = cast(LHSI->getOperand(1)); + + switch (LHSI->getOpcode()) { + default: return 0; + case Instruction::And: + if (And(N, Mask) == Mask) { + // If the AndRHS is a power of two minus one (0+1+), this is simple. + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) + break; + + // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ + // part, we don't need any explicit masks to take them out of A. If that + // is all N is, ignore it. + uint32_t MB = 0, ME = 0; + if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive + uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); + if (MaskedValueIsZero(RHS, Mask)) + break; + } + } + return 0; + case Instruction::Or: + case Instruction::Xor: + // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && And(N, Mask)->isZero()) + break; + return 0; + } + + Instruction *New; + if (isSub) + New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold"); + else + New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold"); + return InsertNewInstBefore(New, I); +} + +/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. +Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + Value *Val, *Val2; + ConstantInt *LHSCst, *RHSCst; + ICmpInst::Predicate LHSCC, RHSCC; + + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + return 0; + + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2); + InsertNewInstBefore(NewOr, I); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // From here on, we only handle: + // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) & (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. 
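+  // For example, once ordered, (X u> 5) & (X u< 20) reaches the UGT/ULT case
+  // below and becomes the single range test ((X - 6) u< 14).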
+ bool ShouldSwap; + if (ICmpInst::isSignedPredicate(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + ICmpInst::isSignedPredicate(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and and'ing the result + // together. Because of the above check, we know that we only have + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know + // (from the FoldICmpLogical check above), that the two constants + // are not equal and that the larger constant is on the RHS + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false + case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 + case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 + case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 + return ReplaceInstUsesWith(I, LHS); + } + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_ULT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + break; // (X != 13 & X u< 15) -> no change + case ICmpInst::ICMP_SLT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + break; // (X != 13 & X s< 15) -> no change + case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 + case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_NE: + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, + Val->getName()+".off"); + InsertNewInstBefore(Add, I); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, + ConstantInt::get(Add->getType(), 1)); + } + break; // (X != 13 & X != 15) -> no change + } + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 + case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false + case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change + break; + case 
ICmpInst::ICMP_NE:          // (X s< 13 & X != 15) -> X < 13
+    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
+      return ReplaceInstUsesWith(I, LHS);
+    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
+      break;
+    }
+    break;
+  case ICmpInst::ICMP_UGT:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
+    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
+      return ReplaceInstUsesWith(I, RHS);
+    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
+      break;
+    case ICmpInst::ICMP_NE:
+      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+        return new ICmpInst(LHSCC, Val, RHSCst);
+      break;                        // (X u> 13 & X != 15) -> no change
+    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
+      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true, I);
+    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
+      break;
+    }
+    break;
+  case ICmpInst::ICMP_SGT:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
+    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
+      return ReplaceInstUsesWith(I, RHS);
+    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
+      break;
+    case ICmpInst::ICMP_NE:
+      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+        return new ICmpInst(LHSCC, Val, RHSCst);
+      break;                        // (X s> 13 & X != 15) -> no change
+    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
+      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true, I);
+    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
+      break;
+    }
+    break;
+  }
+
+  return 0;
+}
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+  bool Changed = SimplifyCommutative(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (isa<UndefValue>(Op1))                         // X & undef -> 0
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+  // and X, X = X
+  if (Op0 == Op1)
+    return ReplaceInstUsesWith(I, Op1);
+
+  // See if we can simplify any instructions used by the instruction whose sole
+  // purpose is to compute bits we don't care about.
+  if (!isa<VectorType>(I.getType())) {
+    if (SimplifyDemandedInstructionBits(I))
+      return &I;
+  } else {
+    if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+      if (CP->isAllOnesValue())            // X & <-1,-1> -> X
+        return ReplaceInstUsesWith(I, I.getOperand(0));
+    } else if (isa<ConstantAggregateZero>(Op1)) {
+      return ReplaceInstUsesWith(I, Op1);  // X & <0,0> -> <0,0>
+    }
+  }
+
+  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+    const APInt& AndRHSMask = AndRHS->getValue();
+    APInt NotAndRHS(~AndRHSMask);
+
+    // Optimize a variety of ((val OP C1) & C2) combinations...
+    if (isa<BinaryOperator>(Op0)) {
+      Instruction *Op0I = cast<Instruction>(Op0);
+      Value *Op0LHS = Op0I->getOperand(0);
+      Value *Op0RHS = Op0I->getOperand(1);
+      switch (Op0I->getOpcode()) {
+      case Instruction::Xor:
+      case Instruction::Or:
+        // If the mask is only needed on one incoming arm, push it up.
+        if (Op0I->hasOneUse()) {
+          if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+            // Not masking anything out for the LHS, move to RHS.
+            Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
+                                                   Op0RHS->getName()+".masked");
+            InsertNewInstBefore(NewRHS, I);
+            return BinaryOperator::Create(
+                       cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
+          }
+          if (!isa<ConstantInt>(Op0RHS) &&
+              MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+            // Not masking anything out for the RHS, move to LHS.
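+            // For example, ((X | (Y & 15)) & 255) becomes ((X & 255) | (Y & 15)):
+            // (Y & 15) has no bits outside the mask, so only X needs masking.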
+ Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + InsertNewInstBefore(NewLHS, I); + return BinaryOperator::Create( + cast(Op0I)->getOpcode(), NewLHS, Op0RHS); + } + } + + break; + case Instruction::Add: + // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes + break; + + case Instruction::Sub: + // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + + // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS + // has 1's for all bits that the subtraction with A might affect. + if (Op0I->hasOneUse()) { + uint32_t BitWidth = AndRHSMask.getBitWidth(); + uint32_t Zeros = AndRHSMask.countLeadingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); + + ConstantInt *A = dyn_cast(Op0LHS); + if (!(A && A->isZero()) && // avoid infinite recursion. + MaskedValueIsZero(Op0LHS, Mask)) { + Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS); + InsertNewInstBefore(NewNeg, I); + return BinaryOperator::CreateAnd(NewNeg, AndRHS); + } + } + break; + + case Instruction::Shl: + case Instruction::LShr: + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + if (AndRHSMask == 1 && Op0LHS == AndRHS) { + Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS, + Constant::getNullValue(I.getType())); + InsertNewInstBefore(NewICmp, I); + return new ZExtInst(NewICmp, I.getType()); + } + break; + } + + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) + if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) + return Res; + } else if (CastInst *CI = dyn_cast(Op0)) { + // If this is an integer truncation or change from signed-to-unsigned, and + // if the source is an and/or with immediate, transform it. This + // frequently occurs for bitfield accesses. + if (Instruction *CastOp = dyn_cast(CI->getOperand(0))) { + if ((isa(CI) || isa(CI)) && + CastOp->getNumOperands() == 2) + if (ConstantInt *AndCI = dyn_cast(CastOp->getOperand(1))) { + if (CastOp->getOpcode() == Instruction::And) { + // Change: and (cast (and X, C1) to T), C2 + // into : and (cast X to T), trunc_or_bitcast(C1)&C2 + // This will fold the two constants together, which may allow + // other simplifications. 
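+          // For example, (trunc (X & 0x1FF) to i8) & 0x7F folds the two masks:
+          // it becomes (trunc X to i8) & 0x7F, since 0xFF & 0x7F == 0x7F.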
+ Instruction *NewCast = CastInst::CreateTruncOrBitCast( + CastOp->getOperand(0), I.getType(), + CastOp->getName()+".shrunk"); + NewCast = InsertNewInstBefore(NewCast, I); + // trunc_or_bitcast(C1)&C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); + } else if (CastOp->getOpcode() == Instruction::Or) { + // Change: and (cast (or X, C1) to T), C2 + // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) // trunc(C1)&C2 + return ReplaceInstUsesWith(I, AndRHS); + } + } + } + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + Value *Op0NotVal = dyn_castNotVal(Op0); + Value *Op1NotVal = dyn_castNotVal(Op1); + + if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // (~A & ~B) == (~(A | B)) - De Morgan's Law + if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { + Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + InsertNewInstBefore(Or, I); + return BinaryOperator::CreateNot(Or); + } + + { + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (match(Op0, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1) // (A | ?) & A --> A + return ReplaceInstUsesWith(I, Op1); + + // (A|B) & ~(A&B) -> A^B + if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + } + + if (match(Op1, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op0 || B == Op0) // A & (A | ?) 
--> A + return ReplaceInstUsesWith(I, Op0); + + // ~(A&B) & (A|B) -> A^B + if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + } + + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1) { // (A^B)&A -> A&(A^B) + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } else if (B == Op1) { // (A^B)&B -> B&(B^A) + cast(Op0)->swapOperands(); + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } + } + + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == Op0) { // B&(A^B) -> B&(B^A) + cast(Op1)->swapOperands(); + std::swap(A, B); + } + if (A == Op0) { // A&(A^B) -> A & ~B + Instruction *NotB = BinaryOperator::CreateNot(B, "tmp"); + InsertNewInstBefore(NotB, I); + return BinaryOperator::CreateAnd(A, NotB); + } + } + + // (A&((~A)|B)) -> A&B + if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || + match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + return BinaryOperator::CreateAnd(A, Op1); + if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || + match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + return BinaryOperator::CreateAnd(A, Op0); + } + + if (ICmpInst *RHS = dyn_cast(Op1)) { + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) + return R; + + if (ICmpInst *LHS = dyn_cast(Op0)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) + return Res; + } + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + // Only do this if the casts both really cause code to be generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType(), TD) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType(), TD)) { + Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), + I.getName()); + InsertNewInstBefore(NewOp, I); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast(Op1)) { + if (BinaryOperator *SI0 = dyn_cast(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Instruction *NewOp = + InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0), + SI1->getOperand(0), + SI0->getName()), I); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) { + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. 
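+            // Otherwise, e.g. (fcmp ord %x, 1.0) & (fcmp ord %y, 2.0)
+            // simplifies to (fcmp ord %x, %y): each compare only asserts that
+            // its operand is not NaN.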
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0), + RHS->getOperand(0)); + } + } else { + Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; + FCmpInst::Predicate Op0CC, Op1CC; + if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && + match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + else if (Op0CC == FCmpInst::FCMP_FALSE || + Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + else if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, Op1); + else if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, Op0); + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(Op0, Op1); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, Op1); + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast(getFCmpValue(true, Op1Pred, + Op0LHS, Op0RHS)); + } + } + } + } + } + } + + return Changed ? &I : 0; +} + +/// CollectBSwapParts - Analyze the specified subexpression and see if it is +/// capable of providing pieces of a bswap. The subexpression provides pieces +/// of a bswap if it is proven that each of the non-zero bytes in the output of +/// the expression came from the corresponding "byte swapped" byte in some other +/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then +/// we know that the expression deposits the low byte of %X into the high byte +/// of the bswap result and that all other bytes are zero. This expression is +/// accepted, the high byte of ByteValues is set to X to indicate a correct +/// match. +/// +/// This function returns true if the match was unsuccessful and false if so. +/// On entry to the function the "OverallLeftShift" is a signed integer value +/// indicating the number of bytes that the subexpression is later shifted. For +/// example, if the expression is later right shifted by 16 bits, the +/// OverallLeftShift value would be -2 on entry. This is used to specify which +/// byte of ByteValues is actually being set. +/// +/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding +/// byte is masked to zero by a user. For example, in (X & 255), X will be +/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits +/// this function to working on up to 32-byte (256 bit) values. ByteMask is +/// always in the local (OverallLeftShift) coordinate space. +/// +static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, + SmallVector &ByteValues) { + if (Instruction *I = dyn_cast(V)) { + // If this is an or instruction, it may be an inner node of the bswap. 
+ if (I->getOpcode() == Instruction::Or) { + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues) || + CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, + ByteValues); + } + + // If this is a logical shift by a constant multiple of 8, recurse with + // OverallLeftShift and ByteMask adjusted. + if (I->isLogicalShift() && isa(I->getOperand(1))) { + unsigned ShAmt = + cast(I->getOperand(1))->getLimitedValue(~0U); + // Ensure the shift amount is defined and of a byte value. + if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) + return true; + + unsigned ByteShift = ShAmt >> 3; + if (I->getOpcode() == Instruction::Shl) { + // X << 2 -> collect(X, +2) + OverallLeftShift += ByteShift; + ByteMask >>= ByteShift; + } else { + // X >>u 2 -> collect(X, -2) + OverallLeftShift -= ByteShift; + ByteMask <<= ByteShift; + ByteMask &= (~0U >> (32-ByteValues.size())); + } + + if (OverallLeftShift >= (int)ByteValues.size()) return true; + if (OverallLeftShift <= -(int)ByteValues.size()) return true; + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + + // If this is a logical 'and' with a mask that clears bytes, clear the + // corresponding bytes in ByteMask. + if (I->getOpcode() == Instruction::And && + isa(I->getOperand(1))) { + // Scan every byte of the and mask, seeing if the byte is either 0 or 255. + unsigned NumBytes = ByteValues.size(); + APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); + const APInt &AndMask = cast(I->getOperand(1))->getValue(); + + for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { + // If this byte is masked out by a later operation, we don't care what + // the and mask is. + if ((ByteMask & (1 << i)) == 0) + continue; + + // If the AndMask is all zeros for this byte, clear the bit. + APInt MaskB = AndMask & Byte; + if (MaskB == 0) { + ByteMask &= ~(1U << i); + continue; + } + + // If the AndMask is not all ones for this byte, it's not a bytezap. + if (MaskB != Byte) + return true; + + // Otherwise, this byte is kept. + } + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap. Some observations: 1) if more than one byte + // is demanded from this input, then it could not be successfully assembled + // into a byteswap. At least one of the two bytes would not be aligned with + // their ultimate destination. + if (!isPowerOf2_32(ByteMask)) return true; + unsigned InputByteNo = CountTrailingZeros_32(ByteMask); + + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 + // is demanded, it needs to go into byte 0 of the result. This means that the + // byte needs to be shifted until it lands in the right byte bucket. The + // shift amount depends on the position: if the byte is coming from the high + // part of the value (e.g. byte 3) then it must be shifted right. If from the + // low part, it must be shifted left. + unsigned DestByteNo = InputByteNo + OverallLeftShift; + if (InputByteNo < ByteValues.size()/2) { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } else { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } + + // If the destination byte value is already defined, the values are or'd + // together, which isn't a bswap (unless it's an or of the same bits). 
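+  // (For reference, a sketch of the canonical i32 idiom this matcher is
+  // designed to accept:
+  //    (X << 24) | ((X << 8) & 0x00FF0000) |
+  //    ((X u>> 8) & 0x0000FF00) | (X u>> 24)   -->  llvm.bswap.i32(X).)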
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + return true; + ByteValues[DestByteNo] = V; + return false; +} + +/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. +/// If so, insert the new bswap intrinsic and return it. +Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { + const IntegerType *ITy = dyn_cast(I.getType()); + if (!ITy || ITy->getBitWidth() % 16 || + // ByteMask only allows up to 32-byte values. + ITy->getBitWidth() > 32*8) + return 0; // Can only bswap pairs of bytes. Can't do vectors. + + /// ByteValues - For each byte of the result, we keep track of which value + /// defines each byte. + SmallVector ByteValues; + ByteValues.resize(ITy->getBitWidth()/8); + + // Try to find all the pieces corresponding to the bswap. + uint32_t ByteMask = ~0U >> (32-ByteValues.size()); + if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) + return 0; + + // Check to see if all of the bytes come from the same value. + Value *V = ByteValues[0]; + if (V == 0) return 0; // Didn't find a byte? Must be zero. + + // Check to make sure that all of the bytes come from the same value. + for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) + if (ByteValues[i] != V) + return 0; + const Type *Tys[] = { ITy }; + Module *M = I.getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + return CallInst::Create(F, V); +} + +/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check +/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then +/// we can simplify this expression to "cond ? C : D or B". +static Instruction *MatchSelectFromAndOr(Value *A, Value *B, + Value *C, Value *D) { + // If A is not a select of -1/0, this cannot match. + Value *Cond = 0; + if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) + return 0; + + // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. + if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, B); + if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, B); + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. + if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, D); + if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, D); + return 0; +} + +/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. +Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + Value *Val, *Val2; + ConstantInt *LHSCst, *RHSCst; + ICmpInst::Predicate LHSCC, RHSCC; + + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + return 0; + + // From here on, we only handle: + // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) | (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. 
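+  // For example, once ordered, (X u< 10) | (X u> 20) reaches the ULT/UGT case
+  // below and becomes the single range test ((X - 10) u> 10).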
+  bool ShouldSwap;
+  if (ICmpInst::isSignedPredicate(LHSCC) ||
+      (ICmpInst::isEquality(LHSCC) &&
+       ICmpInst::isSignedPredicate(RHSCC)))
+    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+  else
+    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+  if (ShouldSwap) {
+    std::swap(LHS, RHS);
+    std::swap(LHSCst, RHSCst);
+    std::swap(LHSCC, RHSCC);
+  }
+
+  // At this point, we know we have two icmp instructions
+  // comparing a value against two constants and or'ing the result
+  // together. Because of the above check, we know that we only have
+  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+  // FoldICmpLogical check above), that the two constants are not
+  // equal.
+  assert(LHSCst != RHSCst && "Compares not folded above?");
+
+  switch (LHSCC) {
+  default: assert(0 && "Unknown integer condition code!");
+  case ICmpInst::ICMP_EQ:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:
+      if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
+                                                     Val->getName()+".off");
+        InsertNewInstBefore(Add, I);
+        AddCST = Subtract(AddOne(RHSCst), LHSCst);
+        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+      }
+      break;                         // (X == 13 | X == 15) -> no change
+    case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
+    case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
+      break;
+    case ICmpInst::ICMP_NE:          // (X == 13 | X != 15) -> X != 15
+    case ICmpInst::ICMP_ULT:         // (X == 13 | X u< 15) -> X u< 15
+    case ICmpInst::ICMP_SLT:         // (X == 13 | X s< 15) -> X s< 15
+      return ReplaceInstUsesWith(I, RHS);
+    }
+    break;
+  case ICmpInst::ICMP_NE:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
+    case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
+    case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
+      return ReplaceInstUsesWith(I, LHS);
+    case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
+    case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
+    case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+    }
+    break;
+  case ICmpInst::ICMP_ULT:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:          // (X u< 13 | X == 14) -> no change
+      break;
+    case ICmpInst::ICMP_UGT:         // (X u< 13 | X u> 15) -> (X-13) u> 2
+      // If RHSCst is [us]MAXINT, it is always false. Not handling
+      // this can cause overflow.
+      if (RHSCst->isMaxValue(false))
+        return ReplaceInstUsesWith(I, LHS);
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false, I);
+    case ICmpInst::ICMP_SGT:         // (X u< 13 | X s> 15) -> no change
+      break;
+    case ICmpInst::ICMP_NE:          // (X u< 13 | X != 15) -> X != 15
+    case ICmpInst::ICMP_ULT:         // (X u< 13 | X u< 15) -> X u< 15
+      return ReplaceInstUsesWith(I, RHS);
+    case ICmpInst::ICMP_SLT:         // (X u< 13 | X s< 15) -> no change
+      break;
+    }
+    break;
+  case ICmpInst::ICMP_SLT:
+    switch (RHSCC) {
+    default: assert(0 && "Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:          // (X s< 13 | X == 14) -> no change
+      break;
+    case ICmpInst::ICMP_SGT:         // (X s< 13 | X s> 15) -> (X-13) s> 2
+      // If RHSCst is [us]MAXINT, it is always false. Not handling
+      // this can cause overflow.
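+      // e.g. (X s< 13 | X s> INT_MAX) is just (X s< 13): the second
+      // compare can never be true.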
+ if (RHSCst->isMaxValue(true)) + return ReplaceInstUsesWith(I, LHS); + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false, I); + case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 + case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue()); + case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: assert(0 && "Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 + case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true + case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue()); + case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change + break; + } + break; + } + return 0; +} + +/// FoldOrWithConstants - This helper function folds: +/// +/// ((A | B) & C1) | (B & C2) +/// +/// into: +/// +/// (A & C1) | B +/// +/// when the XOR of the two constants is "all ones" (-1). +Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C) { + ConstantInt *CI1 = dyn_cast(C); + if (!CI1) return 0; + + Value *V1 = 0; + ConstantInt *CI2 = 0; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + + APInt Xor = CI1->getValue() ^ CI2->getValue(); + if (!Xor.isAllOnesValue()) return 0; + + if (V1 == A || V1 == B) { + Instruction *NewOp = + InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I); + return BinaryOperator::CreateOr(NewOp, V1); + } + + return 0; +} + +Instruction *InstCombiner::visitOr(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa(Op1)) // X | undef -> -1 + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + // or X, X = X + if (Op0 == Op1) + return ReplaceInstUsesWith(I, Op0); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. 
+ if (!isa(I.getType())) { + if (SimplifyDemandedInstructionBits(I)) + return &I; + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X + } else if (ConstantVector *CP = dyn_cast(Op1)) { + if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> + return ReplaceInstUsesWith(I, I.getOperand(1)); + } + + + + // or X, -1 == -1 + if (ConstantInt *RHS = dyn_cast(Op1)) { + ConstantInt *C1 = 0; Value *X = 0; + // (X & C1) | C2 --> (X | C2) & (C1|C2) + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { + Instruction *Or = BinaryOperator::CreateOr(X, RHS); + InsertNewInstBefore(Or, I); + Or->takeName(Op0); + return BinaryOperator::CreateAnd(Or, + ConstantInt::get(RHS->getValue() | C1->getValue())); + } + + // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { + Instruction *Or = BinaryOperator::CreateOr(X, RHS); + InsertNewInstBefore(Or, I); + Or->takeName(Op0); + return BinaryOperator::CreateXor(Or, + ConstantInt::get(C1->getValue() & ~RHS->getValue())); + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + Value *A = 0, *B = 0; + ConstantInt *C1 = 0, *C2 = 0; + + if (match(Op0, m_And(m_Value(A), m_Value(B)))) + if (A == Op1 || B == Op1) // (A & ?) | A --> A + return ReplaceInstUsesWith(I, Op1); + if (match(Op1, m_And(m_Value(A), m_Value(B)))) + if (A == Op0 || B == Op0) // A | (A & ?) --> A + return ReplaceInstUsesWith(I, Op0); + + // (A | B) | C and A | (B | C) -> bswap if possible. + // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. + if (match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())) || + (match(Op0, m_Shift(m_Value(), m_Value())) && + match(Op1, m_Shift(m_Value(), m_Value())))) { + if (Instruction *BSwap = MatchBSwap(I)) + return BSwap; + } + + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 + if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op1, C1->getValue())) { + Instruction *NOr = BinaryOperator::CreateOr(A, Op1); + InsertNewInstBefore(NOr, I); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // Y|(X^C) -> (X|Y)^C iff Y&C == 0 + if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op0, C1->getValue())) { + Instruction *NOr = BinaryOperator::CreateOr(A, Op0); + InsertNewInstBefore(NOr, I); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0, *V3 = 0; + C1 = dyn_cast(C); + C2 = dyn_cast(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. 
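+        // For example, ((V + 256) & 0xFFFFFF00) | (V & 0xFF) folds to V + 256
+        // this way, whichever side of the or the add appears on: the add
+        // cannot change the low byte that the 0xFF mask keeps.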
+        // Or commutes, try both ways.
+        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+          // Add commutes, try both ways.
+          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+            return ReplaceInstUsesWith(I, B);
+          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+            return ReplaceInstUsesWith(I, B);
+        }
+      }
+      V1 = 0; V2 = 0; V3 = 0;
+    }
+
+    // Check to see if we have any common things being and'ed.  If so, find the
+    // terms for V1 & (V2|V3).
+    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
+      if (A == B)      // (A & C)|(A & D) == A & (C|D)
+        V1 = A, V2 = C, V3 = D;
+      else if (A == D) // (A & C)|(B & A) == A & (B|C)
+        V1 = A, V2 = B, V3 = C;
+      else if (C == B) // (A & C)|(C & D) == C & (A|D)
+        V1 = C, V2 = A, V3 = D;
+      else if (C == D) // (A & C)|(B & C) == C & (A|B)
+        V1 = C, V2 = A, V3 = B;
+
+      if (V1) {
+        Value *Or =
+          InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+        return BinaryOperator::CreateAnd(V1, Or);
+      }
+    }
+
+    // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
+    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+      return Match;
+    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+      return Match;
+    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+      return Match;
+    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+      return Match;
+
+    // ((A&~B)|(~A&B)) -> A^B
+    if ((match(C, m_Not(m_Specific(D))) &&
+         match(B, m_Not(m_Specific(A)))))
+      return BinaryOperator::CreateXor(A, D);
+    // ((~B&A)|(~A&B)) -> A^B
+    if ((match(A, m_Not(m_Specific(D))) &&
+         match(B, m_Not(m_Specific(C)))))
+      return BinaryOperator::CreateXor(C, D);
+    // ((A&~B)|(B&~A)) -> A^B
+    if ((match(C, m_Not(m_Specific(B))) &&
+         match(D, m_Not(m_Specific(A)))))
+      return BinaryOperator::CreateXor(A, B);
+    // ((~B&A)|(B&~A)) -> A^B
+    if ((match(A, m_Not(m_Specific(B))) &&
+         match(D, m_Not(m_Specific(C)))))
+      return BinaryOperator::CreateXor(C, B);
+  }
+
+  // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
+  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+          SI0->getOperand(1) == SI1->getOperand(1) &&
+          (SI0->hasOneUse() || SI1->hasOneUse())) {
+        Instruction *NewOp =
+          InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
+                                                       SI1->getOperand(0),
+                                                       SI0->getName()), I);
+        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+                                      SI1->getOperand(1));
+      }
+  }
+
+  // ((A|B)&1)|(B&-2) -> (A&1) | B
+  if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+      match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+    Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
+    if (Ret) return Ret;
+  }
+  // (B&-2)|((A|B)&1) -> (A&1) | B
+  if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+      match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+    Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
+    if (Ret) return Ret;
+  }
+
+  if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
+    if (A == Op1)                        // ~A | A == -1
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+  } else {
+    A = 0;
+  }
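+  // Illustration only: the and/or-of-complements matches earlier in this
+  // function are the classic sum-of-products form of exclusive or; per bit,
+  //   (a & ~b) | (~a & b) == a ^ b
+  //   a=0,b=0 -> 0   a=0,b=1 -> 1   a=1,b=0 -> 1   a=1,b=1 -> 0.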
+  // Note, A is still live here!
+  if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
+    if (Op0 == B)
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+    // (~A | ~B) == (~(A & B)) - De Morgan's Law
+    if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
+      Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
+                                              I.getName()+".demorgan"), I);
+      return BinaryOperator::CreateNot(And);
+    }
+  }
+
+  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+      return R;
+
+    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+      if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
+        return Res;
+  }
+
+  // fold (or (cast A), (cast B)) -> (cast (or A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+            !isa<ICmpInst>(Op1C->getOperand(0))) {
+          const Type *SrcTy = Op0C->getOperand(0)->getType();
+          if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+              // Only do this if the casts both really cause code to be
+              // generated.
+              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+                                I.getType(), TD) &&
+              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+                                I.getType(), TD)) {
+            Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
+                                                          Op1C->getOperand(0),
+                                                          I.getName());
+            InsertNewInstBefore(NewOp, I);
+            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+          }
+        }
+      }
+  }
+
+  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
+      if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+          RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+          LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+            // If either of the constants are nans, then the whole thing
+            // returns true.
+            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+              return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+
+            // Otherwise, no need to compare the two constants, compare the
+            // rest.
+            return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0),
+                                RHS->getOperand(0));
+          }
+      } else {
+        Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
+        FCmpInst::Predicate Op0CC, Op1CC;
+        if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
+            match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
+          if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+            // Swap RHS operands to match LHS.
+            Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+            std::swap(Op1LHS, Op1RHS);
+          }
+          if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+            // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+            if (Op0CC == Op1CC)
+              return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+            else if (Op0CC == FCmpInst::FCMP_TRUE ||
+                     Op1CC == FCmpInst::FCMP_TRUE)
+              return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+            else if (Op0CC == FCmpInst::FCMP_FALSE)
+              return ReplaceInstUsesWith(I, Op1);
+            else if (Op1CC == FCmpInst::FCMP_FALSE)
+              return ReplaceInstUsesWith(I, Op0);
+            bool Op0Ordered;
+            bool Op1Ordered;
+            unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+            unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
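+            // Illustration only: getFCmpCode packs each predicate into
+            // "greater/equal/less" condition bits plus an ordered flag, so
+            // OR-ing two codes ORs the conditions they accept; e.g. the codes
+            // for OLT and OGT combine to the code for ONE, matching
+            //   (x < y) || (x > y)  ==  (x != y)   for ordered x, y.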
+            if (Op0Ordered == Op1Ordered) {
+              // If both are ordered or unordered, return a new fcmp with
+              // or'ed predicates.
+              Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
+                                       Op0LHS, Op0RHS);
+              if (Instruction *I = dyn_cast<Instruction>(RV))
+                return I;
+              // Otherwise, it's a constant boolean value...
+              return ReplaceInstUsesWith(I, RV);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return Changed ? &I : 0;
+}
+
+namespace {
+
+// XorSelf - Implements: X ^ X --> 0
+struct XorSelf {
+  Value *RHS;
+  XorSelf(Value *rhs) : RHS(rhs) {}
+  bool shouldApply(Value *LHS) const { return LHS == RHS; }
+  Instruction *apply(BinaryOperator &Xor) const {
+    return &Xor;
+  }
+};
+
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+  bool Changed = SimplifyCommutative(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (isa<UndefValue>(Op1)) {
+    if (isa<UndefValue>(Op0))
+      // Handle undef ^ undef -> 0 special case. This is a common
+      // idiom (misuse).
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
+  }
+
+  // xor X, X = 0, even if X is nested in a sequence of Xor's.
+  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
+    assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+
+  // See if we can simplify any instructions used by the instruction whose
+  // sole purpose is to compute bits we don't care about.
+  if (!isa<VectorType>(I.getType())) {
+    if (SimplifyDemandedInstructionBits(I))
+      return &I;
+  } else if (isa<ConstantAggregateZero>(Op1)) {
+    return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
+  }
+
+  // Is this a ~ operation?
+  if (Value *NotOp = dyn_castNotVal(&I)) {
+    // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+    // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+      if (Op0I->getOpcode() == Instruction::And ||
+          Op0I->getOpcode() == Instruction::Or) {
+        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+          Instruction *NotY =
+            BinaryOperator::CreateNot(Op0I->getOperand(1),
+                                      Op0I->getOperand(1)->getName()+".not");
+          InsertNewInstBefore(NotY, I);
+          if (Op0I->getOpcode() == Instruction::And)
+            return BinaryOperator::CreateOr(Op0NotVal, NotY);
+          else
+            return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+        }
+      }
+    }
+  }
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
+        return new ICmpInst(ICI->getInversePredicate(),
+                            ICI->getOperand(0), ICI->getOperand(1));
+
+      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+        return new FCmpInst(FCI->getInversePredicate(),
+                            FCI->getOperand(0), FCI->getOperand(1));
+    }
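+    // Illustration only: an i1 comparison result is 0 or 1, so xor with true
+    // is logical negation and inverting the predicate is exact, e.g.
+    //   bool p = a < b;        // icmp slt
+    //   bool q = p ^ true;     // same value as (a >= b), i.e. icmp sge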
+    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+        if (CI->hasOneUse() && Op0C->hasOneUse()) {
+          Instruction::CastOps Opcode = Op0C->getOpcode();
+          if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+            if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+                                             Op0C->getDestTy())) {
+              Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+                                     CI->getOpcode(), CI->getInversePredicate(),
+                                     CI->getOperand(0), CI->getOperand(1)), I);
+              NewCI->takeName(CI);
+              return CastInst::Create(Opcode, NewCI, Op0C->getType());
+            }
+          }
+        }
+      }
+    }
+
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+      // ~(c-X) == X-c-1 == X+(-c-1)
+      if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+                                             ConstantInt::get(I.getType(), 1));
+          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+        }
+
+      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+        if (Op0I->getOpcode() == Instruction::Add) {
+          // ~(X+c) --> (-c-1)-X
+          if (RHS->isAllOnesValue()) {
+            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+            return BinaryOperator::CreateSub(
+                           ConstantExpr::getSub(NegOp0CI,
+                                             ConstantInt::get(I.getType(), 1)),
+                           Op0I->getOperand(0));
+          } else if (RHS->getValue().isSignBit()) {
+            // (X + C) ^ signbit -> (X + C + signbit)
+            Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
+            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+          }
+        } else if (Op0I->getOpcode() == Instruction::Or) {
+          // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+            // Anything in both C1 and C2 is known to be zero, remove it from
+            // NewRHS.
+            Constant *CommonBits = And(Op0CI, RHS);
+            NewRHS = ConstantExpr::getAnd(NewRHS,
+                                          ConstantExpr::getNot(CommonBits));
+            AddToWorkList(Op0I);
+            I.setOperand(0, Op0I->getOperand(0));
+            I.setOperand(1, NewRHS);
+            return &I;
+          }
+        }
+      }
+    }
+
+    // Try to fold constant and into select arguments.
+    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+        return R;
+    if (isa<PHINode>(Op0))
+      if (Instruction *NV = FoldOpIntoPhi(I))
+        return NV;
+  }
+
+  if (Value *X = dyn_castNotVal(Op0))    // ~A ^ A == -1
+    if (X == Op1)
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+  if (Value *X = dyn_castNotVal(Op1))    // A ^ ~A == -1
+    if (X == Op0)
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+  BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+  if (Op1I) {
+    Value *A, *B;
+    if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+      if (A == Op0) {              // B^(B|A) == (A|B)^B
+        Op1I->swapOperands();
+        I.swapOperands();
+        std::swap(Op0, Op1);
+      } else if (B == Op0) {       // B^(A|B) == (A|B)^B
+        I.swapOperands();     // Simplified below.
+        std::swap(Op0, Op1);
+      }
+    } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
+      return ReplaceInstUsesWith(I, B);  // A^(A^B) == B
+    } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
+      return ReplaceInstUsesWith(I, A);  // A^(B^A) == B
+    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){
+      if (A == Op0) {                                   // A^(A&B) -> A^(B&A)
+        Op1I->swapOperands();
+        std::swap(A, B);
+      }
+      if (B == Op0) {                                   // A^(B&A) -> (B&A)^A
+        I.swapOperands();     // Simplified below.
+        std::swap(Op0, Op1);
+      }
+    }
+  }
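+  // Illustration only: the all-ones xor rewrites above are two's complement
+  // identities, using ~v == -v - 1:
+  //   ~(c - x) == x + (-c - 1)       ~(x + c) == (-c - 1) - x
+  // e.g. for i8 with c = 5, x = 9:  ~(5 - 9) == ~(-4) == 3 == 9 + (-6).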
+
+  BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+  if (Op0I) {
+    Value *A, *B;
+    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) {
+      if (A == Op1)                                  // (B|A)^B == (A|B)^B
+        std::swap(A, B);
+      if (B == Op1) {                                // (A|B)^B == A & ~B
+        Instruction *NotB =
+          InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
+        return BinaryOperator::CreateAnd(A, NotB);
+      }
+    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+      return ReplaceInstUsesWith(I, B);                    // (A^B)^A == B
+    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+      return ReplaceInstUsesWith(I, A);                    // (B^A)^A == B
+    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){
+      if (A == Op1)                                     // (A&B)^A -> (B&A)^A
+        std::swap(A, B);
+      if (B == Op1 &&                                   // (B&A)^A == ~B & A
+          !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
+        Instruction *N =
+          InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
+        return BinaryOperator::CreateAnd(N, Op1);
+      }
+    }
+  }
+
+  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
+  if (Op0I && Op1I && Op0I->isShift() &&
+      Op0I->getOpcode() == Op1I->getOpcode() &&
+      Op0I->getOperand(1) == Op1I->getOperand(1) &&
+      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+    Instruction *NewOp =
+      InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
+                                                    Op1I->getOperand(0),
+                                                    Op0I->getName()), I);
+    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+                                  Op1I->getOperand(1));
+  }
+
+  if (Op0I && Op1I) {
+    Value *A, *B, *C, *D;
+    // (A & B)^(A | B) -> A ^ B
+    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+      if ((A == C && B == D) || (A == D && B == C))
+        return BinaryOperator::CreateXor(A, B);
+    }
+    // (A | B)^(A & B) -> A ^ B
+    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+      if ((A == C && B == D) || (A == D && B == C))
+        return BinaryOperator::CreateXor(A, B);
+    }
+
+    // (A & B)^(C & D)
+    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+      // (X & Y)^(X & Z) -> (Y^Z) & X
+      Value *X = 0, *Y = 0, *Z = 0;
+      if (A == C)
+        X = A, Y = B, Z = D;
+      else if (A == D)
+        X = A, Y = B, Z = C;
+      else if (B == C)
+        X = B, Y = A, Z = D;
+      else if (B == D)
+        X = B, Y = A, Z = C;
+
+      if (X) {
+        Instruction *NewOp =
+          InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+        return BinaryOperator::CreateAnd(NewOp, X);
+      }
+    }
+  }
+
+  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+      return R;
+
+  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+        const Type *SrcTy = Op0C->getOperand(0)->getType();
+        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+            // Only do this if the casts both really cause code to be generated.
+            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+                              I.getType(), TD) &&
+            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+                              I.getType(), TD)) {
+          Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
+                                                         Op1C->getOperand(0),
+                                                         I.getName());
+          InsertNewInstBefore(NewOp, I);
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+        }
+      }
+  }
+
+  return Changed ? &I : 0;
+}
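+
+// Illustration only: the (A & B)^(A | B) --> A ^ B fold in visitXor holds
+// bitwise: where A and B agree, AND and OR produce the same bit (xor 0);
+// where they differ, AND gives 0 while OR gives 1 (xor 1).  For example,
+//   A = 0b1100, B = 0b1010:
+//   (A & B) ^ (A | B) == 0b1000 ^ 0b1110 == 0b0110 == A ^ B.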
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
+                            ConstantInt *In2, bool IsSigned = false) {
+  Result = cast<ConstantInt>(Add(In1, In2));
+
+  if (IsSigned)
+    if (In2->getValue().isNegative())
+      return Result->getValue().sgt(In1->getValue());
+    else
+      return Result->getValue().slt(In1->getValue());
+  else
+    return Result->getValue().ult(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
+                            ConstantInt *In2, bool IsSigned = false) {
+  Result = cast<ConstantInt>(Subtract(In1, In2));
+
+  if (IsSigned)
+    if (In2->getValue().isNegative())
+      return Result->getValue().slt(In1->getValue());
+    else
+      return Result->getValue().sgt(In1->getValue());
+  else
+    return Result->getValue().ugt(In1->getValue());
+}
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer).  Return the result as a signed integer of intptr size.
+static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
+  TargetData &TD = IC.getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  const Type *IntPtrTy = TD.getIntPtrType();
+  Value *Result = Constant::getNullValue(IntPtrTy);
+
+  // Build a mask for high order bits.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+       ++i, ++GTI) {
+    Value *Op = *i;
+    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+        else
+          Result = IC.InsertNewInstBefore(
+                   BinaryOperator::CreateAdd(Result,
+                                             ConstantInt::get(IntPtrTy, Size),
+                                             GEP->getName()+".offs"), I);
+        continue;
+      }
+
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      if (Constant *RC = dyn_cast<Constant>(Result))
+        Result = ConstantExpr::getAdd(RC, Scale);
+      else {
+        // Emit an add instruction.
+        Result = IC.InsertNewInstBefore(
+                 BinaryOperator::CreateAdd(Result, Scale,
+                                           GEP->getName()+".offs"), I);
+      }
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy) {
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true);
+      else
+        Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy,
+                                                                true,
+                                                      Op->getName()+".c"), I);
+    }
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getMul(OpC, Scale);
+      else    // We'll let instcombine(mul) convert this to a shl if possible.
+        Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
+                                                    GEP->getName()+".idx"), I);
+    }
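+    // Illustration only, assuming a hypothetical "struct S { i32 a; i16 b[10]; }"
+    // with natural layout and "getelementptr S* %p, i32 0, i32 1, i32 %i":
+    // the constant indices contribute offsetof(b) == 4 directly, while the
+    // variable index is scaled, adding "%i * 2"; only the non-constant parts
+    // cause mul/add instructions to be emitted.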
+
+    // Emit an add instruction.
+    if (isa<Constant>(Op) && isa<Constant>(Result))
+      Result = ConstantExpr::getAdd(cast<Constant>(Op),
+                                    cast<Constant>(Result));
+    else
+      Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
+                                                  GEP->getName()+".offs"), I);
+  }
+  return Result;
+}
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
+/// be complex, and scales are involved.  The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+                                          InstCombiner &IC) {
+  TargetData &TD = IC.getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+
+  // Check to see if this gep only has a single variable index.  If so, and if
+  // any constant indices are a multiple of its scale, then we can compute this
+  // in terms of the scale of the variable index.  For example, if the GEP
+  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+  // because the expression will cross zero at the same point.
+  unsigned i, e = GEP->getNumOperands();
+  int64_t Offset = 0;
+  for (i = 1; i != e; ++i, ++GTI) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+      // Compute the aggregate offset of constant indices.
+      if (CI->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+      } else {
+        uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+        Offset += Size*CI->getSExtValue();
+      }
+    } else {
+      // Found our variable index.
+      break;
+    }
+  }
+
+  // If there are no variable indices, we must have a constant offset, just
+  // evaluate it the general way.
+  if (i == e) return 0;
+
+  Value *VariableIdx = GEP->getOperand(i);
+  // Determine the scale factor of the variable element.  For example, this is
+  // 4 if the variable index is into an array of i32.
+  uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+  // Verify that there are no other variable indices.  If so, emit the hard way.
+  for (++i, ++GTI; i != e; ++i, ++GTI) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!CI) return 0;
+
+    // Compute the aggregate offset of constant indices.
+    if (CI->isZero()) continue;
+
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += Size*CI->getSExtValue();
+    }
+  }
+
+  // Okay, we know we have a single variable index, which must be a
+  // pointer/array/vector index.  If there is no offset, life is simple, return
+  // the index.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  if (Offset == 0) {
+    // Cast to intptrty in case a truncation occurs.  If an extension is
+    // needed, we don't need to bother extending: the extension won't affect
+    // where the computation crosses zero.
+    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+      VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+                                  VariableIdx->getNameStart(), &I);
+    return VariableIdx;
+  }
+
+  // Otherwise, there is an index.  The computation we will do will be modulo
+  // the pointer size, so get it.
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+  Offset &= PtrSizeMask;
+  VariableScale &= PtrSizeMask;
+
+  // To do this transformation, any constant index must be a multiple of the
+  // variable scale factor.  For example, we can evaluate "12 + 4*i" as
+  // "3 + i", but we can't evaluate "10 + 3*i" in terms of i.  Check that the
+  // offset is a multiple of the variable scale.
+  int64_t NewOffs = Offset / (int64_t)VariableScale;
+  if (Offset != NewOffs*(int64_t)VariableScale)
+    return 0;
+
+  // Okay, we can do this evaluation.  Start by converting the index to intptr.
+  const Type *IntPtrTy = TD.getIntPtrType();
+  if (VariableIdx->getType() != IntPtrTy)
+    VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+                                              true /*SExt*/,
+                                              VariableIdx->getNameStart(), &I);
+  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+  return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else.  At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+                                       ICmpInst::Predicate Cond,
+                                       Instruction &I) {
+  assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
+
+  // Look through bitcasts.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+    RHS = BCI->getOperand(0);
+
+  Value *PtrBase = GEPLHS->getOperand(0);
+  if (PtrBase == RHS) {
+    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
+    // This transformation (ignoring the base and scales) is valid because we
+    // know pointers can't overflow.  See if we can output an optimized form.
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+    // If not, synthesize the offset the hard way.
+    if (Offset == 0)
+      Offset = EmitGEPOffset(GEPLHS, I, *this);
+    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+                        Constant::getNullValue(Offset->getType()));
+  } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
+    // If the base pointers are different, but the indices are the same, just
+    // compare the base pointer.
+    if (PtrBase != GEPRHS->getOperand(0)) {
+      bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+      IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+                        GEPRHS->getOperand(0)->getType();
+      if (IndicesTheSame)
+        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+            IndicesTheSame = false;
+            break;
+          }
+
+      // If all indices are the same, just compare the base pointers.
+      if (IndicesTheSame)
+        return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+                            GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+      // Otherwise, the base pointers are different and the indices are
+      // different, bail out.
+      return 0;
+    }
+
+    // If one of the GEPs has all zero indices, recurse.
+    bool AllZeros = true;
+    for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+      if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+          !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+        AllZeros = false;
+        break;
+      }
+    if (AllZeros)
+      return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+                         ICmpInst::getSwappedPredicate(Cond), I);
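+    // Illustration only: since GEP arithmetic is assumed not to overflow,
+    // "(gep %p, %i) == %p" reduces to "%i == 0", and two GEPs off the same
+    // base that differ in a single index reduce to a signed compare of that
+    // index, e.g. &p[a] <u &p[b] becomes a <s b.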
+
+    // If the other GEP has all zero indices, recurse.
+    AllZeros = true;
+    for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+      if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+          !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+        AllZeros = false;
+        break;
+      }
+    if (AllZeros)
+      return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+      // If the GEPs only differ by one index, compare it.
+      unsigned NumDifferences = 0;  // Keep track of # differences.
+      unsigned DiffOperand = 0;     // The operand that differs.
+      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+          if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+              GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+            // Irreconcilable differences.
+            NumDifferences = 2;
+            break;
+          } else {
+            if (NumDifferences++) break;
+            DiffOperand = i;
+          }
+        }
+
+      if (NumDifferences == 0)   // SAME GEP?
+        return ReplaceInstUsesWith(I, // No comparison is needed here.
+                                   ConstantInt::get(Type::Int1Ty,
+                                             ICmpInst::isTrueWhenEqual(Cond)));
+
+      else if (NumDifferences == 1) {
+        Value *LHSV = GEPLHS->getOperand(DiffOperand);
+        Value *RHSV = GEPRHS->getOperand(DiffOperand);
+        // Make sure we do a signed comparison here.
+        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+      }
+    }
+
+    // Only lower this if the icmp is the only user of the GEP or if we expect
+    // the result to fold to a constant!
+    if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+        (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
+      Value *L = EmitGEPOffset(GEPLHS, I, *this);
+      Value *R = EmitGEPOffset(GEPRHS, I, *this);
+      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+    }
+  }
+  return 0;
+}
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+                                                Instruction *LHSI,
+                                                Constant *RHSC) {
+  if (!isa<ConstantFP>(RHSC)) return 0;
+  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+  // Get the width of the mantissa.  We don't want to hack on conversions that
+  // might lose information from the integer, e.g. "i64 -> float"
+  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+  if (MantissaWidth == -1) return 0;  // Unknown.
+
+  // Check to see that the input is converted from an integer type that is
+  // small enough to preserve all bits.  TODO: check here for "known" sign bits.
+  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+  unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+  // If this is a uitofp instruction, we need an extra bit to hold the sign.
+  bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+  if (LHSUnsigned)
+    ++InputSize;
+
+  // If the conversion would lose info, don't hack on this.
+  if ((int)InputSize > MantissaWidth)
+    return 0;
+
+  // Otherwise, we can potentially simplify the comparison.  We know that it
+  // will always come through as an integer value and we know the constant is
+  // not a NAN (it would have been previously simplified).
+  assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+  ICmpInst::Predicate Pred;
+  switch (I.getPredicate()) {
+  default: assert(0 && "Unexpected predicate!");
+  case FCmpInst::FCMP_UEQ:
+  case FCmpInst::FCMP_OEQ:
+    Pred = ICmpInst::ICMP_EQ;
+    break;
+  case FCmpInst::FCMP_UGT:
+  case FCmpInst::FCMP_OGT:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+    break;
+  case FCmpInst::FCMP_UGE:
+  case FCmpInst::FCMP_OGE:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+    break;
+  case FCmpInst::FCMP_ULT:
+  case FCmpInst::FCMP_OLT:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+    break;
+  case FCmpInst::FCMP_ULE:
+  case FCmpInst::FCMP_OLE:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+    break;
+  case FCmpInst::FCMP_UNE:
+  case FCmpInst::FCMP_ONE:
+    Pred = ICmpInst::ICMP_NE;
+    break;
+  case FCmpInst::FCMP_ORD:
+    return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+  case FCmpInst::FCMP_UNO:
+    return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+  }
+
+  const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+  // Now we know that the APFloat is a normal number, zero or inf.
+
+  // See if the FP constant is too large for the integer.  For example,
+  // comparing an i8 to 300.0.
+  unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+
+  if (!LHSUnsigned) {
+    // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
+    // and large values.
+    APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+    SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+                          APFloat::rmNearestTiesToEven);
+    if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
+      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+          Pred == ICmpInst::ICMP_SLE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+    }
+  } else {
+    // If the RHS value is > UnsignedMax, fold the comparison.  This handles
+    // +INF and large values.
+    APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+    UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+                          APFloat::rmNearestTiesToEven);
+    if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
+      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+          Pred == ICmpInst::ICMP_ULE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+    }
+  }
+
+  if (!LHSUnsigned) {
+    // See if the RHS value is < SignedMin.
+    APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+    SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+                          APFloat::rmNearestTiesToEven);
+    if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+          Pred == ICmpInst::ICMP_SGE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+    }
+  }
+
+  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+  // [0, UMAX], but it may still be fractional.  See if it is fractional by
+  // casting the FP value to the integer value and back, checking for equality.
+  // Don't do this for zero, because -0.0 is not fractional.
+  Constant *RHSInt = LHSUnsigned
+    ? ConstantExpr::getFPToUI(RHSC, IntTy)
+    : ConstantExpr::getFPToSI(RHSC, IntTy);
+  if (!RHS.isZero()) {
+    bool Equal = LHSUnsigned
+      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+    if (!Equal) {
+      // If we had a comparison against a fractional value, we have to adjust
+      // the compare predicate and sometimes the value.  RHSC is rounded
+      // towards zero at this point.
+      switch (Pred) {
+      default: assert(0 && "Unexpected integer comparison!");
+      case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      case ICmpInst::ICMP_ULE:
+        // (float)int <= 4.4   --> int <= 4
+        // (float)int <= -4.4  --> false
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        break;
+      case ICmpInst::ICMP_SLE:
+        // (float)int <= 4.4   --> int <= 4
+        // (float)int <= -4.4  --> int < -4
+        if (RHS.isNegative())
+          Pred = ICmpInst::ICMP_SLT;
+        break;
+      case ICmpInst::ICMP_ULT:
+        // (float)int < -4.4   --> false
+        // (float)int < 4.4    --> int <= 4
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        Pred = ICmpInst::ICMP_ULE;
+        break;
+      case ICmpInst::ICMP_SLT:
+        // (float)int < -4.4   --> int < -4
+        // (float)int < 4.4    --> int <= 4
+        if (!RHS.isNegative())
+          Pred = ICmpInst::ICMP_SLE;
+        break;
+      case ICmpInst::ICMP_UGT:
+        // (float)int > 4.4    --> int > 4
+        // (float)int > -4.4   --> true
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+        break;
+      case ICmpInst::ICMP_SGT:
+        // (float)int > 4.4    --> int > 4
+        // (float)int > -4.4   --> int >= -4
+        if (RHS.isNegative())
+          Pred = ICmpInst::ICMP_SGE;
+        break;
+      case ICmpInst::ICMP_UGE:
+        // (float)int >= -4.4   --> true
+        // (float)int >= 4.4    --> int > 4
+        if (!RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+        Pred = ICmpInst::ICMP_UGT;
+        break;
+      case ICmpInst::ICMP_SGE:
+        // (float)int >= -4.4   --> int >= -4
+        // (float)int >= 4.4    --> int > 4
+        if (!RHS.isNegative())
+          Pred = ICmpInst::ICMP_SGT;
+        break;
+      }
+    }
+  }
+
+  // Lower this FP comparison into an appropriate integer version of the
+  // comparison.
+  return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+  bool Changed = SimplifyCompare(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Fold trivial predicates.
+  if (I.getPredicate() == FCmpInst::FCMP_FALSE)
+    return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+  if (I.getPredicate() == FCmpInst::FCMP_TRUE)
+    return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+
+  // Simplify 'fcmp pred X, X'
+  if (Op0 == Op1) {
+    switch (I.getPredicate()) {
+    default: assert(0 && "Unknown predicate!");
+    case FCmpInst::FCMP_UEQ:    // True if unordered or equal
+    case FCmpInst::FCMP_UGE:    // True if unordered, greater than, or equal
+    case FCmpInst::FCMP_ULE:    // True if unordered, less than, or equal
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+    case FCmpInst::FCMP_OGT:    // True if ordered and greater than
+    case FCmpInst::FCMP_OLT:    // True if ordered and less than
+    case FCmpInst::FCMP_ONE:    // True if ordered and operands are unequal
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+    case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
+    case FCmpInst::FCMP_ULT:    // True if unordered or less than
+    case FCmpInst::FCMP_UGT:    // True if unordered or greater than
+    case FCmpInst::FCMP_UNE:    // True if unordered or not equal
+      // Canonicalize these to be 'fcmp uno %X, 0.0'.
+      I.setPredicate(FCmpInst::FCMP_UNO);
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      return &I;
+
+    case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
+    case FCmpInst::FCMP_OEQ:    // True if ordered and equal
+    case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
+    case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
+      // Canonicalize these to be 'fcmp ord %X, 0.0'.
+      I.setPredicate(FCmpInst::FCMP_ORD);
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      return &I;
+    }
+  }
+
+  if (isa<UndefValue>(Op1))    // fcmp pred X, undef -> undef
+    return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+
+  // Handle fcmp with constant RHS
+  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    // If the constant is a nan, see if we can fold the comparison based on it.
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+      if (CFP->getValueAPF().isNaN()) {
+        if (FCmpInst::isOrdered(I.getPredicate()))   // True if ordered and...
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        assert(FCmpInst::isUnordered(I.getPredicate()) &&
+               "Comparison must be either ordered or unordered!");
+        // True if unordered.
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      }
+    }
+
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      switch (LHSI->getOpcode()) {
+      case Instruction::PHI:
+        // Only fold fcmp into the PHI if the phi and fcmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I))
+            return NV;
+        break;
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+        if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+          return NV;
+        break;
+      case Instruction::Select:
+        // If either operand of the select is a constant, we can fold the
+        // comparison into the select arms, which will cause one to be
+        // constant folded and the select turned into a bitwise or.
+        Value *Op1 = 0, *Op2 = 0;
+        if (LHSI->hasOneUse()) {
+          if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+            // Fold the known value into the constant operand.
+            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            // Insert a new FCmp of the other select operand.
+            Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
+                                                   LHSI->getOperand(2), RHSC,
+                                                   I.getName()), I);
+          } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+            // Fold the known value into the constant operand.
+            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            // Insert a new FCmp of the other select operand.
+            Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
+                                                   LHSI->getOperand(1), RHSC,
+                                                   I.getName()), I);
+          }
+        }
+
+        if (Op1)
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+        break;
+      }
+  }
+
+  return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+  bool Changed = SimplifyCompare(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+  const Type *Ty = Op0->getType();
+
+  // icmp X, X
+  if (Op0 == Op1)
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+                                                   I.isTrueWhenEqual()));
+
+  if (isa<UndefValue>(Op1))    // X icmp undef -> undef
+    return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+
+  // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
+  // addresses never equal each other!  We already know that Op0 != Op1.
+  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+       isa<ConstantPointerNull>(Op0)) &&
+      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+       isa<ConstantPointerNull>(Op1)))
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+                                                   !I.isTrueWhenEqual()));
+
+  // icmp's with boolean values can always be turned into bitwise operations
+  if (Ty == Type::Int1Ty) {
+    switch (I.getPredicate()) {
+    default: assert(0 && "Invalid icmp instruction!");
+    case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
+      Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
+      InsertNewInstBefore(Xor, I);
+      return BinaryOperator::CreateNot(Xor);
+    }
+    case ICmpInst::ICMP_NE:                 // icmp ne i1 A, B -> A^B
+      return BinaryOperator::CreateXor(Op0, Op1);
+
+    case ICmpInst::ICMP_UGT:
+      std::swap(Op0, Op1);                  // Change icmp ugt -> icmp ult
+      // FALL THROUGH
+    case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B
+      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+      InsertNewInstBefore(Not, I);
+      return BinaryOperator::CreateAnd(Not, Op1);
+    }
+    case ICmpInst::ICMP_SGT:
+      std::swap(Op0, Op1);                  // Change icmp sgt -> icmp slt
+      // FALL THROUGH
+    case ICmpInst::ICMP_SLT: {              // icmp slt i1 A, B -> A & ~B
+      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
+      InsertNewInstBefore(Not, I);
+      return BinaryOperator::CreateAnd(Not, Op0);
+    }
+    case ICmpInst::ICMP_UGE:
+      std::swap(Op0, Op1);                  // Change icmp uge -> icmp ule
+      // FALL THROUGH
+    case ICmpInst::ICMP_ULE: {              // icmp ule i1 A, B -> ~A | B
+      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+      InsertNewInstBefore(Not, I);
+      return BinaryOperator::CreateOr(Not, Op1);
+    }
+    case ICmpInst::ICMP_SGE:
+      std::swap(Op0, Op1);                  // Change icmp sge -> icmp sle
+      // FALL THROUGH
+    case ICmpInst::ICMP_SLE: {              // icmp sle i1 A, B -> A | ~B
+      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
+      InsertNewInstBefore(Not, I);
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+    }
+  }
+
+  unsigned BitWidth = 0;
+  if (TD)
+    BitWidth = TD->getTypeSizeInBits(Ty);
+  else if (isa<IntegerType>(Ty))
+    BitWidth = Ty->getPrimitiveSizeInBits();
+
+  bool isSignBit = false;
+
+  // See if we are doing a comparison with a constant.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    Value *A = 0, *B = 0;
+
+    // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+    if (I.isEquality() && CI->isNullValue() &&
+        match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+      // (icmp cond A B) if cond is equality
+      return new ICmpInst(I.getPredicate(), A, B);
+    }
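+    // Illustration only: the i1 rewrites earlier in this function follow
+    // from 0 and 1 being the only values, e.g.
+    //   icmp ult i1 A, B --> ~A & B   (true only for A=0, B=1)
+    //   icmp eq  i1 A, B --> ~(A^B)   (true exactly when the bits agree).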
+
+    // If we have an icmp le or icmp ge instruction, turn it into the
+    // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
+    // them being folded in the code below.
+    switch (I.getPredicate()) {
+    default: break;
+    case ICmpInst::ICMP_ULE:
+      if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_SLE:
+      if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_UGE:
+      if (CI->isMinValue(false))                 // A >=u MIN -> TRUE
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI));
+    case ICmpInst::ICMP_SGE:
+      if (CI->isMinValue(true))                  // A >=s MIN -> TRUE
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
+    }
+
+    // If this comparison is a normal comparison, it demands all
+    // bits, if it is a sign bit comparison, it only demands the sign bit.
+    bool UnusedBit;
+    isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+  }
+
+  // See if we can fold the comparison based on range information we can get
+  // by checking whether bits are known to be zero or one in the input.
+  if (BitWidth != 0) {
+    APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+    APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+    if (SimplifyDemandedBits(I.getOperandUse(0),
+                             isSignBit ? APInt::getSignBit(BitWidth)
+                                       : APInt::getAllOnesValue(BitWidth),
+                             Op0KnownZero, Op0KnownOne, 0))
+      return &I;
+    if (SimplifyDemandedBits(I.getOperandUse(1),
+                             APInt::getAllOnesValue(BitWidth),
+                             Op1KnownZero, Op1KnownOne, 0))
+      return &I;
+
+    // Given the known and unknown bits, compute a range that the LHS could be
+    // in.  Compute the Min, Max and RHS values based on the known bits.  For
+    // the EQ and NE we use unsigned values.
+    APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+    APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+    if (ICmpInst::isSignedPredicate(I.getPredicate())) {
+      ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+                                             Op0Min, Op0Max);
+      ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+                                             Op1Min, Op1Max);
+    } else {
+      ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+                                               Op0Min, Op0Max);
+      ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+                                               Op1Min, Op1Max);
+    }
+
+    // If Min and Max are known to be the same, then SimplifyDemandedBits
+    // figured out that the LHS is a constant.  Just constant fold this now so
+    // that code below can assume that Min != Max.
+    if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+      return new ICmpInst(I.getPredicate(), ConstantInt::get(Op0Min), Op1);
+    if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+      return new ICmpInst(I.getPredicate(), Op0, ConstantInt::get(Op1Min));
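+    // Illustration only: if the known bits of a 4-bit x are 0??0 (top and
+    // bottom bits known zero), the unsigned range is [0b0000, 0b0110], so a
+    // compare such as "x == 9" folds to false because 9 > max(x).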
+
+    // Based on the range information we know about the LHS, see if we can
+    // simplify this comparison.  For example, (x&4) < 8 is always true.
+    switch (I.getPredicate()) {
+    default: assert(0 && "Unknown icmp opcode!");
+    case ICmpInst::ICMP_EQ:
+      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      break;
+    case ICmpInst::ICMP_NE:
+      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      break;
+    case ICmpInst::ICMP_ULT:
+      if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+
+        // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
+        if (CI->isMinValue(true))
+          return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+                              ConstantInt::getAllOnesValue(Op0->getType()));
+      }
+      break;
+    case ICmpInst::ICMP_UGT:
+      if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+      if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(a)-1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+
+        // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
+        if (CI->isMaxValue(true))
+          return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+                              ConstantInt::getNullValue(Op0->getType()));
+      }
+      break;
+    case ICmpInst::ICMP_SLT:
+      if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+      }
+      break;
+    case ICmpInst::ICMP_SGT:
+      if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+      if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+      }
+      break;
+    case ICmpInst::ICMP_SGE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+      if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      break;
+    case ICmpInst::ICMP_SLE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+      if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      break;
+    case ICmpInst::ICMP_UGE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+      if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      break;
+    case ICmpInst::ICMP_ULE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+      if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      break;
+    }
+
+    // Turn a signed comparison into an unsigned one if both operands
+    // are known to have the same sign.
+    if (I.isSignedPredicate() &&
+        ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+         (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+      return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+  }
+
+  // Test if the ICmpInst instruction is used exclusively by a select as
+  // part of a minimum or maximum operation.  If so, refrain from doing
+  // any other folding.  This helps out other analyses which understand
+  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+  // and CodeGen.  And in this case, at least one of the comparison
+  // operands has at least one user besides the compare (the select),
+  // which would often largely negate the benefit of folding anyway.
+  if (I.hasOneUse())
+    if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+        return 0;
+
+  // See if we are doing a comparison between a constant and an instruction
+  // that can be folded into the comparison.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    // Since the RHS is a ConstantInt (CI), if the left hand side is an
+    // instruction, see if that instruction also has constants so that the
+    // instruction can be folded into the icmp
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+        return Res;
+  }
+
+  // Handle icmp with constant (but not simple integer constant) RHS
+  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      switch (LHSI->getOpcode()) {
+      case Instruction::GetElementPtr:
+        if (RHSC->isNullValue()) {
+          // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+          bool isAllZeros = true;
+          for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i)
+            if (!isa<Constant>(LHSI->getOperand(i)) ||
+                !cast<Constant>(LHSI->getOperand(i))->isNullValue()) {
+              isAllZeros = false;
+              break;
+            }
+          if (isAllZeros)
+            return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+                    Constant::getNullValue(LHSI->getOperand(0)->getType()));
+        }
+        break;
+
+      case Instruction::PHI:
+        // Only fold icmp into the PHI if the phi and icmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I))
+            return NV;
+        break;
+      case Instruction::Select: {
+        // If either operand of the select is a constant, we can fold the
+        // comparison into the select arms, which will cause one to be
+        // constant folded and the select turned into a bitwise or.
+        Value *Op1 = 0, *Op2 = 0;
+        if (LHSI->hasOneUse()) {
+          if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+            // Fold the known value into the constant operand.
+            Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+            // Insert a new ICmp of the other select operand.
+            Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
+                                                   LHSI->getOperand(2), RHSC,
+                                                   I.getName()), I);
+          } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+            // Fold the known value into the constant operand.
+            Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+            // Insert a new ICmp of the other select operand.
+            Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
+                                                   LHSI->getOperand(1), RHSC,
+                                                   I.getName()), I);
+          }
+        }
+
+        if (Op1)
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+        break;
+      }
+      case Instruction::Malloc:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
+          AddToWorkList(LHSI);
+          return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+                                                         !I.isTrueWhenEqual()));
+        }
+        break;
+      }
+  }
+
+  // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+  if (User *GEP = dyn_castGetElementPtr(Op0))
+    if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+      return NI;
+  if (User *GEP = dyn_castGetElementPtr(Op1))
+    if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+                           ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+      return NI;
+
+  // Test to see if the operands of the icmp are casted versions of other
+  // values.  If the ptr->ptr cast can be stripped off both arguments, we do
+  // so now.
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+    if (isa<PointerType>(Op0->getType()) &&
+        (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+      // We keep moving the cast from the left operand over to the right
+      // operand, where it can often be eliminated completely.
+      Op0 = CI->getOperand(0);
+
+      // If operand #1 is a bitcast instruction, it must also be a ptr->ptr
+      // cast so eliminate it as well.
+      if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+        Op1 = CI2->getOperand(0);
+
+      // If Op1 is a constant, we can fold the cast into the constant.
+      if (Op0->getType() != Op1->getType()) {
+        if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+        } else {
+          // Otherwise, cast the RHS right before the icmp
+          Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
+        }
+      }
+      return new ICmpInst(I.getPredicate(), Op0, Op1);
+    }
+  }
+
+  if (isa<CastInst>(Op0)) {
+    // Handle the special case of: icmp (cast bool to X), <cst>
+    // This comes up when you have code like
+    //   int X = A < B;
+    //   if (X) ...
+    // For generality, we handle any zero-extension of any operand comparison
+    // with a constant or another cast from the same type.
+    if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+      if (Instruction *R = visitICmpInstWithCastAndCast(I))
+        return R;
+  }
+
+  // See if it's the same type of instruction on the left and right.
+  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+      if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
+          Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
+        switch (Op0I->getOpcode()) {
+        default: break;
+        case Instruction::Add:
+        case Instruction::Sub:
+        case Instruction::Xor:
+          if (I.isEquality())    // a+x icmp eq/ne b+x --> a icmp b
+            return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+                                Op1I->getOperand(0));
+          // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+            if (CI->getValue().isSignBit()) {
+              ICmpInst::Predicate Pred = I.isSignedPredicate()
+                                             ? I.getUnsignedPredicate()
+                                             : I.getSignedPredicate();
+              return new ICmpInst(Pred, Op0I->getOperand(0),
+                                  Op1I->getOperand(0));
+            }
+
+            if (CI->getValue().isMaxSignedValue()) {
+              ICmpInst::Predicate Pred = I.isSignedPredicate()
+                                             ? I.getUnsignedPredicate()
+                                             : I.getSignedPredicate();
+              Pred = I.getSwappedPredicate(Pred);
+              return new ICmpInst(Pred, Op0I->getOperand(0),
+                                  Op1I->getOperand(0));
+            }
+          }
+          break;
+        case Instruction::Mul:
+          if (!I.isEquality())
+            break;
+
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+            // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+            // Mask = -1 >> count-trailing-zeros(Cst).
+            if (!CI->isZero() && !CI->isOne()) {
+              const APInt &AP = CI->getValue();
+              ConstantInt *Mask = ConstantInt::get(
+                                      APInt::getLowBitsSet(AP.getBitWidth(),
+                                                           AP.getBitWidth() -
+                                                      AP.countTrailingZeros()));
+              Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0),
+                                                            Mask);
+              Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0),
+                                                            Mask);
+              InsertNewInstBefore(And1, I);
+              InsertNewInstBefore(And2, I);
+              return new ICmpInst(I.getPredicate(), And1, And2);
+            }
+          }
+          break;
+        }
+      }
+    }
+  }
+
+  // ~x < ~y --> y < x
+  { Value *A, *B;
+    if (match(Op0, m_Not(m_Value(A))) &&
+        match(Op1, m_Not(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B, A);
+  }
+
+  if (I.isEquality()) {
+    Value *A, *B, *C, *D;
+
+    // -x == -y --> x == y
+    if (match(Op0, m_Neg(m_Value(A))) &&
+        match(Op1, m_Neg(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), A, B);
+
+    if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+      if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
+        Value *OtherVal = A == Op1 ? B : A;
+        return new ICmpInst(I.getPredicate(), OtherVal,
+                            Constant::getNullValue(A->getType()));
+      }
+
+      if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+        // A^c1 == C^c2 --> A == C^(c1^c2)
+        ConstantInt *C1, *C2;
+        if (match(B, m_ConstantInt(C1)) &&
+            match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+          Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
+          Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
+          return new ICmpInst(I.getPredicate(), A,
+                              InsertNewInstBefore(Xor, I));
+        }
+
+        // A^B == A^D -> B == D
+        if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+        if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+        if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+        if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+      }
+    }
+
+    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+        (A == Op0 || B == Op0)) {
+      // A == (A^B)  ->  B == 0
+      Value *OtherVal = A == Op0 ? B : A;
+      return new ICmpInst(I.getPredicate(), OtherVal,
+                          Constant::getNullValue(A->getType()));
+    }
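+    // Illustration only: these equality folds use xor cancellation,
+    // (A ^ B) ^ A == B, so (A^B) == A  <=>  B == 0, and with constants
+    // A^c1 == C^c2  <=>  A == C^(c1^c2); e.g. c1 = 3, c2 = 5:
+    //   x^3 == y^5  exactly when  x == y^6.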
+      return new ICmpInst(I.getPredicate(), OtherVal,
+                          Constant::getNullValue(A->getType()));
+    }
+
+    // (A-B) == A  ->  B == 0
+    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B,
+                          Constant::getNullValue(B->getType()));
+
+    // A == (A-B)  ->  B == 0
+    if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B,
+                          Constant::getNullValue(B->getType()));
+
+    // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+    if (Op0->hasOneUse() && Op1->hasOneUse() &&
+        match(Op0, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1, m_And(m_Value(C), m_Value(D)))) {
+      Value *X = 0, *Y = 0, *Z = 0;
+
+      if (A == C) {
+        X = B; Y = D; Z = A;
+      } else if (A == D) {
+        X = B; Y = C; Z = A;
+      } else if (B == C) {
+        X = A; Y = D; Z = B;
+      } else if (B == D) {
+        X = A; Y = C; Z = B;
+      }
+
+      if (X) {   // Build (X^Y) & Z
+        Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
+        Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
+        I.setOperand(0, Op1);
+        I.setOperand(1, Constant::getNullValue(Op1->getType()));
+        return &I;
+      }
+    }
+  }
+  return Changed ? &I : 0;
+}
+
+
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+
+  // FIXME: If the operand types don't match the type of the divide
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2 produces different
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't
+  // work. :(  The if statement below tests that condition and bails
+  // if it finds it.
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
+    return 0;
+  if (DivRHS->isZero())
+    return 0; // The ProdOV computation fails on divide by zero.
+  if (DivIsSigned && DivRHS->isAllOnesValue())
+    return 0; // The overflow computation also screws up here
+  if (DivRHS->isOne())
+    return 0; // Not worth bothering, and eliminates some funny cases
+              // with INT_MIN.
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+  // C2 (CI). By solving for X we can turn this into a range check
+  // instead of computing a divide.
+  ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding.
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+  // Compute this interval based on the constants involved and the signedness
+  // of the compare/divide.  This computes a half-open interval, keeping track
+  // of whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if its corresponding bound variable is
+  // valid, -1 if overflowed off the bottom end, or +1 if overflowed off the
+  // top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  ConstantInt *LoBound = 0, *HiBound = 0;
+
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow)
+      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+      HiBound = DivRHS;
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      HiBound = AddOne(Prod);
+      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow) {
+        ConstantInt* DivNeg = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg,
+                                     true) ? -1 : 0;
+      }
+    }
+  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(DivRHS);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiBound = AddOne(Prod);
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+    } else {                       // (X / neg) op neg
+      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
+      LoOverflow = HiOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+    }
+
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: assert(0 && "Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow == -1)  // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+                                                          Instruction *LHSI,
+                                                          ConstantInt *RHS) {
+  const APInt &RHSV = RHS->getValue();
+
+  switch (LHSI->getOpcode()) {
+  case Instruction::Trunc:
+    if (ICI.isEquality() && LHSI->hasOneUse()) {
+      // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+      // of the high bits truncated out of x are known.
+      unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+            SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+      APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+      ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+      // If all the high bits are known, we can do this xform.
+      if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+        // Pull in the high bits from known-ones set.
+        APInt NewRHS(RHS->getValue());
+        NewRHS.zext(SrcBits);
+        NewRHS |= KnownOne;
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantInt::get(NewRHS));
+      }
+    }
+    break;
+
+  case Instruction::Xor:         // (icmp pred (xor X, XorCST), CI)
+    if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+      // fold the xor.
+      if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+          (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+        Value *CompareVal = LHSI->getOperand(0);
+
+        // If the sign bit of the XorCST is not set, there is no change to
+        // the operation, just stop using the Xor.
+        if (!XorCST->getValue().isNegative()) {
+          ICI.setOperand(0, CompareVal);
+          AddToWorkList(LHSI);
+          return &ICI;
+        }
+
+        // Was the old condition true if the operand is positive?
+        bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+        // If so, the new one isn't.
+        isTrueIfPositive ^= true;
+
+        if (isTrueIfPositive)
+          return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS));
+        else
+          return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
+      }
+
+      if (LHSI->hasOneUse()) {
+        // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+        if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+          const APInt &SignBit = XorCST->getValue();
+          ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+                                         ? ICI.getUnsignedPredicate()
+                                         : ICI.getSignedPredicate();
+          return new ICmpInst(Pred, LHSI->getOperand(0),
+                              ConstantInt::get(RHSV ^ SignBit));
+        }
+
+        // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+        if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+          const APInt &NotSignBit = XorCST->getValue();
+          ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+                                         ? ICI.getUnsignedPredicate()
+                                         : ICI.getSignedPredicate();
+          Pred = ICI.getSwappedPredicate(Pred);
+          return new ICmpInst(Pred, LHSI->getOperand(0),
+                              ConstantInt::get(RHSV ^ NotSignBit));
+        }
+      }
+    }
+    break;
+  case Instruction::And:         // (icmp pred (and X, AndCST), RHS)
+    if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+        LHSI->getOperand(0)->hasOneUse()) {
+      ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+      // If the LHS is an AND of a truncating cast, we can widen the
+      // and/compare to be the input width without changing the value
+      // produced, eliminating a cast.
+      if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+        // We can do this transformation if either the AND constant does not
+        // have its sign bit set or if it is an equality comparison.
+        // Extending a relational comparison when we're checking the sign
+        // bit would not work.
+        if (Cast->hasOneUse() &&
+            (ICI.isEquality() ||
+             (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
+          uint32_t BitWidth =
+            cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+          APInt NewCST = AndCST->getValue();
+          NewCST.zext(BitWidth);
+          APInt NewCI = RHSV;
+          NewCI.zext(BitWidth);
+          Instruction *NewAnd =
+            BinaryOperator::CreateAnd(Cast->getOperand(0),
+                                      ConstantInt::get(NewCST),
+                                      LHSI->getName());
+          InsertNewInstBefore(NewAnd, ICI);
+          return new ICmpInst(ICI.getPredicate(), NewAnd,
+                              ConstantInt::get(NewCI));
+        }
+      }
+
+      // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+      // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
+      // happens a LOT in code produced by the C front-end, for bitfield
+      // access.
+      BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+      if (Shift && !Shift->isShift())
+        Shift = 0;
+
+      ConstantInt *ShAmt;
+      ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+      const Type *Ty = Shift ? Shift->getType() : 0;  // Type of the shift.
+      const Type *AndTy = AndCST->getType();          // Type of the and.
+
+      // We can fold this as long as we can't shift unknown bits
+      // into the mask.  This can only happen with signed shift
+      // rights, as they sign-extend.
+      if (ShAmt) {
+        bool CanFold = Shift->isLogicalShift();
+        if (!CanFold) {
+          // To test for the bad case of the signed shr, see if any
+          // of the bits shifted in could be tested after the mask.
+          uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+          int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+          uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+          if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+               AndCST->getValue()) == 0)
+            CanFold = true;
+        }
+
+        if (CanFold) {
+          Constant *NewCst;
+          if (Shift->getOpcode() == Instruction::Shl)
+            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+          else
+            NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+          // Check to see if we are shifting out any of the bits being
+          // compared.
+          if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) {
+            // If we shifted bits out, the fold is not going to work out.
+            // As a special case, check to see if this means that the
+            // result is always true or false now.
+            if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+            if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+          } else {
+            ICI.setOperand(1, NewCst);
+            Constant *NewAndCST;
+            if (Shift->getOpcode() == Instruction::Shl)
+              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+            else
+              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+            LHSI->setOperand(1, NewAndCST);
+            LHSI->setOperand(0, Shift->getOperand(0));
+            AddToWorkList(Shift); // Shift is dead.
+            AddUsesToWorkList(ICI);
+            return &ICI;
+          }
+        }
+      }
+
+      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The latter is
+      // preferable because it allows the C<<Y expression to be hoisted out
+      // of a loop if Y is invariant and X is not.
+      if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+          ICI.isEquality() && !Shift->isArithmeticShift() &&
+          !isa<PHINode>(Shift->getOperand(0))) {
+        // Compute C << Y.
+        Value *NS;
+        if (Shift->getOpcode() == Instruction::LShr) {
+          NS = BinaryOperator::CreateShl(AndCST,
+                                         Shift->getOperand(1), "tmp");
+        } else {
+          // Insert a logical shift.
+          NS = BinaryOperator::CreateLShr(AndCST,
+                                          Shift->getOperand(1), "tmp");
+        }
+        InsertNewInstBefore(cast<Instruction>(NS), ICI);
+
+        // Compute X & (C << Y).
+        Instruction *NewAnd =
+          BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+        InsertNewInstBefore(NewAnd, ICI);
+
+        ICI.setOperand(0, NewAnd);
+        return &ICI;
+      }
+    }
+    break;
+
+  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+
+    uint32_t TypeBits = RHSV.getBitWidth();
+
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    if (ShAmt->uge(TypeBits))
+      break;
+
+    if (ICI.isEquality()) {
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      Constant *Comp =
+        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+      if (Comp != RHS) { // Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+
+      if (LHSI->hasOneUse()) {
+        // Otherwise strength reduce the shift into an and.
+        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+        Constant *Mask =
+          ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
+
+        Instruction *AndI =
+          BinaryOperator::CreateAnd(LHSI->getOperand(0),
+                                    Mask, LHSI->getName()+".mask");
+        Value *And = InsertNewInstBefore(AndI, ICI);
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantInt::get(RHSV.lshr(ShAmtVal)));
+      }
+    }
+
+    // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+    bool TrueIfSigned = false;
+    if (LHSI->hasOneUse() &&
+        isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+      // (X << 31) <s 0  --> (X&1) != 0
+      Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
+                                        (TypeBits-ShAmt->getZExtValue()-1));
+      Instruction *AndI =
+        BinaryOperator::CreateAnd(LHSI->getOperand(0),
+                                  Mask, LHSI->getName()+".mask");
+      Value *And = InsertNewInstBefore(AndI, ICI);
+
+      return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+                          And, Constant::getNullValue(And->getType()));
+    }
+    break;
+  }
+
+  case Instruction::LShr:        // (icmp pred (shr X, ShAmt), CI)
+  case Instruction::AShr: {
+    // Only handle equality comparisons of shift-by-constant.
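+    // (e.g. "icmp eq (lshr X, 3), 5" qualifies; variable shift amounts and
+    // relational predicates are left for other transforms.)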
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt || !ICI.isEquality()) break;
+
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    uint32_t TypeBits = RHSV.getBitWidth();
+    if (ShAmt->uge(TypeBits))
+      break;
+
+    uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+
+    // If we are comparing against bits always shifted out, the
+    // comparison cannot succeed.
+    APInt Comp = RHSV << ShAmtVal;
+    if (LHSI->getOpcode() == Instruction::LShr)
+      Comp = Comp.lshr(ShAmtVal);
+    else
+      Comp = Comp.ashr(ShAmtVal);
+
+    if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+      bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+      Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+      return ReplaceInstUsesWith(ICI, Cst);
+    }
+
+    // Otherwise, check to see if the bits shifted out are known to be zero.
+    // If so, we can compare against the unshifted value:
+    //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+    if (LHSI->hasOneUse() &&
+        MaskedValueIsZero(LHSI->getOperand(0),
+                          APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+      return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                          ConstantExpr::getShl(RHS, ShAmt));
+    }
+
+    if (LHSI->hasOneUse()) {
+      // Otherwise strength reduce the shift into an and.
+      APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+      Constant *Mask = ConstantInt::get(Val);
+
+      Instruction *AndI =
+        BinaryOperator::CreateAnd(LHSI->getOperand(0),
+                                  Mask, LHSI->getName()+".mask");
+      Value *And = InsertNewInstBefore(AndI, ICI);
+      return new ICmpInst(ICI.getPredicate(), And,
+                          ConstantExpr::getShl(RHS, ShAmt));
+    }
+    break;
+  }
+
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+    // Fold: icmp pred ([us]div X, C1), C2 -> range test
+    // Fold this div into the comparison, producing a range check.
+    // Determine, based on the divide type, what the range is being
+    // checked.  If there is an overflow on the low or high side, remember
+    // it, otherwise compute the range [low, hi) bounding the new value.
+    // See: InsertRangeTest above for the kinds of replacements possible.
+    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+      if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+                                          DivRHS))
+        return R;
+    break;
+
+  case Instruction::Add:
+    // Fold: icmp pred (add X, C1), C2
+
+    if (!ICI.isEquality()) {
+      ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+      if (!LHSC) break;
+      const APInt &LHSV = LHSC->getValue();
+
+      ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+                            .subtract(LHSV);
+
+      if (ICI.isSignedPredicate()) {
+        if (CR.getLower().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getUpper()));
+        } else if (CR.getUpper().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getLower()));
+        }
+      } else {
+        if (CR.getLower().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getUpper()));
+        } else if (CR.getUpper().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getLower()));
+        }
+      }
+    }
+    break;
+  }
+
+  // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+  if (ICI.isEquality()) {
+    bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+    // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+    // the second operand is a constant, simplify a bit.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+      switch (BO->getOpcode()) {
+      case Instruction::SRem:
+        // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&
+            BO->hasOneUse()) {
+          const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+          if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+            Instruction *NewRem =
+              BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
+                                         BO->getName());
+            InsertNewInstBefore(NewRem, ICI);
+            return new ICmpInst(ICI.getPredicate(), NewRem,
+                                Constant::getNullValue(BO->getType()));
+          }
+        }
+        break;
+      case Instruction::Add:
+        // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+        if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          if (BO->hasOneUse())
+            return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                                Subtract(RHS, BOp1C));
+        } else if (RHSV == 0) {
+          // Replace ((add A, B) != 0) with (A != -B) if A or B is
+          // efficiently invertible, or if the add has just this one use.
+          Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+          if (Value *NegVal = dyn_castNegVal(BOp1))
+            return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+          else if (Value *NegVal = dyn_castNegVal(BOp0))
+            return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+          else if (BO->hasOneUse()) {
+            Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
+            InsertNewInstBefore(Neg, ICI);
+            Neg->takeName(BO);
+            return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+          }
+        }
+        break;
+      case Instruction::Xor:
+        // For the xor case, we can xor two constants together, eliminating
+        // the explicit xor.
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              ConstantExpr::getXor(RHS, BOC));
+
+        // FALLTHROUGH
+      case Instruction::Sub:
+        // Replace (([sub|xor] A, B) != 0) with (A != B)
+        if (RHSV == 0)
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              BO->getOperand(1));
+        break;
+
+      case Instruction::Or:
+        // If bits are being or'd in that are not present in the constant we
+        // are comparing against, then the comparison could never succeed!
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+          Constant *NotCI = ConstantExpr::getNot(RHS);
+          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+            return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
+                                                             isICMP_NE));
+        }
+        break;
+
+      case Instruction::And:
+        if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          // If bits are being compared against that are and'd out, then the
+          // comparison can never succeed!
+          if ((RHSV & ~BOC->getValue()) != 0)
+            return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
+                                                             isICMP_NE));
+
+          // If we have ((X & C) == C), turn it into ((X & C) != 0).
+          if (RHS == BOC && RHSV.isPowerOf2())
+            return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+                                ICmpInst::ICMP_NE, LHSI,
+                                Constant::getNullValue(RHS->getType()));
+
+          // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+          if (BOC->getValue().isSignBit()) {
+            Value *X = BO->getOperand(0);
+            Constant *Zero = Constant::getNullValue(X->getType());
+            ICmpInst::Predicate pred = isICMP_NE ?
+              ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+            return new ICmpInst(pred, X, Zero);
+          }
+
+          // ((X & ~7) == 0) --> X < 8
+          if (RHSV == 0 && isHighOnes(BOC)) {
+            Value *X = BO->getOperand(0);
+            Constant *NegX = ConstantExpr::getNeg(BOC);
+            ICmpInst::Predicate pred = isICMP_NE ?
+              ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+            return new ICmpInst(pred, X, NegX);
+          }
+        }
+      default: break;
+      }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+      // Handle icmp {eq|ne} <intrinsic>, intcst.
+      if (II->getIntrinsicID() == Intrinsic::bswap) {
+        AddToWorkList(II);
+        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(1, ConstantInt::get(RHSV.byteSwap()));
+        return &ICI;
+      }
+    }
+  }
+  return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+  Value *LHSCIOp        = LHSCI->getOperand(0);
+  const Type *SrcTy     = LHSCIOp->getType();
+  const Type *DestTy    = LHSCI->getType();
+  Value *RHSCIOp;
+
+  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+  // integer type is the same size as the pointer type.
+  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+      getTargetData().getPointerSizeInBits() ==
+         cast<IntegerType>(DestTy)->getBitWidth()) {
+    Value *RHSOp = 0;
+    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+      RHSOp = RHSC->getOperand(0);
+      // If the pointer types don't match, insert a bitcast.
+      if (LHSCIOp->getType() != RHSOp->getType())
+        RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
+    }
+
+    if (RHSOp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+  }
+
+  // The code below only handles extension cast instructions, so far.
+  // Enforce this.
+  if (LHSCI->getOpcode() != Instruction::ZExt &&
+      LHSCI->getOpcode() != Instruction::SExt)
+    return 0;
+
+  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+  bool isSignedCmp = ICI.isSignedPredicate();
+
+  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+    // Not an extension from the same type?
+    RHSCIOp = CI->getOperand(0);
+    if (RHSCIOp->getType() != LHSCIOp->getType())
+      return 0;
+
+    // If the signedness of the two casts doesn't agree (i.e. one is a sext
+    // and the other is a zext), then we can't handle this.
+    if (CI->getOpcode() != LHSCI->getOpcode())
+      return 0;
+
+    // Deal with equality cases early.
+    if (ICI.isEquality())
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // A signed comparison of sign extended values simplifies into a
+    // signed comparison.
+    if (isSignedCmp && isSignedExt)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // The other three cases all fold into an unsigned comparison.
+    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+  }
+
+  // If we aren't dealing with a constant on the RHS, exit early
+  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+  if (!CI)
+    return 0;
+
+  // Compute the constant that would happen if we truncated to SrcTy then
+  // reextended to DestTy.
+  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+
+  // If the re-extended constant didn't change...
+  if (Res2 == CI) {
+    // Make sure that sign of the Cmp and the sign of the Cast are the same.
+    // For example, we might have:
+    //   %A = sext short %X to uint
+    //   %B = icmp ugt uint %A, 1330
+    // It is incorrect to transform this into
+    //   %B = icmp ugt short %X, 1330
+    // because %A may have negative value.
+    //
+    // However, we allow this when the compare is EQ/NE, because they are
+    // signless.
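+    // (e.g. "icmp eq (zext i16 %X to i32), 42" can become "icmp eq i16 %X, 42",
+    // since 42 survives the trunc/re-extend round trip checked above.)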
+    if (isSignedExt == isSignedCmp || ICI.isEquality())
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+    return 0;
+  }
+
+  // The re-extended constant changed so the constant cannot be represented
+  // in the shorter type.  Consequently, we cannot emit a simple comparison.
+
+  // First, handle some easy cases.  We know the result cannot be equal at this
+  // point so handle the ICI.isEquality() cases
+  if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+  if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+
+  // Evaluate the comparison for LT (we invert for GT below).  LE and GE cases
+  // should have been folded away previously and not enter in here.
+  Value *Result;
+  if (isSignedCmp) {
+    // We're performing a signed comparison.
+    if (cast<ConstantInt>(CI)->getValue().isNegative())
+      Result = ConstantInt::getFalse();          // X < (small) --> false
+    else
+      Result = ConstantInt::getTrue();           // X < (large) --> true
+  } else {
+    // We're performing an unsigned comparison.
+    if (isSignedExt) {
+      // We're performing an unsigned comp with a sign extended value.
+      // This is true if the input is >= 0. [aka >s -1]
+      Constant *NegOne = ConstantInt::getAllOnesValue(SrcTy);
+      Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp,
+                                                NegOne, ICI.getName()), ICI);
+    } else {
+      // Unsigned extend & unsigned compare -> always true.
+      Result = ConstantInt::getTrue();
+    }
+  }
+
+  // Finally, return the value computed.
+  if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
+      ICI.getPredicate() == ICmpInst::ICMP_SLT)
+    return ReplaceInstUsesWith(ICI, Result);
+
+  assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
+          ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
+         "ICmp should be folded!");
+  if (Constant *CI = dyn_cast<Constant>(Result))
+    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+  return BinaryOperator::CreateNot(Result);
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+  return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+  return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+  if (Instruction *R = commonShiftTransforms(I))
+    return R;
+
+  Value *Op0 = I.getOperand(0);
+
+  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
+  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+    if (CSI->isAllOnesValue())
+      return ReplaceInstUsesWith(I, CSI);
+
+  // See if we can turn a signed shr into an unsigned shr.
+  if (!isa<VectorType>(I.getType())) {
+    if (MaskedValueIsZero(Op0,
+                  APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
+      return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+    // Arithmetic shifting an all-sign-bit value is a no-op.
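+    // (Such a value is 0 or -1, and "ashr" leaves both unchanged for any
+    // in-range shift amount.)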
+    unsigned NumSignBits = ComputeNumSignBits(Op0);
+    if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
+      return ReplaceInstUsesWith(I, Op0);
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // shl X, 0 == X and shr X, 0 == X
+  // shl 0, X == 0 and shr 0, X == 0
+  if (Op1 == Constant::getNullValue(Op1->getType()) ||
+      Op0 == Constant::getNullValue(Op0->getType()))
+    return ReplaceInstUsesWith(I, Op0);
+
+  if (isa<UndefValue>(Op0)) {
+    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
+      return ReplaceInstUsesWith(I, Op0);
+    else                                    // undef << X -> 0, undef >>u X -> 0
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+  if (isa<UndefValue>(Op1)) {
+    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
+      return ReplaceInstUsesWith(I, Op0);
+    else                                     // X << undef, X >>u undef -> 0
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+
+  // See if we can fold away this shift.
+  if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+    return &I;
+
+  // Try to fold constant and into select arguments.
+  if (isa<Constant>(Op0))
+    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+        return R;
+
+  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+      return Res;
+  return 0;
+}
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+                                               BinaryOperator &I) {
+  bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+  // See if we can simplify any instructions used by the instruction whose sole
+  // purpose is to compute bits we don't care about.
+  uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+
+  // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
+  // of a signed value.
+  //
+  if (Op1->uge(TypeBits)) {
+    if (I.getOpcode() != Instruction::AShr)
+      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+    else {
+      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+      return &I;
+    }
+  }
+
+  // ((X*C1) << C2) == (X * (C1 << C2))
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+        return BinaryOperator::CreateMul(BO->getOperand(0),
+                                         ConstantExpr::getShl(BOOp, Op1));
+
+  // Try to fold constant and into select arguments.
+  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+    if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+      return R;
+  if (isa<PHINode>(Op0))
+    if (Instruction *NV = FoldOpIntoPhi(I))
+      return NV;
+
+  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+    // place.  Don't try to do this transformation in this case.  Also, we
+    // require that the input operand is a shift-by-constant so that we have
+    // confidence that the shifts will get folded together.  We could do this
+    // xform in more cases, but it is unlikely to be profitable.
+    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+        isa<ConstantInt>(TrOp->getOperand(1))) {
+      // Okay, we'll do this xform.  Make the shift of shift.
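+      // (e.g. "shl (trunc i32 (lshr %x, 4) to i8), 2" becomes a shift plus
+      // mask performed in i32, followed by a single trunc to i8.)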
+      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+      Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
+                                                I.getName());
+      InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+
+      // For logical shifts, the truncation has the effect of making the high
+      // part of the register be zeros.  Emulate this by inserting an AND to
+      // clear the top bits as needed.  This 'and' will usually be zapped by
+      // other xforms later if dead.
+      unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
+      unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+      // The mask we constructed says what the trunc would do if occurring
+      // between the shifts.  We want to know the effect *after* the second
+      // shift.  We know that it is a logical shift by a constant, so adjust
+      // the mask as appropriate.
+      if (I.getOpcode() == Instruction::Shl)
+        MaskV <<= Op1->getZExtValue();
+      else {
+        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+        MaskV = MaskV.lshr(Op1->getZExtValue());
+      }
+
+      Instruction *And = BinaryOperator::CreateAnd(NSh,
+                                                   ConstantInt::get(MaskV),
+                                                   TI->getName());
+      InsertNewInstBefore(And, I); // shift1 & 0x00FF
+
+      // Return the value truncated to the interesting size.
+      return new TruncInst(And, I.getType());
+    }
+  }
+
+  if (Op0->hasOneUse()) {
+    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
+      Value *V1, *V2;
+      ConstantInt *CC;
+      switch (Op0BO->getOpcode()) {
+      default: break;
+      case Instruction::Add:
+      case Instruction::And:
+      case Instruction::Or:
+      case Instruction::Xor: {
+        // These operators commute.
+        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
+        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+            match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) {
+          Instruction *YS = BinaryOperator::CreateShl(
+                                          Op0BO->getOperand(0), Op1,
+                                          Op0BO->getName());
+          InsertNewInstBefore(YS, I); // (Y << C)
+          Instruction *X =
+            BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
+                                   Op0BO->getOperand(1)->getName());
+          InsertNewInstBefore(X, I);  // (X + (Y << C))
+          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+          return BinaryOperator::CreateAnd(X, ConstantInt::get(
+                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+        }
+
+        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
+        Value *Op0BOOp1 = Op0BO->getOperand(1);
+        if (isLeftShift && Op0BOOp1->hasOneUse() &&
+            match(Op0BOOp1,
+                  m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+                        m_ConstantInt(CC))) &&
+            cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+          Instruction *YS = BinaryOperator::CreateShl(
+                                                   Op0BO->getOperand(0), Op1,
+                                                   Op0BO->getName());
+          InsertNewInstBefore(YS, I); // (Y << C)
+          Instruction *XM =
+            BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                      V1->getName()+".mask");
+          InsertNewInstBefore(XM, I); // X & (CC << C)
+
+          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+        }
+      }
+
+      // FALL THROUGH.
+      case Instruction::Sub: {
+        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
+        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+            match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) {
+          Instruction *YS = BinaryOperator::CreateShl(
+                                           Op0BO->getOperand(1), Op1,
+                                           Op0BO->getName());
+          InsertNewInstBefore(YS, I); // (Y << C)
+          Instruction *X =
+            BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
+                                   Op0BO->getOperand(0)->getName());
+          InsertNewInstBefore(X, I);  // (X + (Y << C))
+          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+          return BinaryOperator::CreateAnd(X, ConstantInt::get(
+                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+        }
+
+        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
+        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+            match(Op0BO->getOperand(0),
+                  m_And(m_Shr(m_Value(V1), m_Value(V2)),
+                        m_ConstantInt(CC))) && V2 == Op1 &&
+            cast<BinaryOperator>(Op0BO->getOperand(0))
+                ->getOperand(0)->hasOneUse()) {
+          Instruction *YS = BinaryOperator::CreateShl(
+                                                    Op0BO->getOperand(1), Op1,
+                                                    Op0BO->getName());
+          InsertNewInstBefore(YS, I); // (Y << C)
+          Instruction *XM =
+            BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                      V1->getName()+".mask");
+          InsertNewInstBefore(XM, I); // X & (CC << C)
+
+          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+        }
+
+        break;
+      }
+      }
+
+
+      // If the operand is a bitwise operator with a constant RHS, and the
+      // shift is the only use, we can pull it out of the shift.
+      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+        bool isValid = true;     // Valid only for And, Or, Xor
+        bool highBitSet = false; // Transform if high bit of constant set?
+
+        switch (Op0BO->getOpcode()) {
+        default: isValid = false; break;   // Do not perform transform!
+        case Instruction::Add:
+          isValid = isLeftShift;
+          break;
+        case Instruction::Or:
+        case Instruction::Xor:
+          highBitSet = false;
+          break;
+        case Instruction::And:
+          highBitSet = true;
+          break;
+        }
+
+        // If this is a signed shift right, and the high bit is modified
+        // by the logical operation, do not perform the transformation.
+        // The highBitSet boolean indicates the value of the high bit of
+        // the constant which would cause it to be modified for this
+        // operation.
+        //
+        if (isValid && I.getOpcode() == Instruction::AShr)
+          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+        if (isValid) {
+          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+          Instruction *NewShift =
+            BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
+          InsertNewInstBefore(NewShift, I);
+          NewShift->takeName(Op0BO);
+
+          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+                                        NewRHS);
+        }
+      }
+    }
+  }
+
+  // Find out if this is a shift of a shift by a constant.
+  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+  if (ShiftOp && !ShiftOp->isShift())
+    ShiftOp = 0;
+
+  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
+    Value *X = ShiftOp->getOperand(0);
+
+    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
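+    // (e.g. "(X >>u 3) >>u 4" becomes "X >>u 7"; oversized sums are handled
+    // below, where unsigned shifts fold to zero and ashr saturates.)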
+
+    const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+    // Check for (X << c1) << c2  and  (X >> c1) >> c2
+    if (I.getOpcode() == ShiftOp->getOpcode()) {
+      // If this is an oversized composite shift, then unsigned shifts get 0
+      // and ashr saturates.
+      if (AmtSum >= TypeBits) {
+        if (I.getOpcode() != Instruction::AShr)
+          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
+      }
+
+      return BinaryOperator::Create(I.getOpcode(), X,
+                                    ConstantInt::get(Ty, AmtSum));
+    } else if (ShiftOp->getOpcode() == Instruction::LShr &&
+               I.getOpcode() == Instruction::AShr) {
+      if (AmtSum >= TypeBits)
+        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
+      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+    } else if (ShiftOp->getOpcode() == Instruction::AShr &&
+               I.getOpcode() == Instruction::LShr) {
+      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask)  since C1 != 0.
+      if (AmtSum >= TypeBits)
+        AmtSum = TypeBits-1;
+
+      Instruction *Shift =
+        BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+      InsertNewInstBefore(Shift, I);
+
+      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+    }
+
+    // Okay, if we get here, one shift must be left, and the other shift must
+    // be right.  See if the amounts are equal.
+    if (ShiftAmt1 == ShiftAmt2) {
+      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+      if (I.getOpcode() == Instruction::Shl) {
+        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+      }
+      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+      if (I.getOpcode() == Instruction::LShr) {
+        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+      }
+      // We can simplify ((X << C) >>s C) into a trunc + sext.
+      // NOTE: we could do this for any C, but that would make 'unusual'
+      // integer types.  For now, just stick to ones well-supported by the code
+      // generators.
+      const Type *SExtType = 0;
+      switch (Ty->getBitWidth() - ShiftAmt1) {
+      case 1  :
+      case 8  :
+      case 16 :
+      case 32 :
+      case 64 :
+      case 128:
+        SExtType = IntegerType::get(Ty->getBitWidth() - ShiftAmt1);
+        break;
+      default: break;
+      }
+      if (SExtType) {
+        Instruction *NewTrunc = new TruncInst(X, SExtType, "sext");
+        InsertNewInstBefore(NewTrunc, I);
+        return new SExtInst(NewTrunc, Ty);
+      }
+      // Otherwise, we can't handle it yet.
+    } else if (ShiftAmt1 < ShiftAmt2) {
+      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+      if (I.getOpcode() == Instruction::Shl) {
+        assert(ShiftOp->getOpcode() == Instruction::LShr ||
+               ShiftOp->getOpcode() == Instruction::AShr);
+        Instruction *Shift =
+          BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+        InsertNewInstBefore(Shift, I);
+
+        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      }
+
+      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
+      if (I.getOpcode() == Instruction::LShr) {
+        assert(ShiftOp->getOpcode() == Instruction::Shl);
+        Instruction *Shift =
+          BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+        InsertNewInstBefore(Shift, I);
+
+        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      }
+
+      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+    } else {
+      assert(ShiftAmt2 < ShiftAmt1);
+      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+      if (I.getOpcode() == Instruction::Shl) {
+        assert(ShiftOp->getOpcode() == Instruction::LShr ||
+               ShiftOp->getOpcode() == Instruction::AShr);
+        Instruction *Shift =
+          BinaryOperator::Create(ShiftOp->getOpcode(), X,
+                                 ConstantInt::get(Ty, ShiftDiff));
+        InsertNewInstBefore(Shift, I);
+
+        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      }
+
+      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
+      if (I.getOpcode() == Instruction::LShr) {
+        assert(ShiftOp->getOpcode() == Instruction::Shl);
+        Instruction *Shift =
+          BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+        InsertNewInstBefore(Shift, I);
+
+        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      }
+
+      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+    }
+  }
+  return 0;
+}
+
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression.  If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+                                        int &Offset) {
+  assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+    Offset = CI->getZExtValue();
+    Scale  = 0;
+    return ConstantInt::get(Type::Int32Ty, 0);
+  } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (I->getOpcode() == Instruction::Shl) {
+        // This is a value scaled by '1 << the shift amt'.
+        Scale = 1U << RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Mul) {
+        // This value is scaled by 'RHS'.
+        Scale = RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Add) {
+        // We have X+C.  Check to see if we really have (X*C2)+C1,
+        // where C1 is divisible by C2.
+        unsigned SubScale;
+        Value *SubVal =
+          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+        Offset += RHS->getZExtValue();
+        Scale = SubScale;
+        return SubVal;
+      }
+    }
+  }
+
+  // Otherwise, we can't look past this.
+  Scale = 1;
+  Offset = 0;
+  return Val;
+}
+
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
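+/// (For example, "bitcast (alloca i8, i32 8) to i32*" can usually be promoted
+/// to an "alloca i32, i32 2", alignment and size divisibility permitting, as
+/// checked below.)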
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+                                                   AllocationInst &AI) {
+  const PointerType *PTy = cast<PointerType>(CI.getType());
+
+  // Remove any uses of AI that are dead.
+  assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
+
+  for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) {
+    Instruction *User = cast<Instruction>(*UI++);
+    if (isInstructionTriviallyDead(User)) {
+      while (UI != E && *UI == User)
+        ++UI; // If this instruction uses AI more than once, don't break UI.
+
+      ++NumDeadInst;
+      DOUT << "IC: DCE: " << *User;
+      EraseInstFromFunction(*User);
+    }
+  }
+
+  // Get the type really allocated and the type casted to.
+  const Type *AllocElTy = AI.getAllocatedType();
+  const Type *CastElTy = PTy->getElementType();
+  if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+  unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+  unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+  if (CastElTyAlign < AllocElTyAlign) return 0;
+
+  // If the allocation has multiple uses, only promote it if we are strictly
+  // increasing the alignment of the resultant allocation.  If we keep it the
+  // same, we open the door to infinite loops of various kinds.  (A reference
+  // from a dbg.declare doesn't count as a use for this purpose.)
+  if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
+      CastElTyAlign == AllocElTyAlign) return 0;
+
+  uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+  uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+  if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+  // See if we can satisfy the modulus by pulling a scale out of the array
+  // size argument.
+  unsigned ArraySizeScale;
+  int ArrayOffset;
+  Value *NumElements = // See if the array size is a decomposable linear expr.
+    DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+  // If we can now satisfy the modulus, by using a non-1 scale, we really can
+  // do the xform.
+  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return 0;
+
+  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+  Value *Amt = 0;
+  if (Scale == 1) {
+    Amt = NumElements;
+  } else {
+    // If the allocation size is constant, form a constant mul expression
+    Amt = ConstantInt::get(Type::Int32Ty, Scale);
+    if (isa<ConstantInt>(NumElements))
+      Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+    // otherwise multiply the amount and the number of elements
+    else {
+      Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
+      Amt = InsertNewInstBefore(Tmp, AI);
+    }
+  }
+
+  if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+    Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
+    Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
+    Amt = InsertNewInstBefore(Tmp, AI);
+  }
+
+  AllocationInst *New;
+  if (isa<MallocInst>(AI))
+    New = new MallocInst(CastElTy, Amt, AI.getAlignment());
+  else
+    New = new AllocaInst(CastElTy, Amt, AI.getAlignment());
+  InsertNewInstBefore(New, AI);
+  New->takeName(&AI);
+
+  // If the allocation has one real use plus a dbg.declare, just remove the
+  // declare.
+  if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
+    EraseInstFromFunction(*DI);
+  }
+  // If the allocation has multiple real uses, insert a cast and change all
+  // things that used it to use the new cast.  This will also hack on CI, but
+  // it will die soon.
+  else if (!AI.hasOneUse()) {
+    AddUsesToWorkList(AI);
+    // New is the allocation instruction, pointer typed. AI is the original
+    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+    CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast");
+    InsertNewInstBefore(NewCast, AI);
+    AI.replaceAllUsesWith(NewCast);
+  }
+  return ReplaceInstUsesWith(CI, New);
+}
+
+/// CanEvaluateInDifferentType - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the computed value.  This is used by code that tries to decide
+/// whether promoting or shrinking integer operations to wider or smaller types
+/// will allow us to eliminate a truncate or extend.
+///
+/// This is a truncation operation if Ty is smaller than V->getType(), or an
+/// extension operation if Ty is larger.
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V.  We
+/// should return true if trunc(V) can be computed by computing V in the
+/// smaller type.  If V is an instruction, then trunc(inst(x,y)) can be
+/// computed as inst(trunc(x),trunc(y)), which only makes sense if x and y can
+/// be efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking if the low bits of the value
+/// can be computed in a larger type, which is then and'd or sext_in_reg'd to
+/// get the final result.
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+                                              unsigned CastOpc,
+                                              int &NumCastsRemoved) {
+  // We can always evaluate constants in another type.
+  if (isa<Constant>(V))
+    return true;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+
+  // If this is an extension or truncate, we can often eliminate it.
+  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+    // If this is a cast from the destination type, we can trivially eliminate
+    // it, and this will remove a cast overall.
+    if (I->getOperand(0)->getType() == Ty) {
+      // If the first operand is itself a cast, and is eliminable, do not count
+      // this as an eliminable cast.  We would prefer to eliminate those two
+      // casts first.
+      if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
+        ++NumCastsRemoved;
+      return true;
+    }
+  }
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
+  unsigned Opc = I->getOpcode();
+  switch (Opc) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // These operators can all arbitrarily be extended or truncated.
+    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                      NumCastsRemoved);
+
+  case Instruction::Shl:
+    // If we are truncating the result of this SHL, and if it's a shift of a
+    // constant amount, we can always perform a SHL in a smaller type.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t BitWidth = Ty->getBitWidth();
+      if (BitWidth < OrigTy->getBitWidth() &&
+          CI->getLimitedValue(BitWidth) < BitWidth)
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
+    }
+    break;
+  case Instruction::LShr:
+    // If this is a truncate of a logical shr, we can truncate it to a smaller
+    // lshr iff we know that the bits we would otherwise be shifting in are
+    // already zeros.
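+    // (e.g. "trunc (lshr i32 %x, 8) to i16" can be done as an i16 lshr when
+    // the top 16 bits of %x are known zero.)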
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t OrigBitWidth = OrigTy->getBitWidth();
+      uint32_t BitWidth = Ty->getBitWidth();
+      if (BitWidth < OrigBitWidth &&
+          MaskedValueIsZero(I->getOperand(0),
+            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+          CI->getLimitedValue(BitWidth) < BitWidth) {
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
+      }
+    }
+    break;
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::Trunc:
+    // If this is the same kind of case as our original (e.g. zext+zext), we
+    // can safely replace it.  Note that replacing it does not reduce the
+    // number of casts in the input.
+    if (Opc == CastOpc)
+      return true;
+
+    // sext (zext ty1), ty2 -> zext ty2
+    if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt)
+      return true;
+    break;
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
+                                      NumCastsRemoved);
+  }
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+                                      NumCastsRemoved))
+        return false;
+    return true;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    break;
+  }
+
+  return false;
+}
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateInDifferentType returns true for, actually insert the code to
+/// evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+                                             bool isSigned) {
+  if (Constant *C = dyn_cast<Constant>(V))
+    return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+
+  // Otherwise, it must be an instruction.
+  Instruction *I = cast<Instruction>(V);
+  Instruction *Res = 0;
+  unsigned Opc = I->getOpcode();
+  switch (Opc) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl: {
+    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+    break;
+  }
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+    // If the source type of the cast is the type we're trying for then we can
+    // just return the source.  There's no need to insert it because it is not
+    // new.
+    if (I->getOperand(0)->getType() == Ty)
+      return I->getOperand(0);
+
+    // Otherwise, must be the same type of cast, so just reinsert a new one.
+    Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+                           Ty);
+    break;
+  case Instruction::Select: {
+    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+    Res = SelectInst::Create(I->getOperand(0), True, False);
+    break;
+  }
+  case Instruction::PHI: {
+    PHINode *OPN = cast<PHINode>(I);
+    PHINode *NPN = PHINode::Create(Ty);
+    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+      Value *V = EvaluateInDifferentType(OPN->getIncomingValue(i), Ty,
+                                         isSigned);
+      NPN->addIncoming(V, OPN->getIncomingBlock(i));
+    }
+    Res = NPN;
+    break;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    assert(0 && "Unreachable!");
+    break;
+  }
+
+  Res->takeName(I);
+  return InsertNewInstBefore(Res, *I);
+}
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+
+  // Many cases of "cast of a cast" are eliminable.  If it's eliminable we just
+  // eliminate it now.
+  if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
+    if (Instruction::CastOps opc =
+        isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+      // The first cast (CSrc) is eliminable so we need to fix up or replace
+      // the second cast (CI).  CSrc will then have a good chance of being dead.
+      return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+    }
+  }
+
+  // If we are casting a select then fold the cast into the select
+  if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+    if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
+      return NV;
+
+  // If we are casting a PHI then fold the cast into the PHI
+  if (isa<PHINode>(Src))
+    if (Instruction *NV = FoldOpIntoPhi(CI))
+      return NV;
+
+  return 0;
+}
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us
+/// at the specified offset.  If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+                                       SmallVectorImpl<Value*> &NewIndices,
+                                       const TargetData *TD) {
+  if (!Ty->isSized()) return 0;
+
+  // Start with the index over the outer type.  Note that the type size
+  // might be zero (even if the offset isn't zero) if the indexed type
+  // is something like [0 x {int, int}]
+  const Type *IntPtrTy = TD->getIntPtrType();
+  int64_t FirstIdx = 0;
+  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+    FirstIdx = Offset/TySize;
+    Offset -= FirstIdx*TySize;
+
+    // Handle hosts where % returns negative instead of values [0..TySize).
+    if (Offset < 0) {
+      --FirstIdx;
+      Offset += TySize;
+      assert(Offset >= 0);
+    }
+    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+  }
+
+  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+  // Index into the types.  If we fail, set OrigBase to null.
+  while (Offset) {
+    // Indexing into tail padding between struct/array elements.
+    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+      return 0;
+
+    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = TD->getStructLayout(STy);
+      assert(Offset < (int64_t)SL->getSizeInBytes() &&
+             "Offset must stay within the indexed type");
+
+      unsigned Elt = SL->getElementContainingOffset(Offset);
+      NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+
+      Offset -= SL->getElementOffset(Elt);
+      Ty = STy->getElementType(Elt);
+    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+      assert(EltSize && "Cannot index into a zero-sized array");
+      NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
+      Offset %= EltSize;
+      Ty = AT->getElementType();
+    } else {
+      // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0; + } + } + + return Ty; +} + +/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) +Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + if (GetElementPtrInst *GEP = dyn_cast(Src)) { + // If casting the result of a getelementptr instruction with no offset, turn + // this into a cast of the original pointer! + if (GEP->hasAllZeroIndices()) { + // Changing the cast operand is usually not a good idea but it is safe + // here because the pointer operand is being replaced with another + // pointer operand so the opcode doesn't need to change. + AddToWorkList(GEP); + CI.setOperand(0, GEP->getOperand(0)); + return &CI; + } + + // If the GEP has a single use, and the base pointer is a bitcast, and the + // GEP computes a constant offset, see if we can convert these three + // instructions into fewer. This typically happens with unions and other + // non-type-safe code. + if (GEP->hasOneUse() && isa(GEP->getOperand(0))) { + if (GEP->hasAllConstantIndices()) { + // We are guaranteed to get a constant from EmitGEPOffset. + ConstantInt *OffsetV = cast(EmitGEPOffset(GEP, CI, *this)); + int64_t Offset = OffsetV->getSExtValue(); + + // Get the base pointer input of the bitcast, and the type it points to. + Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); + const Type *GEPIdxTy = + cast(OrigBase->getType())->getElementType(); + SmallVector NewIndices; + if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) { + // If we were able to index down into an element, create the GEP + // and bitcast the result. This eliminates one bitcast, potentially + // two. + Instruction *NGEP = GetElementPtrInst::Create(OrigBase, + NewIndices.begin(), + NewIndices.end(), ""); + InsertNewInstBefore(NGEP, CI); + NGEP->takeName(GEP); + + if (isa(CI)) + return new BitCastInst(NGEP, CI.getType()); + assert(isa(CI)); + return new PtrToIntInst(NGEP, CI.getType()); + } + } + } + } + + return commonCastTransforms(CI); +} + +/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy +/// type like i42. We don't want to introduce operations on random non-legal +/// integer types where they don't already exist in the code. In the future, +/// we should consider making this based off target-data, so that 32-bit targets +/// won't get i64 operations etc. +static bool isSafeIntegerType(const Type *Ty) { + switch (Ty->getPrimitiveSizeInBits()) { + case 8: + case 16: + case 32: + case 64: + return true; + default: + return false; + } +} + +/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as +/// integer types. This function implements the common transforms for all those +/// cases. +/// @brief Implement the transforms common to CastInst with integer operands +Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { + if (Instruction *Result = commonCastTransforms(CI)) + return Result; + + Value *Src = CI.getOperand(0); + const Type *SrcTy = Src->getType(); + const Type *DestTy = CI.getType(); + uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits(); + uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits(); + + // See if we can simplify any instructions used by the LHS whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + // If the source isn't an instruction or has more than one use then we + // can't do anything more. 
+ Instruction *SrcI = dyn_cast(Src); + if (!SrcI || !Src->hasOneUse()) + return 0; + + // Attempt to propagate the cast into the instruction for int->int casts. + int NumCastsRemoved = 0; + if (!isa(CI) && + // Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) && + CanEvaluateInDifferentType(SrcI, cast(DestTy), + CI.getOpcode(), NumCastsRemoved)) { + // If this cast is a truncate, evaluting in a different type always + // eliminates the cast, so it is always a win. If this is a zero-extension, + // we need to do an AND to maintain the clear top-part of the computation, + // so we require that the input have eliminated at least one cast. If this + // is a sign extension, we insert two new casts (to do the extension) so we + // require that two casts have been eliminated. + bool DoXForm = false; + bool JustReplace = false; + switch (CI.getOpcode()) { + default: + // All the others use floating point so we shouldn't actually + // get here because of the check above. + assert(0 && "Unknown cast type"); + case Instruction::Trunc: + DoXForm = true; + break; + case Instruction::ZExt: { + DoXForm = NumCastsRemoved >= 1; + if (!DoXForm && 0) { + // If it's unnecessary to issue an AND to clear the high bits, it's + // always profitable to do this xform. + Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false); + APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); + if (MaskedValueIsZero(TryRes, Mask)) + return ReplaceInstUsesWith(CI, TryRes); + + if (Instruction *TryI = dyn_cast(TryRes)) + if (TryI->use_empty()) + EraseInstFromFunction(*TryI); + } + break; + } + case Instruction::SExt: { + DoXForm = NumCastsRemoved >= 2; + if (!DoXForm && !isa(SrcI) && 0) { + // If we do not have to emit the truncate + sext pair, then it's always + // profitable to do this xform. + // + // It's not safe to eliminate the trunc + sext pair if one of the + // eliminated cast is a truncate. e.g. + // t2 = trunc i32 t1 to i16 + // t3 = sext i16 t2 to i32 + // != + // i32 t1 + Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true); + unsigned NumSignBits = ComputeNumSignBits(TryRes); + if (NumSignBits > (DestBitSize - SrcBitSize)) + return ReplaceInstUsesWith(CI, TryRes); + + if (Instruction *TryI = dyn_cast(TryRes)) + if (TryI->use_empty()) + EraseInstFromFunction(*TryI); + } + break; + } + } + + if (DoXForm) { + DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid" + << " cast: " << CI; + Value *Res = EvaluateInDifferentType(SrcI, DestTy, + CI.getOpcode() == Instruction::SExt); + if (JustReplace) + // Just replace this cast with the result. + return ReplaceInstUsesWith(CI, Res); + + assert(Res->getType() == DestTy); + switch (CI.getOpcode()) { + default: assert(0 && "Unknown cast type!"); + case Instruction::Trunc: + case Instruction::BitCast: + // Just replace this cast with the result. + return ReplaceInstUsesWith(CI, Res); + case Instruction::ZExt: { + assert(SrcBitSize < DestBitSize && "Not a zext?"); + + // If the high bits are already zero, just replace this cast with the + // result. + APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); + if (MaskedValueIsZero(Res, Mask)) + return ReplaceInstUsesWith(CI, Res); + + // We need to emit an AND to clear the high bits. 
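+        // e.g. if a zext from i8 to i32 is instead evaluated directly in
+        // i32, the narrowed result must be masked to keep the top 24 bits
+        // clear:  %r = and i32 %res, 255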
+ Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize, + SrcBitSize)); + return BinaryOperator::CreateAnd(Res, C); + } + case Instruction::SExt: { + // If the high bits are already filled with sign bit, just replace this + // cast with the result. + unsigned NumSignBits = ComputeNumSignBits(Res); + if (NumSignBits > (DestBitSize - SrcBitSize)) + return ReplaceInstUsesWith(CI, Res); + + // We need to emit a cast to truncate, then a cast to sext. + return CastInst::Create(Instruction::SExt, + InsertCastBefore(Instruction::Trunc, Res, Src->getType(), + CI), DestTy); + } + } + } + } + + Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0; + Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0; + + switch (SrcI->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // If we are discarding information, rewrite. + if (DestBitSize <= SrcBitSize && DestBitSize != 1) { + // Don't insert two casts if they cannot be eliminated. We allow + // two casts to be inserted if the sizes are the same. This could + // only be converting signedness, which is a noop. + if (DestBitSize == SrcBitSize || + !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) || + !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { + Instruction::CastOps opcode = CI.getOpcode(); + Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); + Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); + return BinaryOperator::Create( + cast(SrcI)->getOpcode(), Op0c, Op1c); + } + } + + // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 + if (isa(CI) && SrcBitSize == 1 && + SrcI->getOpcode() == Instruction::Xor && + Op1 == ConstantInt::getTrue() && + (!Op0->hasOneUse() || !isa(Op0))) { + Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI); + return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); + } + break; + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + // If we are just changing the sign, rewrite. + if (DestBitSize == SrcBitSize) { + // Don't insert two casts if they cannot be eliminated. We allow + // two casts to be inserted if the sizes are the same. This could + // only be converting signedness, which is a noop. + if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || + !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { + Value *Op0c = InsertCastBefore(Instruction::BitCast, + Op0, DestTy, *SrcI); + Value *Op1c = InsertCastBefore(Instruction::BitCast, + Op1, DestTy, *SrcI); + return BinaryOperator::Create( + cast(SrcI)->getOpcode(), Op0c, Op1c); + } + } + break; + + case Instruction::Shl: + // Allow changing the sign of the source operand. Do not allow + // changing the size of the shift, UNLESS the shift amount is a + // constant. We must not change variable sized shifts to a smaller + // size, because it is undefined to shift more bits out than exist + // in the value. + if (DestBitSize == SrcBitSize || + (DestBitSize < SrcBitSize && isa(Op1))) { + Instruction::CastOps opcode = (DestBitSize == SrcBitSize ? 
+                                     Instruction::BitCast : Instruction::Trunc);
+      Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
+      Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+      return BinaryOperator::CreateShl(Op0c, Op1c);
+    }
+    break;
+  case Instruction::AShr:
+    // If this is a signed shr, and if all bits shifted in are about to be
+    // truncated off, turn it into an unsigned shr to allow greater
+    // simplifications.
+    if (DestBitSize < SrcBitSize &&
+        isa<ConstantInt>(Op1)) {
+      uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
+      if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
+        // Insert the new logical shift right.
+        return BinaryOperator::CreateLShr(Op0, Op1);
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+  if (Instruction *Result = commonIntCastTransforms(CI))
+    return Result;
+
+  Value *Src = CI.getOperand(0);
+  const Type *Ty = CI.getType();
+  uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
+  uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+
+  // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
+  if (DestBitWidth == 1) {
+    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
+    Value *Zero = Constant::getNullValue(Src->getType());
+    return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+  }
+
+  // Optimize trunc(lshr(), c) to pull the shift through the truncate.
+  ConstantInt *ShAmtV = 0;
+  Value *ShiftOp = 0;
+  if (Src->hasOneUse() &&
+      match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) {
+    uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth);
+
+    // Get a mask for the bits shifting in.
+    APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
+    if (MaskedValueIsZero(ShiftOp, Mask)) {
+      if (ShAmt >= DestBitWidth)        // All zeros.
+        return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
+
+      // Okay, we can shrink this. Truncate the input, then return a new
+      // shift.
+      Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI);
+      Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
+      return BinaryOperator::CreateLShr(V1, V2);
+    }
+  }
+
+  return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                             bool DoXform) {
+  // If we are just checking for a icmp eq of a single bit and zext'ing it
+  // to an integer, then shift the bit to the appropriate place and then
+  // cast to integer to avoid the comparison.
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+    const APInt &Op1CV = Op1C->getValue();
+
+    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { + if (!DoXform) return ICI; + + Value *In = ICI->getOperand(0); + Value *Sh = ConstantInt::get(In->getType(), + In->getType()->getPrimitiveSizeInBits()-1); + In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, + In->getName()+".lobit"), + CI); + if (In->getType() != CI.getType()) + In = CastInst::CreateIntegerCast(In, CI.getType(), + false/*ZExt*/, "tmp", &CI); + + if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { + Constant *One = ConstantInt::get(In->getType(), 1); + In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One, + In->getName()+".not"), + CI); + } + + return ReplaceInstUsesWith(CI, In); + } + + + + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. + // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + // zext (X == 1) to i32 --> X iff X has only the low bit set. + // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 0) to i32 --> X iff X has only the low bit set. + // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. + // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + // This only works for EQ and NE + ICI->isEquality()) { + // If Op1C some other power of two, convert: + uint32_t BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? + if (!DoXform) return ICI; + + bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; + if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + // (X&4) == 2 --> false + // (X&4) != 2 --> true + Constant *Res = ConstantInt::get(Type::Int1Ty, isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); + return ReplaceInstUsesWith(CI, Res); + } + + uint32_t ShiftAmt = KnownZeroMask.logBase2(); + Value *In = ICI->getOperand(0); + if (ShiftAmt) { + // Perform a logical shr by shiftamt. + // Insert the shift to put the result in the low bit. + In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt), + In->getName()+".lobit"), CI); + } + + if ((Op1CV != 0) == isNE) { // Toggle the low bit. + Constant *One = ConstantInt::get(In->getType(), 1); + In = BinaryOperator::CreateXor(In, One, "tmp"); + InsertNewInstBefore(cast(In), CI); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + else + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitZExt(ZExtInst &CI) { + // If one of the common conversion will work .. + if (Instruction *Result = commonIntCastTransforms(CI)) + return Result; + + Value *Src = CI.getOperand(0); + + // If this is a TRUNC followed by a ZEXT then we are dealing with integral + // types and if the sizes are just right we can convert this into a logical + // 'and' which will be much cheaper than the pair of casts. + if (TruncInst *CSrc = dyn_cast(Src)) { // A->B->C cast + // Get the sizes of the types involved. We know that the intermediate type + // will be smaller than A or C, but don't know the relation between A and C. 
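+    // e.g. when A and C are both i32:
+    //   zext (trunc i32 %a to i16) to i32  ==>  and i32 %a, 65535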
+ Value *A = CSrc->getOperand(0); + unsigned SrcSize = A->getType()->getPrimitiveSizeInBits(); + unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits(); + unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + // If we're actually extending zero bits, then if + // SrcSize < DstSize: zext(a & mask) + // SrcSize == DstSize: a & mask + // SrcSize > DstSize: trunc(a) & mask + if (SrcSize < DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + Constant *AndConst = ConstantInt::get(AndValue); + Instruction *And = + BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); + InsertNewInstBefore(And, CI); + return new ZExtInst(And, CI.getType()); + } else if (SrcSize == DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue)); + } else if (SrcSize > DstSize) { + Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); + InsertNewInstBefore(Trunc, CI); + APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue)); + } + } + + if (ICmpInst *ICI = dyn_cast(Src)) + return transformZExtICmp(ICI, CI); + + BinaryOperator *SrcI = dyn_cast(Src); + if (SrcI && SrcI->getOpcode() == Instruction::Or) { + // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one + // of the (zext icmp) will be transformed. + ICmpInst *LHS = dyn_cast(SrcI->getOperand(0)); + ICmpInst *RHS = dyn_cast(SrcI->getOperand(1)); + if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + (transformZExtICmp(LHS, CI, false) || + transformZExtICmp(RHS, CI, false))) { + Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI); + Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI); + return BinaryOperator::Create(Instruction::Or, LCast, RCast); + } + } + + return 0; +} + +Instruction *InstCombiner::visitSExt(SExtInst &CI) { + if (Instruction *I = commonIntCastTransforms(CI)) + return I; + + Value *Src = CI.getOperand(0); + + // Canonicalize sign-extend from i1 to a select. + if (Src->getType() == Type::Int1Ty) + return SelectInst::Create(Src, + ConstantInt::getAllOnesValue(CI.getType()), + Constant::getNullValue(CI.getType())); + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + if (getOpcode(Src) == Instruction::Trunc) { + Value *Op = cast(Src)->getOperand(0); + unsigned OpBits = cast(Op->getType())->getBitWidth(); + unsigned MidBits = cast(Src->getType())->getBitWidth(); + unsigned DestBits = cast(CI.getType())->getBitWidth(); + unsigned NumSignBits = ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return ReplaceInstUsesWith(CI, Op); + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. + if (NumSignBits > OpBits-MidBits) + return new SExtInst(Op, CI.getType(), "tmp"); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return new TruncInst(Op, CI.getType(), "tmp"); + } + } + + // If the input is a shl/ashr pair of a same constant, then this is a sign + // extension from a smaller value. 
If we could trust arbitrary bitwidth + // integers, we could turn this into a truncate to the smaller bit and then + // use a sext for the whole extension. Since we don't, look deeper and check + // for a truncate. If the source and dest are the same type, eliminate the + // trunc and extend and just do shifts. For example, turn: + // %a = trunc i32 %i to i8 + // %b = shl i8 %a, 6 + // %c = ashr i8 %b, 6 + // %d = sext i8 %c to i32 + // into: + // %a = shl i32 %i, 30 + // %d = ashr i32 %a, 30 + Value *A = 0; + ConstantInt *BA = 0, *CA = 0; + if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)), + m_ConstantInt(CA))) && + BA == CA && isa(A)) { + Value *I = cast(A)->getOperand(0); + if (I->getType() == CI.getType()) { + unsigned MidSize = Src->getType()->getPrimitiveSizeInBits(); + unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; + Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); + I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, + CI.getName()), CI); + return BinaryOperator::CreateAShr(I, ShAmtV); + } + } + + return 0; +} + +/// FitsInFPType - Return a Constant* for the specified FP constant if it fits +/// in the specified FP type without changing its value. +static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { + bool losesInfo; + APFloat F = CFP->getValueAPF(); + (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); + if (!losesInfo) + return ConstantFP::get(F); + return 0; +} + +/// LookThroughFPExtensions - If this is an fp extension instruction, look +/// through it until we get the source value. +static Value *LookThroughFPExtensions(Value *V) { + if (Instruction *I = dyn_cast(V)) + if (I->getOpcode() == Instruction::FPExt) + return LookThroughFPExtensions(I->getOperand(0)); + + // If this value is a constant, return the constant in the smallest FP type + // that can accurately represent it. This allows us to turn + // (float)((double)X+2.0) into x+2.0f. + if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType() == Type::PPC_FP128Ty) + return V; // No constant folding of this. + // See if the value can be truncated to float and then reextended. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + return V; + if (CFP->getType() == Type::DoubleTy) + return V; // Won't shrink. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + return V; + // Don't try to shrink to various long double types. + } + + return V; +} + +Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { + if (Instruction *I = commonCastTransforms(CI)) + return I; + + // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are + // smaller than the destination type, we can eliminate the truncate by doing + // the add as the smaller type. This applies to add/sub/mul/div as well as + // many builtins (sqrt, etc). + BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); + if (OpI && OpI->hasOneUse()) { + switch (OpI->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::FDiv: + case Instruction::FRem: + const Type *SrcTy = OpI->getType(); + Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); + Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); + if (LHSTrunc->getType() != SrcTy && + RHSTrunc->getType() != SrcTy) { + unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + // If the source types were both smaller than the destination type of + // the cast, do this xform. 
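+      // e.g. this turns (float)((double)x + (double)y) into the single
+      // float add x+y, mirroring the (float)((double)X+2.0) --> X+2.0f
+      // example above.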
+ if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize && + RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) { + LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, + CI.getType(), CI); + RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, + CI.getType(), CI); + return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); + } + } + break; + } + } + return 0; +} + +Instruction *InstCombiner::visitFPExt(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { + Instruction *OpI = dyn_cast(FI.getOperand(0)); + if (OpI == 0) + return commonCastTransforms(FI); + + // fptoui(uitofp(X)) --> X + // fptoui(sitofp(X)) --> X + // This is safe if the intermediate type has enough bits in its mantissa to + // accurately represent all values of X. For example, do not do this with + // i64->float->i64. This is also safe for sitofp case, because any negative + // 'X' value would cause an undefined result for the fptoui. + if ((isa(OpI) || isa(OpI)) && + OpI->getOperand(0)->getType() == FI.getType() && + (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */ + OpI->getType()->getFPMantissaWidth()) + return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + + return commonCastTransforms(FI); +} + +Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { + Instruction *OpI = dyn_cast(FI.getOperand(0)); + if (OpI == 0) + return commonCastTransforms(FI); + + // fptosi(sitofp(X)) --> X + // fptosi(uitofp(X)) --> X + // This is safe if the intermediate type has enough bits in its mantissa to + // accurately represent all values of X. For example, do not do this with + // i64->float->i64. This is also safe for sitofp case, because any negative + // 'X' value would cause an undefined result for the fptoui. + if ((isa(OpI) || isa(OpI)) && + OpI->getOperand(0)->getType() == FI.getType() && + (int)FI.getType()->getPrimitiveSizeInBits() <= + OpI->getType()->getFPMantissaWidth()) + return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + + return commonCastTransforms(FI); +} + +Instruction *InstCombiner::visitUIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitSIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { + // If the destination integer type is smaller than the intptr_t type for + // this target, do a ptrtoint to intptr_t then do a trunc. This allows the + // trunc to be exposed to other transforms. Don't do this for extending + // ptrtoint's, because we don't know if the target sign or zero extends its + // pointers. + if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) { + Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), + TD->getIntPtrType(), + "tmp"), CI); + return new TruncInst(P, CI.getType()); + } + + return commonPointerCastTransforms(CI); +} + +Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { + // If the source integer type is larger than the intptr_t type for + // this target, do a trunc to the intptr_t type, then inttoptr of it. This + // allows the trunc to be exposed to other transforms. Don't do this for + // extending inttoptr's, because we don't know if the target sign or zero + // extends to pointers. 
+ if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() > + TD->getPointerSizeInBits()) { + Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), + TD->getIntPtrType(), + "tmp"), CI); + return new IntToPtrInst(P, CI.getType()); + } + + if (Instruction *I = commonCastTransforms(CI)) + return I; + + const Type *DestPointee = cast(CI.getType())->getElementType(); + if (!DestPointee->isSized()) return 0; + + // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP. + ConstantInt *Cst; + Value *X; + if (match(CI.getOperand(0), m_Add(m_Cast(m_Value(X)), + m_ConstantInt(Cst)))) { + // If the source and destination operands have the same type, see if this + // is a single-index GEP. + if (X->getType() == CI.getType()) { + // Get the size of the pointee type. + uint64_t Size = TD->getTypeAllocSize(DestPointee); + + // Convert the constant to intptr type. + APInt Offset = Cst->getValue(); + Offset.sextOrTrunc(TD->getPointerSizeInBits()); + + // If Offset is evenly divisible by Size, we can do this xform. + if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ + Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); + return GetElementPtrInst::Create(X, ConstantInt::get(Offset)); + } + } + // TODO: Could handle other cases, e.g. where add is indexing into field of + // struct etc. + } else if (CI.getOperand(0)->hasOneUse() && + match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) { + // Otherwise, if this is inttoptr(add x, cst), try to turn this into an + // "inttoptr+GEP" instead of "add+intptr". + + // Get the size of the pointee type. + uint64_t Size = TD->getTypeAllocSize(DestPointee); + + // Convert the constant to intptr type. + APInt Offset = Cst->getValue(); + Offset.sextOrTrunc(TD->getPointerSizeInBits()); + + // If Offset is evenly divisible by Size, we can do this xform. + if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ + Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); + + Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(), + "tmp"), CI); + return GetElementPtrInst::Create(P, ConstantInt::get(Offset), "tmp"); + } + } + return 0; +} + +Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { + // If the operands are integer typed then apply the integer transforms, + // otherwise just apply the common ones. + Value *Src = CI.getOperand(0); + const Type *SrcTy = Src->getType(); + const Type *DestTy = CI.getType(); + + if (SrcTy->isInteger() && DestTy->isInteger()) { + if (Instruction *Result = commonIntCastTransforms(CI)) + return Result; + } else if (isa(SrcTy)) { + if (Instruction *I = commonPointerCastTransforms(CI)) + return I; + } else { + if (Instruction *Result = commonCastTransforms(CI)) + return Result; + } + + + // Get rid of casts from one type to the same type. These are useless and can + // be replaced by the operand. + if (DestTy == Src->getType()) + return ReplaceInstUsesWith(CI, Src); + + if (const PointerType *DstPTy = dyn_cast(DestTy)) { + const PointerType *SrcPTy = cast(SrcTy); + const Type *DstElTy = DstPTy->getElementType(); + const Type *SrcElTy = SrcPTy->getElementType(); + + // If the address spaces don't match, don't eliminate the bitcast, which is + // required for changing types. + if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) + return 0; + + // If we are casting a malloc or alloca to a pointer to a type of the same + // size, rewrite the allocation instruction to allocate the "right" type. 
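+    // e.g. bitcast (alloca i32) to float*  ==>  alloca float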
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
+    // This can enhance SROA and other transforms that want type-safe pointers.
+    Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy &&
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }
+
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "",
+                                       ((Instruction*) NULL));
+    }
+  }
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+    if (SVI->hasOneUse()) {
+      // Okay, we have (bitconvert (shuffle ..)).  Check to see if this is
+      // a bitconvert to a vector with the same # elts.
+      if (isa<VectorType>(DestTy) &&
+          cast<VectorType>(DestTy)->getNumElements() ==
+                SVI->getType()->getNumElements() &&
+          SVI->getType()->getNumElements() ==
+            cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+        CastInst *Tmp;
+        // If either of the operands is a cast from CI.getType(), then
+        // evaluating the shuffle in the casted destination's type will allow
+        // us to eliminate at least one cast.
+        if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) &&
+             Tmp->getOperand(0)->getType() == DestTy) ||
+            ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
+             Tmp->getOperand(0)->getType() == DestTy)) {
+          Value *LHS = InsertCastBefore(Instruction::BitCast,
+                                        SVI->getOperand(0), DestTy, CI);
+          Value *RHS = InsertCastBefore(Instruction::BitCast,
+                                        SVI->getOperand(1), DestTy, CI);
+          // Return a new shuffle vector.  Use the same element ID's, as we
+          // know the vector types match #elts.
+          return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+///   %C = or %A, %B
+///   %D = select %cond, %C, %A
+/// into:
+///   %C = select %cond, %B, 0
+///   %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    return 3;              // Can fold through either operand.
+  case Instruction::Sub:   // Can only fold on the amount subtracted.
+  case Instruction::Shl:   // Can only fold on the shift amount.
+  case Instruction::LShr:
+  case Instruction::AShr:
+    return 1;
+  default:
+    return 0;              // Cannot fold
+  }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
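+/// e.g. the identity is 0 for Add/Sub/Or/Xor and the shifts, all-ones for
+/// And, and 1 for Mul.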
+static Constant *GetSelectFoldableConstant(Instruction *I) { + switch (I->getOpcode()) { + default: assert(0 && "This cannot happen!"); abort(); + case Instruction::Add: + case Instruction::Sub: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return Constant::getNullValue(I->getType()); + case Instruction::And: + return Constant::getAllOnesValue(I->getType()); + case Instruction::Mul: + return ConstantInt::get(I->getType(), 1); + } +} + +/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// have the same opcode and only one use each. Try to simplify this. +Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI) { + if (TI->getNumOperands() == 1) { + // If this is a non-volatile load or a cast from the same type, + // merge. + if (TI->isCast()) { + if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + return 0; + } else { + return 0; // unknown unary op. + } + + // Fold this by inserting a select from the input values. + SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName()+".v"); + InsertNewInstBefore(NewSI, SI); + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + TI->getType()); + } + + // Only handle binary operators here. + if (!isa(TI)) + return 0; + + // Figure out if the operations have any operands in common. + Value *MatchOp, *OtherOpT, *OtherOpF; + bool MatchIsOpZero; + if (TI->getOperand(0) == FI->getOperand(0)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(1)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = false; + } else if (!TI->isCommutative()) { + return 0; + } else if (TI->getOperand(0) == FI->getOperand(1)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(0)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else { + return 0; + } + + // If we reach here, they do have operations in common. + SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, + OtherOpF, SI.getName()+".v"); + InsertNewInstBefore(NewSI, SI); + + if (BinaryOperator *BO = dyn_cast(TI)) { + if (MatchIsOpZero) + return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI); + else + return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); + } + assert(0 && "Shouldn't get here"); + return 0; +} + +static bool isSelect01(Constant *C1, Constant *C2) { + ConstantInt *C1I = dyn_cast(C1); + if (!C1I) + return false; + ConstantInt *C2I = dyn_cast(C2); + if (!C2I) + return false; + return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); +} + +/// FoldSelectIntoOp - Try fold the select into one of the operands to +/// facilitate further optimization. +Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, + Value *FalseVal) { + // See the comment above GetSelectFoldableOperands for a description of the + // transformation we are doing here. 
+ if (Instruction *TVI = dyn_cast(TrueVal)) { + if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && + !isa(FalseVal)) { + if (unsigned SFO = GetSelectFoldableOperands(TVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(TVI); + Value *OOp = TVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0 and 1. + if (!isa(OOp) || isSelect01(C, cast(OOp))) { + Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); + InsertNewInstBefore(NewSel, SI); + NewSel->takeName(TVI); + if (BinaryOperator *BO = dyn_cast(TVI)) + return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); + assert(0 && "Unknown instruction!!"); + } + } + } + } + } + + if (Instruction *FVI = dyn_cast(FalseVal)) { + if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && + !isa(TrueVal)) { + if (unsigned SFO = GetSelectFoldableOperands(FVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(FVI); + Value *OOp = FVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0 and 1. + if (!isa(OOp) || isSelect01(C, cast(OOp))) { + Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); + InsertNewInstBefore(NewSel, SI); + NewSel->takeName(FVI); + if (BinaryOperator *BO = dyn_cast(FVI)) + return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); + assert(0 && "Unknown instruction!!"); + } + } + } + } + } + + return 0; +} + +/// visitSelectInstWithICmp - Visit a SelectInst that has an +/// ICmpInst as its first operand. +/// +Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, + ICmpInst *ICI) { + bool Changed = false; + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + // Check cases where the comparison is with a constant that + // can be adjusted to fit the min/max idiom. We may edit ICI in + // place here, so make sure the select is the only user. + if (ICI->hasOneUse()) + if (ConstantInt *CI = dyn_cast(CmpRHS)) { + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: { + // X < MIN ? T : F --> F + if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X < C ? X : C-1 --> X > C-1 ? C-1 : X + Constant *AdjustedRHS = SubOne(CI); + if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + Changed = true; + } + break; + } + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: { + // X > MAX ? T : F --> F + if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > C ? X : C+1 --> X < C+1 ? 
C+1 : X
+        Constant *AdjustedRHS = AddOne(CI);
+        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+          Pred = ICmpInst::getSwappedPredicate(Pred);
+          CmpRHS = AdjustedRHS;
+          std::swap(FalseVal, TrueVal);
+          ICI->setPredicate(Pred);
+          ICI->setOperand(1, CmpRHS);
+          SI.setOperand(1, TrueVal);
+          SI.setOperand(2, FalseVal);
+          Changed = true;
+        }
+        break;
+      }
+      }
+
+      // (x <s  0) ? -1 : 0 -> ashr x, 31  -> all ones if signed
+      // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
+      CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+      if (match(TrueVal, m_ConstantInt<-1>()) &&
+          match(FalseVal, m_ConstantInt<0>()))
+        Pred = ICI->getPredicate();
+      else if (match(TrueVal, m_ConstantInt<0>()) &&
+               match(FalseVal, m_ConstantInt<-1>()))
+        Pred = CmpInst::getInversePredicate(ICI->getPredicate());
+
+      if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
+        // If we are just checking for a icmp eq of a single bit and zext'ing it
+        // to an integer, then shift the bit to the appropriate place and then
+        // cast to integer to avoid the comparison.
+        const APInt &Op1CV = CI->getValue();
+
+        // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
+        // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+        if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+            (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+          Value *In = ICI->getOperand(0);
+          Value *Sh = ConstantInt::get(In->getType(),
+                                       In->getType()->getPrimitiveSizeInBits()-1);
+          In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
+                                                      In->getName()+".lobit"),
+                                   *ICI);
+          if (In->getType() != SI.getType())
+            In = CastInst::CreateIntegerCast(In, SI.getType(),
+                                             true/*SExt*/, "tmp", ICI);
+
+          if (Pred == ICmpInst::ICMP_SGT)
+            In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
+                                         In->getName()+".not"), *ICI);
+
+          return ReplaceInstUsesWith(SI, In);
+        }
+      }
+    }
+
+  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+    // Transform (X == Y) ? X : Y  -> Y
+    if (Pred == ICmpInst::ICMP_EQ)
+      return ReplaceInstUsesWith(SI, FalseVal);
+    // Transform (X != Y) ? X : Y  -> X
+    if (Pred == ICmpInst::ICMP_NE)
+      return ReplaceInstUsesWith(SI, TrueVal);
+    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+    // Transform (X == Y) ? Y : X  -> X
+    if (Pred == ICmpInst::ICMP_EQ)
+      return ReplaceInstUsesWith(SI, FalseVal);
+    // Transform (X != Y) ? Y : X  -> Y
+    if (Pred == ICmpInst::ICMP_NE)
+      return ReplaceInstUsesWith(SI, TrueVal);
+    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+  }
+
+  /// NOTE: if we wanted to, this is where to detect integer ABS
+
+  return Changed ? &SI : 0;
+}
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+  Value *CondVal = SI.getCondition();
+  Value *TrueVal = SI.getTrueValue();
+  Value *FalseVal = SI.getFalseValue();
+
+  // select true, X, Y  -> X
+  // select false, X, Y -> Y
+  if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
+    return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
+
+  // select C, X, X -> X
+  if (TrueVal == FalseVal)
+    return ReplaceInstUsesWith(SI, TrueVal);
+
+  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
+    return ReplaceInstUsesWith(SI, FalseVal);
+  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
+    return ReplaceInstUsesWith(SI, TrueVal);
+  if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+    if (isa<Constant>(TrueVal))
+      return ReplaceInstUsesWith(SI, TrueVal);
+    else
+      return ReplaceInstUsesWith(SI, FalseVal);
+  }
+
+  if (SI.getType() == Type::Int1Ty) {
+    if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+      if (C->getZExtValue()) {
+        // Change: A = select B, true, C --> A = or B, C
+        return BinaryOperator::CreateOr(CondVal, FalseVal);
+      } else {
+        // Change: A = select B, false, C --> A = and !B, C
+        Value *NotCond =
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+                                             "not."+CondVal->getName()), SI);
+        return BinaryOperator::CreateAnd(NotCond, FalseVal);
+      }
+    } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+      if (C->getZExtValue() == false) {
+        // Change: A = select B, C, false --> A = and B, C
+        return BinaryOperator::CreateAnd(CondVal, TrueVal);
+      } else {
+        // Change: A = select B, C, true --> A = or !B, C
+        Value *NotCond =
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+                                             "not."+CondVal->getName()), SI);
+        return BinaryOperator::CreateOr(NotCond, TrueVal);
+      }
+    }
+
+    // select a, b, a  -> a&b
+    // select a, a, b  -> a|b
+    if (CondVal == TrueVal)
+      return BinaryOperator::CreateOr(CondVal, FalseVal);
+    else if (CondVal == FalseVal)
+      return BinaryOperator::CreateAnd(CondVal, TrueVal);
+  }
+
+  // Selecting between two integer constants?
+  if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+    if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+      // select C, 1, 0 -> zext C to int
+      if (FalseValC->isZero() && TrueValC->getValue() == 1) {
+        return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
+      } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+        // select C, 0, 1 -> zext !C to int
+        Value *NotCond =
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+                                               "not."+CondVal->getName()), SI);
+        return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
+      }
+
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
+
+        // (x <s 0) ? -1 : 0 -> ashr x, 31
+        if (TrueValC->isAllOnesValue() && FalseValC->isZero())
+          if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) {
+            if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) {
+              // The comparison constant and the result are not necessarily the
+              // same width. Make an all-ones value by inserting an AShr.
+              Value *X = IC->getOperand(0);
+              uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+              Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
+              Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
+                                                        ShAmt, "ones");
+              InsertNewInstBefore(SRA, SI);
+
+              // Then cast to the appropriate width.
+              return CastInst::CreateIntegerCast(SRA, SI.getType(), true);
+            }
+          }
+
+
+        // If one of the constants is zero (we know they can't both be) and we
+        // have an icmp instruction with zero, and we have an 'and' with the
+        // non-constant value, eliminate this whole mess. This corresponds to
+        // cases like this: ((X & 27) ?
27 : 0) + if (TrueValC->isZero() || FalseValC->isZero()) + if (IC->isEquality() && isa(IC->getOperand(1)) && + cast(IC->getOperand(1))->isNullValue()) + if (Instruction *ICA = dyn_cast(IC->getOperand(0))) + if (ICA->getOpcode() == Instruction::And && + isa(ICA->getOperand(1)) && + (ICA->getOperand(1) == TrueValC || + ICA->getOperand(1) == FalseValC) && + isOneBitSet(cast(ICA->getOperand(1)))) { + // Okay, now we know that everything is set up, we just don't + // know whether we have a icmp_ne or icmp_eq and whether the + // true or false val is the zero. + bool ShouldNotVal = !TrueValC->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + Value *V = ICA; + if (ShouldNotVal) + V = InsertNewInstBefore(BinaryOperator::Create( + Instruction::Xor, V, ICA->getOperand(1)), SI); + return ReplaceInstUsesWith(SI, V); + } + } + } + + // See if we are selecting two values based on a comparison of the two values. + if (FCmpInst *FCI = dyn_cast(CondVal)) { + if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { + // Transform (X == Y) ? X : Y -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X != Y) ? X : Y -> X + if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + return ReplaceInstUsesWith(SI, TrueVal); + // NOTE: if we wanted to, this is where to detect MIN/MAX + + } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ + // Transform (X == Y) ? Y : X -> X + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X != Y) ? Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + return ReplaceInstUsesWith(SI, TrueVal); + // NOTE: if we wanted to, this is where to detect MIN/MAX + } + // NOTE: if we wanted to, this is where to detect ABS + } + + // See if we are selecting two values based on a comparison of the two values. + if (ICmpInst *ICI = dyn_cast(CondVal)) + if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) + return Result; + + if (Instruction *TI = dyn_cast(TrueVal)) + if (Instruction *FI = dyn_cast(FalseVal)) + if (TI->hasOneUse() && FI->hasOneUse()) { + Instruction *AddOp = 0, *SubOp = 0; + + // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) + if (TI->getOpcode() == FI->getOpcode()) + if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) + return IV; + + // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is + // even legal for FP. 
+ if (TI->getOpcode() == Instruction::Sub && + FI->getOpcode() == Instruction::Add) { + AddOp = FI; SubOp = TI; + } else if (FI->getOpcode() == Instruction::Sub && + TI->getOpcode() == Instruction::Add) { + AddOp = TI; SubOp = FI; + } + + if (AddOp) { + Value *OtherAddOp = 0; + if (SubOp->getOperand(0) == AddOp->getOperand(0)) { + OtherAddOp = AddOp->getOperand(1); + } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { + OtherAddOp = AddOp->getOperand(0); + } + + if (OtherAddOp) { + // So at this point we know we have (Y -> OtherAddOp): + // select C, (add X, Y), (sub X, Z) + Value *NegVal; // Compute -Z + if (Constant *C = dyn_cast(SubOp->getOperand(1))) { + NegVal = ConstantExpr::getNeg(C); + } else { + NegVal = InsertNewInstBefore( + BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI); + } + + Value *NewTrueOp = OtherAddOp; + Value *NewFalseOp = NegVal; + if (AddOp != TI) + std::swap(NewTrueOp, NewFalseOp); + Instruction *NewSel = + SelectInst::Create(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); + + NewSel = InsertNewInstBefore(NewSel, SI); + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + } + } + } + + // See if we can fold the select into one of our operands. + if (SI.getType()->isInteger()) { + Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal); + if (FoldI) + return FoldI; + } + + if (BinaryOperator::isNot(CondVal)) { + SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + + return 0; +} + +/// EnforceKnownAlignment - If the specified pointer points to an object that +/// we control, modify the object's alignment to PrefAlign. This isn't +/// often possible though. If alignment is important, a more reliable approach +/// is to simply align all global variables and allocation instructions to +/// their preferred alignment from the beginning. +/// +static unsigned EnforceKnownAlignment(Value *V, + unsigned Align, unsigned PrefAlign) { + + User *U = dyn_cast(V); + if (!U) return Align; + + switch (getOpcode(U)) { + default: break; + case Instruction::BitCast: + return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); + case Instruction::GetElementPtr: { + // If all indexes are zero, it is just the alignment of the base pointer. + bool AllZeroOperands = true; + for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) + if (!isa(*i) || + !cast(*i)->isNullValue()) { + AllZeroOperands = false; + break; + } + + if (AllZeroOperands) { + // Treat this like a bitcast. + return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); + } + break; + } + } + + if (GlobalValue *GV = dyn_cast(V)) { + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (!GV->isDeclaration()) { + if (GV->getAlignment() >= PrefAlign) + Align = GV->getAlignment(); + else { + GV->setAlignment(PrefAlign); + Align = PrefAlign; + } + } + } else if (AllocationInst *AI = dyn_cast(V)) { + // If there is a requested alignment and if this is an alloca, round up. We + // don't do this for malloc, because some systems can't respect the request. + if (isa(AI)) { + if (AI->getAlignment() >= PrefAlign) + Align = AI->getAlignment(); + else { + AI->setAlignment(PrefAlign); + Align = PrefAlign; + } + } + } + + return Align; +} + +/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. 
If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, + unsigned PrefAlign) { + unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : + sizeof(PrefAlign) * CHAR_BIT; + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne); + unsigned TrailZ = KnownZero.countTrailingOnes(); + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + if (PrefAlign > Align) + Align = EnforceKnownAlignment(V, Align, PrefAlign); + + // We don't need to make any adjustment. + return Align; +} + +Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned MinAlign = std::min(DstAlign, SrcAlign); + unsigned CopyAlign = MI->getAlignment(); + + if (CopyAlign < MinAlign) { + MI->setAlignment(MinAlign); + return MI; + } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. + ConstantInt *MemOpLength = dyn_cast(MI->getOperand(3)); + if (MemOpLength == 0) return 0; + + // Source and destination pointer types are always "i8*" for intrinsic. See + // if the size is something we can handle with a single primitive load/store. + // A single load+store correctly handles overlapping memory in the memmove + // case. + unsigned Size = MemOpLength->getZExtValue(); + if (Size == 0) return MI; // Delete this mem transfer. + + if (Size > 8 || (Size&(Size-1))) + return 0; // If not 1/2/4/8 bytes, exit. + + // Use an integer load+store unless we can find something better. + Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3)); + + // Memcpy forces the use of i8* for the source and destination. That means + // that if you're using memcpy to move one double around, you'll get a cast + // from double* to i8*. We'd much rather use a double load+store rather than + // an i64 load+store, here because this improves the odds that the source or + // dest address will be promotable. See if we can find a better type than the + // integer datatype. + if (Value *Op = getBitCastOperand(MI->getOperand(1))) { + const Type *SrcETy = cast(Op->getType())->getElementType(); + if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + // The SrcETy might be something like {{{double}}} or [1 x double]. Rip + // down through these levels if so. + while (!SrcETy->isSingleValueType()) { + if (const StructType *STy = dyn_cast(SrcETy)) { + if (STy->getNumElements() == 1) + SrcETy = STy->getElementType(0); + else + break; + } else if (const ArrayType *ATy = dyn_cast(SrcETy)) { + if (ATy->getNumElements() == 1) + SrcETy = ATy->getElementType(); + else + break; + } else + break; + } + + if (SrcETy->isSingleValueType()) + NewPtrTy = PointerType::getUnqual(SrcETy); + } + } + + + // If the memcpy/memmove provides better alignment info than we can + // infer, use it. 
+ SrcAlign = std::max(SrcAlign, CopyAlign); + DstAlign = std::max(DstAlign, CopyAlign); + + Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI); + Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI); + Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); + InsertNewInstBefore(L, *MI); + InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + return MI; +} + +Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + if (MI->getAlignment() < Alignment) { + MI->setAlignment(Alignment); + return MI; + } + + // Extract the length and alignment and fill if they are constant. + ConstantInt *LenC = dyn_cast(MI->getLength()); + ConstantInt *FillC = dyn_cast(MI->getValue()); + if (!LenC || !FillC || FillC->getType() != Type::Int8Ty) + return 0; + uint64_t Len = LenC->getZExtValue(); + Alignment = MI->getAlignment(); + + // If the length is zero, this is a no-op + if (Len == 0) return MI; // memset(d,c,0,a) -> noop + + // memset(s,c,n) -> store s, c (for n=1,2,4,8) + if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { + const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8. + + Value *Dest = MI->getDest(); + Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI); + + // Alignment 0 is identity for alignment 1 for memset, but not store. + if (Alignment == 0) Alignment = 1; + + // Extract the fill value and store. + uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false, + Alignment), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(LenC->getType())); + return MI; + } + + return 0; +} + + +/// visitCallInst - CallInst simplification. This mostly only handles folding +/// of intrinsic instructions. For normal calls, it allows visitCallSite to do +/// the heavy lifting. +/// +Instruction *InstCombiner::visitCallInst(CallInst &CI) { + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + + + + IntrinsicInst *II = dyn_cast(&CI); + if (!II) return visitCallSite(&CI); + + // Intrinsics cannot occur in an invoke, so handle them here instead of in + // visitCallSite. + if (MemIntrinsic *MI = dyn_cast(II)) { + bool Changed = false; + + // memmove/cpy/set of zero bytes is a noop. + if (Constant *NumBytes = dyn_cast(MI->getLength())) { + if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + + if (ConstantInt *CI = dyn_cast(NumBytes)) + if (CI->getZExtValue() == 1) { + // Replace the instruction with just byte operations. We would + // transform other cases to loads/stores, but we don't know if + // alignment is sufficient. + } + } + + // If we have a memmove and the source operation is a constant global, + // then the source and dest pointers can't alias, so we can change this + // into a call to memcpy. 
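+    // e.g. memmove(%dst, @const_global, %n)
+    //        ==>  memcpy(%dst, @const_global, %n)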
+
+/// visitCallInst - CallInst simplification.  This mostly only handles folding
+/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+  // If the caller function is nounwind, mark the call as nounwind, even if the
+  // callee isn't.
+  if (CI.getParent()->getParent()->doesNotThrow() &&
+      !CI.doesNotThrow()) {
+    CI.setDoesNotThrow();
+    return &CI;
+  }
+
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+  if (!II) return visitCallSite(&CI);
+
+  // Intrinsics cannot occur in an invoke, so handle them here instead of in
+  // visitCallSite.
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+    bool Changed = false;
+
+    // memmove/cpy/set of zero bytes is a noop.
+    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+        if (CI->getZExtValue() == 1) {
+          // Replace the instruction with just byte operations.  We would
+          // transform other cases to loads/stores, but we don't know if
+          // alignment is sufficient.
+        }
+    }
+
+    // If we have a memmove and the source operation is a constant global,
+    // then the source and dest pointers can't alias, so we can change this
+    // into a call to memcpy.
+    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+        if (GVSrc->isConstant()) {
+          Module *M = CI.getParent()->getParent()->getParent();
+          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+          const Type *Tys[1];
+          Tys[0] = CI.getOperand(3)->getType();
+          CI.setOperand(0,
+                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
+          Changed = true;
+        }
+
+      // memmove(x,x,size) -> noop.
+      if (MMI->getSource() == MMI->getDest())
+        return EraseInstFromFunction(CI);
+    }
+
+    // If we can determine a pointer alignment that is bigger than currently
+    // set, update the alignment.
+    if (isa<MemTransferInst>(MI)) {
+      if (Instruction *I = SimplifyMemTransfer(MI))
+        return I;
+    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+      if (Instruction *I = SimplifyMemSet(MSI))
+        return I;
+    }
+
+    if (Changed) return II;
+  }
+
+  switch (II->getIntrinsicID()) {
+  default: break;
+  case Intrinsic::bswap:
+    // bswap(bswap(x)) -> x
+    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+      if (Operand->getIntrinsicID() == Intrinsic::bswap)
+        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+    break;
+  case Intrinsic::ppc_altivec_lvx:
+  case Intrinsic::ppc_altivec_lvxl:
+  case Intrinsic::x86_sse_loadu_ps:
+  case Intrinsic::x86_sse2_loadu_pd:
+  case Intrinsic::x86_sse2_loadu_dq:
+    // Turn PPC lvx -> load if the pointer is known aligned.
+    // Turn X86 loadups -> load if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+      Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+                                       PointerType::getUnqual(II->getType()),
+                                       CI);
+      return new LoadInst(Ptr);
+    }
+    break;
+  case Intrinsic::ppc_altivec_stvx:
+  case Intrinsic::ppc_altivec_stvxl:
+    // Turn stvx -> store if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+      const Type *OpPtrTy =
+        PointerType::getUnqual(II->getOperand(1)->getType());
+      Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+      return new StoreInst(II->getOperand(1), Ptr);
+    }
+    break;
+  case Intrinsic::x86_sse_storeu_ps:
+  case Intrinsic::x86_sse2_storeu_pd:
+  case Intrinsic::x86_sse2_storeu_dq:
+    // Turn X86 storeu -> store if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+      const Type *OpPtrTy =
+        PointerType::getUnqual(II->getOperand(2)->getType());
+      Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+      return new StoreInst(II->getOperand(2), Ptr);
+    }
+    break;
+
+  case Intrinsic::x86_sse_cvttss2si: {
+    // This intrinsic only demands the 0th element of its input vector.  If
+    // we can simplify the input based on that, do so now.
+    unsigned VWidth =
+      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+    APInt DemandedElts(VWidth, 1);
+    APInt UndefElts(VWidth, 0);
+    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+                                              UndefElts)) {
+      II->setOperand(1, V);
+      return II;
+    }
+    break;
+  }
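A sanity check on the demanded-elements idea in the case above (plain C++ sketch, not from the imported sources; the function below is a hypothetical scalar model of the intrinsic): cvttss2si reads only lane 0, so whatever feeds the other lanes is irrelevant and may be simplified away.

    #include <cassert>

    // Hypothetical model of cvttss2si: truncating convert of lane 0 only.
    int cvttss2si(const float lanes[4]) {
      return (int)lanes[0]; // lanes 1..3 are never read
    }

    int main() {
      float a[4] = {2.9f, 1.0f, 2.0f, 3.0f};
      float b[4] = {2.9f, -99.0f, 42.0f, 7.0f}; // junk in undemanded lanes
      assert(cvttss2si(a) == 2 && cvttss2si(b) == 2);
      return 0;
    }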
+
+  case Intrinsic::ppc_altivec_vperm:
+    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+      // Check that all of the elements are integer constants or undefs.
+      bool AllEltsOk = true;
+      for (unsigned i = 0; i != 16; ++i) {
+        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+            !isa<UndefValue>(Mask->getOperand(i))) {
+          AllEltsOk = false;
+          break;
+        }
+      }
+
+      if (AllEltsOk) {
+        // Cast the input vectors to byte vectors.
+        Value *Op0 = InsertBitCastBefore(II->getOperand(1), Mask->getType(), CI);
+        Value *Op1 = InsertBitCastBefore(II->getOperand(2), Mask->getType(), CI);
+        Value *Result = UndefValue::get(Op0->getType());
+
+        // Only extract each element once.
+        Value *ExtractedElts[32];
+        memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+        for (unsigned i = 0; i != 16; ++i) {
+          if (isa<UndefValue>(Mask->getOperand(i)))
+            continue;
+          unsigned Idx = cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+          Idx &= 31;  // Match the hardware behavior.
+
+          if (ExtractedElts[Idx] == 0) {
+            Instruction *Elt =
+              new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+            InsertNewInstBefore(Elt, CI);
+            ExtractedElts[Idx] = Elt;
+          }
+
+          // Insert this value into the result vector.
+          Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+                                             i, "tmp");
+          InsertNewInstBefore(cast<Instruction>(Result), CI);
+        }
+        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+      }
+    }
+    break;
+
+  case Intrinsic::stackrestore: {
+    // If the save is right next to the restore, remove the restore.  This can
+    // happen when variable allocas are DCE'd.
+    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+        BasicBlock::iterator BI = SS;
+        if (&*++BI == II)
+          return EraseInstFromFunction(CI);
+      }
+    }
+
+    // Scan down this block to see if there is another stack restore in the
+    // same block without an intervening call/alloca.
+    BasicBlock::iterator BI = II;
+    TerminatorInst *TI = II->getParent()->getTerminator();
+    bool CannotRemove = false;
+    for (++BI; &*BI != TI; ++BI) {
+      if (isa<AllocaInst>(BI)) {
+        CannotRemove = true;
+        break;
+      }
+      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+          // If there is a stackrestore below this one, remove this one.
+          if (II->getIntrinsicID() == Intrinsic::stackrestore)
+            return EraseInstFromFunction(CI);
+          // Otherwise, ignore the intrinsic.
+        } else {
+          // If we found a non-intrinsic call, we can't remove the stack
+          // restore.
+          CannotRemove = true;
+          break;
+        }
+      }
+    }
+
+    // If the stack restore is in a return/unwind block and if there are no
+    // allocas or calls between the restore and the return, nuke the restore.
+    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+      return EraseInstFromFunction(CI);
+    break;
+  }
+  }
+
+  return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+  return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+                                         const CastInst * const CI,
+                                         const TargetData * const TD,
+                                         const int ix) {
+  if (!CI->isLosslessCast())
+    return false;
+
+  // The size of ByVal arguments is derived from the type, so we
+  // can't change to a type with a different size.  If the size were
+  // passed explicitly we could avoid this check.
+  if (!CS.paramHasAttr(ix, Attribute::ByVal))
+    return true;
+
+  const Type* SrcTy =
+    cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+  if (!SrcTy->isSized() || !DstTy->isSized())
+    return false;
+  if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+    return false;
+  return true;
+}
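A rough C-level analogue of the rule above (illustrative sketch, not from the imported sources; it leans on int* and void* sharing a representation, which is precisely what "lossless" means here): a lossless pointer cast doesn't change the bits an argument carries through the varargs area, so the cast can be dropped.

    #include <cassert>
    #include <cstdarg>
    #include <cstdint>

    // Read back the raw bits of the first vararg pointer.
    intptr_t firstPtrBits(int n, ...) {
      va_list ap;
      va_start(ap, n);
      void *p = va_arg(ap, void*);
      va_end(ap);
      return (intptr_t)p;
    }

    int main() {
      int x = 0;
      // Passing x's address as int* or as void* pushes identical bits.
      assert(firstPtrBits(1, (int*)&x) == firstPtrBits(1, (void*)&x));
      return 0;
    }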
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+  bool Changed = false;
+
+  // If the callee is a constexpr cast of a function, attempt to move the cast
+  // to the arguments of the call/invoke.
+  if (transformConstExprCastCall(CS)) return 0;
+
+  Value *Callee = CS.getCalledValue();
+
+  if (Function *CalleeF = dyn_cast<Function>(Callee))
+    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
+      Instruction *OldCall = CS.getInstruction();
+      // If the call and callee calling conventions don't match, this call must
+      // be unreachable, as the call is undefined.
+      new StoreInst(ConstantInt::getTrue(),
+                    UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
+                    OldCall);
+      if (!OldCall->use_empty())
+        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
+        return EraseInstFromFunction(*OldCall);
+      return 0;
+    }
+
+  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+    // This instruction is not reachable, just remove it.  We insert a store to
+    // undef so that we know that this code is not reachable, despite the fact
+    // that we can't modify the CFG here.
+    new StoreInst(ConstantInt::getTrue(),
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
+                  CS.getInstruction());
+
+    if (!CS.getInstruction()->use_empty())
+      CS.getInstruction()->
+        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+      // Don't break the CFG, insert a dummy cond branch.
+      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+                         ConstantInt::getTrue(), II);
+    }
+    return EraseInstFromFunction(*CS.getInstruction());
+  }
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+        return transformCallThroughTrampoline(CS);
+
+  const PointerType *PTy = cast<PointerType>(Callee->getType());
+  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+  if (FTy->isVarArg()) {
+    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+    // See if we can optimize any arguments passed through the varargs area of
+    // the call.
+    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+           E = CS.arg_end(); I != E; ++I, ++ix) {
+      CastInst *CI = dyn_cast<CastInst>(*I);
+      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+        *I = CI->getOperand(0);
+        Changed = true;
+      }
+    }
+  }
+
+  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+    // Inline asm calls cannot throw - mark them 'nounwind'.
+    CS.setDoesNotThrow();
+    Changed = true;
+  }
+
+  return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
+  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
+  if (CE->getOpcode() != Instruction::BitCast ||
+      !isa<Function>(CE->getOperand(0)))
+    return false;
+  Function *Callee = cast<Function>(CE->getOperand(0));
+  Instruction *Caller = CS.getInstruction();
+  const AttrListPtr &CallerPAL = CS.getAttributes();
+
+  // Okay, this is a cast from a function to a different type.  Unless doing so
+  // would cause a type conversion of one of our arguments, change this call to
+  // be a direct call with arguments casted to the appropriate types.
+  //
+  const FunctionType *FT = Callee->getFunctionType();
+  const Type *OldRetTy = Caller->getType();
+  const Type *NewRetTy = FT->getReturnType();
+
+  if (isa<StructType>(NewRetTy))
+    return false; // TODO: Handle multiple return values.
+
+  // Check to see if we are changing the return type...
+  if (OldRetTy != NewRetTy) {
+    if (Callee->isDeclaration() &&
+        // Conversion is ok if changing from one pointer type to another or
+        // from a pointer to an integer of the same size.
+        !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+          (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+      return false;   // Cannot transform this return value.
+
+    if (!Caller->use_empty() &&
+        // void -> non-void is handled specially
+        NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
+      return false;   // Cannot transform this return value.
+
+    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+      Attributes RAttrs = CallerPAL.getRetAttributes();
+      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+        return false;   // Attribute not compatible with transformed value.
+    }
+
+    // If the callsite is an invoke instruction, and the return value is used
+    // by a PHI node in a successor, we cannot change the return type of the
+    // call because there is no place to put the cast instruction (without
+    // breaking the critical edge).  Bail out in this case.
+    if (!Caller->use_empty())
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+             UI != E; ++UI)
+          if (PHINode *PN = dyn_cast<PHINode>(*UI))
+            if (PN->getParent() == II->getNormalDest() ||
+                PN->getParent() == II->getUnwindDest())
+              return false;
+  }
+
+  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+  CallSite::arg_iterator AI = CS.arg_begin();
+  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+    const Type *ParamTy = FT->getParamType(i);
+    const Type *ActTy = (*AI)->getType();
+
+    if (!CastInst::isCastable(ActTy, ParamTy))
+      return false;   // Cannot transform this parameter value.
+
+    if (CallerPAL.getParamAttributes(i + 1)
+        & Attribute::typeIncompatible(ParamTy))
+      return false;   // Attribute not compatible with transformed value.
+
+    // Converting from one pointer type to another or between a pointer and an
+    // integer of the same size is safe even if we do not have a body.
+    bool isConvertible = ActTy == ParamTy ||
+      ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+       (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
+    if (Callee->isDeclaration() && !isConvertible) return false;
+  }
+
+  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
+      Callee->isDeclaration())
+    return false;   // Do not delete arguments unless we have a function body.
+
+  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+      !CallerPAL.isEmpty())
+    // In this case we have more arguments than the new function type, but we
+    // won't be dropping them.  Check that these extra arguments have
+    // attributes that are compatible with being a vararg call argument.
+    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+        break;
+      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+      if (PAttrs & Attribute::VarArgsIncompatible)
+        return false;
+    }
+
+  // Okay, we decided that this is a safe thing to do: go ahead and start
+  // inserting cast instructions as necessary...
+  std::vector<Value*> Args;
+  Args.reserve(NumActualArgs);
+  SmallVector<AttributeWithIndex, 8> attrVec;
+  attrVec.reserve(NumCommonArgs);
+
+  // Get any return attributes.
+  Attributes RAttrs = CallerPAL.getRetAttributes();
+
+  // If the return value is not being used, the type may not be compatible
+  // with the existing attributes.  Wipe out any problematic attributes.
+  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+  // Add the new return attributes.
+  if (RAttrs)
+    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+  AI = CS.arg_begin();
+  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+    const Type *ParamTy = FT->getParamType(i);
+    if ((*AI)->getType() == ParamTy) {
+      Args.push_back(*AI);
+    } else {
+      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+          false, ParamTy, false);
+      CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
+      Args.push_back(InsertNewInstBefore(NewCast, *Caller));
+    }
+
+    // Add any parameter attributes.
+    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+  }
+
+  // If the function takes more arguments than the call was taking, add them
+  // now...
+  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+  // If we are removing arguments to the function, emit an obnoxious warning...
+  if (FT->getNumParams() < NumActualArgs) {
+    if (!FT->isVarArg()) {
+      cerr << "WARNING: While resolving call to function '"
+           << Callee->getName() << "' arguments were dropped!\n";
+    } else {
+      // Add all of the arguments in their promoted form to the arg list...
+      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+        const Type *PTy = getPromotedType((*AI)->getType());
+        if (PTy != (*AI)->getType()) {
+          // Must promote to pass through va_arg area!
+          Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,
+                                                                PTy, false);
+          Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
+          InsertNewInstBefore(Cast, *Caller);
+          Args.push_back(Cast);
+        } else {
+          Args.push_back(*AI);
+        }
+
+        // Add any parameter attributes.
+        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+      }
+    }
+  }
+
+  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+  if (NewRetTy == Type::VoidTy)
+    Caller->setName("");   // Void type should not have a name.
+
+  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+                                                     attrVec.end());
+
+  Instruction *NC;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+                            Args.begin(), Args.end(),
+                            Caller->getName(), Caller);
+    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+  } else {
+    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+                          Caller->getName(), Caller);
+    CallInst *CI = cast<CallInst>(Caller);
+    if (CI->isTailCall())
+      cast<CallInst>(NC)->setTailCall();
+    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+  }
+
+  // Insert a cast of the return type as necessary.
+  Value *NV = NC;
+  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+    if (NV->getType() != Type::VoidTy) {
+      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
+                                                            OldRetTy, false);
+      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+
+      // If this is an invoke instruction, we should insert it after the first
+      // non-phi instruction in the normal successor block.
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+        InsertNewInstBefore(NC, *I);
+      } else {
+        // Otherwise, it's a call, just insert cast right after the call instr.
+        InsertNewInstBefore(NC, *Caller);
+      }
+      AddUsersToWorkList(*Caller);
+    } else {
+      NV = UndefValue::get(Caller->getType());
+    }
+  }
+
+  if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+    Caller->replaceAllUsesWith(NV);
+  Caller->eraseFromParent();
+  RemoveFromWorkList(Caller);
+  return true;
+}
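A C-level picture of the pattern this function undoes (illustrative sketch, not from the imported sources; names are hypothetical): a direct function viewed through another function-pointer type. The combiner recovers a direct call and applies conversions to the argument and result values instead.

    #include <cassert>

    long twice(long x) { return 2 * x; }

    typedef long (*FnLong)(long);
    typedef void (*FnOpaque)();

    int main() {
      // The "constexpr cast of a function": the callee seen through a
      // different function pointer type.
      FnOpaque casted = (FnOpaque)&twice;
      // What the transform recovers: the real callee type, with casts moved
      // onto the values.
      FnLong direct = (FnLong)casted;
      int arg = 21;
      long res = direct((long)arg);  // cast moved onto the argument
      assert((int)res == 42);        // ...and onto the result
      return 0;
    }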
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+  Value *Callee = CS.getCalledValue();
+  const PointerType *PTy = cast<PointerType>(Callee->getType());
+  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+  const AttrListPtr &Attrs = CS.getAttributes();
+
+  // If the call already has the 'nest' attribute somewhere then give up -
+  // otherwise 'nest' would occur twice after splicing in the chain.
+  if (Attrs.hasAttrSomewhere(Attribute::Nest))
+    return 0;
+
+  IntrinsicInst *Tramp =
+    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+  const AttrListPtr &NestAttrs = NestF->getAttributes();
+  if (!NestAttrs.isEmpty()) {
+    unsigned NestIdx = 1;
+    const Type *NestTy = 0;
+    Attributes NestAttr = Attribute::None;
+
+    // Look for a parameter marked with the 'nest' attribute.
+    for (FunctionType::param_iterator I = NestFTy->param_begin(),
+           E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+        // Record the parameter type and any other attributes.
+        NestTy = *I;
+        NestAttr = NestAttrs.getParamAttributes(NestIdx);
+        break;
+      }
+
+    if (NestTy) {
+      Instruction *Caller = CS.getInstruction();
+      std::vector<Value*> NewArgs;
+      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+      SmallVector<AttributeWithIndex, 8> NewAttrs;
+      NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+      // Insert the nest argument into the call argument list, which may
+      // mean appending it.  Likewise for attributes.
+
+      // Add any result attributes.
+      if (Attributes Attr = Attrs.getRetAttributes())
+        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+      {
+        unsigned Idx = 1;
+        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+        do {
+          if (Idx == NestIdx) {
+            // Add the chain argument and attributes.
+            Value *NestVal = Tramp->getOperand(3);
+            if (NestVal->getType() != NestTy)
+              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+            NewArgs.push_back(NestVal);
+            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+          }
+
+          if (I == E)
+            break;
+
+          // Add the original argument and attributes.
+          NewArgs.push_back(*I);
+          if (Attributes Attr = Attrs.getParamAttributes(Idx))
+            NewAttrs.push_back
+              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // Add any function attributes.
+      if (Attributes Attr = Attrs.getFnAttributes())
+        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+      // The trampoline may have been bitcast to a bogus type (FTy).
+      // Handle this by synthesizing a new function type, equal to FTy
+      // with the chain parameter inserted.
+      std::vector<const Type*> NewTypes;
+      NewTypes.reserve(FTy->getNumParams()+1);
+
+      // Insert the chain's type into the list of parameter types, which may
+      // mean appending it.
+      {
+        unsigned Idx = 1;
+        FunctionType::param_iterator I = FTy->param_begin(),
+          E = FTy->param_end();
+
+        do {
+          if (Idx == NestIdx)
+            // Add the chain's type.
+            NewTypes.push_back(NestTy);
+
+          if (I == E)
+            break;
+
+          // Add the original type.
+          NewTypes.push_back(*I);
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // Replace the trampoline call with a direct call.  Let the generic
+      // code sort out any function type mismatches.
+      FunctionType *NewFTy =
+        FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
+      Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ?
+        NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy));
+      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+                                                   NewAttrs.end());
+
+      Instruction *NewCaller;
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+        NewCaller = InvokeInst::Create(NewCallee,
+                                       II->getNormalDest(), II->getUnwindDest(),
+                                       NewArgs.begin(), NewArgs.end(),
+                                       Caller->getName(), Caller);
+        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+      } else {
+        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+                                     Caller->getName(), Caller);
+        if (cast<CallInst>(Caller)->isTailCall())
+          cast<CallInst>(NewCaller)->setTailCall();
+        cast<CallInst>(NewCaller)->
+          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+      }
+      if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+        Caller->replaceAllUsesWith(NewCaller);
+      Caller->eraseFromParent();
+      RemoveFromWorkList(Caller);
+      return 0;
+    }
+  }
+
+  // Replace the trampoline call with a direct call.  Since there is no 'nest'
+  // parameter, there is no need to adjust the argument list.  Let the generic
+  // code sort out any function type mismatches.
+  Constant *NewCallee =
+    NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy);
+  CS.setCalledFunction(NewCallee);
+  return CS.getInstruction();
+}
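The argument-splicing loop above has one subtlety worth a standalone check (illustrative C++, not from the imported sources; values are hypothetical): the chain argument may land in the middle of the list, shifting the attribute index of every later parameter by one.

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> args = {10, 20, 30};  // original call arguments
      unsigned nestIdx = 2;                  // chain becomes parameter #2
      args.insert(args.begin() + (nestIdx - 1), 99);  // splice in the chain
      // Attributes on old parameter k keep index k below nestIdx and become
      // k+1 at or above it, matching "Idx + (Idx >= NestIdx)" above.
      assert(args[0] == 10 && args[1] == 99 && args[2] == 20 && args[3] == 30);
      return 0;
    }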
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
+/// and if a/b/c/d and the add's all have a single use, turn this into two phi's
+/// and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+  Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+  assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+  unsigned Opc = FirstInst->getOpcode();
+  Value *LHSVal = FirstInst->getOperand(0);
+  Value *RHSVal = FirstInst->getOperand(1);
+
+  const Type *LHSType = LHSVal->getType();
+  const Type *RHSType = RHSVal->getType();
+
+  // Scan to see if all operands are the same opcode, all have one use, and all
+  // kill their operands (i.e. the operands have one use).
+  for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+    Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+    if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+        // Verify type of the LHS matches so we don't fold cmp's of different
+        // types or GEP's with different index types.
+        I->getOperand(0)->getType() != LHSType ||
+        I->getOperand(1)->getType() != RHSType)
+      return 0;
+
+    // If they are CmpInst instructions, check their predicates.
+    if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
+      if (cast<CmpInst>(I)->getPredicate() !=
+          cast<CmpInst>(FirstInst)->getPredicate())
+        return 0;
+
+    // Keep track of which operand needs a phi node.
+    if (I->getOperand(0) != LHSVal) LHSVal = 0;
+    if (I->getOperand(1) != RHSVal) RHSVal = 0;
+  }
+
+  // Otherwise, this is safe to transform!
+
+  Value *InLHS = FirstInst->getOperand(0);
+  Value *InRHS = FirstInst->getOperand(1);
+  PHINode *NewLHS = 0, *NewRHS = 0;
+  if (LHSVal == 0) {
+    NewLHS = PHINode::Create(LHSType,
+                             FirstInst->getOperand(0)->getName() + ".pn");
+    NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
+    NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+    InsertNewInstBefore(NewLHS, PN);
+    LHSVal = NewLHS;
+  }
+
+  if (RHSVal == 0) {
+    NewRHS = PHINode::Create(RHSType,
+                             FirstInst->getOperand(1)->getName() + ".pn");
+    NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
+    NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+    InsertNewInstBefore(NewRHS, PN);
+    RHSVal = NewRHS;
+  }
+
+  // Add all operands to the new PHIs.
+  if (NewLHS || NewRHS) {
+    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+      Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+      if (NewLHS) {
+        Value *NewInLHS = InInst->getOperand(0);
+        NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+      }
+      if (NewRHS) {
+        Value *NewInRHS = InInst->getOperand(1);
+        NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+      }
+    }
+  }
+
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+    return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+  CmpInst *CIOp = cast<CmpInst>(FirstInst);
+  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
+                         RHSVal);
+}
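The equivalence behind this fold, as a runnable sketch (plain C++, not from the imported sources): choosing between two sums is the same as summing two choices, provided each sum has no other users.

    #include <cassert>

    // phi [add(a,b), add(c,d)]  ==>  add(phi[a,c], phi[b,d])
    int viaPhiOfAdds(bool edge, int a, int b, int c, int d) {
      return edge ? (a + b) : (c + d);
    }
    int viaAddOfPhis(bool edge, int a, int b, int c, int d) {
      return (edge ? a : c) + (edge ? b : d);
    }

    int main() {
      for (int e = 0; e != 2; ++e)
        assert(viaPhiOfAdds(e, 1, 2, 3, 4) == viaAddOfPhis(e, 1, 2, 3, 4));
      return 0;
    }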
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+  GetElementPtrInst *FirstInst = cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+                                        FirstInst->op_end());
+  // This is true if all GEP bases are allocas and if all indices into them are
+  // constants.
+  bool AllBasePointersAreAllocas = true;
+
+  // Scan to see if all operands are the same opcode, all have one use, and all
+  // kill their operands (i.e. the operands have one use).
+  for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+    if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+        GEP->getNumOperands() != FirstInst->getNumOperands())
+      return 0;
+
+    // Keep track of whether or not all GEPs are of alloca pointers.
+    if (AllBasePointersAreAllocas &&
+        (!isa<AllocaInst>(GEP->getOperand(0)) ||
+         !GEP->hasAllConstantIndices()))
+      AllBasePointersAreAllocas = false;
+
+    // Compare the operand lists.
+    for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+      if (FirstInst->getOperand(op) == GEP->getOperand(op))
+        continue;
+
+      // Don't merge two GEPs when two operands differ (introducing phi nodes)
+      // if one of the PHIs has a constant for the index.  The index may be
+      // substantially cheaper to compute for the constants, so making it a
+      // variable index could pessimize the path.  This also handles the case
+      // for struct indices, which must always be constant.
+      if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+          isa<ConstantInt>(GEP->getOperand(op)))
+        return 0;
+
+      if (FirstInst->getOperand(op)->getType() != GEP->getOperand(op)->getType())
+        return 0;
+      FixedOperands[op] = 0;  // Needs a PHI.
+    }
+  }
+
+  // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+  // bother doing this transformation.  At best, this will just save a bit of
+  // offset calculation, but all the predecessors will have to materialize the
+  // stack address into a register anyway.  We'd actually rather *clone* the
+  // load up into the predecessors so that we have a load of a gep of an alloca,
+  // which can usually all be folded into the load.
+  if (AllBasePointersAreAllocas)
+    return 0;
+
+  // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
+  // that is variable.
+  SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+  bool HasAnyPHIs = false;
+  for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+    if (FixedOperands[i]) continue;  // operand doesn't need a phi.
+    Value *FirstOp = FirstInst->getOperand(i);
+    PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+                                     FirstOp->getName()+".pn");
+    InsertNewInstBefore(NewPN, PN);
+
+    NewPN->reserveOperandSpace(e);
+    NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+    OperandPhis[i] = NewPN;
+    FixedOperands[i] = NewPN;
+    HasAnyPHIs = true;
+  }
+
+  // Add all operands to the new PHIs.
+  if (HasAnyPHIs) {
+    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+      GetElementPtrInst *InGEP = cast<GetElementPtrInst>(PN.getIncomingValue(i));
+      BasicBlock *InBB = PN.getIncomingBlock(i);
+
+      for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+        if (PHINode *OpPhi = OperandPhis[op])
+          OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+    }
+  }
+
+  Value *Base = FixedOperands[0];
+  return GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+                                   FixedOperands.end());
+}
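The same select-versus-compute reasoning as the binop case, applied to addresses (runnable C++ sketch, not from the imported sources): picking between two field addresses equals taking the field of the picked base.

    #include <cassert>

    struct S { int pad; int f; };

    int main() {
      S a = {0, 1}, b = {0, 2};
      for (int edge = 0; edge != 2; ++edge) {
        int *viaPhiOfGeps = edge ? &a.f : &b.f;   // phi of geps
        int *viaGepOfPhi  = &(edge ? a : b).f;    // gep of phi
        assert(viaPhiOfGeps == viaGepOfPhi);
      }
      return 0;
    }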
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it.  This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca.  Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+  BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+  for (++BBI; BBI != E; ++BBI)
+    if (BBI->mayWriteToMemory())
+      return false;
+
+  // Check for non-address taken alloca.  If not address-taken already, it
+  // isn't profitable to do this xform.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+    bool isAddressTaken = false;
+    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+         UI != E; ++UI) {
+      if (isa<LoadInst>(UI)) continue;
+      if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+        // If storing TO the alloca, then the address isn't taken.
+        if (SI->getOperand(1) == AI) continue;
+      }
+      isAddressTaken = true;
+      break;
+    }
+
+    if (!isAddressTaken && AI->isStaticAlloca())
+      return false;
+  }
+
+  // If this load is a load from a GEP with a constant offset from an alloca,
+  // then we don't want to sink it.  In its present form, it will be
+  // load [constant stack offset].  Sinking it will cause us to have to
+  // materialize the stack addresses in each predecessor in a register only to
+  // do a shared load from register in the successor.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+      if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+        return false;
+
+  return true;
+}
+
+// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+// operator and they all are only used by the PHI, PHI together their
+// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+  Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+  // Scan the instruction, looking for input operations that can be folded
+  // away.  If all input operands to the phi are the same instruction (e.g. a
+  // cast from the same type or "+42") we can pull the operation through the
+  // PHI, reducing code size and simplifying code.
+  Constant *ConstantOp = 0;
+  const Type *CastSrcTy = 0;
+  bool isVolatile = false;
+  if (isa<CastInst>(FirstInst)) {
+    CastSrcTy = FirstInst->getOperand(0)->getType();
+  } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+    // Can fold binop, compare or shift here if the RHS is a constant,
+    // otherwise call FoldPHIArgBinOpIntoPHI.
+    ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+    if (ConstantOp == 0)
+      return FoldPHIArgBinOpIntoPHI(PN);
+  } else if (LoadInst *LI = dyn_cast<LoadInst>(FirstInst)) {
+    isVolatile = LI->isVolatile();
+    // We can't sink the load if the loaded value could be modified between the
+    // load and the PHI.
+    if (LI->getParent() != PN.getIncomingBlock(0) ||
+        !isSafeAndProfitableToSinkLoad(LI))
+      return 0;
+
+    // If the PHI is of volatile loads and the load block has multiple
+    // successors, sinking it would remove a load of the volatile value from
+    // the path through the other successor.
+    if (isVolatile &&
+        LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+      return 0;
+
+  } else if (isa<GetElementPtrInst>(FirstInst)) {
+    return FoldPHIArgGEPIntoPHI(PN);
+  } else {
+    return 0;  // Cannot fold this operation.
+  }
+
+  // Check to see if all arguments are the same operation.
+  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+    if (!isa<Instruction>(PN.getIncomingValue(i))) return 0;
+    Instruction *I = cast<Instruction>(PN.getIncomingValue(i));
+    if (!I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+      return 0;
+    if (CastSrcTy) {
+      if (I->getOperand(0)->getType() != CastSrcTy)
+        return 0;  // Cast operation must match.
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      // We can't sink the load if the loaded value could be modified between
+      // the load and the PHI.
+      if (LI->isVolatile() != isVolatile ||
+          LI->getParent() != PN.getIncomingBlock(i) ||
+          !isSafeAndProfitableToSinkLoad(LI))
+        return 0;
+
+      // If the PHI is of volatile loads and the load block has multiple
+      // successors, sinking it would remove a load of the volatile value from
+      // the path through the other successor.
+      if (isVolatile &&
+          LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+        return 0;
+
+    } else if (I->getOperand(1) != ConstantOp) {
+      return 0;
+    }
+  }
+
+  // Okay, they are all the same operation.  Create a new PHI node of the
+  // correct type, and PHI together all of the LHS's of the instructions.
+  PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+                                   PN.getName()+".in");
+  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+  Value *InVal = FirstInst->getOperand(0);
+  NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+  // Add all operands to the new PHI.
+  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+    Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+    if (NewInVal != InVal)
+      InVal = 0;
+    NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+  }
+
+  Value *PhiVal;
+  if (InVal) {
+    // The new PHI unions all of the same values together.  This is really
+    // common, so we handle it intelligently here for compile-time speed.
+    PhiVal = InVal;
+    delete NewPN;
+  } else {
+    InsertNewInstBefore(NewPN, PN);
+    PhiVal = NewPN;
+  }
+
+  // Insert and return the new operation.
+  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
+    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+  if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                           PhiVal, ConstantOp);
+  assert(isa<LoadInst>(FirstInst) && "Unknown operation");
+
+  // If this was a volatile load that we are merging, make sure to loop through
+  // and mark all the input loads as non-volatile.  If we don't do this, we
+  // will insert a new volatile load and the old ones will not be deletable.
+  if (isVolatile)
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+      cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+  return new LoadInst(PhiVal, "", isVolatile);
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+                         SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+  if (PN->use_empty()) return true;
+  if (!PN->hasOneUse()) return false;
+
+  // Remember this node, and if we find the cycle, return.
+  if (!PotentiallyDeadPHIs.insert(PN))
+    return true;
+
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
+
+  if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+    return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+  return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
+///   z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+                           SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+  // See if we already saw this PHI node.
+  if (!ValueEqualPHIs.insert(PN))
+    return true;
+
+  // Don't scan crazily complex things.
+  if (ValueEqualPHIs.size() == 16)
+    return false;
+
+  // Scan the operands to see if they are either phi nodes or are equal to
+  // the value.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *Op = PN->getIncomingValue(i);
+    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+        return false;
+    } else if (Op != NonPhiInVal)
+      return false;
+  }
+
+  return true;
+}
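Both helpers above use the same bounded visited-set walk; here is that pattern in isolation (illustrative C++ with hypothetical Node/users stand-ins, not from the imported sources): follow the single user until a node repeats (a closed, dead cycle) or a small budget runs out.

    #include <set>
    #include <vector>

    struct Node { std::vector<Node*> users; };

    bool deadCycle(Node *N, std::set<Node*> &Seen) {
      if (N->users.empty()) return true;        // trivially dead
      if (N->users.size() != 1) return false;   // has a real user
      if (!Seen.insert(N).second) return true;  // revisited: cycle is closed
      if (Seen.size() > 16) return false;       // don't scan complex things
      return deadCycle(N->users.front(), Seen);
    }

    int main() {
      Node a, b;
      a.users.push_back(&b);
      b.users.push_back(&a);                    // a <-> b, nothing else
      std::set<Node*> seen;
      return deadCycle(&a, seen) ? 0 : 1;       // dead cycle -> exit code 0
    }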
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+  // If LCSSA is around, don't mess with Phi nodes
+  if (MustPreserveLCSSA) return 0;
+
+  if (Value *V = PN.hasConstantValue())
+    return ReplaceInstUsesWith(PN, V);
+
+  // If all PHI operands are the same operation, pull them through the PHI,
+  // reducing code size.
+  if (isa<Instruction>(PN.getIncomingValue(0)) &&
+      isa<Instruction>(PN.getIncomingValue(1)) &&
+      cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+      cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+      // FIXME: The hasOneUse check will fail for PHIs that use the value more
+      // than once.
+      PN.getIncomingValue(0)->hasOneUse())
+    if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+      return Result;
+
+  // If this is a trivial cycle in the PHI node graph, remove it.  Basically,
+  // if this PHI only has a single use (a PHI), and if that PHI only has one
+  // use (a PHI)... break the cycle.
+  if (PN.hasOneUse()) {
+    Instruction *PHIUser = cast<Instruction>(PN.use_back());
+    if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+      SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+      PotentiallyDeadPHIs.insert(&PN);
+      if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+        return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+    }
+
+    // If this phi has a single use, and if that use just computes a value for
+    // the next iteration of a loop, delete the phi.  This occurs with unused
+    // induction variables, e.g. "for (int j = 0; ; ++j);".  Detecting this
+    // common case here is good because the only other things that catch this
+    // are induction variable analysis (sometimes) and ADCE, which is only run
+    // late.
+    if (PHIUser->hasOneUse() &&
+        (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+        PHIUser->use_back() == &PN) {
+      return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+    }
+  }
+
+  // We sometimes end up with phi cycles that non-obviously end up being the
+  // same value, for example:
+  //   z = some value; x = phi (y, z); y = phi (x, z)
+  // where the phi nodes don't necessarily need to be in the same block.  Do a
+  // quick check to see if the PHI node only contains a single non-phi value,
+  // if so, scan to see if the phi cycle is actually equal to that value.
+  {
+    unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+    // Scan for the first non-phi operand.
+    while (InValNo != NumOperandVals &&
+           isa<PHINode>(PN.getIncomingValue(InValNo)))
+      ++InValNo;
+
+    if (InValNo != NumOperandVals) {
+      Value *NonPhiInVal = PN.getOperand(InValNo);
+
+      // Scan the rest of the operands to see if there are any conflicts, if
+      // so there is no need to recursively scan other phis.
+      for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+        Value *OpVal = PN.getIncomingValue(InValNo);
+        if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+          break;
+      }
+
+      // If we scanned over all operands, then we have one unique value plus
+      // phi values.  Scan PHI nodes to see if they all merge in each other or
+      // the value.
+      if (InValNo == NumOperandVals) {
+        SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+        if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+          return ReplaceInstUsesWith(PN, NonPhiInVal);
+      }
+    }
+  }
+  return 0;
+}
+
+static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
+                                   Instruction *InsertPoint,
+                                   InstCombiner *IC) {
+  unsigned PtrSize = DTy->getPrimitiveSizeInBits();
+  unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+  // We must cast correctly to the pointer type.  Ensure that we
+  // sign extend the integer value if it is smaller as this is
+  // used for address computation.
+  Instruction::CastOps opcode =
+    (VTySize < PtrSize ? Instruction::SExt :
+     (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc));
+  return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint);
+}
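Why SExt, not ZExt, is the correct widening here, as a quick standalone check (plain C++, not from the imported sources): a negative 32-bit index must stay negative at 64 bits, or the computed offset jumps by 2^32.

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t idx = -1;
      int64_t sext = (int64_t)idx;            // sign extend: still -1
      int64_t zext = (int64_t)(uint32_t)idx;  // zero extend: 4294967295
      assert(sext == -1 && zext == 4294967295LL);
      // Only the sign-extended index walks one element backwards.
      int arr[4] = {10, 20, 30, 40};
      int *p = &arr[2];
      assert(*(p + sext) == 20);
      return 0;
    }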
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+  Value *PtrOp = GEP.getOperand(0);
+  // Is it 'getelementptr %P, i32 0' or 'getelementptr %P'?
+  // If so, eliminate the noop.
+  if (GEP.getNumOperands() == 1)
+    return ReplaceInstUsesWith(GEP, PtrOp);
+
+  if (isa<UndefValue>(GEP.getOperand(0)))
+    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+  bool HasZeroPointerIndex = false;
+  if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
+    HasZeroPointerIndex = C->isNullValue();
+
+  if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
+    return ReplaceInstUsesWith(GEP, PtrOp);
+
+  // Eliminate unneeded casts for indices.
+  bool MadeChange = false;
+
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+       i != e; ++i, ++GTI) {
+    if (isa<SequentialType>(*GTI)) {
+      if (CastInst *CI = dyn_cast<CastInst>(*i)) {
+        if (CI->getOpcode() == Instruction::ZExt ||
+            CI->getOpcode() == Instruction::SExt) {
+          const Type *SrcTy = CI->getOperand(0)->getType();
+          // We can eliminate a cast from i32 to i64 iff the target
+          // is a 32-bit pointer target.
+          if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+            MadeChange = true;
+            *i = CI->getOperand(0);
+          }
+        }
+      }
+      // If we are using a wider index than needed for this platform, shrink
+      // it to what we need.  If narrower, sign-extend it to what we need.
+      // If the incoming value needs a cast instruction, insert it.  This
+      // explicit cast can make subsequent optimizations more obvious.
+      Value *Op = *i;
+      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
+        if (Constant *C = dyn_cast<Constant>(Op)) {
+          *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
+          MadeChange = true;
+        } else {
+          Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
+                                GEP);
+          *i = Op;
+          MadeChange = true;
+        }
+      } else if (TD->getTypeSizeInBits(Op->getType()) <
+                 TD->getPointerSizeInBits()) {
+        if (Constant *C = dyn_cast<Constant>(Op)) {
+          *i = ConstantExpr::getSExt(C, TD->getIntPtrType());
+          MadeChange = true;
+        } else {
+          Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
+                                GEP);
+          *i = Op;
+          MadeChange = true;
+        }
+      }
+    }
+  }
+  if (MadeChange) return &GEP;
+
+  // Combine Indices - If the source pointer to this getelementptr instruction
+  // is a getelementptr instruction, combine the indices of the two
+  // getelementptr instructions into a single instruction.
+  //
+  SmallVector<Value*, 16> SrcGEPOperands;
+  if (User *Src = dyn_castGetElementPtr(PtrOp))
+    SrcGEPOperands.append(Src->op_begin(), Src->op_end());
+
+  if (!SrcGEPOperands.empty()) {
+    // Note that if our source is a gep chain itself that we wait for that
+    // chain to be resolved before we perform this transformation.  This
+    // avoids us creating a TON of code in some cases.
+    //
+    if (isa<GetElementPtrInst>(SrcGEPOperands[0]) &&
+        cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2)
+      return 0;   // Wait until our source is folded to completion.
+
+    SmallVector<Value*, 16> Indices;
+
+    // Find out whether the last index in the source GEP is a sequential idx.
+    bool EndsWithSequential = false;
+    for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)),
+           E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I)
+      EndsWithSequential = !isa<StructType>(*I);
+
+    // Can we combine the two pointer arithmetics offsets?
+    if (EndsWithSequential) {
+      // Replace: gep (gep %P, long B), long A, ...
+      // With:    T = long A+B; gep %P, T, ...
+      //
+      Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1);
+      if (SO1 == Constant::getNullValue(SO1->getType())) {
+        Sum = GO1;
+      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+        Sum = SO1;
+      } else {
+        // If they aren't the same type, convert both to an integer of the
+        // target's pointer size.
+        if (SO1->getType() != GO1->getType()) {
+          if (Constant *SO1C = dyn_cast<Constant>(SO1)) {
+            SO1 = ConstantExpr::getIntegerCast(SO1C, GO1->getType(), true);
+          } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) {
+            GO1 = ConstantExpr::getIntegerCast(GO1C, SO1->getType(), true);
+          } else {
+            unsigned PS = TD->getPointerSizeInBits();
+            if (TD->getTypeSizeInBits(SO1->getType()) == PS) {
+              // Convert GO1 to SO1's type.
+              GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this);
+            } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) {
+              // Convert SO1 to GO1's type.
+              SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this);
+            } else {
+              const Type *PT = TD->getIntPtrType();
+              SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this);
+              GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this);
+            }
+          }
+        }
+        if (isa<Constant>(SO1) && isa<Constant>(GO1))
+          Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1));
+        else {
+          Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+          InsertNewInstBefore(cast<Instruction>(Sum), GEP);
+        }
+      }
+
+      // Recycle the GEP we already have if possible.
+      if (SrcGEPOperands.size() == 2) {
+        GEP.setOperand(0, SrcGEPOperands[0]);
+        GEP.setOperand(1, Sum);
+        return &GEP;
+      } else {
+        Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
+                       SrcGEPOperands.end()-1);
+        Indices.push_back(Sum);
+        Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end());
+      }
+    } else if (isa<Constant>(*GEP.idx_begin()) &&
+               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+               SrcGEPOperands.size() != 1) {
+      // Otherwise we can do the fold if the first index of the GEP is a zero.
+      Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
+                     SrcGEPOperands.end());
+      Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end());
+    }
+
+    if (!Indices.empty())
+      return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
+                                       Indices.end(), GEP.getName());
+
+  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
+    // GEP of global variable.  If all of the indices for this GEP are
+    // constants, we can promote this to a constexpr instead of an instruction.
+
+    // Scan for nonconstants...
+    SmallVector<Constant*, 8> Indices;
+    User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end();
+    for (; I != E && isa<Constant>(*I); ++I)
+      Indices.push_back(cast<Constant>(*I));
+
+    if (I == E) {  // If they are all constants...
+      Constant *CE = ConstantExpr::getGetElementPtr(GV,
+                                                    &Indices[0], Indices.size());
+
+      // Replace all uses of the GEP with the new constexpr...
+      return ReplaceInstUsesWith(GEP, CE);
+    }
+  } else if (Value *X = getBitCastOperand(PtrOp)) {  // Is the operand a cast?
+    if (!isa<PointerType>(X->getType())) {
+      // Not interesting.  Source pointer must be a cast from pointer.
+    } else if (HasZeroPointerIndex) {
+      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+      // into     : GEP [10 x i8]* X, i32 0, ...
+      //
+      // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+      //           into     : GEP i8* X, ...
+      //
+      // This occurs when the program declares an array extern like "int X[];"
+      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+      const PointerType *XTy = cast<PointerType>(X->getType());
+      if (const ArrayType *CATy =
+          dyn_cast<ArrayType>(CPTy->getElementType())) {
+        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+        if (CATy->getElementType() == XTy->getElementType()) {
+          // -> GEP i8* X, ...
+          SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
+          return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
+                                           GEP.getName());
+        } else if (const ArrayType *XATy =
+                   dyn_cast<ArrayType>(XTy->getElementType())) {
+          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+          if (CATy->getElementType() == XATy->getElementType()) {
+            // -> GEP [10 x i8]* X, i32 0, ...
+            // At this point, we know that the cast source type is a pointer
+            // to an array of the same type as the destination pointer
+            // array.  Because the array type is never stepped over (there
+            // is a leading zero) we can fold the cast into this GEP.
+            GEP.setOperand(0, X);
+            return &GEP;
+          }
+        }
+      }
+    } else if (GEP.getNumOperands() == 2) {
+      // Transform things like:
+      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+      const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
+      const Type *ResElTy = cast<PointerType>(PtrOp->getType())->getElementType();
+      if (isa<ArrayType>(SrcElTy) &&
+          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+          TD->getTypeAllocSize(ResElTy)) {
+        Value *Idx[2];
+        Idx[0] = Constant::getNullValue(Type::Int32Ty);
+        Idx[1] = GEP.getOperand(1);
+        Value *V = InsertNewInstBefore(
+          GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
+        // V and GEP are both pointer types --> BitCast
+        return new BitCastInst(V, GEP.getType());
+      }
+
+      // Transform things like:
+      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+      //   (where tmp = 8*tmp2) into:
+      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+      if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
+        uint64_t ArrayEltSize =
+          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
+        // allow either a mul, shift, or constant here.
+        Value *NewIdx = 0;
+        ConstantInt *Scale = 0;
+        if (ArrayEltSize == 1) {
+          NewIdx = GEP.getOperand(1);
+          Scale = ConstantInt::get(NewIdx->getType(), 1);
+        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+          NewIdx = ConstantInt::get(CI->getType(), 1);
+          Scale = CI;
+        } else if (Instruction *Inst = dyn_cast<Instruction>(GEP.getOperand(1))) {
+          if (Inst->getOpcode() == Instruction::Shl &&
+              isa<ConstantInt>(Inst->getOperand(1))) {
+            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+            Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+            NewIdx = Inst->getOperand(0);
+          } else if (Inst->getOpcode() == Instruction::Mul &&
+                     isa<ConstantInt>(Inst->getOperand(1))) {
+            Scale = cast<ConstantInt>(Inst->getOperand(1));
+            NewIdx = Inst->getOperand(0);
+          }
+        }
+
+        // If the index will be to exactly the right offset with the scale
+        // taken out, perform the transformation.  Note, we don't know whether
+        // Scale is signed or not.  We'll use unsigned version of
+        // division/modulo operation after making sure Scale doesn't have the
+        // sign bit set.
+        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+            Scale->getZExtValue() % ArrayEltSize == 0) {
+          Scale = ConstantInt::get(Scale->getType(),
+                                   Scale->getZExtValue() / ArrayEltSize);
+          if (Scale->getZExtValue() != 1) {
+            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+                                                       false /*ZExt*/);
+            Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
+            NewIdx = InsertNewInstBefore(Sc, GEP);
+          }
+
+          // Insert the new GEP instruction.
+          Value *Idx[2];
+          Idx[0] = Constant::getNullValue(Type::Int32Ty);
+          Idx[1] = NewIdx;
+          Instruction *NewGEP =
+            GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
+          NewGEP = InsertNewInstBefore(NewGEP, GEP);
+          // The NewGEP must be pointer typed, so must the old one -> BitCast
+          return new BitCastInst(NewGEP, GEP.getType());
+        }
+      }
+    }
+  }
+
+  /// See if we can simplify:
+  ///   X = bitcast A to B*
+  ///   Y = gep X, <...constant indices...>
+  /// into a gep of the original struct.  This is important for SROA and alias
+  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+    if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+      // Determine how much the GEP moves the pointer.  We are guaranteed to
+      // get a constant back from EmitGEPOffset.
+      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this));
+      int64_t Offset = OffsetV->getSExtValue();
+
+      // If this GEP instruction doesn't move the pointer, just replace the GEP
+      // with a bitcast of the real input to the dest type.
+      if (Offset == 0) {
+        // If the bitcast is of an allocation, and the allocation will be
+        // converted to match the type of the cast, don't touch this.
+        if (isa<AllocationInst>(BCI->getOperand(0))) {
+          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+          if (Instruction *I = visitBitCast(*BCI)) {
+            if (I != BCI) {
+              I->takeName(BCI);
+              BCI->getParent()->getInstList().insert(BCI, I);
+              ReplaceInstUsesWith(*BCI, I);
+            }
+            return &GEP;
+          }
+        }
+        return new BitCastInst(BCI->getOperand(0), GEP.getType());
+      }
+
+      // Otherwise, if the offset is non-zero, we need to find out if there is
+      // a field at Offset in 'A's type.  If so, we can pull the cast through
+      // the GEP.
+      SmallVector<Value*, 8> NewIndices;
+      const Type *InTy =
+        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+      if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) {
+        Instruction *NGEP =
+          GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(),
+                                    NewIndices.end());
+        if (NGEP->getType() == GEP.getType()) return NGEP;
+        InsertNewInstBefore(NGEP, GEP);
+        NGEP->takeName(&GEP);
+        return new BitCastInst(NGEP, GEP.getType());
+      }
+    }
+  }
+
+  return 0;
+}
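The bitcast+GEP fold above rests on a simple fact, checked here in a standalone C++ sketch (not from the imported sources): a GEP with constant indices is just a byte offset, and a bitcast of the base doesn't change which bytes an offset reaches.

    #include <cassert>
    #include <cstddef>

    struct Pair { int a; int b; };

    int main() {
      Pair p = {1, 2};
      // "Bitcast" the base to raw bytes, then apply the constant offset...
      char *raw = reinterpret_cast<char*>(&p);
      int *viaBytes = reinterpret_cast<int*>(raw + offsetof(Pair, b));
      // ...and it names the same element a direct field GEP would.
      assert(viaBytes == &p.b && *viaBytes == 2);
      return 0;
    }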
+
+Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
+  // Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
+  if (AI.isArrayAllocation()) {  // Check C != 1
+    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+      const Type *NewTy =
+        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+      AllocationInst *New = 0;
+
+      // Create and insert the replacement instruction...
+      if (isa<MallocInst>(AI))
+        New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName());
+      else {
+        assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+        New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName());
+      }
+
+      InsertNewInstBefore(New, AI);
+
+      // Scan to the end of the allocation instructions, to skip over a block
+      // of allocas if possible...also skip interleaved debug info
+      //
+      BasicBlock::iterator It = New;
+      while (isa<AllocationInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+      // Now that It is pointing to the first non-allocation-inst in the block,
+      // insert our getelementptr instruction...
+      //
+      Value *NullIdx = Constant::getNullValue(Type::Int32Ty);
+      Value *Idx[2];
+      Idx[0] = NullIdx;
+      Idx[1] = NullIdx;
+      Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
+                                           New->getName()+".sub", It);
+
+      // Now make everything use the getelementptr instead of the original
+      // allocation.
+      return ReplaceInstUsesWith(AI, V);
+    } else if (isa<UndefValue>(AI.getArraySize())) {
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+    }
+  }
+
+  if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+    // If alloca'ing a zero byte object, replace the alloca with a null
+    // pointer.  Note that we only do this for alloca's, because malloc should
+    // allocate and return a unique pointer, even for a zero byte allocation.
+    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+    // If the alignment is 0 (unspecified), assign it the preferred alignment.
+    if (AI.getAlignment() == 0)
+      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
+  Value *Op = FI.getOperand(0);
+
+  // free undef -> unreachable.
+  if (isa<UndefValue>(Op)) {
+    // Insert a new store to null because we cannot modify the CFG here.
+    new StoreInst(ConstantInt::getTrue(),
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), &FI);
+    return EraseInstFromFunction(FI);
+  }
+
+  // If we have 'free null' delete the instruction.  This can happen in stl
+  // code when lots of inlining happens.
+  if (isa<ConstantPointerNull>(Op))
+    return EraseInstFromFunction(FI);
+
+  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+    FI.setOperand(0, CI->getOperand(0));
+    return &FI;
+  }
+
+  // Change free (gep X, 0,0,0,0) into free(X)
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    if (GEPI->hasAllZeroIndices()) {
+      AddToWorkList(GEPI);
+      FI.setOperand(0, GEPI->getOperand(0));
+      return &FI;
+    }
+  }
+
+  // Change free(malloc) into nothing, if the malloc has a single use.
+  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+    if (MI->hasOneUse()) {
+      EraseInstFromFunction(FI);
+      return EraseInstFromFunction(*MI);
+    }
+
+  return 0;
+}
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) + return 0; + + const Type *SrcPTy = SrcTy->getElementType(); + + if (DestPTy->isInteger() || isa(DestPTy) || + isa(DestPTy)) { + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (const ArrayType *ASrcTy = dyn_cast(SrcPTy)) + if (Constant *CSrc = dyn_cast(CastOp)) + if (ASrcTy->getNumElements() != 0) { + Value *Idxs[2]; + Idxs[0] = Idxs[1] = Constant::getNullValue(Type::Int32Ty); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + SrcTy = cast(CastOp->getType()); + SrcPTy = SrcTy->getElementType(); + } + + if ((SrcPTy->isInteger() || isa(SrcPTy) || + isa(SrcPTy)) && + // Do not allow turning this into a load of an integer, which is then + // casted to a pointer, this pessimizes pointer analysis a lot. + (isa(SrcPTy) == isa(LI.getType())) && + IC.getTargetData().getTypeSizeInBits(SrcPTy) == + IC.getTargetData().getTypeSizeInBits(DestPTy)) { + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before the load, cast + // the result of the loaded value. + Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp, + CI->getName(), + LI.isVolatile()),LI); + // Now cast the result of the load. + return new BitCastInst(NewLoad, LI.getType()); + } + } + } + return 0; +} + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { + // If it is an alloca it is always safe to load from. + if (isa(V)) return true; + + // If it is a global variable it is mostly safe to load from. + if (const GlobalValue *GV = dyn_cast(V)) + // Don't try to evaluate aliases. External weak GV can be null. + return !isa(GV) && !GV->hasExternalWeakLinkage(); + + // Otherwise, be a little bit agressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call (which might do a free) the pointer could be + // marked invalid. + if (isa(BBI) || + (isa(BBI) && !isa(BBI))) + return false; + + if (LoadInst *LI = dyn_cast(BBI)) { + if (LI->getOperand(0) == V) return true; + } else if (StoreInst *SI = dyn_cast(BBI)) { + if (SI->getOperand(1) == V) return true; + } + + } + return false; +} + +Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { + Value *Op = LI.getOperand(0); + + // Attempt to improve the alignment. + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + if (KnownAlign > + (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : + LI.getAlignment())) + LI.setAlignment(KnownAlign); + + // load (cast X) --> cast (load X) iff safe + if (isa(Op)) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + // None of the following transforms are legal for volatile loads. 
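+  //
+  // As an illustration of the first one, simple store-to-load forwarding
+  // rewrites (hypothetical values)
+  //   store i32 %x, i32* %p
+  //   %t = add i32 %a, %b
+  //   %v = load i32* %p
+  // so that uses of %v become uses of %x and the load disappears.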
+  if (LI.isVolatile()) return 0;
+
+  // Do really simple store-to-load forwarding and load CSE, to catch cases
+  // where there are several consecutive memory accesses to the same location,
+  // separated by a few arithmetic operations.
+  BasicBlock::iterator BBI = &LI;
+  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+    return ReplaceInstUsesWith(LI, AvailableVal);
+
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    const Value *GEPI0 = GEPI->getOperand(0);
+    // TODO: Consider a target hook for valid address spaces for this xform.
+    if (isa<ConstantPointerNull>(GEPI0) &&
+        cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
+      // Insert a new store to null instruction before the load to indicate
+      // that this code is not reachable.  We do this instead of inserting
+      // an unreachable instruction directly because we cannot modify the
+      // CFG.
+      new StoreInst(UndefValue::get(LI.getType()),
+                    Constant::getNullValue(Op->getType()), &LI);
+      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
+
+  if (Constant *C = dyn_cast<Constant>(Op)) {
+    // load null/undef -> undef
+    // TODO: Consider a target hook for valid address spaces for this xform.
+    if (isa<UndefValue>(C) || (C->isNullValue() &&
+        cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
+      // Insert a new store to null instruction before the load to indicate
+      // that this code is not reachable.  We do this instead of inserting an
+      // unreachable instruction directly because we cannot modify the CFG.
+      new StoreInst(UndefValue::get(LI.getType()),
+                    Constant::getNullValue(Op->getType()), &LI);
+      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+
+    // Instcombine load (constant global) into the value loaded.
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op))
+      if (GV->isConstant() && GV->hasDefinitiveInitializer())
+        return ReplaceInstUsesWith(LI, GV->getInitializer());
+
+    // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
+      if (CE->getOpcode() == Instruction::GetElementPtr) {
+        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+          if (GV->isConstant() && GV->hasDefinitiveInitializer())
+            if (Constant *V =
+               ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+              return ReplaceInstUsesWith(LI, V);
+        if (CE->getOperand(0)->isNullValue()) {
+          // Insert a new store to null instruction before the load to indicate
+          // that this code is not reachable.  We do this instead of inserting
+          // an unreachable instruction directly because we cannot modify the
+          // CFG.
+          new StoreInst(UndefValue::get(LI.getType()),
+                        Constant::getNullValue(Op->getType()), &LI);
+          return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+        }
+
+      } else if (CE->isCast()) {
+        if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+          return Res;
+      }
+    }
+  }
+
+  // If this load comes from anywhere in a constant global, and if the global
+  // is all undef or zero, we know what it loads.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
+    if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+      if (GV->getInitializer()->isNullValue())
+        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
+      else if (isa<UndefValue>(GV->getInitializer()))
+        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
+
+  if (Op->hasOneUse()) {
+    // Change select and PHI nodes to select values instead of addresses: this
+    // helps alias analysis out a lot, allows many other simplifications, and
+    // exposes redundancy in the code.
+ // + // Note that we cannot do the transformation unless we know that the + // introduced loads cannot trap! Something like this is valid as long as + // the condition is always false: load (select bool %C, int* null, int* %G), + // but it would not be valid if we transformed it to load from null + // unconditionally. + // + if (SelectInst *SI = dyn_cast(Op)) { + // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { + Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"), LI); + Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"), LI); + return SelectInst::Create(SI->getCondition(), V1, V2); + } + + // load (select (cond, null, P)) -> load P + if (Constant *C = dyn_cast(SI->getOperand(1))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(2)); + return &LI; + } + + // load (select (cond, P, null)) -> load P + if (Constant *C = dyn_cast(SI->getOperand(2))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(1)); + return &LI; + } + } + } + return 0; +} + +/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P +/// when possible. This makes it generally easy to do alias analysis and/or +/// SROA/mem2reg of the memory object. +static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { + User *CI = cast(SI.getOperand(1)); + Value *CastOp = CI->getOperand(0); + + const Type *DestPTy = cast(CI->getType())->getElementType(); + const PointerType *SrcTy = dyn_cast(CastOp->getType()); + if (SrcTy == 0) return 0; + + const Type *SrcPTy = SrcTy->getElementType(); + + if (!DestPTy->isInteger() && !isa(DestPTy)) + return 0; + + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" + /// to its first element. This allows us to handle things like: + /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) + /// on 32-bit hosts. + SmallVector NewGEPIndices; + + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (isa(SrcPTy) || isa(SrcPTy)) { + // Index through pointer. + Constant *Zero = Constant::getNullValue(Type::Int32Ty); + NewGEPIndices.push_back(Zero); + + while (1) { + if (const StructType *STy = dyn_cast(SrcPTy)) { + if (!STy->getNumElements()) /* Struct can be empty {} */ + break; + NewGEPIndices.push_back(Zero); + SrcPTy = STy->getElementType(0); + } else if (const ArrayType *ATy = dyn_cast(SrcPTy)) { + NewGEPIndices.push_back(Zero); + SrcPTy = ATy->getElementType(); + } else { + break; + } + } + + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + } + + if (!SrcPTy->isInteger() && !isa(SrcPTy)) + return 0; + + // If the pointers point into different address spaces or if they point to + // values with different sizes, we can't do the transformation. + if (SrcTy->getAddressSpace() != + cast(CI->getType())->getAddressSpace() || + IC.getTargetData().getTypeSizeInBits(SrcPTy) != + IC.getTargetData().getTypeSizeInBits(DestPTy)) + return 0; + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before + // the store, cast the value to be stored. 
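+  //
+  // Illustratively:
+  //   %c = bitcast i32* %p to float*
+  //   store float %v, float* %c
+  // becomes
+  //   %v.c = bitcast float %v to i32
+  //   store i32 %v.c, i32* %p
+  // so the memory access stays on the original pointer type.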
+ Value *NewCast; + Value *SIOp0 = SI.getOperand(0); + Instruction::CastOps opcode = Instruction::BitCast; + const Type* CastSrcTy = SIOp0->getType(); + const Type* CastDstTy = SrcPTy; + if (isa(CastDstTy)) { + if (CastSrcTy->isInteger()) + opcode = Instruction::IntToPtr; + } else if (isa(CastDstTy)) { + if (isa(SIOp0->getType())) + opcode = Instruction::PtrToInt; + } + + // SIOp0 is a pointer to aggregate and this is a store to the first field, + // emit a GEP to index into its first field. + if (!NewGEPIndices.empty()) { + if (Constant *C = dyn_cast(CastOp)) + CastOp = ConstantExpr::getGetElementPtr(C, &NewGEPIndices[0], + NewGEPIndices.size()); + else + CastOp = IC.InsertNewInstBefore( + GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(), + NewGEPIndices.end()), SI); + } + + if (Constant *C = dyn_cast(SIOp0)) + NewCast = ConstantExpr::getCast(opcode, C, CastDstTy); + else + NewCast = IC.InsertNewInstBefore( + CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), + SI); + return new StoreInst(NewCast, CastOp); +} + +/// equivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool equivalentAddressValues(Value *A, Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come form identical arithmetic instructions. + if (isa(A) || + isa(A) || + isa(A) || + isa(A)) + if (Instruction *BI = dyn_cast(B)) + if (cast(A)->isIdenticalTo(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +// If this instruction has two uses, one of which is a llvm.dbg.declare, +// return the llvm.dbg.declare. +DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { + if (!V->hasNUses(2)) + return 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (DbgDeclareInst *DI = dyn_cast(UI)) + return DI; + if (isa(UI) && UI->hasOneUse()) { + if (DbgDeclareInst *DI = dyn_cast(UI->use_begin())) + return DI; + } + } + return 0; +} + +Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { + Value *Val = SI.getOperand(0); + Value *Ptr = SI.getOperand(1); + + if (isa(Ptr)) { // store X, undef -> noop (even if volatile) + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. + // If the RHS is an alloca with a two uses, the other one being a + // llvm.dbg.declare, zapify the store and the declare, making the + // alloca dead. We must do this to prevent declare's from affecting + // codegen. + if (!SI.isVolatile()) { + if (Ptr->hasOneUse()) { + if (isa(Ptr)) { + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { + if (isa(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) { + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { + EraseInstFromFunction(*DI); + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + } + } + } + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { + EraseInstFromFunction(*DI); + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + } + + // Attempt to improve the alignment. 
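+  // E.g. a store that was emitted as
+  //   store i32 %v, i32* %p, align 4
+  // is re-marked align 16 if %p can be proven (or forced) to be 16-byte
+  // aligned, letting later passes use wider memory operations.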
+ unsigned KnownAlign = + GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + if (KnownAlign > + (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : + SI.getAlignment())) + SI.setAlignment(KnownAlign); + + // Do really simple DSE, to catch cases where there are several consecutive + // stores to the same location, separated by a few arithmetic operations. This + // situation often occurs with bitfield accesses. + BasicBlock::iterator BBI = &SI; + for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; + --ScanInsts) { + --BBI; + // Don't count debug info directives, lest they affect codegen, + // and we skip pointer-to-pointer bitcasts, which are NOPs. + // It is necessary for correctness to skip those that feed into a + // llvm.dbg.declare, as these are not present when debugging is off. + if (isa(BBI) || + (isa(BBI) && isa(BBI->getType()))) { + ScanInsts++; + continue; + } + + if (StoreInst *PrevSI = dyn_cast(BBI)) { + // Prev store isn't volatile, and stores to the same location? + if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { + ++NumDeadStore; + ++BBI; + EraseInstFromFunction(*PrevSI); + continue; + } + break; + } + + // If this is a load, we have to stop. However, if the loaded value is from + // the pointer we're loading and is producing the pointer we're storing, + // then *this* store is dead (X = load P; store X -> P). + if (LoadInst *LI = dyn_cast(BBI)) { + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && + !SI.isVolatile()) { + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + // Otherwise, this is a load from some other location. Stores before it + // may not be dead. + break; + } + + // Don't skip over loads or things that can modify memory. + if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) + break; + } + + + if (SI.isVolatile()) return 0; // Don't hack volatile stores. + + // store X, null -> turns into 'unreachable' in SimplifyCFG + if (isa(Ptr)) { + if (!isa(Val)) { + SI.setOperand(0, UndefValue::get(Val->getType())); + if (Instruction *U = dyn_cast(Val)) + AddToWorkList(U); // Dropped a use. + ++NumCombined; + } + return 0; // Do not modify these! + } + + // store undef, Ptr -> noop + if (isa(Val)) { + EraseInstFromFunction(SI); + ++NumCombined; + return 0; + } + + // If the pointer destination is a cast, see if we can fold the cast into the + // source instead. + if (isa(Ptr)) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->isCast()) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + + + // If this store is the last instruction in the basic block (possibly + // excepting debug info instructions and the pointer bitcasts that feed + // into them), and if the block ends with an unconditional branch, try + // to move it to the successor block. + BBI = &SI; + do { + ++BBI; + } while (isa(BBI) || + (isa(BBI) && isa(BBI->getType()))); + if (BranchInst *BI = dyn_cast(BBI)) + if (BI->isUnconditional()) + if (SimplifyStoreAtEndOfBlock(SI)) + return 0; // xform done! + + return 0; +} + +/// SimplifyStoreAtEndOfBlock - Turn things like: +/// if () { *P = v1; } else { *P = v2 } +/// into a phi node with a store in the successor. +/// +/// Simplify things like: +/// *P = v1; if () { *P = v2; } +/// into a phi node with a store in the successor. 
+/// +bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { + BasicBlock *StoreBB = SI.getParent(); + + // Check to see if the successor block has exactly two incoming edges. If + // so, see if the other predecessor contains a store to the same location. + // if so, insert a PHI node (if needed) and move the stores down. + BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); + + // Determine whether Dest has exactly two predecessors and, if so, compute + // the other predecessor. + pred_iterator PI = pred_begin(DestBB); + BasicBlock *OtherBB = 0; + if (*PI != StoreBB) + OtherBB = *PI; + ++PI; + if (PI == pred_end(DestBB)) + return false; + + if (*PI != StoreBB) { + if (OtherBB) + return false; + OtherBB = *PI; + } + if (++PI != pred_end(DestBB)) + return false; + + // Bail out if all the relevant blocks aren't distinct (this can happen, + // for example, if SI is in an infinite loop) + if (StoreBB == DestBB || OtherBB == DestBB) + return false; + + // Verify that the other block ends in a branch and is not otherwise empty. + BasicBlock::iterator BBI = OtherBB->getTerminator(); + BranchInst *OtherBr = dyn_cast(BBI); + if (!OtherBr || BBI == OtherBB->begin()) + return false; + + // If the other block ends in an unconditional branch, check for the 'if then + // else' case. there is an instruction before the branch. + StoreInst *OtherStore = 0; + if (OtherBr->isUnconditional()) { + --BBI; + // Skip over debugging info. + while (isa(BBI) || + (isa(BBI) && isa(BBI->getType()))) { + if (BBI==OtherBB->begin()) + return false; + --BBI; + } + // If this isn't a store, or isn't a store to the same location, bail out. + OtherStore = dyn_cast(BBI); + if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1)) + return false; + } else { + // Otherwise, the other block ended with a conditional branch. If one of the + // destinations is StoreBB, then we have the if/then case. + if (OtherBr->getSuccessor(0) != StoreBB && + OtherBr->getSuccessor(1) != StoreBB) + return false; + + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an + // if/then triangle. See if there is a store to the same ptr as SI that + // lives in OtherBB. + for (;; --BBI) { + // Check to see if we find the matching store. + if ((OtherStore = dyn_cast(BBI))) { + if (OtherStore->getOperand(1) != SI.getOperand(1)) + return false; + break; + } + // If we find something that may be using or overwriting the stored + // value, or if we run out of instructions, we can't do the xform. + if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || + BBI == OtherBB->begin()) + return false; + } + + // In order to eliminate the store in OtherBr, we have to + // make sure nothing reads or overwrites the stored value in + // StoreBB. + for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { + // FIXME: This should really be AA driven. + if (I->mayReadFromMemory() || I->mayWriteToMemory()) + return false; + } + } + + // Insert a PHI node now if we need it. + Value *MergedVal = OtherStore->getOperand(0); + if (MergedVal != SI.getOperand(0)) { + PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); + PN->reserveOperandSpace(2); + PN->addIncoming(SI.getOperand(0), SI.getParent()); + PN->addIncoming(OtherStore->getOperand(0), OtherBB); + MergedVal = InsertNewInstBefore(PN, DestBB->front()); + } + + // Advance to a place where it is safe to insert the new store and + // insert it. 
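+  //
+  // The net effect, illustratively, for the if/then/else form: the two
+  // predecessor stores
+  //   then:  store i32 %v1, i32* %p
+  //   else:  store i32 %v2, i32* %p
+  // sink into the common successor as
+  //   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
+  //   store i32 %storemerge, i32* %p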
+  BBI = DestBB->getFirstNonPHI();
+  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+                                    OtherStore->isVolatile()), *BBI);
+
+  // Nuke the old stores.
+  EraseInstFromFunction(SI);
+  EraseInstFromFunction(*OtherStore);
+  ++NumCombined;
+  return true;
+}
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+  // Change br (not X), label True, label False to: br X, label False, True
+  Value *X = 0;
+  BasicBlock *TrueDest;
+  BasicBlock *FalseDest;
+  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+      !isa<Constant>(X)) {
+    // Swap Destinations and condition...
+    BI.setCondition(X);
+    BI.setSuccessor(0, FalseDest);
+    BI.setSuccessor(1, TrueDest);
+    return &BI;
+  }
+
+  // Canonicalize fcmp_one -> fcmp_oeq
+  FCmpInst::Predicate FPred; Value *Y;
+  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+                      TrueDest, FalseDest)))
+    if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+         FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) {
+      FCmpInst *I = cast<FCmpInst>(BI.getCondition());
+      FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred);
+      Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I);
+      NewSCC->takeName(I);
+      // Swap Destinations and condition...
+      BI.setCondition(NewSCC);
+      BI.setSuccessor(0, FalseDest);
+      BI.setSuccessor(1, TrueDest);
+      RemoveFromWorkList(I);
+      I->eraseFromParent();
+      AddToWorkList(NewSCC);
+      return &BI;
+    }
+
+  // Canonicalize icmp_ne -> icmp_eq
+  ICmpInst::Predicate IPred;
+  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+                      TrueDest, FalseDest)))
+    if ((IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+         IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+         IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) {
+      ICmpInst *I = cast<ICmpInst>(BI.getCondition());
+      ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred);
+      Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I);
+      NewSCC->takeName(I);
+      // Swap Destinations and condition...
+      BI.setCondition(NewSCC);
+      BI.setSuccessor(0, FalseDest);
+      BI.setSuccessor(1, TrueDest);
+      RemoveFromWorkList(I);
+      I->eraseFromParent();
+      AddToWorkList(NewSCC);
+      return &BI;
+    }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+  Value *Cond = SI.getCondition();
+  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+    if (I->getOpcode() == Instruction::Add)
+      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+          SI.setOperand(i, ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+                                                AddRHS));
+        SI.setOperand(0, I->getOperand(0));
+        AddToWorkList(I);
+        return &SI;
+      }
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+  Value *Agg = EV.getAggregateOperand();
+
+  if (!EV.hasIndices())
+    return ReplaceInstUsesWith(EV, Agg);
+
+  if (Constant *C = dyn_cast<Constant>(Agg)) {
+    if (isa<UndefValue>(C))
+      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+    if (isa<ConstantAggregateZero>(C))
+      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+      // Extract the element indexed by the first index out of the constant
+      Value *V = C->getOperand(*EV.idx_begin());
+      if (EV.getNumIndices() > 1)
+        // Extract the remaining indices out of the constant indexed by the
+        // first index
+        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+      else
+        return ReplaceInstUsesWith(EV, V);
+    }
+    return 0; // Can't handle other constants
+  }
+  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+    // We're extracting from an insertvalue instruction, compare the indices
+    const unsigned *exti, *exte, *insi, *inse;
+    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+         exte = EV.idx_end(), inse = IV->idx_end();
+         exti != exte && insi != inse;
+         ++exti, ++insi) {
+      if (*insi != *exti)
+        // The insert and extract both reference distinctly different elements.
+        // This means the extract is not influenced by the insert, and we can
+        // replace the aggregate operand of the extract with the aggregate
+        // operand of the insert. i.e., replace
+        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+        // %E = extractvalue { i32, { i32 } } %I, 0
+        // with
+        // %E = extractvalue { i32, { i32 } } %A, 0
+        return ExtractValueInst::Create(IV->getAggregateOperand(),
+                                        EV.idx_begin(), EV.idx_end());
+    }
+    if (exti == exte && insi == inse)
+      // Both iterators are at the end: Index lists are identical. Replace
+      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+      // %C = extractvalue { i32, { i32 } } %B, 1, 0
+      // with "i32 42"
+      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+    if (exti == exte) {
+      // The extract list is a prefix of the insert list. i.e. replace
+      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+      // %E = extractvalue { i32, { i32 } } %I, 1
+      // with
+      // %X = extractvalue { i32, { i32 } } %A, 1
+      // %E = insertvalue { i32 } %X, i32 42, 0
+      // by switching the order of the insert and extract (though the
+      // insertvalue should be left in, since it may have other uses).
+      Value *NewEV = InsertNewInstBefore(
+        ExtractValueInst::Create(IV->getAggregateOperand(),
+                                 EV.idx_begin(), EV.idx_end()),
+        EV);
+      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+                                     insi, inse);
+    }
+    if (insi == inse)
+      // The insert list is a prefix of the extract list
+      // We can simply remove the common indices from the extract and make it
+      // operate on the inserted value instead of the insertvalue result.
+      // i.e., replace
+      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+      // %E = extractvalue { i32, { i32 } } %I, 1, 0
+      // with
+      // %E = extractvalue { i32 } { i32 42 }, 0
+      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+                                      exti, exte);
+  }
+  // Can't simplify extracts from other values. Note that nested extracts are
+  // already simplified implicitly by the above (extract ( extract (insert) )
+  // will be translated into extract ( insert ( extract ) ) first and then just
+  // the value inserted, if appropriate).
+  return 0;
+}
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+  if (isa<ConstantAggregateZero>(V))
+    return true;
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+    if (isConstant) return true;
+    // If all elts are the same, we can extract.
+    Constant *Op0 = C->getOperand(0);
+    for (unsigned i = 1; i < C->getNumOperands(); ++i)
+      if (C->getOperand(i) != Op0)
+        return false;
+    return true;
+  }
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  // Insert element gets simplified to the inserted element or is deleted if
+  // this is a constant idx extract element and it's a constant idx insertelt.
+  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+      isa<ConstantInt>(I->getOperand(2)))
+    return true;
+  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+    return true;
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+    if (BO->hasOneUse() &&
+        (CheapToScalarize(BO->getOperand(0), isConstant) ||
+         CheapToScalarize(BO->getOperand(1), isConstant)))
+      return true;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+    if (CI->hasOneUse() &&
+        (CheapToScalarize(CI->getOperand(0), isConstant) ||
+         CheapToScalarize(CI->getOperand(1), isConstant)))
+      return true;
+
+  return false;
+}
+
+/// Read and decode a shufflevector mask.
+///
+/// It turns undef elements into values that are larger than the number of
+/// elements in the input.
+static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+  unsigned NElts = SVI->getType()->getNumElements();
+  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+    return std::vector<unsigned>(NElts, 0);
+  if (isa<UndefValue>(SVI->getOperand(2)))
+    return std::vector<unsigned>(NElts, 2*NElts);
+
+  std::vector<unsigned> Result;
+  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+    if (isa<UndefValue>(*i))
+      Result.push_back(NElts*2);  // undef -> out of range value
+    else
+      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+  return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+  assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+  const VectorType *PTy = cast<VectorType>(V->getType());
+  unsigned Width = PTy->getNumElements();
+  if (EltNo >= Width)  // Out of range access.
+ return UndefValue::get(PTy->getElementType()); + + if (isa(V)) + return UndefValue::get(PTy->getElementType()); + else if (isa(V)) + return Constant::getNullValue(PTy->getElementType()); + else if (ConstantVector *CP = dyn_cast(V)) + return CP->getOperand(EltNo); + else if (InsertElementInst *III = dyn_cast(V)) { + // If this is an insert to a variable element, we don't know what it is. + if (!isa(III->getOperand(2))) + return 0; + unsigned IIElt = cast(III->getOperand(2))->getZExtValue(); + + // If this is an insert to the element we are looking for, return the + // inserted value. + if (EltNo == IIElt) + return III->getOperand(1); + + // Otherwise, the insertelement doesn't modify the value, recurse on its + // vector input. + return FindScalarElement(III->getOperand(0), EltNo); + } else if (ShuffleVectorInst *SVI = dyn_cast(V)) { + unsigned LHSWidth = + cast(SVI->getOperand(0)->getType())->getNumElements(); + unsigned InEl = getShuffleMask(SVI)[EltNo]; + if (InEl < LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + else if (InEl < LHSWidth*2) + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); + else + return UndefValue::get(PTy->getElementType()); + } + + // Otherwise, we don't know. + return 0; +} + +Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { + // If vector val is undef, replace extract with scalar undef. + if (isa(EI.getOperand(0))) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + + // If vector val is constant 0, replace extract with scalar 0. + if (isa(EI.getOperand(0))) + return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); + + if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { + // If vector val is constant with all elements the same, replace EI with + // that element. When the elements are not identical, we cannot replace yet + // (we do that below, but only when the index is constant). + Constant *op0 = C->getOperand(0); + for (unsigned i = 1; i < C->getNumOperands(); ++i) + if (C->getOperand(i) != op0) { + op0 = 0; + break; + } + if (op0) + return ReplaceInstUsesWith(EI, op0); + } + + // If extracting a specified index from the vector, see if we can recursively + // find a previously computed scalar that was inserted into the vector. + if (ConstantInt *IdxC = dyn_cast(EI.getOperand(1))) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = + cast(EI.getOperand(0)->getType())->getNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + + // This instruction only demands the single element from the input vector. + // If the input vector has a single use, simplify it based on this use + // property. + if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { + APInt UndefElts(VectorWidth, 0); + APInt DemandedMask(VectorWidth, 1 << IndexVal); + if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), + DemandedMask, UndefElts)) { + EI.setOperand(0, V); + return &EI; + } + } + + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) + return ReplaceInstUsesWith(EI, Elt); + + // If the this extractelement is directly using a bitcast from a vector of + // the same number of elements, see if we can find the source element from + // it. In this case, we will end up needing to bitcast the scalars. 
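+    //
+    // Illustratively (hypothetical names), if %src was built as
+    //   %src = insertelement <2 x i32> undef, i32 %s, i32 0
+    //   %b   = bitcast <2 x i32> %src to <2 x float>
+    //   %e   = extractelement <2 x float> %b, i32 0
+    // then %e is rebuilt as a scalar cast: bitcast i32 %s to float.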
+ if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { + if (const VectorType *VT = + dyn_cast(BCI->getOperand(0)->getType())) + if (VT->getNumElements() == VectorWidth) + if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + return new BitCastInst(Elt, EI.getType()); + } + } + + if (Instruction *I = dyn_cast(EI.getOperand(0))) { + if (I->hasOneUse()) { + // Push extractelement into predecessor operation if legal and + // profitable to do so + if (BinaryOperator *BO = dyn_cast(I)) { + bool isConstantElt = isa(EI.getOperand(1)); + if (CheapToScalarize(BO, isConstantElt)) { + ExtractElementInst *newEI0 = + new ExtractElementInst(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); + ExtractElementInst *newEI1 = + new ExtractElementInst(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); + InsertNewInstBefore(newEI0, EI); + InsertNewInstBefore(newEI1, EI); + return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); + } + } else if (isa(I)) { + unsigned AS = + cast(I->getOperand(0)->getType())->getAddressSpace(); + Value *Ptr = InsertBitCastBefore(I->getOperand(0), + PointerType::get(EI.getType(), AS),EI); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep"); + InsertNewInstBefore(GEP, EI); + return new LoadInst(GEP); + } + } + if (InsertElementInst *IE = dyn_cast(I)) { + // Extracting the inserted element? + if (IE->getOperand(2) == EI.getOperand(1)) + return ReplaceInstUsesWith(EI, IE->getOperand(1)); + // If the inserted and extracted elements are constants, they must not + // be the same value, extract from the pre-inserted value instead. + if (isa(IE->getOperand(2)) && + isa(EI.getOperand(1))) { + AddUsesToWorkList(EI); + EI.setOperand(0, IE->getOperand(0)); + return &EI; + } + } else if (ShuffleVectorInst *SVI = dyn_cast(I)) { + // If this is extracting an element from a shufflevector, figure out where + // it came from and extract from the appropriate input element instead. + if (ConstantInt *Elt = dyn_cast(EI.getOperand(1))) { + unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + Value *Src; + unsigned LHSWidth = + cast(SVI->getOperand(0)->getType())->getNumElements(); + + if (SrcIdx < LHSWidth) + Src = SVI->getOperand(0); + else if (SrcIdx < LHSWidth*2) { + SrcIdx -= LHSWidth; + Src = SVI->getOperand(1); + } else { + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + } + return new ExtractElementInst(Src, SrcIdx); + } + } + } + return 0; +} + +/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns +/// elements from either LHS or RHS, return the shuffle mask and true. +/// Otherwise, return false. +static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, + std::vector &Mask) { + assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && + "Invalid CollectSingleShuffleElements"); + unsigned NumElts = cast(V->getType())->getNumElements(); + + if (isa(V)) { + Mask.assign(NumElts, UndefValue::get(Type::Int32Ty)); + return true; + } else if (V == LHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::Int32Ty, i)); + return true; + } else if (V == RHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::Int32Ty, i+NumElts)); + return true; + } else if (InsertElementInst *IEI = dyn_cast(V)) { + // If this is an insert of an extract from some other vector, include it. 
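+    //
+    // E.g. for <2 x i32> operands (illustrative):
+    //   %e = extractelement <2 x i32> %RHS, i32 0
+    //   %v = insertelement <2 x i32> %LHS, i32 %e, i32 1
+    // acts like shufflevector %LHS, %RHS, <i32 0, i32 2>: slot 1 takes
+    // RHS element 0, so its mask entry is 0 + NumElts == 2.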
+ Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (!isa(IdxOp)) + return false; + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + if (isa(ScalarOp)) { // inserting undef into vector. + // Okay, we can handle this if the vector we are insertinting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted undef. + Mask[InsertedIdx] = UndefValue::get(Type::Int32Ty); + return true; + } + } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){ + if (isa(EI->getOperand(1)) && + EI->getOperand(0)->getType() == V->getType()) { + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + + // This must be extracting from either LHS or RHS. + if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { + // Okay, we can handle this if the vector we are insertinting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted value. + if (EI->getOperand(0) == LHS) { + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::Int32Ty, ExtractedIdx); + } else { + assert(EI->getOperand(0) == RHS); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::Int32Ty, ExtractedIdx+NumElts); + + } + return true; + } + } + } + } + } + // TODO: Handle shufflevector here! + + return false; +} + +/// CollectShuffleElements - We are building a shuffle of V, using RHS as the +/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask +/// that computes V and the LHS value of the shuffle. +static Value *CollectShuffleElements(Value *V, std::vector &Mask, + Value *&RHS) { + assert(isa(V->getType()) && + (RHS == 0 || V->getType() == RHS->getType()) && + "Invalid shuffle!"); + unsigned NumElts = cast(V->getType())->getNumElements(); + + if (isa(V)) { + Mask.assign(NumElts, UndefValue::get(Type::Int32Ty)); + return V; + } else if (isa(V)) { + Mask.assign(NumElts, ConstantInt::get(Type::Int32Ty, 0)); + return V; + } else if (InsertElementInst *IEI = dyn_cast(V)) { + // If this is an insert of an extract from some other vector, include it. + Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { + if (isa(EI->getOperand(1)) && isa(IdxOp) && + EI->getOperand(0)->getType() == V->getType()) { + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + // Either the extracted from or inserted into vector must be RHSVec, + // otherwise we'd end up with a shuffle of three inputs. + if (EI->getOperand(0) == RHS || RHS == 0) { + RHS = EI->getOperand(0); + Value *V = CollectShuffleElements(VecOp, Mask, RHS); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::Int32Ty, NumElts+ExtractedIdx); + return V; + } + + if (VecOp == RHS) { + Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Everything but the extracted element is replaced with the RHS. + for (unsigned i = 0; i != NumElts; ++i) { + if (i != InsertedIdx) + Mask[i] = ConstantInt::get(Type::Int32Ty, NumElts+i); + } + return V; + } + + // If this insertelement is a chain that comes from exactly these two + // vectors, return the vector and the effective shuffle. + if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) + return EI->getOperand(0); + + } + } + } + // TODO: Handle shufflevector here! 
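+  //
+  // Illustratively, the overall effect of this walk: a chain such as
+  //   %e0 = extractelement <2 x float> %V, i32 1
+  //   %e1 = extractelement <2 x float> %V, i32 0
+  //   %i0 = insertelement <2 x float> undef, float %e0, i32 0
+  //   %i1 = insertelement <2 x float> %i0, float %e1, i32 1
+  // is recognized as a single shuffle equivalent to
+  //   shufflevector <2 x float> %V, <2 x float> undef, <2 x i32> <i32 1, i32 0>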
+ + // Otherwise, can't do anything fancy. Return an identity vector. + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::Int32Ty, i)); + return V; +} + +Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { + Value *VecOp = IE.getOperand(0); + Value *ScalarOp = IE.getOperand(1); + Value *IdxOp = IE.getOperand(2); + + // Inserting an undef or into an undefined place, remove this. + if (isa(ScalarOp) || isa(IdxOp)) + ReplaceInstUsesWith(IE, VecOp); + + // If the inserted element was extracted from some other vector, and if the + // indexes are constant, try to turn this into a shufflevector operation. + if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { + if (isa(EI->getOperand(1)) && isa(IdxOp) && + EI->getOperand(0)->getType() == IE.getType()) { + unsigned NumVectorElts = IE.getType()->getNumElements(); + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + if (ExtractedIdx >= NumVectorElts) // Out of range extract. + return ReplaceInstUsesWith(IE, VecOp); + + if (InsertedIdx >= NumVectorElts) // Out of range insert. + return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); + + // If we are extracting a value from a vector, then inserting it right + // back into the same place, just use the input vector. + if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) + return ReplaceInstUsesWith(IE, VecOp); + + // We could theoretically do this for ANY input. However, doing so could + // turn chains of insertelement instructions into a chain of shufflevector + // instructions, and right now we do not merge shufflevectors. As such, + // only do this in a situation where it is clear that there is benefit. + if (isa(VecOp) || isa(VecOp)) { + // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of + // the values of VecOp, except then one read from EIOp0. + // Build a new shuffle mask. + std::vector Mask; + if (isa(VecOp)) + Mask.assign(NumVectorElts, UndefValue::get(Type::Int32Ty)); + else { + assert(isa(VecOp) && "Unknown thing"); + Mask.assign(NumVectorElts, ConstantInt::get(Type::Int32Ty, + NumVectorElts)); + } + Mask[InsertedIdx] = ConstantInt::get(Type::Int32Ty, ExtractedIdx); + return new ShuffleVectorInst(EI->getOperand(0), VecOp, + ConstantVector::get(Mask)); + } + + // If this insertelement isn't used by some other insertelement, turn it + // (and any insertelements it points to), into one big shuffle. + if (!IE.hasOneUse() || !isa(IE.use_back())) { + std::vector Mask; + Value *RHS = 0; + Value *LHS = CollectShuffleElements(&IE, Mask, RHS); + if (RHS == 0) RHS = UndefValue::get(LHS->getType()); + // We now have a shuffle of LHS, RHS, Mask. + return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); + } + } + } + + return 0; +} + + +Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { + Value *LHS = SVI.getOperand(0); + Value *RHS = SVI.getOperand(1); + std::vector Mask = getShuffleMask(&SVI); + + bool MadeChange = false; + + // Undefined shuffle mask -> undefined value. 
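+  //
+  // Further down, the canonicalizations work like this (illustrative):
+  //   shufflevector <2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 0, i32 3>
+  // references the same value on both sides, so it is remapped to
+  //   shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> <i32 0, i32 1>
+  // whose mask is the identity, and the whole instruction folds to %x.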
+ if (isa(SVI.getOperand(2))) + return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); + + unsigned VWidth = cast(SVI.getType())->getNumElements(); + + if (VWidth != cast(LHS->getType())->getNumElements()) + return 0; + + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + LHS = SVI.getOperand(0); + RHS = SVI.getOperand(1); + MadeChange = true; + } + + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') + // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). + if (LHS == RHS || isa(LHS)) { + if (isa(LHS) && LHS == RHS) { + // shuffle(undef,undef,mask) -> undef. + return ReplaceInstUsesWith(SVI, LHS); + } + + // Remap any references to RHS to use LHS. + std::vector Elts; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] >= 2*e) + Elts.push_back(UndefValue::get(Type::Int32Ty)); + else { + if ((Mask[i] >= e && isa(RHS)) || + (Mask[i] < e && isa(LHS))) { + Mask[i] = 2*e; // Turn into undef. + Elts.push_back(UndefValue::get(Type::Int32Ty)); + } else { + Mask[i] = Mask[i] % e; // Force to LHS. + Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i])); + } + } + } + SVI.setOperand(0, SVI.getOperand(1)); + SVI.setOperand(1, UndefValue::get(RHS->getType())); + SVI.setOperand(2, ConstantVector::get(Elts)); + LHS = SVI.getOperand(0); + RHS = SVI.getOperand(1); + MadeChange = true; + } + + // Analyze the shuffle, are the LHS or RHS and identity shuffles? + bool isLHSID = true, isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] >= e*2) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } + + // Eliminate identity shuffles. + if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); + + // If the LHS is a shufflevector itself, see if we can combine it with this + // one without producing an unusual shuffle. Here we are really conservative: + // we are absolutely afraid of producing a shuffle mask not in the input + // program, because the code gen may not be smart enough to turn a merged + // shuffle into two specific shuffles: it may produce worse code. As such, + // we only merge two shuffles if the result is one of the two input shuffle + // masks. In this case, merging the shuffles just removes one instruction, + // which we know is safe. This is good for things like turning: + // (splat(splat)) -> splat. + if (ShuffleVectorInst *LHSSVI = dyn_cast(LHS)) { + if (isa(RHS)) { + std::vector LHSMask = getShuffleMask(LHSSVI); + + std::vector NewMask; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) + if (Mask[i] >= 2*e) + NewMask.push_back(2*e); + else + NewMask.push_back(LHSMask[Mask[i]]); + + // If the result mask is equal to the src shuffle or this shuffle mask, do + // the replacement. + if (NewMask == LHSMask || NewMask == Mask) { + unsigned LHSInNElts = + cast(LHSSVI->getOperand(0)->getType())->getNumElements(); + std::vector Elts; + for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { + if (NewMask[i] >= LHSInNElts*2) { + Elts.push_back(UndefValue::get(Type::Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Type::Int32Ty, NewMask[i])); + } + } + return new ShuffleVectorInst(LHSSVI->getOperand(0), + LHSSVI->getOperand(1), + ConstantVector::get(Elts)); + } + } + } + + return MadeChange ? 
&SVI : 0; +} + + + + +/// TryToSinkInstruction - Try to move the specified instruction from its +/// current block into the beginning of DestBlock, which can only happen if it's +/// safe to move the instruction past all of the instructions between it and the +/// end of its block. +static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { + assert(I->hasOneUse() && "Invariants didn't hold!"); + + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa(I) || I->mayHaveSideEffects() || isa(I)) + return false; + + // Do not sink alloca instructions out of the entry block. + if (isa(I) && I->getParent() == + &DestBlock->getParent()->getEntryBlock()) + return false; + + // We can only sink load instructions if there is nothing between the load and + // the end of block that could change the value. + if (I->mayReadFromMemory()) { + for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); + Scan != E; ++Scan) + if (Scan->mayWriteToMemory()) + return false; + } + + BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); + + CopyPrecedingStopPoint(I, InsertPos); + I->moveBefore(InsertPos); + ++NumSunkInst; + return true; +} + + +/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding +/// all reachable code to the worklist. +/// +/// This has a couple of tricks to make the code faster and more powerful. In +/// particular, we constant fold and DCE instructions as we go, to avoid adding +/// them to the worklist (this significantly speeds up instcombine on code where +/// many instructions are dead or constant). Additionally, if we find a branch +/// whose condition is a known constant, we only visit the reachable successors. +/// +static void AddReachableCodeToWorklist(BasicBlock *BB, + SmallPtrSet &Visited, + InstCombiner &IC, + const TargetData *TD) { + SmallVector Worklist; + Worklist.push_back(BB); + + while (!Worklist.empty()) { + BB = Worklist.back(); + Worklist.pop_back(); + + // We have now visited this block! If we've already been here, ignore it. + if (!Visited.insert(BB)) continue; + + DbgInfoIntrinsic *DBI_Prev = NULL; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *Inst = BBI++; + + // DCE instruction if trivially dead. + if (isInstructionTriviallyDead(Inst)) { + ++NumDeadInst; + DOUT << "IC: DCE: " << *Inst; + Inst->eraseFromParent(); + continue; + } + + // ConstantProp instruction if trivially constant. + if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; + Inst->replaceAllUsesWith(C); + ++NumConstProp; + Inst->eraseFromParent(); + continue; + } + + // If there are two consecutive llvm.dbg.stoppoint calls then + // it is likely that the optimizer deleted code in between these + // two intrinsics. + DbgInfoIntrinsic *DBI_Next = dyn_cast(Inst); + if (DBI_Next) { + if (DBI_Prev + && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint + && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) { + IC.RemoveFromWorkList(DBI_Prev); + DBI_Prev->eraseFromParent(); + } + DBI_Prev = DBI_Next; + } else { + DBI_Prev = 0; + } + + IC.AddToWorkList(Inst); + } + + // Recursively visit successors. If this is a branch or switch on a + // constant, only visit the reachable successor. 
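+    //
+    // E.g. given
+    //   br i1 true, label %live, label %dead
+    // only %live is queued; blocks reachable solely through %dead are never
+    // visited here and are cleaned up as unreachable in DoOneIteration.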
+ TerminatorInst *TI = BB->getTerminator(); + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional() && isa(BI->getCondition())) { + bool CondVal = cast(BI->getCondition())->getZExtValue(); + BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); + Worklist.push_back(ReachableBB); + continue; + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (ConstantInt *Cond = dyn_cast(SI->getCondition())) { + // See if this is an explicit destination. + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) + if (SI->getCaseValue(i) == Cond) { + BasicBlock *ReachableBB = SI->getSuccessor(i); + Worklist.push_back(ReachableBB); + continue; + } + + // Otherwise it is the default destination. + Worklist.push_back(SI->getSuccessor(0)); + continue; + } + } + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + Worklist.push_back(TI->getSuccessor(i)); + } +} + +bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { + bool Changed = false; + TD = &getAnalysis(); + + DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + << F.getNameStr() << "\n"); + + { + // Do a depth-first traversal of the function, populate the worklist with + // the reachable instructions. Ignore blocks that are not reachable. Keep + // track of which blocks we visit. + SmallPtrSet Visited; + AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); + + // Do a quick scan over the function. If we find any blocks that are + // unreachable, remove any instructions inside of them. This prevents + // the instcombine code from having to deal with some bad special cases. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (!Visited.count(BB)) { + Instruction *Term = BB->getTerminator(); + while (Term != BB->begin()) { // Remove instrs bottom-up + BasicBlock::iterator I = Term; --I; + + DOUT << "IC: DCE: " << *I; + // A debug intrinsic shouldn't force another iteration if we weren't + // going to do one without it. + if (!isa(I)) { + ++NumDeadInst; + Changed = true; + } + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + } + } + + while (!Worklist.empty()) { + Instruction *I = RemoveOneFromWorkList(); + if (I == 0) continue; // skip null values. + + // Check to see if we can DCE the instruction. + if (isInstructionTriviallyDead(I)) { + // Add operands to the worklist. + if (I->getNumOperands() < 4) + AddUsesToWorkList(*I); + ++NumDeadInst; + + DOUT << "IC: DCE: " << *I; + + I->eraseFromParent(); + RemoveFromWorkList(I); + Changed = true; + continue; + } + + // Instruction isn't dead, see if we can constant propagate it. + if (Constant *C = ConstantFoldInstruction(I, TD)) { + DOUT << "IC: ConstFold to: " << *C << " from: " << *I; + + // Add operands to the worklist. + AddUsesToWorkList(*I); + ReplaceInstUsesWith(*I, C); + + ++NumConstProp; + I->eraseFromParent(); + RemoveFromWorkList(I); + Changed = true; + continue; + } + + if (TD && + (I->getType()->getTypeID() == Type::VoidTyID || + I->isTrapping())) { + // See if we can constant fold its operands. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (ConstantExpr *CE = dyn_cast(i)) + if (Constant *NewC = ConstantFoldConstantExpression(CE, TD)) + if (NewC != CE) { + i->set(NewC); + Changed = true; + } + } + + // See if we can trivially sink this instruction to a successor basic block. 
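+    // Illustratively: if %t = add i32 %a, %b has its single use in a
+    // successor block with no other predecessors, the add is moved into
+    // that block, so paths that bypass the use never execute it.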
+ if (I->hasOneUse()) { + BasicBlock *BB = I->getParent(); + BasicBlock *UserParent = cast(I->use_back())->getParent(); + if (UserParent != BB) { + bool UserIsSuccessor = false; + // See if the user is one of our successors. + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + if (*SI == UserParent) { + UserIsSuccessor = true; + break; + } + + // If the user is one of our immediate successors, and if that successor + // only has us as a predecessors (we'd have to split the critical edge + // otherwise), we can keep going. + if (UserIsSuccessor && !isa(I->use_back()) && + next(pred_begin(UserParent)) == pred_end(UserParent)) + // Okay, the CFG is simple enough, try to sink this instruction. + Changed |= TryToSinkInstruction(I, UserParent); + } + } + + // Now that we have an instruction, try combining it to simplify it... +#ifndef NDEBUG + std::string OrigI; +#endif + DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str();); + if (Instruction *Result = visit(*I)) { + ++NumCombined; + // Should we replace the old instruction with a new one? + if (Result != I) { + DOUT << "IC: Old = " << *I + << " New = " << *Result; + + // Everything uses the new instruction now. + I->replaceAllUsesWith(Result); + + // Push the new instruction and any users onto the worklist. + AddToWorkList(Result); + AddUsersToWorkList(*Result); + + // Move the name to the new instruction first. + Result->takeName(I); + + // Insert the new instruction into the basic block... + BasicBlock *InstParent = I->getParent(); + BasicBlock::iterator InsertPos = I; + + if (!isa(Result)) // If combining a PHI, don't insert + while (isa(InsertPos)) // middle of a block of PHIs. + ++InsertPos; + + InstParent->getInstList().insert(InsertPos, Result); + + // Make sure that we reprocess all operands now that we reduced their + // use counts. + AddUsesToWorkList(*I); + + // Instructions can end up on the worklist more than once. Make sure + // we do not process an instruction that has been deleted. + RemoveFromWorkList(I); + + // Erase the old instruction. + InstParent->getInstList().erase(I); + } else { +#ifndef NDEBUG + DOUT << "IC: Mod = " << OrigI + << " New = " << *I; +#endif + + // If the instruction was modified, it's possible that it is now dead. + // if so, remove it. + if (isInstructionTriviallyDead(I)) { + // Make sure we process all operands now that we are reducing their + // use counts. + AddUsesToWorkList(*I); + + // Instructions may end up in the worklist more than once. Erase all + // occurrences of this instruction. + RemoveFromWorkList(I); + I->eraseFromParent(); + } else { + AddToWorkList(I); + AddUsersToWorkList(*I); + } + } + Changed = true; + } + } + + assert(WorklistMap.empty() && "Worklist empty, but map not?"); + + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); + return Changed; +} + + +bool InstCombiner::runOnFunction(Function &F) { + MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + bool EverMadeChange = false; + + // Iterate while there is work to do. 
+ unsigned Iteration = 0; + while (DoOneIteration(F, Iteration++)) + EverMadeChange = true; + return EverMadeChange; +} + +FunctionPass *llvm::createInstructionCombiningPass() { + return new InstCombiner(); +} diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp new file mode 100644 index 000000000000..c0ca2df1ce11 --- /dev/null +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -0,0 +1,954 @@ +//===- JumpThreading.cpp - Thread control through conditional blocks ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Jump Threading pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jump-threading" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" +using namespace llvm; + +STATISTIC(NumThreads, "Number of jumps threaded"); +STATISTIC(NumFolds, "Number of terminators folded"); + +static cl::opt +Threshold("jump-threading-threshold", + cl::desc("Max block size to duplicate for jump threading"), + cl::init(6), cl::Hidden); + +namespace { + /// This pass performs 'jump threading', which looks at blocks that have + /// multiple predecessors and multiple successors. If one or more of the + /// predecessors of the block can be proven to always jump to one of the + /// successors, we forward the edge from the predecessor to the successor by + /// duplicating the contents of this block. + /// + /// An example of when this can occur is code like this: + /// + /// if () { ... + /// X = 4; + /// } + /// if (X < 3) { + /// + /// In this case, the unconditional branch at the end of the first if can be + /// revectored to the false side of the second if. 
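+  /// In IR terms (illustrative):
+  ///   %cmp = icmp slt i32 %X, 3
+  ///   br i1 %cmp, label %then2, label %else2
+  /// For the predecessor where X is known to be 4, the comparison folds to
+  /// false, so that edge can jump straight to %else2.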
+  ///
+  class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
+    TargetData *TD;
+#ifdef NDEBUG
+    SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+    SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+  public:
+    static char ID; // Pass identification
+    JumpThreading() : FunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+    }
+
+    bool runOnFunction(Function &F);
+    void FindLoopHeaders(Function &F);
+
+    bool ProcessBlock(BasicBlock *BB);
+    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
+                    unsigned JumpThreadCost);
+    BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+    bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+    bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+
+    bool ProcessJumpOnPHI(PHINode *PN);
+    bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
+    bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
+
+    bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+  };
+}
+
+char JumpThreading::ID = 0;
+static RegisterPass<JumpThreading>
+X("jump-threading", "Jump Threading");
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+  DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";
+  TD = &getAnalysis<TargetData>();
+
+  FindLoopHeaders(F);
+
+  bool AnotherIteration = true, EverChanged = false;
+  while (AnotherIteration) {
+    AnotherIteration = false;
+    bool Changed = false;
+    for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+      BasicBlock *BB = I;
+      while (ProcessBlock(BB))
+        Changed = true;
+
+      ++I;
+
+      // If the block is trivially dead, zap it.  This eliminates the successor
+      // edges which simplifies the CFG.
+      if (pred_begin(BB) == pred_end(BB) &&
+          BB != &BB->getParent()->getEntryBlock()) {
+        DOUT << "  JT: Deleting dead block '" << BB->getNameStart()
+             << "' with terminator: " << *BB->getTerminator();
+        LoopHeaders.erase(BB);
+        DeleteDeadBlock(BB);
+        Changed = true;
+      }
+    }
+    AnotherIteration = Changed;
+    EverChanged |= Changed;
+  }
+
+  LoopHeaders.clear();
+  return EverChanged;
+}
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops.  Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations.  To prevent this from
+/// happening, we first have to find the loop headers.  Here we approximate
+/// this by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header.  For example, threading a jump from outside
+/// the loop (the preheader) to an exit block of the loop is definitely
+/// profitable.  It is also almost always profitable to thread backedges from
+/// within the loop to exit blocks, and is often profitable to thread
+/// backedges to other blocks within the loop (forming a nested loop).  This
+/// simple analysis is not rich enough to track all of these properties and
+/// keep it up-to-date as the CFG mutates, so we don't allow any of these
+/// transformations.
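+///
+/// As a concrete illustration of the hazard (editor's sketch, block names
+/// invented): given the edges
+///   entry->H, H->B1, H->B2, B1->B2, B2->H
+/// the backedge B2->H marks H as a loop header.  Threading B2 across H into
+/// B1 would replace B2->H with B2->B1, leaving the cycle {B1, B2} enterable
+/// both at B1 and at B2 (each from H) -- an irreducible region that the loop
+/// passes could no longer treat as a natural loop.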
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+  FindFunctionBackedges(F, Edges);
+
+  for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+    LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+
+/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
+/// value for the PHI, factor them together so we get one block to thread for
+/// the whole group.
+/// This is important for things like "phi i1 [true, true, false, true, x]"
+/// where we only need to clone the block for the true blocks once.
+///
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+  SmallVector<BasicBlock*, 16> CommonPreds;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+    if (PN->getIncomingValue(i) == CstVal)
+      CommonPreds.push_back(PN->getIncomingBlock(i));
+
+  if (CommonPreds.size() == 1)
+    return CommonPreds[0];
+
+  DOUT << "  Factoring out " << CommonPreds.size()
+       << " common predecessors.\n";
+  return SplitBlockPredecessors(PN->getParent(),
+                                &CommonPreds[0], CommonPreds.size(),
+                                ".thr_comm", this);
+}
+
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+  /// Ignore PHI nodes, these will be flattened when duplication happens.
+  BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+  // Sum up the cost of each instruction until we get to the terminator.  Don't
+  // include the terminator because the copy won't include it.
+  unsigned Size = 0;
+  for (; !isa<TerminatorInst>(I); ++I) {
+    // Debugger intrinsics don't incur code size.
+    if (isa<DbgInfoIntrinsic>(I)) continue;
+
+    // If this is a pointer->pointer bitcast, it is free.
+    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+      continue;
+
+    // All other instructions count for at least one unit.
+    ++Size;
+
+    // Calls are more expensive.  If they are non-intrinsic calls, we model them
+    // as having cost of 4.  If they are a non-vector intrinsic, we model them
+    // as having cost of 2 total, and if they are a vector intrinsic, we model
+    // them as having cost 1.
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      if (!isa<IntrinsicInst>(CI))
+        Size += 3;
+      else if (!isa<VectorType>(CI->getType()))
+        Size += 1;
+    }
+  }
+
+  // Threading through a switch statement is particularly profitable.  If this
+  // block ends in a switch, decrease its cost to make it more likely to
+  // happen.
+  if (isa<SwitchInst>(I))
+    Size = Size > 6 ? Size-6 : 0;
+
+  return Size;
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+  // If this block has a single predecessor, and if that pred has a single
+  // successor, merge the blocks.  This encourages recursive jump threading
+  // because now the condition in this block can be threaded through
+  // predecessors of our predecessor block.
+  if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+    if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+        SinglePred != BB) {
+      // If SinglePred was a loop header, BB becomes one.
+      if (LoopHeaders.erase(SinglePred))
+        LoopHeaders.insert(BB);
+
+      // Remember if SinglePred was the entry block of the function.  If so, we
+      // will need to move BB back to the entry position.
+      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+      MergeBasicBlockIntoOnlyPred(BB);
+
+      if (isEntry && BB != &BB->getParent()->getEntryBlock())
+        BB->moveBefore(&BB->getParent()->getEntryBlock());
+      return true;
+    }
+
+  // See if this block ends with a branch or switch.  If so, see if the
+  // condition is a phi node.  If so, and if an entry of the phi node is a
+  // constant, we can thread the block.
+  Value *Condition;
+  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+    // Can't thread an unconditional jump.
+    if (BI->isUnconditional()) return false;
+    Condition = BI->getCondition();
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+    Condition = SI->getCondition();
+  else
+    return false; // Must be an invoke.
+
+  // If the terminator of this block is branching on a constant, simplify the
+  // terminator to an unconditional branch.  This can occur due to threading in
+  // other blocks.
+  if (isa<ConstantInt>(Condition)) {
+    DOUT << "  In block '" << BB->getNameStart()
+         << "' folding terminator: " << *BB->getTerminator();
+    ++NumFolds;
+    ConstantFoldTerminator(BB);
+    return true;
+  }
+
+  // If the terminator is branching on an undef, we can pick any of the
+  // successors to branch to.  Since this is arbitrary, we pick the successor
+  // with the fewest predecessors.  This should reduce the in-degree of the
+  // others.
+  if (isa<UndefValue>(Condition)) {
+    TerminatorInst *BBTerm = BB->getTerminator();
+    unsigned MinSucc = 0;
+    BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+    // Compute the successor with the minimum number of predecessors.
+    unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+    for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+      TestBB = BBTerm->getSuccessor(i);
+      unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+      if (NumPreds < MinNumPreds)
+        MinSucc = i;
+    }
+
+    // Fold the branch/switch.
+    for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+      if (i == MinSucc) continue;
+      BBTerm->getSuccessor(i)->removePredecessor(BB);
+    }
+
+    DOUT << "  In block '" << BB->getNameStart()
+         << "' folding undef terminator: " << *BBTerm;
+    BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm);
+    BBTerm->eraseFromParent();
+    return true;
+  }
+
+  Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+  // If the condition is an instruction defined in another block, see if a
+  // predecessor has the same condition:
+  //      br COND, BBX, BBY
+  //  BBX:
+  //      br COND, BBZ, BBW
+  if (!Condition->hasOneUse() && // Multiple uses.
+      (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
+    pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+    if (isa<BranchInst>(BB->getTerminator())) {
+      for (; PI != E; ++PI)
+        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+          if (PBI->isConditional() && PBI->getCondition() == Condition &&
+              ProcessBranchOnDuplicateCond(*PI, BB))
+            return true;
+    } else {
+      assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
+      for (; PI != E; ++PI)
+        if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+          if (PSI->getCondition() == Condition &&
+              ProcessSwitchOnDuplicateCond(*PI, BB))
+            return true;
+    }
+  }
+
+  // If there is only a single predecessor of this block, nothing to fold.
+  if (BB->getSinglePredecessor())
+    return false;
+
+  // All the rest of our checks depend on the condition being an instruction.
+  if (CondInst == 0)
+    return false;
+
+  // See if this is a phi node in the current block.
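+  // (Editor's example: given "br i1 %p, ..." where
+  //    %p = phi i1 [ true, %p1 ], [ %c, %p2 ]
+  // the edge from %p1 is known to take the true successor, which is exactly
+  // the shape ProcessJumpOnPHI handles below.)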
+  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+    if (PN->getParent() == BB)
+      return ProcessJumpOnPHI(PN);
+
+  // If this is a conditional branch whose condition is and/or of a phi, try to
+  // simplify it.
+  if ((CondInst->getOpcode() == Instruction::And ||
+       CondInst->getOpcode() == Instruction::Or) &&
+      isa<BranchInst>(BB->getTerminator()) &&
+      ProcessBranchOnLogical(CondInst, BB,
+                             CondInst->getOpcode() == Instruction::And))
+    return true;
+
+  // If we have "br (phi != 42)" and the phi node has any constant values as
+  // operands, we can thread through this block.
+  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
+    if (isa<PHINode>(CondCmp->getOperand(0)) &&
+        isa<Constant>(CondCmp->getOperand(1)) &&
+        ProcessBranchOnCompare(CondCmp, BB))
+      return true;
+
+  // Check for some cases that are worth simplifying.  Right now we want to
+  // look for loads that are used by a switch or by the condition for the
+  // branch.  If we see one, check to see if it's partially redundant.  If so,
+  // insert a PHI which can then be used to thread the values.
+  //
+  // This is particularly important because reg2mem inserts loads and stores
+  // all over the place, and this blocks jump threading if we don't zap them.
+  Value *SimplifyValue = CondInst;
+  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+    if (isa<Constant>(CondCmp->getOperand(1)))
+      SimplifyValue = CondCmp->getOperand(0);
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+    if (SimplifyPartiallyRedundantLoad(LI))
+      return true;
+
+  // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+  // "(X == 4)" thread through this block.
+
+  return false;
+}
+
+/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
+/// block that jump on exactly the same condition.  This means that we almost
+/// always know the direction of the edge in the DESTBB:
+///  PREDBB:
+///     br COND, DESTBB, BBY
+///  DESTBB:
+///     br COND, BBZ, BBW
+///
+/// If DESTBB has multiple predecessors, we can't just constant fold the branch
+/// in DESTBB, we have to thread over it.
+bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
+                                                 BasicBlock *BB) {
+  BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
+
+  // If both successors of PredBB go to DESTBB, we don't know anything.  We can
+  // fold the branch to an unconditional one, which allows other recursive
+  // simplifications.
+  bool BranchDir;
+  if (PredBI->getSuccessor(1) != BB)
+    BranchDir = true;
+  else if (PredBI->getSuccessor(0) != BB)
+    BranchDir = false;
+  else {
+    DOUT << "  In block '" << PredBB->getNameStart()
+         << "' folding terminator: " << *PredBB->getTerminator();
+    ++NumFolds;
+    ConstantFoldTerminator(PredBB);
+    return true;
+  }
+
+  BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
+
+  // If the dest block has one predecessor, just fix the branch condition to a
+  // constant and fold it.
+  if (BB->getSinglePredecessor()) {
+    DOUT << "  In block '" << BB->getNameStart()
+         << "' folding condition to '" << BranchDir << "': "
+         << *BB->getTerminator();
+    ++NumFolds;
+    DestBI->setCondition(ConstantInt::get(Type::Int1Ty, BranchDir));
+    ConstantFoldTerminator(BB);
+    return true;
+  }
+
+  // Otherwise we need to thread from PredBB to DestBB's successor which
+  // involves code duplication.  Check to see if it is worth it.
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+  if (JumpThreadCost > Threshold) {
+    DOUT << "  Not threading BB '" << BB->getNameStart()
+         << "' - Cost is too high: " << JumpThreadCost << "\n";
+    return false;
+  }
+
+  // Next, figure out which successor we are threading to.
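+  // (Editor's note: for a conditional BranchInst, getSuccessor(0) is the
+  // 'true' destination and getSuccessor(1) is the 'false' one, so a branch
+  // known to be true (BranchDir == true) selects getSuccessor(!true), i.e.
+  // successor 0.)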
+ BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +} + +/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that +/// block that switch on exactly the same condition. This means that we almost +/// always know the direction of the edge in the DESTBB: +/// PREDBB: +/// switch COND [... DESTBB, BBY ... ] +/// DESTBB: +/// switch COND [... BBZ, BBW ] +/// +/// Optimizing switches like this is very important, because simplifycfg builds +/// switches out of repeated 'if' conditions. +bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, + BasicBlock *DestBB) { + // Can't thread edge to self. + if (PredBB == DestBB) + return false; + + + SwitchInst *PredSI = cast(PredBB->getTerminator()); + SwitchInst *DestSI = cast(DestBB->getTerminator()); + + // There are a variety of optimizations that we can potentially do on these + // blocks: we order them from most to least preferable. + + // If DESTBB *just* contains the switch, then we can forward edges from PREDBB + // directly to their destination. This does not introduce *any* code size + // growth. Skip debug info first. + BasicBlock::iterator BBI = DestBB->begin(); + while (isa(BBI)) + BBI++; + + // FIXME: Thread if it just contains a PHI. + if (isa(BBI)) { + bool MadeChange = false; + // Ignore the default edge for now. + for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) { + ConstantInt *DestVal = DestSI->getCaseValue(i); + BasicBlock *DestSucc = DestSI->getSuccessor(i); + + // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if + // PredSI has an explicit case for it. If so, forward. If it is covered + // by the default case, we can't update PredSI. + unsigned PredCase = PredSI->findCaseValue(DestVal); + if (PredCase == 0) continue; + + // If PredSI doesn't go to DestBB on this value, then it won't reach the + // case on this condition. + if (PredSI->getSuccessor(PredCase) != DestBB && + DestSI->getSuccessor(i) != DestBB) + continue; + + // Otherwise, we're safe to make the change. Make sure that the edge from + // DestSI to DestSucc is not critical and has no PHI nodes. + DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI; + DOUT << "THROUGH: " << *DestSI; + + // If the destination has PHI nodes, just split the edge for updating + // simplicity. + if (isa(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){ + SplitCriticalEdge(DestSI, i, this); + DestSucc = DestSI->getSuccessor(i); + } + FoldSingleEntryPHINodes(DestSucc); + PredSI->setSuccessor(PredCase, DestSucc); + MadeChange = true; + } + + if (MadeChange) + return true; + } + + return false; +} + + +/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant +/// load instruction, eliminate it by replacing it with a PHI node. This is an +/// important optimization that encourages jump threading, and needs to be run +/// interlaced with other jump threading tasks. +bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { + // Don't hack volatile loads. + if (LI->isVolatile()) return false; + + // If the load is defined in a block with exactly one predecessor, it can't be + // partially redundant. + BasicBlock *LoadBB = LI->getParent(); + if (LoadBB->getSinglePredecessor()) + return false; + + Value *LoadedPtr = LI->getOperand(0); + + // If the loaded operand is defined in the LoadBB, it can't be available. 
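+  // (Editor's note: a pointer computed inside LoadBB cannot dominate
+  // LoadBB's predecessors, so no predecessor can already hold a loaded copy
+  // of it; lifting that restriction is what the PHI translation mentioned
+  // below would buy.)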
+  // FIXME: Could do PHI translation, that would be fun :)
+  if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+    if (PtrOp->getParent() == LoadBB)
+      return false;
+
+  // Scan a few instructions up from the load, to see if it is obviously live
+  // at the entry to its block.
+  BasicBlock::iterator BBIt = LI;
+
+  if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
+                                                     BBIt, 6)) {
+    // If the value of the load is locally available within the block, just use
+    // it.  This frequently occurs for reg2mem'd allocas.
+    //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+    // If the returned value is the load itself, replace with an undef.  This
+    // can only happen in dead loops.
+    if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+    LI->replaceAllUsesWith(AvailableVal);
+    LI->eraseFromParent();
+    return true;
+  }
+
+  // Otherwise, if we scanned the whole block and got to the top of the block,
+  // we know the block is locally transparent to the load.  If not, something
+  // might clobber its value.
+  if (BBIt != LoadBB->begin())
+    return false;
+
+
+  SmallPtrSet<BasicBlock*, 8> PredsScanned;
+  typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+  AvailablePredsTy AvailablePreds;
+  BasicBlock *OneUnavailablePred = 0;
+
+  // If we got here, the loaded value is transparent through to the start of
+  // the block.  Check to see if it is available in any of the predecessor
+  // blocks.
+  for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+       PI != PE; ++PI) {
+    BasicBlock *PredBB = *PI;
+
+    // If we already scanned this predecessor, skip it.
+    if (!PredsScanned.insert(PredBB))
+      continue;
+
+    // Scan the predecessor to see if the value is available in the pred.
+    BBIt = PredBB->end();
+    Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+    if (!PredAvailable) {
+      OneUnavailablePred = PredBB;
+      continue;
+    }
+
+    // If so, this load is partially redundant.  Remember this info so that we
+    // can create a PHI node.
+    AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+  }
+
+  // If the loaded value isn't available in any predecessor, it isn't partially
+  // redundant.
+  if (AvailablePreds.empty()) return false;
+
+  // Okay, the loaded value is available in at least one (and maybe all!)
+  // predecessors.  If the value is unavailable in more than one unique
+  // predecessor, we want to insert a merge block for those common
+  // predecessors.  This ensures that we only have to insert one reload, thus
+  // not increasing code size.
+  BasicBlock *UnavailablePred = 0;
+
+  // If there is exactly one predecessor where the value is unavailable, the
+  // already computed 'OneUnavailablePred' block is it.  If it ends in an
+  // unconditional branch, we know that it isn't a critical edge.
+  if (PredsScanned.size() == AvailablePreds.size()+1 &&
+      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+    UnavailablePred = OneUnavailablePred;
+  } else if (PredsScanned.size() != AvailablePreds.size()) {
+    // Otherwise, we had multiple unavailable predecessors or we had a critical
+    // edge from the one.
+    SmallVector<BasicBlock*, 8> PredsToSplit;
+    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+    for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+      AvailablePredSet.insert(AvailablePreds[i].first);
+
+    // Add all the unavailable predecessors to the PredsToSplit list.
+    for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+         PI != PE; ++PI)
+      if (!AvailablePredSet.count(*PI))
+        PredsToSplit.push_back(*PI);
+
+    // Split them out to their own block.
+ UnavailablePred = + SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), + "thread-split", this); + } + + // If the value isn't available in all predecessors, then there will be + // exactly one where it isn't available. Insert a load on that edge and add + // it to the AvailablePreds list. + if (UnavailablePred) { + assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && + "Can't handle critical edge here!"); + Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", + UnavailablePred->getTerminator()); + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); + } + + // Now we know that each predecessor of this block has a value in + // AvailablePreds, sort them for efficient access as we're walking the preds. + array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); + + // Create a PHI node at the start of the block for the PRE'd load value. + PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + PN->takeName(LI); + + // Insert new entries into the PHI for each predecessor. A single block may + // have multiple entries here. + for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; + ++PI) { + AvailablePredsTy::iterator I = + std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), + std::make_pair(*PI, (Value*)0)); + + assert(I != AvailablePreds.end() && I->first == *PI && + "Didn't find entry for predecessor!"); + + PN->addIncoming(I->second, I->first); + } + + //cerr << "PRE: " << *LI << *PN << "\n"; + + LI->replaceAllUsesWith(PN); + LI->eraseFromParent(); + + return true; +} + + +/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in +/// the current block. See if there are any simplifications we can do based on +/// inputs to the phi node. +/// +bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { + // See if the phi node has any constant values. If so, we can determine where + // the corresponding predecessor will branch. + ConstantInt *PredCst = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if ((PredCst = dyn_cast(PN->getIncomingValue(i)))) + break; + + // If no incoming value has a constant, we don't know the destination of any + // predecessors. + if (PredCst == 0) + return false; + + // See if the cost of duplicating this block is low enough. + BasicBlock *BB = PN->getParent(); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DOUT << " Not threading BB '" << BB->getNameStart() + << "' - Cost is too high: " << JumpThreadCost << "\n"; + return false; + } + + // If so, we can actually do this threading. Merge any common predecessors + // that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + + // Next, figure out which successor we are threading to. + BasicBlock *SuccBB; + if (BranchInst *BI = dyn_cast(BB->getTerminator())) + SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse()); + else { + SwitchInst *SI = cast(BB->getTerminator()); + SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); + } + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +} + +/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch +/// whose condition is an AND/OR where one side is PN. If PN has constant +/// operands that permit us to evaluate the condition for some operand, thread +/// through the block. 
For example with: +/// br (and X, phi(Y, Z, false)) +/// the predecessor corresponding to the 'false' will always jump to the false +/// destination of the branch. +/// +bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, + bool isAnd) { + // If this is a binary operator tree of the same AND/OR opcode, check the + // LHS/RHS. + if (BinaryOperator *BO = dyn_cast(V)) + if ((isAnd && BO->getOpcode() == Instruction::And) || + (!isAnd && BO->getOpcode() == Instruction::Or)) { + if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd)) + return true; + if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd)) + return true; + } + + // If this isn't a PHI node, we can't handle it. + PHINode *PN = dyn_cast(V); + if (!PN || PN->getParent() != BB) return false; + + // We can only do the simplification for phi nodes of 'false' with AND or + // 'true' with OR. See if we have any entries in the phi for this. + unsigned PredNo = ~0U; + ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) == PredCst) { + PredNo = i; + break; + } + } + + // If no match, bail out. + if (PredNo == ~0U) + return false; + + // See if the cost of duplicating this block is low enough. + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DOUT << " Not threading BB '" << BB->getNameStart() + << "' - Cost is too high: " << JumpThreadCost << "\n"; + return false; + } + + // If so, we can actually do this threading. Merge any common predecessors + // that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + + // Next, figure out which successor we are threading to. If this was an AND, + // the constant must be FALSE, and we must be targeting the 'false' block. + // If this is an OR, the constant must be TRUE, and we must be targeting the + // 'true' block. + BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +} + +/// ProcessBranchOnCompare - We found a branch on a comparison between a phi +/// node and a constant. If the PHI node contains any constants as inputs, we +/// can fold the compare for that edge and thread through it. +bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { + PHINode *PN = cast(Cmp->getOperand(0)); + Constant *RHS = cast(Cmp->getOperand(1)); + + // If the phi isn't in the current block, an incoming edge to this block + // doesn't control the destination. + if (PN->getParent() != BB) + return false; + + // We can do this simplification if any comparisons fold to true or false. + // See if any do. + Constant *PredCst = 0; + bool TrueDirection = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + PredCst = dyn_cast(PN->getIncomingValue(i)); + if (PredCst == 0) continue; + + Constant *Res; + if (ICmpInst *ICI = dyn_cast(Cmp)) + Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS); + else + Res = ConstantExpr::getFCmp(cast(Cmp)->getPredicate(), + PredCst, RHS); + // If this folded to a constant expr, we can't do anything. + if (ConstantInt *ResC = dyn_cast(Res)) { + TrueDirection = ResC->getZExtValue(); + break; + } + // If this folded to undef, just go the false way. + if (isa(Res)) { + TrueDirection = false; + break; + } + + // Otherwise, we can't fold this input. + PredCst = 0; + } + + // If no match, bail out. 
+ if (PredCst == 0) + return false; + + // See if the cost of duplicating this block is low enough. + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DOUT << " Not threading BB '" << BB->getNameStart() + << "' - Cost is too high: " << JumpThreadCost << "\n"; + return false; + } + + // If so, we can actually do this threading. Merge any common predecessors + // that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + + // Next, get our successor. + BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +} + + +/// ThreadEdge - We have decided that it is safe and profitable to thread an +/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this +/// change. +bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, + BasicBlock *SuccBB, unsigned JumpThreadCost) { + + // If threading to the same block as we come from, we would infinite loop. + if (SuccBB == BB) { + DOUT << " Not threading across BB '" << BB->getNameStart() + << "' - would thread to self!\n"; + return false; + } + + // If threading this would thread across a loop header, don't thread the edge. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DOUT << " Not threading from '" << PredBB->getNameStart() + << "' across loop header BB '" << BB->getNameStart() + << "' to dest BB '" << SuccBB->getNameStart() + << "' - it might create an irreducible loop!\n"; + return false; + } + + // And finally, do it! + DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" + << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost + << ", across block:\n " + << *BB << "\n"; + + // Jump Threading can not update SSA properties correctly if the values + // defined in the duplicated block are used outside of the block itself. For + // this reason, we spill all values that are used outside of BB to the stack. + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + if (!I->isUsedOutsideOfBlock(BB)) + continue; + + // We found a use of I outside of BB. Create a new stack slot to + // break this inter-block usage pattern. + DemoteRegToStack(*I); + } + + // We are going to have to map operands from the original BB block to the new + // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to + // account for entry from PredBB. + DenseMap ValueMapping; + + BasicBlock *NewBB = + BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); + NewBB->moveAfter(PredBB); + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + // Clone the non-phi instructions of BB into NewBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; !isa(BI); ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getNameStart()); + NewBB->getInstList().push_back(New); + ValueMapping[BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast(New->getOperand(i))) + if (Value *Remapped = ValueMapping[Inst]) + New->setOperand(i, Remapped); + } + + // We didn't copy the terminator from BB over to NewBB, because there is now + // an unconditional jump to SuccBB. Insert the unconditional jump. 
+ BranchInst::Create(SuccBB, NewBB); + + // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the + // PHI nodes for NewBB now. + for (BasicBlock::iterator PNI = SuccBB->begin(); isa(PNI); ++PNI) { + PHINode *PN = cast(PNI); + // Ok, we have a PHI node. Figure out what the incoming value was for the + // DestBlock. + Value *IV = PN->getIncomingValueForBlock(BB); + + // Remap the value if necessary. + if (Instruction *Inst = dyn_cast(IV)) + if (Value *MappedIV = ValueMapping[Inst]) + IV = MappedIV; + PN->addIncoming(IV, NewBB); + } + + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to + // NewBB instead of BB. This eliminates predecessors from BB, which requires + // us to simplify any PHI nodes in BB. + TerminatorInst *PredTerm = PredBB->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) + if (PredTerm->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredTerm->setSuccessor(i, NewBB); + } + + // At this point, the IR is fully up to date and consistent. Do a quick scan + // over the new instructions and zap any that are constants or dead. This + // frequently happens because of phi translation. + BI = NewBB->begin(); + for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { + Instruction *Inst = BI++; + if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + Inst->replaceAllUsesWith(C); + Inst->eraseFromParent(); + continue; + } + + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + + // Threaded an edge! + ++NumThreads; + return true; +} diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp new file mode 100644 index 000000000000..102146945a0b --- /dev/null +++ b/lib/Transforms/Scalar/LICM.cpp @@ -0,0 +1,885 @@ +//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion, attempting to remove as much +// code from the body of a loop as possible. It does this by either hoisting +// code into the preheader block, or by sinking code to the exit blocks if it is +// safe. This pass also promotes must-aliased memory locations in the loop to +// live in registers, thus hoisting and sinking "invariant" loads and stores. +// +// This pass uses alias analysis for two purposes: +// +// 1. Moving loop invariant loads and calls out of loops. If we can determine +// that a load or call inside of a loop never aliases anything stored to, +// we can hoist it or sink it like any other instruction. +// 2. Scalar Promotion of Memory - If there is a store instruction inside of +// the loop, we try to move the store to happen AFTER the loop instead of +// inside of the loop. This can only happen if a few conditions are true: +// A. The pointer stored through is loop invariant +// B. There are no stores or loads in the loop which _may_ alias the +// pointer. There are no calls in the loop which mod/ref the pointer. +// If these conditions are true, we can promote the loads and stores in the +// loop of the pointer to use a temporary alloca'd variable. We then use +// the mem2reg functionality to construct the appropriate SSA form for the +// variable. 
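+//
+// A typical way to schedule this pass with the C++ API of this release is
+// sketched below (editor's illustration, not part of this file; the data
+// layout string is a placeholder):
+//
+//   PassManager PM;
+//   PM.add(new TargetData("e-p:32:32"));  // target info used by alias analysis
+//   PM.add(createLICMPass());             // declared in Transforms/Scalar.h
+//   PM.run(M);                            // M is an llvm::Module
+//
+// LoopSimplify is pulled in automatically through addRequiredID below.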
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk      , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted   , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted  , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+                 cl::desc("Disable memory promotion in LICM pass"));
+
+// This feature is currently disabled by default because CodeGen is not yet
+// capable of rematerializing these constants in PIC mode, so it can lead to
+// degraded performance.  Compile test/CodeGen/X86/remat-constant.ll with
+// -relocation-model=pic to see an example of this.
+static cl::opt<bool>
+EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,
+                         cl::desc("Enable hoisting/sinking of constant "
+                                  "global variables"));
+
+namespace {
+  struct VISIBILITY_HIDDEN LICM : public LoopPass {
+    static char ID; // Pass identification, replacement for typeid
+    LICM() : LoopPass(&ID) {}
+
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    /// This transformation requires natural loop information & requires that
+    /// loop preheaders be inserted into the CFG...
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequired<LoopInfo>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved<DominanceFrontier>();
+    }
+
+    bool doFinalization() {
+      // Free the values stored in the map
+      for (std::map<Loop*, AliasSetTracker*>::iterator
+             I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
+        delete I->second;
+
+      LoopToAliasMap.clear();
+      return false;
+    }
+
+  private:
+    // Various analyses that we use...
+    AliasAnalysis *AA;       // Current AliasAnalysis information
+    LoopInfo      *LI;       // Current LoopInfo
+    DominatorTree *DT;       // Dominator Tree for the current Loop...
+    DominanceFrontier *DF;   // Current Dominance Frontier
+
+    // State that is updated as we process loops
+    bool Changed;            // Set to true when we change anything.
+    BasicBlock *Preheader;   // The preheader block of the current loop...
+    Loop *CurLoop;           // The current loop we are working on...
+    AliasSetTracker *CurAST; // AliasSet information for the current loop...
+    std::map<Loop*, AliasSetTracker*> LoopToAliasMap;
+
+    /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+    void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+    /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+    /// set.
+ void deleteAnalysisValue(Value *V, Loop *L); + + /// SinkRegion - Walk the specified region of the CFG (defined by all blocks + /// dominated by the specified block, and that are in the current loop) in + /// reverse depth first order w.r.t the DominatorTree. This allows us to + /// visit uses before definitions, allowing us to sink a loop body in one + /// pass without iteration. + /// + void SinkRegion(DomTreeNode *N); + + /// HoistRegion - Walk the specified region of the CFG (defined by all + /// blocks dominated by the specified block, and that are in the current + /// loop) in depth first order w.r.t the DominatorTree. This allows us to + /// visit definitions before uses, allowing us to hoist a loop body in one + /// pass without iteration. + /// + void HoistRegion(DomTreeNode *N); + + /// inSubLoop - Little predicate that returns true if the specified basic + /// block is in a subloop of the current one, not the current one itself. + /// + bool inSubLoop(BasicBlock *BB) { + assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); + for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) + if ((*I)->contains(BB)) + return true; // A subloop actually contains this block! + return false; + } + + /// isExitBlockDominatedByBlockInLoop - This method checks to see if the + /// specified exit block of the loop is dominated by the specified block + /// that is in the body of the loop. We use these constraints to + /// dramatically limit the amount of the dominator tree that needs to be + /// searched. + bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, + BasicBlock *BlockInLoop) const { + // If the block in the loop is the loop header, it must be dominated! + BasicBlock *LoopHeader = CurLoop->getHeader(); + if (BlockInLoop == LoopHeader) + return true; + + DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); + DomTreeNode *IDom = DT->getNode(ExitBlock); + + // Because the exit block is not in the loop, we know we have to get _at + // least_ its immediate dominator. + do { + // Get next Immediate Dominator. + IDom = IDom->getIDom(); + + // If we have got to the header of the loop, then the instructions block + // did not dominate the exit node, so we can't hoist it. + if (IDom->getBlock() == LoopHeader) + return false; + + } while (IDom != BlockInLoopNode); + + return true; + } + + /// sink - When an instruction is found to only be used outside of the loop, + /// this function moves it to the exit blocks and patches up SSA form as + /// needed. + /// + void sink(Instruction &I); + + /// hoist - When an instruction is found to only use loop invariant operands + /// that is safe to hoist, this instruction is called to do the dirty work. + /// + void hoist(Instruction &I); + + /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it + /// is not a trapping instruction or if it is a trapping instruction and is + /// guaranteed to execute. + /// + bool isSafeToExecuteUnconditionally(Instruction &I); + + /// pointerInvalidatedByLoop - Return true if the body of this loop may + /// store into the memory location pointed to by V. + /// + bool pointerInvalidatedByLoop(Value *V, unsigned Size) { + // Check to see if any of the basic blocks in CurLoop invalidate *V. 
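+      // (Editor's note: the tracker marks a pointer's alias set as Mod when
+      // any instruction in the loop may write to it; a load is hoistable
+      // only while that bit stays clear.)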
+      return CurAST->getAliasSetForPointer(V, Size).isMod();
+    }
+
+    bool canSinkOrHoistInst(Instruction &I);
+    bool isLoopInvariantInst(Instruction &I);
+    bool isNotUsedInLoop(Instruction &I);
+
+    /// PromoteValuesInLoop - Look at the stores in the loop and promote as
+    /// many to scalars as we can.
+    ///
+    void PromoteValuesInLoop();
+
+    /// FindPromotableValuesInLoop - Check the current loop for stores to
+    /// definite pointers, which are not loaded and stored through may aliases.
+    /// If these are found, create an alloca for the value, add it to the
+    /// PromotedValues list, and keep track of the mapping from value to
+    /// alloca...
+    ///
+    void FindPromotableValuesInLoop(
+      std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
+      std::map<Value*, AllocaInst*> &Val2AlMap);
+  };
+}
+
+char LICM::ID = 0;
+static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop.  Note, alias info for inner
+/// loop is not preserved so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+  Changed = false;
+
+  // Get our Loop and Alias Analysis information...
+  LI = &getAnalysis<LoopInfo>();
+  AA = &getAnalysis<AliasAnalysis>();
+  DF = &getAnalysis<DominanceFrontier>();
+  DT = &getAnalysis<DominatorTree>();
+
+  CurAST = new AliasSetTracker(*AA);
+  // Collect Alias info from subloops
+  for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+       LoopItr != LoopItrE; ++LoopItr) {
+    Loop *InnerL = *LoopItr;
+    AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
+    assert(InnerAST && "Where is my AST?");
+
+    // What if InnerLoop was modified by other passes ?
+    CurAST->add(*InnerAST);
+  }
+
+  CurLoop = L;
+
+  // Get the preheader block to move instructions into...
+  Preheader = L->getLoopPreheader();
+  assert(Preheader &&
+         "Preheader insertion pass guarantees we have a preheader!");
+
+  // Loop over the body of this loop, looking for calls, invokes, and stores.
+  // Because subloops have already been incorporated into AST, we skip blocks
+  // in subloops.
+  //
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I) {
+    BasicBlock *BB = *I;
+    if (LI->getLoopFor(BB) == L)  // Ignore blocks in subloops...
+      CurAST->add(*BB);           // Incorporate the specified basic block
+  }
+
+  // We want to visit all of the instructions in this loop... that are not
+  // parts of our subloops (they have already had their invariants hoisted out
+  // of their loop, into this loop, so there is no need to process the BODIES
+  // of the subloops).
+  //
+  // Traverse the body of the loop in depth first order on the dominator tree
+  // so that we are guaranteed to see definitions before we see uses.  This
+  // allows us to sink instructions in one pass, without iteration.  After
+  // sinking instructions, we perform another pass to hoist them out of the
+  // loop.
+  //
+  SinkRegion(DT->getNode(L->getHeader()));
+  HoistRegion(DT->getNode(L->getHeader()));
+
+  // Now that all loop invariants have been removed from the loop, promote any
+  // memory references to scalars that we can...
+  if (!DisablePromotion)
+    PromoteValuesInLoop();
+
+  // Clear out the loop's state information for the next iteration
+  CurLoop = 0;
+  Preheader = 0;
+
+  LoopToAliasMap[L] = CurAST;
+  return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree.
This allows us to visit +/// uses before definitions, allowing us to sink a loop body in one pass without +/// iteration. +/// +void LICM::SinkRegion(DomTreeNode *N) { + assert(N != 0 && "Null dominator tree node?"); + BasicBlock *BB = N->getBlock(); + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) return; + + // We are processing blocks in reverse dfo, so process children first... + const std::vector &Children = N->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + SinkRegion(Children[i]); + + // Only need to process the contents of this block if it is not part of a + // subloop (which would already have been processed). + if (inSubLoop(BB)) return; + + for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { + Instruction &I = *--II; + + // Check to see if we can sink this instruction to the exit blocks + // of the loop. We can do this if the all users of the instruction are + // outside of the loop. In this case, it doesn't even matter if the + // operands of the instruction are loop invariant. + // + if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) { + ++II; + sink(I); + } + } +} + + +/// HoistRegion - Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in depth +/// first order w.r.t the DominatorTree. This allows us to visit definitions +/// before uses, allowing us to hoist a loop body in one pass without iteration. +/// +void LICM::HoistRegion(DomTreeNode *N) { + assert(N != 0 && "Null dominator tree node?"); + BasicBlock *BB = N->getBlock(); + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) return; + + // Only need to process the contents of this block if it is not part of a + // subloop (which would already have been processed). + if (!inSubLoop(BB)) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { + Instruction &I = *II++; + + // Try hoisting the instruction out to the preheader. We can only do this + // if all of the operands of the instruction are loop invariant and if it + // is safe to hoist the instruction. + // + if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && + isSafeToExecuteUnconditionally(I)) + hoist(I); + } + + const std::vector &Children = N->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + HoistRegion(Children[i]); +} + +/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this +/// instruction. +/// +bool LICM::canSinkOrHoistInst(Instruction &I) { + // Loads have extra constraints we have to verify before we can hoist them. + if (LoadInst *LI = dyn_cast(&I)) { + if (LI->isVolatile()) + return false; // Don't hoist volatile loads! + + // Loads from constant memory are always safe to move, even if they end up + // in the same alias set as something that ends up being modified. + if (EnableLICMConstantMotion && + AA->pointsToConstantMemory(LI->getOperand(0))) + return true; + + // Don't hoist loads which have may-aliased stores in loop. + unsigned Size = 0; + if (LI->getType()->isSized()) + Size = AA->getTargetData().getTypeStoreSize(LI->getType()); + return !pointerInvalidatedByLoop(LI->getOperand(0), Size); + } else if (CallInst *CI = dyn_cast(&I)) { + // Handle obvious cases efficiently. 
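+    // (Editor's sketch of the two easy cases handled below: a call that never
+    // touches memory, e.g. a 'readnone' intrinsic such as llvm.sqrt, can
+    // always be moved; a 'readonly' call can be moved only if the loop
+    // contains no writes at all.)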
+    AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+    if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+      return true;
+    else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+      // If this call only reads from memory and there are no writes to memory
+      // in the loop, we can hoist or sink the call as appropriate.
+      bool FoundMod = false;
+      for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+           I != E; ++I) {
+        AliasSet &AS = *I;
+        if (!AS.isForwardingAliasSet() && AS.isMod()) {
+          FoundMod = true;
+          break;
+        }
+      }
+      if (!FoundMod) return true;
+    }
+
+    // FIXME: This should use mod/ref information to see if we can hoist or
+    // sink the call.
+
+    return false;
+  }
+
+  // Otherwise these instructions are hoistable/sinkable
+  return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+         isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+         isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+         isa<ShuffleVectorInst>(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop.  If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
+bool LICM::isNotUsedInLoop(Instruction &I) {
+  for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+    if (PHINode *PN = dyn_cast<PHINode>(User)) {
+      // PHI node uses occur in predecessor blocks!
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == &I)
+          if (CurLoop->contains(PN->getIncomingBlock(i)))
+            return false;
+    } else if (CurLoop->contains(User->getParent())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+
+/// isLoopInvariantInst - Return true if all operands of this instruction are
+/// loop invariant.  We also filter out non-hoistable instructions here just
+/// for efficiency.
+///
+bool LICM::isLoopInvariantInst(Instruction &I) {
+  // The instruction is loop invariant if all of its operands are
+  // loop-invariant
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+    if (!CurLoop->isLoopInvariant(I.getOperand(i)))
+      return false;
+
+  // If we got this far, the instruction is loop invariant!
+  return true;
+}
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as
+/// needed.  This method is guaranteed to remove the original instruction from
+/// its position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+  DOUT << "LICM sinking instruction: " << I;
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  CurLoop->getExitBlocks(ExitBlocks);
+
+  if (isa<LoadInst>(I)) ++NumMovedLoads;
+  else if (isa<CallInst>(I)) ++NumMovedCalls;
+  ++NumSunk;
+  Changed = true;
+
+  // The case where there is only a single exit node of this loop is common
+  // enough that we handle it as a special (more efficient) case.  It is more
+  // efficient to handle because there are no PHI nodes that need to be placed.
+  if (ExitBlocks.size() == 1) {
+    if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+      // Instruction is not used, just delete it.
+      CurAST->deleteValue(&I);
+      if (!I.use_empty()) // If I has users in unreachable blocks, eliminate.
+        I.replaceAllUsesWith(UndefValue::get(I.getType()));
+      I.eraseFromParent();
+    } else {
+      // Move the instruction to the start of the exit block, after any PHI
+      // nodes in it.
+ I.removeFromParent(); + + BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); + ExitBlocks[0]->getInstList().insert(InsertPt, &I); + } + } else if (ExitBlocks.empty()) { + // The instruction is actually dead if there ARE NO exit blocks. + CurAST->deleteValue(&I); + if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + } else { + // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to + // do all of the hard work of inserting PHI nodes as necessary. We convert + // the value into a stack object to get it to do this. + + // Firstly, we create a stack object to hold the value... + AllocaInst *AI = 0; + + if (I.getType() != Type::VoidTy) { + AI = new AllocaInst(I.getType(), 0, I.getName(), + I.getParent()->getParent()->getEntryBlock().begin()); + CurAST->add(AI); + } + + // Secondly, insert load instructions for each use of the instruction + // outside of the loop. + while (!I.use_empty()) { + Instruction *U = cast(I.use_back()); + + // If the user is a PHI Node, we actually have to insert load instructions + // in all predecessor blocks, not in the PHI block itself! + if (PHINode *UPN = dyn_cast(U)) { + // Only insert into each predecessor once, so that we don't have + // different incoming values from the same block! + std::map InsertedBlocks; + for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) + if (UPN->getIncomingValue(i) == &I) { + BasicBlock *Pred = UPN->getIncomingBlock(i); + Value *&PredVal = InsertedBlocks[Pred]; + if (!PredVal) { + // Insert a new load instruction right before the terminator in + // the predecessor block. + PredVal = new LoadInst(AI, "", Pred->getTerminator()); + CurAST->add(cast(PredVal)); + } + + UPN->setIncomingValue(i, PredVal); + } + + } else { + LoadInst *L = new LoadInst(AI, "", U); + U->replaceUsesOfWith(&I, L); + CurAST->add(L); + } + } + + // Thirdly, insert a copy of the instruction in each exit block of the loop + // that is dominated by the instruction, storing the result into the memory + // location. Be careful not to insert the instruction into any particular + // basic block more than once. + std::set InsertedBlocks; + BasicBlock *InstOrigBB = I.getParent(); + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { + // If we haven't already processed this exit block, do so now. + if (InsertedBlocks.insert(ExitBlock).second) { + // Insert the code after the last PHI node... + BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. + Instruction *New; + if (InsertedBlocks.size() == 1) { + I.removeFromParent(); + ExitBlock->getInstList().insert(InsertPt, &I); + New = &I; + } else { + New = I.clone(); + CurAST->copyValue(&I, New); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); + } + + // Now that we have inserted the instruction, store it into the alloca + if (AI) new StoreInst(New, AI, InsertPt); + } + } + } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (InsertedBlocks.empty()) { + CurAST->deleteValue(&I); + I.eraseFromParent(); + } + + // Finally, promote the fine value to SSA form. 
+ if (AI) { + std::vector Allocas; + Allocas.push_back(AI); + PromoteMemToReg(Allocas, *DT, *DF, CurAST); + } + } +} + +/// hoist - When an instruction is found to only use loop invariant operands +/// that is safe to hoist, this instruction is called to do the dirty work. +/// +void LICM::hoist(Instruction &I) { + DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I; + + // Remove the instruction from its current basic block... but don't delete the + // instruction. + I.removeFromParent(); + + // Insert the new node in Preheader, before the terminator. + Preheader->getInstList().insert(Preheader->getTerminator(), &I); + + if (isa(I)) ++NumMovedLoads; + else if (isa(I)) ++NumMovedCalls; + ++NumHoisted; + Changed = true; +} + +/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is +/// not a trapping instruction or if it is a trapping instruction and is +/// guaranteed to execute. +/// +bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { + // If it is not a trapping instruction, it is always safe to hoist. + if (!Inst.isTrapping()) return true; + + // Otherwise we have to check to make sure that the instruction dominates all + // of the exit blocks. If it doesn't, then there is a path out of the loop + // which does not execute this instruction, so we can't hoist it. + + // If the instruction is in the header block for the loop (which is very + // common), it is always guaranteed to dominate the exit blocks. Since this + // is a common case, and can save some work, check it now. + if (Inst.getParent() == CurLoop->getHeader()) + return true; + + // It's always safe to load from a global or alloca. + if (isa(Inst)) + if (isa(Inst.getOperand(0)) || + isa(Inst.getOperand(0))) + return true; + + // Get the exit blocks for the current loop. + SmallVector ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + + // For each exit block, get the DT node and walk up the DT until the + // instruction's basic block is found or we exit the loop. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) + return false; + + return true; +} + + +/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking +/// stores out of the loop and moving loads to before the loop. We do this by +/// looping over the stores in the loop, looking for stores to Must pointers +/// which are loop invariant. We promote these memory locations to use allocas +/// instead. These allocas can easily be raised to register values by the +/// PromoteMem2Reg functionality. +/// +void LICM::PromoteValuesInLoop() { + // PromotedValues - List of values that are promoted out of the loop. Each + // value has an alloca instruction for it, and a canonical version of the + // pointer. + std::vector > PromotedValues; + std::map ValueToAllocaMap; // Map of ptr to alloca + + FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); + if (ValueToAllocaMap.empty()) return; // If there are values to promote. + + Changed = true; + NumPromoted += PromotedValues.size(); + + std::vector PointerValueNumbers; + + // Emit a copy from the value into the alloca'd value in the loop preheader + TerminatorInst *LoopPredInst = Preheader->getTerminator(); + for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { + Value *Ptr = PromotedValues[i].second; + + // If we are promoting a pointer value, update alias information for the + // inserted load. 
+ Value *LoadValue = 0; + if (isa(cast(Ptr->getType())->getElementType())) { + // Locate a load or store through the pointer, and assign the same value + // to LI as we are loading or storing. Since we know that the value is + // stored in this loop, this will always succeed. + for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); + UI != E; ++UI) + if (LoadInst *LI = dyn_cast(*UI)) { + LoadValue = LI; + break; + } else if (StoreInst *SI = dyn_cast(*UI)) { + if (SI->getOperand(1) == Ptr) { + LoadValue = SI->getOperand(0); + break; + } + } + assert(LoadValue && "No store through the pointer found!"); + PointerValueNumbers.push_back(LoadValue); // Remember this for later. + } + + // Load from the memory we are promoting. + LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); + + if (LoadValue) CurAST->copyValue(LoadValue, LI); + + // Store into the temporary alloca. + new StoreInst(LI, PromotedValues[i].first, LoopPredInst); + } + + // Scan the basic blocks in the loop, replacing uses of our pointers with + // uses of the allocas in question. + // + for (Loop::block_iterator I = CurLoop->block_begin(), + E = CurLoop->block_end(); I != E; ++I) { + BasicBlock *BB = *I; + // Rewrite all loads and stores in the block of the pointer... + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast(II)) { + std::map::iterator + I = ValueToAllocaMap.find(L->getOperand(0)); + if (I != ValueToAllocaMap.end()) + L->setOperand(0, I->second); // Rewrite load instruction... + } else if (StoreInst *S = dyn_cast(II)) { + std::map::iterator + I = ValueToAllocaMap.find(S->getOperand(1)); + if (I != ValueToAllocaMap.end()) + S->setOperand(1, I->second); // Rewrite store instruction... + } + } + } + + // Now that the body of the loop uses the allocas instead of the original + // memory locations, insert code to copy the alloca value back into the + // original memory location on all exits from the loop. Note that we only + // want to insert one copy of the code in each exit block, though the loop may + // exit to the same block more than once. + // + SmallPtrSet ProcessedBlocks; + + SmallVector ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + if (!ProcessedBlocks.insert(ExitBlocks[i])) + continue; + + // Copy all of the allocas into their memory locations. + BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); + Instruction *InsertPos = BI; + unsigned PVN = 0; + for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { + // Load from the alloca. + LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); + + // If this is a pointer type, update alias info appropriately. + if (isa(LI->getType())) + CurAST->copyValue(PointerValueNumbers[PVN++], LI); + + // Store into the memory we promoted. + new StoreInst(LI, PromotedValues[i].second, InsertPos); + } + } + + // Now that we have done the deed, use the mem2reg functionality to promote + // all of the new allocas we just created into real SSA registers. + // + std::vector PromotedAllocas; + PromotedAllocas.reserve(PromotedValues.size()); + for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) + PromotedAllocas.push_back(PromotedValues[i].first); + PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); +} + +/// FindPromotableValuesInLoop - Check the current loop for stores to definite +/// pointers, which are not loaded and stored through may aliases and are safe +/// for promotion. 
If these are found, create an alloca for the value, add it +/// to the PromotedValues list, and keep track of the mapping from value to +/// alloca. +void LICM::FindPromotableValuesInLoop( + std::vector > &PromotedValues, + std::map &ValueToAllocaMap) { + Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); + + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) { + AliasSet &AS = *I; + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + continue; + + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *V = AS.begin()->getValue(); + + // Check that all of the pointers in the alias set have the same type. We + // cannot (yet) promote a memory location that is loaded and stored in + // different sizes. + { + bool PointerOk = true; + for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) + if (V->getType() != I->getValue()->getType()) { + PointerOk = false; + break; + } + if (!PointerOk) + continue; + } + + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: + // + // for () { if (c) *P += 1; } + // + // into: + // + // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; + // + // is not safe, because *P may only be valid to access if 'c' is true. + // + // It is safe to promote P if all uses are direct load/stores and if at + // least one is guaranteed to be executed. + bool GuaranteedToExecute = false; + bool InvalidInst = false; + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // Ignore instructions not in this loop. + Instruction *Use = dyn_cast(*UI); + if (!Use || !CurLoop->contains(Use->getParent())) + continue; + + if (!isa(Use) && !isa(Use)) { + InvalidInst = true; + break; + } + + if (!GuaranteedToExecute) + GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); + } + + // If there is an non-load/store instruction in the loop, we can't promote + // it. If there isn't a guaranteed-to-execute instruction, we can't + // promote. + if (InvalidInst || !GuaranteedToExecute) + continue; + + const Type *Ty = cast(V->getType())->getElementType(); + AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); + PromotedValues.push_back(std::make_pair(AI, V)); + + // Update the AST and alias analysis. + CurAST->copyValue(V, AI); + + for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) + ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); + + DOUT << "LICM: Promoting value: " << *V << "\n"; + } +} + +/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. +void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { + AliasSetTracker *AST = LoopToAliasMap[L]; + if (!AST) + return; + + AST->copyValue(From, To); +} + +/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias +/// set. 
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+  AliasSetTracker *AST = LoopToAliasMap[L];
+  if (!AST)
+    return;
+
+  AST->deleteValue(V);
+}
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 000000000000..65126728c7fc
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,280 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass.  This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+  class VISIBILITY_HIDDEN LoopDeletion : public LoopPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopDeletion() : LoopPass(&ID) {}
+
+    // Possibly eliminate loop L if it is dead.
+    bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+    bool SingleDominatingExit(Loop* L,
+                              SmallVector<BasicBlock*, 4>& exitingBlocks);
+    bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+                    SmallVector<BasicBlock*, 4>& exitBlocks);
+    bool IsLoopInvariantInst(Instruction *I, Loop* L);
+
+    virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+      AU.addRequired<ScalarEvolution>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<LoopInfo>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<LoopInfo>();
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<DominanceFrontier>();
+    }
+  };
+}
+
+char LoopDeletion::ID = 0;
+static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+
+Pass* llvm::createLoopDeletionPass() {
+  return new LoopDeletion();
+}
+
+/// SingleDominatingExit - Checks that there is only a single block that
+/// branches out of the loop, and that it also dominates the latch block.
+/// Loops with multiple or non-latch-dominating exiting blocks could be dead,
+/// but we'd have to do more extensive analysis to make sure, for instance,
+/// that the control flow logic involved was or could be made loop-invariant.
+bool LoopDeletion::SingleDominatingExit(Loop* L,
+                                        SmallVector<BasicBlock*, 4>& exitingBlocks) {
+
+  if (exitingBlocks.size() != 1)
+    return false;
+
+  BasicBlock* latch = L->getLoopLatch();
+  if (!latch)
+    return false;
+
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+  return DT.dominates(exitingBlocks[0], latch);
+}
+
+/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to
+/// a loop, which is defined as being true if all of its operands are defined
+/// outside of the loop.  These instructions can be hoisted out of the loop
+/// if their results are needed.  This could be made more aggressive by
+/// recursively checking the operands for invariance, but it's not clear that
+/// it's worth it.
+bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) {
+  // PHI nodes are not loop invariant if defined in the loop.
+  if (isa<PHINode>(I) && L->contains(I->getParent()))
+    return false;
+
+  // The instruction is loop invariant if all of its operands are loop-invariant.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!L->isLoopInvariant(I->getOperand(i)))
+      return false;
+
+  // If we got this far, the instruction is loop invariant!
+  return true;
+}
+
+/// IsLoopDead - Determines if a loop is dead.  This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+                              SmallVector<BasicBlock*, 4>& exitingBlocks,
+                              SmallVector<BasicBlock*, 4>& exitBlocks) {
+  BasicBlock* exitingBlock = exitingBlocks[0];
+  BasicBlock* exitBlock = exitBlocks[0];
+
+  // Make sure that all PHI entries coming from the loop are loop invariant.
+  // Because the code is in LCSSA form, any values used outside of the loop
+  // must pass through a PHI in the exit block, meaning that this check is
+  // sufficient to guarantee that no loop-variant values are used outside
+  // of the loop.
+  BasicBlock::iterator BI = exitBlock->begin();
+  while (PHINode* P = dyn_cast<PHINode>(BI)) {
+    Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+    if (Instruction* I = dyn_cast<Instruction>(incoming))
+      if (!IsLoopInvariantInst(I, L))
+        return false;
+
+    BI++;
+  }
+
+  // Make sure that no instructions in the block have potential side-effects.
+  // This includes instructions that could write to memory, and loads that are
+  // marked volatile.  This could be made more aggressive by using aliasing
+  // information to identify readonly and readnone calls.
+  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+       LI != LE; ++LI) {
+    for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+         BI != BE; ++BI) {
+      if (BI->mayHaveSideEffects())
+        return false;
+    }
+  }
+
+  return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact
+/// the observable behavior of the program other than finite running time.
+/// Note we do ensure that this never removes a loop that might be infinite,
+/// as doing so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+  // We can only remove the loop if there is a preheader that we can
+  // branch from after removing it.
+  BasicBlock* preheader = L->getLoopPreheader();
+  if (!preheader)
+    return false;
+
+  // We can't remove loops that contain subloops.  If the subloops were dead,
+  // they would already have been removed in earlier executions of this pass.
+  if (L->begin() != L->end())
+    return false;
+
+  SmallVector<BasicBlock*, 4> exitingBlocks;
+  L->getExitingBlocks(exitingBlocks);
+
+  SmallVector<BasicBlock*, 4> exitBlocks;
+  L->getUniqueExitBlocks(exitBlocks);
+
+  // We require that the loop only have a single exit block.  Otherwise, we'd
+  // be in the situation of needing to be able to solve statically which exit
+  // block will be branched to, or trying to preserve the branching logic in
+  // a loop invariant manner.
+  if (exitBlocks.size() != 1)
+    return false;
+
+  // Loops with multiple exits or exits that don't dominate the latch
+  // are too complicated to handle correctly.
+  if (!SingleDominatingExit(L, exitingBlocks))
+    return false;
+
+  // Finally, we have to check that the loop really is dead.
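+  // (Illustrative example, not part of the original source: a loop such as
+  //
+  //   for (i = 0; i != 100; ++i) { tmp = a[i] * 2; }
+  //
+  // where 'tmp' is never used after the loop qualifies: the trip count is
+  // computable, nothing in the body may write to memory or otherwise have
+  // side effects, and no loop-variant value escapes through the exit
+  // block's PHI nodes.)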
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks)) + return false; + + // Don't remove loops for which we can't solve the trip count. + // They could be infinite, in which case we'd be changing program behavior. + ScalarEvolution& SE = getAnalysis(); + SCEVHandle S = SE.getBackedgeTakenCount(L); + if (isa(S)) + return false; + + // Now that we know the removal is safe, remove the loop by changing the + // branch from the preheader to go to the single exit block. + BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock* exitingBlock = exitingBlocks[0]; + + // Because we're deleting a large chunk of code at once, the sequence in which + // we remove things is very important to avoid invalidation issues. Don't + // mess with this unless you have good reason and know what you're doing. + + // Move simple loop-invariant expressions out of the loop, since they + // might be needed by the exit phis. + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); + BI != BE; ) { + Instruction* I = BI++; + if (!I->use_empty() && IsLoopInvariantInst(I, L)) + I->moveBefore(preheader->getTerminator()); + } + + // Connect the preheader directly to the exit block. + TerminatorInst* TI = preheader->getTerminator(); + TI->replaceUsesOfWith(L->getHeader(), exitBlock); + + // Rewrite phis in the exit block to get their inputs from + // the preheader instead of the exiting block. + BasicBlock::iterator BI = exitBlock->begin(); + while (PHINode* P = dyn_cast(BI)) { + P->replaceUsesOfWith(exitingBlock, preheader); + BI++; + } + + // Update the dominator tree and remove the instructions and blocks that will + // be deleted from the reference counting scheme. + DominatorTree& DT = getAnalysis(); + DominanceFrontier* DF = getAnalysisIfAvailable(); + SmallPtrSet ChildNodes; + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + // Move all of the block's children to be children of the preheader, which + // allows us to remove the domtree entry for the block. + ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end()); + for (SmallPtrSet::iterator DI = ChildNodes.begin(), + DE = ChildNodes.end(); DI != DE; ++DI) { + DT.changeImmediateDominator(*DI, DT[preheader]); + if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT); + } + + ChildNodes.clear(); + DT.eraseNode(*LI); + if (DF) DF->removeBlock(*LI); + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + (*LI)->dropAllReferences(); + } + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. + SE.forgetLoopBackedgeTakenCount(L); + + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove its + // entry from the loop's block list. We do that in the next section. + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) + (*LI)->eraseFromParent(); + + // Finally, the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. 
+ LoopInfo& loopInfo = getAnalysis(); + SmallPtrSet blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (SmallPtrSet::iterator I = blocks.begin(), + E = blocks.end(); I != E; ++I) + loopInfo.removeBlock(*I); + + // The last step is to inform the loop pass manager that we've + // eliminated this loop. + LPM.deleteLoopFromQueue(L); + + NumDeleted++; + + return true; +} diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp new file mode 100644 index 000000000000..9c785968e1d4 --- /dev/null +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -0,0 +1,1237 @@ +//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Loop Index Splitting Pass. This pass handles three +// kinds of loops. +// +// [1] A loop may be eliminated if the body is executed exactly once. +// For example, +// +// for (i = 0; i < N; ++i) { +// if (i == X) { +// body; +// } +// } +// +// is transformed to +// +// i = X; +// body; +// +// [2] A loop's iteration space may be shrunk if the loop body is executed +// for a proper sub-range of the loop's iteration space. For example, +// +// for (i = 0; i < N; ++i) { +// if (i > A && i < B) { +// ... +// } +// } +// +// is transformed to iterators from A to B, if A > 0 and B < N. +// +// [3] A loop may be split if the loop body is dominated by a branch. +// For example, +// +// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; } +// +// is transformed into +// +// AEV = BSV = SV +// for (i = LB; i < min(UB, AEV); ++i) +// A; +// for (i = max(LB, BSV); i < UB; ++i); +// B; +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-index-split" + +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(NumIndexSplit, "Number of loop index split"); +STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split"); +STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted"); + +namespace { + + class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass { + + public: + static char ID; // Pass ID, replacement for typeid + LoopIndexSplit() : LoopPass(&ID) {} + + // Index split Loop L. Return true if loop is split. + bool runOnLoop(Loop *L, LPPassManager &LPM); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } + + private: + /// processOneIterationLoop -- Eliminate loop if loop body is executed + /// only once. For example, + /// for (i = 0; i < N; ++i) { + /// if ( i == X) { + /// ... 
+ /// } + /// } + /// + bool processOneIterationLoop(); + + // -- Routines used by updateLoopIterationSpace(); + + /// updateLoopIterationSpace -- Update loop's iteration space if loop + /// body is executed for certain IV range only. For example, + /// + /// for (i = 0; i < N; ++i) { + /// if ( i > A && i < B) { + /// ... + /// } + /// } + /// is transformed to iterators from A to B, if A > 0 and B < N. + /// + bool updateLoopIterationSpace(); + + /// restrictLoopBound - Op dominates loop body. Op compares an IV based value + /// with a loop invariant value. Update loop's lower and upper bound based on + /// the loop invariant value. + bool restrictLoopBound(ICmpInst &Op); + + // --- Routines used by splitLoop(). --- / + + bool splitLoop(); + + /// removeBlocks - Remove basic block DeadBB and all blocks dominated by + /// DeadBB. This routine is used to remove split condition's dead branch, + /// dominated by DeadBB. LiveBB dominates split conidition's other branch. + void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB); + + /// moveExitCondition - Move exit condition EC into split condition block. + void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, + BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC, + PHINode *IV, Instruction *IVAdd, Loop *LP, + unsigned); + + /// updatePHINodes - CFG has been changed. + /// Before + /// - ExitBB's single predecessor was Latch + /// - Latch's second successor was Header + /// Now + /// - ExitBB's single predecessor was Header + /// - Latch's one and only successor was Header + /// + /// Update ExitBB PHINodes' to reflect this change. + void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, + BasicBlock *Header, + PHINode *IV, Instruction *IVIncrement, Loop *LP); + + // --- Utility routines --- / + + /// cleanBlock - A block is considered clean if all non terminal + /// instructions are either PHINodes or IV based values. + bool cleanBlock(BasicBlock *BB); + + /// IVisLT - If Op is comparing IV based value with an loop invariant and + /// IV based value is less than the loop invariant then return the loop + /// invariant. Otherwise return NULL. + Value * IVisLT(ICmpInst &Op); + + /// IVisLE - If Op is comparing IV based value with an loop invariant and + /// IV based value is less than or equal to the loop invariant then + /// return the loop invariant. Otherwise return NULL. + Value * IVisLE(ICmpInst &Op); + + /// IVisGT - If Op is comparing IV based value with an loop invariant and + /// IV based value is greater than the loop invariant then return the loop + /// invariant. Otherwise return NULL. + Value * IVisGT(ICmpInst &Op); + + /// IVisGE - If Op is comparing IV based value with an loop invariant and + /// IV based value is greater than or equal to the loop invariant then + /// return the loop invariant. Otherwise return NULL. + Value * IVisGE(ICmpInst &Op); + + private: + + // Current Loop information. + Loop *L; + LPPassManager *LPM; + LoopInfo *LI; + DominatorTree *DT; + DominanceFrontier *DF; + + PHINode *IndVar; + ICmpInst *ExitCondition; + ICmpInst *SplitCondition; + Value *IVStartValue; + Value *IVExitValue; + Instruction *IVIncrement; + SmallPtrSet IVBasedValues; + }; +} + +char LoopIndexSplit::ID = 0; +static RegisterPass +X("loop-index-split", "Index Split Loops"); + +Pass *llvm::createLoopIndexSplitPass() { + return new LoopIndexSplit(); +} + +// Index split Loop L. Return true if loop is split. 
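+// (Usage note, not part of the original source: the pass is registered
+// above under the name "loop-index-split", so it can be exercised in
+// isolation with `opt -loop-index-split`, assuming a standard opt build.)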
+bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
+  L = IncomingLoop;
+  LPM = &LPM_Ref;
+
+  // FIXME - Nested loops make dominator info updates tricky.
+  if (!L->getSubLoops().empty())
+    return false;
+
+  DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+  DF = &getAnalysis<DominanceFrontier>();
+
+  // Initialize loop data.
+  IndVar = L->getCanonicalInductionVariable();
+  if (!IndVar) return false;
+
+  bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
+  IVStartValue = IndVar->getIncomingValue(!P1InLoop);
+  IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
+  if (!IVIncrement) return false;
+
+  IVBasedValues.clear();
+  IVBasedValues.insert(IndVar);
+  IVBasedValues.insert(IVIncrement);
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I)
+    for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
+        BI != BE; ++BI) {
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
+        if (BO != IVIncrement
+            && (BO->getOpcode() == Instruction::Add
+                || BO->getOpcode() == Instruction::Sub))
+          if (IVBasedValues.count(BO->getOperand(0))
+              && L->isLoopInvariant(BO->getOperand(1)))
+            IVBasedValues.insert(BO);
+    }
+
+  // Reject loop if loop exit condition is not suitable.
+  BasicBlock *ExitingBlock = L->getExitingBlock();
+  if (!ExitingBlock)
+    return false;
+  BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+  if (!EBR) return false;
+  ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
+  if (!ExitCondition) return false;
+  if (ExitingBlock != L->getLoopLatch()) return false;
+  IVExitValue = ExitCondition->getOperand(1);
+  if (!L->isLoopInvariant(IVExitValue))
+    IVExitValue = ExitCondition->getOperand(0);
+  if (!L->isLoopInvariant(IVExitValue))
+    return false;
+
+  // If the start value is greater than the exit value and the induction
+  // variable increments by 1, then we are potentially dealing with an
+  // infinite loop.  Do not index split this loop.
+  if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
+    if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
+      if (SV->getSExtValue() > EV->getSExtValue())
+        return false;
+
+  if (processOneIterationLoop())
+    return true;
+
+  if (updateLoopIterationSpace())
+    return true;
+
+  if (splitLoop())
+    return true;
+
+  return false;
+}
+
+// --- Helper routines ---
+// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
+static bool isUsedOutsideLoop(Value *V, Loop *L) {
+  for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+    if (!L->contains(cast<Instruction>(*UI)->getParent()))
+      return true;
+  return false;
+}
+
+// Return V+1
+static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
+  ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+  return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
+}
+
+// Return V-1
+static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
+  ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+  return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
+}
+
+// Return min(V1, V2)
+static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+  Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                          V1, V2, "lsp", InsertPt);
+  return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
+}
+
+// Return max(V1, V2)
+static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+  Value *C = new ICmpInst(Sign ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + V1, V2, "lsp", InsertPt); + return SelectInst::Create(C, V2, V1, "lsp", InsertPt); +} + +/// processOneIterationLoop -- Eliminate loop if loop body is executed +/// only once. For example, +/// for (i = 0; i < N; ++i) { +/// if ( i == X) { +/// ... +/// } +/// } +/// +bool LoopIndexSplit::processOneIterationLoop() { + SplitCondition = NULL; + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + BranchInst *BR = dyn_cast(Header->getTerminator()); + if (!BR) return false; + if (!isa(Latch->getTerminator())) return false; + if (BR->isUnconditional()) return false; + SplitCondition = dyn_cast(BR->getCondition()); + if (!SplitCondition) return false; + if (SplitCondition == ExitCondition) return false; + if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false; + if (BR->getOperand(1) != Latch) return false; + if (!IVBasedValues.count(SplitCondition->getOperand(0)) + && !IVBasedValues.count(SplitCondition->getOperand(1))) + return false; + + // If IV is used outside the loop then this loop traversal is required. + // FIXME: Calculate and use last IV value. + if (isUsedOutsideLoop(IVIncrement, L)) + return false; + + // If BR operands are not IV or not loop invariants then skip this loop. + Value *OPV = SplitCondition->getOperand(0); + Value *SplitValue = SplitCondition->getOperand(1); + if (!L->isLoopInvariant(SplitValue)) + std::swap(OPV, SplitValue); + if (!L->isLoopInvariant(SplitValue)) + return false; + Instruction *OPI = dyn_cast(OPV); + if (!OPI) + return false; + if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L)) + return false; + Value *StartValue = IVStartValue; + Value *ExitValue = IVExitValue;; + + if (OPV != IndVar) { + // If BR operand is IV based then use this operand to calculate + // effective conditions for loop body. + BinaryOperator *BOPV = dyn_cast(OPV); + if (!BOPV) + return false; + if (BOPV->getOpcode() != Instruction::Add) + return false; + StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR); + ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR); + } + + if (!cleanBlock(Header)) + return false; + + if (!cleanBlock(Latch)) + return false; + + // If the merge point for BR is not loop latch then skip this loop. + if (BR->getSuccessor(0) != Latch) { + DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0)); + assert (DF0 != DF->end() && "Unable to find dominance frontier"); + if (!DF0->second.count(Latch)) + return false; + } + + if (BR->getSuccessor(1) != Latch) { + DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1)); + assert (DF1 != DF->end() && "Unable to find dominance frontier"); + if (!DF1->second.count(Latch)) + return false; + } + + // Now, Current loop L contains compare instruction + // that compares induction variable, IndVar, against loop invariant. And + // entire (i.e. meaningful) loop body is dominated by this compare + // instruction. In such case eliminate + // loop structure surrounding this loop body. For example, + // for (int i = start; i < end; ++i) { + // if ( i == somevalue) { + // loop_body + // } + // } + // can be transformed into + // if (somevalue >= start && somevalue < end) { + // i = somevalue; + // loop_body + // } + + // Replace index variable with split value in loop body. Loop body is executed + // only when index variable is equal to split value. + IndVar->replaceAllUsesWith(SplitValue); + + // Replace split condition in header. 
+ // Transform + // SplitCondition : icmp eq i32 IndVar, SplitValue + // into + // c1 = icmp uge i32 SplitValue, StartValue + // c2 = icmp ult i32 SplitValue, ExitValue + // and i32 c1, c2 + Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ? + ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, + SplitValue, StartValue, "lisplit", BR); + + CmpInst::Predicate C2P = ExitCondition->getPredicate(); + BranchInst *LatchBR = cast(Latch->getTerminator()); + if (LatchBR->getOperand(0) != Header) + C2P = CmpInst::getInversePredicate(C2P); + Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR); + Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR); + + SplitCondition->replaceAllUsesWith(NSplitCond); + SplitCondition->eraseFromParent(); + + // Remove Latch to Header edge. + BasicBlock *LatchSucc = NULL; + Header->removePredecessor(Latch); + for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch); + SI != E; ++SI) { + if (Header != *SI) + LatchSucc = *SI; + } + + // Clean up latch block. + Value *LatchBRCond = LatchBR->getCondition(); + LatchBR->setUnconditionalDest(LatchSucc); + RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond); + + LPM->deleteLoopFromQueue(L); + + // Update Dominator Info. + // Only CFG change done is to remove Latch to Header edge. This + // does not change dominator tree because Latch did not dominate + // Header. + if (DF) { + DominanceFrontier::iterator HeaderDF = DF->find(Header); + if (HeaderDF != DF->end()) + DF->removeFromFrontier(HeaderDF, Header); + + DominanceFrontier::iterator LatchDF = DF->find(Latch); + if (LatchDF != DF->end()) + DF->removeFromFrontier(LatchDF, Header); + } + + ++NumIndexSplitRemoved; + return true; +} + +/// restrictLoopBound - Op dominates loop body. Op compares an IV based value +/// with a loop invariant value. Update loop's lower and upper bound based on +/// the loop invariant value. +bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) { + bool Sign = Op.isSignedPredicate(); + Instruction *PHTerm = L->getLoopPreheader()->getTerminator(); + + if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { + BranchInst *EBR = + cast(ExitCondition->getParent()->getTerminator()); + ExitCondition->setPredicate(ExitCondition->getInversePredicate()); + BasicBlock *T = EBR->getSuccessor(0); + EBR->setSuccessor(0, EBR->getSuccessor(1)); + EBR->setSuccessor(1, T); + } + + // New upper and lower bounds. + Value *NLB = NULL; + Value *NUB = NULL; + if (Value *V = IVisLT(Op)) { + // Restrict upper bound. + if (IVisLE(*ExitCondition)) + V = getMinusOne(V, Sign, PHTerm); + NUB = getMin(V, IVExitValue, Sign, PHTerm); + } else if (Value *V = IVisLE(Op)) { + // Restrict upper bound. + if (IVisLT(*ExitCondition)) + V = getPlusOne(V, Sign, PHTerm); + NUB = getMin(V, IVExitValue, Sign, PHTerm); + } else if (Value *V = IVisGT(Op)) { + // Restrict lower bound. + V = getPlusOne(V, Sign, PHTerm); + NLB = getMax(V, IVStartValue, Sign, PHTerm); + } else if (Value *V = IVisGE(Op)) + // Restrict lower bound. + NLB = getMax(V, IVStartValue, Sign, PHTerm); + + if (!NLB && !NUB) + return false; + + if (NLB) { + unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader()); + IndVar->setIncomingValue(i, NLB); + } + + if (NUB) { + unsigned i = (ExitCondition->getOperand(0) != IVExitValue); + ExitCondition->setOperand(i, NUB); + } + return true; +} + +/// updateLoopIterationSpace -- Update loop's iteration space if loop +/// body is executed for certain IV range only. 
For example,
+///
+/// for (i = 0; i < N; ++i) {
+///   if ( i > A && i < B) {
+///     ...
+///   }
+/// }
+/// is transformed to iterators from A to B, if A > 0 and B < N.
+///
+bool LoopIndexSplit::updateLoopIterationSpace() {
+  SplitCondition = NULL;
+  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
+      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
+    return false;
+  BasicBlock *Latch = L->getLoopLatch();
+  BasicBlock *Header = L->getHeader();
+  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
+  if (!BR) return false;
+  if (!isa<BranchInst>(Latch->getTerminator())) return false;
+  if (BR->isUnconditional()) return false;
+  BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
+  if (!AND) return false;
+  if (AND->getOpcode() != Instruction::And) return false;
+  ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
+  ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
+  if (!Op0 || !Op1)
+    return false;
+  IVBasedValues.insert(AND);
+  IVBasedValues.insert(Op0);
+  IVBasedValues.insert(Op1);
+  if (!cleanBlock(Header)) return false;
+  BasicBlock *ExitingBlock = ExitCondition->getParent();
+  if (!cleanBlock(ExitingBlock)) return false;
+
+  // If the merge point for BR is not the loop latch then skip this loop.
+  if (BR->getSuccessor(0) != Latch) {
+    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+    assert (DF0 != DF->end() && "Unable to find dominance frontier");
+    if (!DF0->second.count(Latch))
+      return false;
+  }
+
+  if (BR->getSuccessor(1) != Latch) {
+    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+    assert (DF1 != DF->end() && "Unable to find dominance frontier");
+    if (!DF1->second.count(Latch))
+      return false;
+  }
+
+  // Verify that the loop exiting block has only two predecessors, one of
+  // which is the split condition block.  The other predecessor will become
+  // the exiting block's dominator after the CFG is updated.  TODO: Handle
+  // CFGs where the exiting block has more than two predecessors.  This
+  // requires extra work in updating dominator information.
+  BasicBlock *ExitingBBPred = NULL;
+  for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
+       PI != PE; ++PI) {
+    BasicBlock *BB = *PI;
+    if (Header == BB)
+      continue;
+    if (ExitingBBPred)
+      return false;
+    else
+      ExitingBBPred = BB;
+  }
+
+  if (!restrictLoopBound(*Op0))
+    return false;
+
+  if (!restrictLoopBound(*Op1))
+    return false;
+
+  // Update CFG.
+  if (BR->getSuccessor(0) == ExitingBlock)
+    BR->setUnconditionalDest(BR->getSuccessor(1));
+  else
+    BR->setUnconditionalDest(BR->getSuccessor(0));
+
+  AND->eraseFromParent();
+  if (Op0->use_empty())
+    Op0->eraseFromParent();
+  if (Op1->use_empty())
+    Op1->eraseFromParent();
+
+  // Update dominator info.  Now ExitingBlock has only one predecessor,
+  // ExitingBBPred, and it is ExitingBlock's immediate dominator.
+ DT->changeImmediateDominator(ExitingBlock, ExitingBBPred); + + BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1); + if (L->contains(ExitBlock)) + ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0); + + // If ExitingBlock is a member of the loop basic blocks' DF list then + // replace ExitingBlock with header and exit block in the DF list + DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock); + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + if (BB == Header || BB == ExitingBlock) + continue; + DominanceFrontier::iterator BBDF = DF->find(BB); + DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); + DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); + while (DomSetI != DomSetE) { + DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI; + ++DomSetI; + BasicBlock *DFBB = *CurrentItr; + if (DFBB == ExitingBlock) { + BBDF->second.erase(DFBB); + for (DominanceFrontier::DomSetType::iterator + EBI = ExitingBlockDF->second.begin(), + EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI) + BBDF->second.insert(*EBI); + } + } + } + NumRestrictBounds++; + return true; +} + +/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB. +/// This routine is used to remove split condition's dead branch, dominated by +/// DeadBB. LiveBB dominates split conidition's other branch. +void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, + BasicBlock *LiveBB) { + + // First update DeadBB's dominance frontier. + SmallVector FrontierBBs; + DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB); + if (DeadBBDF != DF->end()) { + SmallVector PredBlocks; + + DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second; + for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(), + DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI) + { + BasicBlock *FrontierBB = *DeadBBSetI; + FrontierBBs.push_back(FrontierBB); + + // Rremove any PHI incoming edge from blocks dominated by DeadBB. + PredBlocks.clear(); + for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (P == DeadBB || DT->dominates(DeadBB, P)) + PredBlocks.push_back(P); + } + + for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end(); + FBI != FBE; ++FBI) { + if (PHINode *PN = dyn_cast(FBI)) { + for(SmallVector::iterator PI = PredBlocks.begin(), + PE = PredBlocks.end(); PI != PE; ++PI) { + BasicBlock *P = *PI; + PN->removeIncomingValue(P); + } + } + else + break; + } + } + } + + // Now remove DeadBB and all nodes dominated by DeadBB in df order. + SmallVector WorkList; + DomTreeNode *DN = DT->getNode(DeadBB); + for (df_iterator DI = df_begin(DN), + E = df_end(DN); DI != E; ++DI) { + BasicBlock *BB = DI->getBlock(); + WorkList.push_back(BB); + BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy)); + } + + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + LPM->deleteSimpleAnalysisValue(BB, LP); + for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE; ) { + Instruction *I = BBI; + ++BBI; + I->replaceAllUsesWith(UndefValue::get(I->getType())); + LPM->deleteSimpleAnalysisValue(I, LP); + I->eraseFromParent(); + } + DT->eraseNode(BB); + DF->removeBlock(BB); + LI->removeBlock(BB); + BB->eraseFromParent(); + } + + // Update Frontier BBs' dominator info. 
+ while (!FrontierBBs.empty()) { + BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back(); + BasicBlock *NewDominator = FBB->getSinglePredecessor(); + if (!NewDominator) { + pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB); + NewDominator = *PI; + ++PI; + if (NewDominator != LiveBB) { + for(; PI != PE; ++PI) { + BasicBlock *P = *PI; + if (P == LiveBB) { + NewDominator = LiveBB; + break; + } + NewDominator = DT->findNearestCommonDominator(NewDominator, P); + } + } + } + assert (NewDominator && "Unable to fix dominator info."); + DT->changeImmediateDominator(FBB, NewDominator); + DF->changeImmediateDominator(FBB, NewDominator, DT); + } + +} + +// moveExitCondition - Move exit condition EC into split condition block CondBB. +void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, + BasicBlock *ExitBB, ICmpInst *EC, + ICmpInst *SC, PHINode *IV, + Instruction *IVAdd, Loop *LP, + unsigned ExitValueNum) { + + BasicBlock *ExitingBB = EC->getParent(); + Instruction *CurrentBR = CondBB->getTerminator(); + + // Move exit condition into split condition block. + EC->moveBefore(CurrentBR); + EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV); + + // Move exiting block's branch into split condition block. Update its branch + // destination. + BranchInst *ExitingBR = cast(ExitingBB->getTerminator()); + ExitingBR->moveBefore(CurrentBR); + BasicBlock *OrigDestBB = NULL; + if (ExitingBR->getSuccessor(0) == ExitBB) { + OrigDestBB = ExitingBR->getSuccessor(1); + ExitingBR->setSuccessor(1, ActiveBB); + } + else { + OrigDestBB = ExitingBR->getSuccessor(0); + ExitingBR->setSuccessor(0, ActiveBB); + } + + // Remove split condition and current split condition branch. + SC->eraseFromParent(); + CurrentBR->eraseFromParent(); + + // Connect exiting block to original destination. + BranchInst::Create(OrigDestBB, ExitingBB); + + // Update PHINodes + updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP); + + // Fix dominator info. + // ExitBB is now dominated by CondBB + DT->changeImmediateDominator(ExitBB, CondBB); + DF->changeImmediateDominator(ExitBB, CondBB, DT); + + // Blocks outside the loop may have been in the dominance frontier of blocks + // inside the condition; this is now impossible because the blocks inside the + // condition no loger dominate the exit. Remove the relevant blocks from + // the dominance frontiers. + for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); + I != E; ++I) { + if (*I == CondBB || !DT->dominates(CondBB, *I)) continue; + DominanceFrontier::iterator BBDF = DF->find(*I); + DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); + DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); + while (DomSetI != DomSetE) { + DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI; + ++DomSetI; + BasicBlock *DFBB = *CurrentItr; + if (!LP->contains(DFBB)) + BBDF->second.erase(DFBB); + } + } +} + +/// updatePHINodes - CFG has been changed. +/// Before +/// - ExitBB's single predecessor was Latch +/// - Latch's second successor was Header +/// Now +/// - ExitBB's single predecessor is Header +/// - Latch's one and only successor is Header +/// +/// Update ExitBB PHINodes' to reflect this change. 
+void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, + BasicBlock *Header, + PHINode *IV, Instruction *IVIncrement, + Loop *LP) { + + for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); + BI != BE; ) { + PHINode *PN = dyn_cast(BI); + ++BI; + if (!PN) + break; + + Value *V = PN->getIncomingValueForBlock(Latch); + if (PHINode *PHV = dyn_cast(V)) { + // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use + // in Header which is new incoming value for PN. + Value *NewV = NULL; + for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); + UI != E; ++UI) + if (PHINode *U = dyn_cast(*UI)) + if (LP->contains(U->getParent())) { + NewV = U; + break; + } + + // Add incoming value from header only if PN has any use inside the loop. + if (NewV) + PN->addIncoming(NewV, Header); + + } else if (Instruction *PHI = dyn_cast(V)) { + // If this instruction is IVIncrement then IV is new incoming value + // from header otherwise this instruction must be incoming value from + // header because loop is in LCSSA form. + if (PHI == IVIncrement) + PN->addIncoming(IV, Header); + else + PN->addIncoming(V, Header); + } else + // Otherwise this is an incoming value from header because loop is in + // LCSSA form. + PN->addIncoming(V, Header); + + // Remove incoming value from Latch. + PN->removeIncomingValue(Latch); + } +} + +bool LoopIndexSplit::splitLoop() { + SplitCondition = NULL; + if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE + || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ) + return false; + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BranchInst *SBR = NULL; // Split Condition Branch + BranchInst *EBR = cast(ExitCondition->getParent()->getTerminator()); + // If Exiting block includes loop variant instructions then this + // loop may not be split safely. + BasicBlock *ExitingBlock = ExitCondition->getParent(); + if (!cleanBlock(ExitingBlock)) return false; + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BranchInst *BR = dyn_cast((*I)->getTerminator()); + if (!BR || BR->isUnconditional()) continue; + ICmpInst *CI = dyn_cast(BR->getCondition()); + if (!CI || CI == ExitCondition + || CI->getPredicate() == ICmpInst::ICMP_NE + || CI->getPredicate() == ICmpInst::ICMP_EQ) + continue; + + // Unable to handle triangle loops at the moment. + // In triangle loop, split condition is in header and one of the + // the split destination is loop latch. If split condition is EQ + // then such loops are already handle in processOneIterationLoop(). + if (Header == (*I) + && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1))) + continue; + + // If the block does not dominate the latch then this is not a diamond. + // Such loop may not benefit from index split. + if (!DT->dominates((*I), Latch)) + continue; + + // If split condition branches heads do not have single predecessor, + // SplitCondBlock, then is not possible to remove inactive branch. + if (!BR->getSuccessor(0)->getSinglePredecessor() + || !BR->getSuccessor(1)->getSinglePredecessor()) + return false; + + // If the merge point for BR is not loop latch then skip this condition. 
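+    // (Clarifying sketch, not part of the original source: for a diamond
+    // inside the loop body,
+    //
+    //        BR
+    //       /  \
+    //      T    F
+    //       \  /
+    //       Latch
+    //
+    // the latch is in the dominance frontier of both of BR's successors;
+    // that is what the two checks below verify.)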
+ if (BR->getSuccessor(0) != Latch) { + DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0)); + assert (DF0 != DF->end() && "Unable to find dominance frontier"); + if (!DF0->second.count(Latch)) + continue; + } + + if (BR->getSuccessor(1) != Latch) { + DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1)); + assert (DF1 != DF->end() && "Unable to find dominance frontier"); + if (!DF1->second.count(Latch)) + continue; + } + SplitCondition = CI; + SBR = BR; + break; + } + + if (!SplitCondition) + return false; + + // If the predicate sign does not match then skip. + if (ExitCondition->isSignedPredicate() != SplitCondition->isSignedPredicate()) + return false; + + unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue); + unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0)); + Value *SplitValue = SplitCondition->getOperand(SVOpNum); + if (!L->isLoopInvariant(SplitValue)) + return false; + if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum))) + return false; + + // Normalize loop conditions so that it is easier to calculate new loop + // bounds. + if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { + ExitCondition->setPredicate(ExitCondition->getInversePredicate()); + BasicBlock *T = EBR->getSuccessor(0); + EBR->setSuccessor(0, EBR->getSuccessor(1)); + EBR->setSuccessor(1, T); + } + + if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) { + SplitCondition->setPredicate(SplitCondition->getInversePredicate()); + BasicBlock *T = SBR->getSuccessor(0); + SBR->setSuccessor(0, SBR->getSuccessor(1)); + SBR->setSuccessor(1, T); + } + + //[*] Calculate new loop bounds. + Value *AEV = SplitValue; + Value *BSV = SplitValue; + bool Sign = SplitCondition->isSignedPredicate(); + Instruction *PHTerm = L->getLoopPreheader()->getTerminator(); + + if (IVisLT(*ExitCondition)) { + if (IVisLT(*SplitCondition)) { + /* Do nothing */ + } + else if (IVisLE(*SplitCondition)) { + AEV = getPlusOne(SplitValue, Sign, PHTerm); + BSV = getPlusOne(SplitValue, Sign, PHTerm); + } else { + assert (0 && "Unexpected split condition!"); + } + } + else if (IVisLE(*ExitCondition)) { + if (IVisLT(*SplitCondition)) { + AEV = getMinusOne(SplitValue, Sign, PHTerm); + } + else if (IVisLE(*SplitCondition)) { + BSV = getPlusOne(SplitValue, Sign, PHTerm); + } else { + assert (0 && "Unexpected split condition!"); + } + } else { + assert (0 && "Unexpected exit condition!"); + } + AEV = getMin(AEV, IVExitValue, Sign, PHTerm); + BSV = getMax(BSV, IVStartValue, Sign, PHTerm); + + // [*] Clone Loop + DenseMap ValueMap; + Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this); + Loop *ALoop = L; + + // [*] ALoop's exiting edge enters BLoop's header. + // ALoop's original exit block becomes BLoop's exit block. + PHINode *B_IndVar = cast(ValueMap[IndVar]); + BasicBlock *A_ExitingBlock = ExitCondition->getParent(); + BranchInst *A_ExitInsn = + dyn_cast(A_ExitingBlock->getTerminator()); + assert (A_ExitInsn && "Unable to find suitable loop exit branch"); + BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1); + BasicBlock *B_Header = BLoop->getHeader(); + if (ALoop->contains(B_ExitBlock)) { + B_ExitBlock = A_ExitInsn->getSuccessor(0); + A_ExitInsn->setSuccessor(0, B_Header); + } else + A_ExitInsn->setSuccessor(1, B_Header); + + // [*] Update ALoop's exit value using new exit value. + ExitCondition->setOperand(EVOpNum, AEV); + + // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from + // original loop's preheader. Add incoming PHINode values from + // ALoop's exiting block. 
Update BLoop header's dominator info.
+
+  // Collect inverse map of Header PHINodes.
+  DenseMap<Value *, Value *> InverseMap;
+  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
+       BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+      InverseMap[PNClone] = PN;
+    } else
+      break;
+  }
+
+  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
+  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+       BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      // Remove incoming value from original preheader.
+      PN->removeIncomingValue(A_Preheader);
+
+      // Add incoming value from A_ExitingBlock.
+      if (PN == B_IndVar)
+        PN->addIncoming(BSV, A_ExitingBlock);
+      else {
+        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
+        Value *V2 = NULL;
+        // If the loop header is also the loop exiting block then
+        // OrigPN is the incoming value for the B loop header.
+        if (A_ExitingBlock == ALoop->getHeader())
+          V2 = OrigPN;
+        else
+          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
+        PN->addIncoming(V2, A_ExitingBlock);
+      }
+    } else
+      break;
+  }
+
+  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
+  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
+
+  // [*] Update BLoop's exit block.  Its new predecessor is BLoop's exiting
+  // block.  Remove incoming PHINode values from ALoop's exiting block.
+  // Add new incoming values from BLoop's incoming exiting value.
+  // Update BLoop exit block's dominator info.
+  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
+       BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+                      B_ExitingBlock);
+      PN->removeIncomingValue(A_ExitingBlock);
+    } else
+      break;
+  }
+
+  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
+  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
+
+  //[*] Split ALoop's exit edge.  This creates a new block which
+  //    serves two purposes.  The first is to hold PHINode definitions
+  //    that preserve ALoop's LCSSA form.  The second is to act
+  //    as a preheader for BLoop.
+  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
+
+  //[*] Preserve ALoop's LCSSA form.  Create new forwarding PHINodes
+  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
+  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+      BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
+      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
+      newPHI->addIncoming(V1, A_ExitingBlock);
+      A_ExitBlock->getInstList().push_front(newPHI);
+      PN->removeIncomingValue(A_ExitBlock);
+      PN->addIncoming(newPHI, A_ExitBlock);
+    } else
+      break;
+  }
+
+  //[*] Eliminate the split condition's inactive branch from ALoop.
+  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
+  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
+  BasicBlock *A_InactiveBranch = NULL;
+  BasicBlock *A_ActiveBranch = NULL;
+  A_ActiveBranch = A_BR->getSuccessor(0);
+  A_InactiveBranch = A_BR->getSuccessor(1);
+  A_BR->setUnconditionalDest(A_ActiveBranch);
+  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
+
+  //[*] Eliminate the split condition's inactive branch from BLoop.
+  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
+  BasicBlock *B_InactiveBranch = NULL;
+  BasicBlock *B_ActiveBranch = NULL;
+  B_ActiveBranch = B_BR->getSuccessor(1);
+  B_InactiveBranch = B_BR->getSuccessor(0);
+  B_BR->setUnconditionalDest(B_ActiveBranch);
+  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
+
+  BasicBlock *A_Header = ALoop->getHeader();
+  if (A_ExitingBlock == A_Header)
+    return true;
+
+  //[*] Move exit condition into split condition block to avoid
+  //    executing a dead loop iteration.
+  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
+  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
+  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+
+  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
+                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
+                    ALoop, EVOpNum);
+
+  moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
+                    B_ExitBlock, B_ExitCondition,
+                    B_SplitCondition, B_IndVar, B_IndVarIncrement,
+                    BLoop, EVOpNum);
+
+  NumIndexSplit++;
+  return true;
+}
+
+/// cleanBlock - A block is considered clean if all non terminal
+/// instructions are either PHINodes or IV based values.
+bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
+  Instruction *Terminator = BB->getTerminator();
+  for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+      BI != BE; ++BI) {
+    Instruction *I = BI;
+
+    if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
+        || I == SplitCondition || IVBasedValues.count(I)
+        || isa<DbgInfoIntrinsic>(I))
+      continue;
+
+    if (I->mayHaveSideEffects())
+      return false;
+
+    // If I is used only inside this block then it is OK.
+    bool usedOutsideBB = false;
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+         UI != UE; ++UI) {
+      Instruction *U = cast<Instruction>(UI);
+      if (U->getParent() != BB)
+        usedOutsideBB = true;
+    }
+    if (!usedOutsideBB)
+      continue;
+
+    // Otherwise we have an instruction that may not allow loop splitting.
+    return false;
+  }
+  return true;
+}
+
+/// IVisLT - If Op is comparing an IV based value with a loop invariant and
+/// the IV based value is less than the loop invariant, then return the loop
+/// invariant.  Otherwise return NULL.
+Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
+  ICmpInst::Predicate P = Op.getPredicate();
+  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+      && IVBasedValues.count(Op.getOperand(0))
+      && L->isLoopInvariant(Op.getOperand(1)))
+    return Op.getOperand(1);
+
+  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+      && IVBasedValues.count(Op.getOperand(1))
+      && L->isLoopInvariant(Op.getOperand(0)))
+    return Op.getOperand(0);
+
+  return NULL;
+}
+
+/// IVisLE - If Op is comparing an IV based value with a loop invariant and
+/// the IV based value is less than or equal to the loop invariant, then
+/// return the loop invariant.  Otherwise return NULL.
+Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
+  ICmpInst::Predicate P = Op.getPredicate();
+  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+      && IVBasedValues.count(Op.getOperand(0))
+      && L->isLoopInvariant(Op.getOperand(1)))
+    return Op.getOperand(1);
+
+  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+      && IVBasedValues.count(Op.getOperand(1))
+      && L->isLoopInvariant(Op.getOperand(0)))
+    return Op.getOperand(0);
+
+  return NULL;
+}
+
+/// IVisGT - If Op is comparing an IV based value with a loop invariant and
+/// the IV based value is greater than the loop invariant, then return the
+/// loop invariant.  Otherwise return NULL.
+Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
+  ICmpInst::Predicate P = Op.getPredicate();
+  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+      && IVBasedValues.count(Op.getOperand(0))
+      && L->isLoopInvariant(Op.getOperand(1)))
+    return Op.getOperand(1);
+
+  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+      && IVBasedValues.count(Op.getOperand(1))
+      && L->isLoopInvariant(Op.getOperand(0)))
+    return Op.getOperand(0);
+
+  return NULL;
+}
+
+/// IVisGE - If Op is comparing an IV based value with a loop invariant and
+/// the IV based value is greater than or equal to the loop invariant then
+/// return the loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
+  ICmpInst::Predicate P = Op.getPredicate();
+  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+      && IVBasedValues.count(Op.getOperand(0))
+      && L->isLoopInvariant(Op.getOperand(1)))
+    return Op.getOperand(1);
+
+  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+      && IVBasedValues.count(Op.getOperand(1))
+      && L->isLoopInvariant(Op.getOperand(0)))
+    return Op.getOperand(0);
+
+  return NULL;
+}
+
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 000000000000..a0882301332d
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,572 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Rotation Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+  class VISIBILITY_HIDDEN RenameData {
+  public:
+    RenameData(Instruction *O, Value *P, Instruction *H)
+      : Original(O), PreHeader(P), Header(H) { }
+  public:
+    Instruction *Original; // Original instruction
+    Value *PreHeader;      // Original pre-header replacement
+    Instruction *Header;   // New header replacement
+  };
+
+  class VISIBILITY_HIDDEN LoopRotate : public LoopPass {
+
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopRotate() : LoopPass(&ID) {}
+
+    // Rotate Loop L as many times as possible. Return true if
+    // loop is rotated at least once.
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    // LCSSA form makes instruction renaming easier.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved<LoopInfo>();
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<DominanceFrontier>();
+    }
+
+    // Helper functions
+
+    /// Do actual work
+    bool rotateLoop(Loop *L, LPPassManager &LPM);
+
+    /// Initialize local data
+    void initialize();
+
+    /// Make sure all Exit block PHINodes have required incoming values.
+    /// If an incoming value is constant or defined outside the loop then
+    /// the PHINode may not have an entry for the original pre-header.
+    void updateExitBlock();
+
+    /// Return true if this instruction is used outside the original header.
+    bool usedOutsideOriginalHeader(Instruction *In);
+
+    /// Find replacement information for instruction. Return NULL if it is
+    /// not available.
+    const RenameData *findReplacementData(Instruction *I);
+
+    /// After loop rotation, the loop pre-header has multiple successors.
+    /// Insert one forwarding basic block to ensure that the loop pre-header
+    /// has only one successor.
+    void preserveCanonicalLoopForm(LPPassManager &LPM);
+
+  private:
+
+    Loop *L;
+    BasicBlock *OrigHeader;
+    BasicBlock *OrigPreHeader;
+    BasicBlock *OrigLatch;
+    BasicBlock *NewHeader;
+    BasicBlock *Exit;
+    LPPassManager *LPM_Ptr;
+    SmallVector<RenameData, MAX_HEADER_SIZE> LoopHeaderInfo;
+  };
+}
+
+char LoopRotate::ID = 0;
+static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
+
+  bool RotatedOneLoop = false;
+  initialize();
+  LPM_Ptr = &LPM;
+
+  // One loop can be rotated multiple times.
+  while (rotateLoop(Lp, LPM)) {
+    RotatedOneLoop = true;
+    initialize();
+  }
+
+  return RotatedOneLoop;
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
+  L = Lp;
+
+  OrigHeader = L->getHeader();
+  OrigPreHeader = L->getLoopPreheader();
+  OrigLatch = L->getLoopLatch();
+
+  // If the loop has only one block then there is not much to rotate.
+  if (L->getBlocks().size() == 1)
+    return false;
+
+  assert(OrigHeader && OrigLatch && OrigPreHeader &&
+         "Loop is not in canonical form");
+
+  // If the loop header is not one of the loop exit blocks then
+  // either this loop is already rotated or it is not
+  // suitable for loop rotation transformations.
+  if (!L->isLoopExit(OrigHeader))
+    return false;
+
+  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+  if (!BI)
+    return false;
+  assert(BI->isConditional() && "Branch Instruction is not conditional");
+
+  // Updating PHInodes in loops with multiple exits adds complexity.
+  // Keep it simple, and restrict loop rotation to loops with one exit only.
+  // In the future, lift this restriction and add support for multiple exits
+  // if required.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.size() > 1)
+    return false;
+
+  // Check the size of the original header and reject the
+  // loop if it is very big.
+  unsigned Size = 0;
+
+  // FIXME: Use common api to estimate size.
+  for (BasicBlock::const_iterator OI = OrigHeader->begin(),
+         OE = OrigHeader->end(); OI != OE; ++OI) {
+    if (isa<PHINode>(OI))
+      continue; // PHI nodes don't count.
+    if (isa<DbgInfoIntrinsic>(OI))
+      continue; // Debug intrinsics don't count as size.
+    Size++;
+  }
+
+  if (Size > MAX_HEADER_SIZE)
+    return false;
+
+  // Now, this loop is suitable for rotation.
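+  //
+  // An illustrative sketch of the transformation (block names and values
+  // are hypothetical, not taken from this pass). A top-tested loop
+  //
+  //   header:                                  ; also the exiting block
+  //     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
+  //     %cond = icmp slt i32 %iv, %n
+  //     br i1 %cond, label %body, label %exit
+  //
+  // becomes, after rotation, a bottom-tested loop whose guard is a copy of
+  // the header test placed in the pre-header:
+  //
+  //   preheader:
+  //     %guard = icmp slt i32 0, %n
+  //     br i1 %guard, label %body, label %exit
+  //   body:                                    ; new header
+  //     ...
+  //     %cond = icmp slt i32 %iv.next, %n
+  //     br i1 %cond, label %body, label %exit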
+
+  // Find new Loop header. NewHeader is a Header's one and only successor
+  // that is inside loop.  Header's other successor is outside the
+  // loop.  Otherwise loop is not suitable for rotation.
+  Exit = BI->getSuccessor(0);
+  NewHeader = BI->getSuccessor(1);
+  if (L->contains(Exit))
+    std::swap(Exit, NewHeader);
+  assert(NewHeader && "Unable to determine new loop header");
+  assert(L->contains(NewHeader) && !L->contains(Exit) &&
+         "Unable to determine loop header and exit blocks");
+
+  // This code assumes that new header has exactly one predecessor. Remove any
+  // single entry PHI nodes in it.
+  assert(NewHeader->getSinglePredecessor() &&
+         "New header doesn't have one pred!");
+  FoldSingleEntryPHINodes(NewHeader);
+
+  // Copy PHI nodes and other instructions from original header
+  // into original pre-header. Unlike original header, original pre-header is
+  // not a member of loop.
+  //
+  // New loop header is one and only successor of original header that
+  // is inside the loop. All other original header successors are outside
+  // the loop. Copy PHI Nodes from original header into new loop header.
+  // Add second incoming value, from original loop pre-header into these phi
+  // nodes. If a value defined in original header is used outside original
+  // header then new loop header will need new phi nodes with two incoming
+  // values, one definition from original header and second definition is
+  // from original loop pre-header.
+
+  // Remove terminator from Original pre-header. Original pre-header will
+  // receive a clone of original header terminator as a new terminator.
+  OrigPreHeader->getInstList().pop_back();
+  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+  PHINode *PN = 0;
+  for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+    // PHI nodes are not copied into original pre-header. Instead their values
+    // are directly propagated.
+    Value *NPV = PN->getIncomingValueForBlock(OrigPreHeader);
+
+    // Create new PHI node with two incoming values for NewHeader.
+    // One incoming value is from OrigLatch (through OrigHeader) and
+    // second incoming value is from original pre-header.
+    PHINode *NH = PHINode::Create(PN->getType(), PN->getName(),
+                                  NewHeader->begin());
+    NH->addIncoming(PN->getIncomingValueForBlock(OrigLatch), OrigHeader);
+    NH->addIncoming(NPV, OrigPreHeader);
+
+    // "In" can be replaced by NH at various places.
+    LoopHeaderInfo.push_back(RenameData(PN, NPV, NH));
+  }
+
+  // Now, handle non-phi instructions.
+  for (; I != E; ++I) {
+    Instruction *In = I;
+    assert(!isa<PHINode>(In) && "PHINode is not expected here");
+
+    // This is not a PHI instruction. Insert its clone into original
+    // pre-header. If this instruction is using a value from same basic block
+    // then update it to use value from cloned instruction.
+    Instruction *C = In->clone();
+    C->setName(In->getName());
+    OrigPreHeader->getInstList().push_back(C);
+
+    for (unsigned opi = 0, e = In->getNumOperands(); opi != e; ++opi) {
+      Instruction *OpInsn = dyn_cast<Instruction>(In->getOperand(opi));
+      if (!OpInsn) continue;  // Ignore non-instruction values.
+      if (const RenameData *D = findReplacementData(OpInsn))
+        C->setOperand(opi, D->PreHeader);
+    }
+
+    // If this instruction is used outside this basic block then
+    // create a new PHINode for this instruction.
+    Instruction *NewHeaderReplacement = NULL;
+    if (usedOutsideOriginalHeader(In)) {
+      PHINode *PN = PHINode::Create(In->getType(), In->getName(),
+                                    NewHeader->begin());
+      PN->addIncoming(In, OrigHeader);
+      PN->addIncoming(C, OrigPreHeader);
+      NewHeaderReplacement = PN;
+    }
+    LoopHeaderInfo.push_back(RenameData(In, C, NewHeaderReplacement));
+  }
+
+  // Rename uses of original header instructions to reflect their new
+  // definitions (either from the original pre-header node or from newly
+  // created new header PHINodes).
+  //
+  // Original header instructions are used in
+  // 1) Original header:
+  //
+  //    If the instruction is used in non-phi instructions then it is using
+  //    the definition from the original header itself. Do not replace this
+  //    use with the definition from the new header or original pre-header.
+  //
+  //    If the instruction is used in a phi node then it is an incoming
+  //    value. Rename its use to reflect the new definition from the
+  //    new pre-header or new header.
+  //
+  // 2) Inside the loop but not in the original header
+  //
+  //    Replace this use to reflect the definition from the new header.
+  for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
+    const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
+
+    if (!ILoopHeaderInfo.Header)
+      continue;
+
+    Instruction *OldPhi = ILoopHeaderInfo.Original;
+    Instruction *NewPhi = ILoopHeaderInfo.Header;
+
+    // Before replacing uses, collect them first, so that the iterator is
+    // not invalidated.
+    SmallVector<Instruction *, 16> AllUses;
+    for (Value::use_iterator UI = OldPhi->use_begin(), UE = OldPhi->use_end();
+         UI != UE; ++UI)
+      AllUses.push_back(cast<Instruction>(UI));
+
+    for (SmallVector<Instruction *, 16>::iterator UI = AllUses.begin(),
+           UE = AllUses.end(); UI != UE; ++UI) {
+      Instruction *U = *UI;
+      BasicBlock *Parent = U->getParent();
+
+      // Used inside the original header
+      if (Parent == OrigHeader) {
+        // Do not rename uses inside original header non-phi instructions.
+        PHINode *PU = dyn_cast<PHINode>(U);
+        if (!PU)
+          continue;
+
+        // Do not rename uses inside original header phi nodes, if the
+        // incoming value is for the new header.
+        if (PU->getBasicBlockIndex(NewHeader) != -1
+            && PU->getIncomingValueForBlock(NewHeader) == U)
+          continue;
+
+        U->replaceUsesOfWith(OldPhi, NewPhi);
+        continue;
+      }
+
+      // Used inside the loop, but not in the original header.
+      if (L->contains(U->getParent())) {
+        if (U != NewPhi)
+          U->replaceUsesOfWith(OldPhi, NewPhi);
+        continue;
+      }
+
+      // Used inside the Exit Block. Since we are in LCSSA form, U must be a
+      // PHINode.
+      if (U->getParent() == Exit) {
+        assert(isa<PHINode>(U) && "Use in Exit Block that is not PHINode");
+
+        PHINode *UPhi = cast<PHINode>(U);
+        // UPhi already has one incoming argument from the original header.
+        // Add a second incoming argument from the new pre-header.
+        UPhi->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
+      } else {
+        // Used outside the Exit block. Create a new PHI node in the exit
+        // block to receive the value from the new header and pre-header.
+        PHINode *PN = PHINode::Create(U->getType(), U->getName(),
+                                      Exit->begin());
+        PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
+        PN->addIncoming(OldPhi, OrigHeader);
+        U->replaceUsesOfWith(OldPhi, PN);
+      }
+    }
+  }
+
+  /// Make sure all Exit block PHINodes have required incoming values.
+  updateExitBlock();
+
+  // Update CFG
+
+  // Remove the incoming branch from the loop pre-header to the original
+  // header. Now the original header is inside the loop.
+  for (BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+       I != E; ++I)
+    if (PHINode *PN = dyn_cast<PHINode>(I))
+      PN->removeIncomingValue(OrigPreHeader);
+
+  // Make NewHeader the new header for the loop.
+  L->moveToHeader(NewHeader);
+
+  preserveCanonicalLoopForm(LPM);
+
+  NumRotated++;
+  return true;
+}
+
+/// Make sure all Exit block PHINodes have required incoming values.
+/// If an incoming value is constant or defined outside the loop then
+/// the PHINode may not have an entry for the original pre-header.
+void LoopRotate::updateExitBlock() {
+
+  for (BasicBlock::iterator I = Exit->begin(), E = Exit->end();
+       I != E; ++I) {
+
+    PHINode *PN = dyn_cast<PHINode>(I);
+    if (!PN)
+      break;
+
+    // There is already one incoming value from the original pre-header block.
+    if (PN->getBasicBlockIndex(OrigPreHeader) != -1)
+      continue;
+
+    const RenameData *ILoopHeaderInfo;
+    Value *V = PN->getIncomingValueForBlock(OrigHeader);
+    if (isa<Instruction>(V) &&
+        (ILoopHeaderInfo = findReplacementData(cast<Instruction>(V)))) {
+      assert(ILoopHeaderInfo->PreHeader && "Missing New Preheader Instruction");
+      PN->addIncoming(ILoopHeaderInfo->PreHeader, OrigPreHeader);
+    } else {
+      PN->addIncoming(V, OrigPreHeader);
+    }
+  }
+}
+
+/// Initialize local data
+void LoopRotate::initialize() {
+  L = NULL;
+  OrigHeader = NULL;
+  OrigPreHeader = NULL;
+  NewHeader = NULL;
+  Exit = NULL;
+
+  LoopHeaderInfo.clear();
+}
+
+/// Return true if this instruction is used by any instructions in the loop
+/// that aren't in the original header.
+bool LoopRotate::usedOutsideOriginalHeader(Instruction *In) {
+  for (Value::use_iterator UI = In->use_begin(), UE = In->use_end();
+       UI != UE; ++UI) {
+    BasicBlock *UserBB = cast<Instruction>(UI)->getParent();
+    if (UserBB != OrigHeader && L->contains(UserBB))
+      return true;
+  }
+
+  return false;
+}
+
+/// Find replacement information for instruction. Return NULL if it is
+/// not available.
+const RenameData *LoopRotate::findReplacementData(Instruction *In) {
+
+  // Since LoopHeaderInfo is small, a linear walk is OK.
+  for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
+    const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
+    if (ILoopHeaderInfo.Original == In)
+      return &ILoopHeaderInfo;
+  }
+  return NULL;
+}
+
+/// After loop rotation, the loop pre-header has multiple successors.
+/// Insert one forwarding basic block to ensure that the loop pre-header
+/// has only one successor.
+void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
+
+  // Right now the original pre-header has two successors, the new header and
+  // the exit block. Insert a new block between the original pre-header and
+  // the new header such that the loop's new pre-header has only one successor.
+  BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph",
+                                                OrigHeader->getParent(),
+                                                NewHeader);
+  LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+  if (Loop *PL = LI.getLoopFor(OrigPreHeader))
+    PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
+  BranchInst::Create(NewHeader, NewPreHeader);
+
+  BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
+  if (OrigPH_BI->getSuccessor(0) == NewHeader)
+    OrigPH_BI->setSuccessor(0, NewPreHeader);
+  else {
+    assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
+           "Unexpected original pre-header terminator");
+    OrigPH_BI->setSuccessor(1, NewPreHeader);
+  }
+
+  for (BasicBlock::iterator I = NewHeader->begin(), E = NewHeader->end();
+       I != E; ++I) {
+    PHINode *PN = dyn_cast<PHINode>(I);
+    if (!PN)
+      break;
+
+    int index = PN->getBasicBlockIndex(OrigPreHeader);
+    assert(index != -1 && "Expected incoming value from Original PreHeader");
+    PN->setIncomingBlock(index, NewPreHeader);
+    assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
+           "Expected only one incoming value from Original PreHeader");
+  }
+
+  if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+    DT->addNewBlock(NewPreHeader, OrigPreHeader);
+    DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
+    DT->changeImmediateDominator(Exit, OrigPreHeader);
+    for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+         BI != BE; ++BI) {
+      BasicBlock *B = *BI;
+      if (L->getHeader() != B) {
+        DomTreeNode *Node = DT->getNode(B);
+        if (Node && Node->getBlock() == OrigHeader)
+          DT->changeImmediateDominator(*BI, L->getHeader());
+      }
+    }
+    DT->changeImmediateDominator(OrigHeader, OrigLatch);
+  }
+
+  if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
+    // New Preheader's dominance frontier is Exit block.
+    DominanceFrontier::DomSetType NewPHSet;
+    NewPHSet.insert(Exit);
+    DF->addBasicBlock(NewPreHeader, NewPHSet);
+
+    // New Header's dominance frontier now includes itself and Exit block.
+    DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
+    if (HeadI != DF->end()) {
+      DominanceFrontier::DomSetType &HeaderSet = HeadI->second;
+      HeaderSet.clear();
+      HeaderSet.insert(L->getHeader());
+      HeaderSet.insert(Exit);
+    } else {
+      DominanceFrontier::DomSetType HeaderSet;
+      HeaderSet.insert(L->getHeader());
+      HeaderSet.insert(Exit);
+      DF->addBasicBlock(L->getHeader(), HeaderSet);
+    }
+
+    // Original header (new Loop Latch)'s dominance frontier is Exit.
+    DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
+    if (LatchI != DF->end()) {
+      DominanceFrontier::DomSetType &LatchSet = LatchI->second;
+      LatchSet = LatchI->second;
+      LatchSet.clear();
+      LatchSet.insert(Exit);
+    } else {
+      DominanceFrontier::DomSetType LatchSet;
+      LatchSet.insert(Exit);
+      DF->addBasicBlock(L->getHeader(), LatchSet);
+    }
+
+    // If a loop block dominates new loop latch then its frontier is
+    // new header and Exit.
+    BasicBlock *NewLatch = L->getLoopLatch();
+    DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+    for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+         BI != BE; ++BI) {
+      BasicBlock *B = *BI;
+      if (DT->dominates(B, NewLatch)) {
+        DominanceFrontier::iterator BDFI = DF->find(B);
+        if (BDFI != DF->end()) {
+          DominanceFrontier::DomSetType &BSet = BDFI->second;
+          BSet = BDFI->second;
+          BSet.clear();
+          BSet.insert(L->getHeader());
+          BSet.insert(Exit);
+        } else {
+          DominanceFrontier::DomSetType BSet;
+          BSet.insert(L->getHeader());
+          BSet.insert(Exit);
+          DF->addBasicBlock(B, BSet);
+        }
+      }
+    }
+  }
+
+  // Preserve canonical loop form, which means the Exit block should
+  // have only one predecessor.
+  BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this);
+
+  // Preserve LCSSA.
+  BasicBlock::iterator I = Exit->begin(), E = Exit->end();
+  PHINode *PN = NULL;
+  for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+    unsigned N = PN->getNumIncomingValues();
+    for (unsigned index = 0; index < N; ++index)
+      if (PN->getIncomingBlock(index) == NExit) {
+        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(),
+                                         NExit->begin());
+        NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch());
+        PN->setIncomingValue(index, NewPN);
+        PN->setIncomingBlock(index, NExit);
+        break;
+      }
+  }
+
+  assert(NewHeader && L->getHeader() == NewHeader &&
+         "Invalid loop header after loop rotation");
+  assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
+         "Invalid loop preheader after loop rotation");
+  assert(L->getLoopLatch() &&
+         "Invalid loop latch after loop rotation");
+}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 000000000000..92270b5b6473
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,2605 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs a strength reduction on array references inside loops
+// that have as one or more of their components the loop induction variable.
+// It rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
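+//
+// For example (an illustrative sketch, not code from this pass): a loop
+//
+//   for (i = 0; i < n; ++i)
+//     A[i] = 0;
+//
+// recomputes the address A + i*sizeof(*A) with a multiply on each
+// iteration; strength reduction rewrites it to step a pointer instead:
+//
+//   for (p = A; p != A + n; ++p)
+//     *p = 0;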
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumReduced,    "Number of IV uses strength reduced");
+STATISTIC(NumInserted,   "Number of PHIs inserted");
+STATISTIC(NumVariable,   "Number of PHIs with variable strides");
+STATISTIC(NumEliminated, "Number of strides eliminated");
+STATISTIC(NumShadow,     "Number of Shadow IVs optimized");
+STATISTIC(NumImmSunk,    "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond,   "Number of loop terminating conds optimized");
+
+static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
+                                       cl::init(false),
+                                       cl::Hidden);
+
+namespace {
+
+  struct BasedUser;
+
+  /// IVExpr - This structure keeps track of one IV expression inserted during
+  /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
+  /// well as the PHI node and increment value created for rewrite.
+  struct VISIBILITY_HIDDEN IVExpr {
+    SCEVHandle  Stride;
+    SCEVHandle  Base;
+    PHINode    *PHI;
+
+    IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi)
+      : Stride(stride), Base(base), PHI(phi) {}
+  };
+
+  /// IVsOfOneStride - This structure keeps track of all IV expressions
+  /// inserted during StrengthReduceStridedIVUsers for a particular stride
+  /// of the IV.
+  struct VISIBILITY_HIDDEN IVsOfOneStride {
+    std::vector<IVExpr> IVs;
+
+    void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI) {
+      IVs.push_back(IVExpr(Stride, Base, PHI));
+    }
+  };
+
+  class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
+    IVUsers *IU;
+    LoopInfo *LI;
+    DominatorTree *DT;
+    ScalarEvolution *SE;
+    bool Changed;
+
+    /// IVsByStride - Keep track of all IVs that have been inserted for a
+    /// particular stride.
+    std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
+
+    /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
+    /// reused (nor should they be rewritten to reuse other strides).
+    SmallSet<SCEVHandle, 4> StrideNoReuse;
+
+    /// DeadInsts - Keep track of instructions we may have made dead, so that
+    /// we can remove them after we are done working.
+    SmallVector<WeakVH, 16> DeadInsts;
+
+    /// TLI - Keep a pointer of a TargetLowering to consult for determining
+    /// transformation profitability.
+    const TargetLowering *TLI;
+
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+      LoopPass(&ID), TLI(tli) {
+    }
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // We split critical edges, so we change the CFG.  However, we do update
+      // many analyses if they are around.
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreserved<LoopInfo>();
+      AU.addPreserved<DominanceFrontier>();
+      AU.addPreserved<DominatorTree>();
+
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequired<LoopInfo>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<ScalarEvolution>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addRequired<IVUsers>();
+      AU.addPreserved<IVUsers>();
+    }
+
+  private:
+    ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+                                  IVStrideUse* &CondUse,
+                                  const SCEVHandle* &CondStride);
+
+    void OptimizeIndvars(Loop *L);
+    void OptimizeLoopCountIV(Loop *L);
+    void OptimizeLoopTermCond(Loop *L);
+
+    /// OptimizeShadowIV - If IV is used in an int-to-float cast
+    /// inside the loop then try to eliminate the cast operation.
+    void OptimizeShadowIV(Loop *L);
+
+    /// OptimizeSMax - Rewrite the loop's terminating condition
+    /// if it uses an smax computation.
+    ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
+                           IVStrideUse* &CondUse);
+
+    bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
+                           const SCEVHandle *&CondStride);
+    bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
+    SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
+                               IVExpr&, const Type*,
+                               const std::vector<BasedUser>& UsersToProcess);
+    bool ValidScale(bool, int64_t,
+                    const std::vector<BasedUser>& UsersToProcess);
+    bool ValidOffset(bool, int64_t, int64_t,
+                     const std::vector<BasedUser>& UsersToProcess);
+    SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
+                              IVUsersOfOneStride &Uses,
+                              Loop *L,
+                              bool &AllUsesAreAddresses,
+                              bool &AllUsesAreOutsideLoop,
+                              std::vector<BasedUser> &UsersToProcess);
+    bool ShouldUseFullStrengthReductionMode(
+                                const std::vector<BasedUser> &UsersToProcess,
+                                const Loop *L,
+                                bool AllUsesAreAddresses,
+                                SCEVHandle Stride);
+    void PrepareToStrengthReduceFully(
+                             std::vector<BasedUser> &UsersToProcess,
+                             SCEVHandle Stride,
+                             SCEVHandle CommonExprs,
+                             const Loop *L,
+                             SCEVExpander &PreheaderRewriter);
+    void PrepareToStrengthReduceFromSmallerStride(
+                                std::vector<BasedUser> &UsersToProcess,
+                                Value *CommonBaseV,
+                                const IVExpr &ReuseIV,
+                                Instruction *PreInsertPt);
+    void PrepareToStrengthReduceWithNewPhi(
+                                std::vector<BasedUser> &UsersToProcess,
+                                SCEVHandle Stride,
+                                SCEVHandle CommonExprs,
+                                Value *CommonBaseV,
+                                Instruction *IVIncInsertPt,
+                                const Loop *L,
+                                SCEVExpander &PreheaderRewriter);
+    void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
+                                      IVUsersOfOneStride &Uses,
+                                      Loop *L);
+    void DeleteTriviallyDeadInstructions();
+  };
+}
+
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+  return new LoopStrengthReduce(TLI);
+}
+
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
+  if (DeadInsts.empty()) return;
+
+  while (!DeadInsts.empty()) {
+    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
+    DeadInsts.pop_back();
+
+    if (I == 0 || !isInstructionTriviallyDead(I))
+      continue;
+
+    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+        *OI = 0;
+        if (U->use_empty())
+          DeadInsts.push_back(U);
+      }
+    }
+
+    I->eraseFromParent();
+    Changed = true;
+  }
+}
+
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L.  An outer loop
+/// of L is OK, but not an inner loop nor a disjoint loop.
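+/// For example (an illustrative sketch): given nested loops Outer and Inner,
+/// if S is {0,+,4}<Inner> and L is Outer, the AddRec belongs to an inner
+/// loop and this returns true; if instead S is {0,+,4}<Outer> and L is
+/// Inner, the AddRec belongs to an outer loop of L and this returns false.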
+static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { + // This is very common, put it first. + if (isa(S)) + return false; + if (const SCEVCommutativeExpr *AE = dyn_cast(S)) { + for (unsigned int i=0; i< AE->getNumOperands(); i++) + if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) + return true; + return false; + } + if (const SCEVAddRecExpr *AE = dyn_cast(S)) { + if (const Loop *newLoop = AE->getLoop()) { + if (newLoop == L) + return false; + // if newLoop is an outer loop of L, this is OK. + if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop)) + return false; + } + return true; + } + if (const SCEVUDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#if 0 + // SCEVSDivExpr has been backed out temporarily, but will be back; we'll + // need this when it is. + if (const SCEVSDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#endif + if (const SCEVCastExpr *CE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(CE->getOperand(), L); + return false; +} + +/// isAddressUse - Returns true if the specified instruction is using the +/// specified value as an address. +static bool isAddressUse(Instruction *Inst, Value *OperandVal) { + bool isAddress = isa(Inst); + if (StoreInst *SI = dyn_cast(Inst)) { + if (SI->getOperand(1) == OperandVal) + isAddress = true; + } else if (IntrinsicInst *II = dyn_cast(Inst)) { + // Addressing modes can also be folded into prefetches and a variety + // of intrinsics. + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::prefetch: + case Intrinsic::x86_sse2_loadu_dq: + case Intrinsic::x86_sse2_loadu_pd: + case Intrinsic::x86_sse_loadu_ps: + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + case Intrinsic::x86_sse2_storel_dq: + if (II->getOperand(1) == OperandVal) + isAddress = true; + break; + } + } + return isAddress; +} + +/// getAccessType - Return the type of the memory being accessed. +static const Type *getAccessType(const Instruction *Inst) { + const Type *AccessTy = Inst->getType(); + if (const StoreInst *SI = dyn_cast(Inst)) + AccessTy = SI->getOperand(0)->getType(); + else if (const IntrinsicInst *II = dyn_cast(Inst)) { + // Addressing modes can also be folded into prefetches and a variety + // of intrinsics. + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + case Intrinsic::x86_sse2_storel_dq: + AccessTy = II->getOperand(1)->getType(); + break; + } + } + return AccessTy; +} + +namespace { + /// BasedUser - For a particular base value, keep information about how we've + /// partitioned the expression so far. + struct BasedUser { + /// SE - The current ScalarEvolution object. + ScalarEvolution *SE; + + /// Base - The Base value for the PHI node that needs to be inserted for + /// this use. As the use is processed, information gets moved from this + /// field to the Imm field (below). BasedUser values are sorted by this + /// field. + SCEVHandle Base; + + /// Inst - The instruction using the induction variable. + Instruction *Inst; + + /// OperandValToReplace - The operand value of Inst to replace with the + /// EmittedBase. 
+ Value *OperandValToReplace; + + /// isSigned - The stride (and thus also the Base) of this use may be in + /// a narrower type than the use itself (OperandValToReplace->getType()). + /// When this is the case, the isSigned field indicates whether the + /// IV expression should be signed-extended instead of zero-extended to + /// fit the type of the use. + bool isSigned; + + /// Imm - The immediate value that should be added to the base immediately + /// before Inst, because it will be folded into the imm field of the + /// instruction. This is also sometimes used for loop-variant values that + /// must be added inside the loop. + SCEVHandle Imm; + + /// Phi - The induction variable that performs the striding that + /// should be used for this user. + PHINode *Phi; + + // isUseOfPostIncrementedValue - True if this should use the + // post-incremented version of this IV, not the preincremented version. + // This can only be set in special cases, such as the terminating setcc + // instruction for a loop and uses outside the loop that are dominated by + // the loop. + bool isUseOfPostIncrementedValue; + + BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) + : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + OperandValToReplace(IVSU.getOperandValToReplace()), + isSigned(IVSU.isSigned()), + Imm(SE->getIntegerSCEV(0, Base->getType())), + isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} + + // Once we rewrite the code to insert the new IVs we want, update the + // operands of Inst to use the new expression 'NewBase', with 'Imm' added + // to it. + void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, + Instruction *InsertPt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + LoopInfo &LI, + SmallVectorImpl &DeadInsts); + + Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, + const Type *Ty, + SCEVExpander &Rewriter, + Instruction *IP, Loop *L, + LoopInfo &LI); + void dump() const; + }; +} + +void BasedUser::dump() const { + cerr << " Base=" << *Base; + cerr << " Imm=" << *Imm; + cerr << " Inst: " << *Inst; +} + +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, + const Type *Ty, + SCEVExpander &Rewriter, + Instruction *IP, Loop *L, + LoopInfo &LI) { + // Figure out where we *really* want to insert this code. In particular, if + // the user is inside of a loop that is nested inside of L, we really don't + // want to insert this expression before the user, we'd rather pull it out as + // many loops as possible. + Instruction *BaseInsertPt = IP; + + // Figure out the most-nested loop that IP is in. + Loop *InsertLoop = LI.getLoopFor(IP->getParent()); + + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out + // the preheader of the outer-most loop where NewBase is not loop invariant. + if (L->contains(IP->getParent())) + while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { + BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); + InsertLoop = InsertLoop->getParentLoop(); + } + + Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + + SCEVHandle NewValSCEV = SE->getUnknown(Base); + + // If there is no immediate value, skip the next part. + if (!Imm->isZero()) { + // If we are inserting the base and imm values in the same block, make sure + // to adjust the IP position if insertion reused a result. + if (IP == BaseInsertPt) + IP = Rewriter.getInsertionPoint(); + + // Always emit the immediate (if non-zero) into the same block as the user. 
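+    // (Illustrative: if NewBase expands to a value %base and Imm is the
+    // constant 16, the code emitted here computes %base + 16 next to the
+    // user instead of hoisting the add away from it.)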
+ NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); + } + + if (isSigned) + NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); + else + NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); + + return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); +} + + +// Once we rewrite the code to insert the new IVs we want, update the +// operands of Inst to use the new expression 'NewBase', with 'Imm' added +// to it. NewBasePt is the last instruction which contributes to the +// value of NewBase in the case that it's a diffferent instruction from +// the PHI that NewBase is computed from, or null otherwise. +// +void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, + Instruction *NewBasePt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + LoopInfo &LI, + SmallVectorImpl &DeadInsts) { + if (!isa(Inst)) { + // By default, insert code at the user instruction. + BasicBlock::iterator InsertPt = Inst; + + // However, if the Operand is itself an instruction, the (potentially + // complex) inserted code may be shared by many users. Because of this, we + // want to emit code for the computation of the operand right before its old + // computation. This is usually safe, because we obviously used to use the + // computation when it was computed in its current block. However, in some + // cases (e.g. use of a post-incremented induction variable) the NewBase + // value will be pinned to live somewhere after the original computation. + // In this case, we have to back off. + // + // If this is a use outside the loop (which means after, since it is based + // on a loop indvar) we use the post-incremented value, so that we don't + // artificially make the preinc value live out the bottom of the loop. + if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) { + if (NewBasePt && isa(OperandValToReplace)) { + InsertPt = NewBasePt; + ++InsertPt; + } else if (Instruction *OpInst + = dyn_cast(OperandValToReplace)) { + InsertPt = OpInst; + while (isa(InsertPt)) ++InsertPt; + } + } + Value *NewVal = InsertCodeForBaseAtPosition(NewBase, + OperandValToReplace->getType(), + Rewriter, InsertPt, L, LI); + // Replace the use of the operand Value with the new Phi we just created. + Inst->replaceUsesOfWith(OperandValToReplace, NewVal); + + DOUT << " Replacing with "; + DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false)); + DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + return; + } + + // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm + // expression into each operand block that uses it. Note that PHI nodes can + // have multiple entries for the same predecessor. We use a map to make sure + // that a PHI node only has a single Value* for each predecessor (which also + // prevents us from inserting duplicate code in some blocks). + DenseMap InsertedCode; + PHINode *PN = cast(Inst); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) == OperandValToReplace) { + // If the original expression is outside the loop, put the replacement + // code in the same place as the original expression, + // which need not be an immediate predecessor of this PHI. This way we + // need only one copy of it even if it is referenced multiple times in + // the PHI. We don't do this when the original expression is inside the + // loop because multiple copies sometimes do useful sinking of code in + // that case(?). 
+ Instruction *OldLoc = dyn_cast(OperandValToReplace); + if (L->contains(OldLoc->getParent())) { + // If this is a critical edge, split the edge so that we do not insert + // the code on all predecessor/successor paths. We do this unless this + // is the canonical backedge for this loop, as this can make some + // inserted code be in an illegal position. + BasicBlock *PHIPred = PN->getIncomingBlock(i); + if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && + (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { + + // First step, split the critical edge. + SplitCriticalEdge(PHIPred, PN->getParent(), P, false); + + // Next step: move the basic block. In particular, if the PHI node + // is outside of the loop, and PredTI is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after PredTI. + if (L->contains(PHIPred) && !L->contains(PN->getParent())) { + BasicBlock *NewBB = PN->getIncomingBlock(i); + NewBB->moveBefore(PN->getParent()); + } + + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); + } + } + Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; + if (!Code) { + // Insert the code into the end of the predecessor block. + Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? + PN->getIncomingBlock(i)->getTerminator() : + OldLoc->getParent()->getTerminator(); + Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), + Rewriter, InsertPt, L, LI); + + DOUT << " Changing PHI use to "; + DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false)); + DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + } + + // Replace the use of the operand Value with the new Phi we just created. + PN->setIncomingValue(i, Code); + Rewriter.clear(); + } + } + + // PHI node might have become a constant value after SplitCriticalEdge. + DeadInsts.push_back(Inst); +} + + +/// fitsInAddressMode - Return true if V can be subsumed within an addressing +/// mode, and does not need to be put in a register first. +static bool fitsInAddressMode(const SCEVHandle &V, const Type *AccessTy, + const TargetLowering *TLI, bool HasBaseReg) { + if (const SCEVConstant *SC = dyn_cast(V)) { + int64_t VC = SC->getValue()->getSExtValue(); + if (TLI) { + TargetLowering::AddrMode AM; + AM.BaseOffs = VC; + AM.HasBaseReg = HasBaseReg; + return TLI->isLegalAddressingMode(AM, AccessTy); + } else { + // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. + return (VC > -(1 << 16) && VC < (1 << 16)-1); + } + } + + if (const SCEVUnknown *SU = dyn_cast(V)) + if (GlobalValue *GV = dyn_cast(SU->getValue())) { + if (TLI) { + TargetLowering::AddrMode AM; + AM.BaseGV = GV; + AM.HasBaseReg = HasBaseReg; + return TLI->isLegalAddressingMode(AM, AccessTy); + } else { + // Default: assume global addresses are not legal. + } + } + + return false; +} + +/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are +/// loop varying to the Imm operand. +static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, + Loop *L, ScalarEvolution *SE) { + if (Val->isLoopInvariant(L)) return; // Nothing to do. + + if (const SCEVAddExpr *SAE = dyn_cast(Val)) { + std::vector NewOps; + NewOps.reserve(SAE->getNumOperands()); + + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) + if (!SAE->getOperand(i)->isLoopInvariant(L)) { + // If this is a loop-variant expression, it must stay in the immediate + // field of the expression. 
+ Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); + } else { + NewOps.push_back(SAE->getOperand(i)); + } + + if (NewOps.empty()) + Val = SE->getIntegerSCEV(0, Val->getType()); + else + Val = SE->getAddExpr(NewOps); + } else if (const SCEVAddRecExpr *SARE = dyn_cast(Val)) { + // Try to pull immediates out of the start value of nested addrec's. + SCEVHandle Start = SARE->getStart(); + MoveLoopVariantsToImmediateField(Start, Imm, L, SE); + + std::vector Ops(SARE->op_begin(), SARE->op_end()); + Ops[0] = Start; + Val = SE->getAddRecExpr(Ops, SARE->getLoop()); + } else { + // Otherwise, all of Val is variant, move the whole thing over. + Imm = SE->getAddExpr(Imm, Val); + Val = SE->getIntegerSCEV(0, Val->getType()); + } +} + + +/// MoveImmediateValues - Look at Val, and pull out any additions of constants +/// that can fit into the immediate field of instructions in the target. +/// Accumulate these immediate values into the Imm value. +static void MoveImmediateValues(const TargetLowering *TLI, + const Type *AccessTy, + SCEVHandle &Val, SCEVHandle &Imm, + bool isAddress, Loop *L, + ScalarEvolution *SE) { + if (const SCEVAddExpr *SAE = dyn_cast(Val)) { + std::vector NewOps; + NewOps.reserve(SAE->getNumOperands()); + + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { + SCEVHandle NewOp = SAE->getOperand(i); + MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); + + if (!NewOp->isLoopInvariant(L)) { + // If this is a loop-variant expression, it must stay in the immediate + // field of the expression. + Imm = SE->getAddExpr(Imm, NewOp); + } else { + NewOps.push_back(NewOp); + } + } + + if (NewOps.empty()) + Val = SE->getIntegerSCEV(0, Val->getType()); + else + Val = SE->getAddExpr(NewOps); + return; + } else if (const SCEVAddRecExpr *SARE = dyn_cast(Val)) { + // Try to pull immediates out of the start value of nested addrec's. + SCEVHandle Start = SARE->getStart(); + MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); + + if (Start != SARE->getStart()) { + std::vector Ops(SARE->op_begin(), SARE->op_end()); + Ops[0] = Start; + Val = SE->getAddRecExpr(Ops, SARE->getLoop()); + } + return; + } else if (const SCEVMulExpr *SME = dyn_cast(Val)) { + // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. + if (isAddress && + fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && + SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { + + SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType()); + SCEVHandle NewOp = SME->getOperand(1); + MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); + + // If we extracted something out of the subexpressions, see if we can + // simplify this! + if (NewOp != SME->getOperand(1)) { + // Scale SubImm up by "8". If the result is a target constant, we are + // good. + SubImm = SE->getMulExpr(SubImm, SME->getOperand(0)); + if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) { + // Accumulate the immediate. + Imm = SE->getAddExpr(Imm, SubImm); + + // Update what is left of 'Val'. + Val = SE->getMulExpr(SME->getOperand(0), NewOp); + return; + } + } + } + } + + // Loop-variant expressions must stay in the immediate field of the + // expression. + if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) || + !Val->isLoopInvariant(L)) { + Imm = SE->getAddExpr(Imm, Val); + Val = SE->getIntegerSCEV(0, Val->getType()); + return; + } + + // Otherwise, no immediates to move. 
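+  // (Illustrative: for an address use with Val = A + 4 on a target whose
+  // addressing mode folds small offsets, the constant 4 is accumulated into
+  // Imm and Val is reduced to A; loop-variant pieces likewise end up in Imm
+  // so they are emitted next to the user.)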
+} + +static void MoveImmediateValues(const TargetLowering *TLI, + Instruction *User, + SCEVHandle &Val, SCEVHandle &Imm, + bool isAddress, Loop *L, + ScalarEvolution *SE) { + const Type *AccessTy = getAccessType(User); + MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE); +} + +/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are +/// added together. This is used to reassociate common addition subexprs +/// together for maximal sharing when rewriting bases. +static void SeparateSubExprs(std::vector &SubExprs, + SCEVHandle Expr, + ScalarEvolution *SE) { + if (const SCEVAddExpr *AE = dyn_cast(Expr)) { + for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) + SeparateSubExprs(SubExprs, AE->getOperand(j), SE); + } else if (const SCEVAddRecExpr *SARE = dyn_cast(Expr)) { + SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType()); + if (SARE->getOperand(0) == Zero) { + SubExprs.push_back(Expr); + } else { + // Compute the addrec with zero as its base. + std::vector Ops(SARE->op_begin(), SARE->op_end()); + Ops[0] = Zero; // Start with zero base. + SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); + + + SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); + } + } else if (!Expr->isZero()) { + // Do not add zero. + SubExprs.push_back(Expr); + } +} + +// This is logically local to the following function, but C++ says we have +// to make it file scope. +struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; + +/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all +/// the Uses, removing any common subexpressions, except that if all such +/// subexpressions can be folded into an addressing mode for all uses inside +/// the loop (this case is referred to as "free" in comments herein) we do +/// not remove anything. This looks for things like (a+b+c) and +/// (a+c+d) and computes the common (a+c) subexpression. The common expression +/// is *removed* from the Bases and returned. +static SCEVHandle +RemoveCommonExpressionsFromUseBases(std::vector &Uses, + ScalarEvolution *SE, Loop *L, + const TargetLowering *TLI) { + unsigned NumUses = Uses.size(); + + // Only one use? This is a very common case, so we handle it specially and + // cheaply. + SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); + SCEVHandle Result = Zero; + SCEVHandle FreeResult = Zero; + if (NumUses == 1) { + // If the use is inside the loop, use its base, regardless of what it is: + // it is clearly shared across all the IV's. If the use is outside the loop + // (which means after it) we don't want to factor anything *into* the loop, + // so just use 0 as the base. + if (L->contains(Uses[0].Inst->getParent())) + std::swap(Result, Uses[0].Base); + return Result; + } + + // To find common subexpressions, count how many of Uses use each expression. + // If any subexpressions are used Uses.size() times, they are common. + // Also track whether all uses of each expression can be moved into an + // an addressing mode "for free"; such expressions are left within the loop. + // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; + std::map SubExpressionUseData; + + // UniqueSubExprs - Keep track of all of the subexpressions we see in the + // order we see them. + std::vector UniqueSubExprs; + + std::vector SubExprs; + unsigned NumUsesInsideLoop = 0; + for (unsigned i = 0; i != NumUses; ++i) { + // If the user is outside the loop, just ignore it for base computation. 
+ // Since the user is outside the loop, it must be *after* the loop (if it + // were before, it could not be based on the loop IV). We don't want users + // after the loop to affect base computation of values *inside* the loop, + // because we can always add their offsets to the result IV after the loop + // is done, ensuring we get good code inside the loop. + if (!L->contains(Uses[i].Inst->getParent())) + continue; + NumUsesInsideLoop++; + + // If the base is zero (which is common), return zero now, there are no + // CSEs we can find. + if (Uses[i].Base == Zero) return Zero; + + // If this use is as an address we may be able to put CSEs in the addressing + // mode rather than hoisting them. + bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace); + // We may need the AccessTy below, but only when isAddrUse, so compute it + // only in that case. + const Type *AccessTy = 0; + if (isAddrUse) + AccessTy = getAccessType(Uses[i].Inst); + + // Split the expression into subexprs. + SeparateSubExprs(SubExprs, Uses[i].Base, SE); + // Add one to SubExpressionUseData.Count for each subexpr present, and + // if the subexpr is not a valid immediate within an addressing mode use, + // set SubExpressionUseData.notAllUsesAreFree. We definitely want to + // hoist these out of the loop (if they are common to all uses). + for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { + if (++SubExpressionUseData[SubExprs[j]].Count == 1) + UniqueSubExprs.push_back(SubExprs[j]); + if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false)) + SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true; + } + SubExprs.clear(); + } + + // Now that we know how many times each is used, build Result. Iterate over + // UniqueSubexprs so that we have a stable ordering. + for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { + std::map::iterator I = + SubExpressionUseData.find(UniqueSubExprs[i]); + assert(I != SubExpressionUseData.end() && "Entry not found?"); + if (I->second.Count == NumUsesInsideLoop) { // Found CSE! + if (I->second.notAllUsesAreFree) + Result = SE->getAddExpr(Result, I->first); + else + FreeResult = SE->getAddExpr(FreeResult, I->first); + } else + // Remove non-cse's from SubExpressionUseData. + SubExpressionUseData.erase(I); + } + + if (FreeResult != Zero) { + // We have some subexpressions that can be subsumed into addressing + // modes in every use inside the loop. However, it's possible that + // there are so many of them that the combined FreeResult cannot + // be subsumed, or that the target cannot handle both a FreeResult + // and a Result in the same instruction (for example because it would + // require too many registers). Check this. + for (unsigned i=0; icontains(Uses[i].Inst->getParent())) + continue; + // We know this is an addressing mode use; if there are any uses that + // are not, FreeResult would be Zero. + const Type *AccessTy = getAccessType(Uses[i].Inst); + if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) { + // FIXME: could split up FreeResult into pieces here, some hoisted + // and some not. There is no obvious advantage to this. + Result = SE->getAddExpr(Result, FreeResult); + FreeResult = Zero; + break; + } + } + } + + // If we found no CSE's, return now. + if (Result == Zero) return Result; + + // If we still have a FreeResult, remove its subexpressions from + // SubExpressionUseData. This means they will remain in the use Bases. 
+ if (FreeResult != Zero) { + SeparateSubExprs(SubExprs, FreeResult, SE); + for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { + std::map::iterator I = + SubExpressionUseData.find(SubExprs[j]); + SubExpressionUseData.erase(I); + } + SubExprs.clear(); + } + + // Otherwise, remove all of the CSE's we found from each of the base values. + for (unsigned i = 0; i != NumUses; ++i) { + // Uses outside the loop don't necessarily include the common base, but + // the final IV value coming into those uses does. Instead of trying to + // remove the pieces of the common base, which might not be there, + // subtract off the base to compensate for this. + if (!L->contains(Uses[i].Inst->getParent())) { + Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); + continue; + } + + // Split the expression into subexprs. + SeparateSubExprs(SubExprs, Uses[i].Base, SE); + + // Remove any common subexpressions. + for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) + if (SubExpressionUseData.count(SubExprs[j])) { + SubExprs.erase(SubExprs.begin()+j); + --j; --e; + } + + // Finally, add the non-shared expressions together. + if (SubExprs.empty()) + Uses[i].Base = Zero; + else + Uses[i].Base = SE->getAddExpr(SubExprs); + SubExprs.clear(); + } + + return Result; +} + +/// ValidScale - Check whether the given Scale is valid for all loads and +/// stores in UsersToProcess. +/// +bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, + const std::vector& UsersToProcess) { + if (!TLI) + return true; + + for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace)) + AccessTy = getAccessType(UsersToProcess[i].Inst); + else if (isa(UsersToProcess[i].Inst)) + continue; + + TargetLowering::AddrMode AM; + if (const SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) + AM.BaseOffs = SC->getValue()->getSExtValue(); + AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); + AM.Scale = Scale; + + // If load[imm+r*scale] is illegal, bail out. + if (!TLI->isLegalAddressingMode(AM, AccessTy)) + return false; + } + return true; +} + +/// ValidOffset - Check whether the given Offset is valid for all loads and +/// stores in UsersToProcess. +/// +bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, + int64_t Offset, + int64_t Scale, + const std::vector& UsersToProcess) { + if (!TLI) + return true; + + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace)) + AccessTy = getAccessType(UsersToProcess[i].Inst); + else if (isa(UsersToProcess[i].Inst)) + continue; + + TargetLowering::AddrMode AM; + if (const SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) + AM.BaseOffs = SC->getValue()->getSExtValue(); + AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset; + AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); + AM.Scale = Scale; + + // If load[imm+r*scale] is illegal, bail out. + if (!TLI->isLegalAddressingMode(AM, AccessTy)) + return false; + } + return true; +} + +/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not +/// a nop. 
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, + const Type *Ty2) { + if (Ty1 == Ty2) + return false; + Ty1 = SE->getEffectiveSCEVType(Ty1); + Ty2 = SE->getEffectiveSCEVType(Ty2); + if (Ty1 == Ty2) + return false; + if (Ty1->canLosslesslyBitCastTo(Ty2)) + return false; + if (TLI && TLI->isTruncateFree(Ty1, Ty2)) + return false; + return true; +} + +/// CheckForIVReuse - Returns the multiple if the stride is the multiple +/// of a previous stride and it is a legal value for the target addressing +/// mode scale component and optional base reg. This allows the users of +/// this stride to be rewritten as prev iv * factor. It returns 0 if no +/// reuse is possible. Factors can be negative on same targets, e.g. ARM. +/// +/// If all uses are outside the loop, we don't require that all multiplies +/// be folded into the addressing mode, nor even that the factor be constant; +/// a multiply (executed once) outside the loop is better than another IV +/// within. Well, usually. +SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, + bool AllUsesAreAddresses, + bool AllUsesAreOutsideLoop, + const SCEVHandle &Stride, + IVExpr &IV, const Type *Ty, + const std::vector& UsersToProcess) { + if (StrideNoReuse.count(Stride)) + return SE->getIntegerSCEV(0, Stride->getType()); + + if (const SCEVConstant *SC = dyn_cast(Stride)) { + int64_t SInt = SC->getValue()->getSExtValue(); + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { + std::map::iterator SI = + IVsByStride.find(IU->StrideOrder[NewStride]); + if (SI == IVsByStride.end() || !isa(SI->first) || + StrideNoReuse.count(SI->first)) + continue; + int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); + if (SI->first != Stride && + (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) + continue; + int64_t Scale = SInt / SSInt; + // Check that this stride is valid for all the types used for loads and + // stores; if it can be used for some and not others, we might as well use + // the original stride everywhere, since we have to create the IV for it + // anyway. If the scale is 1, then we don't need to worry about folding + // multiplications. + if (Scale == 1 || + (AllUsesAreAddresses && + ValidScale(HasBaseReg, Scale, UsersToProcess))) { + // Prefer to reuse an IV with a base of zero. + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. + if (II->Base->isZero() && + !RequiresTypeConversion(II->Base->getType(), Ty)) { + IV = *II; + return SE->getIntegerSCEV(Scale, Stride->getType()); + } + // Otherwise, settle for an IV with a foldable base. + if (AllUsesAreAddresses) + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. + if (SE->getEffectiveSCEVType(II->Base->getType()) == + SE->getEffectiveSCEVType(Ty) && + isa(II->Base)) { + int64_t Base = + cast(II->Base)->getValue()->getSExtValue(); + if (Base > INT32_MIN && Base <= INT32_MAX && + ValidOffset(HasBaseReg, -Base * Scale, + Scale, UsersToProcess)) { + IV = *II; + return SE->getIntegerSCEV(Scale, Stride->getType()); + } + } + } + } + } else if (AllUsesAreOutsideLoop) { + // Accept nonconstant strides here; it is really really right to substitute + // an existing IV if we can. 
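+    // (Illustrative: with an existing IV of constant stride 1 and a use
+    // whose stride is a loop-invariant value %s, the use can be rewritten
+    // as that IV multiplied by %s, since the single multiply executes
+    // outside the loop.)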
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { + std::map::iterator SI = + IVsByStride.find(IU->StrideOrder[NewStride]); + if (SI == IVsByStride.end() || !isa(SI->first)) + continue; + int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); + if (SI->first != Stride && SSInt != 1) + continue; + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Accept nonzero base here. + // Only reuse previous IV if it would not require a type conversion. + if (!RequiresTypeConversion(II->Base->getType(), Ty)) { + IV = *II; + return Stride; + } + } + // Special case, old IV is -1*x and this one is x. Can treat this one as + // -1*old. + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { + std::map::iterator SI = + IVsByStride.find(IU->StrideOrder[NewStride]); + if (SI == IVsByStride.end()) + continue; + if (const SCEVMulExpr *ME = dyn_cast(SI->first)) + if (const SCEVConstant *SC = dyn_cast(ME->getOperand(0))) + if (Stride == ME->getOperand(1) && + SC->getValue()->getSExtValue() == -1LL) + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Accept nonzero base here. + // Only reuse previous IV if it would not require type conversion. + if (!RequiresTypeConversion(II->Base->getType(), Ty)) { + IV = *II; + return SE->getIntegerSCEV(-1LL, Stride->getType()); + } + } + } + return SE->getIntegerSCEV(0, Stride->getType()); +} + +/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that +/// returns true if Val's isUseOfPostIncrementedValue is true. +static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { + return Val.isUseOfPostIncrementedValue; +} + +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +static bool isNonConstantNegative(const SCEVHandle &Expr) { + const SCEVMulExpr *Mul = dyn_cast(Expr); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +// CollectIVUsers - Transform our list of users and offsets to a bit more +// complex table. In this new vector, each 'BasedUser' contains 'Base', the base +// of the strided accesses, as well as the old information from Uses. We +// progressively move information from the Base field to the Imm field, until +// we eventually have the full access expression to rewrite the use. +SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, + IVUsersOfOneStride &Uses, + Loop *L, + bool &AllUsesAreAddresses, + bool &AllUsesAreOutsideLoop, + std::vector &UsersToProcess) { + // FIXME: Generalize to non-affine IV's. + if (!Stride->isLoopInvariant(L)) + return SE->getIntegerSCEV(0, Stride->getType()); + + UsersToProcess.reserve(Uses.Users.size()); + for (ilist::iterator I = Uses.Users.begin(), + E = Uses.Users.end(); I != E; ++I) { + UsersToProcess.push_back(BasedUser(*I, SE)); + + // Move any loop variant operands from the offset field to the immediate + // field of the use, so that we don't try to use something before it is + // computed. 
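+    // E.g. if a use's base is "%gv + %x" where %x is computed inside the
+    // loop, %x moves to the Imm field and only the loop-invariant "%gv"
+    // stays in Base (hypothetical names, for illustration).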
+ MoveLoopVariantsToImmediateField(UsersToProcess.back().Base, + UsersToProcess.back().Imm, L, SE); + assert(UsersToProcess.back().Base->isLoopInvariant(L) && + "Base value is not loop invariant!"); + } + + // We now have a whole bunch of uses of like-strided induction variables, but + // they might all have different bases. We want to emit one PHI node for this + // stride which we fold as many common expressions (between the IVs) into as + // possible. Start by identifying the common expressions in the base values + // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find + // "A+B"), emit it to the preheader, then remove the expression from the + // UsersToProcess base values. + SCEVHandle CommonExprs = + RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); + + // Next, figure out what we can represent in the immediate fields of + // instructions. If we can represent anything there, move it to the imm + // fields of the BasedUsers. We do this so that it increases the commonality + // of the remaining uses. + unsigned NumPHI = 0; + bool HasAddress = false; + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { + // If the user is not in the current loop, this means it is using the exit + // value of the IV. Do not put anything in the base, make sure it's all in + // the immediate field to allow as much factoring as possible. + if (!L->contains(UsersToProcess[i].Inst->getParent())) { + UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, + UsersToProcess[i].Base); + UsersToProcess[i].Base = + SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); + } else { + // Not all uses are outside the loop. + AllUsesAreOutsideLoop = false; + + // Addressing modes can be folded into loads and stores. Be careful that + // the store is through the expression, not of the expression though. + bool isPHI = false; + bool isAddress = isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace); + if (isa(UsersToProcess[i].Inst)) { + isPHI = true; + ++NumPHI; + } + + if (isAddress) + HasAddress = true; + + // If this use isn't an address, then not all uses are addresses. + if (!isAddress && !isPHI) + AllUsesAreAddresses = false; + + MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, + UsersToProcess[i].Imm, isAddress, L, SE); + } + } + + // If one of the use is a PHI node and all other uses are addresses, still + // allow iv reuse. Essentially we are trading one constant multiplication + // for one fewer iv. + if (NumPHI > 1) + AllUsesAreAddresses = false; + + // There are no in-loop address uses. + if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop)) + AllUsesAreAddresses = false; + + return CommonExprs; +} + +/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction +/// is valid and profitable for the given set of users of a stride. In +/// full strength-reduction mode, all addresses at the current stride are +/// strength-reduced all the way down to pointer arithmetic. +/// +bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( + const std::vector &UsersToProcess, + const Loop *L, + bool AllUsesAreAddresses, + SCEVHandle Stride) { + if (!EnableFullLSRMode) + return false; + + // The heuristics below aim to avoid increasing register pressure, but + // fully strength-reducing all the addresses increases the number of + // add instructions, so don't do this when optimizing for size. 
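+  // As an illustrative sketch (hypothetical C, with made-up arrays A and B),
+  // full strength reduction rewrites
+  //
+  //   for (i = 0; i != n; ++i) { A[i] = 0; B[i] = 0; }
+  //
+  // into
+  //
+  //   for (pA = A, pB = B; pA != A+n; ++pA, ++pB) { *pA = 0; *pB = 0; }
+  //
+  // giving each address stream its own pointer IV at the cost of one extra
+  // increment per stream.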
+ // TODO: If the loop is large, the savings due to simpler addresses + // may oughtweight the costs of the extra increment instructions. + if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + // TODO: For now, don't do full strength reduction if there could + // potentially be greater-stride multiples of the current stride + // which could reuse the current stride IV. + if (IU->StrideOrder.back() != Stride) + return false; + + // Iterate through the uses to find conditions that automatically rule out + // full-lsr mode. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { + const SCEV *Base = UsersToProcess[i].Base; + const SCEV *Imm = UsersToProcess[i].Imm; + // If any users have a loop-variant component, they can't be fully + // strength-reduced. + if (Imm && !Imm->isLoopInvariant(L)) + return false; + // If there are to users with the same base and the difference between + // the two Imm values can't be folded into the address, full + // strength reduction would increase register pressure. + do { + const SCEV *CurImm = UsersToProcess[i].Imm; + if ((CurImm || Imm) && CurImm != Imm) { + if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType()); + if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); + const Instruction *Inst = UsersToProcess[i].Inst; + const Type *AccessTy = getAccessType(Inst); + SCEVHandle Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); + if (!Diff->isZero() && + (!AllUsesAreAddresses || + !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) + return false; + } + } while (++i != e && Base == UsersToProcess[i].Base); + } + + // If there's exactly one user in this stride, fully strength-reducing it + // won't increase register pressure. If it's starting from a non-zero base, + // it'll be simpler this way. + if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero()) + return true; + + // Otherwise, if there are any users in this stride that don't require + // a register for their base, full strength-reduction will increase + // register pressure. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) + if (UsersToProcess[i].Base->isZero()) + return false; + + // Otherwise, go for it. + return true; +} + +/// InsertAffinePhi Create and insert a PHI node for an induction variable +/// with the specified start and step values in the specified loop. +/// +/// If NegateStride is true, the stride should be negated by using a +/// subtract instead of an add. +/// +/// Return the created phi node. +/// +static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step, + Instruction *IVIncInsertPt, + const Loop *L, + SCEVExpander &Rewriter) { + assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!"); + assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!"); + + BasicBlock *Header = L->getHeader(); + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + const Type *Ty = Start->getType(); + Ty = Rewriter.SE.getEffectiveSCEVType(Ty); + + PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin()); + PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()), + Preheader); + + // If the stride is negative, insert a sub instead of an add for the + // increment. 
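+  // E.g. a step of -4 becomes "lsr.iv.next = sub lsr.iv, 4" rather than an
+  // add of a negated operand (illustrative values and names).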
+ bool isNegative = isNonConstantNegative(Step); + SCEVHandle IncAmount = Step; + if (isNegative) + IncAmount = Rewriter.SE.getNegativeSCEV(Step); + + // Insert an add instruction right before the terminator corresponding + // to the back-edge or just before the only use. The location is determined + // by the caller and passed in as IVIncInsertPt. + Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty, + Preheader->getTerminator()); + Instruction *IncV; + if (isNegative) { + IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next", + IVIncInsertPt); + } else { + IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next", + IVIncInsertPt); + } + if (!isa(StepV)) ++NumVariable; + + PN->addIncoming(IncV, LatchBlock); + + ++NumInserted; + return PN; +} + +static void SortUsersToProcess(std::vector &UsersToProcess) { + // We want to emit code for users inside the loop first. To do this, we + // rearrange BasedUser so that the entries at the end have + // isUseOfPostIncrementedValue = false, because we pop off the end of the + // vector (so we handle them first). + std::partition(UsersToProcess.begin(), UsersToProcess.end(), + PartitionByIsUseOfPostIncrementedValue); + + // Sort this by base, so that things with the same base are handled + // together. By partitioning first and stable-sorting later, we are + // guaranteed that within each base we will pop off users from within the + // loop before users outside of the loop with a particular base. + // + // We would like to use stable_sort here, but we can't. The problem is that + // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so + // we don't have anything to do a '<' comparison on. Because we think the + // number of uses is small, do a horrible bubble sort which just relies on + // ==. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { + // Get a base value. + SCEVHandle Base = UsersToProcess[i].Base; + + // Compact everything with this base to be consecutive with this one. + for (unsigned j = i+1; j != e; ++j) { + if (UsersToProcess[j].Base == Base) { + std::swap(UsersToProcess[i+1], UsersToProcess[j]); + ++i; + } + } + } +} + +/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce +/// UsersToProcess, meaning lowering addresses all the way down to direct +/// pointer arithmetic. +/// +void +LoopStrengthReduce::PrepareToStrengthReduceFully( + std::vector &UsersToProcess, + SCEVHandle Stride, + SCEVHandle CommonExprs, + const Loop *L, + SCEVExpander &PreheaderRewriter) { + DOUT << " Fully reducing all users\n"; + + // Rewrite the UsersToProcess records, creating a separate PHI for each + // unique Base value. + Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator(); + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { + // TODO: The uses are grouped by base, but not sorted. We arbitrarily + // pick the first Imm value here to start with, and adjust it for the + // other uses. + SCEVHandle Imm = UsersToProcess[i].Imm; + SCEVHandle Base = UsersToProcess[i].Base; + SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm); + PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, + PreheaderRewriter); + // Loop over all the users with the same base. 
+    do {
+      UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
+      UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
+      UsersToProcess[i].Phi = Phi;
+      assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
+             "ShouldUseFullStrengthReductionMode should reject this!");
+    } while (++i != e && Base == UsersToProcess[i].Base);
+  }
+}
+
+/// FindIVIncInsertPt - Return the location to insert the increment instruction.
+/// If the only use is a use of the postinc value (it must be the loop
+/// termination condition), then insert it just before that use.
+static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
+                                      const Loop *L) {
+  if (UsersToProcess.size() == 1 &&
+      UsersToProcess[0].isUseOfPostIncrementedValue &&
+      L->contains(UsersToProcess[0].Inst->getParent()))
+    return UsersToProcess[0].Inst;
+  return L->getLoopLatch()->getTerminator();
+}
+
+/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
+/// given users to share.
+///
+void
+LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
+                                  std::vector<BasedUser> &UsersToProcess,
+                                  SCEVHandle Stride,
+                                  SCEVHandle CommonExprs,
+                                  Value *CommonBaseV,
+                                  Instruction *IVIncInsertPt,
+                                  const Loop *L,
+                                  SCEVExpander &PreheaderRewriter) {
+  DOUT << "  Inserting new PHI:\n";
+
+  PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
+                                 Stride, IVIncInsertPt, L,
+                                 PreheaderRewriter);
+
+  // Remember this in case a later stride is a multiple of this.
+  IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
+
+  // All the users will share this new IV.
+  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+    UsersToProcess[i].Phi = Phi;
+
+  DOUT << "    IV=";
+  DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false));
+  DOUT << "\n";
+}
+
+/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
+/// reuse an induction variable with a stride that is a factor of the current
+/// induction variable.
+///
+void
+LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
+                                         std::vector<BasedUser> &UsersToProcess,
+                                         Value *CommonBaseV,
+                                         const IVExpr &ReuseIV,
+                                         Instruction *PreInsertPt) {
+  DOUT << "  Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride
+       << " and BASE " << *ReuseIV.Base << "\n";
+
+  // All the users will share the reused IV.
+  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+    UsersToProcess[i].Phi = ReuseIV.PHI;
+
+  Constant *C = dyn_cast<Constant>(CommonBaseV);
+  if (C &&
+      (!C->isNullValue() &&
+       !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
+                          TLI, false)))
+    // We want the common base emitted into the preheader! This is just
+    // using cast as a copy, so BitCast (no-op cast) is appropriate.
+    CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
+                                  "commonbase", PreInsertPt);
+}
+
+static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
+                                    const Type *AccessTy,
+                                    std::vector<BasedUser> &UsersToProcess,
+                                    const TargetLowering *TLI) {
+  SmallVector<Instruction*, 16> AddrModeInsts;
+  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+    if (UsersToProcess[i].isUseOfPostIncrementedValue)
+      continue;
+    ExtAddrMode AddrMode =
+      AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
+                                   AccessTy, UsersToProcess[i].Inst,
+                                   AddrModeInsts, *TLI);
+    if (GV && GV != AddrMode.BaseGV)
+      return false;
+    if (Offset && !AddrMode.BaseOffs)
+      // FIXME: How to accurately check whether its immediate offset is folded.
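+      // E.g. on x86 a global plus a small constant can often fold directly
+      // into the address, as in "movl %eax, gv+16(%ecx)" (an illustrative,
+      // hypothetical case).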
+ return false; + AddrModeInsts.clear(); + } + return true; +} + +/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single +/// stride of IV. All of the users may have different starting values, and this +/// may not be the only stride. +void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, + IVUsersOfOneStride &Uses, + Loop *L) { + // If all the users are moved to another stride, then there is nothing to do. + if (Uses.Users.empty()) + return; + + // Keep track if every use in UsersToProcess is an address. If they all are, + // we may be able to rewrite the entire collection of them in terms of a + // smaller-stride IV. + bool AllUsesAreAddresses = true; + + // Keep track if every use of a single stride is outside the loop. If so, + // we want to be more aggressive about reusing a smaller-stride IV; a + // multiply outside the loop is better than another IV inside. Well, usually. + bool AllUsesAreOutsideLoop = true; + + // Transform our list of users and offsets to a bit more complex table. In + // this new vector, each 'BasedUser' contains 'Base' the base of the + // strided accessas well as the old information from Uses. We progressively + // move information from the Base field to the Imm field, until we eventually + // have the full access expression to rewrite the use. + std::vector UsersToProcess; + SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + + // Sort the UsersToProcess array so that users with common bases are + // next to each other. + SortUsersToProcess(UsersToProcess); + + // If we managed to find some expressions in common, we'll need to carry + // their value in a register and add it in for each use. This will take up + // a register operand, which potentially restricts what stride values are + // valid. + bool HaveCommonExprs = !CommonExprs->isZero(); + const Type *ReplacedTy = CommonExprs->getType(); + + // If all uses are addresses, consider sinking the immediate part of the + // common expression back into uses if they can fit in the immediate fields. + if (TLI && HaveCommonExprs && AllUsesAreAddresses) { + SCEVHandle NewCommon = CommonExprs; + SCEVHandle Imm = SE->getIntegerSCEV(0, ReplacedTy); + MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE); + if (!Imm->isZero()) { + bool DoSink = true; + + // If the immediate part of the common expression is a GV, check if it's + // possible to fold it into the target addressing mode. + GlobalValue *GV = 0; + if (const SCEVUnknown *SU = dyn_cast(Imm)) + GV = dyn_cast(SU->getValue()); + int64_t Offset = 0; + if (const SCEVConstant *SC = dyn_cast(Imm)) + Offset = SC->getValue()->getSExtValue(); + if (GV || Offset) + // Pass VoidTy as the AccessTy to be conservative, because + // there could be multiple access types among all the uses. + DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy, + UsersToProcess, TLI); + + if (DoSink) { + DOUT << " Sinking " << *Imm << " back down into uses\n"; + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) + UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); + CommonExprs = NewCommon; + HaveCommonExprs = !CommonExprs->isZero(); + ++NumImmSunk; + } + } + } + + // Now that we know what we need to do, insert the PHI node itself. 
+ // + DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " + << *Stride << ":\n" + << " Common base: " << *CommonExprs << "\n"; + + SCEVExpander Rewriter(*SE); + SCEVExpander PreheaderRewriter(*SE); + + BasicBlock *Preheader = L->getLoopPreheader(); + Instruction *PreInsertPt = Preheader->getTerminator(); + BasicBlock *LatchBlock = L->getLoopLatch(); + Instruction *IVIncInsertPt = LatchBlock->getTerminator(); + + Value *CommonBaseV = Constant::getNullValue(ReplacedTy); + + SCEVHandle RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); + IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), + SE->getIntegerSCEV(0, Type::Int32Ty), + 0); + + /// Choose a strength-reduction strategy and prepare for it by creating + /// the necessary PHIs and adjusting the bookkeeping. + if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, + AllUsesAreAddresses, Stride)) { + PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, + PreheaderRewriter); + } else { + // Emit the initial base value into the loop preheader. + CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy, + PreInsertPt); + + // If all uses are addresses, check if it is possible to reuse an IV. The + // new IV must have a stride that is a multiple of the old stride; the + // multiple must be a number that can be encoded in the scale field of the + // target addressing mode; and we must have a valid instruction after this + // substitution, including the immediate field, if any. + RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, + AllUsesAreOutsideLoop, + Stride, ReuseIV, ReplacedTy, + UsersToProcess); + if (!RewriteFactor->isZero()) + PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV, + ReuseIV, PreInsertPt); + else { + IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L); + PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs, + CommonBaseV, IVIncInsertPt, + L, PreheaderRewriter); + } + } + + // Process all the users now, replacing their strided uses with + // strength-reduced forms. This outer loop handles all bases, the inner + // loop handles all users of a particular base. + while (!UsersToProcess.empty()) { + SCEVHandle Base = UsersToProcess.back().Base; + Instruction *Inst = UsersToProcess.back().Inst; + + // Emit the code for Base into the preheader. + Value *BaseV = 0; + if (!Base->isZero()) { + BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); + + DOUT << " INSERTING code for BASE = " << *Base << ":"; + if (BaseV->hasName()) + DOUT << " Result value name = %" << BaseV->getNameStr(); + DOUT << "\n"; + + // If BaseV is a non-zero constant, make sure that it gets inserted into + // the preheader, instead of being forward substituted into the uses. We + // do this by forcing a BitCast (noop cast) to be inserted into the + // preheader in this case. + if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false)) { + // We want this constant emitted into the preheader! This is just + // using cast as a copy so BitCast (no-op cast) is appropriate + BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", + PreInsertPt); + } + } + + // Emit the code to add the immediate offset to the Phi value, just before + // the instructions that we identified as using this stride and base. + do { + // FIXME: Use emitted users to emit other users. 
+ BasedUser &User = UsersToProcess.back(); + + DOUT << " Examining "; + if (User.isUseOfPostIncrementedValue) + DOUT << "postinc"; + else + DOUT << "preinc"; + DOUT << " use "; + DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace, + /*PrintType=*/false)); + DOUT << " in Inst: " << *(User.Inst); + + // If this instruction wants to use the post-incremented value, move it + // after the post-inc and use its value instead of the PHI. + Value *RewriteOp = User.Phi; + if (User.isUseOfPostIncrementedValue) { + RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock); + // If this user is in the loop, make sure it is the last thing in the + // loop to ensure it is dominated by the increment. In case it's the + // only use of the iv, the increment instruction is already before the + // use. + if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt) + User.Inst->moveBefore(IVIncInsertPt); + } + + SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp); + + if (SE->getEffectiveSCEVType(RewriteOp->getType()) != + SE->getEffectiveSCEVType(ReplacedTy)) { + assert(SE->getTypeSizeInBits(RewriteOp->getType()) > + SE->getTypeSizeInBits(ReplacedTy) && + "Unexpected widening cast!"); + RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy); + } + + // If we had to insert new instructions for RewriteOp, we have to + // consider that they may not have been able to end up immediately + // next to RewriteOp, because non-PHI instructions may never precede + // PHI instructions in a block. In this case, remember where the last + // instruction was inserted so that if we're replacing a different + // PHI node, we can use the later point to expand the final + // RewriteExpr. + Instruction *NewBasePt = dyn_cast(RewriteOp); + if (RewriteOp == User.Phi) NewBasePt = 0; + + // Clear the SCEVExpander's expression map so that we are guaranteed + // to have the code emitted where we expect it. + Rewriter.clear(); + + // If we are reusing the iv, then it must be multiplied by a constant + // factor to take advantage of the addressing mode scale component. + if (!RewriteFactor->isZero()) { + // If we're reusing an IV with a nonzero base (currently this happens + // only when all reuses are outside the loop) subtract that base here. + // The base has been used to initialize the PHI node but we don't want + // it here. + if (!ReuseIV.Base->isZero()) { + SCEVHandle typedBase = ReuseIV.Base; + if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != + SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { + // It's possible the original IV is a larger type than the new IV, + // in which case we have to truncate the Base. We checked in + // RequiresTypeConversion that this is valid. + assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < + SE->getTypeSizeInBits(ReuseIV.Base->getType()) && + "Unexpected lengthening conversion!"); + typedBase = SE->getTruncateExpr(ReuseIV.Base, + RewriteExpr->getType()); + } + RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); + } + + // Multiply old variable, with base removed, by new scale factor. + RewriteExpr = SE->getMulExpr(RewriteFactor, + RewriteExpr); + + // The common base is emitted in the loop preheader. But since we + // are reusing an IV, it has not been used to initialize the PHI node. + // Add it to the expression used to rewrite the uses. + // When this use is outside the loop, we earlier subtracted the + // common base, and are adding it back here. Use the same expression + // as before, rather than CommonBaseV, so DAGCombiner will zap it. 
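+        // Taken together, a reused IV is rewritten into an expression of
+        // the shape ((iv - ReuseIV.Base) * RewriteFactor) + CommonExprs,
+        // plus any per-use BaseV added below (a summary, for illustration).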
+ if (!CommonExprs->isZero()) { + if (L->contains(User.Inst->getParent())) + RewriteExpr = SE->getAddExpr(RewriteExpr, + SE->getUnknown(CommonBaseV)); + else + RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs); + } + } + + // Now that we know what we need to do, insert code before User for the + // immediate and any loop-variant expressions. + if (BaseV) + // Add BaseV to the PHI value if needed. + RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); + + User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, + Rewriter, L, this, *LI, + DeadInsts); + + // Mark old value we replaced as possibly dead, so that it is eliminated + // if we just replaced the last use of that value. + DeadInsts.push_back(User.OperandValToReplace); + + UsersToProcess.pop_back(); + ++NumReduced; + + // If there are any more users to process with the same base, process them + // now. We sorted by base above, so we just have to check the last elt. + } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); + // TODO: Next, find out which base index is the most common, pull it out. + } + + // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but + // different starting values, into different PHIs. +} + +/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, +/// set the IV user and stride information and return true, otherwise return +/// false. +bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, + const SCEVHandle *&CondStride) { + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e && !CondUse; ++Stride) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + + for (ilist::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { + // NOTE: we could handle setcc instructions with multiple uses here, but + // InstCombine does it as well for simple uses, it's not clear that it + // occurs enough in real life to handle. + CondUse = UI; + CondStride = &SI->first; + return true; + } + } + return false; +} + +namespace { + // Constant strides come first which in turns are sorted by their absolute + // values. If absolute values are the same, then positive strides comes first. + // e.g. + // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X + struct StrideCompare { + const ScalarEvolution *SE; + explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} + + bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { + const SCEVConstant *LHSC = dyn_cast(LHS); + const SCEVConstant *RHSC = dyn_cast(RHS); + if (LHSC && RHSC) { + int64_t LV = LHSC->getValue()->getSExtValue(); + int64_t RV = RHSC->getValue()->getSExtValue(); + uint64_t ALV = (LV < 0) ? -LV : LV; + uint64_t ARV = (RV < 0) ? -RV : RV; + if (ALV == ARV) { + if (LV != RV) + return LV > RV; + } else { + return ALV < ARV; + } + + // If it's the same value but different type, sort by bit width so + // that we emit larger induction variables before smaller + // ones, letting the smaller be re-written in terms of larger ones. 
+        return SE->getTypeSizeInBits(RHS->getType()) <
+               SE->getTypeSizeInBits(LHS->getType());
+      }
+      return LHSC && !RHSC;
+    }
+  };
+}
+
+/// ChangeCompareStride - If a loop termination compare instruction is the
+/// only use of its stride, and the comparison is against a constant value,
+/// try to eliminate the stride by moving the compare instruction to another
+/// stride and changing its constant operand accordingly. e.g.
+///
+/// loop:
+/// ...
+/// v1 = v1 + 3
+/// v2 = v2 + 1
+/// if (v2 < 10) goto loop
+/// =>
+/// loop:
+/// ...
+/// v1 = v1 + 3
+/// if (v1 < 30) goto loop
+ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
+                                                  IVStrideUse* &CondUse,
+                                                  const SCEVHandle* &CondStride) {
+  // If there's only one stride in the loop, there's nothing to do here.
+  if (IU->StrideOrder.size() < 2)
+    return Cond;
+  // If there are other users of the condition's stride, don't bother
+  // trying to change the condition because the stride will still
+  // remain.
+  std::map<SCEVHandle, IVUsersOfOneStride*>::iterator I =
+    IU->IVUsesByStride.find(*CondStride);
+  if (I == IU->IVUsesByStride.end() ||
+      I->second->Users.size() != 1)
+    return Cond;
+  // Only handle constant strides for now.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+  if (!SC) return Cond;
+
+  ICmpInst::Predicate Predicate = Cond->getPredicate();
+  int64_t CmpSSInt = SC->getValue()->getSExtValue();
+  unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+  uint64_t SignBit = 1ULL << (BitWidth-1);
+  const Type *CmpTy = Cond->getOperand(0)->getType();
+  const Type *NewCmpTy = NULL;
+  unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
+  unsigned NewTyBits = 0;
+  SCEVHandle *NewStride = NULL;
+  Value *NewCmpLHS = NULL;
+  Value *NewCmpRHS = NULL;
+  int64_t Scale = 1;
+  SCEVHandle NewOffset = SE->getIntegerSCEV(0, CmpTy);
+
+  if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
+    int64_t CmpVal = C->getValue().getSExtValue();
+
+    // Check the signs of the stride constant and the comparison constant to
+    // detect overflow.
+    if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+      return Cond;
+
+    // Look for a suitable stride / iv as replacement.
+    for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+      std::map<SCEVHandle, IVUsersOfOneStride*>::iterator SI =
+        IU->IVUsesByStride.find(IU->StrideOrder[i]);
+      if (!isa<SCEVConstant>(SI->first))
+        continue;
+      int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+      if (SSInt == CmpSSInt ||
+          abs64(SSInt) < abs64(CmpSSInt) ||
+          (SSInt % CmpSSInt) != 0)
+        continue;
+
+      Scale = SSInt / CmpSSInt;
+      int64_t NewCmpVal = CmpVal * Scale;
+      APInt Mul = APInt(BitWidth*2, CmpVal, true);
+      Mul = Mul * APInt(BitWidth*2, Scale, true);
+      // Check for overflow.
+      if (!Mul.isSignedIntN(BitWidth))
+        continue;
+      // Check for overflow in the stride's type too.
+      if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
+        continue;
+
+      // Watch out for overflow.
+      if (ICmpInst::isSignedPredicate(Predicate) &&
+          (CmpVal & SignBit) != (NewCmpVal & SignBit))
+        continue;
+
+      if (NewCmpVal == CmpVal)
+        continue;
+      // Pick the best iv to use trying to avoid a cast.
+      NewCmpLHS = NULL;
+      for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+             E = SI->second->Users.end(); UI != E; ++UI) {
+        Value *Op = UI->getOperandValToReplace();
+
+        // If the IVStrideUse implies a cast, check for an actual cast which
+        // can be used to find the original IV expression.
+        if (SE->getEffectiveSCEVType(Op->getType()) !=
+            SE->getEffectiveSCEVType(SI->first->getType())) {
+          CastInst *CI = dyn_cast<CastInst>(Op);
+          // If it's not a simple cast, it's complicated.
+ if (!CI) + continue; + // If it's a cast from a type other than the stride type, + // it's complicated. + if (CI->getOperand(0)->getType() != SI->first->getType()) + continue; + // Ok, we found the IV expression in the stride's type. + Op = CI->getOperand(0); + } + + NewCmpLHS = Op; + if (NewCmpLHS->getType() == CmpTy) + break; + } + if (!NewCmpLHS) + continue; + + NewCmpTy = NewCmpLHS->getType(); + NewTyBits = SE->getTypeSizeInBits(NewCmpTy); + const Type *NewCmpIntTy = IntegerType::get(NewTyBits); + if (RequiresTypeConversion(NewCmpTy, CmpTy)) { + // Check if it is possible to rewrite it using + // an iv / stride of a smaller integer type. + unsigned Bits = NewTyBits; + if (ICmpInst::isSignedPredicate(Predicate)) + --Bits; + uint64_t Mask = (1ULL << Bits) - 1; + if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal) + continue; + } + + // Don't rewrite if use offset is non-constant and the new type is + // of a different type. + // FIXME: too conservative? + if (NewTyBits != TyBits && !isa(CondUse->getOffset())) + continue; + + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector UsersToProcess; + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + // Avoid rewriting the compare instruction with an iv of new stride + // if it's likely the new stride uses will be rewritten using the + // stride of the compare instruction. + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) + continue; + + // Avoid rewriting the compare instruction with an iv which has + // implicit extension or truncation built into it. + // TODO: This is over-conservative. + if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits) + continue; + + // If scale is negative, use swapped predicate unless it's testing + // for equality. + if (Scale < 0 && !Cond->isEquality()) + Predicate = ICmpInst::getSwappedPredicate(Predicate); + + NewStride = &IU->StrideOrder[i]; + if (!isa(NewCmpTy)) + NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); + else { + ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); + } + NewOffset = TyBits == NewTyBits + ? SE->getMulExpr(CondUse->getOffset(), + SE->getConstant(ConstantInt::get(CmpTy, Scale))) + : SE->getConstant(ConstantInt::get(NewCmpIntTy, + cast(CondUse->getOffset())->getValue() + ->getSExtValue()*Scale)); + break; + } + } + + // Forgo this transformation if it the increment happens to be + // unfortunately positioned after the condition, and the condition + // has multiple uses which prevent it from being moved immediately + // before the branch. See + // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll + // for an example of this situation. + if (!Cond->hasOneUse()) { + for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); + I != E; ++I) + if (I == NewCmpLHS) + return Cond; + } + + if (NewCmpRHS) { + // Create a new compare instruction using new stride / iv. + ICmpInst *OldCond = Cond; + // Insert new compare instruction. + Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS, + L->getHeader()->getName() + ".termcond", + OldCond); + + // Remove the old compare instruction. The old indvar is probably dead too. 
+ DeadInsts.push_back(CondUse->getOperandValToReplace()); + OldCond->replaceAllUsesWith(Cond); + OldCond->eraseFromParent(); + + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); + CondStride = NewStride; + ++NumEliminated; + Changed = true; + } + + return Cond; +} + +/// OptimizeSMax - Rewrite the loop's terminating condition if it uses +/// an smax computation. +/// +/// This is a narrow solution to a specific, but acute, problem. For loops +/// like this: +/// +/// i = 0; +/// do { +/// p[i] = 0.0; +/// } while (++i < n); +/// +/// where the comparison is signed, the trip count isn't just 'n', because +/// 'n' could be negative. And unfortunately this can come up even for loops +/// where the user didn't use a C do-while loop. For example, seemingly +/// well-behaved top-test loops will commonly be lowered like this: +// +/// if (n > 0) { +/// i = 0; +/// do { +/// p[i] = 0.0; +/// } while (++i < n); +/// } +/// +/// and then it's possible for subsequent optimization to obscure the if +/// test in such a way that indvars can't find it. +/// +/// When indvars can't find the if test in loops like this, it creates a +/// signed-max expression, which allows it to give the loop a canonical +/// induction variable: +/// +/// i = 0; +/// smax = n < 1 ? 1 : n; +/// do { +/// p[i] = 0.0; +/// } while (++i != smax); +/// +/// Canonical induction variables are necessary because the loop passes +/// are designed around them. The most obvious example of this is the +/// LoopInfo analysis, which doesn't remember trip count values. It +/// expects to be able to rediscover the trip count each time it is +/// needed, and it does this using a simple analyis that only succeeds if +/// the loop has a canonical induction variable. +/// +/// However, when it comes time to generate code, the maximum operation +/// can be quite costly, especially if it's inside of an outer loop. +/// +/// This function solves this problem by detecting this type of loop and +/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting +/// the instructions for the maximum computation. +/// +ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse) { + // Check that the loop matches the pattern we're looking for. + if (Cond->getPredicate() != CmpInst::ICMP_EQ && + Cond->getPredicate() != CmpInst::ICMP_NE) + return Cond; + + SelectInst *Sel = dyn_cast(Cond->getOperand(1)); + if (!Sel || !Sel->hasOneUse()) return Cond; + + SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa(BackedgeTakenCount)) + return Cond; + SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + + // Add one to the backedge-taken count to get the trip count. + SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + + // Check for a max calculation that matches the pattern. + const SCEVSMaxExpr *SMax = dyn_cast(IterationCount); + if (!SMax || SMax != SE->getSCEV(Sel)) return Cond; + + SCEVHandle SMaxLHS = SMax->getOperand(0); + SCEVHandle SMaxRHS = SMax->getOperand(1); + if (!SMaxLHS || SMaxLHS != One) return Cond; + + // Check the relevant induction variable for conformance to + // the pattern. 
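+  // In SCEV terms the IV must be the affine addrec {1,+,1}, i.e. it starts
+  // at 1 and steps by 1, matching the "do { } while (++i != smax)" form
+  // described above (a restatement, for illustration).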
+  SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+  if (!AR || !AR->isAffine() ||
+      AR->getStart() != One ||
+      AR->getStepRecurrence(*SE) != One)
+    return Cond;
+
+  assert(AR->getLoop() == L &&
+         "Loop condition operand is an addrec in a different loop!");
+
+  // Check the right operand of the select, and remember it, as it will
+  // be used in the new comparison instruction.
+  Value *NewRHS = 0;
+  if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+    NewRHS = Sel->getOperand(1);
+  else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+    NewRHS = Sel->getOperand(2);
+  if (!NewRHS) return Cond;
+
+  // Ok, everything looks ok to change the condition into an SLT or SGE and
+  // delete the max calculation.
+  ICmpInst *NewCond =
+    new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
+                   CmpInst::ICMP_SLT :
+                   CmpInst::ICMP_SGE,
+                 Cond->getOperand(0), NewRHS, "scmp", Cond);
+
+  // Delete the max calculation instructions.
+  Cond->replaceAllUsesWith(NewCond);
+  CondUse->setUser(NewCond);
+  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+  Cond->eraseFromParent();
+  Sel->eraseFromParent();
+  if (Cmp->use_empty())
+    Cmp->eraseFromParent();
+  return NewCond;
+}
+
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
+
+  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    return;
+
+  for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
+       ++Stride) {
+    std::map<SCEVHandle, IVUsersOfOneStride*>::iterator SI =
+      IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+    if (!isa<SCEVConstant>(SI->first))
+      continue;
+
+    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+           E = SI->second->Users.end(); UI != E; /* empty */) {
+      ilist<IVStrideUse>::iterator CandidateUI = UI;
+      ++UI;
+      Instruction *ShadowUse = CandidateUI->getUser();
+      const Type *DestTy = NULL;
+
+      /* If shadow use is an int->float cast then insert a second IV
+         to eliminate this cast.
+
+           for (unsigned i = 0; i < n; ++i)
+             foo((double)i);
+
+         is transformed into
+
+           double d = 0.0;
+           for (unsigned i = 0; i < n; ++i, ++d)
+             foo(d);
+      */
+      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+        DestTy = UCast->getDestTy();
+      else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+        DestTy = SCast->getDestTy();
+      if (!DestTy) continue;
+
+      if (TLI) {
+        // If target does not support DestTy natively then do not apply
+        // this transformation.
+        MVT DVT = TLI->getValueType(DestTy);
+        if (!TLI->isTypeLegal(DVT)) continue;
+      }
+
+      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+      if (!PH) continue;
+      if (PH->getNumIncomingValues() != 2) continue;
+
+      const Type *SrcTy = PH->getType();
+      int Mantissa = DestTy->getFPMantissaWidth();
+      if (Mantissa == -1) continue;
+      if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
+        continue;
+
+      unsigned Entry, Latch;
+      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+        Entry = 0;
+        Latch = 1;
+      } else {
+        Entry = 1;
+        Latch = 0;
+      }
+
+      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+      if (!Init) continue;
+      ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+
+      BinaryOperator *Incr =
+        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+      if (!Incr) continue;
+      if (Incr->getOpcode() != Instruction::Add
+          && Incr->getOpcode() != Instruction::Sub)
+        continue;
+
+      /* Initialize new IV, double d = 0.0 in above example.
*/ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast(Incr->getOperand(0)); + else + continue; + + if (!C) continue; + + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + + /* create new increment. '++d' in above example. */ + ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode(), + NewPH, CFP, "IV.S.next.", Incr); + + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + NumShadow++; + break; + } + } +} + +// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar +// uses in the loop, look to see if we can eliminate some, in favor of using +// common indvars for the different uses. +void LoopStrengthReduce::OptimizeIndvars(Loop *L) { + // TODO: implement optzns here. + + OptimizeShadowIV(L); +} + +/// OptimizeLoopTermCond - Change loop terminating condition to use the +/// postinc iv when possible. +void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { + // Finally, get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. + BasicBlock *LatchBlock = L->getLoopLatch(); + BasicBlock *ExitBlock = L->getExitingBlock(); + if (!ExitBlock) + // Multiple exits, just look at the exit in the latch block if there is one. + ExitBlock = LatchBlock; + BranchInst *TermBr = dyn_cast(ExitBlock->getTerminator()); + if (!TermBr) + return; + if (TermBr->isUnconditional() || !isa(TermBr->getCondition())) + return; + + // Search IVUsesByStride to find Cond's IVUse if there is one. + IVStrideUse *CondUse = 0; + const SCEVHandle *CondStride = 0; + ICmpInst *Cond = cast(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse, CondStride)) + return; // setcc doesn't use the IV. + + if (ExitBlock != LatchBlock) { + if (!Cond->hasOneUse()) + // See below, we don't want the condition to be cloned. + return; + + // If exiting block is the latch block, we know it's safe and profitable to + // transform the icmp to use post-inc iv. Otherwise do so only if it would + // not reuse another iv and its iv would be reused by other uses. We are + // optimizing for the case where the icmp is the only use of the iv. + IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride]; + for (ilist::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (I->getUser() == Cond) + continue; + if (!I->isUseOfPostIncrementedValue()) + return; + } + + // FIXME: This is expensive, and worse still ChangeCompareStride does a + // similar check. Can we perform all the icmp related transformations after + // StrengthReduceStridedIVUsers? + if (const SCEVConstant *SC = dyn_cast(*CondStride)) { + int64_t SInt = SC->getValue()->getSExtValue(); + for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; + ++NewStride) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); + if (!isa(SI->first) || SI->first == *CondStride) + continue; + int64_t SSInt = + cast(SI->first)->getValue()->getSExtValue(); + if (SSInt == SInt) + return; // This can definitely be reused. 
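+        // E.g. with SInt == 2, a candidate stride of 6 yields Scale == 3
+        // below, while a candidate stride of 7 is skipped because
+        // 7 % 2 != 0 (illustrative values).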
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) + continue; + int64_t Scale = SSInt / SInt; + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector UsersToProcess; + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + // Avoid rewriting the compare instruction with an iv of new stride + // if it's likely the new stride uses will be rewritten using the + // stride of the compare instruction. + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) + return; + } + } + + StrideNoReuse.insert(*CondStride); + } + + // If the trip count is computed in terms of an smax (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT instead of NE. + Cond = OptimizeSMax(L, Cond, CondUse); + + // If possible, change stride and operands of the compare instruction to + // eliminate one stride. + if (ExitBlock == LatchBlock) + Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); + + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the latch block branch, move it. + if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { + if (Cond->hasOneUse()) { // Condition has a single use, just move it. + Cond->moveBefore(TermBr); + } else { + // Otherwise, clone the terminating condition and insert into the loopend. + Cond = cast(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + LatchBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, + CondUse->getOperandValToReplace(), + false); + CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); + } + } + + // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride)); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; + + ++NumLoopCond; +} + +// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +// when to exit the loop is used only for that purpose, try to rearrange things +// so it counts down to a test against zero. +void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { + + // If the number of times the loop is executed isn't computable, give up. + SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa(BackedgeTakenCount)) + return; + + // Get the terminating condition for the loop if possible (this isn't + // necessarily in the latch, or a block that's a predecessor of the header). + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) return; + + // Okay, there is one exit block. Try to find the condition that causes the + // loop to be exited. + BasicBlock *ExitBlock = ExitBlocks[0]; + + BasicBlock *ExitingBlock = 0; + for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock); + PI != E; ++PI) + if (L->contains(*PI)) { + if (ExitingBlock == 0) + ExitingBlock = *PI; + else + return; // More than one block exiting! + } + assert(ExitingBlock && "No exits from loop, something is broken!"); + + // Okay, we've computed the exiting block. See what condition causes us to + // exit. 
+ // + // FIXME: we should be able to handle switch instructions (with a single exit) + BranchInst *TermBr = dyn_cast(ExitingBlock->getTerminator()); + if (TermBr == 0) return; + assert(TermBr->isConditional() && "If unconditional, it can't be in loop!"); + if (!isa(TermBr->getCondition())) + return; + ICmpInst *Cond = cast(TermBr->getCondition()); + + // Handle only tests for equality for the moment, and only stride 1. + if (Cond->getPredicate() != CmpInst::ICMP_EQ) + return; + SCEVHandle IV = SE->getSCEV(Cond->getOperand(0)); + const SCEVAddRecExpr *AR = dyn_cast(IV); + SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One) + return; + // If the RHS of the comparison is defined inside the loop, the rewrite + // cannot be done. + if (Instruction *CR = dyn_cast(Cond->getOperand(1))) + if (L->contains(CR->getParent())) + return; + + // Make sure the IV is only used for counting. Value may be preinc or + // postinc; 2 uses in either case. + if (!Cond->getOperand(0)->hasNUses(2)) + return; + PHINode *phi = dyn_cast(Cond->getOperand(0)); + Instruction *incr; + if (phi && phi->getParent()==L->getHeader()) { + // value tested is preinc. Find the increment. + // A CmpInst is not a BinaryOperator; we depend on this. + Instruction::use_iterator UI = phi->use_begin(); + incr = dyn_cast(UI); + if (!incr) + incr = dyn_cast(++UI); + // 1 use for postinc value, the phi. Unnecessarily conservative? + if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add) + return; + } else { + // Value tested is postinc. Find the phi node. + incr = dyn_cast(Cond->getOperand(0)); + if (!incr || incr->getOpcode()!=Instruction::Add) + return; + + Instruction::use_iterator UI = Cond->getOperand(0)->use_begin(); + phi = dyn_cast(UI); + if (!phi) + phi = dyn_cast(++UI); + // 1 use for preinc value, the increment. + if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse()) + return; + } + + // Replace the increment with a decrement. + BinaryOperator *decr = + BinaryOperator::Create(Instruction::Sub, incr->getOperand(0), + incr->getOperand(1), "tmp", incr); + incr->replaceAllUsesWith(decr); + incr->eraseFromParent(); + + // Substitute endval-startval for the original startval, and 0 for the + // original endval. Since we're only testing for equality this is OK even + // if the computation wraps around. + BasicBlock *Preheader = L->getLoopPreheader(); + Instruction *PreInsertPt = Preheader->getTerminator(); + int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0; + Value *startVal = phi->getIncomingValue(inBlock); + Value *endVal = Cond->getOperand(1); + // FIXME check for case where both are constant + ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + BinaryOperator *NewStartVal = + BinaryOperator::Create(Instruction::Sub, endVal, startVal, + "tmp", PreInsertPt); + phi->setIncomingValue(inBlock, NewStartVal); + Cond->setOperand(1, Zero); + + Changed = true; +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { + + IU = &getAnalysis(); + LI = &getAnalysis(); + DT = &getAnalysis(); + SE = &getAnalysis(); + Changed = false; + + if (!IU->IVUsesByStride.empty()) { +#ifndef NDEBUG + DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart() + << "\" "; + DEBUG(L->dump()); +#endif + + // Sort the StrideOrder so we process larger strides first. + std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), + StrideCompare(SE)); + + // Optimize induction variables. 
Some indvar uses can be transformed to use + // strides that will be needed for other purposes. A common example of this + // is the exit test for the loop, which can often be rewritten to use the + // computation of some other indvar to decide when to terminate the loop. + OptimizeIndvars(L); + + // Change loop terminating condition to use the postinc iv when possible + // and optimize loop terminating compare. FIXME: Move this after + // StrengthReduceStridedIVUsers? + OptimizeLoopTermCond(L); + + // FIXME: We can shrink overlarge IV's here. e.g. if the code has + // computation in i64 values and the target doesn't support i64, demote + // the computation to 32-bit if safe. + + // FIXME: Attempt to reuse values across multiple IV's. In particular, we + // could have something like "for(i) { foo(i*8); bar(i*16) }", which should + // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. + // Need to be careful that IV's are all the same type. Only works for + // intptr_t indvars. + + // IVsByStride keeps IVs for one particular loop. + assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); + + // Note: this processes each stride/type pair individually. All users + // passed into StrengthReduceStridedIVUsers have the same type AND stride. + // Also, note that we iterate over IVUsesByStride indirectly by using + // StrideOrder. This extra layer of indirection makes the ordering of + // strides deterministic - not dependent on map order. + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e; ++Stride) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SI->first->isLoopInvariant(L)) + continue; + StrengthReduceStridedIVUsers(SI->first, *SI->second, L); + } + } + + // After all sharing is done, see if we can adjust the loop to test against + // zero instead of counting up to a maximum. This is usually faster. + OptimizeLoopCountIV(L); + + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); + StrideNoReuse.clear(); + + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + + // At this point, it is worth checking to see if any recurrence PHIs are also + // dead, so that we can remove them as well. + DeleteDeadPHIs(L->getHeader()); + + return Changed; +} diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp new file mode 100644 index 000000000000..23757cdb2d29 --- /dev/null +++ b/lib/Transforms/Scalar/LoopUnroll.cpp @@ -0,0 +1,183 @@ +//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a simple loop unroller. It works best when loops have +// been canonicalized by the -indvars pass, allowing it to determine the trip +// counts of loops easily. 
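+// For example (an illustrative sketch): with a known trip count of 4,
+//
+//   for (i = 0; i != 4; ++i) body(i);
+//
+// becomes
+//
+//   body(0); body(1); body(2); body(3);
+//
+// removing the compare, branch, and induction-variable updates entirely.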
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+  cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+  cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+  cl::desc("Allows loops to be partially unrolled until "
+           "-unroll-threshold loop size is reached."));
+
+namespace {
+  class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopUnroll() : LoopPass(&ID) {}
+
+    /// A magic value for use with the Threshold parameter to indicate
+    /// that the loop unroll should be performed regardless of how much
+    /// code expansion would result.
+    static const unsigned NoThreshold = UINT_MAX;
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    /// This transformation requires natural loop information & requires that
+    /// loop preheaders be inserted into the CFG...
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addRequired<LoopInfo>();
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<LoopInfo>();
+      // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+      // If loop unroll does not preserve dom info then LCSSA pass on next
+      // loop will receive invalid dom info.
+      // For now, recreate dom info, if loop is unrolled.
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<DominanceFrontier>();
+    }
+  };
+}
+
+char LoopUnroll::ID = 0;
+static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+
+Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L) {
+  unsigned Size = 0;
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I) {
+    BasicBlock *BB = *I;
+    Instruction *Term = BB->getTerminator();
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      if (isa<PHINode>(I) && BB == L->getHeader()) {
+        // Ignore PHI nodes in the header.
+      } else if (I->hasOneUse() && I->use_back() == Term) {
+        // Ignore instructions only used by the loop terminator.
+      } else if (isa<DbgInfoIntrinsic>(I)) {
+        // Ignore debug instructions.
+      } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
+        // Ignore GEP as they generally are subsumed into a load or store.
+      } else if (isa<CallInst>(I)) {
+        // Estimate size overhead introduced by call instructions which
+        // is higher than other instructions. Here 3 and 10 are magic
+        // numbers that help one isolated test case from PR2067 without
+        // negatively impacting measured benchmarks.
+        if (isa<IntrinsicInst>(I))
+          Size = Size + 3;
+        else
+          Size = Size + 10;
+      } else {
+        ++Size;
+      }
+
+      // TODO: Ignore expressions derived from PHI and constants if the
+      // incoming value of the phi is a constant, or if the operation is
+      // associative.  This will get induction variables.
+ } + } + + return Size; +} + +bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { + assert(L->isLCSSAForm()); + LoopInfo *LI = &getAnalysis(); + + BasicBlock *Header = L->getHeader(); + DOUT << "Loop Unroll: F[" << Header->getParent()->getName() + << "] Loop %" << Header->getName() << "\n"; + + // Find trip count + unsigned TripCount = L->getSmallConstantTripCount(); + unsigned Count = UnrollCount; + + // Automatically select an unroll count. + if (Count == 0) { + // Conservative heuristic: if we know the trip count, see if we can + // completely unroll (subject to the threshold, checked below); otherwise + // try to find greatest modulo of the trip count which is still under + // threshold value. + if (TripCount != 0) { + Count = TripCount; + } else { + return false; + } + } + + // Enforce the threshold. + if (UnrollThreshold != NoThreshold) { + unsigned LoopSize = ApproximateLoopSize(L); + DOUT << " Loop Size = " << LoopSize << "\n"; + uint64_t Size = (uint64_t)LoopSize*Count; + if (TripCount != 1 && Size > UnrollThreshold) { + DOUT << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << UnrollThreshold << "\n"; + if (UnrollAllowPartial) { + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = UnrollThreshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) { + Count--; + } + if (Count < 2) { + DOUT << " could not unroll partially\n"; + return false; + } else { + DOUT << " partially unrolling with count: " << Count << "\n"; + } + } else { + DOUT << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"; + return false; + } + } + } + + // Unroll the loop. + Function *F = L->getHeader()->getParent(); + if (!UnrollLoop(L, Count, LI, &LPM)) + return false; + + // FIXME: Reconstruct dom info, because it is not preserved properly. + DominatorTree *DT = getAnalysisIfAvailable(); + if (DT) { + DT->runOnFunction(*F); + DominanceFrontier *DF = getAnalysisIfAvailable(); + if (DF) + DF->runOnFunction(*F); + } + return true; +} diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp new file mode 100644 index 000000000000..e3e881f0812b --- /dev/null +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -0,0 +1,1098 @@ +//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops that contain branches on loop-invariant conditions +// to have multiple loops. For example, it turns the left into the right code: +// +// for (...) if (lic) +// A for (...) +// if (lic) A; B; C +// B else +// C for (...) +// A; C +// +// This can increase the size of the code exponentially (doubling it every time +// a loop is unswitched) so we only unswitch if the resultant code will be +// smaller than a threshold. +// +// This pass expects LICM to be run before it to hoist invariant conditions out +// of the loop, to make the unswitching opportunity obvious. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unswitch" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include +#include +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumSelects , "Number of selects unswitched"); +STATISTIC(NumTrivial , "Number of unswitches that are trivial"); +STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); + +static cl::opt +Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), + cl::init(10), cl::Hidden); + +namespace { + class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass { + LoopInfo *LI; // Loop information + LPPassManager *LPM; + + // LoopProcessWorklist - Used to check if second loop needs processing + // after RewriteLoopBodyWithConditionConstant rewrites first loop. + std::vector LoopProcessWorklist; + SmallPtrSet UnswitchedVals; + + bool OptimizeForSize; + bool redoLoop; + + Loop *currentLoop; + DominanceFrontier *DF; + DominatorTree *DT; + BasicBlock *loopHeader; + BasicBlock *loopPreheader; + + // LoopBlocks contains all of the basic blocks of the loop, including the + // preheader of the loop, the body of the loop, and the exit blocks of the + // loop, in that order. + std::vector LoopBlocks; + // NewBlocks contained cloned copy of basic blocks from LoopBlocks. + std::vector NewBlocks; + + public: + static char ID; // Pass ID, replacement for typeid + explicit LoopUnswitch(bool Os = false) : + LoopPass(&ID), OptimizeForSize(Os), redoLoop(false), + currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL), + loopPreheader(NULL) {} + + bool runOnLoop(Loop *L, LPPassManager &LPM); + bool processCurrentLoop(); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addPreserved(); + AU.addPreserved(); + } + + private: + + /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, + /// remove it. + void RemoveLoopFromWorklist(Loop *L) { + std::vector::iterator I = std::find(LoopProcessWorklist.begin(), + LoopProcessWorklist.end(), L); + if (I != LoopProcessWorklist.end()) + LoopProcessWorklist.erase(I); + } + + void initLoopData() { + loopHeader = currentLoop->getHeader(); + loopPreheader = currentLoop->getLoopPreheader(); + } + + /// Split all of the edges from inside the loop to their exit blocks. + /// Update the appropriate Phi nodes as we do so. 
+ void SplitExitEdges(Loop *L, const SmallVector &ExitBlocks); + + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); + unsigned getLoopUnswitchCost(Value *LIC); + void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, + BasicBlock *ExitBlock); + void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); + + void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, + Constant *Val, bool isEqual); + + void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, + BasicBlock *FalseDest, + Instruction *InsertPt); + + void SimplifyCode(std::vector &Worklist, Loop *L); + void RemoveBlockIfDead(BasicBlock *BB, + std::vector &Worklist, Loop *l); + void RemoveLoopFromHierarchy(Loop *L); + bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, + BasicBlock **LoopExit = 0); + + }; +} +char LoopUnswitch::ID = 0; +static RegisterPass X("loop-unswitch", "Unswitch loops"); + +Pass *llvm::createLoopUnswitchPass(bool Os) { + return new LoopUnswitch(Os); +} + +/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is +/// invariant in the loop, or has an invariant piece, return the invariant. +/// Otherwise, return null. +static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + // Constants should be folded, not unswitched on! + if (isa(Cond)) return 0; + + // TODO: Handle: br (VARIANT|INVARIANT). + // TODO: Hoist simple expressions out of loops. + if (L->isLoopInvariant(Cond)) return Cond; + + if (BinaryOperator *BO = dyn_cast(Cond)) + if (BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or) { + // If either the left or right side is invariant, we can unswitch on this, + // which will cause the branch to go away in one loop and the condition to + // simplify in the other one. + if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed)) + return LHS; + if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) + return RHS; + } + + return 0; +} + +bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { + LI = &getAnalysis(); + LPM = &LPM_Ref; + DF = getAnalysisIfAvailable(); + DT = getAnalysisIfAvailable(); + currentLoop = L; + Function *F = currentLoop->getHeader()->getParent(); + bool Changed = false; + do { + assert(currentLoop->isLCSSAForm()); + redoLoop = false; + Changed |= processCurrentLoop(); + } while(redoLoop); + + if (Changed) { + // FIXME: Reconstruct dom info, because it is not preserved properly. + if (DT) + DT->runOnFunction(*F); + if (DF) + DF->runOnFunction(*F); + } + return Changed; +} + +/// processCurrentLoop - Do actual work and unswitch loop if possible +/// and profitable. +bool LoopUnswitch::processCurrentLoop() { + bool Changed = false; + + // Loop over all of the basic blocks in the loop. If we find an interior + // block that is branching on a loop-invariant condition, we can unswitch this + // loop. + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); + I != E; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + if (BranchInst *BI = dyn_cast(TI)) { + // If this isn't branching on an invariant condition, we can't unswitch + // it. + if (BI->isConditional()) { + // See if this, or some part of it, is loop invariant. If so, we can + // unswitch on it if we desire. 
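+        // (Illustrative example, not a case from this file: in
+        //    for (...) { if (x && i < n) ... }
+        // the whole condition varies with i, but the invariant piece 'x'
+        // can still be unswitched on; FindLIVLoopCondition above looks
+        // through And/Or to find such a piece.)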
+        Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+                                               currentLoop, Changed);
+        if (LoopCond && UnswitchIfProfitable(LoopCond,
+                                             ConstantInt::getTrue())) {
+          ++NumBranches;
+          return true;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+                                             currentLoop, Changed);
+      if (LoopCond && SI->getNumCases() > 1) {
+        // Find a value to unswitch on:
+        // FIXME: this should choose the most expensive case!
+        Constant *UnswitchVal = SI->getCaseValue(1);
+        // Do not process same value again and again.
+        if (!UnswitchedVals.insert(UnswitchVal))
+          continue;
+
+        if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+          ++NumSwitches;
+          return true;
+        }
+      }
+    }
+
+    // Scan the instructions to check for unswitchable values.
+    for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+         BBI != E; ++BBI)
+      if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+        Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+                                               currentLoop, Changed);
+        if (LoopCond && UnswitchIfProfitable(LoopCond,
+                                             ConstantInt::getTrue())) {
+          ++NumSelects;
+          return true;
+        }
+      }
+  }
+  return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
+///   1. Exit the loop with no side effects.
+///   2. Branch to the latch block with no side-effects.
+///
+/// If these conditions are true, we return true and set ExitBB to the block we
+/// exit through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+                                         BasicBlock *&ExitBB,
+                                         std::set<BasicBlock*> &Visited) {
+  if (!Visited.insert(BB).second) {
+    // Already visited and OK, end of recursion.
+    return true;
+  } else if (!L->contains(BB)) {
+    // Otherwise, this is a loop exit, this is fine so long as this is the
+    // first exit.
+    if (ExitBB != 0) return false;
+    ExitBB = BB;
+    return true;
+  }
+
+  // Otherwise, this is an unvisited intra-loop node.  Check all successors.
+  for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+    // Check to see if the successor is a trivial loop exit.
+    if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+      return false;
+  }
+
+  // Okay, everything after this looks good, check to make sure that this block
+  // doesn't include any side effects.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (I->mayHaveSideEffects())
+      return false;
+
+  return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process.  If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+  std::set<BasicBlock*> Visited;
+  Visited.insert(L->getHeader());  // Branches to header are ok.
+  BasicBlock *ExitBB = 0;
+  if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+    return ExitBB;
+  return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all.  If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false.  When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit.
+/// Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+                                              BasicBlock **LoopExit) {
+  BasicBlock *Header = currentLoop->getHeader();
+  TerminatorInst *HeaderTerm = Header->getTerminator();
+
+  BasicBlock *LoopExitBB = 0;
+  if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+    // If the header block doesn't end with a conditional branch on Cond, we
+    // can't handle it.
+    if (!BI->isConditional() || BI->getCondition() != Cond)
+      return false;
+
+    // Check to see if a successor of the branch is guaranteed to go to the
+    // latch block or exit through a single exit block without having any
+    // side-effects.  If so, determine the value of Cond that causes it to do
+    // this.
+    if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+                                             BI->getSuccessor(0)))) {
+      if (Val) *Val = ConstantInt::getTrue();
+    } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+                                                    BI->getSuccessor(1)))) {
+      if (Val) *Val = ConstantInt::getFalse();
+    }
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+    // If this isn't a switch on Cond, we can't handle it.
+    if (SI->getCondition() != Cond) return false;
+
+    // Check to see if a successor of the switch is guaranteed to go to the
+    // latch block or exit through a single exit block without having any
+    // side-effects.  If so, determine the value of Cond that causes it to do
+    // this.  Note that we can't trivially unswitch on the default case.
+    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+                                               SI->getSuccessor(i)))) {
+        // Okay, we found a trivial case, remember the value that is trivial.
+        if (Val) *Val = SI->getCaseValue(i);
+        break;
+      }
+  }
+
+  // If we didn't find a single unique LoopExit block, or if the loop exit
+  // block contains phi nodes, this isn't trivial.
+  if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+    return false;   // Can't handle this.
+
+  if (LoopExit) *LoopExit = LoopExitBB;
+
+  // We already know that nothing uses any scalar values defined inside of this
+  // loop.  As such, we just have to check to see if this loop will execute any
+  // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+  // part of the loop that the code *would* execute.  We already checked the
+  // tail, check the header now.
+  for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+    if (I->mayHaveSideEffects())
+      return false;
+  return true;
+}
+
+/// getLoopUnswitchCost - Return the cost (code size growth) that will happen
+/// if we choose to unswitch current loop on the specified value.
+///
+unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
+  // If the condition is trivial, always unswitch.  There is no code growth for
+  // this case.
+  if (IsTrivialUnswitchCondition(LIC))
+    return 0;
+
+  // FIXME: This is really overly conservative.  However, more liberal
+  // estimations have thus far resulted in excessive unswitching, which is bad
+  // both in compile time and in code size.  This should be replaced once
+  // someone figures out how to do a good estimation.
+  return currentLoop->getBlocks().size();
+
+  unsigned Cost = 0;
+  // FIXME: this is brain dead.  It should take into consideration code
+  // shrinkage.
+  for (Loop::block_iterator I = currentLoop->block_begin(),
+         E = currentLoop->block_end();
+       I != E; ++I) {
+    BasicBlock *BB = *I;
+    // Do not include empty blocks in the cost calculation.  This happens due
+    // to loop canonicalization and will be removed.
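+    // (e.g. a block created by loop-simplify that holds nothing but an
+    // unconditional branch; it contributes no machine code.)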
+ if (BB->begin() == BasicBlock::iterator(BB->getTerminator())) + continue; + + // Count basic blocks. + ++Cost; + } + + return Cost; +} + +/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when +/// LoopCond == Val to simplify the loop. If we decide that this is profitable, +/// unswitch the loop, reprocess the pieces, then return true. +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ + + initLoopData(); + Function *F = loopHeader->getParent(); + + + // Check to see if it would be profitable to unswitch current loop. + unsigned Cost = getLoopUnswitchCost(LoopCond); + + // Do not do non-trivial unswitch while optimizing for size. + if (Cost && OptimizeForSize) + return false; + if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + if (Cost > Threshold) { + // FIXME: this should estimate growth by the amount of code shared by the + // resultant unswitched loops. + // + DOUT << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"; + return false; + } + + Constant *CondVal; + BasicBlock *ExitBlock; + if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); + } else { + UnswitchNontrivialCondition(LoopCond, Val, currentLoop); + } + + return true; +} + +// RemapInstruction - Convert the instruction operands from referencing the +// current values into those specified by ValueMap. +// +static inline void RemapInstruction(Instruction *I, + DenseMap &ValueMap) { + for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { + Value *Op = I->getOperand(op); + DenseMap::iterator It = ValueMap.find(Op); + if (It != ValueMap.end()) Op = It->second; + I->setOperand(op, Op); + } +} + +/// CloneLoop - Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. +static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop *New = new Loop(); + + LPM->insertLoop(New, PL); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) + New->addBasicBlockToLoop(cast(VM[*I]), LI->getBase()); + + // Add all of the subloops to the new loop. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + CloneLoop(*I, New, VM, LI, LPM); + + return New; +} + +/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values +/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the +/// code immediately before InsertPt. +void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, + BasicBlock *FalseDest, + Instruction *InsertPt) { + // Insert a conditional branch on LIC to the two preheaders. The original + // code is the true version and the new code is the false version. + Value *BranchVal = LIC; + if (!isa(Val) || Val->getType() != Type::Int1Ty) + BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt); + else if (Val != ConstantInt::getTrue()) + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + + // Insert the new branch. 
+ BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); +} + +/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable +/// condition in it (a cond branch from its header block to its latch block, +/// where the path through the loop that doesn't execute its body has no +/// side-effects), unswitch it. This doesn't involve any code duplication, just +/// moving the conditional branch outside of the loop and updating loop info. +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, + Constant *Val, + BasicBlock *ExitBlock) { + DOUT << "loop-unswitch: Trivial-Unswitch loop %" + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << L->getHeader()->getParent()->getName() + << " on cond: " << *Val << " == " << *Cond << "\n"; + + // First step, split the preheader, so that we know that there is a safe place + // to insert the conditional branch. We will change loopPreheader to have a + // conditional branch on Cond. + BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we should + // short-circuit to. + + // Split this block now, so that the loop maintains its exit block, and so + // that the jump from the preheader can execute the contents of the exit block + // without actually branching to it (the exit block should be dominated by the + // loop header, not the preheader). + assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); + BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this); + + // Okay, now we have a position to branch from and a position to branch to, + // insert the new conditional branch. + EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, + loopPreheader->getTerminator()); + LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); + loopPreheader->getTerminator()->eraseFromParent(); + + // We need to reprocess this loop, it could be unswitched again. + redoLoop = true; + + // Now that we know that the loop is never entered when this condition is a + // particular value, rewrite the loop with this info. We know that this will + // at least eliminate the old branch. + RewriteLoopBodyWithConditionConstant(L, Cond, Val, false); + ++NumTrivial; +} + +/// SplitExitEdges - Split all of the edges from inside the loop to their exit +/// blocks. Update the appropriate Phi nodes as we do so. 
+void LoopUnswitch::SplitExitEdges(Loop *L,
+                                  const SmallVector<BasicBlock *, 8> &ExitBlocks)
+{
+
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+    BasicBlock *ExitBlock = ExitBlocks[i];
+    std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
+
+    for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
+      BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
+      BasicBlock* StartBlock = Preds[j];
+      BasicBlock* EndBlock;
+      if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
+        EndBlock = NewExitBlock;
+        NewExitBlock = EndBlock->getSinglePredecessor();
+      } else {
+        EndBlock = ExitBlock;
+      }
+
+      std::set<PHINode*> InsertedPHIs;
+      PHINode* OldLCSSA = 0;
+      for (BasicBlock::iterator I = EndBlock->begin();
+           (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
+        Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
+        PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
+                                            OldLCSSA->getName() + ".us-lcssa",
+                                            NewExitBlock->getTerminator());
+        NewLCSSA->addIncoming(OldValue, StartBlock);
+        OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
+                                   NewLCSSA);
+        InsertedPHIs.insert(NewLCSSA);
+      }
+
+      BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
+      for (BasicBlock::iterator I = NewExitBlock->begin();
+           (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
+           ++I) {
+        PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
+                                            OldLCSSA->getName() + ".us-lcssa",
+                                            InsertPt);
+        OldLCSSA->replaceAllUsesWith(NewLCSSA);
+        NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
+      }
+
+    }
+  }
+
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equals Val.  Split it into loop versions and test the
+/// condition outside of either loop.  Return the loops created as Out1/Out2.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+                                               Loop *L) {
+  Function *F = loopHeader->getParent();
+  DOUT << "loop-unswitch: Unswitching loop %"
+       << loopHeader->getName() << " [" << L->getBlocks().size()
+       << " blocks] in Function " << F->getName()
+       << " when '" << *Val << "' == " << *LIC << "\n";
+
+  LoopBlocks.clear();
+  NewBlocks.clear();
+
+  // First step, split the preheader and exit blocks, and add these blocks to
+  // the LoopBlocks list.
+  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+  LoopBlocks.push_back(NewPreheader);
+
+  // We want the loop to come after the preheader, but before the exit blocks.
+  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getUniqueExitBlocks(ExitBlocks);
+
+  // Split all of the edges from inside the loop to their exit blocks.  Update
+  // the appropriate Phi nodes as we do so.
+  SplitExitEdges(L, ExitBlocks);
+
+  // The exit blocks may have been changed due to edge splitting, recompute.
+  ExitBlocks.clear();
+  L->getUniqueExitBlocks(ExitBlocks);
+
+  // Add exit blocks to the loop blocks.
+  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+  // Next step, clone all of the basic blocks that make up the loop (including
+  // the loop preheader and exit blocks), keeping track of the mapping between
+  // the instructions and blocks.
+  NewBlocks.reserve(LoopBlocks.size());
+  DenseMap<const Value*, Value*> ValueMap;
+  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+    BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+    NewBlocks.push_back(New);
+    ValueMap[LoopBlocks[i]] = New;  // Keep the BB mapping.
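+    // Also tell the pass manager about the clone, so loop passes keeping
+    // per-block analysis state get a chance to duplicate it for New.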
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L); + } + + // Splice the newly inserted blocks into the function right before the + // original preheader. + F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Now we create the new Loop object for the versioned loop. + Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM); + Loop *ParentLoop = L->getParentLoop(); + if (ParentLoop) { + // Make sure to add the cloned preheader and exit blocks to the parent loop + // as well. + ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); + } + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *NewExit = cast(ValueMap[ExitBlocks[i]]); + // The new exit block should be in the same loop as the old one. + if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) + ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); + + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && + "Exit block should have been split to have one successor!"); + BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); + + // If the successor of the exit block had PHI nodes, add an entry for + // NewExit. + PHINode *PN; + for (BasicBlock::iterator I = ExitSucc->begin(); + (PN = dyn_cast(I)); ++I) { + Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + DenseMap::iterator It = ValueMap.find(V); + if (It != ValueMap.end()) V = It->second; + PN->addIncoming(V, NewExit); + } + } + + // Rewrite the code to refer to itself. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) + RemapInstruction(I, ValueMap); + + // Rewrite the original preheader to select between versions of the loop. + BranchInst *OldBR = cast(loopPreheader->getTerminator()); + assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && + "Preheader splitting did not work correctly!"); + + // Emit the new branch that selects between the two versions of this loop. + EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); + LPM->deleteSimpleAnalysisValue(OldBR, L); + OldBR->eraseFromParent(); + + LoopProcessWorklist.push_back(NewLoop); + redoLoop = true; + + // Now we rewrite the original code to know that the condition is true and the + // new code to know that the condition is false. + RewriteLoopBodyWithConditionConstant(L , LIC, Val, false); + + // It's possible that simplifying one loop could cause the other to be + // deleted. If so, don't simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop) + RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true); + +} + +/// RemoveFromWorklist - Remove all instances of I from the worklist vector +/// specified. +static void RemoveFromWorklist(Instruction *I, + std::vector &Worklist) { + std::vector::iterator WI = std::find(Worklist.begin(), + Worklist.end(), I); + while (WI != Worklist.end()) { + unsigned Offset = WI-Worklist.begin(); + Worklist.erase(WI); + WI = std::find(Worklist.begin()+Offset, Worklist.end(), I); + } +} + +/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the +/// program, replacing all uses with V and update the worklist. +static void ReplaceUsesOfWith(Instruction *I, Value *V, + std::vector &Worklist, + Loop *L, LPPassManager *LPM) { + DOUT << "Replace with '" << *V << "': " << *I; + + // Add uses to the worklist, which may be dead now. 
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Instruction *Use = dyn_cast(I->getOperand(i))) + Worklist.push_back(Use); + + // Add users to the worklist which may be simplified now. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + Worklist.push_back(cast(*UI)); + LPM->deleteSimpleAnalysisValue(I, L); + RemoveFromWorklist(I, Worklist); + I->replaceAllUsesWith(V); + I->eraseFromParent(); + ++NumSimplify; +} + +/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop +/// information, and remove any dead successors it has. +/// +void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, + std::vector &Worklist, + Loop *L) { + if (pred_begin(BB) != pred_end(BB)) { + // This block isn't dead, since an edge to BB was just removed, see if there + // are any easy simplifications we can do now. + if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If it has one pred, fold phi nodes in BB. + while (isa(BB->begin())) + ReplaceUsesOfWith(BB->begin(), + cast(BB->begin())->getIncomingValue(0), + Worklist, L, LPM); + + // If this is the header of a loop and the only pred is the latch, we now + // have an unreachable loop. + if (Loop *L = LI->getLoopFor(BB)) + if (loopHeader == BB && L->contains(Pred)) { + // Remove the branch from the latch to the header block, this makes + // the header dead, which will make the latch dead (because the header + // dominates the latch). + LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); + Pred->getTerminator()->eraseFromParent(); + new UnreachableInst(Pred); + + // The loop is now broken, remove it from LI. + RemoveLoopFromHierarchy(L); + + // Reprocess the header, which now IS dead. + RemoveBlockIfDead(BB, Worklist, L); + return; + } + + // If pred ends in a uncond branch, add uncond branch to worklist so that + // the two blocks will get merged. + if (BranchInst *BI = dyn_cast(Pred->getTerminator())) + if (BI->isUnconditional()) + Worklist.push_back(BI); + } + return; + } + + DOUT << "Nuking dead block: " << *BB; + + // Remove the instructions in the basic block from the worklist. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + RemoveFromWorklist(I, Worklist); + + // Anything that uses the instructions in this basic block should have their + // uses replaced with undefs. + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + + // If this is the edge to the header block for a loop, remove the loop and + // promote all subloops. + if (Loop *BBLoop = LI->getLoopFor(BB)) { + if (BBLoop->getLoopLatch() == BB) + RemoveLoopFromHierarchy(BBLoop); + } + + // Remove the block from the loop info, which removes it from any loops it + // was in. + LI->removeBlock(BB); + + + // Remove phi node entries in successors for this block. + TerminatorInst *TI = BB->getTerminator(); + SmallVector Succs; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + Succs.push_back(TI->getSuccessor(i)); + TI->getSuccessor(i)->removePredecessor(BB); + } + + // Unique the successors, remove anything with multiple uses. + array_pod_sort(Succs.begin(), Succs.end()); + Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); + + // Remove the basic block, including all of the instructions contained in it. + LPM->deleteSimpleAnalysisValue(BB, L); + BB->eraseFromParent(); + // Remove successor blocks here that are not dead, so that we know we only + // have dead blocks in this list. 
Nondead blocks have a way of becoming dead, + // then getting removed before we revisit them, which is badness. + // + for (unsigned i = 0; i != Succs.size(); ++i) + if (pred_begin(Succs[i]) != pred_end(Succs[i])) { + // One exception is loop headers. If this block was the preheader for a + // loop, then we DO want to visit the loop so the loop gets deleted. + // We know that if the successor is a loop header, that this loop had to + // be the preheader: the case where this was the latch block was handled + // above and headers can only have two predecessors. + if (!LI->isLoopHeader(Succs[i])) { + Succs.erase(Succs.begin()+i); + --i; + } + } + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + RemoveBlockIfDead(Succs[i], Worklist, L); +} + +/// RemoveLoopFromHierarchy - We have discovered that the specified loop has +/// become unwrapped, either because the backedge was deleted, or because the +/// edge into the header was removed. If the edge into the header from the +/// latch block was removed, the loop is unwrapped but subloops are still alive, +/// so they just reparent loops. If the loops are actually dead, they will be +/// removed later. +void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { + LPM->deleteLoopFromQueue(L); + RemoveLoopFromWorklist(L); +} + +// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has +// the value specified by Val in the specified loop, or we know it does NOT have +// that value. Rewrite any uses of LIC or of properties correlated to it. +void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, + Constant *Val, + bool IsEqual) { + assert(!isa(LIC) && "Why are we unswitching on a constant?"); + + // FIXME: Support correlated properties, like: + // for (...) + // if (li1 < li2) + // ... + // if (li1 > li2) + // ... + + // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, + // selects, switches. + std::vector Users(LIC->use_begin(), LIC->use_end()); + std::vector Worklist; + + // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC + // in the loop with the appropriate one directly. + if (IsEqual || (isa(Val) && Val->getType() == Type::Int1Ty)) { + Value *Replacement; + if (IsEqual) + Replacement = Val; + else + Replacement = ConstantInt::get(Type::Int1Ty, + !cast(Val)->getZExtValue()); + + for (unsigned i = 0, e = Users.size(); i != e; ++i) + if (Instruction *U = cast(Users[i])) { + if (!L->contains(U->getParent())) + continue; + U->replaceUsesOfWith(LIC, Replacement); + Worklist.push_back(U); + } + } else { + // Otherwise, we don't know the precise value of LIC, but we do know that it + // is certainly NOT "Val". As such, simplify any uses in the loop that we + // can. This case occurs when we unswitch switch statements. + for (unsigned i = 0, e = Users.size(); i != e; ++i) + if (Instruction *U = cast(Users[i])) { + if (!L->contains(U->getParent())) + continue; + + Worklist.push_back(U); + + // If we know that LIC is not Val, use this info to simplify code. + if (SwitchInst *SI = dyn_cast(U)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + if (SI->getCaseValue(i) == Val) { + // Found a dead case value. Don't remove PHI nodes in the + // successor if they become single-entry, those PHI nodes may + // be in the Users list. + + // FIXME: This is a hack. We need to keep the successor around + // and hooked up so as to preserve the loop structure, because + // trying to update it is complicated. 
+              // So instead we preserve the loop structure and put the block
+              // on a dead code path.
+
+              BasicBlock *SISucc = SI->getSuccessor(i);
+              BasicBlock* Old = SI->getParent();
+              BasicBlock* Split = SplitBlock(Old, SI, this);
+
+              Instruction* OldTerm = Old->getTerminator();
+              BranchInst::Create(Split, SISucc,
+                                 ConstantInt::getTrue(), OldTerm);
+
+              LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
+              Old->getTerminator()->eraseFromParent();
+
+              PHINode *PN;
+              for (BasicBlock::iterator II = SISucc->begin();
+                   (PN = dyn_cast<PHINode>(II)); ++II) {
+                Value *InVal = PN->removeIncomingValue(Split, false);
+                PN->addIncoming(InVal, Old);
+              }
+
+              SI->removeCase(i);
+              break;
+            }
+          }
+        }
+
+        // TODO: We could do other simplifications, for example, turning
+        // LIC == Val -> false.
+      }
+  }
+
+  SimplifyCode(Worklist, L);
+}
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible.  Note that this is effectively a very simple loop-structure-aware
+/// optimizer.  During processing of this loop, L could very well be deleted,
+/// so it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+
+    // Simple constant folding.
+    if (Constant *C = ConstantFoldInstruction(I)) {
+      ReplaceUsesOfWith(I, C, Worklist, L, LPM);
+      continue;
+    }
+
+    // Simple DCE.
+    if (isInstructionTriviallyDead(I)) {
+      DOUT << "Remove dead instruction '" << *I;
+
+      // Add uses to the worklist, which may be dead now.
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+          Worklist.push_back(Use);
+      LPM->deleteSimpleAnalysisValue(I, L);
+      RemoveFromWorklist(I, Worklist);
+      I->eraseFromParent();
+      ++NumSimplify;
+      continue;
+    }
+
+    // Special case hacks that appear commonly in unswitched code.
+    switch (I->getOpcode()) {
+    case Instruction::Select:
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) {
+        ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L,
+                          LPM);
+        continue;
+      }
+      break;
+    case Instruction::And:
+      if (isa<ConstantInt>(I->getOperand(0)) &&
+          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+        cast<BinaryOperator>(I)->swapOperands();
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
+        if (CB->getType() == Type::Int1Ty) {
+          if (CB->isOne())      // X & 1 -> X
+            ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
+          else                  // X & 0 -> 0
+            ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
+          continue;
+        }
+      break;
+    case Instruction::Or:
+      if (isa<ConstantInt>(I->getOperand(0)) &&
+          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+        cast<BinaryOperator>(I)->swapOperands();
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
+        if (CB->getType() == Type::Int1Ty) {
+          if (CB->isOne())      // X | 1 -> 1
+            ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
+          else                  // X | 0 -> X
+            ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
+          continue;
+        }
+      break;
+    case Instruction::Br: {
+      BranchInst *BI = cast<BranchInst>(I);
+      if (BI->isUnconditional()) {
+        // If BI's parent is the only pred of the successor, fold the two
+        // blocks together.
+        BasicBlock *Pred = BI->getParent();
+        BasicBlock *Succ = BI->getSuccessor(0);
+        BasicBlock *SinglePred = Succ->getSinglePredecessor();
+        if (!SinglePred) continue;  // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken"); + + DOUT << "Merging blocks: " << Pred->getName() << " <- " + << Succ->getName() << "\n"; + + // Resolve any single entry PHI nodes in Succ. + while (PHINode *PN = dyn_cast(Succ->begin())) + ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); + + // Move all of the successor contents from Succ to Pred. + Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), + Succ->end()); + LPM->deleteSimpleAnalysisValue(BI, L); + BI->eraseFromParent(); + RemoveFromWorklist(BI, Worklist); + + // If Succ has any successors with PHI nodes, update them to have + // entries coming from Pred instead of Succ. + Succ->replaceAllUsesWith(Pred); + + // Remove Succ from the loop tree. + LI->removeBlock(Succ); + LPM->deleteSimpleAnalysisValue(Succ, L); + Succ->eraseFromParent(); + ++NumSimplify; + } else if (ConstantInt *CB = dyn_cast(BI->getCondition())){ + // Conditional branch. Turn it into an unconditional branch, then + // remove dead blocks. + break; // FIXME: Enable. + + DOUT << "Folded branch: " << *BI; + BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); + BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); + DeadSucc->removePredecessor(BI->getParent(), true); + Worklist.push_back(BranchInst::Create(LiveSucc, BI)); + LPM->deleteSimpleAnalysisValue(BI, L); + BI->eraseFromParent(); + RemoveFromWorklist(BI, Worklist); + ++NumSimplify; + + RemoveBlockIfDead(DeadSucc, Worklist, L); + } + break; + } + } + } +} diff --git a/lib/Transforms/Scalar/Makefile b/lib/Transforms/Scalar/Makefile new file mode 100644 index 000000000000..cc42fd00ac7d --- /dev/null +++ b/lib/Transforms/Scalar/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/Scalar/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMScalarOpts +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp new file mode 100644 index 000000000000..5cf05183ec05 --- /dev/null +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -0,0 +1,741 @@ +//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs various transformations related to eliminating memcpy +// calls, or transforming sets of stores into memset's. 
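+//
+// A sketch of the store-merging case (illustrative, not lifted from a test):
+// four adjacent byte stores of the same value,
+//
+//   p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
+//
+// are recognized as one contiguous byte-wise-constant range and become a
+// single memset(p, 0, 4).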
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "memcpyopt" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetData.h" +#include +using namespace llvm; + +STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); +STATISTIC(NumMemSetInfer, "Number of memsets inferred"); + +/// isBytewiseValue - If the specified value can be set by repeating the same +/// byte in memory, return the i8 value that it is represented with. This is +/// true for all i8 values obviously, but is also true for i32 0, i32 -1, +/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated +/// byte store (e.g. i16 0x1234), return null. +static Value *isBytewiseValue(Value *V) { + // All byte-wide stores are splatable, even of arbitrary variables. + if (V->getType() == Type::Int8Ty) return V; + + // Constant float and double values can be handled as integer values if the + // corresponding integer value is "byteable". An important case is 0.0. + if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType() == Type::FloatTy) + V = ConstantExpr::getBitCast(CFP, Type::Int32Ty); + if (CFP->getType() == Type::DoubleTy) + V = ConstantExpr::getBitCast(CFP, Type::Int64Ty); + // Don't handle long double formats, which have strange constraints. + } + + // We can handle constant integers that are power of two in size and a + // multiple of 8 bits. + if (ConstantInt *CI = dyn_cast(V)) { + unsigned Width = CI->getBitWidth(); + if (isPowerOf2_32(Width) && Width > 8) { + // We can handle this value if the recursive binary decomposition is the + // same at all levels. + APInt Val = CI->getValue(); + APInt Val2; + while (Val.getBitWidth() != 8) { + unsigned NextWidth = Val.getBitWidth()/2; + Val2 = Val.lshr(NextWidth); + Val2.trunc(Val.getBitWidth()/2); + Val.trunc(Val.getBitWidth()/2); + + // If the top/bottom halves aren't the same, reject it. + if (Val != Val2) + return 0; + } + return ConstantInt::get(Val); + } + } + + // Conceptually, we could handle things like: + // %a = zext i8 %X to i16 + // %b = shl i16 %a, 8 + // %c = or i16 %a, %b + // but until there is an example that actually needs this, it doesn't seem + // worth worrying about. + return 0; +} + +static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, + bool &VariableIdxFound, TargetData &TD) { + // Skip over the first indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. + int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); + if (OpC == 0) + return VariableIdxFound = true; + if (OpC->isZero()) continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. 
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*OpC->getSExtValue(); + } + + return Offset; +} + +/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a +/// constant offset, and return that constant offset. For example, Ptr1 might +/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. +static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, + TargetData &TD) { + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. After that base, they may have some number of common (and + // potentially variable) indices. After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + GetElementPtrInst *GEP1 = dyn_cast(Ptr1); + GetElementPtrInst *GEP2 = dyn_cast(Ptr2); + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return false; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + bool VariableIdxFound = false; + int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); + int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); + if (VariableIdxFound) return false; + + Offset = Offset2-Offset1; + return true; +} + + +/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. +/// This allows us to analyze stores like: +/// store 0 -> P+1 +/// store 0 -> P+0 +/// store 0 -> P+3 +/// store 0 -> P+2 +/// which sometimes happens with stores to arrays of structs etc. When we see +/// the first store, we make a range [1, 2). The second store extends the range +/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the +/// two ranges into [0, 3) which is memset'able. +namespace { +struct MemsetRange { + // Start/End - A semi range that describes the span that this range covers. + // The range is closed at the start and open at the end: [Start, End). + int64_t Start, End; + + /// StartPtr - The getelementptr instruction that points to the start of the + /// range. + Value *StartPtr; + + /// Alignment - The known alignment of the first store. + unsigned Alignment; + + /// TheStores - The actual stores that make up this range. + SmallVector TheStores; + + bool isProfitableToUseMemset(const TargetData &TD) const; + +}; +} // end anon namespace + +bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { + // If we found more than 8 stores to merge or 64 bytes, use memset. + if (TheStores.size() >= 8 || End-Start >= 64) return true; + + // Assume that the code generator is capable of merging pairs of stores + // together if it wants to. + if (TheStores.size() <= 2) return false; + + // If we have fewer than 8 stores, it can still be worthwhile to do this. + // For example, merging 4 i8 stores into an i32 store is useful almost always. + // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the + // memset will be split into 2 32-bit stores anyway) and doing so can + // pessimize the llvm optimizer. + // + // Since we don't have perfect knowledge here, make some assumptions: assume + // the maximum GPR width is the same size as the pointer size and assume that + // this width can be stored. If so, check to see whether we will end up + // actually reducing the number of stores used. 
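+  // (Worked example under these assumptions: with a 4-byte pointer, sixteen
+  // i8 stores covering 16 bytes would lower to 4 pointer-wide stores plus 0
+  // byte stores, and 16 > 4 + 0, so the memset is judged profitable.)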
+ unsigned Bytes = unsigned(End-Start); + unsigned NumPointerStores = Bytes/TD.getPointerSize(); + + // Assume the remaining bytes if any are done a byte at a time. + unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); + + // If we will reduce the # stores (according to this heuristic), do the + // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 + // etc. + return TheStores.size() > NumPointerStores+NumByteStores; +} + + +namespace { +class MemsetRanges { + /// Ranges - A sorted list of the memset ranges. We use std::list here + /// because each element is relatively large and expensive to copy. + std::list Ranges; + typedef std::list::iterator range_iterator; + TargetData &TD; +public: + MemsetRanges(TargetData &td) : TD(td) {} + + typedef std::list::const_iterator const_iterator; + const_iterator begin() const { return Ranges.begin(); } + const_iterator end() const { return Ranges.end(); } + bool empty() const { return Ranges.empty(); } + + void addStore(int64_t OffsetFromFirst, StoreInst *SI); +}; + +} // end anon namespace + + +/// addStore - Add a new store to the MemsetRanges data structure. This adds a +/// new range for the specified store at the specified offset, merging into +/// existing ranges as appropriate. +void MemsetRanges::addStore(int64_t Start, StoreInst *SI) { + int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType()); + + // Do a linear search of the ranges to see if this can be joined and/or to + // find the insertion point in the list. We keep the ranges sorted for + // simplicity here. This is a linear search of a linked list, which is ugly, + // however the number of ranges is limited, so this won't get crazy slow. + range_iterator I = Ranges.begin(), E = Ranges.end(); + + while (I != E && Start > I->End) + ++I; + + // We now know that I == E, in which case we didn't find anything to merge + // with, or that Start <= I->End. If End < I->Start or I == E, then we need + // to insert a new range. Handle this now. + if (I == E || End < I->Start) { + MemsetRange &R = *Ranges.insert(I, MemsetRange()); + R.Start = Start; + R.End = End; + R.StartPtr = SI->getPointerOperand(); + R.Alignment = SI->getAlignment(); + R.TheStores.push_back(SI); + return; + } + + // This store overlaps with I, add it. + I->TheStores.push_back(SI); + + // At this point, we may have an interval that completely contains our store. + // If so, just add it to the interval and return. + if (I->Start <= Start && I->End >= End) + return; + + // Now we know that Start <= I->End and End >= I->Start so the range overlaps + // but is not entirely contained within the range. + + // See if the range extends the start of the range. In this case, it couldn't + // possibly cause it to join the prior range, because otherwise we would have + // stopped on *it*. + if (Start < I->Start) { + I->Start = Start; + I->StartPtr = SI->getPointerOperand(); + } + + // Now we know that Start <= I->End and Start >= I->Start (so the startpoint + // is in or right at the end of I), and that End >= I->Start. Extend I out to + // End. + if (End > I->End) { + I->End = End; + range_iterator NextI = I; + while (++NextI != E && End >= NextI->Start) { + // Merge the range in. 
+      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+      if (NextI->End > I->End)
+        I->End = NextI->End;
+      Ranges.erase(NextI);
+      NextI = I;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                         MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+  class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+    bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MemCpyOpt() : FunctionPass(&ID) {}
+
+  private:
+    // This transformation requires dominator info
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<MemoryDependenceAnalysis>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetData>();
+      AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<MemoryDependenceAnalysis>();
+      AU.addPreserved<TargetData>();
+    }
+
+    // Helper functions
+    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+    bool processMemCpy(MemCpyInst* M);
+    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+    bool iterateOnFunction(Function &F);
+  };
+
+  char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+static RegisterPass<MemCpyOpt> X("memcpyopt",
+                                 "MemCpy Optimization");
+
+
+
+/// processStore - When GVN is scanning forward over instructions, we look for
+/// some other patterns to fold away.  In particular, this looks for stores to
+/// neighboring locations of memory.  If it sees enough consecutive ones
+/// (currently 4) it attempts to merge them together into a memcpy/memset.
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+  if (SI->isVolatile()) return false;
+
+  // There are two cases that are interesting for this code to handle: memcpy
+  // and memset.  Right now we only handle memset.
+
+  // Ensure that the value being stored is something that can be memset'able a
+  // byte at a time like "0" or "-1" or any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+  if (!ByteVal)
+    return false;
+
+  TargetData &TD = getAnalysis<TargetData>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Okay, so we now have a single store that can be splatable.  Scan to find
+  // all subsequent stores of the same value to offset from the same pointer.
+  // Join these together into ranges, so we can decide whether contiguous
+  // blocks are stored.
+  MemsetRanges Ranges(TD);
+
+  Value *StartPtr = SI->getPointerOperand();
+
+  BasicBlock::iterator BI = SI;
+  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+      // If the call is readnone, ignore it, otherwise bail out.  We don't even
+      // allow readonly here because we don't want something like:
+      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+      if (AA.getModRefBehavior(CallSite::get(BI)) ==
+            AliasAnalysis::DoesNotAccessMemory)
+        continue;
+
+      // TODO: If this is a memset, try to join it in.
+
+      break;
+    } else if (isa<VAArgInst>(BI) || isa<AllocationInst>(BI))
+      break;
+
+    // If this is a non-store instruction it is fine, ignore it.
+    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
+    if (NextStore == 0) continue;
+
+    // If this is a store, see if we can merge it in.
+    if (NextStore->isVolatile()) break;
+
+    // Check to see if this stored value is of the same byte-splattable value.
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      break;
+
+    // Check to see if this store is to a constant offset from the start ptr.
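+    // (Illustrative: if StartPtr is &A[0] and this store writes A[2], the
+    // offset is two element sizes; a variable index makes the offset
+    // unknown and ends the scan below.)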
+
+//===----------------------------------------------------------------------===//
+//                         MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+  class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+    bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MemCpyOpt() : FunctionPass(&ID) {}
+
+  private:
+    // This transformation requires dominator info.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<MemoryDependenceAnalysis>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetData>();
+      AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<MemoryDependenceAnalysis>();
+      AU.addPreserved<TargetData>();
+    }
+
+    // Helper functions
+    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+    bool processMemCpy(MemCpyInst* M);
+    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+    bool iterateOnFunction(Function &F);
+  };
+
+  char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+static RegisterPass<MemCpyOpt> X("memcpyopt",
+                                 "MemCpy Optimization");
+
+
+
+/// processStore - When scanning forward over instructions, we look for some
+/// other patterns to fold away.  In particular, this looks for stores to
+/// neighboring locations of memory.  If it sees enough consecutive ones
+/// (currently 4) it attempts to merge them together into a memcpy/memset.
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+  if (SI->isVolatile()) return false;
+
+  // There are two cases that are interesting for this code to handle: memcpy
+  // and memset.  Right now we only handle memset.
+
+  // Ensure that the value being stored is something that can be memset'd a
+  // byte at a time, like "0" or "-1" of any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+  if (!ByteVal)
+    return false;
+
+  TargetData &TD = getAnalysis<TargetData>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Okay, so we now have a single splattable store.  Scan to find all
+  // subsequent stores of the same value at offsets from the same pointer.
+  // Join these together into ranges, so we can decide whether contiguous
+  // blocks are stored.
+  MemsetRanges Ranges(TD);
+
+  Value *StartPtr = SI->getPointerOperand();
+
+  BasicBlock::iterator BI = SI;
+  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+      // If the call is readnone, ignore it, otherwise bail out.  We don't even
+      // allow readonly here because we don't want something like:
+      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+      if (AA.getModRefBehavior(CallSite::get(BI)) ==
+            AliasAnalysis::DoesNotAccessMemory)
+        continue;
+
+      // TODO: If this is a memset, try to join it in.
+
+      break;
+    } else if (isa<LoadInst>(BI) || isa<VAArgInst>(BI))
+      break;
+
+    // If this is a non-store instruction it is fine, ignore it.
+    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
+    if (NextStore == 0) continue;
+
+    // If this is a store, see if we can merge it in.
+    if (NextStore->isVolatile()) break;
+
+    // Check to see if this stored value is of the same byte-splattable value.
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      break;
+
+    // Check to see if this store is to a constant offset from the start ptr.
+    int64_t Offset;
+    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+      break;
+
+    Ranges.addStore(Offset, NextStore);
+  }
+
+  // If we have no ranges, then we just had a single store with nothing that
+  // could be merged in.  This is a very common case of course.
+  if (Ranges.empty())
+    return false;
+
+  // If we had at least one store that could be merged in, add the starting
+  // store as well.  We try to avoid this unless there is at least something
+  // interesting as a small compile-time optimization.
+  Ranges.addStore(0, SI);
+
+
+  Function *MemSetF = 0;
+
+  // Now that we have full information about ranges, loop over the ranges and
+  // emit memset's for anything big enough to be worthwhile.
+  bool MadeChange = false;
+  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+       I != E; ++I) {
+    const MemsetRange &Range = *I;
+
+    if (Range.TheStores.size() == 1) continue;
+
+    // If it is profitable to lower this range to memset, do so now.
+    if (!Range.isProfitableToUseMemset(TD))
+      continue;
+
+    // Otherwise, we do want to transform this!  Create a new memset.  We put
+    // the memset right before the first instruction that isn't part of this
+    // memset block.  This ensures that the memset is dominated by any
+    // addressing instruction needed by the start of the block.
+    BasicBlock::iterator InsertPt = BI;
+
+    if (MemSetF == 0) {
+      const Type *Tys[] = {Type::Int64Ty};
+      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
+                                          ->getParent(), Intrinsic::memset,
+                                          Tys, 1);
+    }
+
+    // Get the starting pointer of the block.
+    StartPtr = Range.StartPtr;
+
+    // Cast the start ptr to be i8* as memset requires.
+    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+    if (StartPtr->getType() != i8Ptr)
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+                                 InsertPt);
+
+    Value *Ops[] = {
+      StartPtr, ByteVal,                                        // Start, value
+      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),   // size
+      ConstantInt::get(Type::Int32Ty, Range.Alignment)          // align
+    };
+    Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
+    DEBUG(cerr << "Replace stores:\n";
+          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+            cerr << *Range.TheStores[i];
+          cerr << "With: " << *C); C=C;  // silence 'unused' warning in NDEBUG
+
+    // Don't invalidate the iterator.
+    BBI = BI;
+
+    // Zap all the stores.
+    for (SmallVector<StoreInst*, 16>::const_iterator
+           SI = Range.TheStores.begin(),
+           SE = Range.TheStores.end(); SI != SE; ++SI)
+      (*SI)->eraseFromParent();
+    ++NumMemSetInfer;
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+  // The general transformation to keep in mind is
+  //
+  //   call @func(..., src, ...)
+  //   memcpy(dest, src, ...)
+  //
+  // ->
+  //
+  //   memcpy(dest, src, ...)
+  //   call @func(..., dest, ...)
+  //
+  // Since moving the memcpy is technically awkward, we additionally check that
+  // src only holds uninitialized values at the moment of the call, meaning
+  // that the memcpy can be discarded rather than moved.
+
+  // Deliberately get the source and destination with bitcasts stripped away,
+  // because we'll need to do type comparisons based on the underlying type.
+  Value* cpyDest = cpy->getDest();
+  Value* cpySrc = cpy->getSource();
+  CallSite CS = CallSite::get(C);
+
+  // We need to be able to reason about the size of the memcpy, so we require
+  // that it be a constant.
+  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+  if (!cpyLength)
+    return false;
+
+  // Require that src be an alloca.  This simplifies the reasoning considerably.
+  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+  if (!srcAlloca)
+    return false;
+
+  // Check that all of src is copied to dest.
+  TargetData& TD = getAnalysis<TargetData>();
+
+  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+  if (!srcArraySize)
+    return false;
+
+  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+    srcArraySize->getZExtValue();
+
+  if (cpyLength->getZExtValue() < srcSize)
+    return false;
+
+  // Check that accessing the first srcSize bytes of dest will not cause a
+  // trap.  Otherwise the transform is invalid since it might cause a trap
+  // to occur earlier than it otherwise would.
+  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
+    // The destination is an alloca.  Check that it is larger than srcSize.
+    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+    if (!destArraySize)
+      return false;
+
+    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
+      destArraySize->getZExtValue();
+
+    if (destSize < srcSize)
+      return false;
+  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
+    // If the destination is an sret parameter then only accesses that are
+    // outside of the returned struct type can trap.
+    if (!A->hasStructRetAttr())
+      return false;
+
+    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
+    uint64_t destSize = TD.getTypeAllocSize(StructTy);
+
+    if (destSize < srcSize)
+      return false;
+  } else {
+    return false;
+  }
+
+  // Check that src is not accessed except via the call and the memcpy.  This
+  // guarantees that it holds only undefined values when passed in (so the
+  // final memcpy can be dropped), that it is not read or written between the
+  // call and the memcpy, and that writing beyond the end of it is undefined.
+  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+                                   srcAlloca->use_end());
+  while (!srcUseList.empty()) {
+    User* UI = srcUseList.back();
+    srcUseList.pop_back();
+
+    if (isa<BitCastInst>(UI)) {
+      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+           I != E; ++I)
+        srcUseList.push_back(*I);
+    } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+      if (G->hasAllZeroIndices())
+        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+             I != E; ++I)
+          srcUseList.push_back(*I);
+      else
+        return false;
+    } else if (UI != C && UI != cpy) {
+      return false;
+    }
+  }
+
+  // Since we're changing the parameter to the callsite, we need to make sure
+  // that what would be the new parameter dominates the callsite.
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+    if (!DT.dominates(cpyDestInst, C))
+      return false;
+
+  // In addition to knowing that the call does not access src in some
+  // unexpected manner, for example via a global, which we deduce from
+  // the use analysis, we also need to know that it does not sneakily
+  // access dest.  We rely on AA to figure this out for us.
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+        AliasAnalysis::NoModRef)
+    return false;
+
+  // All the checks have passed, so do the transformation.
+  bool changedArgument = false;
+  for (unsigned i = 0; i < CS.arg_size(); ++i)
+    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+      if (cpySrc->getType() != cpyDest->getType())
+        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+                                              cpyDest->getName(), C);
+      changedArgument = true;
+      if (CS.getArgument(i)->getType() != cpyDest->getType())
+        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
+      else
+        CS.setArgument(i, cpyDest);
+    }
+
+  if (!changedArgument)
+    return false;
+
+  // Drop any cached information about the call, because we may have changed
+  // its dependence information by changing its parameter.
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  MD.removeInstruction(C);
+
+  // Remove the memcpy.
+  MD.removeInstruction(cpy);
+  cpy->eraseFromParent();
+  NumMemCpyInstr++;
+
+  return true;
+}
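[Editor's note: at the source level, the pattern performCallSlotOptzn rewrites typically comes from returning a struct through a temporary. A hedged illustration follows; struct S, produce(), before() and after() are invented, and produce() stands in for the call that writes its whole result through its pointer argument.]

#include <cstring>

struct S { int a, b; };

void produce(S *out) { out->a = 1; out->b = 2; }    // stands in for the call

void before(S *dest) {
  S tmp;                                 // the srcAlloca in the code above
  produce(&tmp);                         // the call C
  std::memcpy(dest, &tmp, sizeof(S));    // the memcpy 'cpy'
}

void after(S *dest) {
  produce(dest);                         // call rewritten to write in place
}

int main() {
  S a, b;
  before(&a);
  after(&b);
  return (a.a == b.a && a.b == b.b) ? 0 : 1;   // both forms agree
}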
+
+/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances).  This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE
+  //   b) call-memcpy xform for return slot optimization
+  MemDepResult dep = MD.getDependency(M);
+  if (!dep.isClobber())
+    return false;
+  if (!isa<MemCpyInst>(dep.getInst())) {
+    if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+      return performCallSlotOptzn(M, C);
+    return false;
+  }
+
+  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+
+  // We can only transform memcpy's where the dest of one is the source of the
+  // other.
+  if (M->getSource() != MDep->getDest())
+    return false;
+
+  // Second, the length of the memcpy's must be the same, or the preceding one
+  // must be larger than the following one.
+  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+  if (!C1 || !C2)
+    return false;
+
+  uint64_t DepSize = C1->getValue().getZExtValue();
+  uint64_t CpySize = C2->getValue().getZExtValue();
+
+  if (DepSize < CpySize)
+    return false;
+
+  // Finally, we have to make sure that the dest of the second does not
+  // alias the source of the first.
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+        AliasAnalysis::NoAlias)
+    return false;
+  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
+             AliasAnalysis::NoAlias)
+    return false;
+  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
+             != AliasAnalysis::NoAlias)
+    return false;
+
+  // If all checks passed, then we can transform these memcpy's.
+  const Type *Tys[1];
+  Tys[0] = M->getLength()->getType();
+  Function* MemCpyFun = Intrinsic::getDeclaration(
+                                 M->getParent()->getParent()->getParent(),
+                                 M->getIntrinsicID(), Tys, 1);
+
+  Value *Args[4] = {
+    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+  };
+
+  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+
+
+  // If C and M don't interfere, then this is a valid transformation.  If they
+  // did, this would mean that the two sources overlap, which would be bad.
+  if (MD.getDependency(C) == dep) {
+    MD.removeInstruction(M);
+    M->eraseFromParent();
+    NumMemCpyInstr++;
+    return true;
+  }
+
+  // Otherwise, there was no point in doing this, so we remove the call we
+  // inserted and act like nothing happened.
+  MD.removeInstruction(C);
+  C->eraseFromParent();
+  return false;
+}
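[Editor's note: the memcpy-memcpy transform is easiest to see with concrete buffers. A small sketch follows, with invented buffer names; in the real pass the rewrite is kept only when the destination does not alias either source and the first copy is at least as large as the second.]

#include <cassert>
#include <cstring>

int main() {
  char A[8] = "pattern", B[8], C[8];
  std::memcpy(B, A, 8);     // MDep: B <- A
  std::memcpy(C, B, 8);     // M:    C <- B
  char C2[8];
  std::memcpy(C2, A, 8);    // rewritten M: C2 <- A yields the same bytes,
  assert(std::memcmp(C, C2, 8) == 0);   // leaving the first copy dead if B
  return 0;                             // has no other uses
}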
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function& F) {
+
+  bool changed = false;
+  bool shouldContinue = true;
+
+  while (shouldContinue) {
+    shouldContinue = iterateOnFunction(F);
+    changed |= shouldContinue;
+  }
+
+  return changed;
+}
+
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of MemCpyOpt.
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+  bool changed_function = false;
+
+  // Walk all instructions in the function.
+  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE;) {
+      // Avoid invalidating the iterator.
+      Instruction* I = BI++;
+
+      if (StoreInst *SI = dyn_cast<StoreInst>(I))
+        changed_function |= processStore(SI, BI);
+      else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+        changed_function |= processMemCpy(M);
+      }
+    }
+  }
+
+  return changed_function;
+}
diff --git a/lib/Transforms/Scalar/PredicateSimplifier.cpp b/lib/Transforms/Scalar/PredicateSimplifier.cpp
new file mode 100644
index 000000000000..a7e4d6eec443
--- /dev/null
+++ b/lib/Transforms/Scalar/PredicateSimplifier.cpp
@@ -0,0 +1,2725 @@
+//===-- PredicateSimplifier.cpp - Path Sensitive Simplifier --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Path-sensitive optimizer.  In a branch where x == y, replace uses of
+// x with y.  Permits further optimization, such as the elimination of
+// the unreachable call:
+//
+// void test(int *p, int *q)
+// {
+//   if (p != q)
+//     return;
+//
+//   if (*p != *q)
+//     foo(); // unreachable
+// }
+//
+//===----------------------------------------------------------------------===//
+//
+// The InequalityGraph focuses on four properties: equals, not equals,
+// less-than and less-than-or-equal-to.  The greater-than forms are also held
+// just to allow walking from a lesser node to a greater one.  These properties
+// are stored in a lattice; LE can become LT or EQ, NE can become LT or GT.
+//
+// These relationships define a graph between values of the same type.  Each
+// Value is stored in a map table that retrieves the associated Node.  This
+// is how EQ relationships are stored; the map contains pointers from equal
+// Value to the same node.  The node contains a most canonical Value* form
+// and the list of known relationships with other nodes.
+//
+// If two nodes are known to be unequal, then they will contain pointers to
+// each other with an "NE" relationship.  If node getNode(%x) is less than
+// getNode(%y), then the %x node will contain <%y, GT> and %y will contain
+// <%x, LT>.  This allows us to tie nodes together into a graph like this:
+//
+//   %a < %b < %c < %d
+//
+// with four nodes representing the properties.  The InequalityGraph provides
+// querying with "isRelatedBy" and mutators "addEquality" and "addInequality".
+// To find a relationship, we start with one of the nodes and do a binary
+// search through its list to find where the relationships with the second
+// node start.
+// Then we iterate through those to find the first relationship that dominates
+// our context node.
+//
+// To create these properties, we wait until a branch or switch instruction
+// implies that a particular value is true (or false).  The VRPSolver is
+// responsible for analyzing the variable and seeing what new inferences
+// can be made from each property.  For example:
+//
+//   %P = icmp ne i32* %ptr, null
+//   %a = and i1 %P, %Q
+//   br i1 %a label %cond_true, label %cond_false
+//
+// For the true branch, the VRPSolver will start with %a EQ true and look at
+// the definition of %a and find that it can infer that %P and %Q are both
+// true.  From %P being true, it can infer that %ptr NE null.  For the false
+// branch it can't infer anything from the "and" instruction.
+//
+// Besides branches, we can also infer properties from instructions that may
+// have undefined behaviour in certain cases.  For example, the divisor of
+// a division may never be zero.  After the division instruction, we may
+// assume that the divisor is not equal to zero.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ValueRanges class stores the known integer bounds of a Value.  When we
+// encounter i8 %a u< %b, the ValueRanges stores that %a = [0, 254] and
+// %b = [1, 255].
+//
+// It never stores a single-element range, since that is an equality
+// relationship better stored in the InequalityGraph, nor an empty range,
+// since that means the code is unreachable and is better tracked by
+// UnreachableBlocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "predsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <deque>
+#include <stack>
+using namespace llvm;
+
+STATISTIC(NumVarsReplaced, "Number of argument substitutions");
+STATISTIC(NumInstruction , "Number of instructions removed");
+STATISTIC(NumSimple      , "Number of simple replacements");
+STATISTIC(NumBlocks      , "Number of blocks marked unreachable");
+STATISTIC(NumSnuggle     , "Number of comparisons snuggled");
+
+namespace {
+  class DomTreeDFS {
+  public:
+    class Node {
+      friend class DomTreeDFS;
+    public:
+      typedef std::vector<Node *>::iterator iterator;
+      typedef std::vector<Node *>::const_iterator const_iterator;
+
+      unsigned getDFSNumIn()  const { return DFSin;  }
+      unsigned getDFSNumOut() const { return DFSout; }
+
+      BasicBlock *getBlock() const { return BB; }
+
+      iterator begin() { return Children.begin(); }
+      iterator end()   { return Children.end();   }
+
+      const_iterator begin() const { return Children.begin(); }
+      const_iterator end()   const { return Children.end();   }
+
+      bool dominates(const Node *N) const {
+        return DFSin <= N->DFSin && DFSout >= N->DFSout;
+      }
+
+      bool DominatedBy(const Node *N) const {
+        return N->dominates(this);
+      }
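[Editor's note: dominates()/DominatedBy() above reduce tree dominance to interval containment over the DFS entry/exit numbers. A minimal sketch follows, with hand-assigned numbers for a three-node chain root -> mid -> leaf; struct N and the values are invented.]

#include <cassert>

struct N { unsigned In, Out; };

// A dominates B exactly when A's [In, Out] interval contains B's.
static bool dominates(const N &A, const N &B) {
  return A.In <= B.In && A.Out >= B.Out;
}

int main() {
  N Root = {1, 6}, Mid = {2, 5}, Leaf = {3, 4};
  assert(dominates(Root, Leaf));
  assert(dominates(Mid, Leaf));
  assert(!dominates(Leaf, Mid));
  assert(dominates(Root, Root));   // a node dominates itself
  return 0;
}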
+      /// Sorts by the number of descendants.  With this, you can iterate
+      /// through a sorted list and the first matching entry is the most
+      /// specific match for your basic block.  The order provided is stable;
+      /// DomTreeDFS::Nodes with the same number of descendants are sorted by
+      /// DFS in number.
+      bool operator<(const Node &N) const {
+        unsigned spread   =   DFSout -   DFSin;
+        unsigned N_spread = N.DFSout - N.DFSin;
+        if (spread == N_spread) return DFSin < N.DFSin;
+        return spread < N_spread;
+      }
+      bool operator>(const Node &N) const { return N < *this; }
+
+    private:
+      unsigned DFSin, DFSout;
+      BasicBlock *BB;
+
+      std::vector<Node *> Children;
+    };
+
+    // XXX: this may be slow.  Instead of using "new" for each node, consider
+    // putting them in a vector to keep them contiguous.
+    explicit DomTreeDFS(DominatorTree *DT) {
+      std::stack<std::pair<Node *, DomTreeNode *> > S;
+
+      Entry = new Node;
+      Entry->BB = DT->getRootNode()->getBlock();
+      S.push(std::make_pair(Entry, DT->getRootNode()));
+
+      NodeMap[Entry->BB] = Entry;
+
+      while (!S.empty()) {
+        std::pair<Node *, DomTreeNode *> &Pair = S.top();
+        Node *N = Pair.first;
+        DomTreeNode *DTNode = Pair.second;
+        S.pop();
+
+        for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end();
+             I != E; ++I) {
+          Node *NewNode = new Node;
+          NewNode->BB = (*I)->getBlock();
+          N->Children.push_back(NewNode);
+          S.push(std::make_pair(NewNode, *I));
+
+          NodeMap[NewNode->BB] = NewNode;
+        }
+      }
+
+      renumber();
+
+#ifndef NDEBUG
+      DEBUG(dump());
+#endif
+    }
+
+#ifndef NDEBUG
+    virtual
+#endif
+    ~DomTreeDFS() {
+      std::stack<Node *> S;
+
+      S.push(Entry);
+      while (!S.empty()) {
+        Node *N = S.top(); S.pop();
+
+        for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I)
+          S.push(*I);
+
+        delete N;
+      }
+    }
+
+    /// getRootNode - This returns the entry node for the CFG of the function.
+    Node *getRootNode() const { return Entry; }
+
+    /// getNodeForBlock - return the node for the specified basic block.
+    Node *getNodeForBlock(BasicBlock *BB) const {
+      if (!NodeMap.count(BB)) return 0;
+      return const_cast<DomTreeDFS *>(this)->NodeMap[BB];
+    }
+
+    /// dominates - returns true if the basic block for I1 dominates that of
+    /// the basic block for I2.  If the instructions belong to the same basic
+    /// block, the instruction that comes first sequentially in the block is
+    /// considered dominating.
+    bool dominates(Instruction *I1, Instruction *I2) {
+      BasicBlock *BB1 = I1->getParent(),
+                 *BB2 = I2->getParent();
+      if (BB1 == BB2) {
+        if (isa<TerminatorInst>(I1)) return false;
+        if (isa<TerminatorInst>(I2)) return true;
+        if ( isa<PHINode>(I1) && !isa<PHINode>(I2)) return true;
+        if (!isa<PHINode>(I1) &&  isa<PHINode>(I2)) return false;
+
+        for (BasicBlock::const_iterator I = BB2->begin(), E = BB2->end();
+             I != E; ++I) {
+          if (&*I == I1) return true;
+          else if (&*I == I2) return false;
+        }
+        assert(!"Instructions not found in parent BasicBlock?");
+      } else {
+        Node *Node1 = getNodeForBlock(BB1),
+             *Node2 = getNodeForBlock(BB2);
+        return Node1 && Node2 && Node1->dominates(Node2);
+      }
+      return false; // Not reached
+    }
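[Editor's note: the numbering consumed above is produced by renumber() below. A standalone sketch of the same two-phase walk follows, using an explicit stack of (node, next-child) pairs as in the patch; the types and names are invented.]

#include <cassert>
#include <stack>
#include <utility>
#include <vector>

struct N { std::vector<N*> Kids; unsigned In, Out; };

// Assign an "in" number when a node is first reached and an "out" number
// when the walk leaves it.
static void renumber(N *Root) {
  unsigned n = 0;
  std::stack<std::pair<N*, size_t> > S;   // node and next child index
  Root->In = ++n;
  S.push(std::make_pair(Root, (size_t)0));
  while (!S.empty()) {
    N *Cur = S.top().first;
    size_t &I = S.top().second;
    if (I == Cur->Kids.size()) {
      Cur->Out = ++n;
      S.pop();
    } else {
      N *Next = Cur->Kids[I++];
      Next->In = ++n;
      S.push(std::make_pair(Next, (size_t)0));
    }
  }
}

int main() {
  N Leaf1, Leaf2, Root;
  Root.Kids.push_back(&Leaf1);
  Root.Kids.push_back(&Leaf2);
  renumber(&Root);
  // Root's interval [1,6] contains both leaves' intervals [2,3] and [4,5].
  assert(Root.In == 1 && Leaf1.In == 2 && Leaf1.Out == 3);
  assert(Leaf2.In == 4 && Leaf2.Out == 5 && Root.Out == 6);
  return 0;
}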
+
+  private:
+    /// renumber - calculates the depth first search numberings and applies
+    /// them onto the nodes.
+    void renumber() {
+      std::stack<std::pair<Node *, Node::iterator> > S;
+      unsigned n = 0;
+
+      Entry->DFSin = ++n;
+      S.push(std::make_pair(Entry, Entry->begin()));
+
+      while (!S.empty()) {
+        std::pair<Node *, Node::iterator> &Pair = S.top();
+        Node *N = Pair.first;
+        Node::iterator &I = Pair.second;
+
+        if (I == N->end()) {
+          N->DFSout = ++n;
+          S.pop();
+        } else {
+          Node *Next = *I++;
+          Next->DFSin = ++n;
+          S.push(std::make_pair(Next, Next->begin()));
+        }
+      }
+    }
+
+#ifndef NDEBUG
+    virtual void dump() const {
+      dump(*cerr.stream());
+    }
+
+    void dump(std::ostream &os) const {
+      os << "Predicate simplifier DomTreeDFS: \n";
+      dump(Entry, 0, os);
+      os << "\n\n";
+    }
+
+    void dump(Node *N, int depth, std::ostream &os) const {
+      ++depth;
+      for (int i = 0; i < depth; ++i) { os << " "; }
+      os << "[" << depth << "] ";
+
+      os << N->getBlock()->getName() << " (" << N->getDFSNumIn()
+         << ", " << N->getDFSNumOut() << ")\n";
+
+      for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I)
+        dump(*I, depth, os);
+    }
+#endif
+
+    Node *Entry;
+    std::map<BasicBlock *, Node *> NodeMap;
+  };
+
+  //       SLT  SGT  ULT  UGT  EQ
+  //        0    1    0    1    0  --  GT       10
+  //        0    1    0    1    1  --  GE       11
+  //        0    1    1    0    0  --  SGTULT   12
+  //        0    1    1    0    1  --  SGEULE   13
+  //        0    1    1    1    0  --  SGT      14
+  //        0    1    1    1    1  --  SGE      15
+  //        1    0    0    1    0  --  SLTUGT   18
+  //        1    0    0    1    1  --  SLEUGE   19
+  //        1    0    1    0    0  --  LT       20
+  //        1    0    1    0    1  --  LE       21
+  //        1    0    1    1    0  --  SLT      22
+  //        1    0    1    1    1  --  SLE      23
+  //        1    1    0    1    0  --  UGT      26
+  //        1    1    0    1    1  --  UGE      27
+  //        1    1    1    0    0  --  ULT      28
+  //        1    1    1    0    1  --  ULE      29
+  //        1    1    1    1    0  --  NE       30
+  enum LatticeBits {
+    EQ_BIT = 1, UGT_BIT = 2, ULT_BIT = 4, SGT_BIT = 8, SLT_BIT = 16
+  };
+  enum LatticeVal {
+    GT = SGT_BIT | UGT_BIT,
+    GE = GT | EQ_BIT,
+    LT = SLT_BIT | ULT_BIT,
+    LE = LT | EQ_BIT,
+    NE = SLT_BIT | SGT_BIT | ULT_BIT | UGT_BIT,
+    SGTULT = SGT_BIT | ULT_BIT,
+    SGEULE = SGTULT | EQ_BIT,
+    SLTUGT = SLT_BIT | UGT_BIT,
+    SLEUGE = SLTUGT | EQ_BIT,
+    ULT = SLT_BIT | SGT_BIT | ULT_BIT,
+    UGT = SLT_BIT | SGT_BIT | UGT_BIT,
+    SLT = SLT_BIT | ULT_BIT | UGT_BIT,
+    SGT = SGT_BIT | ULT_BIT | UGT_BIT,
+    SLE = SLT | EQ_BIT,
+    SGE = SGT | EQ_BIT,
+    ULE = ULT | EQ_BIT,
+    UGE = UGT | EQ_BIT
+  };
+
+#ifndef NDEBUG
+  /// validPredicate - determines whether a given value is actually a lattice
+  /// value.  Only used in assertions or debugging.
+  static bool validPredicate(LatticeVal LV) {
+    switch (LV) {
+      case GT: case GE: case LT: case LE: case NE:
+      case SGTULT: case SGT: case SGEULE:
+      case SLTUGT: case SLT: case SLEUGE:
+      case ULT: case UGT:
+      case SLE: case SGE: case ULE: case UGE:
+        return true;
+      default:
+        return false;
+    }
+  }
+#endif
+
+  /// reversePredicate - reverse the direction of the inequality
+  static LatticeVal reversePredicate(LatticeVal LV) {
+    unsigned reverse = LV ^ (SLT_BIT|SGT_BIT|ULT_BIT|UGT_BIT); //preserve EQ_BIT
+
+    if ((reverse & (SLT_BIT|SGT_BIT)) == 0)
+      reverse |= (SLT_BIT|SGT_BIT);
+
+    if ((reverse & (ULT_BIT|UGT_BIT)) == 0)
+      reverse |= (ULT_BIT|UGT_BIT);
+
+    LatticeVal Rev = static_cast<LatticeVal>(reverse);
+    assert(validPredicate(Rev) && "Failed reversing predicate.");
+    return Rev;
+  }
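[Editor's note: the lattice encoding and reversePredicate() above lend themselves to a quick standalone check. The sketch below mirrors the bit values from the table; everything outside those constants is invented. Reversal flips the LT/GT direction in each half and re-widens any half that becomes unconstrained.]

#include <cassert>

enum {
  EQ_BIT = 1, UGT_BIT = 2, ULT_BIT = 4, SGT_BIT = 8, SLT_BIT = 16
};

static unsigned reverse(unsigned LV) {
  unsigned R = LV ^ (SLT_BIT | SGT_BIT | ULT_BIT | UGT_BIT); // keep EQ_BIT
  if ((R & (SLT_BIT | SGT_BIT)) == 0) R |= SLT_BIT | SGT_BIT;
  if ((R & (ULT_BIT | UGT_BIT)) == 0) R |= ULT_BIT | UGT_BIT;
  return R;
}

int main() {
  unsigned LT = SLT_BIT | ULT_BIT;             // less-than in both senses
  unsigned GT = SGT_BIT | UGT_BIT;
  assert(reverse(LT) == GT && reverse(GT) == LT);
  // SLT leaves the unsigned half unknown; it stays unconstrained on reversal.
  unsigned SLT = SLT_BIT | ULT_BIT | UGT_BIT;  // 22 in the table above
  unsigned SGT = SGT_BIT | ULT_BIT | UGT_BIT;  // 14 in the table above
  assert(reverse(SLT) == SGT);
  return 0;
}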
+
+  /// ValueNumbering stores the scope-specific value numbers for a given Value.
+  class VISIBILITY_HIDDEN ValueNumbering {
+
+    /// VNPair is a tuple of {Value, index number, DomTreeDFS::Node}.  It
+    /// includes the comparison operators necessary to allow you to store it
+    /// in a sorted vector.
+    class VISIBILITY_HIDDEN VNPair {
+    public:
+      Value *V;
+      unsigned index;
+      DomTreeDFS::Node *Subtree;
+
+      VNPair(Value *V, unsigned index, DomTreeDFS::Node *Subtree)
+        : V(V), index(index), Subtree(Subtree) {}
+
+      bool operator==(const VNPair &RHS) const {
+        return V == RHS.V && Subtree == RHS.Subtree;
+      }
+
+      bool operator<(const VNPair &RHS) const {
+        if (V != RHS.V) return V < RHS.V;
+        return *Subtree < *RHS.Subtree;
+      }
+
+      bool operator<(Value *RHS) const {
+        return V < RHS;
+      }
+
+      bool operator>(Value *RHS) const {
+        return V > RHS;
+      }
+
+      friend bool operator<(Value *RHS, const VNPair &pair) {
+        return pair.operator>(RHS);
+      }
+    };
+
+    typedef std::vector<VNPair> VNMapType;
+    VNMapType VNMap;
+
+    /// The canonical choice for value number at index.
+    std::vector<Value *> Values;
+
+    DomTreeDFS *DTDFS;
+
+  public:
+#ifndef NDEBUG
+    virtual ~ValueNumbering() {}
+    virtual void dump() {
+      dump(*cerr.stream());
+    }
+
+    void dump(std::ostream &os) {
+      for (unsigned i = 1; i <= Values.size(); ++i) {
+        os << i << " = ";
+        WriteAsOperand(os, Values[i-1]);
+        os << " {";
+        for (unsigned j = 0; j < VNMap.size(); ++j) {
+          if (VNMap[j].index == i) {
+            WriteAsOperand(os, VNMap[j].V);
+            os << " (" << VNMap[j].Subtree->getDFSNumIn() << ")  ";
+          }
+        }
+        os << "}\n";
+      }
+    }
+#endif
+
+    /// compare - returns true if V1 is a better canonical value than V2.
+    bool compare(Value *V1, Value *V2) const {
+      if (isa<Constant>(V1))
+        return !isa<Constant>(V2);
+      else if (isa<Constant>(V2))
+        return false;
+      else if (isa<Argument>(V1))
+        return !isa<Argument>(V2);
+      else if (isa<Argument>(V2))
+        return false;
+
+      Instruction *I1 = dyn_cast<Instruction>(V1);
+      Instruction *I2 = dyn_cast<Instruction>(V2);
+
+      if (!I1 || !I2)
+        return V1->getNumUses() < V2->getNumUses();
+
+      return DTDFS->dominates(I1, I2);
+    }
+
+    ValueNumbering(DomTreeDFS *DTDFS) : DTDFS(DTDFS) {}
+
+    /// valueNumber - finds the value number for V under the Subtree.  If
+    /// there is no value number, returns zero.
+    unsigned valueNumber(Value *V, DomTreeDFS::Node *Subtree) {
+      if (!(isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V))
+          || V->getType() == Type::VoidTy) return 0;
+
+      VNMapType::iterator E = VNMap.end();
+      VNPair pair(V, 0, Subtree);
+      VNMapType::iterator I = std::lower_bound(VNMap.begin(), E, pair);
+      while (I != E && I->V == V) {
+        if (I->Subtree->dominates(Subtree))
+          return I->index;
+        ++I;
+      }
+      return 0;
+    }
+
+    /// getOrInsertVN - always returns a value number, creating it if
+    /// necessary.
+    unsigned getOrInsertVN(Value *V, DomTreeDFS::Node *Subtree) {
+      if (unsigned n = valueNumber(V, Subtree))
+        return n;
+      else
+        return newVN(V);
+    }
+
+    /// newVN - creates a new value number.  Value V must not already have a
+    /// value number assigned.
+    unsigned newVN(Value *V) {
+      assert((isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V)) &&
+             "Bad Value for value numbering.");
+      assert(V->getType() != Type::VoidTy && "Won't value number a void value");
+
+      Values.push_back(V);
+
+      VNPair pair = VNPair(V, Values.size(), DTDFS->getRootNode());
+      VNMapType::iterator I =
+          std::lower_bound(VNMap.begin(), VNMap.end(), pair);
+      assert((I == VNMap.end() || value(I->index) != V) &&
+             "Attempt to create a duplicate value number.");
+      VNMap.insert(I, pair);
+
+      return Values.size();
+    }
+
+    /// value - returns the Value associated with a value number.
+    Value *value(unsigned index) const {
+      assert(index != 0 && "Zero index is reserved for not found.");
+      assert(index <= Values.size() && "Index out of range.");
+      return Values[index-1];
+    }
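[Editor's note: valueNumber() above is a lower_bound over entries sorted by (value, scope), where a hit must come from a scope that dominates the query point. Below is a simplified sketch, with plain ints standing in for Values and DFS intervals standing in for DomTreeDFS nodes; all names are invented.]

#include <algorithm>
#include <cassert>
#include <vector>

struct Scope {
  unsigned In, Out;
  bool dominates(const Scope &S) const { return In <= S.In && Out >= S.Out; }
};
struct Entry { int Val; unsigned Num; Scope Sc; };

static bool byVal(const Entry &A, int V) { return A.Val < V; }

// Walk the entries for V until one's scope dominates the query scope.
static unsigned lookup(const std::vector<Entry> &Map, int V, Scope Where) {
  std::vector<Entry>::const_iterator I =
      std::lower_bound(Map.begin(), Map.end(), V, byVal);
  for (; I != Map.end() && I->Val == V; ++I)
    if (I->Sc.dominates(Where))
      return I->Num;
  return 0;                    // zero is reserved for "no value number"
}

int main() {
  Scope Root = {1, 6}, Then = {2, 3}, Else = {4, 5};
  // The same value numbers differently under the two branches of a diamond.
  Entry E1 = {7, 1, Then};
  Entry E2 = {7, 2, Else};
  std::vector<Entry> Map;
  Map.push_back(E1);
  Map.push_back(E2);
  assert(lookup(Map, 7, Then) == 1);
  assert(lookup(Map, 7, Else) == 2);
  assert(lookup(Map, 7, Root) == 0);   // neither scope dominates the root
  return 0;
}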
+
+    /// canonicalize - return a Value that is equal to V under Subtree.
+    Value *canonicalize(Value *V, DomTreeDFS::Node *Subtree) {
+      if (isa<Constant>(V)) return V;
+
+      if (unsigned n = valueNumber(V, Subtree))
+        return value(n);
+      else
+        return V;
+    }
+
+    /// addEquality - adds that value V belongs to the set of equivalent
+    /// values defined by value number n under Subtree.
+    void addEquality(unsigned n, Value *V, DomTreeDFS::Node *Subtree) {
+      assert(canonicalize(value(n), Subtree) == value(n) &&
+             "Node's 'canonical' choice isn't best within this subtree.");
+
+      // Suppose that we are given "%x -> node #1 (%y)".  The problem is that
+      // we may already have "%z -> node #2 (%x)" somewhere above us in the
+      // graph.  We need to find those edges and add "%z -> node #1 (%y)"
+      // to keep the lookups canonical.
+
+      std::vector<Value *> ToRepoint(1, V);
+
+      if (unsigned Conflict = valueNumber(V, Subtree)) {
+        for (VNMapType::iterator I = VNMap.begin(), E = VNMap.end();
+             I != E; ++I) {
+          if (I->index == Conflict && I->Subtree->dominates(Subtree))
+            ToRepoint.push_back(I->V);
+        }
+      }
+
+      for (std::vector<Value *>::iterator VI = ToRepoint.begin(),
+           VE = ToRepoint.end(); VI != VE; ++VI) {
+        Value *V = *VI;
+
+        VNPair pair(V, n, Subtree);
+        VNMapType::iterator B = VNMap.begin(), E = VNMap.end();
+        VNMapType::iterator I = std::lower_bound(B, E, pair);
+        if (I != E && I->V == V && I->Subtree == Subtree)
+          I->index = n; // Update best choice
+        else
+          VNMap.insert(I, pair); // New Value
+
+        // XXX: we currently don't have to worry about updating values with
+        // more specific Subtrees, but we will need to for PHI node support.
+
+#ifndef NDEBUG
+        Value *V_n = value(n);
+        if (isa<Constant>(V) && isa<Constant>(V_n)) {
+          assert(V == V_n && "Constant equals different constant?");
+        }
+#endif
+      }
+    }
+
+    /// remove - removes all references to value V.
+    void remove(Value *V) {
+      VNMapType::iterator B = VNMap.begin(), E = VNMap.end();
+      VNPair pair(V, 0, DTDFS->getRootNode());
+      VNMapType::iterator J = std::upper_bound(B, E, pair);
+      VNMapType::iterator I = J;
+
+      while (I != B && (I == E || I->V == V)) --I;
+
+      VNMap.erase(I, J);
+    }
+  };
+
+  /// The InequalityGraph stores the relationships between values.
+  /// Each Value in the graph is assigned to a Node.  Nodes are pointer
+  /// comparable for equality.  The caller is expected to maintain the logical
+  /// consistency of the system.
+  ///
+  /// The InequalityGraph class may invalidate Node*s after any mutator call.
+  /// @brief The InequalityGraph stores the relationships between values.
+  class VISIBILITY_HIDDEN InequalityGraph {
+    ValueNumbering &VN;
+    DomTreeDFS::Node *TreeRoot;
+
+    InequalityGraph();                  // DO NOT IMPLEMENT
+    InequalityGraph(InequalityGraph &); // DO NOT IMPLEMENT
+  public:
+    InequalityGraph(ValueNumbering &VN, DomTreeDFS::Node *TreeRoot)
+      : VN(VN), TreeRoot(TreeRoot) {}
+
+    class Node;
+
+    /// An Edge is contained inside a Node, making one end of the edge
+    /// implicit, and contains a pointer to the other end.  The edge contains
+    /// a lattice value specifying the relationship and a DomTreeDFS::Node
+    /// specifying the root in the dominator tree to which this edge applies.
+    class VISIBILITY_HIDDEN Edge {
+    public:
+      Edge(unsigned T, LatticeVal V, DomTreeDFS::Node *ST)
+        : To(T), LV(V), Subtree(ST) {}
+
+      unsigned To;
+      LatticeVal LV;
+      DomTreeDFS::Node *Subtree;
+
+      bool operator<(const Edge &edge) const {
+        if (To != edge.To) return To < edge.To;
+        return *Subtree < *edge.Subtree;
+      }
+
+      bool operator<(unsigned to) const {
+        return To < to;
+      }
+
+      bool operator>(unsigned to) const {
+        return To > to;
+      }
+
+      friend bool operator<(unsigned to, const Edge &edge) {
+        return edge.operator>(to);
+      }
+    };
+
+    /// A single node in the InequalityGraph.  This stores the canonical Value
+    /// for the node, as well as the relationships with the neighbours.
+    ///
+    /// @brief A single node in the InequalityGraph.
+    class VISIBILITY_HIDDEN Node {
+      friend class InequalityGraph;
+
+      typedef SmallVector<Edge, 4> RelationsType;
+      RelationsType Relations;
+
+      // TODO: can this idea improve performance?
+      //friend class std::vector<Node>;
+      //Node(Node &N) { RelationsType.swap(N.RelationsType); }
+
+    public:
+      typedef RelationsType::iterator iterator;
+      typedef RelationsType::const_iterator const_iterator;
+
+#ifndef NDEBUG
+      virtual ~Node() {}
+      virtual void dump() const {
+        dump(*cerr.stream());
+      }
+    private:
+      void dump(std::ostream &os) const {
+        static const std::string names[32] =
+          { "000000", "000001", "000002", "000003", "000004", "000005",
+            "000006", "000007", "000008", "000009", "     >", "    >=",
+            "  s>u<", "s>=u<=", "    s>", "   s>=", "000016", "000017",
+            "  s<u>", "s<=u>=", "     <", "    <=", "    s<", "   s<=",
+            "000024", "000025", "    u>", "   u>=", "    u<", "   u<=",
+            "    !=", "000031" };
+        for (Node::const_iterator NI = begin(), NE = end(); NI != NE; ++NI) {
+          os << names[NI->LV] << " " << NI->To
+             << " (" << NI->Subtree->getDFSNumIn() << "), ";
+        }
+      }
+    public:
+#endif
+
+      iterator begin()             { return Relations.begin(); }
+      iterator end()               { return Relations.end();   }
+      const_iterator begin() const { return Relations.begin(); }
+      const_iterator end()   const { return Relations.end();   }
+
+      iterator find(unsigned n, DomTreeDFS::Node *Subtree) {
+        iterator E = end();
+        for (iterator I = std::lower_bound(begin(), E, n);
+             I != E && I->To == n; ++I) {
+          if (Subtree->DominatedBy(I->Subtree))
+            return I;
+        }
+        return E;
+      }
+
+      const_iterator find(unsigned n, DomTreeDFS::Node *Subtree) const {
+        const_iterator E = end();
+        for (const_iterator I = std::lower_bound(begin(), E, n);
+             I != E && I->To == n; ++I) {
+          if (Subtree->DominatedBy(I->Subtree))
+            return I;
+        }
+        return E;
+      }
+
+      /// update - updates the lattice value for a given node, creating a new
+      /// entry if one doesn't exist.  The new lattice value must not be
+      /// inconsistent with any previously existing value.
+      void update(unsigned n, LatticeVal R, DomTreeDFS::Node *Subtree) {
+        assert(validPredicate(R) && "Invalid predicate.");
+
+        Edge edge(n, R, Subtree);
+        iterator B = begin(), E = end();
+        iterator I = std::lower_bound(B, E, edge);
+
+        iterator J = I;
+        while (J != E && J->To == n) {
+          if (Subtree->DominatedBy(J->Subtree))
+            break;
+          ++J;
+        }
+
+        if (J != E && J->To == n) {
+          edge.LV = static_cast<LatticeVal>(J->LV & R);
+          assert(validPredicate(edge.LV) && "Invalid union of lattice values.");
+
+          if (edge.LV == J->LV)
+            return; // This update adds nothing new.
+        }
+
+        if (I != B) {
+          // We also have to tighten any edge beneath our update.
+          for (iterator K = I - 1; K->To == n; --K) {
+            if (K->Subtree->DominatedBy(Subtree)) {
+              LatticeVal LV = static_cast<LatticeVal>(K->LV & edge.LV);
+              assert(validPredicate(LV) && "Invalid union of lattice values");
+              K->LV = LV;
+            }
+            if (K == B) break;
+          }
+        }
+
+        // Insert new edge at Subtree if it isn't already there.
+        if (I == E || I->To != n || Subtree != I->Subtree)
+          Relations.insert(I, edge);
+      }
+    };
+
+  private:
+
+    std::vector<Node> Nodes;
+
+  public:
+    /// node - returns the node object at a given value number.  The pointer
+    /// returned may be invalidated on the next call to node().
+    Node *node(unsigned index) {
+      assert(VN.value(index)); // This triggers the necessary checks.
+      if (Nodes.size() < index) Nodes.resize(index);
+      return &Nodes[index-1];
+    }
+
+    /// isRelatedBy - true iff n1 op n2
+    bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+                     LatticeVal LV) {
+      if (n1 == n2) return LV & EQ_BIT;
+
+      Node *N1 = node(n1);
+      Node::iterator I = N1->find(n2, Subtree), E = N1->end();
+      if (I != E) return (I->LV & LV) == I->LV;
+
+      return false;
+    }
+
+    // The add* methods assume that your input is logically valid and may
+    // assertion-fail or infinitely loop if you attempt a contradiction.
+
+    /// addInequality - Sets n1 op n2.
+    /// It is also an error to call this on an inequality that is already true.
+    void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+                       LatticeVal LV1) {
+      assert(n1 != n2 && "A node can't be unequal to itself.");
+
+      if (LV1 != NE)
+        assert(!isRelatedBy(n1, n2, Subtree, reversePredicate(LV1)) &&
+               "Contradictory inequality.");
+
+      // Suppose we're adding %n1 < %n2.  Find all the %a < %n1 and
+      // add %a < %n2 too.  This keeps the graph fully connected.
+      if (LV1 != NE) {
+        // Break up the relationship into signed and unsigned comparison parts.
+        // If the signed parts of %a op1 %n1 match that of %n1 op2 %n2, and
+        // op1 and op2 aren't NE, then add %a op3 %n2.  The new relationship
+        // should have the EQ_BIT iff it's set for both op1 and op2.
+
+        unsigned LV1_s = LV1 & (SLT_BIT|SGT_BIT);
+        unsigned LV1_u = LV1 & (ULT_BIT|UGT_BIT);
+
+        for (Node::iterator I = node(n1)->begin(), E = node(n1)->end();
+             I != E; ++I) {
+          if (I->LV != NE && I->To != n2) {
+
+            DomTreeDFS::Node *Local_Subtree = NULL;
+            if (Subtree->DominatedBy(I->Subtree))
+              Local_Subtree = Subtree;
+            else if (I->Subtree->DominatedBy(Subtree))
+              Local_Subtree = I->Subtree;
+
+            if (Local_Subtree) {
+              unsigned new_relationship = 0;
+              LatticeVal ILV = reversePredicate(I->LV);
+              unsigned ILV_s = ILV & (SLT_BIT|SGT_BIT);
+              unsigned ILV_u = ILV & (ULT_BIT|UGT_BIT);
+
+              if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s)
+                new_relationship |= ILV_s;
+              if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u)
+                new_relationship |= ILV_u;
+
+              if (new_relationship) {
+                if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0)
+                  new_relationship |= (SLT_BIT|SGT_BIT);
+                if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0)
+                  new_relationship |= (ULT_BIT|UGT_BIT);
+                if ((LV1 & EQ_BIT) && (ILV & EQ_BIT))
+                  new_relationship |= EQ_BIT;
+
+                LatticeVal NewLV = static_cast<LatticeVal>(new_relationship);
+
+                node(I->To)->update(n2, NewLV, Local_Subtree);
+                node(n2)->update(I->To, reversePredicate(NewLV),
+                                 Local_Subtree);
+              }
+            }
+          }
+        }
+
+        for (Node::iterator I = node(n2)->begin(), E = node(n2)->end();
+             I != E; ++I) {
+          if (I->LV != NE && I->To != n1) {
+            DomTreeDFS::Node *Local_Subtree = NULL;
+            if (Subtree->DominatedBy(I->Subtree))
+              Local_Subtree = Subtree;
+            else if (I->Subtree->DominatedBy(Subtree))
+              Local_Subtree = I->Subtree;
+
+            if (Local_Subtree) {
+              unsigned new_relationship = 0;
+              unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT);
+              unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT);
+
+              if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s)
+                new_relationship |= ILV_s;
+
+              if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u)
+                new_relationship |= ILV_u;
+
+              if (new_relationship) {
+                if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0)
+                  new_relationship |= (SLT_BIT|SGT_BIT);
+                if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0)
+                  new_relationship |= (ULT_BIT|UGT_BIT);
+                if ((LV1 & EQ_BIT) && (I->LV & EQ_BIT))
+                  new_relationship |= EQ_BIT;
+
+                LatticeVal NewLV = static_cast<LatticeVal>(new_relationship);
+
+                node(n1)->update(I->To, NewLV, Local_Subtree);
+                node(I->To)->update(n1, reversePredicate(NewLV),
+                                    Local_Subtree);
+              }
+            }
+          }
+        }
+      }
+
+      node(n1)->update(n2, LV1, Subtree);
+      node(n2)->update(n1, reversePredicate(LV1), Subtree);
+    }
+
+    /// remove - removes a node from the graph by removing all references to
+    /// and from it.
+    void remove(unsigned n) {
+      Node *N = node(n);
+      for (Node::iterator NI = N->begin(), NE = N->end(); NI != NE; ++NI) {
+        Node::iterator Iter = node(NI->To)->find(n, TreeRoot);
+        do {
+          node(NI->To)->Relations.erase(Iter);
+          Iter = node(NI->To)->find(n, TreeRoot);
+        } while (Iter != node(NI->To)->end());
+      }
+      N->Relations.clear();
+    }
+
+#ifndef NDEBUG
+    virtual ~InequalityGraph() {}
+    virtual void dump() {
+      dump(*cerr.stream());
+    }
+
+    void dump(std::ostream &os) {
+      for (unsigned i = 1; i <= Nodes.size(); ++i) {
+        os << i << " = {";
+        node(i)->dump(os);
+        os << "}\n";
+      }
+    }
+#endif
+  };
+
+  class VRPSolver;
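[Editor's note: the propagation in addInequality combines the signed and unsigned halves of two relations, keeping a half only where the two relations agree and keeping EQ only if both carried it. Below is a simplified restatement of that combination rule (it omits the patch's subtree bookkeeping); all names are invented.]

#include <cassert>

enum {
  EQ_BIT = 1, UGT_BIT = 2, ULT_BIT = 4, SGT_BIT = 8, SLT_BIT = 16
};

// Given a R1 b and b R2 c, derive the a-to-c relation, or 0 if nothing
// can be inferred.
static unsigned combine(unsigned R1, unsigned R2) {
  unsigned Out = 0;
  if ((R1 & (SLT_BIT | SGT_BIT)) == (R2 & (SLT_BIT | SGT_BIT)))
    Out |= R1 & (SLT_BIT | SGT_BIT);          // signed halves agree
  if ((R1 & (ULT_BIT | UGT_BIT)) == (R2 & (ULT_BIT | UGT_BIT)))
    Out |= R1 & (ULT_BIT | UGT_BIT);          // unsigned halves agree
  if (!Out) return 0;
  if ((Out & (SLT_BIT | SGT_BIT)) == 0) Out |= SLT_BIT | SGT_BIT;  // widen
  if ((Out & (ULT_BIT | UGT_BIT)) == 0) Out |= ULT_BIT | UGT_BIT;  // widen
  if ((R1 & EQ_BIT) && (R2 & EQ_BIT)) Out |= EQ_BIT;
  return Out;
}

int main() {
  unsigned LT = SLT_BIT | ULT_BIT, LE = LT | EQ_BIT;
  assert(combine(LT, LT) == LT);   // a < b < c   ==>  a < c
  assert(combine(LE, LT) == LT);   // a <= b < c  ==>  a < c (EQ dropped)
  return 0;
}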
+
+  /// ValueRanges tracks the known integer ranges and anti-ranges of the nodes
+  /// in the InequalityGraph.
+  class VISIBILITY_HIDDEN ValueRanges {
+    ValueNumbering &VN;
+    TargetData *TD;
+
+    class VISIBILITY_HIDDEN ScopedRange {
+      typedef std::vector<std::pair<DomTreeDFS::Node *, ConstantRange> >
+              RangeListType;
+      RangeListType RangeList;
+
+      static bool swo(const std::pair<DomTreeDFS::Node *, ConstantRange> &LHS,
+                      const std::pair<DomTreeDFS::Node *, ConstantRange> &RHS) {
+        return *LHS.first < *RHS.first;
+      }
+
+    public:
+#ifndef NDEBUG
+      virtual ~ScopedRange() {}
+      virtual void dump() const {
+        dump(*cerr.stream());
+      }
+
+      void dump(std::ostream &os) const {
+        os << "{";
+        for (const_iterator I = begin(), E = end(); I != E; ++I) {
+          os << &I->second << " (" << I->first->getDFSNumIn() << "), ";
+        }
+        os << "}";
+      }
+#endif
+
+      typedef RangeListType::iterator iterator;
+      typedef RangeListType::const_iterator const_iterator;
+
+      iterator begin() { return RangeList.begin(); }
+      iterator end()   { return RangeList.end();   }
+      const_iterator begin() const { return RangeList.begin(); }
+      const_iterator end()   const { return RangeList.end();   }
+
+      iterator find(DomTreeDFS::Node *Subtree) {
+        static ConstantRange empty(1, false);
+        iterator E = end();
+        iterator I = std::lower_bound(begin(), E,
+                                      std::make_pair(Subtree, empty), swo);
+
+        while (I != E && !I->first->dominates(Subtree)) ++I;
+        return I;
+      }
+
+      const_iterator find(DomTreeDFS::Node *Subtree) const {
+        static const ConstantRange empty(1, false);
+        const_iterator E = end();
+        const_iterator I = std::lower_bound(begin(), E,
+                                            std::make_pair(Subtree, empty),
+                                            swo);
+
+        while (I != E && !I->first->dominates(Subtree)) ++I;
+        return I;
+      }
+
+      void update(const ConstantRange &CR, DomTreeDFS::Node *Subtree) {
+        assert(!CR.isEmptySet() && "Empty ConstantRange.");
+        assert(!CR.isSingleElement() && "Refusing to store single element.");
+
+        static ConstantRange empty(1, false);
+        iterator E = end();
+        iterator I =
+            std::lower_bound(begin(), E, std::make_pair(Subtree, empty), swo);
+
+        if (I != end() && I->first == Subtree) {
+          ConstantRange CR2 = I->second.maximalIntersectWith(CR);
+          assert(!CR2.isEmptySet() && !CR2.isSingleElement() &&
+                 "Invalid union of ranges.");
+          I->second = CR2;
+        } else
+          RangeList.insert(I, std::make_pair(Subtree, CR));
+      }
+    };
+
+    std::vector<ScopedRange> Ranges;
+
+    void update(unsigned n, const ConstantRange &CR,
+                DomTreeDFS::Node *Subtree) {
+      if (CR.isFullSet()) return;
+      if (Ranges.size() < n) Ranges.resize(n);
+      Ranges[n-1].update(CR, Subtree);
+    }
+
+    /// create - Creates a ConstantRange that matches the given LatticeVal
+    /// relation with a given integer.
+    ConstantRange create(LatticeVal LV, const ConstantRange &CR) {
+      assert(!CR.isEmptySet() && "Can't deal with empty set.");
+
+      if (LV == NE)
+        return makeConstantRange(ICmpInst::ICMP_NE, CR);
+
+      unsigned LV_s = LV & (SGT_BIT|SLT_BIT);
+      unsigned LV_u = LV & (UGT_BIT|ULT_BIT);
+      bool hasEQ = LV & EQ_BIT;
+
+      ConstantRange Range(CR.getBitWidth());
+
+      if (LV_s == SGT_BIT) {
+        Range = Range.maximalIntersectWith(makeConstantRange(
+                    hasEQ ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SGT, CR));
+      } else if (LV_s == SLT_BIT) {
+        Range = Range.maximalIntersectWith(makeConstantRange(
+                    hasEQ ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_SLT, CR));
+      }
+
+      if (LV_u == UGT_BIT) {
+        Range = Range.maximalIntersectWith(makeConstantRange(
+                    hasEQ ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_UGT, CR));
+      } else if (LV_u == ULT_BIT) {
+        Range = Range.maximalIntersectWith(makeConstantRange(
+                    hasEQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT, CR));
+      }
+
+      return Range;
+    }
+
+    /// makeConstantRange - Creates a ConstantRange representing the set of all
+    /// values that match the ICmpInst::Predicate with any of the values in CR.
+    ConstantRange makeConstantRange(ICmpInst::Predicate ICmpOpcode,
+                                    const ConstantRange &CR) {
+      uint32_t W = CR.getBitWidth();
+      switch (ICmpOpcode) {
+        default: assert(!"Invalid ICmp opcode to makeConstantRange()");
+        case ICmpInst::ICMP_EQ:
+          return ConstantRange(CR.getLower(), CR.getUpper());
+        case ICmpInst::ICMP_NE:
+          if (CR.isSingleElement())
+            return ConstantRange(CR.getUpper(), CR.getLower());
+          return ConstantRange(W);
+        case ICmpInst::ICMP_ULT:
+          return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax());
+        case ICmpInst::ICMP_SLT:
+          return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax());
+        case ICmpInst::ICMP_ULE: {
+          APInt UMax(CR.getUnsignedMax());
+          if (UMax.isMaxValue())
+            return ConstantRange(W);
+          return ConstantRange(APInt::getMinValue(W), UMax + 1);
+        }
+        case ICmpInst::ICMP_SLE: {
+          APInt SMax(CR.getSignedMax());
+          if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue())
+            return ConstantRange(W);
+          return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+        }
+        case ICmpInst::ICMP_UGT:
+          return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W));
+        case ICmpInst::ICMP_SGT:
+          return ConstantRange(CR.getSignedMin() + 1,
+                               APInt::getSignedMinValue(W));
+        case ICmpInst::ICMP_UGE: {
+          APInt UMin(CR.getUnsignedMin());
+          if (UMin.isMinValue())
+            return ConstantRange(W);
+          return ConstantRange(UMin, APInt::getNullValue(W));
+        }
+        case ICmpInst::ICMP_SGE: {
+          APInt SMin(CR.getSignedMin());
+          if (SMin.isMinSignedValue())
+            return ConstantRange(W);
+          return ConstantRange(SMin, APInt::getSignedMinValue(W));
+        }
+      }
+    }
+
+#ifndef NDEBUG
+    bool isCanonical(Value *V, DomTreeDFS::Node *Subtree) {
+      return V == VN.canonicalize(V, Subtree);
+    }
+#endif
+
+  public:
+
+    ValueRanges(ValueNumbering &VN, TargetData *TD) : VN(VN), TD(TD) {}
+
+#ifndef NDEBUG
+    virtual ~ValueRanges() {}
+
+    virtual void dump() const {
+      dump(*cerr.stream());
+    }
+
+    void dump(std::ostream &os) const {
+      for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+        os << (i+1) << " = ";
+        Ranges[i].dump(os);
+        os << "\n";
+      }
+    }
+#endif
+
+    /// range - looks up the ConstantRange associated with a value number.
+    ConstantRange range(unsigned n, DomTreeDFS::Node *Subtree) {
+      assert(VN.value(n)); // performs range checks
+
+      if (n <= Ranges.size()) {
+        ScopedRange::iterator I = Ranges[n-1].find(Subtree);
+        if (I != Ranges[n-1].end()) return I->second;
+      }
+
+      Value *V = VN.value(n);
+      ConstantRange CR = range(V);
+      return CR;
+    }
+
+    /// range - determine a range from a Value without performing any lookups.
+    ConstantRange range(Value *V) const {
+      if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+        return ConstantRange(C->getValue());
+      else if (isa<ConstantPointerNull>(V))
+        return ConstantRange(APInt::getNullValue(typeToWidth(V->getType())));
+      else
+        return ConstantRange(typeToWidth(V->getType()));
+    }
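[Editor's note: makeConstantRange above builds wrapped half-open ranges from predicates. A sketch using 8-bit arithmetic in place of APInt/ConstantRange; the R8 type and helpers are invented, and only the ULT and UGT cases are shown.]

#include <cassert>

struct R8 { unsigned char Lo, Hi; };   // half-open [Lo, Hi), may wrap

// All x with x u< some value in CR, i.e. x u< CR's unsigned max.
static R8 ultRange(unsigned char UMax) { R8 R = {0, UMax}; return R; }

// All x with x u> some value in CR, i.e. x u> CR's unsigned min; wraps.
static R8 ugtRange(unsigned char UMin) {
  R8 R = { (unsigned char)(UMin + 1), 0 };
  return R;
}

static bool contains(R8 R, unsigned char X) {
  if (R.Lo == R.Hi) return true;             // full set in this sketch
  if (R.Lo < R.Hi) return R.Lo <= X && X < R.Hi;
  return X >= R.Lo || X < R.Hi;              // wrapped range
}

int main() {
  R8 R = ultRange(10);                       // x u< 10  ==>  x in [0, 10)
  assert(contains(R, 9) && !contains(R, 10));
  R = ugtRange(250);                         // x u> 250 ==>  x in [251, 0)
  assert(contains(R, 255) && !contains(R, 250));
  return 0;
}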
+
+    // typeToWidth - returns the number of bits necessary to store a value of
+    // this type, or zero if unknown.
+    uint32_t typeToWidth(const Type *Ty) const {
+      if (TD)
+        return TD->getTypeSizeInBits(Ty);
+      else
+        return Ty->getPrimitiveSizeInBits();
+    }
+
+    static bool isRelatedBy(const ConstantRange &CR1, const ConstantRange &CR2,
+                            LatticeVal LV) {
+      switch (LV) {
+        default: assert(!"Impossible lattice value!");
+        case NE:
+          return CR1.maximalIntersectWith(CR2).isEmptySet();
+        case ULT:
+          return CR1.getUnsignedMax().ult(CR2.getUnsignedMin());
+        case ULE:
+          return CR1.getUnsignedMax().ule(CR2.getUnsignedMin());
+        case UGT:
+          return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax());
+        case UGE:
+          return CR1.getUnsignedMin().uge(CR2.getUnsignedMax());
+        case SLT:
+          return CR1.getSignedMax().slt(CR2.getSignedMin());
+        case SLE:
+          return CR1.getSignedMax().sle(CR2.getSignedMin());
+        case SGT:
+          return CR1.getSignedMin().sgt(CR2.getSignedMax());
+        case SGE:
+          return CR1.getSignedMin().sge(CR2.getSignedMax());
+        case LT:
+          return CR1.getUnsignedMax().ult(CR2.getUnsignedMin()) &&
+                 CR1.getSignedMax().slt(CR2.getSignedMin());
+        case LE:
+          return CR1.getUnsignedMax().ule(CR2.getUnsignedMin()) &&
+                 CR1.getSignedMax().sle(CR2.getSignedMin());
+        case GT:
+          return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax()) &&
+                 CR1.getSignedMin().sgt(CR2.getSignedMax());
+        case GE:
+          return CR1.getUnsignedMin().uge(CR2.getUnsignedMax()) &&
+                 CR1.getSignedMin().sge(CR2.getSignedMax());
+        case SLTUGT:
+          return CR1.getSignedMax().slt(CR2.getSignedMin()) &&
+                 CR1.getUnsignedMin().ugt(CR2.getUnsignedMax());
+        case SLEUGE:
+          return CR1.getSignedMax().sle(CR2.getSignedMin()) &&
+                 CR1.getUnsignedMin().uge(CR2.getUnsignedMax());
+        case SGTULT:
+          return CR1.getSignedMin().sgt(CR2.getSignedMax()) &&
+                 CR1.getUnsignedMax().ult(CR2.getUnsignedMin());
+        case SGEULE:
+          return CR1.getSignedMin().sge(CR2.getSignedMax()) &&
+                 CR1.getUnsignedMax().ule(CR2.getUnsignedMin());
+      }
+    }
+
+    bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+                     LatticeVal LV) {
+      ConstantRange CR1 = range(n1, Subtree);
+      ConstantRange CR2 = range(n2, Subtree);
+
+      // True iff all values in CR1 are LV to all values in CR2.
+      return isRelatedBy(CR1, CR2, LV);
+    }
+
+    void addToWorklist(Value *V, Constant *C, ICmpInst::Predicate Pred,
+                       VRPSolver *VRP);
+    void markBlock(VRPSolver *VRP);
+
+    void mergeInto(Value **I, unsigned n, unsigned New,
+                   DomTreeDFS::Node *Subtree, VRPSolver *VRP) {
+      ConstantRange CR_New = range(New, Subtree);
+      ConstantRange Merged = CR_New;
+
+      for (; n != 0; ++I, --n) {
+        unsigned i = VN.valueNumber(*I, Subtree);
+        ConstantRange CR_Kill = i ? range(i, Subtree) : range(*I);
+        if (CR_Kill.isFullSet()) continue;
+        Merged = Merged.maximalIntersectWith(CR_Kill);
+      }
+
+      if (Merged.isFullSet() || Merged == CR_New) return;
+
+      applyRange(New, Merged, Subtree, VRP);
+    }
+
+    void applyRange(unsigned n, const ConstantRange &CR,
+                    DomTreeDFS::Node *Subtree, VRPSolver *VRP) {
+      ConstantRange Merged = CR.maximalIntersectWith(range(n, Subtree));
+      if (Merged.isEmptySet()) {
+        markBlock(VRP);
+        return;
+      }
+
+      if (const APInt *I = Merged.getSingleElement()) {
+        Value *V = VN.value(n); // XXX: redesign worklist.
+        const Type *Ty = V->getType();
+        if (Ty->isInteger()) {
+          addToWorklist(V, ConstantInt::get(*I), ICmpInst::ICMP_EQ, VRP);
+          return;
+        } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+          assert(*I == 0 && "Pointer is null but not zero?");
+          addToWorklist(V, ConstantPointerNull::get(PTy),
+                        ICmpInst::ICMP_EQ, VRP);
+          return;
+        }
+      }
+
+      update(n, Merged, Subtree);
+    }
+
+    void addNotEquals(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+                      VRPSolver *VRP) {
+      ConstantRange CR1 = range(n1, Subtree);
+      ConstantRange CR2 = range(n2, Subtree);
+
+      uint32_t W = CR1.getBitWidth();
+
+      if (const APInt *I = CR1.getSingleElement()) {
+        if (CR2.isFullSet()) {
+          ConstantRange NewCR2(CR1.getUpper(), CR1.getLower());
+          applyRange(n2, NewCR2, Subtree, VRP);
+        } else if (*I == CR2.getLower()) {
+          APInt NewLower(CR2.getLower() + 1),
+                NewUpper(CR2.getUpper());
+          if (NewLower == NewUpper)
+            NewLower = NewUpper = APInt::getMinValue(W);
+
+          ConstantRange NewCR2(NewLower, NewUpper);
+          applyRange(n2, NewCR2, Subtree, VRP);
+        } else if (*I == CR2.getUpper() - 1) {
+          APInt NewLower(CR2.getLower()),
+                NewUpper(CR2.getUpper() - 1);
+          if (NewLower == NewUpper)
+            NewLower = NewUpper = APInt::getMinValue(W);
+
+          ConstantRange NewCR2(NewLower, NewUpper);
+          applyRange(n2, NewCR2, Subtree, VRP);
+        }
+      }
+
+      if (const APInt *I = CR2.getSingleElement()) {
+        if (CR1.isFullSet()) {
+          ConstantRange NewCR1(CR2.getUpper(), CR2.getLower());
+          applyRange(n1, NewCR1, Subtree, VRP);
+        } else if (*I == CR1.getLower()) {
+          APInt NewLower(CR1.getLower() + 1),
+                NewUpper(CR1.getUpper());
+          if (NewLower == NewUpper)
+            NewLower = NewUpper = APInt::getMinValue(W);
+
+          ConstantRange NewCR1(NewLower, NewUpper);
+          applyRange(n1, NewCR1, Subtree, VRP);
+        } else if (*I == CR1.getUpper() - 1) {
+          APInt NewLower(CR1.getLower()),
+                NewUpper(CR1.getUpper() - 1);
+          if (NewLower == NewUpper)
+            NewLower = NewUpper = APInt::getMinValue(W);
+
+          ConstantRange NewCR1(NewLower, NewUpper);
+          applyRange(n1, NewCR1, Subtree, VRP);
+        }
+      }
+    }
+
+    void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+                       LatticeVal LV, VRPSolver *VRP) {
+      assert(!isRelatedBy(n1, n2, Subtree, LV) && "Asked to do useless work.");
+
+      if (LV == NE) {
+        addNotEquals(n1, n2, Subtree, VRP);
+        return;
+      }
+
+      ConstantRange CR1 = range(n1, Subtree);
+      ConstantRange CR2 = range(n2, Subtree);
+
+      if (!CR1.isSingleElement()) {
+        ConstantRange NewCR1 = CR1.maximalIntersectWith(create(LV, CR2));
+        if (NewCR1 != CR1)
+          applyRange(n1, NewCR1, Subtree, VRP);
+      }
+
+      if (!CR2.isSingleElement()) {
+        ConstantRange NewCR2 = CR2.maximalIntersectWith(
+                                   create(reversePredicate(LV), CR1));
+        if (NewCR2 != CR2)
+          applyRange(n2, NewCR2, Subtree, VRP);
+      }
+    }
+  };
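[Editor's note: addNotEquals can only use x != c when c sits at an edge of a range, since ConstantRange cannot represent interior holes. A minimal sketch follows, with plain ints in place of APInt; names are invented and wrapping is ignored.]

#include <cassert>

struct Range { int Lo, Hi; };   // half-open [Lo, Hi), non-wrapped

static Range excludeAtEdge(Range R, int C) {
  if (C == R.Lo) ++R.Lo;            // x != Lo: drop the low edge
  else if (C == R.Hi - 1) --R.Hi;   // x != Hi-1: drop the high edge
  return R;                         // interior holes are not representable
}

int main() {
  Range R = {0, 10};                       // x in [0, 10)
  R = excludeAtEdge(R, 0);                 // x != 0  ==>  [1, 10)
  assert(R.Lo == 1 && R.Hi == 10);
  R = excludeAtEdge(R, 9);                 // x != 9  ==>  [1, 9)
  assert(R.Lo == 1 && R.Hi == 9);
  R = excludeAtEdge(R, 5);                 // interior: range unchanged
  assert(R.Lo == 1 && R.Hi == 9);
  return 0;
}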
+
+  /// UnreachableBlocks keeps track of blocks that are for one reason or
+  /// another discovered to be unreachable.  This is used to cull the graph
+  /// when analyzing instructions, and to mark blocks with the "unreachable"
+  /// terminator instruction after the function has executed.
+  class VISIBILITY_HIDDEN UnreachableBlocks {
+  private:
+    std::vector<BasicBlock *> DeadBlocks;
+
+  public:
+    /// mark - mark a block as dead
+    void mark(BasicBlock *BB) {
+      std::vector<BasicBlock *>::iterator E = DeadBlocks.end();
+      std::vector<BasicBlock *>::iterator I =
+          std::lower_bound(DeadBlocks.begin(), E, BB);
+
+      if (I == E || *I != BB) DeadBlocks.insert(I, BB);
+    }
+
+    /// isDead - returns whether a block is known to be dead already
+    bool isDead(BasicBlock *BB) {
+      std::vector<BasicBlock *>::iterator E = DeadBlocks.end();
+      std::vector<BasicBlock *>::iterator I =
+          std::lower_bound(DeadBlocks.begin(), E, BB);
+
+      return I != E && *I == BB;
+    }
+
+    /// kill - replace the dead blocks' terminator with an UnreachableInst.
+    bool kill() {
+      bool modified = false;
+      for (std::vector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+           E = DeadBlocks.end(); I != E; ++I) {
+        BasicBlock *BB = *I;
+
+        DOUT << "unreachable block: " << BB->getName() << "\n";
+
+        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+             SI != SE; ++SI) {
+          BasicBlock *Succ = *SI;
+          Succ->removePredecessor(BB);
+        }
+
+        TerminatorInst *TI = BB->getTerminator();
+        TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+        TI->eraseFromParent();
+        new UnreachableInst(BB);
+        ++NumBlocks;
+        modified = true;
+      }
+      DeadBlocks.clear();
+      return modified;
+    }
+  };
+
+  /// VRPSolver keeps track of how changes to one variable affect other
+  /// variables, and forwards changes along to the InequalityGraph.  It
+  /// also maintains the correct choice for "canonical" in the IG.
+  /// @brief VRPSolver calculates inferences from a new relationship.
+  class VISIBILITY_HIDDEN VRPSolver {
+  private:
+    friend class ValueRanges;
+
+    struct Operation {
+      Value *LHS, *RHS;
+      ICmpInst::Predicate Op;
+
+      BasicBlock *ContextBB; // XXX use a DomTreeDFS::Node instead
+      Instruction *ContextInst;
+    };
+    std::deque<Operation> WorkList;
+
+    ValueNumbering &VN;
+    InequalityGraph &IG;
+    UnreachableBlocks &UB;
+    ValueRanges &VR;
+    DomTreeDFS *DTDFS;
+    DomTreeDFS::Node *Top;
+    BasicBlock *TopBB;
+    Instruction *TopInst;
+    bool &modified;
+
+    typedef InequalityGraph::Node Node;
+
+    // below - true if the Instruction is dominated by the current context
+    // block or instruction
+    bool below(Instruction *I) {
+      BasicBlock *BB = I->getParent();
+      if (TopInst && TopInst->getParent() == BB) {
+        if (isa<TerminatorInst>(TopInst)) return false;
+        if (isa<TerminatorInst>(I)) return true;
+        if ( isa<PHINode>(TopInst) && !isa<PHINode>(I)) return true;
+        if (!isa<PHINode>(TopInst) &&  isa<PHINode>(I)) return false;
+
+        for (BasicBlock::const_iterator Iter = BB->begin(), E = BB->end();
+             Iter != E; ++Iter) {
+          if (&*Iter == TopInst) return true;
+          else if (&*Iter == I) return false;
+        }
+        assert(!"Instructions not found in parent BasicBlock?");
+      } else {
+        DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB);
+        if (!Node) return false;
+        return Top->dominates(Node);
+      }
+      return false; // Not reached
+    }
+
+    // aboveOrBelow - true if the Instruction either dominates or is dominated
+    // by the current context block or instruction
+    bool aboveOrBelow(Instruction *I) {
+      BasicBlock *BB = I->getParent();
+      DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB);
+      if (!Node) return false;
+
+      return Top == Node || Top->dominates(Node) || Node->dominates(Top);
+    }
+
+    bool makeEqual(Value *V1, Value *V2) {
+      DOUT << "makeEqual(" << *V1 << ", " << *V2 << ")\n";
+      DOUT << "context is ";
+      if (TopInst) DOUT << "I: " << *TopInst << "\n";
+      else DOUT << "BB: " << TopBB->getName()
+                << "(" << Top->getDFSNumIn() << ")\n";
+
+      assert(V1->getType() == V2->getType() &&
+             "Can't make two values with different types equal.");
+
+      if (V1 == V2) return true;
+
+      if (isa<Constant>(V1) && isa<Constant>(V2))
+        return false;
+
+      unsigned n1 = VN.valueNumber(V1, Top), n2 = VN.valueNumber(V2, Top);
+
+      if (n1 && n2) {
+        if (n1 == n2) return true;
+        if (IG.isRelatedBy(n1, n2, Top, NE)) return false;
+      }
+
+      if (n1) assert(V1 == VN.value(n1) && "Value isn't canonical.");
+      if (n2) assert(V2 == VN.value(n2) && "Value isn't canonical.");
+
+      assert(!VN.compare(V2, V1) && "Please order parameters to makeEqual.");
+
+      assert(!isa<Constant>(V2) && "Tried to remove a constant.");
+
+      SetVector<unsigned> Remove;
+      if (n2) Remove.insert(n2);
+
+      if (n1 && n2) {
+        // Suppose we're being told that %x == %y, and %x <= %z and %y >= %z.
+        // We can't just merge %x and %y because the relationship with %z would
+        // be EQ and that's invalid.  What we're doing is looking for any nodes
+        // %z such that %x <= %z and %y >= %z, and vice versa.
+
+        Node::iterator end = IG.node(n2)->end();
+
+        // Find the intersection between N1 and N2 which is dominated by
+        // Top.  If we find %x where N1 <= %x <= N2 (or >=) then add %x to
+        // Remove.
+        for (Node::iterator I = IG.node(n1)->begin(), E = IG.node(n1)->end();
+             I != E; ++I) {
+          if (!(I->LV & EQ_BIT) || !Top->DominatedBy(I->Subtree)) continue;
+
+          unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT);
+          unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT);
+          Node::iterator NI = IG.node(n2)->find(I->To, Top);
+          if (NI != end) {
+            LatticeVal NILV = reversePredicate(NI->LV);
+            unsigned NILV_s = NILV & (SLT_BIT|SGT_BIT);
+            unsigned NILV_u = NILV & (ULT_BIT|UGT_BIT);
+
+            if ((ILV_s != (SLT_BIT|SGT_BIT) && ILV_s == NILV_s) ||
+                (ILV_u != (ULT_BIT|UGT_BIT) && ILV_u == NILV_u))
+              Remove.insert(I->To);
+          }
+        }
+
+        // See if one of the nodes about to be removed is actually a better
+        // canonical choice than n1.
+        unsigned orig_n1 = n1;
+        SetVector<unsigned>::iterator DontRemove = Remove.end();
+        for (SetVector<unsigned>::iterator I = Remove.begin()+1 /* skip n2 */,
+             E = Remove.end(); I != E; ++I) {
+          unsigned n = *I;
+          Value *V = VN.value(n);
+          if (VN.compare(V, V1)) {
+            V1 = V;
+            n1 = n;
+            DontRemove = I;
+          }
+        }
+        if (DontRemove != Remove.end()) {
+          unsigned n = *DontRemove;
+          Remove.remove(n);
+          Remove.insert(orig_n1);
+        }
+      }
+
+      // We'd like to allow makeEqual on two values to perform a simple
+      // substitution without creating nodes in the IG whenever possible.
+      //
+      // The first iteration through this loop operates on V2 before going
+      // through the Remove list and operating on those too.  If all of the
+      // iterations performed simple replacements then we exit early.
+      bool mergeIGNode = false;
+      unsigned i = 0;
+      for (Value *R = V2; i == 0 || i < Remove.size(); ++i) {
+        if (i) R = VN.value(Remove[i]); // skip n2.
+
+        // Try to replace the whole instruction.  If we can, we're done.
+        Instruction *I2 = dyn_cast<Instruction>(R);
+        if (I2 && below(I2)) {
+          std::vector<Instruction *> ToNotify;
+          for (Value::use_iterator UI = R->use_begin(), UE = R->use_end();
+               UI != UE;) {
+            Use &TheUse = UI.getUse();
+            ++UI;
+            if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser()))
+              ToNotify.push_back(I);
+          }
+
+          DOUT << "Simply removing " << *I2
+               << ", replacing with " << *V1 << "\n";
+          I2->replaceAllUsesWith(V1);
+          // leave it dead; it'll get erased later.
+          ++NumInstruction;
+          modified = true;
+
+          for (std::vector<Instruction *>::iterator II = ToNotify.begin(),
+               IE = ToNotify.end(); II != IE; ++II) {
+            opsToDef(*II);
+          }
+
+          continue;
+        }
+
+        // Otherwise, replace all dominated uses.
+ for (Value::use_iterator UI = R->use_begin(), UE = R->use_end(); + UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + if (Instruction *I = dyn_cast(TheUse.getUser())) { + if (below(I)) { + TheUse.set(V1); + modified = true; + ++NumVarsReplaced; + opsToDef(I); + } + } + } + + // If that killed the instruction, stop here. + if (I2 && isInstructionTriviallyDead(I2)) { + DOUT << "Killed all uses of " << *I2 + << ", replacing with " << *V1 << "\n"; + continue; + } + + // If we make it to here, then we will need to create a node for N1. + // Otherwise, we can skip out early! + mergeIGNode = true; + } + + if (!isa(V1)) { + if (Remove.empty()) { + VR.mergeInto(&V2, 1, VN.getOrInsertVN(V1, Top), Top, this); + } else { + std::vector RemoveVals; + RemoveVals.reserve(Remove.size()); + + for (SetVector::iterator I = Remove.begin(), + E = Remove.end(); I != E; ++I) { + Value *V = VN.value(*I); + if (!V->use_empty()) + RemoveVals.push_back(V); + } + VR.mergeInto(&RemoveVals[0], RemoveVals.size(), + VN.getOrInsertVN(V1, Top), Top, this); + } + } + + if (mergeIGNode) { + // Create N1. + if (!n1) n1 = VN.getOrInsertVN(V1, Top); + IG.node(n1); // Ensure that IG.Nodes won't get resized + + // Migrate relationships from removed nodes to N1. + for (SetVector::iterator I = Remove.begin(), E = Remove.end(); + I != E; ++I) { + unsigned n = *I; + for (Node::iterator NI = IG.node(n)->begin(), NE = IG.node(n)->end(); + NI != NE; ++NI) { + if (NI->Subtree->DominatedBy(Top)) { + if (NI->To == n1) { + assert((NI->LV & EQ_BIT) && "Node inequal to itself."); + continue; + } + if (Remove.count(NI->To)) + continue; + + IG.node(NI->To)->update(n1, reversePredicate(NI->LV), Top); + IG.node(n1)->update(NI->To, NI->LV, Top); + } + } + } + + // Point V2 (and all items in Remove) to N1. + if (!n2) + VN.addEquality(n1, V2, Top); + else { + for (SetVector::iterator I = Remove.begin(), + E = Remove.end(); I != E; ++I) { + VN.addEquality(n1, VN.value(*I), Top); + } + } + + // If !Remove.empty() then V2 = Remove[0]->getValue(). + // Even when Remove is empty, we still want to process V2. + i = 0; + for (Value *R = V2; i == 0 || i < Remove.size(); ++i) { + if (i) R = VN.value(Remove[i]); // skip n2. + + if (Instruction *I2 = dyn_cast(R)) { + if (aboveOrBelow(I2)) + defToOps(I2); + } + for (Value::use_iterator UI = V2->use_begin(), UE = V2->use_end(); + UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + if (Instruction *I = dyn_cast(TheUse.getUser())) { + if (aboveOrBelow(I)) + opsToDef(I); + } + } + } + } + + // re-opsToDef all dominated users of V1. + if (Instruction *I = dyn_cast(V1)) { + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + Value *V = TheUse.getUser(); + if (!V->use_empty()) { + if (Instruction *Inst = dyn_cast(V)) { + if (aboveOrBelow(Inst)) + opsToDef(Inst); + } + } + } + } + + return true; + } + + /// cmpInstToLattice - converts an CmpInst::Predicate to lattice value + /// Requires that the lattice value be valid; does not accept ICMP_EQ. 
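+    /// (ICMP_EQ has no lattice encoding here because equalities are handled
+    /// by merging value numbers in makeEqual -- a proven "%a == %b" collapses
+    /// the two nodes -- rather than by recording an edge between them.)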
+    static LatticeVal cmpInstToLattice(ICmpInst::Predicate Pred) {
+      switch (Pred) {
+        case ICmpInst::ICMP_EQ:
+          assert(!"No matching lattice value.");
+          return static_cast<LatticeVal>(EQ_BIT);
+        default:
+          assert(!"Invalid 'icmp' predicate.");
+        case ICmpInst::ICMP_NE:
+          return NE;
+        case ICmpInst::ICMP_UGT:
+          return UGT;
+        case ICmpInst::ICMP_UGE:
+          return UGE;
+        case ICmpInst::ICMP_ULT:
+          return ULT;
+        case ICmpInst::ICMP_ULE:
+          return ULE;
+        case ICmpInst::ICMP_SGT:
+          return SGT;
+        case ICmpInst::ICMP_SGE:
+          return SGE;
+        case ICmpInst::ICMP_SLT:
+          return SLT;
+        case ICmpInst::ICMP_SLE:
+          return SLE;
+      }
+    }
+
+  public:
+    VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB,
+              ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified,
+              BasicBlock *TopBB)
+      : VN(VN),
+        IG(IG),
+        UB(UB),
+        VR(VR),
+        DTDFS(DTDFS),
+        Top(DTDFS->getNodeForBlock(TopBB)),
+        TopBB(TopBB),
+        TopInst(NULL),
+        modified(modified)
+    {
+      assert(Top && "VRPSolver created for unreachable basic block.");
+    }
+
+    VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB,
+              ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified,
+              Instruction *TopInst)
+      : VN(VN),
+        IG(IG),
+        UB(UB),
+        VR(VR),
+        DTDFS(DTDFS),
+        Top(DTDFS->getNodeForBlock(TopInst->getParent())),
+        TopBB(TopInst->getParent()),
+        TopInst(TopInst),
+        modified(modified)
+    {
+      assert(Top && "VRPSolver created for unreachable basic block.");
+      assert(Top->getBlock() == TopInst->getParent() && "Context mismatch.");
+    }
+
+    bool isRelatedBy(Value *V1, Value *V2, ICmpInst::Predicate Pred) const {
+      if (Constant *C1 = dyn_cast<Constant>(V1))
+        if (Constant *C2 = dyn_cast<Constant>(V2))
+          return ConstantExpr::getCompare(Pred, C1, C2) ==
+                 ConstantInt::getTrue();
+
+      unsigned n1 = VN.valueNumber(V1, Top);
+      unsigned n2 = VN.valueNumber(V2, Top);
+
+      if (n1 && n2) {
+        if (n1 == n2) return Pred == ICmpInst::ICMP_EQ ||
+                             Pred == ICmpInst::ICMP_ULE ||
+                             Pred == ICmpInst::ICMP_UGE ||
+                             Pred == ICmpInst::ICMP_SLE ||
+                             Pred == ICmpInst::ICMP_SGE;
+        if (Pred == ICmpInst::ICMP_EQ) return false;
+        if (IG.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true;
+        if (VR.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true;
+      }
+
+      if ((n1 && !n2 && isa<Constant>(V2)) ||
+          (n2 && !n1 && isa<Constant>(V1))) {
+        ConstantRange CR1 = n1 ? VR.range(n1, Top) : VR.range(V1);
+        ConstantRange CR2 = n2 ? VR.range(n2, Top) : VR.range(V2);
+
+        if (Pred == ICmpInst::ICMP_EQ)
+          return CR1.isSingleElement() &&
+                 CR1.getSingleElement() == CR2.getSingleElement();
+
+        return VR.isRelatedBy(CR1, CR2, cmpInstToLattice(Pred));
+      }
+      if (Pred == ICmpInst::ICMP_EQ) return V1 == V2;
+      return false;
+    }
+
+    /// add - adds a new property to the work queue
+    void add(Value *V1, Value *V2, ICmpInst::Predicate Pred,
+             Instruction *I = NULL) {
+      DOUT << "adding " << *V1 << " " << Pred << " " << *V2;
+      if (I) DOUT << " context: " << *I;
+      else DOUT << " default context (" << Top->getDFSNumIn() << ")";
+      DOUT << "\n";
+
+      assert(V1->getType() == V2->getType() &&
+             "Can't relate two values with different types.");
+
+      WorkList.push_back(Operation());
+      Operation &O = WorkList.back();
+      O.LHS = V1, O.RHS = V2, O.Op = Pred, O.ContextInst = I;
+      O.ContextBB = I ? I->getParent() : TopBB;
+    }
+
+    /// defToOps - Given an instruction definition that we've learned something
+    /// new about, find any new relationships between its operands.
+    void defToOps(Instruction *I) {
+      Instruction *NewContext = below(I) ?
I : TopInst; + Value *Canonical = VN.canonicalize(I, Top); + + if (BinaryOperator *BO = dyn_cast(I)) { + const Type *Ty = BO->getType(); + assert(!Ty->isFPOrFPVector() && "Float in work queue!"); + + Value *Op0 = VN.canonicalize(BO->getOperand(0), Top); + Value *Op1 = VN.canonicalize(BO->getOperand(1), Top); + + // TODO: "and i32 -1, %x" EQ %y then %x EQ %y. + + switch (BO->getOpcode()) { + case Instruction::And: { + // "and i32 %a, %b" EQ -1 then %a EQ -1 and %b EQ -1 + ConstantInt *CI = ConstantInt::getAllOnesValue(Ty); + if (Canonical == CI) { + add(CI, Op0, ICmpInst::ICMP_EQ, NewContext); + add(CI, Op1, ICmpInst::ICMP_EQ, NewContext); + } + } break; + case Instruction::Or: { + // "or i32 %a, %b" EQ 0 then %a EQ 0 and %b EQ 0 + Constant *Zero = Constant::getNullValue(Ty); + if (Canonical == Zero) { + add(Zero, Op0, ICmpInst::ICMP_EQ, NewContext); + add(Zero, Op1, ICmpInst::ICMP_EQ, NewContext); + } + } break; + case Instruction::Xor: { + // "xor i32 %c, %a" EQ %b then %a EQ %c ^ %b + // "xor i32 %c, %a" EQ %c then %a EQ 0 + // "xor i32 %c, %a" NE %c then %a NE 0 + // Repeat the above, with order of operands reversed. + Value *LHS = Op0; + Value *RHS = Op1; + if (!isa(LHS)) std::swap(LHS, RHS); + + if (ConstantInt *CI = dyn_cast(Canonical)) { + if (ConstantInt *Arg = dyn_cast(LHS)) { + add(RHS, ConstantInt::get(CI->getValue() ^ Arg->getValue()), + ICmpInst::ICMP_EQ, NewContext); + } + } + if (Canonical == LHS) { + if (isa(Canonical)) + add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_EQ, + NewContext); + } else if (isRelatedBy(LHS, Canonical, ICmpInst::ICMP_NE)) { + add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_NE, + NewContext); + } + } break; + default: + break; + } + } else if (ICmpInst *IC = dyn_cast(I)) { + // "icmp ult i32 %a, %y" EQ true then %a u< y + // etc. + + if (Canonical == ConstantInt::getTrue()) { + add(IC->getOperand(0), IC->getOperand(1), IC->getPredicate(), + NewContext); + } else if (Canonical == ConstantInt::getFalse()) { + add(IC->getOperand(0), IC->getOperand(1), + ICmpInst::getInversePredicate(IC->getPredicate()), NewContext); + } + } else if (SelectInst *SI = dyn_cast(I)) { + if (I->getType()->isFPOrFPVector()) return; + + // Given: "%a = select i1 %x, i32 %b, i32 %c" + // %a EQ %b and %b NE %c then %x EQ true + // %a EQ %c and %b NE %c then %x EQ false + + Value *True = SI->getTrueValue(); + Value *False = SI->getFalseValue(); + if (isRelatedBy(True, False, ICmpInst::ICMP_NE)) { + if (Canonical == VN.canonicalize(True, Top) || + isRelatedBy(Canonical, False, ICmpInst::ICMP_NE)) + add(SI->getCondition(), ConstantInt::getTrue(), + ICmpInst::ICMP_EQ, NewContext); + else if (Canonical == VN.canonicalize(False, Top) || + isRelatedBy(Canonical, True, ICmpInst::ICMP_NE)) + add(SI->getCondition(), ConstantInt::getFalse(), + ICmpInst::ICMP_EQ, NewContext); + } + } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { + for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(), + OE = GEPI->idx_end(); OI != OE; ++OI) { + ConstantInt *Op = dyn_cast(VN.canonicalize(*OI, Top)); + if (!Op || !Op->isZero()) return; + } + // TODO: The GEPI indices are all zero. Copy from definition to operand, + // jumping the type plane as needed. 
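+      // (For illustration, with hypothetical IR: given "%q = getelementptr
+      // i32* %p, i32 0", %q and %p name the same address, so a known
+      // "%q ne null" fact transfers to the pointer operand %p below.)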
+ if (isRelatedBy(GEPI, Constant::getNullValue(GEPI->getType()), + ICmpInst::ICMP_NE)) { + Value *Ptr = GEPI->getPointerOperand(); + add(Ptr, Constant::getNullValue(Ptr->getType()), ICmpInst::ICMP_NE, + NewContext); + } + } else if (CastInst *CI = dyn_cast(I)) { + const Type *SrcTy = CI->getSrcTy(); + + unsigned ci = VN.getOrInsertVN(CI, Top); + uint32_t W = VR.typeToWidth(SrcTy); + if (!W) return; + ConstantRange CR = VR.range(ci, Top); + + if (CR.isFullSet()) return; + + switch (CI->getOpcode()) { + default: break; + case Instruction::ZExt: + case Instruction::SExt: + VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top), + CR.truncate(W), Top, this); + break; + case Instruction::BitCast: + VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top), + CR, Top, this); + break; + } + } + } + + /// opsToDef - A new relationship was discovered involving one of this + /// instruction's operands. Find any new relationship involving the + /// definition, or another operand. + void opsToDef(Instruction *I) { + Instruction *NewContext = below(I) ? I : TopInst; + + if (BinaryOperator *BO = dyn_cast(I)) { + Value *Op0 = VN.canonicalize(BO->getOperand(0), Top); + Value *Op1 = VN.canonicalize(BO->getOperand(1), Top); + + if (ConstantInt *CI0 = dyn_cast(Op0)) + if (ConstantInt *CI1 = dyn_cast(Op1)) { + add(BO, ConstantExpr::get(BO->getOpcode(), CI0, CI1), + ICmpInst::ICMP_EQ, NewContext); + return; + } + + // "%y = and i1 true, %x" then %x EQ %y + // "%y = or i1 false, %x" then %x EQ %y + // "%x = add i32 %y, 0" then %x EQ %y + // "%x = mul i32 %y, 0" then %x EQ 0 + + Instruction::BinaryOps Opcode = BO->getOpcode(); + const Type *Ty = BO->getType(); + assert(!Ty->isFPOrFPVector() && "Float in work queue!"); + + Constant *Zero = Constant::getNullValue(Ty); + Constant *One = ConstantInt::get(Ty, 1); + ConstantInt *AllOnes = ConstantInt::getAllOnesValue(Ty); + + switch (Opcode) { + default: break; + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + if (Op1 == Zero) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + break; + case Instruction::Sub: + if (Op1 == Zero) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + if (ConstantInt *CI0 = dyn_cast(Op0)) { + unsigned n_ci0 = VN.getOrInsertVN(Op1, Top); + ConstantRange CR = VR.range(n_ci0, Top); + if (!CR.isFullSet()) { + CR.subtract(CI0->getValue()); + unsigned n_bo = VN.getOrInsertVN(BO, Top); + VR.applyRange(n_bo, CR, Top, this); + return; + } + } + if (ConstantInt *CI1 = dyn_cast(Op1)) { + unsigned n_ci1 = VN.getOrInsertVN(Op0, Top); + ConstantRange CR = VR.range(n_ci1, Top); + if (!CR.isFullSet()) { + CR.subtract(CI1->getValue()); + unsigned n_bo = VN.getOrInsertVN(BO, Top); + VR.applyRange(n_bo, CR, Top, this); + return; + } + } + break; + case Instruction::Or: + if (Op0 == AllOnes || Op1 == AllOnes) { + add(BO, AllOnes, ICmpInst::ICMP_EQ, NewContext); + return; + } + if (Op0 == Zero) { + add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); + return; + } else if (Op1 == Zero) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + break; + case Instruction::Add: + if (ConstantInt *CI0 = dyn_cast(Op0)) { + unsigned n_ci0 = VN.getOrInsertVN(Op1, Top); + ConstantRange CR = VR.range(n_ci0, Top); + if (!CR.isFullSet()) { + CR.subtract(-CI0->getValue()); + unsigned n_bo = VN.getOrInsertVN(BO, Top); + VR.applyRange(n_bo, CR, Top, this); + return; + } + } + if (ConstantInt *CI1 = dyn_cast(Op1)) { + unsigned n_ci1 = VN.getOrInsertVN(Op0, Top); + ConstantRange CR = VR.range(n_ci1, Top); + if (!CR.isFullSet()) 
{ + CR.subtract(-CI1->getValue()); + unsigned n_bo = VN.getOrInsertVN(BO, Top); + VR.applyRange(n_bo, CR, Top, this); + return; + } + } + // fall-through + case Instruction::Xor: + if (Op0 == Zero) { + add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); + return; + } else if (Op1 == Zero) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + break; + case Instruction::And: + if (Op0 == AllOnes) { + add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); + return; + } else if (Op1 == AllOnes) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + if (Op0 == Zero || Op1 == Zero) { + add(BO, Zero, ICmpInst::ICMP_EQ, NewContext); + return; + } + break; + case Instruction::Mul: + if (Op0 == Zero || Op1 == Zero) { + add(BO, Zero, ICmpInst::ICMP_EQ, NewContext); + return; + } + if (Op0 == One) { + add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); + return; + } else if (Op1 == One) { + add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); + return; + } + break; + } + + // "%x = add i32 %y, %z" and %x EQ %y then %z EQ 0 + // "%x = add i32 %y, %z" and %x EQ %z then %y EQ 0 + // "%x = shl i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 0 + // "%x = udiv i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 1 + + Value *Known = Op0, *Unknown = Op1, + *TheBO = VN.canonicalize(BO, Top); + if (Known != TheBO) std::swap(Known, Unknown); + if (Known == TheBO) { + switch (Opcode) { + default: break; + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + if (!isRelatedBy(Known, Zero, ICmpInst::ICMP_NE)) break; + // otherwise, fall-through. + case Instruction::Sub: + if (Unknown == Op0) break; + // otherwise, fall-through. + case Instruction::Xor: + case Instruction::Add: + add(Unknown, Zero, ICmpInst::ICMP_EQ, NewContext); + break; + case Instruction::UDiv: + case Instruction::SDiv: + if (Unknown == Op1) break; + if (isRelatedBy(Known, Zero, ICmpInst::ICMP_NE)) + add(Unknown, One, ICmpInst::ICMP_EQ, NewContext); + break; + } + } + + // TODO: "%a = add i32 %b, 1" and %b > %z then %a >= %z. + + } else if (ICmpInst *IC = dyn_cast(I)) { + // "%a = icmp ult i32 %b, %c" and %b u< %c then %a EQ true + // "%a = icmp ult i32 %b, %c" and %b u>= %c then %a EQ false + // etc. 
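+    // (The false case below relies on getInversePredicate: e.g. a known
+    // "%b u>= %c" contradicts "icmp ult %b, %c", so %a folds to false.)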
+ + Value *Op0 = VN.canonicalize(IC->getOperand(0), Top); + Value *Op1 = VN.canonicalize(IC->getOperand(1), Top); + + ICmpInst::Predicate Pred = IC->getPredicate(); + if (isRelatedBy(Op0, Op1, Pred)) + add(IC, ConstantInt::getTrue(), ICmpInst::ICMP_EQ, NewContext); + else if (isRelatedBy(Op0, Op1, ICmpInst::getInversePredicate(Pred))) + add(IC, ConstantInt::getFalse(), ICmpInst::ICMP_EQ, NewContext); + + } else if (SelectInst *SI = dyn_cast(I)) { + if (I->getType()->isFPOrFPVector()) return; + + // Given: "%a = select i1 %x, i32 %b, i32 %c" + // %x EQ true then %a EQ %b + // %x EQ false then %a EQ %c + // %b EQ %c then %a EQ %b + + Value *Canonical = VN.canonicalize(SI->getCondition(), Top); + if (Canonical == ConstantInt::getTrue()) { + add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext); + } else if (Canonical == ConstantInt::getFalse()) { + add(SI, SI->getFalseValue(), ICmpInst::ICMP_EQ, NewContext); + } else if (VN.canonicalize(SI->getTrueValue(), Top) == + VN.canonicalize(SI->getFalseValue(), Top)) { + add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext); + } + } else if (CastInst *CI = dyn_cast(I)) { + const Type *DestTy = CI->getDestTy(); + if (DestTy->isFPOrFPVector()) return; + + Value *Op = VN.canonicalize(CI->getOperand(0), Top); + Instruction::CastOps Opcode = CI->getOpcode(); + + if (Constant *C = dyn_cast(Op)) { + add(CI, ConstantExpr::getCast(Opcode, C, DestTy), + ICmpInst::ICMP_EQ, NewContext); + } + + uint32_t W = VR.typeToWidth(DestTy); + unsigned ci = VN.getOrInsertVN(CI, Top); + ConstantRange CR = VR.range(VN.getOrInsertVN(Op, Top), Top); + + if (!CR.isFullSet()) { + switch (Opcode) { + default: break; + case Instruction::ZExt: + VR.applyRange(ci, CR.zeroExtend(W), Top, this); + break; + case Instruction::SExt: + VR.applyRange(ci, CR.signExtend(W), Top, this); + break; + case Instruction::Trunc: { + ConstantRange Result = CR.truncate(W); + if (!Result.isFullSet()) + VR.applyRange(ci, Result, Top, this); + } break; + case Instruction::BitCast: + VR.applyRange(ci, CR, Top, this); + break; + // TODO: other casts? + } + } + } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { + for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(), + OE = GEPI->idx_end(); OI != OE; ++OI) { + ConstantInt *Op = dyn_cast(VN.canonicalize(*OI, Top)); + if (!Op || !Op->isZero()) return; + } + // TODO: The GEPI indices are all zero. Copy from operand to definition, + // jumping the type plane as needed. 
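+      // (Mirror of the defToOps case: with all-zero indices the GEP is an
+      // identity on the pointer, so "%p ne null" on the operand implies that
+      // the result is non-null as well.)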
+ Value *Ptr = GEPI->getPointerOperand(); + if (isRelatedBy(Ptr, Constant::getNullValue(Ptr->getType()), + ICmpInst::ICMP_NE)) { + add(GEPI, Constant::getNullValue(GEPI->getType()), ICmpInst::ICMP_NE, + NewContext); + } + } + } + + /// solve - process the work queue + void solve() { + //DOUT << "WorkList entry, size: " << WorkList.size() << "\n"; + while (!WorkList.empty()) { + //DOUT << "WorkList size: " << WorkList.size() << "\n"; + + Operation &O = WorkList.front(); + TopInst = O.ContextInst; + TopBB = O.ContextBB; + Top = DTDFS->getNodeForBlock(TopBB); // XXX move this into Context + + O.LHS = VN.canonicalize(O.LHS, Top); + O.RHS = VN.canonicalize(O.RHS, Top); + + assert(O.LHS == VN.canonicalize(O.LHS, Top) && "Canonicalize isn't."); + assert(O.RHS == VN.canonicalize(O.RHS, Top) && "Canonicalize isn't."); + + DOUT << "solving " << *O.LHS << " " << O.Op << " " << *O.RHS; + if (O.ContextInst) DOUT << " context inst: " << *O.ContextInst; + else DOUT << " context block: " << O.ContextBB->getName(); + DOUT << "\n"; + + DEBUG(VN.dump()); + DEBUG(IG.dump()); + DEBUG(VR.dump()); + + // If they're both Constant, skip it. Check for contradiction and mark + // the BB as unreachable if so. + if (Constant *CI_L = dyn_cast(O.LHS)) { + if (Constant *CI_R = dyn_cast(O.RHS)) { + if (ConstantExpr::getCompare(O.Op, CI_L, CI_R) == + ConstantInt::getFalse()) + UB.mark(TopBB); + + WorkList.pop_front(); + continue; + } + } + + if (VN.compare(O.LHS, O.RHS)) { + std::swap(O.LHS, O.RHS); + O.Op = ICmpInst::getSwappedPredicate(O.Op); + } + + if (O.Op == ICmpInst::ICMP_EQ) { + if (!makeEqual(O.RHS, O.LHS)) + UB.mark(TopBB); + } else { + LatticeVal LV = cmpInstToLattice(O.Op); + + if ((LV & EQ_BIT) && + isRelatedBy(O.LHS, O.RHS, ICmpInst::getSwappedPredicate(O.Op))) { + if (!makeEqual(O.RHS, O.LHS)) + UB.mark(TopBB); + } else { + if (isRelatedBy(O.LHS, O.RHS, ICmpInst::getInversePredicate(O.Op))){ + UB.mark(TopBB); + WorkList.pop_front(); + continue; + } + + unsigned n1 = VN.getOrInsertVN(O.LHS, Top); + unsigned n2 = VN.getOrInsertVN(O.RHS, Top); + + if (n1 == n2) { + if (O.Op != ICmpInst::ICMP_UGE && O.Op != ICmpInst::ICMP_ULE && + O.Op != ICmpInst::ICMP_SGE && O.Op != ICmpInst::ICMP_SLE) + UB.mark(TopBB); + + WorkList.pop_front(); + continue; + } + + if (VR.isRelatedBy(n1, n2, Top, LV) || + IG.isRelatedBy(n1, n2, Top, LV)) { + WorkList.pop_front(); + continue; + } + + VR.addInequality(n1, n2, Top, LV, this); + if ((!isa(O.RHS) && !isa(O.LHS)) || + LV == NE) + IG.addInequality(n1, n2, Top, LV); + + if (Instruction *I1 = dyn_cast(O.LHS)) { + if (aboveOrBelow(I1)) + defToOps(I1); + } + if (isa(O.LHS) || isa(O.LHS)) { + for (Value::use_iterator UI = O.LHS->use_begin(), + UE = O.LHS->use_end(); UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + if (Instruction *I = dyn_cast(TheUse.getUser())) { + if (aboveOrBelow(I)) + opsToDef(I); + } + } + } + if (Instruction *I2 = dyn_cast(O.RHS)) { + if (aboveOrBelow(I2)) + defToOps(I2); + } + if (isa(O.RHS) || isa(O.RHS)) { + for (Value::use_iterator UI = O.RHS->use_begin(), + UE = O.RHS->use_end(); UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + if (Instruction *I = dyn_cast(TheUse.getUser())) { + if (aboveOrBelow(I)) + opsToDef(I); + } + } + } + } + } + WorkList.pop_front(); + } + } + }; + + void ValueRanges::addToWorklist(Value *V, Constant *C, + ICmpInst::Predicate Pred, VRPSolver *VRP) { + VRP->add(V, C, Pred, VRP->TopInst); + } + + void ValueRanges::markBlock(VRPSolver *VRP) { + VRP->UB.mark(VRP->TopBB); + } + + /// PredicateSimplifier - This class is a simplifier 
that replaces + /// one equivalent variable with another. It also tracks what + /// can't be equal and will solve setcc instructions when possible. + /// @brief Root of the predicate simplifier optimization. + class VISIBILITY_HIDDEN PredicateSimplifier : public FunctionPass { + DomTreeDFS *DTDFS; + bool modified; + ValueNumbering *VN; + InequalityGraph *IG; + UnreachableBlocks UB; + ValueRanges *VR; + + std::vector WorkList; + + public: + static char ID; // Pass identification, replacement for typeid + PredicateSimplifier() : FunctionPass(&ID) {} + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + } + + private: + /// Forwards - Adds new properties to VRPSolver and uses them to + /// simplify instructions. Because new properties sometimes apply to + /// a transition from one BasicBlock to another, this will use the + /// PredicateSimplifier::proceedToSuccessor(s) interface to enter the + /// basic block. + /// @brief Performs abstract execution of the program. + class VISIBILITY_HIDDEN Forwards : public InstVisitor { + friend class InstVisitor; + PredicateSimplifier *PS; + DomTreeDFS::Node *DTNode; + + public: + ValueNumbering &VN; + InequalityGraph &IG; + UnreachableBlocks &UB; + ValueRanges &VR; + + Forwards(PredicateSimplifier *PS, DomTreeDFS::Node *DTNode) + : PS(PS), DTNode(DTNode), VN(*PS->VN), IG(*PS->IG), UB(PS->UB), + VR(*PS->VR) {} + + void visitTerminatorInst(TerminatorInst &TI); + void visitBranchInst(BranchInst &BI); + void visitSwitchInst(SwitchInst &SI); + + void visitAllocaInst(AllocaInst &AI); + void visitLoadInst(LoadInst &LI); + void visitStoreInst(StoreInst &SI); + + void visitSExtInst(SExtInst &SI); + void visitZExtInst(ZExtInst &ZI); + + void visitBinaryOperator(BinaryOperator &BO); + void visitICmpInst(ICmpInst &IC); + }; + + // Used by terminator instructions to proceed from the current basic + // block to the next. Verifies that "current" dominates "next", + // then calls visitBasicBlock. + void proceedToSuccessors(DomTreeDFS::Node *Current) { + for (DomTreeDFS::Node::iterator I = Current->begin(), + E = Current->end(); I != E; ++I) { + WorkList.push_back(*I); + } + } + + void proceedToSuccessor(DomTreeDFS::Node *Next) { + WorkList.push_back(Next); + } + + // Visits each instruction in the basic block. + void visitBasicBlock(DomTreeDFS::Node *Node) { + BasicBlock *BB = Node->getBlock(); + DOUT << "Entering Basic Block: " << BB->getName() + << " (" << Node->getDFSNumIn() << ")\n"; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + visitInstruction(I++, Node); + } + } + + // Tries to simplify each Instruction and add new properties. + void visitInstruction(Instruction *I, DomTreeDFS::Node *DT) { + DOUT << "Considering instruction " << *I << "\n"; + DEBUG(VN->dump()); + DEBUG(IG->dump()); + DEBUG(VR->dump()); + + // Sometimes instructions are killed in earlier analysis. + if (isInstructionTriviallyDead(I)) { + ++NumSimple; + modified = true; + if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode())) + if (VN->value(n) == I) IG->remove(n); + VN->remove(I); + I->eraseFromParent(); + return; + } + +#ifndef NDEBUG + // Try to replace the whole instruction. 
+ Value *V = VN->canonicalize(I, DT); + assert(V == I && "Late instruction canonicalization."); + if (V != I) { + modified = true; + ++NumInstruction; + DOUT << "Removing " << *I << ", replacing with " << *V << "\n"; + if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode())) + if (VN->value(n) == I) IG->remove(n); + VN->remove(I); + I->replaceAllUsesWith(V); + I->eraseFromParent(); + return; + } + + // Try to substitute operands. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *Oper = I->getOperand(i); + Value *V = VN->canonicalize(Oper, DT); + assert(V == Oper && "Late operand canonicalization."); + if (V != Oper) { + modified = true; + ++NumVarsReplaced; + DOUT << "Resolving " << *I; + I->setOperand(i, V); + DOUT << " into " << *I; + } + } +#endif + + std::string name = I->getParent()->getName(); + DOUT << "push (%" << name << ")\n"; + Forwards visit(this, DT); + visit.visit(*I); + DOUT << "pop (%" << name << ")\n"; + } + }; + + bool PredicateSimplifier::runOnFunction(Function &F) { + DominatorTree *DT = &getAnalysis(); + DTDFS = new DomTreeDFS(DT); + TargetData *TD = &getAnalysis(); + + DOUT << "Entering Function: " << F.getName() << "\n"; + + modified = false; + DomTreeDFS::Node *Root = DTDFS->getRootNode(); + VN = new ValueNumbering(DTDFS); + IG = new InequalityGraph(*VN, Root); + VR = new ValueRanges(*VN, TD); + WorkList.push_back(Root); + + do { + DomTreeDFS::Node *DTNode = WorkList.back(); + WorkList.pop_back(); + if (!UB.isDead(DTNode->getBlock())) visitBasicBlock(DTNode); + } while (!WorkList.empty()); + + delete DTDFS; + delete VR; + delete IG; + delete VN; + + modified |= UB.kill(); + + return modified; + } + + void PredicateSimplifier::Forwards::visitTerminatorInst(TerminatorInst &TI) { + PS->proceedToSuccessors(DTNode); + } + + void PredicateSimplifier::Forwards::visitBranchInst(BranchInst &BI) { + if (BI.isUnconditional()) { + PS->proceedToSuccessors(DTNode); + return; + } + + Value *Condition = BI.getCondition(); + BasicBlock *TrueDest = BI.getSuccessor(0); + BasicBlock *FalseDest = BI.getSuccessor(1); + + if (isa(Condition) || TrueDest == FalseDest) { + PS->proceedToSuccessors(DTNode); + return; + } + + for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); + I != E; ++I) { + BasicBlock *Dest = (*I)->getBlock(); + DOUT << "Branch thinking about %" << Dest->getName() + << "(" << PS->DTDFS->getNodeForBlock(Dest)->getDFSNumIn() << ")\n"; + + if (Dest == TrueDest) { + DOUT << "(" << DTNode->getBlock()->getName() << ") true set:\n"; + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest); + VRP.add(ConstantInt::getTrue(), Condition, ICmpInst::ICMP_EQ); + VRP.solve(); + DEBUG(VN.dump()); + DEBUG(IG.dump()); + DEBUG(VR.dump()); + } else if (Dest == FalseDest) { + DOUT << "(" << DTNode->getBlock()->getName() << ") false set:\n"; + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest); + VRP.add(ConstantInt::getFalse(), Condition, ICmpInst::ICMP_EQ); + VRP.solve(); + DEBUG(VN.dump()); + DEBUG(IG.dump()); + DEBUG(VR.dump()); + } + + PS->proceedToSuccessor(*I); + } + } + + void PredicateSimplifier::Forwards::visitSwitchInst(SwitchInst &SI) { + Value *Condition = SI.getCondition(); + + // Set the EQProperty in each of the cases BBs, and the NEProperties + // in the default BB. 
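+  // (For a hypothetical "switch i32 %x" with "case 5" targeting %bb1, this
+  // records "%x == 5" inside %bb1 and "%x != 5" inside the default
+  // destination, for each successor that the switch dominates.)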
+ + for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); + I != E; ++I) { + BasicBlock *BB = (*I)->getBlock(); + DOUT << "Switch thinking about BB %" << BB->getName() + << "(" << PS->DTDFS->getNodeForBlock(BB)->getDFSNumIn() << ")\n"; + + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, BB); + if (BB == SI.getDefaultDest()) { + for (unsigned i = 1, e = SI.getNumCases(); i < e; ++i) + if (SI.getSuccessor(i) != BB) + VRP.add(Condition, SI.getCaseValue(i), ICmpInst::ICMP_NE); + VRP.solve(); + } else if (ConstantInt *CI = SI.findCaseDest(BB)) { + VRP.add(Condition, CI, ICmpInst::ICMP_EQ); + VRP.solve(); + } + PS->proceedToSuccessor(*I); + } + } + + void PredicateSimplifier::Forwards::visitAllocaInst(AllocaInst &AI) { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &AI); + VRP.add(Constant::getNullValue(AI.getType()), &AI, ICmpInst::ICMP_NE); + VRP.solve(); + } + + void PredicateSimplifier::Forwards::visitLoadInst(LoadInst &LI) { + Value *Ptr = LI.getPointerOperand(); + // avoid "load i8* null" -> null NE null. + if (isa(Ptr)) return; + + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &LI); + VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE); + VRP.solve(); + } + + void PredicateSimplifier::Forwards::visitStoreInst(StoreInst &SI) { + Value *Ptr = SI.getPointerOperand(); + if (isa(Ptr)) return; + + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI); + VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE); + VRP.solve(); + } + + void PredicateSimplifier::Forwards::visitSExtInst(SExtInst &SI) { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI); + uint32_t SrcBitWidth = cast(SI.getSrcTy())->getBitWidth(); + uint32_t DstBitWidth = cast(SI.getDestTy())->getBitWidth(); + APInt Min(APInt::getHighBitsSet(DstBitWidth, DstBitWidth-SrcBitWidth+1)); + APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth-1)); + VRP.add(ConstantInt::get(Min), &SI, ICmpInst::ICMP_SLE); + VRP.add(ConstantInt::get(Max), &SI, ICmpInst::ICMP_SGE); + VRP.solve(); + } + + void PredicateSimplifier::Forwards::visitZExtInst(ZExtInst &ZI) { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &ZI); + uint32_t SrcBitWidth = cast(ZI.getSrcTy())->getBitWidth(); + uint32_t DstBitWidth = cast(ZI.getDestTy())->getBitWidth(); + APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth)); + VRP.add(ConstantInt::get(Max), &ZI, ICmpInst::ICMP_UGE); + VRP.solve(); + } + + void PredicateSimplifier::Forwards::visitBinaryOperator(BinaryOperator &BO) { + Instruction::BinaryOps ops = BO.getOpcode(); + + switch (ops) { + default: break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::SDiv: { + Value *Divisor = BO.getOperand(1); + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(Constant::getNullValue(Divisor->getType()), Divisor, + ICmpInst::ICMP_NE); + VRP.solve(); + break; + } + } + + switch (ops) { + default: break; + case Instruction::Shl: { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); + VRP.solve(); + } break; + case Instruction::AShr: { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_SLE); + VRP.solve(); + } break; + case Instruction::LShr: + case Instruction::UDiv: { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); + VRP.solve(); + } break; + case Instruction::URem: { + 
VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); + VRP.solve(); + } break; + case Instruction::And: { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); + VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); + VRP.solve(); + } break; + case Instruction::Or: { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); + VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); + VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_UGE); + VRP.solve(); + } break; + } + } + + void PredicateSimplifier::Forwards::visitICmpInst(ICmpInst &IC) { + // If possible, squeeze the ICmp predicate into something simpler. + // Eg., if x = [0, 4) and we're being asked icmp uge %x, 3 then change + // the predicate to eq. + + // XXX: once we do full PHI handling, modifying the instruction in the + // Forwards visitor will cause missed optimizations. + + ICmpInst::Predicate Pred = IC.getPredicate(); + + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULE: Pred = ICmpInst::ICMP_ULT; break; + case ICmpInst::ICMP_UGE: Pred = ICmpInst::ICMP_UGT; break; + case ICmpInst::ICMP_SLE: Pred = ICmpInst::ICMP_SLT; break; + case ICmpInst::ICMP_SGE: Pred = ICmpInst::ICMP_SGT; break; + } + if (Pred != IC.getPredicate()) { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC); + if (VRP.isRelatedBy(IC.getOperand(1), IC.getOperand(0), + ICmpInst::ICMP_NE)) { + ++NumSnuggle; + PS->modified = true; + IC.setPredicate(Pred); + } + } + + Pred = IC.getPredicate(); + + if (ConstantInt *Op1 = dyn_cast(IC.getOperand(1))) { + ConstantInt *NextVal = 0; + switch (Pred) { + default: break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: + if (Op1->getValue() != 0) + NextVal = ConstantInt::get(Op1->getValue()-1); + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: + if (!Op1->getValue().isAllOnesValue()) + NextVal = ConstantInt::get(Op1->getValue()+1); + break; + } + + if (NextVal) { + VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC); + if (VRP.isRelatedBy(IC.getOperand(0), NextVal, + ICmpInst::getInversePredicate(Pred))) { + ICmpInst *NewIC = new ICmpInst(ICmpInst::ICMP_EQ, IC.getOperand(0), + NextVal, "", &IC); + NewIC->takeName(&IC); + IC.replaceAllUsesWith(NewIC); + + // XXX: prove this isn't necessary + if (unsigned n = VN.valueNumber(&IC, PS->DTDFS->getRootNode())) + if (VN.value(n) == &IC) IG.remove(n); + VN.remove(&IC); + + IC.eraseFromParent(); + ++NumSnuggle; + PS->modified = true; + } + } + } + } +} + +char PredicateSimplifier::ID = 0; +static RegisterPass +X("predsimplify", "Predicate Simplifier"); + +FunctionPass *llvm::createPredicateSimplifierPass() { + return new PredicateSimplifier(); +} diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp new file mode 100644 index 000000000000..293cf9248b7e --- /dev/null +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -0,0 +1,896 @@ +//===- Reassociate.cpp - Reassociate binary expressions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates commutative expressions in an order that is designed +// to promote better constant propagation, GCSE, LICM, PRE... 
+//
+// For example:  4 + (x + 5)  ->  x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of the current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumLinear , "Number of insts linearized");
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr trees annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+  struct VISIBILITY_HIDDEN ValueEntry {
+    unsigned Rank;
+    Value *Op;
+    ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+  };
+  inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+    return LHS.Rank > RHS.Rank;   // Sort so that highest rank goes to start.
+  }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
+  Module *M = I->getParent()->getParent()->getParent();
+  cerr << Instruction::getOpcodeName(I->getOpcode()) << " "
+       << *Ops[0].Op->getType();
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M);
+    cerr << "," << Ops[i].Rank;
+  }
+}
+#endif
+
+namespace {
+  class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
+    std::map<BasicBlock*, unsigned> RankMap;
+    std::map<AssertingVH<>, unsigned> ValueRankMap;
+    bool MadeChange;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    Reassociate() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  private:
+    void BuildRankMap(Function &F);
+    unsigned getRank(Value *V);
+    void ReassociateExpression(BinaryOperator *I);
+    void RewriteExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops,
+                         unsigned Idx = 0);
+    Value *OptimizeExpression(BinaryOperator *I, std::vector<ValueEntry> &Ops);
+    void LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops);
+    void LinearizeExpr(BinaryOperator *I);
+    Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+    void ReassociateBB(BasicBlock *BB);
+
+    void RemoveDeadBinaryOp(Value *V);
+  };
+}
+
+char Reassociate::ID = 0;
+static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+void Reassociate::RemoveDeadBinaryOp(Value *V) {
+  Instruction *Op = dyn_cast<Instruction>(V);
+  if (!Op || !isa<BinaryOperator>(Op) || !isa<CmpInst>(Op) || !Op->use_empty())
+    return;
+
+  Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+  RemoveDeadBinaryOp(LHS);
+  RemoveDeadBinaryOp(RHS);
+}
+
+
+static bool isUnmovableInstruction(Instruction *I) {
+  if (I->getOpcode() == Instruction::PHI ||
+      I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load || + I->getOpcode() == Instruction::Malloc || + I->getOpcode() == Instruction::Invoke || + (I->getOpcode() == Instruction::Call && + !isa(I)) || + I->getOpcode() == Instruction::UDiv || + I->getOpcode() == Instruction::SDiv || + I->getOpcode() == Instruction::FDiv || + I->getOpcode() == Instruction::URem || + I->getOpcode() == Instruction::SRem || + I->getOpcode() == Instruction::FRem) + return true; + return false; +} + +void Reassociate::BuildRankMap(Function &F) { + unsigned i = 2; + + // Assign distinct ranks to function arguments + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + ValueRankMap[&*I] = ++i; + + ReversePostOrderTraversal RPOT(&F); + for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), + E = RPOT.end(); I != E; ++I) { + BasicBlock *BB = *I; + unsigned BBRank = RankMap[BB] = ++i << 16; + + // Walk the basic block, adding precomputed ranks for any instructions that + // we cannot move. This ensures that the ranks for these instructions are + // all different in the block. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isUnmovableInstruction(I)) + ValueRankMap[&*I] = ++BBRank; + } +} + +unsigned Reassociate::getRank(Value *V) { + if (isa(V)) return ValueRankMap[V]; // Function argument... + + Instruction *I = dyn_cast(V); + if (I == 0) return 0; // Otherwise it's a global or constant, rank 0. + + unsigned &CachedRank = ValueRankMap[I]; + if (CachedRank) return CachedRank; // Rank already known? + + // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that + // we can reassociate expressions for code motion! Since we do not recurse + // for PHI nodes, we cannot have infinite recursion here, because there + // cannot be loops in the value graph that do not go through PHI nodes. + unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; + for (unsigned i = 0, e = I->getNumOperands(); + i != e && Rank != MaxRank; ++i) + Rank = std::max(Rank, getRank(I->getOperand(i))); + + // If this is a not or neg instruction, do not count it for rank. This + // assures us that X and ~X will have the same rank. + if (!I->getType()->isInteger() || + (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) + ++Rank; + + //DOUT << "Calculated Rank[" << V->getName() << "] = " + // << Rank << "\n"; + + return CachedRank = Rank; +} + +/// isReassociableOp - Return true if V is an instruction of the specified +/// opcode and if it only has one use. +static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { + if ((V->hasOneUse() || V->use_empty()) && isa(V) && + cast(V)->getOpcode() == Opcode) + return cast(V); + return 0; +} + +/// LowerNegateToMultiply - Replace 0-X with X*-1. +/// +static Instruction *LowerNegateToMultiply(Instruction *Neg, + std::map, unsigned> &ValueRankMap) { + Constant *Cst = ConstantInt::getAllOnesValue(Neg->getType()); + + Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); + ValueRankMap.erase(Neg); + Res->takeName(Neg); + Neg->replaceAllUsesWith(Res); + Neg->eraseFromParent(); + return Res; +} + +// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. +// Note that if D is also part of the expression tree that we recurse to +// linearize it as well. Besides that case, this does not recurse into A,B, or +// C. 
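+// For example, with hypothetical operands: I = (A+B)+(D+C) is rotated to
+// ((A+B)+C)+D in one step by moving D out of the RHS; if D is itself an add
+// of the same opcode, the tail recursion at the bottom rotates again until
+// the RHS is a leaf.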
+void Reassociate::LinearizeExpr(BinaryOperator *I) {
+  BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+  BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
+  assert(isReassociableOp(LHS, I->getOpcode()) &&
+         isReassociableOp(RHS, I->getOpcode()) &&
+         "Not an expression that needs linearization?");
+
+  DOUT << "Linear" << *LHS << *RHS << *I;
+
+  // Move the RHS instruction to live immediately before I, avoiding breaking
+  // dominator properties.
+  RHS->moveBefore(I);
+
+  // Move operands around to do the linearization.
+  I->setOperand(1, RHS->getOperand(0));
+  RHS->setOperand(0, LHS);
+  I->setOperand(0, RHS);
+
+  ++NumLinear;
+  MadeChange = true;
+  DOUT << "Linearized: " << *I;
+
+  // If D is part of this expression tree, tail recurse.
+  if (isReassociableOp(I->getOperand(1), I->getOpcode()))
+    LinearizeExpr(I);
+}
+
+
+/// LinearizeExprTree - Given an associative binary expression tree, traverse
+/// all of the uses putting it into canonical form.  This forces a left-linear
+/// form of the expression (((a+b)+c)+d), and collects information about the
+/// rank of the non-tree operands.
+///
+/// NOTE: This intentionally destroys the expression tree operands (turning
+/// them into undef values) to reduce #uses of the values.  This means that the
+/// caller MUST use something like RewriteExprTree to put the values back in.
+///
+void Reassociate::LinearizeExprTree(BinaryOperator *I,
+                                    std::vector<ValueEntry> &Ops) {
+  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+  unsigned Opcode = I->getOpcode();
+
+  // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
+  BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
+  BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+
+  // If this is a multiply expression tree and it contains internal negations,
+  // transform them into multiplies by -1 so they can be reassociated.
+  if (I->getOpcode() == Instruction::Mul) {
+    if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
+      LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
+      LHSBO = isReassociableOp(LHS, Opcode);
+    }
+    if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
+      RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
+      RHSBO = isReassociableOp(RHS, Opcode);
+    }
+  }
+
+  if (!LHSBO) {
+    if (!RHSBO) {
+      // Neither the LHS nor the RHS is part of the tree; this is a leaf.  As
+      // such, just remember these operands and their rank.
+      Ops.push_back(ValueEntry(getRank(LHS), LHS));
+      Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+      // Clear the leaves out.
+      I->setOperand(0, UndefValue::get(I->getType()));
+      I->setOperand(1, UndefValue::get(I->getType()));
+      return;
+    } else {
+      // Turn X+(Y+Z) -> (Y+Z)+X
+      std::swap(LHSBO, RHSBO);
+      std::swap(LHS, RHS);
+      bool Success = !I->swapOperands();
+      assert(Success && "swapOperands failed");
+      Success = false;
+      MadeChange = true;
+    }
+  } else if (RHSBO) {
+    // Turn (A+B)+(C+D) -> (((A+B)+C)+D).  This guarantees that the RHS is not
+    // part of the expression tree.
+    LinearizeExpr(I);
+    LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
+    RHS = I->getOperand(1);
+    RHSBO = 0;
+  }
+
+  // Okay, now we know that the LHS is a nested expression and that the RHS is
+  // not.  Perform reassociation.
+  assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
+
+  // Move LHS right before I to make sure that the tree expression dominates
+  // all values.
+  LHSBO->moveBefore(I);
+
+  // Linearize the expression tree on the LHS.
+  LinearizeExprTree(LHSBO, Ops);
+
+  // Remember the RHS operand and its rank.
+ Ops.push_back(ValueEntry(getRank(RHS), RHS)); + + // Clear the RHS leaf out. + I->setOperand(1, UndefValue::get(I->getType())); +} + +// RewriteExprTree - Now that the operands for this expression tree are +// linearized and optimized, emit them in-order. This function is written to be +// tail recursive. +void Reassociate::RewriteExprTree(BinaryOperator *I, + std::vector &Ops, + unsigned i) { + if (i+2 == Ops.size()) { + if (I->getOperand(0) != Ops[i].Op || + I->getOperand(1) != Ops[i+1].Op) { + Value *OldLHS = I->getOperand(0); + DOUT << "RA: " << *I; + I->setOperand(0, Ops[i].Op); + I->setOperand(1, Ops[i+1].Op); + DOUT << "TO: " << *I; + MadeChange = true; + ++NumChanged; + + // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3) + // delete the extra, now dead, nodes. + RemoveDeadBinaryOp(OldLHS); + } + return; + } + assert(i+2 < Ops.size() && "Ops index out of range!"); + + if (I->getOperand(1) != Ops[i].Op) { + DOUT << "RA: " << *I; + I->setOperand(1, Ops[i].Op); + DOUT << "TO: " << *I; + MadeChange = true; + ++NumChanged; + } + + BinaryOperator *LHS = cast(I->getOperand(0)); + assert(LHS->getOpcode() == I->getOpcode() && + "Improper expression tree!"); + + // Compactify the tree instructions together with each other to guarantee + // that the expression tree is dominated by all of Ops. + LHS->moveBefore(I); + RewriteExprTree(LHS, Ops, i+1); +} + + + +// NegateValue - Insert instructions before the instruction pointed to by BI, +// that computes the negative version of the value specified. The negative +// version of the value is returned, and BI is left pointing at the instruction +// that should be processed next by the reassociation pass. +// +static Value *NegateValue(Value *V, Instruction *BI) { + // We are trying to expose opportunity for reassociation. One of the things + // that we want to do to achieve this is to push a negation as deep into an + // expression chain as possible, to expose the add instructions. In practice, + // this means that we turn this: + // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D + // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate + // the constants. We assume that instcombine will clean up the mess later if + // we introduce tons of unnecessary negation instructions... + // + if (Instruction *I = dyn_cast(V)) + if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { + // Push the negates through the add. + I->setOperand(0, NegateValue(I->getOperand(0), BI)); + I->setOperand(1, NegateValue(I->getOperand(1), BI)); + + // We must move the add instruction here, because the neg instructions do + // not dominate the old add instruction in general. By moving it, we are + // assured that the neg instructions we just inserted dominate the + // instruction we are about to insert after them. + // + I->moveBefore(BI); + I->setName(I->getName()+".neg"); + return I; + } + + // Insert a 'neg' instruction that subtracts the value from zero to get the + // negation. + // + return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); +} + +/// ShouldBreakUpSubtract - Return true if we should break up this subtract of +/// X-Y into (X + -Y). +static bool ShouldBreakUpSubtract(Instruction *Sub) { + // If this is a negation, we can't split it up! + if (BinaryOperator::isNeg(Sub)) + return false; + + // Don't bother to break this up unless either the LHS is an associable add or + // subtract or if this is only used by one. 
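+  // (That is: only rewrite X-Y as X+(-Y) when an operand of the subtract, or
+  // its single user, is itself a reassociable add or subtract, so that the
+  // resulting add can actually join an expression tree.)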
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || + isReassociableOp(Sub->getOperand(0), Instruction::Sub)) + return true; + if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || + isReassociableOp(Sub->getOperand(1), Instruction::Sub)) + return true; + if (Sub->hasOneUse() && + (isReassociableOp(Sub->use_back(), Instruction::Add) || + isReassociableOp(Sub->use_back(), Instruction::Sub))) + return true; + + return false; +} + +/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is +/// only used by an add, transform this into (X+(0-Y)) to promote better +/// reassociation. +static Instruction *BreakUpSubtract(Instruction *Sub, + std::map, unsigned> &ValueRankMap) { + // Convert a subtract into an add and a neg instruction... so that sub + // instructions can be commuted with other add instructions... + // + // Calculate the negative value of Operand 1 of the sub instruction... + // and set it as the RHS of the add instruction we just made... + // + Value *NegVal = NegateValue(Sub->getOperand(1), Sub); + Instruction *New = + BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); + New->takeName(Sub); + + // Everyone now refers to the add instruction. + ValueRankMap.erase(Sub); + Sub->replaceAllUsesWith(New); + Sub->eraseFromParent(); + + DOUT << "Negated: " << *New; + return New; +} + +/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used +/// by one, change this into a multiply by a constant to assist with further +/// reassociation. +static Instruction *ConvertShiftToMul(Instruction *Shl, + std::map, unsigned> &ValueRankMap) { + // If an operand of this shift is a reassociable multiply, or if the shift + // is used by a reassociable multiply or add, turn into a multiply. + if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || + (Shl->hasOneUse() && + (isReassociableOp(Shl->use_back(), Instruction::Mul) || + isReassociableOp(Shl->use_back(), Instruction::Add)))) { + Constant *MulCst = ConstantInt::get(Shl->getType(), 1); + MulCst = ConstantExpr::getShl(MulCst, cast(Shl->getOperand(1))); + + Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, + "", Shl); + ValueRankMap.erase(Shl); + Mul->takeName(Shl); + Shl->replaceAllUsesWith(Mul); + Shl->eraseFromParent(); + return Mul; + } + return 0; +} + +// Scan backwards and forwards among values with the same rank as element i to +// see if X exists. If X does not exist, return i. +static unsigned FindInOperandList(std::vector &Ops, unsigned i, + Value *X) { + unsigned XRank = Ops[i].Rank; + unsigned e = Ops.size(); + for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) + if (Ops[j].Op == X) + return j; + // Scan backwards + for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) + if (Ops[j].Op == X) + return j; + return i; +} + +/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together +/// and returning the result. Insert the tree before I. +static Value *EmitAddTreeOfValues(Instruction *I, std::vector &Ops) { + if (Ops.size() == 1) return Ops.back(); + + Value *V1 = Ops.back(); + Ops.pop_back(); + Value *V2 = EmitAddTreeOfValues(I, Ops); + return BinaryOperator::CreateAdd(V2, V1, "tmp", I); +} + +/// RemoveFactorFromExpression - If V is an expression tree that is a +/// multiplication sequence, and if this sequence contains a multiply by Factor, +/// remove Factor from the tree and return the new tree. 
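+/// For example, with hypothetical values: removing the factor B from the
+/// tree A*B*C yields a tree computing A*C; if Factor does not occur at all,
+/// the operands are restored with RewriteExprTree and null is returned.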
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+  if (!BO) return 0;
+
+  std::vector<ValueEntry> Factors;
+  LinearizeExprTree(BO, Factors);
+
+  bool FoundFactor = false;
+  for (unsigned i = 0, e = Factors.size(); i != e; ++i)
+    if (Factors[i].Op == Factor) {
+      FoundFactor = true;
+      Factors.erase(Factors.begin()+i);
+      break;
+    }
+  if (!FoundFactor) {
+    // Make sure to restore the operands to the expression tree.
+    RewriteExprTree(BO, Factors);
+    return 0;
+  }
+
+  if (Factors.size() == 1) return Factors[0].Op;
+
+  RewriteExprTree(BO, Factors);
+  return BO;
+}
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+static void FindSingleUseMultiplyFactors(Value *V,
+                                         std::vector<Value*> &Factors) {
+  BinaryOperator *BO;
+  if ((!V->hasOneUse() && !V->use_empty()) ||
+      !(BO = dyn_cast<BinaryOperator>(V)) ||
+      BO->getOpcode() != Instruction::Mul) {
+    Factors.push_back(V);
+    return;
+  }
+
+  // Otherwise, add the LHS and RHS to the list of factors.
+  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
+  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
+}
+
+
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+                                       std::vector<ValueEntry> &Ops) {
+  // Now that we have the linearized expression tree, try to optimize it.
+  // Start by folding any constants that we found.
+  bool IterateOptimization = false;
+  if (Ops.size() == 1) return Ops[0].Op;
+
+  unsigned Opcode = I->getOpcode();
+
+  if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
+    if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
+      Ops.pop_back();
+      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
+      return OptimizeExpression(I, Ops);
+    }
+
+  // Check for destructive annihilation due to a constant being used.
+  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
+    switch (Opcode) {
+    default: break;
+    case Instruction::And:
+      if (CstVal->isZero()) {                // ... & 0 -> 0
+        ++NumAnnihil;
+        return CstVal;
+      } else if (CstVal->isAllOnesValue()) { // ... & -1 -> ...
+        Ops.pop_back();
+      }
+      break;
+    case Instruction::Mul:
+      if (CstVal->isZero()) {                // ... * 0 -> 0
+        ++NumAnnihil;
+        return CstVal;
+      } else if (cast<ConstantInt>(CstVal)->isOne()) {
+        Ops.pop_back();                      // ... * 1 -> ...
+      }
+      break;
+    case Instruction::Or:
+      if (CstVal->isAllOnesValue()) {        // ... | -1 -> -1
+        ++NumAnnihil;
+        return CstVal;
+      }
+      // FALLTHROUGH!
+    case Instruction::Add:
+    case Instruction::Xor:
+      if (CstVal->isZero())                  // ... [|^+] 0 -> ...
+        Ops.pop_back();
+      break;
+    }
+  if (Ops.size() == 1) return Ops[0].Op;
+
+  // Handle destructive annihilation due to identities between elements in the
+  // argument list here.
+  switch (Opcode) {
+  default: break;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+    // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      // First, check for X and ~X in the operand list.
+      assert(i < Ops.size());
+      if (BinaryOperator::isNot(Ops[i].Op)) {    // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op); + unsigned FoundX = FindInOperandList(Ops, i, X); + if (FoundX != i) { + if (Opcode == Instruction::And) { // ...&X&~X = 0 + ++NumAnnihil; + return Constant::getNullValue(X->getType()); + } else if (Opcode == Instruction::Or) { // ...|X|~X = -1 + ++NumAnnihil; + return ConstantInt::getAllOnesValue(X->getType()); + } + } + } + + // Next, check for duplicate pairs of values, which we assume are next to + // each other, due to our sorting criteria. + assert(i < Ops.size()); + if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { + if (Opcode == Instruction::And || Opcode == Instruction::Or) { + // Drop duplicate values. + Ops.erase(Ops.begin()+i); + --i; --e; + IterateOptimization = true; + ++NumAnnihil; + } else { + assert(Opcode == Instruction::Xor); + if (e == 2) { + ++NumAnnihil; + return Constant::getNullValue(Ops[0].Op->getType()); + } + // ... X^X -> ... + Ops.erase(Ops.begin()+i, Ops.begin()+i+2); + i -= 1; e -= 2; + IterateOptimization = true; + ++NumAnnihil; + } + } + } + break; + + case Instruction::Add: + // Scan the operand lists looking for X and -X pairs. If we find any, we + // can simplify the expression. X+-X == 0. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + assert(i < Ops.size()); + // Check for X and -X in the operand list. + if (BinaryOperator::isNeg(Ops[i].Op)) { + Value *X = BinaryOperator::getNegArgument(Ops[i].Op); + unsigned FoundX = FindInOperandList(Ops, i, X); + if (FoundX != i) { + // Remove X and -X from the operand list. + if (Ops.size() == 2) { + ++NumAnnihil; + return Constant::getNullValue(X->getType()); + } else { + Ops.erase(Ops.begin()+i); + if (i < FoundX) + --FoundX; + else + --i; // Need to back up an extra one. + Ops.erase(Ops.begin()+FoundX); + IterateOptimization = true; + ++NumAnnihil; + --i; // Revisit element. + e -= 2; // Removed two elements. + } + } + } + } + + + // Scan the operand list, checking to see if there are any common factors + // between operands. Consider something like A*A+A*B*C+D. We would like to + // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. + // To efficiently find this, we count the number of times a factor occurs + // for any ADD operands that are MULs. + std::map FactorOccurrences; + unsigned MaxOcc = 0; + Value *MaxOccVal = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (BinaryOperator *BOp = dyn_cast(Ops[i].Op)) { + if (BOp->getOpcode() == Instruction::Mul && BOp->use_empty()) { + // Compute all of the factors of this added value. + std::vector Factors; + FindSingleUseMultiplyFactors(BOp, Factors); + assert(Factors.size() > 1 && "Bad linearize!"); + + // Add one to FactorOccurrences for each unique factor in this op. + if (Factors.size() == 2) { + unsigned Occ = ++FactorOccurrences[Factors[0]]; + if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[0]; } + if (Factors[0] != Factors[1]) { // Don't double count A*A. + Occ = ++FactorOccurrences[Factors[1]]; + if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[1]; } + } + } else { + std::set Duplicates; + for (unsigned i = 0, e = Factors.size(); i != e; ++i) { + if (Duplicates.insert(Factors[i]).second) { + unsigned Occ = ++FactorOccurrences[Factors[i]]; + if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[i]; } + } + } + } + } + } + } + + // If any factor occurred more than one time, we can pull it out. + if (MaxOcc > 1) { + DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n"; + + // Create a new instruction that uses the MaxOccVal twice. 
+      // Create a new instruction that uses the MaxOccVal twice.  If we don't
+      // do this, we could otherwise run into situations where removing a
+      // factor from an expression will drop a use of maxocc, and this can
+      // cause RemoveFactorFromExpression on successive values to behave
+      // differently.
+      Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+      std::vector<Value*> NewMulOps;
+      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+        if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+          NewMulOps.push_back(V);
+          Ops.erase(Ops.begin()+i);
+          --i; --e;
+        }
+      }
+
+      // No need for extra uses anymore.
+      delete DummyInst;
+
+      unsigned NumAddedValues = NewMulOps.size();
+      Value *V = EmitAddTreeOfValues(I, NewMulOps);
+      Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+      // Now that we have inserted V and its sole use, optimize it.  This
+      // allows us to handle cases that require multiple factoring steps, such
+      // as this: A*A*B + A*A*C  -->  A*(A*B+A*C)  -->  A*(A*(B+C))
+      if (NumAddedValues > 1)
+        ReassociateExpression(cast<BinaryOperator>(V));
+
+      ++NumFactor;
+
+      if (Ops.empty())
+        return V2;
+
+      // Add the new value to the list of things being added.
+      Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+
+      // Rewrite the tree so that there is now a use of V.
+      RewriteExprTree(I, Ops);
+      return OptimizeExpression(I, Ops);
+    }
+    break;
+  //case Instruction::Mul:
+  }
+
+  if (IterateOptimization)
+    return OptimizeExpression(I, Ops);
+  return 0;
+}
+
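+// Illustrative sketch (editorial addition, not part of the imported source):
+// run on the comment's own example A*A + A*B*C + D, the occurrence counting
+// above yields A:2, B:1, C:1 (A*A is deliberately not double counted), so
+// MaxOccVal = A and the sum is rewritten as A*(A + B*C) + D.  The counting
+// scheme in miniature, over plain names instead of Value*:
+#if 0
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+std::string maxOccurringFactor(
+    const std::vector<std::vector<std::string> > &Addends) {
+  std::map<std::string, unsigned> Occ;
+  unsigned MaxOcc = 0;
+  std::string MaxVal;
+  for (unsigned i = 0, e = Addends.size(); i != e; ++i) {
+    std::set<std::string> Seen;   // Count each factor once per addend.
+    for (unsigned j = 0, f = Addends[i].size(); j != f; ++j)
+      if (Seen.insert(Addends[i][j]).second && ++Occ[Addends[i][j]] > MaxOcc) {
+        MaxOcc = Occ[Addends[i][j]];
+        MaxVal = Addends[i][j];
+      }
+  }
+  return MaxOcc > 1 ? MaxVal : std::string();  // empty: nothing worth pulling
+}
+#endif
+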
+/// ReassociateBB - Inspect all of the instructions in this basic block,
+/// reassociating them as we go.
+void Reassociate::ReassociateBB(BasicBlock *BB) {
+  for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
+    Instruction *BI = BBI++;
+    if (BI->getOpcode() == Instruction::Shl &&
+        isa<ConstantInt>(BI->getOperand(1)))
+      if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+        MadeChange = true;
+        BI = NI;
+      }
+
+    // Reject cases where it is pointless to do this.
+    if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() ||
+        isa<VectorType>(BI->getType()))
+      continue;  // Floating point ops are not associative.
+
+    // If this is a subtract instruction which is not already in negate form,
+    // see if we can convert it to X+-Y.
+    if (BI->getOpcode() == Instruction::Sub) {
+      if (ShouldBreakUpSubtract(BI)) {
+        BI = BreakUpSubtract(BI, ValueRankMap);
+        MadeChange = true;
+      } else if (BinaryOperator::isNeg(BI)) {
+        // Otherwise, this is a negation.  See if the operand is a multiply
+        // tree and if this is not an inner node of a multiply tree.
+        if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+            (!BI->hasOneUse() ||
+             !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+          BI = LowerNegateToMultiply(BI, ValueRankMap);
+          MadeChange = true;
+        }
+      }
+    }
+
+    // If this instruction is a commutative binary operator, process it.
+    if (!BI->isAssociative()) continue;
+    BinaryOperator *I = cast<BinaryOperator>(BI);
+
+    // If this is an interior node of a reassociable tree, ignore it until we
+    // get to the root of the tree, to avoid N^2 analysis.
+    if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+      continue;
+
+    // If this is an add tree that is used by a sub instruction, ignore it
+    // until we process the subtract.
+    if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+        cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+      continue;
+
+    ReassociateExpression(I);
+  }
+}
+
+void Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+  // First, walk the expression tree, linearizing the tree, collecting the
+  // operand information.
+  std::vector<ValueEntry> Ops;
+  LinearizeExprTree(I, Ops);
+
+  DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+
+  // Now that we have linearized the tree to a list and have gathered all of
+  // the operands and their ranks, sort the operands by their rank.  Use a
+  // stable_sort so that values with equal ranks will have their relative
+  // positions maintained (and so the compiler is deterministic).  Note that
+  // this sorts so that the highest ranking values end up at the beginning of
+  // the vector.
+  std::stable_sort(Ops.begin(), Ops.end());
+
+  // OptimizeExpression - Now that we have the expression tree in a convenient
+  // sorted form, optimize it globally if possible.
+  if (Value *V = OptimizeExpression(I, Ops)) {
+    // This expression tree simplified to something that isn't a tree,
+    // eliminate it.
+    DOUT << "Reassoc to scalar: " << *V << "\n";
+    I->replaceAllUsesWith(V);
+    RemoveDeadBinaryOp(I);
+    return;
+  }
+
+  // We want to sink immediates as deeply as possible except in the case where
+  // this is a multiply tree used only by an add, and the immediate is a -1.
+  // In this case we reassociate to put the negation on the outside so that we
+  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+      isa<ConstantInt>(Ops.back().Op) &&
+      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+    Ops.insert(Ops.begin(), Ops.back());
+    Ops.pop_back();
+  }
+
+  DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+
+  if (Ops.size() == 1) {
+    // This expression tree simplified to something that isn't a tree,
+    // eliminate it.
+    I->replaceAllUsesWith(Ops[0].Op);
+    RemoveDeadBinaryOp(I);
+  } else {
+    // Now that we ordered and optimized the expressions, splat them back into
+    // the expression tree, removing any unneeded nodes.
+    RewriteExprTree(I, Ops);
+  }
+}
+
+bool Reassociate::runOnFunction(Function &F) {
+  // Recalculate the rank map for F.
+  BuildRankMap(F);
+
+  MadeChange = false;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    ReassociateBB(FI);
+
+  // We are done with the rank map.
+  RankMap.clear();
+  ValueRankMap.clear();
+  return MadeChange;
+}
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 000000000000..46b2952b4cc5
--- /dev/null
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,125 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references.  It is intended to be
+// the inverse of PromoteMemoryToRegister.  By converting to loads, the only
+// values live across basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
+//
+//===----------------------------------------------------------------------===//
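+
+// Illustrative sketch (editorial addition, not part of the imported source):
+// the pass below registers itself under the name "reg2mem", so a client would
+// typically schedule it through a PassManager, e.g.:
+#if 0
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/Scalar.h"
+void demoteAllRegisters(llvm::Module &M) {
+  llvm::PassManager PM;
+  PM.add(llvm::createDemoteRegisterToMemoryPass());  // factory defined below
+  PM.run(M);
+}
+#endif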
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+  struct VISIBILITY_HIDDEN RegToMem : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    RegToMem() : FunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(BreakCriticalEdgesID);
+      AU.addPreservedID(BreakCriticalEdgesID);
+    }
+
+    bool valueEscapes(Instruction* i) {
+      BasicBlock* bb = i->getParent();
+      for (Value::use_iterator ii = i->use_begin(), ie = i->use_end();
+           ii != ie; ++ii)
+        if (cast<Instruction>(*ii)->getParent() != bb ||
+            isa<PHINode>(*ii))
+          return true;
+      return false;
+    }
+
+    virtual bool runOnFunction(Function &F) {
+      if (!F.isDeclaration()) {
+        // Insert all new allocas into entry block.
+        BasicBlock* BBEntry = &F.getEntryBlock();
+        assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+               "Entry block to function must not have predecessors!");
+
+        // Find the first non-alloca instruction and create an insertion
+        // point.  This is safe if the block is well-formed: it will always
+        // have a terminator, otherwise we'll trigger an assertion.
+        BasicBlock::iterator I = BBEntry->begin();
+        while (isa<AllocaInst>(I)) ++I;
+
+        CastInst *AllocaInsertionPoint =
+          CastInst::Create(Instruction::BitCast,
+                           Constant::getNullValue(Type::Int32Ty), Type::Int32Ty,
+                           "reg2mem alloca point", I);
+
+        // Find the escaped instructions.  But don't create stack slots for
+        // allocas in entry block.
+        std::list<Instruction*> worklist;
+        for (Function::iterator ibb = F.begin(), ibe = F.end();
+             ibb != ibe; ++ibb)
+          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+               iib != iie; ++iib) {
+            if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+                valueEscapes(iib)) {
+              worklist.push_front(&*iib);
+            }
+          }
+
+        // Demote escaped instructions.
+        NumRegsDemoted += worklist.size();
+        for (std::list<Instruction*>::iterator ilb = worklist.begin(),
+             ile = worklist.end(); ilb != ile; ++ilb)
+          DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+        worklist.clear();
+
+        // Find all phis.
+        for (Function::iterator ibb = F.begin(), ibe = F.end();
+             ibb != ibe; ++ibb)
+          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+               iib != iie; ++iib)
+            if (isa<PHINode>(iib))
+              worklist.push_front(&*iib);
+
+        // Demote phi nodes.
+        NumPhisDemoted += worklist.size();
+        for (std::list<Instruction*>::iterator ilb = worklist.begin(),
+             ile = worklist.end(); ilb != ile; ++ilb)
+          DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+        return true;
+      }
+      return false;
+    }
+  };
+}
+
+char RegToMem::ID = 0;
+static RegisterPass<RegToMem>
+X("reg2mem", "Demote all values to stack slots");
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+//
+const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+  return new RegToMem();
+}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 000000000000..d73519c04e35
--- /dev/null
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,1855 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+//   * Assumes values are constant unless proven otherwise
+//   * Assumes BasicBlocks are dead unless proven otherwise
+//   * Proves values to be constant, and replaces them with constants
+//   * Proves conditional branches to be unconditional
+//
+// Notice that:
+//   * This pass has a habit of making definitions be dead.  It is a good idea
+//     to run a DCE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumDeadBlocks , "Number of basic blocks unreachable by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy.  It is a simple class with value semantics.
+///
+class VISIBILITY_HIDDEN LatticeVal {
+  enum {
+    /// undefined - This LLVM Value has no known value yet.
+    undefined,
+
+    /// constant - This LLVM Value has a specific constant value.
+    constant,
+
+    /// forcedconstant - This LLVM Value was thought to be undef until
+    /// ResolvedUndefsIn.  This is treated just like 'constant', but if merged
+    /// with another (different) constant, it goes to overdefined, instead of
+    /// asserting.
+    forcedconstant,
+
+    /// overdefined - This instruction is not known to be constant, and we know
+    /// it has a value.
+    overdefined
+  } LatticeValue;        // The current lattice position
+
+  Constant *ConstantVal; // If Constant value, the current value
+public:
+  inline LatticeVal() : LatticeValue(undefined), ConstantVal(0) {}
+
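+  // Illustrative sketch (editorial addition, not part of the imported
+  // source): the four states form a small lattice, undefined ->
+  // constant/forcedconstant -> overdefined, and the mark* methods below only
+  // ever move a value *down* it.  For example:
+#if 0
+  void demoLatticeTransitions(Constant *C1) {
+    LatticeVal LV;
+    LV.markConstant(C1);   // undefined merged with C1 -> constant C1
+    LV.markConstant(C1);   // merging the same constant again is a no-op
+    LV.markOverdefined();  // any conflicting information -> overdefined
+    assert(LV.isOverdefined() && "Values never climb back up the lattice");
+  }
+#endif
+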
+  // markOverdefined - Return true if this is a new status to be in...
+  inline bool markOverdefined() {
+    if (LatticeValue != overdefined) {
+      LatticeValue = overdefined;
+      return true;
+    }
+    return false;
+  }
+
+  // markConstant - Return true if this is a new status for us.
+  inline bool markConstant(Constant *V) {
+    if (LatticeValue != constant) {
+      if (LatticeValue == undefined) {
+        LatticeValue = constant;
+        assert(V && "Marking constant with NULL");
+        ConstantVal = V;
+      } else {
+        assert(LatticeValue == forcedconstant &&
+               "Cannot move from overdefined to constant!");
+        // Stay at forcedconstant if the constant is the same.
+        if (V == ConstantVal) return false;
+
+        // Otherwise, we go to overdefined.  Assumptions made based on the
+        // forced value are possibly wrong.  Assuming this is another constant
+        // could expose a contradiction.
+        LatticeValue = overdefined;
+      }
+      return true;
+    } else {
+      assert(ConstantVal == V && "Marking constant with different value");
+    }
+    return false;
+  }
+
+  inline void markForcedConstant(Constant *V) {
+    assert(LatticeValue == undefined && "Can't force a defined value!");
+    LatticeValue = forcedconstant;
+    ConstantVal = V;
+  }
+
+  inline bool isUndefined() const { return LatticeValue == undefined; }
+  inline bool isConstant() const {
+    return LatticeValue == constant || LatticeValue == forcedconstant;
+  }
+  inline bool isOverdefined() const { return LatticeValue == overdefined; }
+
+  inline Constant *getConstant() const {
+    assert(isConstant() && "Cannot get the constant of a non-constant!");
+    return ConstantVal;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+  DenseSet<BasicBlock*> BBExecutable; // The basic blocks that are executable
+  std::map<Value*, LatticeVal> ValueState;  // The state each value is in.
+
+  /// TrackedGlobals - If we are tracking any values for the contents of a
+  /// global variable, we keep a mapping from the constant accessor to the
+  /// element of the global, to the currently known value.  If the value
+  /// becomes overdefined, its entry is simply removed from this map.
+  DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+  /// TrackedRetVals - If we are tracking arguments into and the return
+  /// value out of a function, it will have an entry in this map, indicating
+  /// what the known return value for the function is.
+  DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+  /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+  /// that return multiple values.
+  DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+  // The reason for two worklists is that overdefined is the lowest state
+  // on the lattice, and moving things to overdefined as fast as possible
+  // makes SCCP converge much faster.
+  // By having a separate worklist, we accomplish this because everything
+  // possibly overdefined will become overdefined at the soonest possible
+  // point.
+  SmallVector<Value*, 64> OverdefinedInstWorkList;
+  SmallVector<Value*, 64> InstWorkList;
+
+  SmallVector<BasicBlock*, 64> BBWorkList;  // The BasicBlock work list
+
+  /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are
+  /// not overdefined, despite the fact that the PHI node is overdefined.
+  std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
+
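+  // Illustrative sketch (editorial addition, not part of the imported
+  // source): the Solve() loop later in this file drains
+  // OverdefinedInstWorkList before InstWorkList; the "finish the final states
+  // first" pattern looks like this in miniature:
+#if 0
+  void drainInPriorityOrder() {
+    while (!OverdefinedInstWorkList.empty() || !InstWorkList.empty()) {
+      // Prefer overdefined values: they can never change again, so visiting
+      // their users now avoids revisiting them for later constant updates.
+      SmallVector<Value*, 64> &WL = !OverdefinedInstWorkList.empty()
+                                      ? OverdefinedInstWorkList : InstWorkList;
+      Value *V = WL.back();
+      WL.pop_back();
+      for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+           UI != E; ++UI)
+        OperandChangedState(*UI);
+    }
+  }
+#endif
+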
+  /// KnownFeasibleEdges - Entries in this set are edges which have already had
+  /// PHI nodes retriggered.
+  typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+  DenseSet<Edge> KnownFeasibleEdges;
+public:
+
+  /// MarkBlockExecutable - This method can be used by clients to mark all of
+  /// the blocks that are known to be intrinsically live in the processed unit.
+  void MarkBlockExecutable(BasicBlock *BB) {
+    DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+    BBExecutable.insert(BB);   // Basic block is executable!
+    BBWorkList.push_back(BB);  // Add the block to the work list!
+  }
+
+  /// TrackValueOfGlobalVariable - Clients can use this method to
+  /// inform the SCCPSolver that it should track loads and stores to the
+  /// specified global variable if it can.  This is only legal to call if
+  /// performing Interprocedural SCCP.
+  void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+    const Type *ElTy = GV->getType()->getElementType();
+    if (ElTy->isFirstClassType()) {
+      LatticeVal &IV = TrackedGlobals[GV];
+      if (!isa<UndefValue>(GV->getInitializer()))
+        IV.markConstant(GV->getInitializer());
+    }
+  }
+
+  /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+  /// and out of the specified function (which cannot have its address taken),
+  /// this method must be called.
+  void AddTrackedFunction(Function *F) {
+    assert(F->hasLocalLinkage() && "Can only track internal functions!");
+    // Add an entry, F -> undef.
+    if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+        TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+                                                     LatticeVal()));
+    } else
+      TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+  }
+
+  /// Solve - Solve for constants and executable blocks.
+  ///
+  void Solve();
+
+  /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+  /// that branches on undef values cannot reach any of their successors.
+  /// However, this is not a safe assumption.  After we solve dataflow, this
+  /// method should be used to handle this.  If this returns true, the solver
+  /// should be rerun.
+  bool ResolvedUndefsIn(Function &F);
+
+  bool isBlockExecutable(BasicBlock *BB) const {
+    return BBExecutable.count(BB);
+  }
+
+  /// getValueMapping - Once we have solved for constants, return the mapping
+  /// of LLVM values to LatticeVals.
+  std::map<Value*, LatticeVal> &getValueMapping() {
+    return ValueState;
+  }
+
+  /// getTrackedRetVals - Get the inferred return value map.
+  ///
+  const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+    return TrackedRetVals;
+  }
+
+  /// getTrackedGlobals - Get and return the set of inferred initializers for
+  /// global variables.
+  const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+    return TrackedGlobals;
+  }
+
+  inline void markOverdefined(Value *V) {
+    markOverdefined(ValueState[V], V);
+  }
+
+private:
+  // markConstant - Make a value be marked as "constant".  If the value
+  // is not already a constant, add it to the instruction work list so that
+  // the users of the instruction are updated later.
+  //
+  inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+    if (IV.markConstant(C)) {
+      DOUT << "markConstant: " << *C << ": " << *V;
+      InstWorkList.push_back(V);
+    }
+  }
+
+  inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
+    IV.markForcedConstant(C);
+    DOUT << "markForcedConstant: " << *C << ": " << *V;
+    InstWorkList.push_back(V);
+  }
+
+  inline void markConstant(Value *V, Constant *C) {
+    markConstant(ValueState[V], V, C);
+  }
+
+  // markOverdefined - Make a value be marked as "overdefined".  If the
+  // value is not already overdefined, add it to the overdefined instruction
+  // work list so that the users of the instruction are updated later.
+  inline void markOverdefined(LatticeVal &IV, Value *V) {
+    if (IV.markOverdefined()) {
+      DEBUG(DOUT << "markOverdefined: ";
+            if (Function *F = dyn_cast<Function>(V))
+              DOUT << "Function '" << F->getName() << "'\n";
+            else
+              DOUT << *V);
+      // Only instructions go on the work list
+      OverdefinedInstWorkList.push_back(V);
+    }
+  }
+
+  inline void mergeInValue(LatticeVal &IV, Value *V, LatticeVal &MergeWithV) {
+    if (IV.isOverdefined() || MergeWithV.isUndefined())
+      return;  // Noop.
+    if (MergeWithV.isOverdefined())
+      markOverdefined(IV, V);
+    else if (IV.isUndefined())
+      markConstant(IV, V, MergeWithV.getConstant());
+    else if (IV.getConstant() != MergeWithV.getConstant())
+      markOverdefined(IV, V);
+  }
+
+  inline void mergeInValue(Value *V, LatticeVal &MergeWithV) {
+    return mergeInValue(ValueState[V], V, MergeWithV);
+  }
+
+  // getValueState - Return the LatticeVal object that corresponds to the
+  // value.  This function is necessary because not all values should start
+  // out in the undefined state: Arguments should be overdefined, and
+  // constants should be marked as constants.  If a value is not known to be
+  // an Instruction object, then use this accessor to get its value from the
+  // map.
+  //
+  inline LatticeVal &getValueState(Value *V) {
+    std::map<Value*, LatticeVal>::iterator I = ValueState.find(V);
+    if (I != ValueState.end()) return I->second;  // Common case, in the map
+
+    if (Constant *C = dyn_cast<Constant>(V)) {
+      if (isa<UndefValue>(V)) {
+        // Nothing to do, remain undefined.
+      } else {
+        LatticeVal &LV = ValueState[C];
+        LV.markConstant(C);          // Constants are constant
+        return LV;
+      }
+    }
+    // All others are undefined by default.
+    return ValueState[V];
+  }
+
+  // markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+  // work list if it is not already executable...
+  //
+  void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+    if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+      return;  // This edge is already known to be executable!
+
+    if (BBExecutable.count(Dest)) {
+      DOUT << "Marking Edge Executable: " << Source->getNameStart()
+           << " -> " << Dest->getNameStart() << "\n";
+
+      // The destination is already executable, but we just made an edge
+      // feasible that wasn't before.  Revisit the PHI nodes in the block
+      // because they have potentially new operands.
+      for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+        visitPHINode(*cast<PHINode>(I));
+
+    } else {
+      MarkBlockExecutable(Dest);
+    }
+  }
+
+  // getFeasibleSuccessors - Return a vector of booleans to indicate which
+  // successors are reachable from a given terminator instruction.
+  //
+  void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+  // isEdgeFeasible - Return true if the control flow edge from the 'From'
+  // basic block to the 'To' basic block is currently feasible...
+  //
+  bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+  // OperandChangedState - This method is invoked on all of the users of an
+  // instruction that was just changed state somehow.  Based on this
+  // information, we need to update the specified user of this instruction.
+  //
+  void OperandChangedState(User *U) {
+    // Only instructions use other variable values!
+    Instruction &I = cast<Instruction>(*U);
+    if (BBExecutable.count(I.getParent()))   // Inst is executable?
+      visit(I);
+  }
+
+private:
+  friend class InstVisitor<SCCPSolver>;
+
+  // visit implementations - Something changed in this instruction... Either an
+  // operand made a transition, or the instruction is newly executable.  Change
+  // the value type of I to reflect these changes if appropriate.
+  //
+  void visitPHINode(PHINode &I);
+
+  // Terminators
+  void visitReturnInst(ReturnInst &I);
+  void visitTerminatorInst(TerminatorInst &TI);
+
+  void visitCastInst(CastInst &I);
+  void visitSelectInst(SelectInst &I);
+  void visitBinaryOperator(Instruction &I);
+  void visitCmpInst(CmpInst &I);
+  void visitExtractElementInst(ExtractElementInst &I);
+  void visitInsertElementInst(InsertElementInst &I);
+  void visitShuffleVectorInst(ShuffleVectorInst &I);
+  void visitExtractValueInst(ExtractValueInst &EVI);
+  void visitInsertValueInst(InsertValueInst &IVI);
+
+  // Instructions that cannot be folded away...
+  void visitStoreInst     (Instruction &I);
+  void visitLoadInst      (LoadInst &I);
+  void visitGetElementPtrInst(GetElementPtrInst &I);
+  void visitCallInst      (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+  void visitInvokeInst    (InvokeInst &II) {
+    visitCallSite(CallSite::get(&II));
+    visitTerminatorInst(II);
+  }
+  void visitCallSite      (CallSite CS);
+  void visitUnwindInst    (TerminatorInst &I) { /*returns void*/ }
+  void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+  void visitAllocationInst(Instruction &I) { markOverdefined(&I); }
+  void visitVANextInst    (Instruction &I) { markOverdefined(&I); }
+  void visitVAArgInst     (Instruction &I) { markOverdefined(&I); }
+  void visitFreeInst      (Instruction &I) { /*returns void*/ }
+
+  void visitInstruction(Instruction &I) {
+    // If a new instruction is added to LLVM that we don't handle...
+    cerr << "SCCP: Don't know how to handle: " << I;
+    markOverdefined(&I);   // Just in case
+  }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+                                       SmallVector<bool, 16> &Succs) {
+  Succs.resize(TI.getNumSuccessors());
+  if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+    if (BI->isUnconditional()) {
+      Succs[0] = true;
+    } else {
+      LatticeVal &BCValue = getValueState(BI->getCondition());
+      if (BCValue.isOverdefined() ||
+          (BCValue.isConstant() && !isa<ConstantInt>(BCValue.getConstant()))) {
+        // Overdefined condition variables, and branches on unfoldable constant
+        // conditions, mean the branch could go either way.
+        Succs[0] = Succs[1] = true;
+      } else if (BCValue.isConstant()) {
+        // Constant condition variables mean the branch can only go a single
+        // way.
+        Succs[BCValue.getConstant() == ConstantInt::getFalse()] = true;
+      }
+    }
+  } else if (isa<InvokeInst>(&TI)) {
+    // Invoke instructions' successors are always executable.
+    Succs[0] = Succs[1] = true;
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+    LatticeVal &SCValue = getValueState(SI->getCondition());
+    if (SCValue.isOverdefined() ||   // Overdefined condition?
+        (SCValue.isConstant() && !isa<ConstantInt>(SCValue.getConstant()))) {
+      // All destinations are executable!
+      Succs.assign(TI.getNumSuccessors(), true);
+    } else if (SCValue.isConstant())
+      Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
+  } else {
+    assert(0 && "SCCP: Don't know how to handle this terminator!");
+  }
+}
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible...
+// +bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { + assert(BBExecutable.count(To) && "Dest should always be alive!"); + + // Make sure the source basic block is executable!! + if (!BBExecutable.count(From)) return false; + + // Check to make sure this edge itself is actually feasible now... + TerminatorInst *TI = From->getTerminator(); + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isUnconditional()) + return true; + else { + LatticeVal &BCValue = getValueState(BI->getCondition()); + if (BCValue.isOverdefined()) { + // Overdefined condition variables mean the branch could go either way. + return true; + } else if (BCValue.isConstant()) { + // Not branching on an evaluatable constant? + if (!isa(BCValue.getConstant())) return true; + + // Constant condition variables mean the branch can only go a single way + return BI->getSuccessor(BCValue.getConstant() == + ConstantInt::getFalse()) == To; + } + return false; + } + } else if (isa(TI)) { + // Invoke instructions successors are always executable. + return true; + } else if (SwitchInst *SI = dyn_cast(TI)) { + LatticeVal &SCValue = getValueState(SI->getCondition()); + if (SCValue.isOverdefined()) { // Overdefined condition? + // All destinations are executable! + return true; + } else if (SCValue.isConstant()) { + Constant *CPV = SCValue.getConstant(); + if (!isa(CPV)) + return true; // not a foldable constant? + + // Make sure to skip the "default value" which isn't a value + for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i) + if (SI->getSuccessorValue(i) == CPV) // Found the taken branch... + return SI->getSuccessor(i) == To; + + // Constant value not equal to any of the branches... must execute + // default branch then... + return SI->getDefaultDest() == To; + } + return false; + } else { + cerr << "Unknown terminator instruction: " << *TI; + abort(); + } +} + +// visit Implementations - Something changed in this instruction... Either an +// operand made a transition, or the instruction is newly executable. Change +// the value type of I to reflect these changes if appropriate. This method +// makes sure to do the following actions: +// +// 1. If a phi node merges two constants in, and has conflicting value coming +// from different branches, or if the PHI node merges in an overdefined +// value, then the PHI node becomes overdefined. +// 2. If a phi node merges only constants in, and they all agree on value, the +// PHI node becomes a constant value equal to that. +// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant +// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined +// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined +// 6. If a conditional branch has a value that is constant, make the selected +// destination executable +// 7. If a conditional branch has a value that is overdefined, make all +// successors executable. +// +void SCCPSolver::visitPHINode(PHINode &PN) { + LatticeVal &PNIV = getValueState(&PN); + if (PNIV.isOverdefined()) { + // There may be instructions using this PHI node that are not overdefined + // themselves. If so, make sure that they know that the PHI node operand + // changed. 
+ std::multimap::iterator I, E; + tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN); + if (I != E) { + SmallVector Users; + for (; I != E; ++I) Users.push_back(I->second); + while (!Users.empty()) { + visit(Users.back()); + Users.pop_back(); + } + } + return; // Quick exit + } + + // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + markOverdefined(PNIV, &PN); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. If they are all + // constant, and they agree with each other, the PHI becomes the identical + // constant. If they are constant and don't agree, the PHI is overdefined. + // If there are no executable operands, the PHI remains undefined. + // + Constant *OperandVal = 0; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + LatticeVal &IV = getValueState(PN.getIncomingValue(i)); + if (IV.isUndefined()) continue; // Doesn't influence PHI node. + + if (isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) { + if (IV.isOverdefined()) { // PHI node becomes overdefined! + markOverdefined(&PN); + return; + } + + if (OperandVal == 0) { // Grab the first value... + OperandVal = IV.getConstant(); + } else { // Another value is being merged in! + // There is already a reachable operand. If we conflict with it, + // then the PHI node becomes overdefined. If we agree with it, we + // can continue on. + + // Check to see if there are two different constants merging... + if (IV.getConstant() != OperandVal) { + // Yes there is. This means the PHI node is not constant. + // You must be overdefined poor PHI. + // + markOverdefined(&PN); // The PHI node now becomes overdefined + return; // I'm done analyzing you + } + } + } + } + + // If we exited the loop, this means that the PHI node only has constant + // arguments that agree with each other(and OperandVal is the constant) or + // OperandVal is null because there are no defined incoming arguments. If + // this is the case, the PHI remains undefined. + // + if (OperandVal) + markConstant(&PN, OperandVal); // Acquire operand value +} + +void SCCPSolver::visitReturnInst(ReturnInst &I) { + if (I.getNumOperands() == 0) return; // Ret void + + Function *F = I.getParent()->getParent(); + // If we are tracking the return value of this function, merge it in. + if (!F->hasLocalLinkage()) + return; + + if (!TrackedRetVals.empty() && I.getNumOperands() == 1) { + DenseMap::iterator TFRVI = + TrackedRetVals.find(F); + if (TFRVI != TrackedRetVals.end() && + !TFRVI->second.isOverdefined()) { + LatticeVal &IV = getValueState(I.getOperand(0)); + mergeInValue(TFRVI->second, F, IV); + return; + } + } + + // Handle functions that return multiple values. 
+ if (!TrackedMultipleRetVals.empty() && I.getNumOperands() > 1) { + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + DenseMap, LatticeVal>::iterator + It = TrackedMultipleRetVals.find(std::make_pair(F, i)); + if (It == TrackedMultipleRetVals.end()) break; + mergeInValue(It->second, F, getValueState(I.getOperand(i))); + } + } else if (!TrackedMultipleRetVals.empty() && + I.getNumOperands() == 1 && + isa(I.getOperand(0)->getType())) { + for (unsigned i = 0, e = I.getOperand(0)->getType()->getNumContainedTypes(); + i != e; ++i) { + DenseMap, LatticeVal>::iterator + It = TrackedMultipleRetVals.find(std::make_pair(F, i)); + if (It == TrackedMultipleRetVals.end()) break; + Value *Val = FindInsertedValue(I.getOperand(0), i); + mergeInValue(It->second, F, getValueState(Val)); + } + } +} + +void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) { + SmallVector SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SCCPSolver::visitCastInst(CastInst &I) { + Value *V = I.getOperand(0); + LatticeVal &VState = getValueState(V); + if (VState.isOverdefined()) // Inherit overdefinedness of operand + markOverdefined(&I); + else if (VState.isConstant()) // Propagate constant value + markConstant(&I, ConstantExpr::getCast(I.getOpcode(), + VState.getConstant(), I.getType())); +} + +void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { + Value *Aggr = EVI.getAggregateOperand(); + + // If the operand to the extractvalue is an undef, the result is undef. + if (isa(Aggr)) + return; + + // Currently only handle single-index extractvalues. + if (EVI.getNumIndices() != 1) { + markOverdefined(&EVI); + return; + } + + Function *F = 0; + if (CallInst *CI = dyn_cast(Aggr)) + F = CI->getCalledFunction(); + else if (InvokeInst *II = dyn_cast(Aggr)) + F = II->getCalledFunction(); + + // TODO: If IPSCCP resolves the callee of this function, we could propagate a + // result back! + if (F == 0 || TrackedMultipleRetVals.empty()) { + markOverdefined(&EVI); + return; + } + + // See if we are tracking the result of the callee. If not tracking this + // function (for example, it is a declaration) just move to overdefined. + if (!TrackedMultipleRetVals.count(std::make_pair(F, *EVI.idx_begin()))) { + markOverdefined(&EVI); + return; + } + + // Otherwise, the value will be merged in here as a result of CallSite + // handling. +} + +void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { + Value *Aggr = IVI.getAggregateOperand(); + Value *Val = IVI.getInsertedValueOperand(); + + // If the operands to the insertvalue are undef, the result is undef. + if (isa(Aggr) && isa(Val)) + return; + + // Currently only handle single-index insertvalues. + if (IVI.getNumIndices() != 1) { + markOverdefined(&IVI); + return; + } + + // Currently only handle insertvalue instructions that are in a single-use + // chain that builds up a return value. + for (const InsertValueInst *TmpIVI = &IVI; ; ) { + if (!TmpIVI->hasOneUse()) { + markOverdefined(&IVI); + return; + } + const Value *V = *TmpIVI->use_begin(); + if (isa(V)) + break; + TmpIVI = dyn_cast(V); + if (!TmpIVI) { + markOverdefined(&IVI); + return; + } + } + + // See if we are tracking the result of the callee. 
+ Function *F = IVI.getParent()->getParent(); + DenseMap, LatticeVal>::iterator + It = TrackedMultipleRetVals.find(std::make_pair(F, *IVI.idx_begin())); + + // Merge in the inserted member value. + if (It != TrackedMultipleRetVals.end()) + mergeInValue(It->second, F, getValueState(Val)); + + // Mark the aggregate result of the IVI overdefined; any tracking that we do + // will be done on the individual member values. + markOverdefined(&IVI); +} + +void SCCPSolver::visitSelectInst(SelectInst &I) { + LatticeVal &CondValue = getValueState(I.getCondition()); + if (CondValue.isUndefined()) + return; + if (CondValue.isConstant()) { + if (ConstantInt *CondCB = dyn_cast(CondValue.getConstant())){ + mergeInValue(&I, getValueState(CondCB->getZExtValue() ? I.getTrueValue() + : I.getFalseValue())); + return; + } + } + + // Otherwise, the condition is overdefined or a constant we can't evaluate. + // See if we can produce something better than overdefined based on the T/F + // value. + LatticeVal &TVal = getValueState(I.getTrueValue()); + LatticeVal &FVal = getValueState(I.getFalseValue()); + + // select ?, C, C -> C. + if (TVal.isConstant() && FVal.isConstant() && + TVal.getConstant() == FVal.getConstant()) { + markConstant(&I, FVal.getConstant()); + return; + } + + if (TVal.isUndefined()) { // select ?, undef, X -> X. + mergeInValue(&I, FVal); + } else if (FVal.isUndefined()) { // select ?, X, undef -> X. + mergeInValue(&I, TVal); + } else { + markOverdefined(&I); + } +} + +// Handle BinaryOperators and Shift Instructions... +void SCCPSolver::visitBinaryOperator(Instruction &I) { + LatticeVal &IV = ValueState[&I]; + if (IV.isOverdefined()) return; + + LatticeVal &V1State = getValueState(I.getOperand(0)); + LatticeVal &V2State = getValueState(I.getOperand(1)); + + if (V1State.isOverdefined() || V2State.isOverdefined()) { + // If this is an AND or OR with 0 or -1, it doesn't matter that the other + // operand is overdefined. + if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) { + LatticeVal *NonOverdefVal = 0; + if (!V1State.isOverdefined()) { + NonOverdefVal = &V1State; + } else if (!V2State.isOverdefined()) { + NonOverdefVal = &V2State; + } + + if (NonOverdefVal) { + if (NonOverdefVal->isUndefined()) { + // Could annihilate value. + if (I.getOpcode() == Instruction::And) + markConstant(IV, &I, Constant::getNullValue(I.getType())); + else if (const VectorType *PT = dyn_cast(I.getType())) + markConstant(IV, &I, ConstantVector::getAllOnesValue(PT)); + else + markConstant(IV, &I, ConstantInt::getAllOnesValue(I.getType())); + return; + } else { + if (I.getOpcode() == Instruction::And) { + if (NonOverdefVal->getConstant()->isNullValue()) { + markConstant(IV, &I, NonOverdefVal->getConstant()); + return; // X and 0 = 0 + } + } else { + if (ConstantInt *CI = + dyn_cast(NonOverdefVal->getConstant())) + if (CI->isAllOnesValue()) { + markConstant(IV, &I, NonOverdefVal->getConstant()); + return; // X or -1 = -1 + } + } + } + } + } + + + // If both operands are PHI nodes, it is possible that this instruction has + // a constant value, despite the fact that the PHI node doesn't. Check for + // this condition now. + if (PHINode *PN1 = dyn_cast(I.getOperand(0))) + if (PHINode *PN2 = dyn_cast(I.getOperand(1))) + if (PN1->getParent() == PN2->getParent()) { + // Since the two PHI nodes are in the same basic block, they must have + // entries for the same predecessors. 
Walk the predecessor list, and + // if all of the incoming values are constants, and the result of + // evaluating this expression with all incoming value pairs is the + // same, then this expression is a constant even though the PHI node + // is not a constant! + LatticeVal Result; + for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) { + LatticeVal &In1 = getValueState(PN1->getIncomingValue(i)); + BasicBlock *InBlock = PN1->getIncomingBlock(i); + LatticeVal &In2 = + getValueState(PN2->getIncomingValueForBlock(InBlock)); + + if (In1.isOverdefined() || In2.isOverdefined()) { + Result.markOverdefined(); + break; // Cannot fold this operation over the PHI nodes! + } else if (In1.isConstant() && In2.isConstant()) { + Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(), + In2.getConstant()); + if (Result.isUndefined()) + Result.markConstant(V); + else if (Result.isConstant() && Result.getConstant() != V) { + Result.markOverdefined(); + break; + } + } + } + + // If we found a constant value here, then we know the instruction is + // constant despite the fact that the PHI nodes are overdefined. + if (Result.isConstant()) { + markConstant(IV, &I, Result.getConstant()); + // Remember that this instruction is virtually using the PHI node + // operands. + UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); + UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + return; + } else if (Result.isUndefined()) { + return; + } + + // Okay, this really is overdefined now. Since we might have + // speculatively thought that this was not overdefined before, and + // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs, + // make sure to clean out any entries that we put there, for + // efficiency. + std::multimap::iterator It, E; + tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1); + while (It != E) { + if (It->second == &I) { + UsersOfOverdefinedPHIs.erase(It++); + } else + ++It; + } + tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2); + while (It != E) { + if (It->second == &I) { + UsersOfOverdefinedPHIs.erase(It++); + } else + ++It; + } + } + + markOverdefined(IV, &I); + } else if (V1State.isConstant() && V2State.isConstant()) { + markConstant(IV, &I, ConstantExpr::get(I.getOpcode(), V1State.getConstant(), + V2State.getConstant())); + } +} + +// Handle ICmpInst instruction... +void SCCPSolver::visitCmpInst(CmpInst &I) { + LatticeVal &IV = ValueState[&I]; + if (IV.isOverdefined()) return; + + LatticeVal &V1State = getValueState(I.getOperand(0)); + LatticeVal &V2State = getValueState(I.getOperand(1)); + + if (V1State.isOverdefined() || V2State.isOverdefined()) { + // If both operands are PHI nodes, it is possible that this instruction has + // a constant value, despite the fact that the PHI node doesn't. Check for + // this condition now. + if (PHINode *PN1 = dyn_cast(I.getOperand(0))) + if (PHINode *PN2 = dyn_cast(I.getOperand(1))) + if (PN1->getParent() == PN2->getParent()) { + // Since the two PHI nodes are in the same basic block, they must have + // entries for the same predecessors. Walk the predecessor list, and + // if all of the incoming values are constants, and the result of + // evaluating this expression with all incoming value pairs is the + // same, then this expression is a constant even though the PHI node + // is not a constant! 
+ LatticeVal Result; + for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) { + LatticeVal &In1 = getValueState(PN1->getIncomingValue(i)); + BasicBlock *InBlock = PN1->getIncomingBlock(i); + LatticeVal &In2 = + getValueState(PN2->getIncomingValueForBlock(InBlock)); + + if (In1.isOverdefined() || In2.isOverdefined()) { + Result.markOverdefined(); + break; // Cannot fold this operation over the PHI nodes! + } else if (In1.isConstant() && In2.isConstant()) { + Constant *V = ConstantExpr::getCompare(I.getPredicate(), + In1.getConstant(), + In2.getConstant()); + if (Result.isUndefined()) + Result.markConstant(V); + else if (Result.isConstant() && Result.getConstant() != V) { + Result.markOverdefined(); + break; + } + } + } + + // If we found a constant value here, then we know the instruction is + // constant despite the fact that the PHI nodes are overdefined. + if (Result.isConstant()) { + markConstant(IV, &I, Result.getConstant()); + // Remember that this instruction is virtually using the PHI node + // operands. + UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); + UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + return; + } else if (Result.isUndefined()) { + return; + } + + // Okay, this really is overdefined now. Since we might have + // speculatively thought that this was not overdefined before, and + // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs, + // make sure to clean out any entries that we put there, for + // efficiency. + std::multimap::iterator It, E; + tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1); + while (It != E) { + if (It->second == &I) { + UsersOfOverdefinedPHIs.erase(It++); + } else + ++It; + } + tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2); + while (It != E) { + if (It->second == &I) { + UsersOfOverdefinedPHIs.erase(It++); + } else + ++It; + } + } + + markOverdefined(IV, &I); + } else if (V1State.isConstant() && V2State.isConstant()) { + markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), + V1State.getConstant(), + V2State.getConstant())); + } +} + +void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) { + // FIXME : SCCP does not handle vectors properly. + markOverdefined(&I); + return; + +#if 0 + LatticeVal &ValState = getValueState(I.getOperand(0)); + LatticeVal &IdxState = getValueState(I.getOperand(1)); + + if (ValState.isOverdefined() || IdxState.isOverdefined()) + markOverdefined(&I); + else if(ValState.isConstant() && IdxState.isConstant()) + markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(), + IdxState.getConstant())); +#endif +} + +void SCCPSolver::visitInsertElementInst(InsertElementInst &I) { + // FIXME : SCCP does not handle vectors properly. 
+ markOverdefined(&I); + return; +#if 0 + LatticeVal &ValState = getValueState(I.getOperand(0)); + LatticeVal &EltState = getValueState(I.getOperand(1)); + LatticeVal &IdxState = getValueState(I.getOperand(2)); + + if (ValState.isOverdefined() || EltState.isOverdefined() || + IdxState.isOverdefined()) + markOverdefined(&I); + else if(ValState.isConstant() && EltState.isConstant() && + IdxState.isConstant()) + markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(), + EltState.getConstant(), + IdxState.getConstant())); + else if (ValState.isUndefined() && EltState.isConstant() && + IdxState.isConstant()) + markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()), + EltState.getConstant(), + IdxState.getConstant())); +#endif +} + +void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) { + // FIXME : SCCP does not handle vectors properly. + markOverdefined(&I); + return; +#if 0 + LatticeVal &V1State = getValueState(I.getOperand(0)); + LatticeVal &V2State = getValueState(I.getOperand(1)); + LatticeVal &MaskState = getValueState(I.getOperand(2)); + + if (MaskState.isUndefined() || + (V1State.isUndefined() && V2State.isUndefined())) + return; // Undefined output if mask or both inputs undefined. + + if (V1State.isOverdefined() || V2State.isOverdefined() || + MaskState.isOverdefined()) { + markOverdefined(&I); + } else { + // A mix of constant/undef inputs. + Constant *V1 = V1State.isConstant() ? + V1State.getConstant() : UndefValue::get(I.getType()); + Constant *V2 = V2State.isConstant() ? + V2State.getConstant() : UndefValue::get(I.getType()); + Constant *Mask = MaskState.isConstant() ? + MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType()); + markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask)); + } +#endif +} + +// Handle getelementptr instructions... if all operands are constants then we +// can turn this into a getelementptr ConstantExpr. +// +void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { + LatticeVal &IV = ValueState[&I]; + if (IV.isOverdefined()) return; + + SmallVector Operands; + Operands.reserve(I.getNumOperands()); + + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + LatticeVal &State = getValueState(I.getOperand(i)); + if (State.isUndefined()) + return; // Operands are not resolved yet... + else if (State.isOverdefined()) { + markOverdefined(IV, &I); + return; + } + assert(State.isConstant() && "Unknown state!"); + Operands.push_back(State.getConstant()); + } + + Constant *Ptr = Operands[0]; + Operands.erase(Operands.begin()); // Erase the pointer from idx list... + + markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0], + Operands.size())); +} + +void SCCPSolver::visitStoreInst(Instruction &SI) { + if (TrackedGlobals.empty() || !isa(SI.getOperand(1))) + return; + GlobalVariable *GV = cast(SI.getOperand(1)); + DenseMap::iterator I = TrackedGlobals.find(GV); + if (I == TrackedGlobals.end() || I->second.isOverdefined()) return; + + // Get the value we are storing into the global. + LatticeVal &PtrVal = getValueState(SI.getOperand(0)); + + mergeInValue(I->second, GV, PtrVal); + if (I->second.isOverdefined()) + TrackedGlobals.erase(I); // No need to keep tracking this! +} + + +// Handle load instructions. If the operand is a constant pointer to a constant +// global, we can replace the load with the loaded constant value! 
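+
+// Illustrative sketch (editorial addition, not part of the imported source):
+// the interesting case handled by visitLoadInst below is IR like
+//
+//   @G = internal constant i32 42
+//   %x = load i32* @G        ; SCCP proves %x == 42
+//
+// i.e. a load's lattice value becomes the initializer whenever the pointer
+// operand resolves to a constant global with a known initializer:
+#if 0
+static Constant *tryFoldLoadOfGlobal(Value *Ptr) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr))
+    if (GV->isConstant() && GV->hasDefinitiveInitializer())
+      return GV->getInitializer();   // the load must yield the initializer
+  return 0;                          // not provable from the pointer alone
+}
+#endif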
+void SCCPSolver::visitLoadInst(LoadInst &I) { + LatticeVal &IV = ValueState[&I]; + if (IV.isOverdefined()) return; + + LatticeVal &PtrVal = getValueState(I.getOperand(0)); + if (PtrVal.isUndefined()) return; // The pointer is not resolved yet! + if (PtrVal.isConstant() && !I.isVolatile()) { + Value *Ptr = PtrVal.getConstant(); + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa(Ptr) && + cast(Ptr->getType())->getAddressSpace() == 0) { + // load null -> null + markConstant(IV, &I, Constant::getNullValue(I.getType())); + return; + } + + // Transform load (constant global) into the value loaded. + if (GlobalVariable *GV = dyn_cast(Ptr)) { + if (GV->isConstant()) { + if (GV->hasDefinitiveInitializer()) { + markConstant(IV, &I, GV->getInitializer()); + return; + } + } else if (!TrackedGlobals.empty()) { + // If we are tracking this global, merge in the known value for it. + DenseMap::iterator It = + TrackedGlobals.find(GV); + if (It != TrackedGlobals.end()) { + mergeInValue(IV, &I, It->second); + return; + } + } + } + + // Transform load (constantexpr_GEP global, 0, ...) into the value loaded. + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->getOpcode() == Instruction::GetElementPtr) + if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) { + markConstant(IV, &I, V); + return; + } + } + + // Otherwise we cannot say for certain what value this load will produce. + // Bail out. + markOverdefined(IV, &I); +} + +void SCCPSolver::visitCallSite(CallSite CS) { + Function *F = CS.getCalledFunction(); + Instruction *I = CS.getInstruction(); + + // The common case is that we aren't tracking the callee, either because we + // are not doing interprocedural analysis or the callee is indirect, or is + // external. Handle these cases first. + if (F == 0 || !F->hasLocalLinkage()) { +CallOverdefined: + // Void return and not tracking callee, just bail. + if (I->getType() == Type::VoidTy) return; + + // Otherwise, if we have a single return value case, and if the function is + // a declaration, maybe we can constant fold it. + if (!isa(I->getType()) && F && F->isDeclaration() && + canConstantFoldCallTo(F)) { + + SmallVector Operands; + for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); + AI != E; ++AI) { + LatticeVal &State = getValueState(*AI); + if (State.isUndefined()) + return; // Operands are not resolved yet. + else if (State.isOverdefined()) { + markOverdefined(I); + return; + } + assert(State.isConstant() && "Unknown state!"); + Operands.push_back(State.getConstant()); + } + + // If we can constant fold this, mark the result of the call as a + // constant. + if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) { + markConstant(I, C); + return; + } + } + + // Otherwise, we don't know anything about this call, mark it overdefined. + markOverdefined(I); + return; + } + + // If this is a single/zero retval case, see if we're tracking the function. + DenseMap::iterator TFRVI = TrackedRetVals.find(F); + if (TFRVI != TrackedRetVals.end()) { + // If so, propagate the return value of the callee into this call result. + mergeInValue(I, TFRVI->second); + } else if (isa(I->getType())) { + // Check to see if we're tracking this callee, if not, handle it in the + // common path above. 
+ DenseMap, LatticeVal>::iterator + TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0)); + if (TMRVI == TrackedMultipleRetVals.end()) + goto CallOverdefined; + + // If we are tracking this callee, propagate the return values of the call + // into this call site. We do this by walking all the uses. Single-index + // ExtractValueInst uses can be tracked; anything more complicated is + // currently handled conservatively. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + if (ExtractValueInst *EVI = dyn_cast(*UI)) { + if (EVI->getNumIndices() == 1) { + mergeInValue(EVI, + TrackedMultipleRetVals[std::make_pair(F, *EVI->idx_begin())]); + continue; + } + } + // The aggregate value is used in a way not handled here. Assume nothing. + markOverdefined(*UI); + } + } else { + // Otherwise we're not tracking this callee, so handle it in the + // common path above. + goto CallOverdefined; + } + + // Finally, if this is the first call to the function hit, mark its entry + // block executable. + if (!BBExecutable.count(F->begin())) + MarkBlockExecutable(F->begin()); + + // Propagate information from this call site into the callee. + CallSite::arg_iterator CAI = CS.arg_begin(); + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI, ++CAI) { + LatticeVal &IV = ValueState[AI]; + if (!IV.isOverdefined()) + mergeInValue(IV, AI, getValueState(*CAI)); + } +} + + +void SCCPSolver::Solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty() || + !OverdefinedInstWorkList.empty()) { + // Process the instruction work list... + while (!OverdefinedInstWorkList.empty()) { + Value *I = OverdefinedInstWorkList.back(); + OverdefinedInstWorkList.pop_back(); + + DOUT << "\nPopped off OI-WL: " << *I; + + // "I" got into the work list because it either made the transition from + // bottom to constant + // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined + // Update all of the users of this instruction's value... + // + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + OperandChangedState(*UI); + } + // Process the instruction work list... + while (!InstWorkList.empty()) { + Value *I = InstWorkList.back(); + InstWorkList.pop_back(); + + DOUT << "\nPopped off I-WL: " << *I; + + // "I" got into the work list because it either made the transition from + // bottom to constant + // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined. + // Update all of the users of this instruction's value... + // + if (!getValueState(I).isOverdefined()) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + OperandChangedState(*UI); + } + + // Process the basic block work list... + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DOUT << "\nPopped off BBWL: " << *BB; + + // Notify all instructions in this basic block that they are newly + // executable. + visit(BB); + } + } +} + +/// ResolvedUndefsIn - While solving the dataflow for a function, we assume +/// that branches on undef values cannot reach any of their successors. +/// However, this is not a safe assumption. After we solve dataflow, this +/// method should be use to handle this. If this returns true, the solver +/// should be rerun. 
+///
+/// This method handles this by finding an unresolved branch and marking one
+/// of the edges from the block as being feasible, even though the condition
+/// doesn't say it would otherwise be.  This allows SCCP to find the rest of
+/// the CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible).  This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other
+/// users of the value.
+///
+/// This scan also checks for values that use undefs, whose results are
+/// actually defined.  For example, 'zext i8 undef to i32' should produce all
+/// zeros conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return
+/// zero, even if X isn't defined.
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (!BBExecutable.count(BB))
+      continue;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // Look for instructions which produce undef values.
+      if (I->getType() == Type::VoidTy) continue;
+
+      LatticeVal &LV = getValueState(I);
+      if (!LV.isUndefined()) continue;
+
+      // Get the lattice values of the first two operands for use below.
+      LatticeVal &Op0LV = getValueState(I->getOperand(0));
+      LatticeVal Op1LV;
+      if (I->getNumOperands() == 2) {
+        // If this is a two-operand instruction, and if both operands are
+        // undefs, the result stays undef.
+        Op1LV = getValueState(I->getOperand(1));
+        if (Op0LV.isUndefined() && Op1LV.isUndefined())
+          continue;
+      }
+
+      // If this is an instruction whose result is defined even if the input
+      // is not fully defined, propagate the information.
+      const Type *ITy = I->getType();
+      switch (I->getOpcode()) {
+      default: break;          // Leave the instruction as an undef.
+      case Instruction::ZExt:
+        // After a zero extend, we know the top part is zero.  SExt doesn't
+        // have to be handled here, because we don't know whether the top part
+        // is 1's or 0's.
+        assert(Op0LV.isUndefined());
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
+        return true;
+      case Instruction::Mul:
+      case Instruction::And:
+        // undef * X -> 0.   X could be zero.
+        // undef & X -> 0.   X could be zero.
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
+        return true;
+
+      case Instruction::Or:
+        // undef | X -> -1.   X could be -1.
+        if (const VectorType *PTy = dyn_cast<VectorType>(ITy))
+          markForcedConstant(LV, I, ConstantVector::getAllOnesValue(PTy));
+        else
+          markForcedConstant(LV, I, ConstantInt::getAllOnesValue(ITy));
+        return true;
+
+      case Instruction::SDiv:
+      case Instruction::UDiv:
+      case Instruction::SRem:
+      case Instruction::URem:
+        // X / undef -> undef.  No change.
+        // X % undef -> undef.  No change.
+        if (Op1LV.isUndefined()) break;
+
+        // undef / X -> 0.   X could be maxint.
+        // undef % X -> 0.   X could be 1.
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
+        return true;
+
+      case Instruction::AShr:
+        // undef >>s X -> undef.  No change.
+        if (Op0LV.isUndefined()) break;
+
+        // X >>s undef -> X.  X could be 0, X could have the high-bit known
+        // set.
+        if (Op0LV.isConstant())
+          markForcedConstant(LV, I, Op0LV.getConstant());
+        else
+          markOverdefined(LV, I);
+        return true;
+      case Instruction::LShr:
+      case Instruction::Shl:
+        // undef >> X -> undef.  No change.
+        // undef << X -> undef.  No change.
+        if (Op0LV.isUndefined()) break;
+
+        // X >> undef -> 0.  X could be 0.
+        // X << undef -> 0.  X could be 0.
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
+        return true;
+      case Instruction::Select:
+        // undef ? X : Y  -> X or Y.  There could be commonality between X/Y.
+        if (Op0LV.isUndefined()) {
+          if (!Op1LV.isConstant())  // Pick the constant one if there is any.
+            Op1LV = getValueState(I->getOperand(2));
+        } else if (Op1LV.isUndefined()) {
+          // c ? undef : undef -> undef.  No change.
+          Op1LV = getValueState(I->getOperand(2));
+          if (Op1LV.isUndefined())
+            break;
+          // Otherwise, c ? undef : x -> x.
+        } else {
+          // Leave Op1LV as Operand(1)'s LatticeValue.
+        }
+
+        if (Op1LV.isConstant())
+          markForcedConstant(LV, I, Op1LV.getConstant());
+        else
+          markOverdefined(LV, I);
+        return true;
+      case Instruction::Call:
+        // If a call has an undef result, it is because it is constant
+        // foldable but one of the inputs was undef.  Just force the result
+        // to overdefined.
+        markOverdefined(LV, I);
+        return true;
+      }
+    }
+
+    TerminatorInst *TI = BB->getTerminator();
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (!BI->isConditional()) continue;
+      if (!getValueState(BI->getCondition()).isUndefined())
+        continue;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      if (SI->getNumSuccessors() < 2)   // no cases
+        continue;
+      if (!getValueState(SI->getCondition()).isUndefined())
+        continue;
+    } else {
+      continue;
+    }
+
+    // If the edge to the second successor isn't thought to be feasible yet,
+    // mark it so now.  We pick the second one so that this goes to some
+    // enumerated value in a switch instead of going to the default
+    // destination.
+    if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1))))
+      continue;
+
+    // Otherwise, it isn't already thought to be feasible.  Mark it as such
+    // now and return.  This will make other blocks reachable, which will
+    // allow new values to be discovered and existing ones to be moved in the
+    // lattice.
+    markEdgeExecutable(BB, TI->getSuccessor(1));
+
+    // This must be a conditional branch or switch on undef.  At this point,
+    // force the old terminator to branch to the first successor.  This is
+    // required because we are now influencing the dataflow of the function
+    // with the assumption that this edge is taken.  If we leave the branch
+    // condition as undef, then further analysis could think the undef went
+    // another way, leading to an inconsistent set of conclusions.
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      BI->setCondition(ConstantInt::getFalse());
+    } else {
+      SwitchInst *SI = cast<SwitchInst>(TI);
+      SI->setCondition(SI->getCaseValue(1));
+    }
+
+    return true;
+  }
+
+  return false;
+}
+
+
+namespace {
+  //===--------------------------------------------------------------------===//
+  //
+  /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+  /// Sparse Conditional Constant Propagator.
+  ///
+  struct VISIBILITY_HIDDEN SCCP : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    SCCP() : FunctionPass(&ID) {}
+
+    // runOnFunction - Run the Sparse Conditional Constant Propagation
+    // algorithm, and return true if the function was modified.
+    //
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+static RegisterPass<SCCP>
+X("sccp", "Sparse Conditional Constant Propagation");
+
+// createSCCPPass - This is the public interface to this file...
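+// For example (an illustrative sketch, not part of this file), a client can
+// schedule the pass with the legacy PassManager:
+//
+//   #include "llvm/PassManager.h"
+//   #include "llvm/Transforms/Scalar.h"
+//
+//   void runSCCP(llvm::Module &M) {   // hypothetical helper
+//     llvm::PassManager PM;
+//     PM.add(llvm::createSCCPPass());
+//     PM.run(M);
+//   }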
+FunctionPass *llvm::createSCCPPass() { + return new SCCP(); +} + + +// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm, +// and return true if the function was modified. +// +bool SCCP::runOnFunction(Function &F) { + DOUT << "SCCP on function '" << F.getNameStart() << "'\n"; + SCCPSolver Solver; + + // Mark the first block of the function as being executable. + Solver.MarkBlockExecutable(F.begin()); + + // Mark all arguments to the function as being overdefined. + for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI) + Solver.markOverdefined(AI); + + // Solve for constants. + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + Solver.Solve(); + DOUT << "RESOLVING UNDEFs\n"; + ResolvedUndefs = Solver.ResolvedUndefsIn(F); + } + + bool MadeChanges = false; + + // If we decided that there are basic blocks that are dead in this function, + // delete their contents now. Note that we cannot actually delete the blocks, + // as we cannot modify the CFG of the function. + // + SmallVector Insts; + std::map &Values = Solver.getValueMapping(); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (!Solver.isBlockExecutable(BB)) { + DOUT << " BasicBlock Dead:" << *BB; + ++NumDeadBlocks; + + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + for (BasicBlock::iterator I = BB->begin(), E = BB->getTerminator(); + I != E; ++I) + Insts.push_back(I); + while (!Insts.empty()) { + Instruction *I = Insts.back(); + Insts.pop_back(); + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + BB->getInstList().erase(I); + MadeChanges = true; + ++NumInstRemoved; + } + } else { + // Iterate over all of the instructions in a function, replacing them with + // constants if we have found them to be of constant values. + // + for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + Instruction *Inst = BI++; + if (Inst->getType() == Type::VoidTy || + isa(Inst)) + continue; + + LatticeVal &IV = Values[Inst]; + if (!IV.isConstant() && !IV.isUndefined()) + continue; + + Constant *Const = IV.isConstant() + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DOUT << " Constant: " << *Const << " = " << *Inst; + + // Replaces all of the uses of a variable with uses of the constant. + Inst->replaceAllUsesWith(Const); + + // Delete the instruction. + Inst->eraseFromParent(); + + // Hey, we just changed something! + MadeChanges = true; + ++NumInstRemoved; + } + } + + return MadeChanges; +} + +namespace { + //===--------------------------------------------------------------------===// + // + /// IPSCCP Class - This class implements interprocedural Sparse Conditional + /// Constant Propagation. + /// + struct VISIBILITY_HIDDEN IPSCCP : public ModulePass { + static char ID; + IPSCCP() : ModulePass(&ID) {} + bool runOnModule(Module &M); + }; +} // end anonymous namespace + +char IPSCCP::ID = 0; +static RegisterPass +Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation"); + +// createIPSCCPPass - This is the public interface to this file... +ModulePass *llvm::createIPSCCPPass() { + return new IPSCCP(); +} + + +static bool AddressIsTaken(GlobalValue *GV) { + // Delete any dead constantexpr klingons. 
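+  // ("Klingons" here are dead ConstantExpr users hanging off the global;
+  // e.g. an otherwise-unused "bitcast (i32* @G to i8*)".  Removing them
+  // first keeps dead constant uses from making the address appear taken in
+  // the loop below.)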
+ GV->removeDeadConstantUsers(); + + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast(*UI)) { + if (SI->getOperand(0) == GV || SI->isVolatile()) + return true; // Storing addr of GV. + } else if (isa(*UI) || isa(*UI)) { + // Make sure we are calling the function, not passing the address. + CallSite CS = CallSite::get(cast(*UI)); + if (CS.hasArgument(GV)) + return true; + } else if (LoadInst *LI = dyn_cast(*UI)) { + if (LI->isVolatile()) + return true; + } else { + return true; + } + return false; +} + +bool IPSCCP::runOnModule(Module &M) { + SCCPSolver Solver; + + // Loop over all functions, marking arguments to those with their addresses + // taken or that are external as overdefined. + // + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + if (!F->hasLocalLinkage() || AddressIsTaken(F)) { + if (!F->isDeclaration()) + Solver.MarkBlockExecutable(F->begin()); + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) + Solver.markOverdefined(AI); + } else { + Solver.AddTrackedFunction(F); + } + + // Loop over global variables. We inform the solver about any internal global + // variables that do not have their 'addresses taken'. If they don't have + // their addresses taken, we can propagate constants through them. + for (Module::global_iterator G = M.global_begin(), E = M.global_end(); + G != E; ++G) + if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G)) + Solver.TrackValueOfGlobalVariable(G); + + // Solve for constants. + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + Solver.Solve(); + + DOUT << "RESOLVING UNDEFS\n"; + ResolvedUndefs = false; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + ResolvedUndefs |= Solver.ResolvedUndefsIn(*F); + } + + bool MadeChanges = false; + + // Iterate over all of the instructions in the module, replacing them with + // constants if we have found them to be of constant values. + // + SmallVector Insts; + SmallVector BlocksToErase; + std::map &Values = Solver.getValueMapping(); + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) + if (!AI->use_empty()) { + LatticeVal &IV = Values[AI]; + if (IV.isConstant() || IV.isUndefined()) { + Constant *CST = IV.isConstant() ? + IV.getConstant() : UndefValue::get(AI->getType()); + DOUT << "*** Arg " << *AI << " = " << *CST <<"\n"; + + // Replaces all of the uses of a variable with uses of the + // constant. + AI->replaceAllUsesWith(CST); + ++IPNumArgsElimed; + } + } + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (!Solver.isBlockExecutable(BB)) { + DOUT << " BasicBlock Dead:" << *BB; + ++IPNumDeadBlocks; + + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. 
+ TerminatorInst *TI = BB->getTerminator(); + for (BasicBlock::iterator I = BB->begin(), E = TI; I != E; ++I) + Insts.push_back(I); + + while (!Insts.empty()) { + Instruction *I = Insts.back(); + Insts.pop_back(); + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + BB->getInstList().erase(I); + MadeChanges = true; + ++IPNumInstRemoved; + } + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + if (!Succ->empty() && isa(Succ->begin())) + TI->getSuccessor(i)->removePredecessor(BB); + } + if (!TI->use_empty()) + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + BB->getInstList().erase(TI); + + if (&*BB != &F->front()) + BlocksToErase.push_back(BB); + else + new UnreachableInst(BB); + + } else { + for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + Instruction *Inst = BI++; + if (Inst->getType() == Type::VoidTy) + continue; + + LatticeVal &IV = Values[Inst]; + if (!IV.isConstant() && !IV.isUndefined()) + continue; + + Constant *Const = IV.isConstant() + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DOUT << " Constant: " << *Const << " = " << *Inst; + + // Replaces all of the uses of a variable with uses of the + // constant. + Inst->replaceAllUsesWith(Const); + + // Delete the instruction. + if (!isa(Inst) && !isa(Inst)) + Inst->eraseFromParent(); + + // Hey, we just changed something! + MadeChanges = true; + ++IPNumInstRemoved; + } + } + + // Now that all instructions in the function are constant folded, erase dead + // blocks, because we can now use ConstantFoldTerminator to get rid of + // in-edges. + for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) { + // If there are any PHI nodes in this successor, drop entries for BB now. + BasicBlock *DeadBB = BlocksToErase[i]; + while (!DeadBB->use_empty()) { + Instruction *I = cast(DeadBB->use_back()); + bool Folded = ConstantFoldTerminator(I->getParent()); + if (!Folded) { + // The constant folder may not have been able to fold the terminator + // if this is a branch or switch on undef. Fold it manually as a + // branch to the first successor. +#ifndef NDEBUG + if (BranchInst *BI = dyn_cast(I)) { + assert(BI->isConditional() && isa(BI->getCondition()) && + "Branch should be foldable!"); + } else if (SwitchInst *SI = dyn_cast(I)) { + assert(isa(SI->getCondition()) && "Switch should fold"); + } else { + assert(0 && "Didn't fold away reference to block!"); + } +#endif + + // Make this an uncond branch to the first successor. + TerminatorInst *TI = I->getParent()->getTerminator(); + BranchInst::Create(TI->getSuccessor(0), TI); + + // Remove entries in successor phi nodes to remove edges. + for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i) + TI->getSuccessor(i)->removePredecessor(TI->getParent()); + + // Remove the old terminator. + TI->eraseFromParent(); + } + } + + // Finally, delete the basic block. + F->getBasicBlockList().erase(DeadBB); + } + BlocksToErase.clear(); + } + + // If we inferred constant or undef return values for a function, we replaced + // all call uses with the inferred value. This means we don't need to bother + // actually returning anything from the function. Replace all return + // instructions with return undef. + // TODO: Process multiple value ret instructions also. 
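+  // Illustrative IR for this rewrite (a sketch; the function name is
+  // hypothetical):
+  //
+  //   define internal i32 @f() {   ; SCCP proved @f always returns 7
+  //     ret i32 7
+  //   }
+  //
+  // becomes
+  //
+  //   define internal i32 @f() {
+  //     ret i32 undef              ; every call site already uses 7
+  //   }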
+  const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+  for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+         E = RV.end(); I != E; ++I)
+    if (!I->second.isOverdefined() &&
+        I->first->getReturnType() != Type::VoidTy) {
+      Function *F = I->first;
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+        if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+          if (!isa<UndefValue>(RI->getOperand(0)))
+            RI->setOperand(0, UndefValue::get(F->getReturnType()));
+    }
+
+  // If we inferred constant or undef values for global variables, we can
+  // delete the global and any stores that remain to it.
+  const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+  for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+         E = TG.end(); I != E; ++I) {
+    GlobalVariable *GV = I->first;
+    assert(!I->second.isOverdefined() &&
+           "Overdefined values should have been taken out of the map!");
+    DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n";
+    while (!GV->use_empty()) {
+      StoreInst *SI = cast<StoreInst>(GV->use_back());
+      SI->eraseFromParent();
+    }
+    M.getGlobalList().erase(GV);
+    ++IPNumGlobalConst;
+  }
+
+  return MadeChanges;
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 000000000000..5669da0f1a4a
--- /dev/null
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,111 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation.
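+//
+// A client typically drives these through the C API, e.g. (an illustrative
+// sketch using the pass-manager helpers from llvm-c/Core.h; "Mod" is assumed
+// to be an existing LLVMModuleRef):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddInstructionCombiningPass(PM);
+//   LLVMAddSCCPPass(PM);
+//   LLVMAddCFGSimplificationPass(PM);
+//   LLVMRunPassManager(PM, Mod);
+//   LLVMDisposePassManager(PM);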
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Transforms/Scalar.h" +#include "llvm/PassManager.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAggressiveDCEPass()); +} + +void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCFGSimplificationPass()); +} + +void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCondPropagationPass()); +} + +void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadStoreEliminationPass()); +} + +void LLVMAddGVNPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGVNPass()); +} + +void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIndVarSimplifyPass()); +} + +void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createInstructionCombiningPass()); +} + +void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createJumpThreadingPass()); +} + +void LLVMAddLICMPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLICMPass()); +} + +void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopDeletionPass()); +} + +void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIndexSplitPass()); +} + +void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopRotatePass()); +} + +void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopUnrollPass()); +} + +void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopUnswitchPass()); +} + +void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMemCpyOptPass()); +} + +void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPromoteMemoryToRegisterPass()); +} + +void LLVMAddReassociatePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createReassociatePass()); +} + +void LLVMAddSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSCCPPass()); +} + +void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass()); +} + +void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSimplifyLibCallsPass()); +} + +void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTailCallEliminationPass()); +} + +void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createConstantPropagationPass()); +} + +void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDemoteRegisterToMemoryPass()); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp new file mode 100644 index 000000000000..9935f12f893b --- /dev/null +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -0,0 +1,1820 @@ +//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation implements the well known scalar replacement of +// aggregates transformation. 
This xform breaks up alloca instructions of +// aggregate type (structure or array) into individual alloca instructions for +// each member (if possible). Then, if possible, it transforms the individual +// alloca instructions into nice clean scalar SSA form. +// +// This combines a simple SRoA algorithm with the Mem2Reg algorithm because +// often interact, especially for C++ programs. As such, iterating between +// SRoA, then Mem2Reg until we run out of things to promote works well. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalarrepl" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +STATISTIC(NumReplaced, "Number of allocas broken up"); +STATISTIC(NumPromoted, "Number of allocas promoted"); +STATISTIC(NumConverted, "Number of aggregates converted to scalar"); +STATISTIC(NumGlobals, "Number of allocas copied from constant global"); + +namespace { + struct VISIBILITY_HIDDEN SROA : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + explicit SROA(signed T = -1) : FunctionPass(&ID) { + if (T == -1) + SRThreshold = 128; + else + SRThreshold = T; + } + + bool runOnFunction(Function &F); + + bool performScalarRepl(Function &F); + bool performPromotion(Function &F); + + // getAnalysisUsage - This pass does not require any passes, but we know it + // will not alter the CFG, so say so. + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); + } + + private: + TargetData *TD; + + /// AllocaInfo - When analyzing uses of an alloca instruction, this captures + /// information about the uses. All these fields are initialized to false + /// and set to true when something is learned. + struct AllocaInfo { + /// isUnsafe - This is set to true if the alloca cannot be SROA'd. + bool isUnsafe : 1; + + /// needsCleanup - This is set to true if there is some use of the alloca + /// that requires cleanup. + bool needsCleanup : 1; + + /// isMemCpySrc - This is true if this aggregate is memcpy'd from. + bool isMemCpySrc : 1; + + /// isMemCpyDst - This is true if this aggregate is memcpy'd into. 
+      bool isMemCpyDst : 1;
+
+      AllocaInfo()
+        : isUnsafe(false), needsCleanup(false),
+          isMemCpySrc(false), isMemCpyDst(false) {}
+    };
+
+    unsigned SRThreshold;
+
+    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+
+    int isSafeAllocaToScalarRepl(AllocationInst *AI);
+
+    void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
+                               AllocaInfo &Info);
+    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
+                          AllocaInfo &Info);
+    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
+                                        unsigned OpNo, AllocaInfo &Info);
+    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
+                                        AllocaInfo &Info);
+
+    void DoScalarReplacement(AllocationInst *AI,
+                             std::vector<AllocationInst*> &WorkList);
+    void CleanupGEP(GetElementPtrInst *GEP);
+    void CleanupAllocaUsers(AllocationInst *AI);
+    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
+
+    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
+                                    SmallVector<AllocaInst*, 32> &NewElts);
+
+    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
+                                      AllocationInst *AI,
+                                      SmallVector<AllocaInst*, 32> &NewElts);
+    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+                                       SmallVector<AllocaInst*, 32> &NewElts);
+    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+                                      SmallVector<AllocaInst*, 32> &NewElts);
+
+    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
+                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
+    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+                                      uint64_t Offset, IRBuilder<> &Builder);
+    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+                                     uint64_t Offset, IRBuilder<> &Builder);
+    static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
+  };
+}
+
+char SROA::ID = 0;
+static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
+  return new SROA(Threshold);
+}
+
+
+bool SROA::runOnFunction(Function &F) {
+  TD = &getAnalysis<TargetData>();
+
+  bool Changed = performPromotion(F);
+  while (1) {
+    bool LocalChange = performScalarRepl(F);
+    if (!LocalChange) break;   // No need to repromote if no scalarrepl
+    Changed = true;
+    LocalChange = performPromotion(F);
+    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
+  }
+
+  return Changed;
+}
+
+
+bool SROA::performPromotion(Function &F) {
+  std::vector<AllocaInst*> Allocas;
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
+
+  bool Changed = false;
+
+  while (1) {
+    Allocas.clear();
+
+    // Find allocas that are safe to promote, by looking at all instructions
+    // in the entry node
+    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
+        if (isAllocaPromotable(AI))
+          Allocas.push_back(AI);
+
+    if (Allocas.empty()) break;
+
+    PromoteMemToReg(Allocas, DT, DF);
+    NumPromoted += Allocas.size();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// getNumSAElements - Return the number of elements in the specified struct
+/// or array.
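+/// For example, both {i32, float, i8} and [3 x double] yield 3.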
+static uint64_t getNumSAElements(const Type *T) { + if (const StructType *ST = dyn_cast(T)) + return ST->getNumElements(); + return cast(T)->getNumElements(); +} + +// performScalarRepl - This algorithm is a simple worklist driven algorithm, +// which runs on all of the malloc/alloca instructions in the function, removing +// them if they are only used by getelementptr instructions. +// +bool SROA::performScalarRepl(Function &F) { + std::vector WorkList; + + // Scan the entry basic block, adding any alloca's and mallocs to the worklist + BasicBlock &BB = F.getEntryBlock(); + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) + if (AllocationInst *A = dyn_cast(I)) + WorkList.push_back(A); + + // Process the worklist + bool Changed = false; + while (!WorkList.empty()) { + AllocationInst *AI = WorkList.back(); + WorkList.pop_back(); + + // Handle dead allocas trivially. These can be formed by SROA'ing arrays + // with unused elements. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + // If this alloca is impossible for us to promote, reject it early. + if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) + continue; + + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + DOUT << "Found alloca equal to global: " << *AI; + DOUT << " memcpy = " << *TheCopy; + Constant *TheSrc = cast(TheCopy->getOperand(2)); + AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); + TheCopy->eraseFromParent(); // Don't mutate the global. + AI->eraseFromParent(); + ++NumGlobals; + Changed = true; + continue; + } + + // Check to see if we can perform the core SROA transformation. We cannot + // transform the allocation instruction if it is an array allocation + // (allocations OF arrays are ok though), and an allocation of a scalar + // value cannot be decomposed at all. + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + + // Do not promote any struct whose size is too big. + if (AllocaSize > SRThreshold) continue; + + if ((isa(AI->getAllocatedType()) || + isa(AI->getAllocatedType())) && + // Do not promote any struct into more than "32" separate vars. + getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) { + // Check that all of the users of the allocation are capable of being + // transformed. + switch (isSafeAllocaToScalarRepl(AI)) { + default: assert(0 && "Unexpected value!"); + case 0: // Not safe to scalar replace. + break; + case 1: // Safe, but requires cleanup/canonicalizations first + CleanupAllocaUsers(AI); + // FALL THROUGH. + case 3: // Safe to scalar replace. + DoScalarReplacement(AI, WorkList); + Changed = true; + continue; + } + } + + // If we can turn this aggregate value (potentially with casts) into a + // simple scalar value that can be mem2reg'd into a register value. + // IsNotTrivial tracks whether this is something that mem2reg could have + // promoted itself. If so, we don't want to transform it needlessly. Note + // that we can't just check based on the type: the alloca may be of an i32 + // but that has pointer arithmetic to set byte 3 of it or something. 
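+    // For instance (illustrative IR), an i32 alloca may still be accessed
+    // byte-wise through casts:
+    //   %P = alloca i32
+    //   %B = bitcast i32* %P to i8*
+    //   %Q = getelementptr i8* %B, i32 3
+    //   store i8 0, i8* %Q
+    // mem2reg cannot promote %P directly, but the conversion below can model
+    // it with integer insert/extract operations.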
+ bool IsNotTrivial = false; + const Type *VectorTy = 0; + bool HadAVector = false; + if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector, + 0, unsigned(AllocaSize)) && IsNotTrivial) { + AllocaInst *NewAI; + // If we were able to find a vector type that can handle this with + // insert/extract elements, and if there was at least one use that had + // a vector type, promote this to a vector. We don't want to promote + // random stuff that doesn't use vectors (e.g. <9 x double>) because then + // we just get a lot of insert/extracts. If at least one vector is + // involved, then we probably really do have a union of vector/array. + if (VectorTy && isa(VectorTy) && HadAVector) { + DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; + + // Create and insert the vector alloca. + NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0); + } else { + DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; + + // Create and insert the integer alloca. + const Type *NewTy = IntegerType::get(AllocaSize*8); + NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0); + } + NewAI->takeName(AI); + AI->eraseFromParent(); + ++NumConverted; + Changed = true; + continue; + } + + // Otherwise, couldn't process this alloca. + } + + return Changed; +} + +/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl +/// predicate, do SROA now. +void SROA::DoScalarReplacement(AllocationInst *AI, + std::vector &WorkList) { + DOUT << "Found inst to SROA: " << *AI; + SmallVector ElementAllocas; + if (const StructType *ST = dyn_cast(AI->getAllocatedType())) { + ElementAllocas.reserve(ST->getNumContainedTypes()); + for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AI->getAlignment(), + AI->getName() + "." + utostr(i), AI); + ElementAllocas.push_back(NA); + WorkList.push_back(NA); // Add to worklist for recursive processing + } + } else { + const ArrayType *AT = cast(AI->getAllocatedType()); + ElementAllocas.reserve(AT->getNumElements()); + const Type *ElTy = AT->getElementType(); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), + AI->getName() + "." + utostr(i), AI); + ElementAllocas.push_back(NA); + WorkList.push_back(NA); // Add to worklist for recursive processing + } + } + + // Now that we have created the alloca instructions that we want to use, + // expand the getelementptr instructions to use them. 
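+  // For example (a sketch): with %A = alloca {i32, i32} split into %A.0 and
+  // %A.1, a use such as
+  //   %t = getelementptr {i32, i32}* %A, i32 0, i32 1
+  // is rewritten so that the users of %t use %A.1 directly.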
+ // + while (!AI->use_empty()) { + Instruction *User = cast(AI->use_back()); + if (BitCastInst *BCInst = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); + BCInst->eraseFromParent(); + continue; + } + + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + if (LoadInst *LI = dyn_cast(User)) { + Value *Insert = UndefValue::get(LI->getType()); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Load = new LoadInst(ElementAllocas[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + LI->eraseFromParent(); + continue; + } + + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, ElementAllocas[i], SI); + } + SI->eraseFromParent(); + continue; + } + + GetElementPtrInst *GEPI = cast(User); + // We now know that the GEP is of the form: GEP , 0, + unsigned Idx = + (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); + + assert(Idx < ElementAllocas.size() && "Index out of range?"); + AllocaInst *AllocaToUse = ElementAllocas[Idx]; + + Value *RepValue; + if (GEPI->getNumOperands() == 3) { + // Do not insert a new getelementptr instruction with zero indices, only + // to have it optimized out later. + RepValue = AllocaToUse; + } else { + // We are indexing deeply into the structure, so we still need a + // getelement ptr instruction to finish the indexing. This may be + // expanded itself once the worklist is rerun. + // + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); + NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); + RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), + NewArgs.end(), "", GEPI); + RepValue->takeName(GEPI); + } + + // If this GEP is to the start of the aggregate, check for memcpys. + if (Idx == 0 && GEPI->hasAllZeroIndices()) + RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); + + // Move all of the users over to the new GEP. + GEPI->replaceAllUsesWith(RepValue); + // Delete the old GEP + GEPI->eraseFromParent(); + } + + // Finally, delete the Alloca instruction + AI->eraseFromParent(); + NumReplaced++; +} + + +/// isSafeElementUse - Check to see if this use is an allowed use for a +/// getelementptr instruction of an array aggregate allocation. isFirstElt +/// indicates whether Ptr is known to the start of the aggregate. 
+/// +void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); + I != E; ++I) { + Instruction *User = cast(*I); + switch (User->getOpcode()) { + case Instruction::Load: break; + case Instruction::Store: + // Store is ok if storing INTO the pointer, not storing the pointer + if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); + break; + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast(User); + bool AreAllZeroIndices = isFirstElt; + if (GEP->getNumOperands() > 1) { + if (!isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero()) + // Using pointer arithmetic to navigate the array. + return MarkUnsafe(Info); + + if (AreAllZeroIndices) + AreAllZeroIndices = GEP->hasAllZeroIndices(); + } + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); + if (Info.isUnsafe) return; + break; + } + case Instruction::BitCast: + if (isFirstElt) { + isSafeUseOfBitCastedAllocation(cast(User), AI, Info); + if (Info.isUnsafe) return; + break; + } + DOUT << " Transformation preventing inst: " << *User; + return MarkUnsafe(Info); + case Instruction::Call: + if (MemIntrinsic *MI = dyn_cast(User)) { + if (isFirstElt) { + isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); + if (Info.isUnsafe) return; + break; + } + } + DOUT << " Transformation preventing inst: " << *User; + return MarkUnsafe(Info); + default: + DOUT << " Transformation preventing inst: " << *User; + return MarkUnsafe(Info); + } + } + return; // All users look ok :) +} + +/// AllUsersAreLoads - Return true if all users of this value are loads. +static bool AllUsersAreLoads(Value *Ptr) { + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); + I != E; ++I) + if (cast(*I)->getOpcode() != Instruction::Load) + return false; + return true; +} + +/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an +/// aggregate allocation. +/// +void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, + AllocaInfo &Info) { + if (BitCastInst *C = dyn_cast(User)) + return isSafeUseOfBitCastedAllocation(C, AI, Info); + + if (LoadInst *LI = dyn_cast(User)) + if (!LI->isVolatile()) + return;// Loads (returning a first class aggregrate) are always rewritable + + if (StoreInst *SI = dyn_cast(User)) + if (!SI->isVolatile() && SI->getOperand(0) != AI) + return;// Store is ok if storing INTO the pointer, not storing the pointer + + GetElementPtrInst *GEPI = dyn_cast(User); + if (GEPI == 0) + return MarkUnsafe(Info); + + gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); + + // The GEP is not safe to transform if not of the form "GEP , 0, ". + if (I == E || + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { + return MarkUnsafe(Info); + } + + ++I; + if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? + + bool IsAllZeroIndices = true; + + // If the first index is a non-constant index into an array, see if we can + // handle it as a special case. + if (const ArrayType *AT = dyn_cast(*I)) { + if (!isa(I.getOperand())) { + IsAllZeroIndices = 0; + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements in + // it, in which case we CAN promote it, but we have to canonicalize this + // out if this is the only problem. 
+ if ((NumElements == 1 || NumElements == 2) && + AllUsersAreLoads(GEPI)) { + Info.needsCleanup = true; + return; // Canonicalization required! + } + return MarkUnsafe(Info); + } + } + + // Walk through the GEP type indices, checking the types that this indexes + // into. + for (; I != E; ++I) { + // Ignore struct elements, no extra checking needed for these. + if (isa(*I)) + continue; + + ConstantInt *IdxVal = dyn_cast(I.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); + + // Are all indices still zero? + IsAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*I)) { + // This GEP indexes an array. Verify that this is an in-range constant + // integer. Specifically, consider A[0][i]. We cannot know that the user + // isn't doing invalid things like allowing i to index an out-of-range + // subscript that accesses A[1]. Because of this, we have to reject SROA + // of any accesses into structs where any of the components are variables. + if (IdxVal->getZExtValue() >= AT->getNumElements()) + return MarkUnsafe(Info); + } else if (const VectorType *VT = dyn_cast(*I)) { + if (IdxVal->getZExtValue() >= VT->getNumElements()) + return MarkUnsafe(Info); + } + } + + // If there are any non-simple uses of this getelementptr, make sure to reject + // them. + return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); +} + +/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory +/// intrinsic can be promoted by SROA. At this point, we know that the operand +/// of the memintrinsic is a pointer to the beginning of the allocation. +void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, + unsigned OpNo, AllocaInfo &Info) { + // If not constant length, give up. + ConstantInt *Length = dyn_cast(MI->getLength()); + if (!Length) return MarkUnsafe(Info); + + // If not the whole aggregate, give up. + if (Length->getZExtValue() != + TD->getTypeAllocSize(AI->getType()->getElementType())) + return MarkUnsafe(Info); + + // We only know about memcpy/memset/memmove. + if (!isa(MI)) + return MarkUnsafe(Info); + + // Otherwise, we can transform it. Determine whether this is a memcpy/set + // into or out of the aggregate. + if (OpNo == 1) + Info.isMemCpyDst = true; + else { + assert(OpNo == 2); + Info.isMemCpySrc = true; + } +} + +/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast +/// are +void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCU = dyn_cast(UI)) { + isSafeUseOfBitCastedAllocation(BCU, AI, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); + } else if (StoreInst *SI = dyn_cast(UI)) { + if (SI->isVolatile()) + return MarkUnsafe(Info); + + // If storing the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and store through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. 
+ if (isa(SI->getOperand(0)->getType()) && + TD->getTypeAllocSize(SI->getOperand(0)->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpyDst = true; + continue; + } + return MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(UI)) { + if (LI->isVolatile()) + return MarkUnsafe(Info); + + // If loading the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and load through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa(LI->getType()) && + TD->getTypeAllocSize(LI->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpySrc = true; + continue; + } + return MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(BC)) { + Info.needsCleanup = true; + return; + } + else + MarkUnsafe(Info); + } + else { + return MarkUnsafe(Info); + } + if (Info.isUnsafe) return; + } +} + +/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes +/// to its first element. Transform users of the cast to use the new values +/// instead. +void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, + SmallVector &NewElts) { + Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); + while (UI != UE) { + Instruction *User = cast(*UI++); + if (BitCastInst *BCU = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCU, AI, NewElts); + if (BCU->use_empty()) BCU->eraseFromParent(); + continue; + } + + if (MemIntrinsic *MI = dyn_cast(User)) { + // This must be memcpy/memmove/memset of the entire aggregate. + // Split into one per element. + RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); + continue; + } + + if (StoreInst *SI = dyn_cast(User)) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + continue; + } + + if (LoadInst *LI = dyn_cast(User)) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + continue; + } + + // Otherwise it must be some other user of a gep of the first pointer. Just + // leave these alone. + continue; + } +} + +/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. +/// Rewrite it to copy or set the elements of the scalarized memory. +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, + AllocationInst *AI, + SmallVector &NewElts) { + + // If this is a memcpy/memmove, construct the other pointer as the + // appropriate type. The "Other" pointer is the pointer that goes to memory + // that doesn't have anything to do with the alloca that we are promoting. For + // memset, this Value* stays null. + Value *OtherPtr = 0; + unsigned MemAlignment = MI->getAlignment(); + if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy + if (BCInst == MTI->getRawDest()) + OtherPtr = MTI->getRawSource(); + else { + assert(BCInst == MTI->getRawSource()); + OtherPtr = MTI->getRawDest(); + } + } + + // If there is an other pointer, we want to convert it to the same pointer + // type as AI has, so we can GEP through it safely. + if (OtherPtr) { + // It is likely that OtherPtr is a bitcast, if so, remove it. 
+ if (BitCastInst *BC = dyn_cast(OtherPtr)) + OtherPtr = BC->getOperand(0); + // All zero GEPs are effectively bitcasts. + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) + if (GEP->hasAllZeroIndices()) + OtherPtr = GEP->getOperand(0); + + if (ConstantExpr *BCE = dyn_cast(OtherPtr)) + if (BCE->getOpcode() == Instruction::BitCast) + OtherPtr = BCE->getOperand(0); + + // If the pointer is not the right type, insert a bitcast to the right + // type. + if (OtherPtr->getType() != AI->getType()) + OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), + MI); + } + + // Process each element of the aggregate. + Value *TheFn = MI->getOperand(0); + const Type *BytePtrTy = MI->getRawDest()->getType(); + bool SROADest = MI->getRawDest() == BCInst; + + Constant *Zero = Constant::getNullValue(Type::Int32Ty); + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // If this is a memcpy/memmove, emit a GEP of the other element address. + Value *OtherElt = 0; + unsigned OtherEltAlign = MemAlignment; + + if (OtherPtr) { + Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; + OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, + OtherPtr->getNameStr()+"."+utostr(i), + MI); + uint64_t EltOffset; + const PointerType *OtherPtrTy = cast(OtherPtr->getType()); + if (const StructType *ST = + dyn_cast(OtherPtrTy->getElementType())) { + EltOffset = TD->getStructLayout(ST)->getElementOffset(i); + } else { + const Type *EltTy = + cast(OtherPtr->getType())->getElementType(); + EltOffset = TD->getTypeAllocSize(EltTy)*i; + } + + // The alignment of the other pointer is the guaranteed alignment of the + // element, which is affected by both the known alignment of the whole + // mem intrinsic and the alignment of the element. If the alignment of + // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the + // known alignment is just 4 bytes. + OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); + } + + Value *EltPtr = NewElts[i]; + const Type *EltTy = cast(EltPtr->getType())->getElementType(); + + // If we got down to a scalar, insert a load or store as appropriate. + if (EltTy->isSingleValueType()) { + if (isa(MI)) { + if (SROADest) { + // From Other to Alloca. + Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); + new StoreInst(Elt, EltPtr, MI); + } else { + // From Alloca to Other. + Value *Elt = new LoadInst(EltPtr, "tmp", MI); + new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); + } + continue; + } + assert(isa(MI)); + + // If the stored element is zero (common case), just store a null + // constant. + Constant *StoreVal; + if (ConstantInt *CI = dyn_cast(MI->getOperand(2))) { + if (CI->isZero()) { + StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> + } else { + // If EltTy is a vector type, get the element type. + const Type *ValTy = EltTy; + if (const VectorType *VTy = dyn_cast(ValTy)) + ValTy = VTy->getElementType(); + + // Construct an integer with the right value. + unsigned EltSize = TD->getTypeSizeInBits(ValTy); + APInt OneVal(EltSize, CI->getZExtValue()); + APInt TotalVal(OneVal); + // Set each byte. + for (unsigned i = 0; 8*i < EltSize; ++i) { + TotalVal = TotalVal.shl(8); + TotalVal |= OneVal; + } + + // Convert the integer value to the appropriate type. 
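+        // (Worked example for the loop above: a memset value of 0xAB with a
+        // 32-bit element gives OneVal = 0x000000AB, and the shift/or steps
+        // accumulate TotalVal = 0xABABABAB.)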
+ StoreVal = ConstantInt::get(TotalVal); + if (isa(ValTy)) + StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); + else if (ValTy->isFloatingPoint()) + StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); + assert(StoreVal->getType() == ValTy && "Type mismatch!"); + + // If the requested value was a vector constant, create it. + if (EltTy != ValTy) { + unsigned NumElts = cast(ValTy)->getNumElements(); + SmallVector Elts(NumElts, StoreVal); + StoreVal = ConstantVector::get(&Elts[0], NumElts); + } + } + new StoreInst(StoreVal, EltPtr, MI); + continue; + } + // Otherwise, if we're storing a byte variable, use a memset call for + // this element. + } + + // Cast the element pointer to BytePtrTy. + if (EltPtr->getType() != BytePtrTy) + EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); + + // Cast the other pointer (if we have one) to BytePtrTy. + if (OtherElt && OtherElt->getType() != BytePtrTy) + OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), + MI); + + unsigned EltSize = TD->getTypeAllocSize(EltTy); + + // Finally, insert the meminst for this element. + if (isa(MI)) { + Value *Ops[] = { + SROADest ? EltPtr : OtherElt, // Dest ptr + SROADest ? OtherElt : EltPtr, // Src ptr + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(Type::Int32Ty, OtherEltAlign) // Align + }; + CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + } else { + assert(isa(MI)); + Value *Ops[] = { + EltPtr, MI->getOperand(2), // Dest, Value, + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + Zero // Align + }; + CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + } + } + MI->eraseFromParent(); +} + +/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// overwrites the entire allocation. Extract out the pieces of the stored +/// integer and store them individually. +void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, + AllocationInst *AI, + SmallVector &NewElts){ + // Extract each element out of the integer according to its structure offset + // and store the element value to the individual alloca. + Value *SrcVal = SI->getOperand(0); + const Type *AllocaEltTy = AI->getType()->getElementType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + + // If this isn't a store of an integer to the whole alloca, it may be a store + // to the first element. Just ignore the store in this case and normal SROA + // will handle it. + if (!isa(SrcVal->getType()) || + TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) + return; + // Handle tail padding by extending the operand + if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) + SrcVal = new ZExtInst(SrcVal, IntegerType::get(AllocaSizeBits), "", SI); + + DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + + // There are two forms here: AI could be an array or struct. Both cases + // have different ways to compute the element offset. + if (const StructType *EltSTy = dyn_cast(AllocaEltTy)) { + const StructLayout *Layout = TD->getStructLayout(EltSTy); + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // Get the number of bits to shift SrcVal to get the value. 
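+      // (Illustrative: for {i8, i32} stored as an i64, field 1 sits at bit
+      // offset 32 on a little-endian target, so SrcVal is shifted right by
+      // 32 and truncated to i32; on big-endian targets the shift amount is
+      // mirrored, as computed below.)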
+ const Type *FieldTy = EltSTy->getElementType(i); + uint64_t Shift = Layout->getElementOffsetInBits(i); + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); + + Value *EltVal = SrcVal; + if (Shift) { + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, + "sroa.store.elt", SI); + } + + // Truncate down to an integer of the right size. + uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); + + // Ignore zero sized fields like {}, they obviously contain no data. + if (FieldSizeBits == 0) continue; + + if (FieldSizeBits != AllocaSizeBits) + EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); + Value *DestField = NewElts[i]; + if (EltVal->getType() == FieldTy) { + // Storing to an integer field of this size, just do it. + } else if (FieldTy->isFloatingPoint() || isa(FieldTy)) { + // Bitcast to the right element type (for fp/vector values). + EltVal = new BitCastInst(EltVal, FieldTy, "", SI); + } else { + // Otherwise, bitcast the dest pointer (for aggregates). + DestField = new BitCastInst(DestField, + PointerType::getUnqual(EltVal->getType()), + "", SI); + } + new StoreInst(EltVal, DestField, SI); + } + + } else { + const ArrayType *ATy = cast(AllocaEltTy); + const Type *ArrayEltTy = ATy->getElementType(); + uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); + uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); + + uint64_t Shift; + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-ElementOffset; + else + Shift = 0; + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // Ignore zero sized fields like {}, they obviously contain no data. + if (ElementSizeBits == 0) continue; + + Value *EltVal = SrcVal; + if (Shift) { + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, + "sroa.store.elt", SI); + } + + // Truncate down to an integer of the right size. + if (ElementSizeBits != AllocaSizeBits) + EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); + Value *DestField = NewElts[i]; + if (EltVal->getType() == ArrayEltTy) { + // Storing to an integer field of this size, just do it. + } else if (ArrayEltTy->isFloatingPoint() || isa(ArrayEltTy)) { + // Bitcast to the right element type (for fp/vector values). + EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); + } else { + // Otherwise, bitcast the dest pointer (for aggregates). + DestField = new BitCastInst(DestField, + PointerType::getUnqual(EltVal->getType()), + "", SI); + } + new StoreInst(EltVal, DestField, SI); + + if (TD->isBigEndian()) + Shift -= ElementOffset; + else + Shift += ElementOffset; + } + } + + SI->eraseFromParent(); +} + +/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// an integer. Load the individual pieces to form the aggregate value. +void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, + SmallVector &NewElts) { + // Extract each element out of the NewElts according to its structure offset + // and form the result value. + const Type *AllocaEltTy = AI->getType()->getElementType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + + // If this isn't a load of the whole alloca to an integer, it may be a load + // of the first element. Just ignore the load in this case and normal SROA + // will handle it. 
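+  // Illustrative rewrite performed here (a sketch, little-endian layout):
+  //   %V = load i64* %B        ; %B is {i32, i32}* %A casted to i64*
+  // becomes, in effect,
+  //   %E0 = load i32* %A.0
+  //   %E1 = load i32* %A.1
+  //   %V  = or (zext %E0 to i64), (shl (zext %E1 to i64), 32)
+  // via the shift/or sequence emitted below.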
+ if (!isa(LI->getType()) || + TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) + return; + + DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + + // There are two forms here: AI could be an array or struct. Both cases + // have different ways to compute the element offset. + const StructLayout *Layout = 0; + uint64_t ArrayEltBitOffset = 0; + if (const StructType *EltSTy = dyn_cast(AllocaEltTy)) { + Layout = TD->getStructLayout(EltSTy); + } else { + const Type *ArrayEltTy = cast(AllocaEltTy)->getElementType(); + ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); + } + + Value *ResultVal = Constant::getNullValue(IntegerType::get(AllocaSizeBits)); + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // Load the value from the alloca. If the NewElt is an aggregate, cast + // the pointer to an integer of the same size before doing the load. + Value *SrcField = NewElts[i]; + const Type *FieldTy = + cast(SrcField->getType())->getElementType(); + uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); + + // Ignore zero sized fields like {}, they obviously contain no data. + if (FieldSizeBits == 0) continue; + + const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits); + if (!isa(FieldTy) && !FieldTy->isFloatingPoint() && + !isa(FieldTy)) + SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), + "", LI); + SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); + + // If SrcField is a fp or vector of the right size but that isn't an + // integer type, bitcast to an integer so we can shift it. + if (SrcField->getType() != FieldIntTy) + SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); + + // Zero extend the field to be the same size as the final alloca so that + // we can shift and insert it. + if (SrcField->getType() != ResultVal->getType()) + SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); + + // Determine the number of bits to shift SrcField. + uint64_t Shift; + if (Layout) // Struct case. + Shift = Layout->getElementOffsetInBits(i); + else // Array case. + Shift = i*ArrayEltBitOffset; + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); + + if (Shift) { + Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); + SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); + } + + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + } + + // Handle tail padding by truncating the result + if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) + ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); + + LI->replaceAllUsesWith(ResultVal); + LI->eraseFromParent(); +} + + +/// HasPadding - Return true if the specified type has any structure or +/// alignment padding, false otherwise. +static bool HasPadding(const Type *Ty, const TargetData &TD) { + if (const StructType *STy = dyn_cast(Ty)) { + const StructLayout *SL = TD.getStructLayout(STy); + unsigned PrevFieldBitOffset = 0; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned FieldBitOffset = SL->getElementOffsetInBits(i); + + // Padding in sub-elements? + if (HasPadding(STy->getElementType(i), TD)) + return true; + + // Check to see if there is any padding between this element and the + // previous one. + if (i) { + unsigned PrevFieldEnd = + PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); + if (PrevFieldEnd < FieldBitOffset) + return true; + } + + PrevFieldBitOffset = FieldBitOffset; + } + + // Check for tail padding. 
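+    // (E.g. in {i32, i8} the i8 field ends at bit 40 while the padded struct
+    // size is 64 bits on a typical 32-bit-aligned target, so tail padding is
+    // detected here.)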
+    if (unsigned EltCount = STy->getNumElements()) {
+      unsigned PrevFieldEnd = PrevFieldBitOffset +
+        TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+      if (PrevFieldEnd < SL->getSizeInBits())
+        return true;
+    }
+
+  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    return HasPadding(ATy->getElementType(), TD);
+  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    return HasPadding(VTy->getElementType(), TD);
+  }
+  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+}
+
+/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
+/// an aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
+/// or 1 if safe after canonicalization has been performed.
+///
+int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
+  // Loop over the use list of the alloca.  We can only transform it if all of
+  // the users are safe to transform.
+  AllocaInfo Info;
+
+  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
+       I != E; ++I) {
+    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
+    if (Info.isUnsafe) {
+      DOUT << "Cannot transform: " << *AI << " due to user: " << **I;
+      return 0;
+    }
+  }
+
+  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
+  // source and destination, we have to be careful.  In particular, the memcpy
+  // could be moving around elements that live in structure padding of the LLVM
+  // types, but may actually be used.  In these cases, we refuse to promote the
+  // struct.
+  if (Info.isMemCpySrc && Info.isMemCpyDst &&
+      HasPadding(AI->getType()->getElementType(), *TD))
+    return 0;
+
+  // If we require cleanup, return 1, otherwise return 3.
+  return Info.needsCleanup ? 1 : 3;
+}
+
+/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
+/// is canonicalized here.
+void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
+  gep_type_iterator I = gep_type_begin(GEPI);
+  ++I;
+
+  const ArrayType *AT = dyn_cast<ArrayType>(*I);
+  if (!AT)
+    return;
+
+  uint64_t NumElements = AT->getNumElements();
+
+  if (isa<ConstantInt>(I.getOperand()))
+    return;
+
+  if (NumElements == 1) {
+    GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty));
+    return;
+  }
+
+  assert(NumElements == 2 && "Unhandled case!");
+  // All users of the GEP must be loads.  At each use of the GEP, insert
+  // two loads of the appropriate indexed GEP and select between them.
+  Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(),
+                              Constant::getNullValue(I.getOperand()->getType()),
+                              "isone", GEPI);
+  // Insert the new GEP instructions, which are properly indexed.
+  SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
+  Indices[1] = Constant::getNullValue(Type::Int32Ty);
+  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+                                             Indices.begin(),
+                                             Indices.end(),
+                                             GEPI->getName()+".0", GEPI);
+  Indices[1] = ConstantInt::get(Type::Int32Ty, 1);
+  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+                                            Indices.begin(),
+                                            Indices.end(),
+                                            GEPI->getName()+".1", GEPI);
+  // Replace all loads of the variable index GEP with loads from both
+  // indexes and a select.
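+  // Illustrative sketch (editorial, not from the imported sources; value
+  // names are hypothetical): for a two-element array this rewrites
+  //
+  //   %p = getelementptr [2 x i32]* %A, i32 0, i32 %idx
+  //   %v = load i32* %p
+  //
+  // into
+  //
+  //   %isone = icmp ne i32 %idx, 0
+  //   %p.0   = getelementptr [2 x i32]* %A, i32 0, i32 0
+  //   %p.1   = getelementptr [2 x i32]* %A, i32 0, i32 1
+  //   %v.0   = load i32* %p.0
+  //   %v.1   = load i32* %p.1
+  //   %v     = select i1 %isone, i32 %v.1, i32 %v.0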
+ while (!GEPI->use_empty()) { + LoadInst *LI = cast(GEPI->use_back()); + Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); + Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); + Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); + LI->replaceAllUsesWith(R); + LI->eraseFromParent(); + } + GEPI->eraseFromParent(); +} + + +/// CleanupAllocaUsers - If SROA reported that it can promote the specified +/// allocation, but only if cleaned up, perform the cleanups required. +void SROA::CleanupAllocaUsers(AllocationInst *AI) { + // At this point, we know that the end result will be SROA'd and promoted, so + // we can insert ugly code if required so long as sroa+mem2reg will clean it + // up. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ) { + User *U = *UI++; + if (GetElementPtrInst *GEPI = dyn_cast(U)) + CleanupGEP(GEPI); + else if (Instruction *I = dyn_cast(U)) { + SmallVector DbgInUses; + if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { + // Safe to remove debug info uses. + while (!DbgInUses.empty()) { + DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); + DI->eraseFromParent(); + } + I->eraseFromParent(); + } + } + } +} + +/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at +/// the offset specified by Offset (which is specified in bytes). +/// +/// There are two cases we handle here: +/// 1) A union of vector types of the same size and potentially its elements. +/// Here we turn element accesses into insert/extract element operations. +/// This promotes a <4 x float> with a store of float to the third element +/// into a <4 x float> that uses insert element. +/// 2) A fully general blob of memory, which we turn into some (potentially +/// large) integer type with extract and insert operations where the loads +/// and stores would mutate the memory. +static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, + unsigned AllocaSize, const TargetData &TD) { + // If this could be contributing to a vector, analyze it. + if (VecTy != Type::VoidTy) { // either null or a vector type. + + // If the In type is a vector that is the same size as the alloca, see if it + // matches the existing VecTy. + if (const VectorType *VInTy = dyn_cast(In)) { + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this the first vector we see, remember the type so that + // we know the element size. + if (VecTy == 0) + VecTy = VInTy; + return; + } + } else if (In == Type::FloatTy || In == Type::DoubleTy || + (isa(In) && In->getPrimitiveSizeInBits() >= 8 && + isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // If we're accessing something that could be an element of a vector, see + // if the implied vector agrees with what we already have and if Offset is + // compatible with it. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (Offset % EltSize == 0 && + AllocaSize % EltSize == 0 && + (VecTy == 0 || + cast(VecTy)->getElementType() + ->getPrimitiveSizeInBits()/8 == EltSize)) { + if (VecTy == 0) + VecTy = VectorType::get(In, AllocaSize/EltSize); + return; + } + } + } + + // Otherwise, we have a case that we can't handle with an optimized vector + // form. We can still turn this into a large integer. + VecTy = Type::VoidTy; +} + +/// CanConvertToScalar - V is a pointer. 
If we can convert the pointee and all
+/// its accesses to use a single vector type, return true, and set VecTy to
+/// the new type.  If we could convert the alloca into a single promotable
+/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
+/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
+/// is the current offset from the base of the alloca being analyzed.
+///
+/// If we see at least one access to the value that is a vector type, set the
+/// SawVec flag.
+///
+bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
+                              bool &SawVec, uint64_t Offset,
+                              unsigned AllocaSize) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      // Don't break volatile loads.
+      if (LI->isVolatile())
+        return false;
+      MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD);
+      SawVec |= isa<VectorType>(LI->getType());
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // Storing the pointer, not into the value?
+      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+      MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD);
+      SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+      continue;
+    }
+
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
+                              AllocaSize))
+        return false;
+      IsNotTrivial = true;
+      continue;
+    }
+
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+      // If this is a GEP with variable indices, we can't handle it.
+      if (!GEP->hasAllConstantIndices())
+        return false;
+
+      // Compute the offset that this GEP adds to the pointer.
+      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+                                                &Indices[0], Indices.size());
+      // See if all uses can be converted.
+      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
+                              AllocaSize))
+        return false;
+      IsNotTrivial = true;
+      continue;
+    }
+
+    // If this is a constant sized memset of a constant value (e.g. 0) we can
+    // handle it.
+    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+      // Store of constant value and constant size.
+      if (isa<ConstantInt>(MSI->getValue()) &&
+          isa<ConstantInt>(MSI->getLength())) {
+        IsNotTrivial = true;
+        continue;
+      }
+    }
+
+    // If this is a memcpy or memmove into or out of the whole allocation, we
+    // can handle it like a load or store of the scalar type.
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
+        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
+          IsNotTrivial = true;
+          continue;
+        }
+    }
+
+    // Ignore dbg intrinsic.
+    if (isa<DbgInfoIntrinsic>(User))
+      continue;
+
+    // Otherwise, we cannot handle this!
+    return false;
+  }
+
+  return true;
+}
+
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly.  This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.  By the end of this, there should be no uses of Ptr.
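+// Illustrative sketch (editorial, not from the imported sources; value names
+// are hypothetical): for an "integer union" such as
+//
+//   %u = alloca i32
+//   %f = bitcast i32* %u to float*
+//   store float %x, float* %f
+//
+// the store is rewritten below to operate on the i32 alloca directly,
+// roughly:
+//
+//   %bits = bitcast float %x to i32
+//   store i32 %bits, i32* %u
+//
+// with shift/mask logic added when an access covers only part of the alloca.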
+void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { + while (!Ptr->use_empty()) { + Instruction *User = cast(Ptr->use_back()); + + if (BitCastInst *CI = dyn_cast(User)) { + ConvertUsesToScalar(CI, NewAI, Offset); + CI->eraseFromParent(); + continue; + } + + if (GetElementPtrInst *GEP = dyn_cast(User)) { + // Compute the offset that this GEP adds to the pointer. + SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + &Indices[0], Indices.size()); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); + GEP->eraseFromParent(); + continue; + } + + IRBuilder<> Builder(User->getParent(), User); + + if (LoadInst *LI = dyn_cast(User)) { + // The load is a bit extract from NewAI shifted right by Offset bits. + Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *NewLoadVal + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + LI->replaceAllUsesWith(NewLoadVal); + LI->eraseFromParent(); + continue; + } + + if (StoreInst *SI = dyn_cast(User)) { + assert(SI->getOperand(0) != Ptr && "Consistency error!"); + Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); + Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, + Builder); + Builder.CreateStore(New, NewAI); + SI->eraseFromParent(); + continue; + } + + // If this is a constant sized memset of a constant value (e.g. 0) we can + // transform it into a store of the expanded constant value. + if (MemSetInst *MSI = dyn_cast(User)) { + assert(MSI->getRawDest() == Ptr && "Consistency error!"); + unsigned NumBytes = cast(MSI->getLength())->getZExtValue(); + if (NumBytes != 0) { + unsigned Val = cast(MSI->getValue())->getZExtValue(); + + // Compute the value replicated the right number of times. + APInt APVal(NumBytes*8, Val); + + // Splat the value if non-zero. + if (Val) + for (unsigned i = 1; i != NumBytes; ++i) + APVal |= APVal << 8; + + Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); + Value *New = ConvertScalar_InsertValue(ConstantInt::get(APVal), Old, + Offset, Builder); + Builder.CreateStore(New, NewAI); + } + MSI->eraseFromParent(); + continue; + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast(User)) { + assert(Offset == 0 && "must be store to start of alloca"); + + // If the source and destination are both to the same alloca, then this is + // a noop copy-to-self, just delete it. Otherwise, emit a load and store + // as appropriate. + AllocaInst *OrigAI = cast(Ptr->getUnderlyingObject()); + + if (MTI->getSource()->getUnderlyingObject() != OrigAI) { + // Dest must be OrigAI, change this to be a load from the original + // pointer (bitcasted), then a store to our new alloca. + assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); + Value *SrcPtr = MTI->getSource(); + SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); + + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); + SrcVal->setAlignment(MTI->getAlignment()); + Builder.CreateStore(SrcVal, NewAI); + } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) { + // Src must be OrigAI, change this to be a load from NewAI then a store + // through the original dest pointer (bitcasted). 
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); + LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); + + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); + NewStore->setAlignment(MTI->getAlignment()); + } else { + // Noop transfer. Src == Dst + } + + + MTI->eraseFromParent(); + continue; + } + + // If user is a dbg info intrinsic then it is safe to remove it. + if (isa(User)) { + User->eraseFromParent(); + continue; + } + + assert(0 && "Unsupported operation!"); + abort(); + } +} + +/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer +/// or vector value FromVal, extracting the bits from the offset specified by +/// Offset. This returns the value, which is of type ToType. +/// +/// This happens when we are converting an "integer union" to a single +/// integer scalar, or when we are converting a "vector union" to a vector with +/// insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder) { + // If the load is of the whole new alloca, no conversion is needed. + if (FromVal->getType() == ToType && Offset == 0) + return FromVal; + + // If the result alloca is a vector type, this is either an element + // access or a bitcast to another vector type of the same size. + if (const VectorType *VTy = dyn_cast(FromVal->getType())) { + if (isa(ToType)) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + + // Otherwise it must be an element access. + unsigned Elt = 0; + if (Offset) { + unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); + Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + } + // Return the element extracted out of it. + Value *V = Builder.CreateExtractElement(FromVal, + ConstantInt::get(Type::Int32Ty,Elt), + "tmp"); + if (V->getType() != ToType) + V = Builder.CreateBitCast(V, ToType, "tmp"); + return V; + } + + // If ToType is a first class aggregate, extract out each of the pieces and + // use insertvalue's to form the FCA. + if (const StructType *ST = dyn_cast(ToType)) { + const StructLayout &Layout = *TD->getStructLayout(ST); + Value *Res = UndefValue::get(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), + Offset+Layout.getElementOffsetInBits(i), + Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + if (const ArrayType *AT = dyn_cast(ToType)) { + uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); + Value *Res = UndefValue::get(AT); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), + Offset+i*EltSize, Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + // Otherwise, this must be a union that was converted to an integer value. + const IntegerType *NTy = cast(FromVal->getType()); + + // If this is a big-endian system and the load is narrower than the + // full alloca type, we need to do a shift to get the right bits. + int ShAmt = 0; + if (TD->isBigEndian()) { + // On big-endian machines, the lowest bit is stored at the bit offset + // from the pointer given by getTypeStoreSizeInBits. 
This matters for + // integers with a bitwidth that is not a multiple of 8. + ShAmt = TD->getTypeStoreSizeInBits(NTy) - + TD->getTypeStoreSizeInBits(ToType) - Offset; + } else { + ShAmt = Offset; + } + + // Note: we support negative bitwidths (with shl) which are not defined. + // We do this to support (f.e.) loads off the end of a structure where + // only some bits are used. + if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateLShr(FromVal, ConstantInt::get(FromVal->getType(), + ShAmt), "tmp"); + else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateShl(FromVal, ConstantInt::get(FromVal->getType(), + -ShAmt), "tmp"); + + // Finally, unconditionally truncate the integer to the right width. + unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); + if (LIBitWidth < NTy->getBitWidth()) + FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(LIBitWidth), "tmp"); + else if (LIBitWidth > NTy->getBitWidth()) + FromVal = Builder.CreateZExt(FromVal, IntegerType::get(LIBitWidth), "tmp"); + + // If the result is an integer, this is a trunc or bitcast. + if (isa(ToType)) { + // Should be done. + } else if (ToType->isFloatingPoint() || isa(ToType)) { + // Just do a bitcast, we know the sizes match up. + FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + } else { + // Otherwise must be a pointer. + FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + } + assert(FromVal->getType() == ToType && "Didn't convert right?"); + return FromVal; +} + + +/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer +/// or vector value "Old" at the offset specified by Offset. +/// +/// This happens when we are converting an "integer union" to a +/// single integer scalar, or when we are converting a "vector union" to a +/// vector with insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, + uint64_t Offset, IRBuilder<> &Builder) { + + // Convert the stored type to the actual type, shift it left to insert + // then 'or' into place. + const Type *AllocaType = Old->getType(); + + if (const VectorType *VTy = dyn_cast(AllocaType)) { + uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); + uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + + uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); + + // Must be an element insertion. + unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::Int32Ty, Elt), + "tmp"); + return SV; + } + + // If SV is a first-class aggregate value, insert each value recursively. 
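+  // Illustrative note (editorial, not from the imported sources): inserting
+  // a {i16, i16} value at Offset 0 of an i64 alloca recurses once per field,
+  // placing the two i16s at the bit offsets given by StructLayout (0 and 16
+  // on common targets), each via the shift-and-or path below.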
+  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+    const StructLayout &Layout = *TD->getStructLayout(ST);
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+      Old = ConvertScalar_InsertValue(Elt, Old,
+                                      Offset+Layout.getElementOffsetInBits(i),
+                                      Builder);
+    }
+    return Old;
+  }
+
+  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
+    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+    }
+    return Old;
+  }
+
+  // If SV is a float, convert it to the appropriate integer type.
+  // If it is a pointer, do the same.
+  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
+  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
+  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
+  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
+  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
+    SV = Builder.CreateBitCast(SV, IntegerType::get(SrcWidth), "tmp");
+  else if (isa<PointerType>(SV->getType()))
+    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");
+
+  // Zero extend or truncate the value if needed.
+  if (SV->getType() != AllocaType) {
+    if (SV->getType()->getPrimitiveSizeInBits() <
+        AllocaType->getPrimitiveSizeInBits())
+      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+    else {
+      // Truncation may be needed if storing more than the alloca can hold
+      // (undefined behavior).
+      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+      SrcWidth = DestWidth;
+      SrcStoreWidth = DestStoreWidth;
+    }
+  }
+
+  // If this is a big-endian system and the store is narrower than the
+  // full alloca type, we need to do a shift to get the right bits.
+  int ShAmt = 0;
+  if (TD->isBigEndian()) {
+    // On big-endian machines, the lowest bit is stored at the bit offset
+    // from the pointer given by getTypeStoreSizeInBits.  This matters for
+    // integers with a bitwidth that is not a multiple of 8.
+    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+  } else {
+    ShAmt = Offset;
+  }
+
+  // Note: we support negative shift amounts (with shr), which are not
+  // defined.  We do this to support (e.g.) stores off the end of a structure
+  // where only some bits in the structure are set.
+  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp");
+    Mask <<= ShAmt;
+  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp");
+    Mask = Mask.lshr(-ShAmt);
+  }
+
+  // Mask out the bits we are about to insert from the old value, and or
+  // in the new bits.
+  if (SrcWidth != DestWidth) {
+    assert(DestWidth > SrcWidth);
+    Old = Builder.CreateAnd(Old, ConstantInt::get(~Mask), "mask");
+    SV = Builder.CreateOr(Old, SV, "ins");
+  }
+  return SV;
+}
+
+
+
+/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable.  This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
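+// Illustrative note (editorial, not from the imported sources; the global
+// name is hypothetical): this returns true for operands such as
+//
+//   getelementptr ([4 x i32]* @lookup_table, i32 0, i32 2)
+//
+// when @lookup_table is declared 'constant', looking through bitcast and
+// getelementptr constant expressions but never through instructions.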
+static bool PointsToConstantGlobal(Value *V) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return GV->isConstant();
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == Instruction::BitCast ||
+        CE->getOpcode() == Instruction::GetElementPtr)
+      return PointsToConstantGlobal(CE->getOperand(0));
+  return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with isOffset) but otherwise traverse
+/// the uses.  If we see a memcpy/memmove that targets a pointer to the alloca
+/// that has not been offset, and if the source pointer is a pointer to a
+/// constant global, we can optimize this.
+static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
+                                           bool isOffset) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+      // Ignore non-volatile loads, they are always ok.
+      if (!LI->isVolatile())
+        continue;
+
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+      // If uses of the bitcast are ok, we are ok.
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+        return false;
+      continue;
+    }
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
+      // doesn't, it does.
+      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
+                                          isOffset || !GEP->hasAllZeroIndices()))
+        return false;
+      continue;
+    }
+
+    // If this isn't our memcpy/memmove, reject it as something we can't
+    // handle.
+    if (!isa<MemTransferInst>(*UI))
+      return false;
+
+    // If we already have seen a copy, reject the second one.
+    if (TheCopy) return false;
+
+    // If the pointer has been offset from the start of the alloca, we can't
+    // safely handle this.
+    if (isOffset) return false;
+
+    // If the memintrinsic isn't using the alloca as the dest, reject it.
+    if (UI.getOperandNo() != 1) return false;
+
+    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
+
+    // If the source of the memcpy/move is not a constant global, reject it.
+    if (!PointsToConstantGlobal(MI->getOperand(2)))
+      return false;
+
+    // Otherwise, the transform is safe.  Remember the copy instruction.
+    TheCopy = MI;
+  }
+  return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the
+/// specified alloca is only modified by a copy from a constant global, or
+/// null otherwise.  If we can prove this, we can replace any uses of the
+/// alloca with uses of the global directly.
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+  Instruction *TheCopy = 0;
+  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+    return TheCopy;
+  return 0;
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 000000000000..b499279c6969
--- /dev/null
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,232 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations.  For example:
+//
+//   * Removes basic blocks with no predecessors.
+//   * Merges a basic block into its predecessor if there is only one and the
+//     predecessor only has one successor.
+//   * Eliminates PHI nodes for basic blocks with a single predecessor.
+//   * Eliminates a basic block that only contains an unconditional branch.
+//   * Changes invoke instructions to nounwind functions to be calls.
+//   * Changes things like "if (x) if (y)" into "if (x&y)".
+//   * etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+  struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGSimplifyPass() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F);
+  };
+}
+
+char CFGSimplifyPass::ID = 0;
+static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+  return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I) {
+  BasicBlock *BB = I->getParent();
+  // Loop over all of the successors, removing BB's entry from any PHI
+  // nodes.
+  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+    (*SI)->removePredecessor(BB);
+
+  new UnreachableInst(I);
+
+  // All instructions after this are dead.
+  BasicBlock::iterator BBI = I, BBE = BB->end();
+  while (BBI != BBE) {
+    if (!BBI->use_empty())
+      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+    BB->getInstList().erase(BBI++);
+  }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
+static void ChangeToCall(InvokeInst *II) {
+  BasicBlock *BB = II->getParent();
+  SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
+                                       Args.end(), "", II);
+  NewCall->takeName(II);
+  NewCall->setCallingConv(II->getCallingConv());
+  NewCall->setAttributes(II->getAttributes());
+  II->replaceAllUsesWith(NewCall);
+
+  // Follow the call by a branch to the normal destination.
+  BranchInst::Create(II->getNormalDest(), II);
+
+  // Update PHI nodes in the unwind destination.
+  II->getUnwindDest()->removePredecessor(BB);
+  BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+                            SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+  SmallVector<BasicBlock*, 128> Worklist;
+  Worklist.push_back(BB);
+  bool Changed = false;
+  while (!Worklist.empty()) {
+    BB = Worklist.back();
+    Worklist.pop_back();
+
+    if (!Reachable.insert(BB))
+      continue;
+
+    // Do a quick scan of the basic block, turning any obviously unreachable
+    // instructions into LLVM unreachable insts.  The instruction combining pass
+    // canonicalizes unreachable insts into stores to null or undef.
+    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+      if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+        if (CI->doesNotReturn()) {
+          // If we found a call to a no-return function, insert an unreachable
+          // instruction after it.  Make sure there isn't *already* one there
+          // though.
+          ++BBI;
+          if (!isa<UnreachableInst>(BBI)) {
+            ChangeToUnreachable(BBI);
+            Changed = true;
+          }
+          break;
+        }
+      }
+
+      if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+        if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
+            isa<UndefValue>(SI->getOperand(1))) {
+          ChangeToUnreachable(SI);
+          Changed = true;
+          break;
+        }
+    }
+
+    // Turn invokes that call 'nounwind' functions into ordinary calls.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+      if (II->doesNotThrow()) {
+        ChangeToCall(II);
+        Changed = true;
+      }
+
+    Changed |= ConstantFoldTerminator(BB);
+    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+      Worklist.push_back(*SI);
+  }
+  return Changed;
+}
+
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle.  Return true if a change was made, false
+/// otherwise.
+static bool RemoveUnreachableBlocksFromFn(Function &F) {
+  SmallPtrSet<BasicBlock*, 128> Reachable;
+  bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+  // If every block is reachable, there is nothing more to do.
+  if (Reachable.size() == F.size())
+    return Changed;
+
+  assert(Reachable.size() < F.size());
+  NumSimpl += F.size()-Reachable.size();
+
+  // Loop over all of the basic blocks that are not reachable, dropping all of
+  // their internal references...
+  for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+    if (Reachable.count(BB))
+      continue;
+
+    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+      if (Reachable.count(*SI))
+        (*SI)->removePredecessor(BB);
+    BB->dropAllReferences();
+  }
+
+  for (Function::iterator I = ++F.begin(); I != F.end();)
+    if (!Reachable.count(I))
+      I = F.getBasicBlockList().erase(I);
+    else
+      ++I;
+
+  return true;
+}
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F) {
+  bool Changed = false;
+  bool LocalChange = true;
+  while (LocalChange) {
+    LocalChange = false;
+
+    // Loop over all of the basic blocks (except the first one) and remove them
+    // if they are unneeded...
+    //
+    for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+      if (SimplifyCFG(BBIt++)) {
+        LocalChange = true;
+        ++NumSimpl;
+      }
+    }
+    Changed |= LocalChange;
+  }
+  return Changed;
+}
+
+// It is possible that we may require multiple passes over the code to fully
+// simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+  bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+  EverChanged |= IterativeSimplifyCFG(F);
+
+  // If neither pass changed anything, we're done.
+  if (!EverChanged) return false;
+
+  // IterativeSimplifyCFG can (rarely) make some loops dead.  If this happens,
+  // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+  // iterate between the two optimizations.  We structure the code like this to
+  // avoid rerunning IterativeSimplifyCFG if the second pass of
+  // RemoveUnreachableBlocksFromFn doesn't do anything.
+  if (!RemoveUnreachableBlocksFromFn(F))
+    return true;
+
+  do {
+    EverChanged = IterativeSimplifyCFG(F);
+    EverChanged |= RemoveUnreachableBlocksFromFn(F);
+  } while (EverChanged);
+
+  return true;
+}
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
new file mode 100644
index 000000000000..4aad17d7236d
--- /dev/null
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -0,0 +1,159 @@
+//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies an experimental
+// transformation on calls to specific functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls-halfpowr"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace {
+  /// This pass optimizes calls to the half_powr library function.
+  ///
+  class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
+    const TargetData *TD;
+  public:
+    static char ID; // Pass identification
+    SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+    }
+
+    Instruction *
+    InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+                    Instruction *InsertPt);
+  };
+  char SimplifyHalfPowrLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyHalfPowrLibCalls>
+X("simplify-libcalls-halfpowr", "Simplify half_powr library calls");
+
+// Public interface to the Simplify HalfPowr LibCalls pass.
+FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
+  return new SimplifyHalfPowrLibCalls();
+}
+
+/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
+/// their control flow to better facilitate subsequent optimization.
+Instruction *
+SimplifyHalfPowrLibCalls::
+InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+                Instruction *InsertPt) {
+  std::vector<BasicBlock *> Bodies;
+  BasicBlock *NewBlock = 0;
+
+  for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
+    CallInst *Call = cast<CallInst>(HalfPowrs[i]);
+    Function *Callee = Call->getCalledFunction();
+
+    // Minimally sanity-check the CFG of half_powr to ensure that it contains
+    // the kind of code we expect.  If we're running this pass, we have
+    // reason to believe it will be what we expect.
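+    // Illustrative note (editorial, not from the imported sources): the
+    // expected shape is three blocks -- a prologue ending in a conditional
+    // branch, a subnormal-handling block ending in an unconditional branch,
+    // and a body block ending in a return -- which the checks below verify.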
+ Function::iterator I = Callee->begin(); + BasicBlock *Prologue = I++; + if (I == Callee->end()) break; + BasicBlock *SubnormalHandling = I++; + if (I == Callee->end()) break; + BasicBlock *Body = I++; + if (I != Callee->end()) break; + if (SubnormalHandling->getSinglePredecessor() != Prologue) + break; + BranchInst *PBI = dyn_cast(Prologue->getTerminator()); + if (!PBI || !PBI->isConditional()) + break; + BranchInst *SNBI = dyn_cast(SubnormalHandling->getTerminator()); + if (!SNBI || SNBI->isConditional()) + break; + if (!isa(Body->getTerminator())) + break; + + Instruction *NextInst = next(BasicBlock::iterator(Call)); + + // Inline the call, taking care of what code ends up where. + NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); + + bool B = InlineFunction(Call, 0, TD); + assert(B && "half_powr didn't inline?"); B=B; + + BasicBlock *NewBody = NewBlock->getSinglePredecessor(); + assert(NewBody); + Bodies.push_back(NewBody); + } + + if (!NewBlock) + return InsertPt; + + // Put the code for all the bodies into one block, to facilitate + // subsequent optimization. + (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); + for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { + BasicBlock *Body = Bodies[i]; + Instruction *FNP = Body->getFirstNonPHI(); + // Splice the insts from body into NewBlock. + NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), + FNP, Body->getTerminator()); + } + + return NewBlock->begin(); +} + +/// runOnFunction - Top level algorithm. +/// +bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { + TD = &getAnalysis(); + + bool Changed = false; + std::vector HalfPowrs; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Look for calls. + bool IsHalfPowr = false; + if (CallInst *CI = dyn_cast(I)) { + // Look for direct calls and calls to non-external functions. + Function *Callee = CI->getCalledFunction(); + if (Callee && Callee->hasExternalLinkage()) { + // Look for calls with well-known names. + const char *CalleeName = Callee->getNameStart(); + if (strcmp(CalleeName, "__half_powrf4") == 0) + IsHalfPowr = true; + } + } + if (IsHalfPowr) + HalfPowrs.push_back(I); + // We're looking for sequences of up to three such calls, which we'll + // simplify as a group. + if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { + I = InlineHalfPowrs(HalfPowrs, I); + E = I->getParent()->end(); + HalfPowrs.clear(); + Changed = true; + } + } + assert(HalfPowrs.empty() && "Block had no terminator!"); + } + + return Changed; +} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp new file mode 100644 index 000000000000..4b0064090cf3 --- /dev/null +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -0,0 +1,2429 @@ +//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple pass that applies a variety of small +// optimizations for calls to specific well-known function calls (e.g. runtime +// library functions). For example, a call to the function "exit(3)" that +// occurs within the main() function can be transformed into a simple "return 3" +// instruction. 
Any optimization that takes this form (replace call to library
+// function with simpler code that provides the same result) belongs in this
+// file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class VISIBILITY_HIDDEN LibCallOptimization {
+protected:
+  Function *Caller;
+  const TargetData *TD;
+public:
+  LibCallOptimization() { }
+  virtual ~LibCallOptimization() {}
+
+  /// CallOptimizer - This pure virtual method is implemented by subclasses to
+  /// do various optimizations.  If this returns null then no transformation was
+  /// performed.  If it returns CI, then it transformed the call and CI is to be
+  /// deleted.  If it returns something else, replace CI with the new value and
+  /// delete CI.
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+    =0;
+
+  Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) {
+    Caller = CI->getParent()->getParent();
+    this->TD = &TD;
+    return CallOptimizer(CI->getCalledFunction(), CI, B);
+  }
+
+  /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+  Value *CastToCStr(Value *V, IRBuilder<> &B);
+
+  /// EmitStrLen - Emit a call to the strlen function to the builder, for the
+  /// specified pointer.  Ptr is required to be some pointer type, and the
+  /// return value has 'intptr_t' type.
+  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
+
+  /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
+  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+                    unsigned Align, IRBuilder<> &B);
+
+  /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
+  /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+  Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
+
+  /// EmitMemCmp - Emit a call to the memcmp function.
+  Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B);
+
+  /// EmitMemSet - Emit a call to the memset function.
+  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);
+
+  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor').  This function is known to take a single argument of type
+  /// matching 'Op' and returns one value with the same type.  If 'Op' is a
+  /// long double, an 'l' is added as the suffix of the name; if 'Op' is a
+  /// float, we add an 'f' suffix.
+  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B);
+
+  /// EmitPutChar - Emit a call to the putchar function.
This assumes that Char + /// is an integer. + void EmitPutChar(Value *Char, IRBuilder<> &B); + + /// EmitPutS - Emit a call to the puts function. This assumes that Str is + /// some pointer. + void EmitPutS(Value *Str, IRBuilder<> &B); + + /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is + /// an i32, and File is a pointer to FILE. + void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); + + /// EmitFPutS - Emit a call to the puts function. Str is required to be a + /// pointer and File is a pointer to FILE. + void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); + + /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is + /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. + void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); + +}; +} // End anonymous namespace. + +/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. +Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { + return B.CreateBitCast(V, PointerType::getUnqual(Type::Int8Ty), "cstr"); +} + +/// EmitStrLen - Emit a call to the strlen function to the builder, for the +/// specified pointer. This always returns an integer value of size intptr_t. +Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), + TD->getIntPtrType(), + PointerType::getUnqual(Type::Int8Ty), + NULL); + return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); +} + +/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always +/// expects that the size has type 'intptr_t' and Dst/Src are pointers. +Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B) { + Module *M = Caller->getParent(); + Intrinsic::ID IID = Intrinsic::memcpy; + const Type *Tys[1]; + Tys[0] = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); + return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, + ConstantInt::get(Type::Int32Ty, Align)); +} + +/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is +/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. +Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, + Value *Len, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + Type::Int32Ty, TD->getIntPtrType(), + NULL); + return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); +} + +/// EmitMemCmp - Emit a call to the memcmp function. 
+Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, + Value *Len, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), + Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + TD->getIntPtrType(), NULL); + return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); +} + +/// EmitMemSet - Emit a call to the memset function +Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, + Value *Len, IRBuilder<> &B) { + Module *M = Caller->getParent(); + Intrinsic::ID IID = Intrinsic::memset; + const Type *Tys[1]; + Tys[0] = Len->getType(); + Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); + Value *Align = ConstantInt::get(Type::Int32Ty, 1); + return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); +} + +/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. +/// 'floor'). This function is known to take a single of type matching 'Op' and +/// returns one value with the same type. If 'Op' is a long double, 'l' is +/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. +Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, + IRBuilder<> &B) { + char NameBuffer[20]; + if (Op->getType() != Type::DoubleTy) { + // If we need to add a suffix, copy into NameBuffer. + unsigned NameLen = strlen(Name); + assert(NameLen < sizeof(NameBuffer)-2); + memcpy(NameBuffer, Name, NameLen); + if (Op->getType() == Type::FloatTy) + NameBuffer[NameLen] = 'f'; // floorf + else + NameBuffer[NameLen] = 'l'; // floorl + NameBuffer[NameLen+1] = 0; + Name = NameBuffer; + } + + Module *M = Caller->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), NULL); + return B.CreateCall(Callee, Op, Name); +} + +/// EmitPutChar - Emit a call to the putchar function. This assumes that Char +/// is an integer. +void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { + Module *M = Caller->getParent(); + Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty, + Type::Int32Ty, NULL); + B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar"); +} + +/// EmitPutS - Emit a call to the puts function. This assumes that Str is +/// some pointer. +void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + + Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), NULL); + B.CreateCall(F, CastToCStr(Str, B), "puts"); +} + +/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is +/// an integer and File is a pointer to FILE. 
+void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (isa(File->getType())) + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, + Type::Int32Ty, File->getType(), NULL); + + else + F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, + File->getType(), NULL); + Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); + B.CreateCall2(F, Char, File, "fputc"); +} + +/// EmitFPutS - Emit a call to the puts function. Str is required to be a +/// pointer and File is a pointer to FILE. +void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (isa(File->getType())) + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fputs", Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + File->getType(), NULL); + B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (isa(File->getType())) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(), + PointerType::getUnqual(Type::Int8Ty), + TD->getIntPtrType(), TD->getIntPtrType(), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), + PointerType::getUnqual(Type::Int8Ty), + TD->getIntPtrType(), TD->getIntPtrType(), + File->getType(), NULL); + B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(), 1), File); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { + // Look through noop bitcast instructions. + if (BitCastInst *BCI = dyn_cast(V)) + return GetStringLengthH(BCI->getOperand(0), PHIs); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. 
+ + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return unknown. + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast(V)) { + if (CE->getOpcode() != Instruction::GetElementPtr) + return 0; + GEP = CE; + } else { + return 0; + } + + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return 0; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + if (ConstantInt *Idx = dyn_cast(GEP->getOperand(1))) { + if (!Idx->isZero()) + return 0; + } else + return 0; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return 0; + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer()) + return 0; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case, which is a degenerate case. The + // initializer is constant zero so the length of the string must be zero. + if (isa(GlobalInit)) + return 1; // Len = 0 offset by 1. + + // Must be a Constant Array + ConstantArray *Array = dyn_cast(GlobalInit); + if (!Array || Array->getType()->getElementType() != Type::Int8Ty) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + // Traverse the constant array from StartIdx (derived above) which is + // the place the GEP refers to in the array. + for (unsigned i = StartIdx; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return 0; + if (CI->isZero()) + return i-StartIdx+1; // We found end of string, success! + } + + return 0; // The array isn't null terminated, conservatively return 'unknown'. +} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLength(Value *V) { + if (!isa(V->getType())) return 0; + + SmallPtrSet PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. + return Len == ~0ULL ? 1 : Len; +} + +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. 
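+// Illustrative note (editorial, not from the imported sources): this holds
+// for uses like "memcmp(a, b, n) == 0" or "strcmp(s, t) != 0", where only the
+// zero/non-zero result matters, and fails when the sign of the result is
+// observed, e.g. "memcmp(a, b, n) < 0".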
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous LibCall Optimizations +//===----------------------------------------------------------------------===// + +namespace { +//===---------------------------------------===// +// 'exit' Optimizations + +/// ExitOpt - int main() { exit(4); } --> int main() { return 4; } +struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify we have a reasonable prototype for exit. + if (Callee->arg_size() == 0 || !CI->use_empty()) + return 0; + + // Verify the caller is main, and that the result type of main matches the + // argument type of exit. + if (!Caller->isName("main") || !Caller->hasExternalLinkage() || + Caller->getReturnType() != CI->getOperand(1)->getType()) + return 0; + + TerminatorInst *OldTI = CI->getParent()->getTerminator(); + + // Create the return after the call. + ReturnInst *RI = B.CreateRet(CI->getOperand(1)); + + // Drop all successor phi node entries. + for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i) + OldTI->getSuccessor(i)->removePredecessor(CI->getParent()); + + // Erase all instructions from after our return instruction until the end of + // the block. + BasicBlock::iterator FirstDead = RI; ++FirstDead; + CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end()); + return CI; + } +}; + +//===----------------------------------------------------------------------===// +// String and Memory LibCall Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'strcat' Optimizations + +struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getOperand(1); + Value *Src = CI->getOperand(2); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + EmitStrLenMemCpy(Src, Dst, Len, B); + return Dst; + } + + void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B); + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). 
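+    // Illustrative note (editorial, not from the imported sources): for
+    // strcat(x, "abc") this emits, in effect, memcpy(x + strlen(x), "abc", 4);
+    // the Len+1 below copies the nul terminator as well.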
+
+//===---------------------------------------===//
+// 'strncat' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strncat" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        FT->getParamType(1) != FT->getReturnType() ||
+        !isa<IntegerType>(FT->getParamType(2)))
+      return 0;
+
+    // Extract some information from the instruction
+    Value *Dst = CI->getOperand(1);
+    Value *Src = CI->getOperand(2);
+    uint64_t Len;
+
+    // We don't do anything if length is not constant
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+      Len = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    // See if we can get the length of the input string.
+    uint64_t SrcLen = GetStringLength(Src);
+    if (SrcLen == 0) return 0;
+    --SrcLen;  // Unbias length.
+
+    // Handle the simple, do-nothing cases:
+    // strncat(x, "", c) -> x
+    // strncat(x, c, 0) -> x
+    if (SrcLen == 0 || Len == 0) return Dst;
+
+    // We don't optimize this case
+    if (Len < SrcLen) return 0;
+
+    // strncat(x, s, c) -> strcat(x, s)
+    // s is constant so the strcat can be optimized further
+    EmitStrLenMemCpy(Src, Dst, SrcLen, B);
+    return Dst;
+  }
+};
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strchr" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != FT->getReturnType())
+      return 0;
+
+    Value *SrcStr = CI->getOperand(1);
+
+    // If the second operand is non-constant, see if we can compute the length
+    // of the input string and turn this into memchr.
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+    if (CharC == 0) {
+      uint64_t Len = GetStringLength(SrcStr);
+      if (Len == 0 || FT->getParamType(1) != Type::Int32Ty)  // memchr needs i32.
+        return 0;
+
+      return EmitMemChr(SrcStr, CI->getOperand(2),  // include nul.
+                        ConstantInt::get(TD->getIntPtrType(), Len), B);
+    }
+
+    // Otherwise, the character is a constant, see if the first argument is
+    // a string literal.  If so, we can constant fold.
+    std::string Str;
+    if (!GetConstantStringInfo(SrcStr, Str))
+      return 0;
+
+    // strchr can find the nul character.
+    Str += '\0';
+    char CharValue = CharC->getSExtValue();
+
+    // Compute the offset.
+    uint64_t i = 0;
+    while (1) {
+      if (i == Str.size())  // Didn't find the char.  strchr returns null.
+        return Constant::getNullValue(CI->getType());
+      // Did we find our match?
+      if (Str[i] == CharValue)
+        break;
+      ++i;
+    }
+
+    // strchr(s+n,c) -> gep(s+n+i,c)
+    Value *Idx = ConstantInt::get(Type::Int64Ty, i);
+    return B.CreateGEP(SrcStr, Idx, "strchr");
+  }
+};
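The constant path above folds the entire call away: for a literal haystack and a constant needle, the result is either null or the haystack pointer plus a fixed offset. Illustratively, in C (not code from this file):

// What strchr(s, 'l') constant-folds to for a known haystack:
const char *find_l() {
  static const char s[] = "hello";
  return s + 2;  // the pass emits the equivalent GEP s+2 directly
}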
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strcmp" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+      return 0;
+
+    Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+    if (Str1P == Str2P)  // strcmp(x,x) -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    std::string Str1, Str2;
+    bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+    bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+    if (HasStr1 && Str1.empty())  // strcmp("", x) -> *x
+      return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+    if (HasStr2 && Str2.empty())  // strcmp(x,"") -> *x
+      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strcmp(x, y) -> cnst  (if both x and y are constant strings)
+    if (HasStr1 && HasStr2)
+      return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str()));
+
+    // strcmp(P, "x") -> memcmp(P, "x", 2)
+    uint64_t Len1 = GetStringLength(Str1P);
+    uint64_t Len2 = GetStringLength(Str2P);
+    if (Len1 || Len2) {
+      // Choose the smallest Len excluding 0 which means 'unknown'.
+      if (!Len1 || (Len2 && Len2 < Len1))
+        Len1 = Len2;
+      return EmitMemCmp(Str1P, Str2P,
+                        ConstantInt::get(TD->getIntPtrType(), Len1), B);
+    }
+
+    return 0;
+  }
+};
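The last rule above deserves a concrete reading: once either operand's length is known, the terminating nul can be included and the comparison becomes fixed-size. A hedged C rendering (names are illustrative):

#include <cstring>

// strcmp(P, "x") -> memcmp(P, "x", 2): the 2 covers 'x' plus the nul,
// so equality and ordering are preserved.
int cmp_vs_literal(const char *p) {
  return memcmp(p, "x", 2);
}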
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strncmp" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+        !isa<IntegerType>(FT->getParamType(2)))
+      return 0;
+
+    Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+    if (Str1P == Str2P)  // strncmp(x,x,n) -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    // Get the length argument if it is constant.
+    uint64_t Length;
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+      Length = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    if (Length == 0)  // strncmp(x,y,0) -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    std::string Str1, Str2;
+    bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+    bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+    if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> *x
+      return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+    if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x
+      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strncmp(x, y, n) -> cnst  (if both x and y are constant strings)
+    if (HasStr1 && HasStr2)
+      return ConstantInt::get(CI->getType(),
+                              strncmp(Str1.c_str(), Str2.c_str(), Length));
+    return 0;
+  }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strcpy" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+      return 0;
+
+    Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+    if (Dst == Src)  // strcpy(x,x) -> x
+      return Src;
+
+    // See if we can get the length of the input string.
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0) return 0;
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Make a memcpy to copy the nul byte with align = 1.
+    EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B);
+    return Dst;
+  }
+};
+
+//===---------------------------------------===//
+// 'strncpy' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+        !isa<IntegerType>(FT->getParamType(2)))
+      return 0;
+
+    Value *Dst = CI->getOperand(1);
+    Value *Src = CI->getOperand(2);
+    Value *LenOp = CI->getOperand(3);
+
+    // See if we can get the length of the input string.
+    uint64_t SrcLen = GetStringLength(Src);
+    if (SrcLen == 0) return 0;
+    --SrcLen;
+
+    if (SrcLen == 0) {
+      // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+      EmitMemSet(Dst, ConstantInt::get(Type::Int8Ty, '\0'), LenOp, B);
+      return Dst;
+    }
+
+    uint64_t Len;
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+      Len = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    if (Len == 0) return Dst;  // strncpy(x, y, 0) -> x
+
+    // Let strncpy handle the zero padding
+    if (Len > SrcLen+1) return 0;
+
+    // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+    EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B);
+
+    return Dst;
+  }
+};
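The SrcLen == 0 branch above corresponds to this C-level rewrite (a sketch; the helper name is illustrative):

#include <cstddef>
#include <cstring>

// strncpy(x, "", n) just writes n nul bytes, so it becomes a memset.
char *strncpy_empty(char *x, size_t n) {
  memset(x, '\0', n);
  return x;  // strncpy returns its first argument
}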
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 ||
+        FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+        !isa<IntegerType>(FT->getReturnType()))
+      return 0;
+
+    Value *Src = CI->getOperand(1);
+
+    // Constant folding: strlen("xyz") -> 3
+    if (uint64_t Len = GetStringLength(Src))
+      return ConstantInt::get(CI->getType(), Len-1);
+
+    // Handle strlen(p) != 0.
+    if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
+
+    // strlen(x) != 0 --> *x != 0
+    // strlen(x) == 0 --> *x == 0
+    return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+  }
+};
+
+//===---------------------------------------===//
+// 'strto*' Optimizations
+
+struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+        !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)))
+      return 0;
+
+    Value *EndPtr = CI->getOperand(2);
+    if (isa<ConstantPointerNull>(EndPtr)) {
+      CI->setOnlyReadsMemory();
+      CI->addAttribute(1, Attribute::NoCapture);
+    }
+
+    return 0;
+  }
+};
+
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        FT->getReturnType() != Type::Int32Ty)
+      return 0;
+
+    Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+
+    if (LHS == RHS)  // memcmp(s,s,x) -> 0
+      return Constant::getNullValue(CI->getType());
+
+    // Make sure we have a constant length.
+    ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (!LenC) return 0;
+    uint64_t Len = LenC->getZExtValue();
+
+    if (Len == 0)  // memcmp(s1,s2,0) -> 0
+      return Constant::getNullValue(CI->getType());
+
+    if (Len == 1) {  // memcmp(S1,S2,1) -> *LHS - *RHS
+      Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
+      Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv");
+      return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
+    }
+
+    // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
+    // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
+    if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
+      const Type *PTy = PointerType::getUnqual(Len == 2 ?
+                                               Type::Int16Ty : Type::Int32Ty);
+      LHS = B.CreateBitCast(LHS, PTy, "tmp");
+      RHS = B.CreateBitCast(RHS, PTy, "tmp");
+      LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
+      LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
+      LHSV->setAlignment(1); RHSV->setAlignment(1);  // Unaligned loads.
+      return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
+    }
+
+    return 0;
+  }
+};
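The Len == 2/4 case above relies on only the zero/non-zero result being observed. In portable C++ the same trick looks like this, with memcpy standing in for the unaligned loads the pass emits (a sketch, not code from this file):

#include <cstdint>
#include <cstring>

// memcmp(a, b, 4) tested only against zero: one load per side plus an XOR.
bool memcmp4_eq(const void *a, const void *b) {
  uint32_t x, y;
  std::memcpy(&x, a, sizeof x);  // portable unaligned load
  std::memcpy(&y, b, sizeof y);
  return (x ^ y) == 0;           // zero XOR <=> all four bytes equal
}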
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+        !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        FT->getParamType(2) != TD->getIntPtrType())
+      return 0;
+
+    // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+    EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+    return CI->getOperand(1);
+  }
+};
+
+//===---------------------------------------===//
+// 'memmove' Optimizations
+
+struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+        !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        FT->getParamType(2) != TD->getIntPtrType())
+      return 0;
+
+    // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+    Module *M = Caller->getParent();
+    Intrinsic::ID IID = Intrinsic::memmove;
+    const Type *Tys[1];
+    Tys[0] = TD->getIntPtrType();
+    Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
+    Value *Dst = CastToCStr(CI->getOperand(1), B);
+    Value *Src = CastToCStr(CI->getOperand(2), B);
+    Value *Size = CI->getOperand(3);
+    Value *Align = ConstantInt::get(Type::Int32Ty, 1);
+    B.CreateCall4(MemMove, Dst, Src, Size, Align);
+    return CI->getOperand(1);
+  }
+};
+
+//===---------------------------------------===//
+// 'memset' Optimizations
+
+struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+        !isa<PointerType>(FT->getParamType(0)) ||
+        FT->getParamType(1) != TD->getIntPtrType() ||
+        FT->getParamType(2) != TD->getIntPtrType())
+      return 0;
+
+    // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+    Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty);
+    EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
+    return CI->getOperand(1);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 2 arguments of the same FP type, which match the
+    // result type.
+    if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        !FT->getParamType(0)->isFloatingPoint())
+      return 0;
+
+    Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+    if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+      if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 1.0
+        return Op1C;
+      if (Op1C->isExactlyValue(2.0))  // pow(2.0, x) -> exp2(x)
+        return EmitUnaryFloatFnCall(Op2, "exp2", B);
+    }
+
+    ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+    if (Op2C == 0) return 0;
+
+    if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0
+      return ConstantFP::get(CI->getType(), 1.0);
+
+    if (Op2C->isExactlyValue(0.5)) {
+      // FIXME: This is not safe for -0.0 and -inf.  This can only be done when
+      // 'unsafe' math optimizations are allowed.
+      // x     pow(x, 0.5)  sqrt(x)
+      // ---------------------------------------------
+      // -0.0     +0.0       -0.0
+      // -inf     +inf       NaN
+#if 0
+      // pow(x, 0.5) -> sqrt(x)
+      return B.CreateCall(get_sqrt(), Op1, "sqrt");
+#endif
+    }
+
+    if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x
+      return Op1;
+    if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x
+      return B.CreateMul(Op1, Op1, "pow2");
+    if (Op2C->isExactlyValue(-1.0))  // pow(x, -1.0) -> 1.0/x
+      return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 1 argument of FP type, which matches the
+    // result type.
+    if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isFloatingPoint())
+      return 0;
+
+    Value *Op = CI->getOperand(1);
+    // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
+    // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
+    Value *LdExpArg = 0;
+    if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+        LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+    } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+        LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+    }
+
+    if (LdExpArg) {
+      const char *Name;
+      if (Op->getType() == Type::FloatTy)
+        Name = "ldexpf";
+      else if (Op->getType() == Type::DoubleTy)
+        Name = "ldexp";
+      else
+        Name = "ldexpl";
+
+      Constant *One = ConstantFP::get(APFloat(1.0f));
+      if (Op->getType() != Type::FloatTy)
+        One = ConstantExpr::getFPExtend(One, Op->getType());
+
+      Module *M = Caller->getParent();
+      Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+                                             Op->getType(), Type::Int32Ty,
+                                             NULL);
+      return B.CreateCall2(Callee, One, LdExpArg);
+    }
+    return 0;
+  }
+};
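The ldexp rewrite above has a simple numeric reading: exp2 of an integral n is exactly 1.0 scaled by 2^n, which is precisely what ldexp computes. A sketch using the standard library:

#include <cmath>

// exp2(sitofp(n)) -> ldexp(1.0, n): no floating-point exponentiation needed.
double exp2_of_int(int n) {
  return std::ldexp(1.0, n);
}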
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
+        FT->getParamType(0) != Type::DoubleTy)
+      return 0;
+
+    // If this is something like 'floor((double)floatval)', convert to floorf.
+    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+    if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+      return 0;
+
+    // floor((double)floatval) -> (double)floorf(floatval)
+    Value *V = Cast->getOperand(0);
+    V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
+    return B.CreateFPExt(V, Type::DoubleTy);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 1 integer argument and an i32 result.
+    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+        !isa<IntegerType>(FT->getParamType(0)))
+      return 0;
+
+    Value *Op = CI->getOperand(1);
+
+    // Constant fold.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+      if (CI->getValue() == 0)  // ffs(0) -> 0.
+        return Constant::getNullValue(CI->getType());
+      return ConstantInt::get(Type::Int32Ty,  // ffs(c) -> cttz(c)+1
+                              CI->getValue().countTrailingZeros()+1);
+    }
+
+    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+    const Type *ArgType = Op->getType();
+    Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+                                         Intrinsic::cttz, &ArgType, 1);
+    Value *V = B.CreateCall(F, Op, "cttz");
+    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+    V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
+
+    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+    return B.CreateSelect(Cond, V, ConstantInt::get(Type::Int32Ty, 0));
+  }
+};
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // We require integer(i32)
+    if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+        FT->getParamType(0) != Type::Int32Ty)
+      return 0;
+
+    // isdigit(c) -> (c-'0') <u 10
+    Value *Op = CI->getOperand(1);
+    Op = B.CreateSub(Op, ConstantInt::get(Type::Int32Ty, '0'), "isdigittmp");
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 10), "isdigit");
+    return B.CreateZExt(Op, CI->getType());
+  }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // We require integer(i32)
+    if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+        FT->getParamType(0) != Type::Int32Ty)
+      return 0;
+
+    // isascii(c) -> c <u 128
+    Value *Op = CI->getOperand(1);
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 128), "isascii");
+    return B.CreateZExt(Op, CI->getType());
+  }
+};
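Both rewrites above are the classic unsigned range-check trick. In C terms (illustrative helpers, not code from this file):

// isdigit(c) -> (c-'0') <u 10 and isascii(c) -> c <u 128: an unsigned
// compare folds both "lower bound" and "upper bound" into one test.
bool isdigit_like(int c) { return (unsigned)(c - '0') < 10u; }
bool isascii_like(int c) { return (unsigned)c < 128u; }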
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // We require integer(integer) where the types agree.
+    if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+        FT->getParamType(0) != FT->getReturnType())
+      return 0;
+
+    // abs(x) -> x >s -1 ? x : -x
+    Value *Op = CI->getOperand(1);
+    Value *Pos = B.CreateICmpSGT(Op, ConstantInt::getAllOnesValue(Op->getType()),
+                                 "ispos");
+    Value *Neg = B.CreateNeg(Op, "neg");
+    return B.CreateSelect(Pos, Op, Neg);
+  }
+};
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    // We require i32(i32)
+    if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != Type::Int32Ty)
+      return 0;
+
+    // toascii(c) -> c & 0x7f
+    return B.CreateAnd(CI->getOperand(1), ConstantInt::get(CI->getType(), 0x7F));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
+        !(isa<IntegerType>(FT->getReturnType()) ||
+          FT->getReturnType() == Type::VoidTy))
+      return 0;
+
+    // Check for a fixed format string.
+    std::string FormatStr;
+    if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+      return 0;
+
+    // Empty format string -> noop.
+    if (FormatStr.empty())  // Tolerate printf's declared void.
+      return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0);
+
+    // printf("x") -> putchar('x'), even for '%'.
+    if (FormatStr.size() == 1) {
+      EmitPutChar(ConstantInt::get(Type::Int32Ty, FormatStr[0]), B);
+      return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1);
+    }
+
+    // printf("foo\n") --> puts("foo")
+    if (FormatStr[FormatStr.size()-1] == '\n' &&
+        FormatStr.find('%') == std::string::npos) {  // no format characters.
+      // Create a string literal with no \n on it.  We expect the constant merge
+      // pass to be run after this pass, to merge duplicate strings.
+      FormatStr.erase(FormatStr.end()-1);
+      Constant *C = ConstantArray::get(FormatStr, true);
+      C = new GlobalVariable(C->getType(), true, GlobalVariable::InternalLinkage,
+                             C, "str", Callee->getParent());
+      EmitPutS(C, B);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), FormatStr.size()+1);
+    }
+
+    // Optimize specific format strings.
+    // printf("%c", chr) --> putchar(chr)
+    if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
+        isa<IntegerType>(CI->getOperand(2)->getType())) {
+      EmitPutChar(CI->getOperand(2), B);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 1);
+    }
+
+    // printf("%s\n", str) --> puts(str)
+    if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
+        isa<PointerType>(CI->getOperand(2)->getType()) &&
+        CI->use_empty()) {
+      EmitPutS(CI->getOperand(2), B);
+      return CI;
+    }
+    return 0;
+  }
+};
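For a format string that is just text plus a trailing newline and contains no '%', the rewrite above amounts to (a sketch):

#include <cstdio>

// printf("hello\n") --> puts("hello"): puts appends the newline itself,
// which is why the pass strips the trailing '\n' from the literal.
void greet() {
  std::puts("hello");
}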
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        !isa<IntegerType>(FT->getReturnType()))
+      return 0;
+
+    // Check for a fixed format string.
+    std::string FormatStr;
+    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+      return 0;
+
+    // If we just have a format string (nothing else crazy) transform it.
+    if (CI->getNumOperands() == 3) {
+      // Make sure there's no % in the constant array.  We could try to handle
+      // %% -> % in the future if we cared.
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')
+          return 0;  // we found a format specifier, bail out.
+
+      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(2),  // Copy the nul byte.
+                 ConstantInt::get(TD->getIntPtrType(), FormatStr.size()+1), 1, B);
+      return ConstantInt::get(CI->getType(), FormatStr.size());
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() < 4)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+      if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+      Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+      Value *Ptr = CastToCStr(CI->getOperand(1), B);
+      B.CreateStore(V, Ptr);
+      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::Int32Ty, 1), "nul");
+      B.CreateStore(Constant::getNullValue(Type::Int8Ty), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+      if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
+
+      Value *Len = EmitStrLen(CI->getOperand(3), B);
+      Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1),
+                                  "leninc");
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
+
+      // The sprintf result is the unincremented number of bytes in the string.
+      return B.CreateIntCast(Len, CI->getType(), false);
+    }
+    return 0;
+  }
+};
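The "%s" branch above, read back as C (a sketch; the pass emits the strlen and memcpy directly rather than calling library routines by name):

#include <cstring>

// sprintf(dst, "%s", src) degenerates to a string copy of len+1 bytes;
// the call's result is the unincremented length.
int sprintf_s(char *dst, const char *src) {
  size_t len = std::strlen(src);
  std::memcpy(dst, src, len + 1);  // copy the terminating nul too
  return (int)len;
}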
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require a pointer, an integer, an integer, a pointer, returning integer.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<IntegerType>(FT->getParamType(1)) ||
+        !isa<IntegerType>(FT->getParamType(2)) ||
+        !isa<PointerType>(FT->getParamType(3)) ||
+        !isa<IntegerType>(FT->getReturnType()))
+      return 0;
+
+    // Get the element size and count.
+    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
+    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (!SizeC || !CountC) return 0;
+    uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+    // If this is writing zero records, remove the call (it's a noop).
+    if (Bytes == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // If this is writing one byte, turn it into fputc.
+    if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
+      Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
+      EmitFPutC(Char, CI->getOperand(4), B);
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two pointers.  Also, we can't optimize if return value is used.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        !CI->use_empty())
+      return 0;
+
+    // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+    uint64_t Len = GetStringLength(CI->getOperand(1));
+    if (!Len) return 0;
+    EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(), Len-1),
+               CI->getOperand(2), B);
+    return CI;  // Known to have no uses (see above).
+  }
+};
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed parameters as pointers and an integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+        !isa<PointerType>(FT->getParamType(1)) ||
+        !isa<IntegerType>(FT->getReturnType()))
+      return 0;
+
+    // All the optimizations depend on the format string.
+    std::string FormatStr;
+    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+      return 0;
+
+    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+    if (CI->getNumOperands() == 3) {
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
+          return 0;  // We found a format specifier.
+
+      EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(),
+                                                     FormatStr.size()),
+                 CI->getOperand(1), B);
+      return ConstantInt::get(CI->getType(), FormatStr.size());
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() < 4)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+      EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // fprintf(F, "%s", str) -> fputs(str, F)
+      if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
+        return 0;
+      EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
+      return CI;
+    }
+    return 0;
+  }
+};
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// This pass optimizes well known library functions from libc and libm.
+  ///
+  class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+    StringMap<LibCallOptimization*> Optimizations;
+    // Miscellaneous LibCall Optimizations
+    ExitOpt Exit;
+    // String and Memory LibCall Optimizations
+    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
+    StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
+    StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove;
+    MemSetOpt MemSet;
+    // Math Library Optimizations
+    PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+    // Integer Optimizations
+    FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+    ToAsciiOpt ToAscii;
+    // Formatting and IO Optimizations
+    SPrintFOpt SPrintF; PrintFOpt PrintF;
+    FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+
+    bool Modified;  // This is only used by doInitialization.
+  public:
+    static char ID; // Pass identification
+    SimplifyLibCalls() : FunctionPass(&ID) {}
+
+    void InitOptimizations();
+    bool runOnFunction(Function &F);
+
+    void setDoesNotAccessMemory(Function &F);
+    void setOnlyReadsMemory(Function &F);
+    void setDoesNotThrow(Function &F);
+    void setDoesNotCapture(Function &F, unsigned n);
+    void setDoesNotAlias(Function &F, unsigned n);
+    bool doInitialization(Module &M);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+    }
+  };
+  char SimplifyLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyLibCalls>
+X("simplify-libcalls", "Simplify well-known library calls");
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+  return new SimplifyLibCalls();
+}
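For context, a hypothetical driver showing how createSimplifyLibCallsPass is meant to be scheduled against the 2009-era API. The TargetData requirement comes from getAnalysisUsage above; "Layout" is a placeholder target-description string, and the function name is illustrative:

#include <string>
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Scalar.h"

// Run simplify-libcalls over a module; returns true if anything changed.
bool simplifyLibCalls(llvm::Module &M, const std::string &Layout) {
  llvm::PassManager PM;
  PM.add(new llvm::TargetData(Layout));        // required analysis
  PM.add(llvm::createSimplifyLibCallsPass());
  return PM.run(M);
}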
+
+/// Optimizations - Populate the Optimizations map with all the optimizations
+/// we know.
+void SimplifyLibCalls::InitOptimizations() {
+  // Miscellaneous LibCall Optimizations
+  Optimizations["exit"] = &Exit;
+
+  // String and Memory LibCall Optimizations
+  Optimizations["strcat"] = &StrCat;
+  Optimizations["strncat"] = &StrNCat;
+  Optimizations["strchr"] = &StrChr;
+  Optimizations["strcmp"] = &StrCmp;
+  Optimizations["strncmp"] = &StrNCmp;
+  Optimizations["strcpy"] = &StrCpy;
+  Optimizations["strncpy"] = &StrNCpy;
+  Optimizations["strlen"] = &StrLen;
+  Optimizations["strtol"] = &StrTo;
+  Optimizations["strtod"] = &StrTo;
+  Optimizations["strtof"] = &StrTo;
+  Optimizations["strtoul"] = &StrTo;
+  Optimizations["strtoll"] = &StrTo;
+  Optimizations["strtold"] = &StrTo;
+  Optimizations["strtoull"] = &StrTo;
+  Optimizations["memcmp"] = &MemCmp;
+  Optimizations["memcpy"] = &MemCpy;
+  Optimizations["memmove"] = &MemMove;
+  Optimizations["memset"] = &MemSet;
+
+  // Math Library Optimizations
+  Optimizations["powf"] = &Pow;
+  Optimizations["pow"] = &Pow;
+  Optimizations["powl"] = &Pow;
+  Optimizations["llvm.pow.f32"] = &Pow;
+  Optimizations["llvm.pow.f64"] = &Pow;
+  Optimizations["llvm.pow.f80"] = &Pow;
+  Optimizations["llvm.pow.f128"] = &Pow;
+  Optimizations["llvm.pow.ppcf128"] = &Pow;
+  Optimizations["exp2l"] = &Exp2;
+  Optimizations["exp2"] = &Exp2;
+  Optimizations["exp2f"] = &Exp2;
+  Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+  Optimizations["llvm.exp2.f128"] = &Exp2;
+  Optimizations["llvm.exp2.f80"] = &Exp2;
+  Optimizations["llvm.exp2.f64"] = &Exp2;
+  Optimizations["llvm.exp2.f32"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+  Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+  Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+  Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+  Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+  Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+  // Integer Optimizations
+  Optimizations["ffs"] = &FFS;
+  Optimizations["ffsl"] = &FFS;
+  Optimizations["ffsll"] = &FFS;
+  Optimizations["abs"] = &Abs;
+  Optimizations["labs"] = &Abs;
+  Optimizations["llabs"] = &Abs;
+  Optimizations["isdigit"] = &IsDigit;
+  Optimizations["isascii"] = &IsAscii;
+  Optimizations["toascii"] = &ToAscii;
+
+  // Formatting and IO Optimizations
+  Optimizations["sprintf"] = &SPrintF;
+  Optimizations["printf"] = &PrintF;
+  Optimizations["fwrite"] = &FWrite;
+  Optimizations["fputs"] = &FPuts;
+  Optimizations["fprintf"] = &FPrintF;
+}
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+  if (Optimizations.empty())
+    InitOptimizations();
+
+  const TargetData &TD = getAnalysis<TargetData>();
+
+  IRBuilder<> Builder;
+
+  bool Changed = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+      // Ignore non-calls.
+      CallInst *CI = dyn_cast<CallInst>(I++);
+      if (!CI) continue;
+
+      // Ignore indirect calls and calls to non-external functions.
+      Function *Callee = CI->getCalledFunction();
+      if (Callee == 0 || !Callee->isDeclaration() ||
+          !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+        continue;
+
+      // Ignore unknown calls.
+      const char *CalleeName = Callee->getNameStart();
+      StringMap<LibCallOptimization*>::iterator OMI =
+        Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
+      if (OMI == Optimizations.end()) continue;
+
+      // Set the builder to the instruction after the call.
+      Builder.SetInsertPoint(BB, I);
+
+      // Try to optimize this call.
+      Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+      if (Result == 0) continue;
+
+      DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
+            DOUT << "  into: " << *Result << "\n");
+
+      // Something changed!
+      Changed = true;
+      ++NumSimplified;
+
+      // Inspect the instruction after the call (which was potentially just
+      // added) next.
+      I = CI; ++I;
+
+      if (CI != Result && !CI->use_empty()) {
+        CI->replaceAllUsesWith(Result);
+        if (!Result->hasName())
+          Result->takeName(CI);
+      }
+      CI->eraseFromParent();
+    }
+  }
+  return Changed;
+}
+
+// Utility methods for doInitialization.
+
+void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
+  if (!F.doesNotAccessMemory()) {
+    F.setDoesNotAccessMemory();
+    ++NumAnnotated;
+    Modified = true;
+  }
+}
+void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
+  if (!F.onlyReadsMemory()) {
+    F.setOnlyReadsMemory();
+    ++NumAnnotated;
+    Modified = true;
+  }
+}
+void SimplifyLibCalls::setDoesNotThrow(Function &F) {
+  if (!F.doesNotThrow()) {
+    F.setDoesNotThrow();
+    ++NumAnnotated;
+    Modified = true;
+  }
+}
+void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
+  if (!F.doesNotCapture(n)) {
+    F.setDoesNotCapture(n);
+    ++NumAnnotated;
+    Modified = true;
+  }
+}
+void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
+  if (!F.doesNotAlias(n)) {
+    F.setDoesNotAlias(n);
+    ++NumAnnotated;
+    Modified = true;
+  }
+}
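The doInitialization routine below mostly applies attributes such as nocapture and readonly to known libc declarations. What nocapture promises, read back in C terms (an illustrative sketch, not code from this file):

#include <cstddef>
#include <cstring>

// nocapture on strlen's argument: the callee reads through the pointer but
// retains no copy of it, so after the call only the length survives.
size_t lengthOnly(const char *p) {
  return strlen(p);  // p does not escape through this call
}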
+
+/// doInitialization - Add attributes to well-known functions.
+///
+bool SimplifyLibCalls::doInitialization(Module &M) {
+  Modified = false;
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    Function &F = *I;
+    if (!F.isDeclaration())
+      continue;
+
+    unsigned NameLen = F.getNameLen();
+    if (!NameLen)
+      continue;
+
+    const FunctionType *FTy = F.getFunctionType();
+
+    const char *NameStr = F.getNameStart();
+    switch (NameStr[0]) {
+    case 's':
+      if (NameLen == 6 && !strcmp(NameStr, "strlen")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setOnlyReadsMemory(F);
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "stpcpy")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "strcat")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "strtol")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "strtod")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "strtof")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strtoul")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strtoll")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strtold")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strncat")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strncpy")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "strtoull"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
+                 (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
+                 (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setOnlyReadsMemory(F);
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setOnlyReadsMemory(F);
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "setbuf")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "strndup"))) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "sscanf")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "sprintf")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "statvfs"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(2)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 3);
+      } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(1)) ||
+            !isa<PointerType>(FTy->getParamType(2)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+        setDoesNotCapture(F, 3);
+      } else if (NameLen == 6 && !strcmp(NameStr, "system")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        // May throw; "system" is a valid pthread cancellation point.
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'm':
+      if (NameLen == 6 && !strcmp(NameStr, "memcmp")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setOnlyReadsMemory(F);
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "memrchr"))) {
+        if (FTy->getNumParams() != 3)
+          continue;
+        setOnlyReadsMemory(F);
+        setDoesNotThrow(F);
+      } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "modff")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "modfl")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "memcpy")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "memccpy")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "memmove"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) {
+        if (!isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotAlias(F, 0);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "mktime"))) {
+        if (FTy->getNumParams() == 0 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'r':
+      if (NameLen == 7 && !strcmp(NameStr, "realloc")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 4 && !strcmp(NameStr, "read")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        // May throw; "read" is a valid pthread cancellation point.
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "rewind")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "remove")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "realpath"))) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "readlink"))) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      }
+      break;
+    case 'w':
+      if (NameLen == 5 && !strcmp(NameStr, "write")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        // May throw; "write" is a valid pthread cancellation point.
+        setDoesNotCapture(F, 2);
+      }
+      break;
+    case 'b':
+      if (NameLen == 5 && !strcmp(NameStr, "bcopy")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setOnlyReadsMemory(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'c':
+      if (NameLen == 6 && !strcmp(NameStr, "calloc")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "chown")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "ctermid")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "clearerr")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "closedir"))) {
+        if (FTy->getNumParams() == 0 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'a':
+      if ((NameLen == 4 && !strcmp(NameStr, "atoi")) ||
+          (NameLen == 4 && !strcmp(NameStr, "atol")) ||
+          (NameLen == 4 && !strcmp(NameStr, "atof")) ||
+          (NameLen == 5 && !strcmp(NameStr, "atoll"))) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setOnlyReadsMemory(F);
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 6 && !strcmp(NameStr, "access")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'f':
+      if (NameLen == 5 && !strcmp(NameStr, "fopen")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) ||
+                 (NameLen == 4 && !strcmp(NameStr, "free")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "fseek")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "ftell")) ||
+                 (NameLen == 5 && !strcmp(NameStr, "fgetc")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "fseeko")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "ftello")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "fileno")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "fflush")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "fclose")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "fsetpos")) ||
+                 (NameLen == 9 && !strcmp(NameStr, "flockfile")) ||
+                 (NameLen == 11 && !strcmp(NameStr, "funlockfile")) ||
+                 (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) {
+        if (FTy->getNumParams() == 0 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setOnlyReadsMemory(F);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fstat")) || + (NameLen == 5 && !strcmp(NameStr, "frexp")) || + (NameLen == 6 && !strcmp(NameStr, "frexpf")) || + (NameLen == 6 && !strcmp(NameStr, "frexpl")) || + (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) { + if (FTy->getNumParams() != 2 || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) { + if (FTy->getNumParams() != 3 || + !isa(FTy->getParamType(0)) || + !isa(FTy->getParamType(2))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) || + (NameLen == 6 && !strcmp(NameStr, "fwrite"))) { + if (FTy->getNumParams() != 4 || + !isa(FTy->getParamType(0)) || + !isa(FTy->getParamType(3))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) || + (NameLen == 6 && !strcmp(NameStr, "fscanf")) || + (NameLen == 7 && !strcmp(NameStr, "fprintf")) || + (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) { + if (FTy->getNumParams() < 2 || + !isa(FTy->getParamType(0)) || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'g': + if ((NameLen == 4 && !strcmp(NameStr, "getc")) || + (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) || + (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) { + if (FTy->getNumParams() == 0 || + !isa(FTy->getParamType(0))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) { + if (FTy->getNumParams() != 1 || + !isa(FTy->getParamType(0))) + continue; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) || + (NameLen == 7 && !strcmp(NameStr, "getchar"))) { + setDoesNotThrow(F); + } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) { + if (FTy->getNumParams() != 2 || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) { + if (FTy->getNumParams() != 1 || + !isa(FTy->getParamType(0))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'u': + if (NameLen == 6 && !strcmp(NameStr, "ungetc")) { + if (FTy->getNumParams() != 2 || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) || + (NameLen == 6 && !strcmp(NameStr, "unlink")) || + (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) { + if (FTy->getNumParams() != 1 || + !isa(FTy->getParamType(0))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) || + (NameLen == 6 && !strcmp(NameStr, "utimes"))) { + if (FTy->getNumParams() != 2 || + !isa(FTy->getParamType(0)) || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'p': + if (NameLen == 4 && !strcmp(NameStr, "putc")) { + if (FTy->getNumParams() != 2 || + !isa(FTy->getParamType(1))) + continue; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) || + (NameLen == 6 && !strcmp(NameStr, "printf")) || + (NameLen == 6 && !strcmp(NameStr, "perror"))) { + if (FTy->getNumParams() != 1 || + !isa(FTy->getParamType(0))) + continue; + setDoesNotThrow(F); + 
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) ||
+                 (NameLen == 6 && !strcmp(NameStr, "pwrite"))) {
+        if (FTy->getNumParams() != 4 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        // May throw; these are valid pthread cancellation points.
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) {
+        setDoesNotThrow(F);
+      } else if (NameLen == 5 && !strcmp(NameStr, "popen")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'v':
+      if (NameLen == 6 && !strcmp(NameStr, "vscanf")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) ||
+                 (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(1)) ||
+            !isa<PointerType>(FTy->getParamType(2)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) {
+        if (!isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+      } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) {
+        if (FTy->getNumParams() != 4 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(2)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 3);
+      }
+      break;
+    case 'o':
+      if (NameLen == 4 && !strcmp(NameStr, "open")) {
+        if (FTy->getNumParams() < 2 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        // May throw; "open" is a valid pthread cancellation point.
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 't':
+      if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) {
+        if (!isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+      } else if (NameLen == 5 && !strcmp(NameStr, "times")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'h':
+      if ((NameLen == 5 && !strcmp(NameStr, "htonl")) ||
+          (NameLen == 5 && !strcmp(NameStr, "htons"))) {
+        setDoesNotThrow(F);
+        setDoesNotAccessMemory(F);
+      }
+      break;
+    case 'n':
+      if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) ||
+          (NameLen == 5 && !strcmp(NameStr, "ntohs"))) {
+        setDoesNotThrow(F);
+        setDoesNotAccessMemory(F);
+      }
+      break;
+    case 'l':
+      if (NameLen == 5 && !strcmp(NameStr, "lstat")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      }
+      break;
+    case 'q':
+      if (NameLen == 5 && !strcmp(NameStr, "qsort")) {
+        if (FTy->getNumParams() != 4 ||
+            !isa<PointerType>(FTy->getParamType(3)))
+          continue;
+        // May throw; places call through function pointer.
+        setDoesNotCapture(F, 4);
+      }
+      break;
+    case '_':
+      if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) ||
+          (NameLen == 9 && !strcmp(NameStr, "__strndup"))) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) {
+        if (FTy->getNumParams() != 3 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 2);
+      }
+      break;
+    case 1:
+      if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getParamType(0)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+      } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) ||
+                 (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) ||
+                 (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) ||
+                 (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) {
+        if (FTy->getNumParams() < 1 ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) {
+        if (FTy->getNumParams() != 2 ||
+            !isa<PointerType>(FTy->getReturnType()) ||
+            !isa<PointerType>(FTy->getParamType(0)) ||
+            !isa<PointerType>(FTy->getParamType(1)))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+        setDoesNotCapture(F, 1);
+        setDoesNotCapture(F, 2);
+      } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) ||
+      if (FTy->getNumParams() == 0 ||
+          !isa<PointerType>(FTy->getParamType(0)))
+        continue;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) {
+      if (!isa<PointerType>(FTy->getReturnType()))
+        continue;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) ||
+               (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) {
+      if (FTy->getNumParams() != 2 ||
+          !isa<PointerType>(FTy->getParamType(1)))
+        continue;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) {
+      if (FTy->getNumParams() < 2 ||
+          !isa<PointerType>(FTy->getParamType(0)))
+        continue;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  }
+  }
+  return Modified;
+}
+
+// TODO:
+//   Additional cases that we need to add to this file:
+//
+// cbrt:
+//   * cbrt(expN(X))  -> expN(x/3)
+//   * cbrt(sqrt(x))  -> pow(x,1/6)
+//   * cbrt(cbrt(x))  -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+//   * cos(-x)  -> cos(x)
+//
+// exp, expf, expl:
+//   * exp(log(x))  -> x
+//
+// log, logf, logl:
+//   * log(exp(x))   -> x
+//   * log(x**y)     -> y*log(x)
+//   * log(exp(y))   -> y*log(e)
+//   * log(exp2(y))  -> y*log(2)
+//   * log(exp10(y)) -> y*log(10)
+//   * log(sqrt(x))  -> 0.5*log(x)
+//   * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+//   * lround(cnst) -> cnst'
+//
+// memcmp:
+//   * memcmp(x,y,l) -> cnst
+//      (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
+//
+// pow, powf, powl:
+//   * pow(exp(x),y)  -> exp(x*y)
+//   * pow(sqrt(x),y) -> pow(x,y*0.5)
+//   * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// puts:
+//   * puts("") -> putchar("\n")
+//
+// round, roundf, roundl:
+//   * round(cnst) -> cnst'
+//
+// signbit:
+//   * signbit(cnst) -> cnst'
+//   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+//   * sqrt(expN(x))  -> expN(x*0.5)
+//   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+//   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+//   * stpcpy(str, "literal") ->
+//       llvm.memcpy(str,"literal",strlen("literal")+1,1)
+//
+// strrchr:
+//   * strrchr(s,c) -> reverse_offset_of_in(c,s)
+//      (if c is a constant integer and s is a constant string)
+//   * strrchr(s1,0) -> strchr(s1,0)
+//
+// strpbrk:
+//   * strpbrk(s,a) -> offset_in_for(s,a)
+//      (if s and a are both constant strings)
+//   * strpbrk(s,"") -> 0
+//   * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
+//
+// strspn, strcspn:
+//   * strspn(s,a)   -> const_int (if both args are constant)
+//   * strspn("",a)  -> 0
+//   * strspn(s,"")  -> 0
+//   * strcspn(s,a)  -> const_int (if both args are constant)
+//   * strcspn("",a) -> 0
+//   * strcspn(s,"") -> strlen(s)
+//
+// strstr:
+//   * strstr(x,x)   -> x
+//   * strstr(s1,s2) -> offset_of_s2_in(s1)
+//       (if s1 and s2 are constant strings)
+//
+// tan, tanf, tanl:
+//   * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+//   * trunc(cnst) -> cnst'
+//
+//
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
new file mode 100644
index 000000000000..99a7dee39887
--- /dev/null
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -0,0 +1,365 @@
+//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a limited form of tail duplication, intended to simplify
+// CFGs by removing some unconditional branches. This pass is necessary to
+// straighten out loops created by the C front-end, but also is capable of
+// making other code nicer. After this pass is run, the CFG simplify pass
+// should be run to clean up the mess.
+//
+// This pass could be enhanced in the future to use profile information to be
+// more aggressive.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplicate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
+
+static cl::opt<unsigned>
+TailDupThreshold("taildup-threshold",
+                 cl::desc("Max block size to tail duplicate"),
+                 cl::init(1), cl::Hidden);
+
+namespace {
+  class VISIBILITY_HIDDEN TailDup : public FunctionPass {
+    bool runOnFunction(Function &F);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    TailDup() : FunctionPass(&ID) {}
+
+  private:
+    inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
+    inline void eliminateUnconditionalBranch(BranchInst *BI);
+    SmallPtrSet<BasicBlock*, 4> CycleDetector;
+  };
+}
+
+char TailDup::ID = 0;
+static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication");
+
+// Public interface to the Tail Duplication pass
+FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
+
+/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
+/// the function, eliminating it if it looks attractive enough. CycleDetector
+/// prevents infinite loops by checking that we aren't redirecting a branch to
+/// a place it already pointed to earlier; see PR 2323.
+bool TailDup::runOnFunction(Function &F) {
+  bool Changed = false;
+  CycleDetector.clear();
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    if (shouldEliminateUnconditionalBranch(I->getTerminator(),
+                                           TailDupThreshold)) {
+      eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
+      Changed = true;
+    } else {
+      ++I;
+      CycleDetector.clear();
+    }
+  }
+  return Changed;
+}
+
+/// shouldEliminateUnconditionalBranch - Return true if this branch looks
+/// attractive to eliminate. We eliminate the branch if the destination basic
+/// block has <= 5 instructions in it, not counting PHI nodes. In practice,
+/// since one of these is a terminator instruction, this means that we will add
+/// up to 4 instructions to the new block.
+///
+/// We don't count PHI nodes in the count since they will be removed when the
+/// contents of the block are copied over.
+///
+bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
+                                                 unsigned Threshold) {
+  BranchInst *BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isUnconditional()) return false;  // Not an uncond branch!
+
+  BasicBlock *Dest = BI->getSuccessor(0);
+  if (Dest == BI->getParent()) return false;        // Do not loop infinitely!
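+
+  // For illustration (a sketch, not from the original source): if a
+  // predecessor P ends in "br label %D" and D is small, say
+  //   D: %y = add i32 %x, 1
+  //      ret i32 %y
+  // then eliminating the branch clones D's add and ret into P, so P no
+  // longer ends in an unconditional branch at all.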
+
+  // Do not inline a block if we will just get another branch to the same block!
+  TerminatorInst *DTI = Dest->getTerminator();
+  if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
+    if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
+      return false;                                 // Do not loop infinitely!
+
+  // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
+  // because doing so would require breaking critical edges. This should be
+  // fixed eventually.
+  if (!DTI->use_empty())
+    return false;
+
+  // Do not bother with blocks with only a single predecessor: simplify
+  // CFG will fold these two blocks together!
+  pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
+  ++PI;
+  if (PI == PE) return false;  // Exactly one predecessor!
+
+  BasicBlock::iterator I = Dest->getFirstNonPHI();
+
+  for (unsigned Size = 0; I != Dest->end(); ++I) {
+    if (Size == Threshold) return false;  // The block is too large.
+
+    // Don't tail duplicate call instructions. They are very large compared to
+    // other instructions.
+    if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
+
+    // Also alloca and malloc.
+    if (isa<AllocationInst>(I)) return false;
+
+    // Some vector instructions can expand into a number of instructions.
+    if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
+        isa<InsertElementInst>(I)) return false;
+
+    // Only count instructions that are not debugger intrinsics.
+    if (!isa<DbgInfoIntrinsic>(I)) ++Size;
+  }
+
+  // Do not tail duplicate a block that has thousands of successors into a block
+  // with a single successor if the block has many other predecessors. This can
+  // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
+  // cases that have a large number of indirect gotos.
+  unsigned NumSuccs = DTI->getNumSuccessors();
+  if (NumSuccs > 8) {
+    unsigned TooMany = 128;
+    if (NumSuccs >= TooMany) return false;
+    TooMany = TooMany/NumSuccs;
+    for (; PI != PE; ++PI)
+      if (TooMany-- == 0) return false;
+  }
+
+  // If this unconditional branch is a fall-through, be careful about
+  // tail duplicating it. In particular, we don't want to taildup it if the
+  // original block will still be there after taildup is completed: doing so
+  // would eliminate the fall-through, requiring unconditional branches.
+  Function::iterator DestI = Dest;
+  if (&*--DestI == BI->getParent()) {
+    // The uncond branch is a fall-through. Tail duplication of the block will
+    // eliminate the fall-through-ness and end up cloning the terminator
+    // at the end of the Dest block. Since the original Dest block will
+    // continue to exist, this means that one or the other will not be able to
+    // fall through. One typical example that this helps with is code like:
+    // if (a)
+    //   foo();
+    // if (b)
+    //   foo();
+    // Cloning the 'if b' block into the end of the first foo block is messy.
+
+    // The messy case is when the fall-through block falls through to other
+    // blocks. This is what we would be preventing if we cloned the block.
+    DestI = Dest;
+    if (++DestI != Dest->getParent()->end()) {
+      BasicBlock *DestSucc = DestI;
+      // If any of Dest's successors are fall-throughs, don't do this xform.
+      for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
+           SI != SE; ++SI)
+        if (*SI == DestSucc)
+          return false;
+    }
+  }
+
+  // Finally, check that we haven't redirected to this target block earlier;
+  // there are cases where we loop forever if we don't check this (PR 2323).
+  if (!CycleDetector.insert(Dest))
+    return false;
+
+  return true;
+}
+
+/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
+/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
+/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
+/// DstBlock, return it.
+static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
+                                          BasicBlock *DstBlock) {
+  // SrcBlock must have a single predecessor.
+  pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
+  if (PI == PE || ++PI != PE) return 0;
+
+  BasicBlock *SrcPred = *pred_begin(SrcBlock);
+
+  // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
+  // there is only one other pred, get it, otherwise we can't handle it.
+  PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
+  BasicBlock *DstOtherPred = 0;
+  if (*PI == SrcBlock) {
+    if (++PI == PE) return 0;
+    DstOtherPred = *PI;
+    if (++PI != PE) return 0;
+  } else {
+    DstOtherPred = *PI;
+    if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
+  }
+
+  // We can handle two situations here: "if then" and "if then else" blocks. An
+  // 'if then' situation is just where DstOtherPred == SrcPred.
+  if (DstOtherPred == SrcPred)
+    return SrcPred;
+
+  // Check to see if we have an "if then else" situation, which means that
+  // DstOtherPred will have a single predecessor and it will be SrcPred.
+  PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
+  if (PI != PE && *PI == SrcPred) {
+    if (++PI != PE) return 0;  // Not a single pred.
+    return SrcPred;  // Otherwise, it's an "if then" situation. Return the if.
+  }
+
+  // Otherwise, this is something we can't handle.
+  return 0;
+}
+
+
+/// eliminateUnconditionalBranch - Clone the instructions from the destination
+/// block into the source block, eliminating the specified unconditional branch.
+/// If the destination block defines values used by successors of the dest
+/// block, we may need to insert PHI nodes.
+///
+void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
+  BasicBlock *SourceBlock = Branch->getParent();
+  BasicBlock *DestBlock = Branch->getSuccessor(0);
+  assert(SourceBlock != DestBlock && "Our predicate is broken!");
+
+  DOUT << "TailDuplication[" << SourceBlock->getParent()->getName()
+       << "]: Eliminating branch: " << *Branch;
+
+  // See if we can avoid duplicating code by moving it up to a dominator of both
+  // blocks.
+  if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
+    DOUT << "Found shared dominator: " << DomBlock->getName() << "\n";
+
+    // If there are non-phi instructions in DestBlock that have no operands
+    // defined in DestBlock, and if the instruction has no side effects, we can
+    // move the instruction to DomBlock instead of duplicating it.
+    BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
+    while (!isa<TerminatorInst>(BBI)) {
+      Instruction *I = BBI++;
+
+      bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects();
+      if (CanHoist) {
+        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+          if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
+            if (OpI->getParent() == DestBlock ||
+                (isa<PHINode>(OpI) && OpI->getParent() == DomBlock)) {
+              CanHoist = false;
+              break;
+            }
+        if (CanHoist) {
+          // Remove from DestBlock, move right before the term in DomBlock.
+          DestBlock->getInstList().remove(I);
+          DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
+          DOUT << "Hoisted: " << *I;
+        }
+      }
+    }
+  }
+
+  // Tail duplication cannot update SSA properties correctly if the values
+  // defined in the duplicated tail are used outside of the tail itself. For
+  // this reason, we spill all values that are used outside of the tail to the
+  // stack.
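+  // (DemoteRegToStack, roughly sketched: the value gets a stack slot, a store
+  // is placed right after its definition, and each use outside the defining
+  // block becomes a reload from the slot, so the cloning below never has to
+  // repair cross-block SSA form.)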
+  for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
+    if (I->isUsedOutsideOfBlock(DestBlock)) {
+      // We found a use outside of the tail. Create a new stack slot to
+      // break this inter-block usage pattern.
+      DemoteRegToStack(*I);
+    }
+
+  // We are going to have to map operands from the original block B to the new
+  // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
+  // nodes also define part of this mapping. Loop over these PHI nodes, adding
+  // them to our mapping.
+  //
+  std::map<Value*, Value*> ValueMapping;
+
+  BasicBlock::iterator BI = DestBlock->begin();
+  bool HadPHINodes = isa<PHINode>(BI);
+  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+    ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
+
+  // Clone the non-phi instructions of the dest block into the source block,
+  // keeping track of the mapping...
+  //
+  for (; BI != DestBlock->end(); ++BI) {
+    Instruction *New = BI->clone();
+    New->setName(BI->getName());
+    SourceBlock->getInstList().push_back(New);
+    ValueMapping[BI] = New;
+  }
+
+  // Now that we have built the mapping information and cloned all of the
+  // instructions (giving us a new terminator, among other things), walk the new
+  // instructions, rewriting references of old instructions to use new
+  // instructions.
+  //
+  BI = Branch; ++BI;  // Get an iterator to the first new instruction
+  for (; BI != SourceBlock->end(); ++BI)
+    for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i)
+      if (Value *Remapped = ValueMapping[BI->getOperand(i)])
+        BI->setOperand(i, Remapped);
+
+  // Next we check to see if any of the successors of DestBlock had PHI nodes.
+  // If so, we need to add entries to the PHI nodes for SourceBlock now.
+  for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
+       SI != SE; ++SI) {
+    BasicBlock *Succ = *SI;
+    for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
+      PHINode *PN = cast<PHINode>(PNI);
+      // Ok, we have a PHI node. Figure out what the incoming value was for the
+      // DestBlock.
+      Value *IV = PN->getIncomingValueForBlock(DestBlock);
+
+      // Remap the value if necessary...
+      if (Value *MappedIV = ValueMapping[IV])
+        IV = MappedIV;
+      PN->addIncoming(IV, SourceBlock);
+    }
+  }
+
+  // Next, remove the old branch instruction, and any PHI node entries that we
+  // had.
+  BI = Branch; ++BI;  // Get an iterator to the first new instruction
+  DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
+  SourceBlock->getInstList().erase(Branch);  // Destroy the uncond branch...
+
+  // Final step: now that we have finished everything up, walk the cloned
+  // instructions one last time, constant propagating and DCE'ing them, because
+  // they may not be needed anymore.
+  //
+  if (HadPHINodes) {
+    while (BI != SourceBlock->end()) {
+      Instruction *Inst = BI++;
+      if (isInstructionTriviallyDead(Inst))
+        Inst->eraseFromParent();
+      else if (Constant *C = ConstantFoldInstruction(Inst)) {
+        Inst->replaceAllUsesWith(C);
+        Inst->eraseFromParent();
+      }
+    }
+  }
+
+  ++NumEliminated;  // We just killed a branch!
+}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 000000000000..682d069923e4
--- /dev/null
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,479 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+//  1. Trivial instructions between the call and return do not prevent the
+//     transformation from taking place, though currently the analysis cannot
+//     support moving any really useful instructions (only dead ones).
+//  2. This pass transforms functions that are prevented from being tail
+//     recursive by an associative expression to use an accumulator variable,
+//     thus compiling the typical naive factorial or 'fib' implementation into
+//     efficient code (sketched at the end of this header comment).
+//  3. TRE is performed if the function returns void, if the return
+//     returns the result returned by the call, or if the function returns a
+//     run-time constant on all exits from the function. It is possible, though
+//     unlikely, that the return returns something else (like constant 0), and
+//     can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+//     the function return the exact same value.
+//  4. If it can prove that callees do not access their caller stack frame,
+//     they are marked as eligible for tail call elimination (by the code
+//     generator).
+//
+// There are several improvements that could be made:
+//
+//  1. If the function has any alloca instructions, these instructions will be
+//     moved out of the entry block of the function, causing them to be
+//     evaluated each time through the tail recursion. Safely keeping allocas
+//     in the entry block requires analysis to prove that the tail-called
+//     function does not read or write the stack object.
+//  2. Tail recursion is only performed if the call immediately precedes the
+//     return instruction. It's possible that there could be a jump between
+//     the call and the return.
+//  3. There can be intervening operations between the call and the return that
+//     prevent the TRE from occurring. For example, there could be GEP's and
+//     stores to memory that will not be read or written by the call. This
+//     requires some substantial analysis (such as with DSA) to prove safe to
+//     move ahead of the call, but doing so could allow many more TREs to be
+//     performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+//  4. The algorithm we use to detect if callees access their caller stack
+//     frames is very primitive.
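+//
+// To illustrate extension 2 above (a sketch of the effect, not this pass's
+// literal output), a naive factorial such as
+//
+//   int fact(int n) { return n <= 1 ? 1 : n * fact(n - 1); }
+//
+// is rewritten with an accumulator into the equivalent of
+//
+//   int fact(int n) {
+//     int acc = 1;                       // seeded from the base-case return
+//     while (n > 1) { acc *= n; --n; }   // the recursion became a loop
+//     return acc;
+//   }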
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+  struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    TailCallElim() : FunctionPass(&ID) {}
+
+    virtual bool runOnFunction(Function &F);
+
+  private:
+    bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+                               bool &TailCallsAreMarkedTail,
+                               std::vector<PHINode*> &ArgumentPHIs,
+                               bool CannotTailCallElimCallsMarkedTail);
+    bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+    Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+  };
+}
+
+char TailCallElim::ID = 0;
+static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination");
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+  return new TailCallElim();
+}
+
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now, this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+  // FIXME: do simple 'address taken' analysis.
+  return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+                                    bool &CannotTCETailMarkedCall) {
+  bool RetVal = false;
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+      RetVal |= AllocaMightEscapeToCalls(AI);
+
+      // If this alloca is in the body of the function, or if it is a variable
+      // sized allocation, we cannot tail call eliminate calls marked 'tail'
+      // with this mechanism.
+      if (BB != &BB->getParent()->getEntryBlock() ||
+          !isa<ConstantInt>(AI->getArraySize()))
+        CannotTCETailMarkedCall = true;
+    }
+  return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+  // If this function is a varargs function, we won't be able to PHI the args
+  // right, so don't even try to convert it...
+  if (F.getFunctionType()->isVarArg()) return false;
+
+  BasicBlock *OldEntry = 0;
+  bool TailCallsAreMarkedTail = false;
+  std::vector<PHINode*> ArgumentPHIs;
+  bool MadeChange = false;
+
+  bool FunctionContainsEscapingAllocas = false;
+
+  // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+  // marked with the 'tail' attribute, because doing so would cause the stack
+  // size to increase (real TCE would deallocate variable sized allocas, TCE
+  // doesn't).
+  bool CannotTCETailMarkedCall = false;
+
+  // Loop over the function, looking for any returning blocks, and keeping track
+  // of whether this function has any non-trivially used allocas.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+      break;
+
+    FunctionContainsEscapingAllocas |=
+      CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+  }
+
+  /// FIXME: The code generator produces really bad code when an 'escaping
+  /// alloca' is changed from being a static alloca to being a dynamic alloca.
+  /// Until this is resolved, disable this transformation if that would ever
+  /// happen. This bug is PR962.
+  if (FunctionContainsEscapingAllocas)
+    return false;
+
+
+  // Second pass, change any tail calls to loops.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
+      MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+                                          ArgumentPHIs,CannotTCETailMarkedCall);
+
+  // If we eliminated any tail recursions, it's possible that we inserted some
+  // silly PHI nodes which just merge an initial value (the incoming operand)
+  // with themselves. Check to see if we did and clean up our mess if so. This
+  // occurs when a function passes an argument straight through to its tail
+  // call.
+  if (!ArgumentPHIs.empty()) {
+    for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+      PHINode *PN = ArgumentPHIs[i];
+
+      // If the PHI Node is a dynamic constant, replace it with the value it is.
+      if (Value *PNV = PN->hasConstantValue()) {
+        PN->replaceAllUsesWith(PNV);
+        PN->eraseFromParent();
+      }
+    }
+  }
+
+  // Finally, if this function contains no escaping allocas, mark all calls
+  // in the function as eligible for tail calls (there is no stack memory for
+  // them to access).
+  if (!FunctionContainsEscapingAllocas)
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+        if (CallInst *CI = dyn_cast<CallInst>(I)) {
+          CI->setTailCall();
+          MadeChange = true;
+        }
+
+  return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+  // FIXME: We can move load/store/call/free instructions above the call if the
+  // call does not mod/ref the memory location being processed.
+  if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+    return false;
+
+  // Otherwise, if this is a side-effect free instruction, check to make sure
+  // that it does not use the return value of the call. If it doesn't use the
+  // return value of the call, it must only use things that are defined before
+  // the call, or movable instructions between the call and the instruction
+  // itself.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (I->getOperand(i) == CI)
+      return false;
+  return true;
+}
+
+// isDynamicConstant - Return true if the specified value is the same when the
+// return would exit as it was when the initial iteration of the recursive
+// function was executed.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
+static bool isDynamicConstant(Value *V, CallInst *CI) {
+  if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+  // Check to see if this is an immutable argument, if so, the value
+  // will be available to initialize the accumulator.
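+  // (Example: in "int f(int n, int lim) { return n == lim ? lim : f(n+1, lim); }"
+  // the base case returns lim, an argument the recursive call passes through
+  // unchanged, so it has the same value on every iteration and qualifies.)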
+  if (Argument *Arg = dyn_cast<Argument>(V)) {
+    // Figure out which argument number this is...
+    unsigned ArgNo = 0;
+    Function *F = CI->getParent()->getParent();
+    for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+      ++ArgNo;
+
+    // If we are passing this argument into call as the corresponding
+    // argument operand, then the argument is dynamically constant.
+    // Otherwise, we cannot transform this function safely.
+    if (CI->getOperand(ArgNo+1) == Arg)
+      return true;
+  }
+  // Not a constant or immutable argument, we can't safely transform.
+  return false;
+}
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// return instruction and tail call consistently returns the same
+// runtime-constant value at all exit points. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
+  Function *F = TheRI->getParent()->getParent();
+  Value *ReturnedValue = 0;
+
+  // TODO: Handle multiple value ret instructions.
+  if (isa<StructType>(F->getReturnType()))
+    return 0;
+
+  for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+      if (RI != TheRI) {
+        Value *RetOp = RI->getOperand(0);
+
+        // We can only perform this transformation if the value returned is
+        // evaluatable at the start of the initial invocation of the function,
+        // instead of at the end of the evaluation.
+        //
+        if (!isDynamicConstant(RetOp, CI))
+          return 0;
+
+        if (ReturnedValue && RetOp != ReturnedValue)
+          return 0; // Cannot transform if differing values are returned.
+        ReturnedValue = RetOp;
+      }
+  return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+                                                      CallInst *CI) {
+  if (!I->isAssociative()) return 0;
+  assert(I->getNumOperands() == 2 &&
+         "Associative operations should have 2 args!");
+
+  // Exactly one operand should be the result of the call instruction...
+  if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+      (I->getOperand(0) != CI && I->getOperand(1) != CI))
+    return 0;
+
+  // The only user of this instruction we allow is a single return instruction.
+  if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+    return 0;
+
+  // Ok, now we have to check all of the other return instructions in this
+  // function. If they return non-constants or differing values, then we cannot
+  // transform the function safely.
+  return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+                                         bool &TailCallsAreMarkedTail,
+                                         std::vector<PHINode*> &ArgumentPHIs,
+                                       bool CannotTailCallElimCallsMarkedTail) {
+  BasicBlock *BB = Ret->getParent();
+  Function *F = BB->getParent();
+
+  if (&BB->front() == Ret) // Make sure there is something before the ret...
+    return false;
+
+  // If the return is in the entry block, then making this transformation would
+  // turn infinite recursion into an infinite loop. This transformation is ok
+  // in theory, but breaks some code like:
+  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+  // disable this xform in this case, because the code generator will lower the
+  // call to fabs into inline code.
+  if (BB == &F->getEntryBlock())
+    return false;
+
+  // Scan backwards from the return, checking to see if there is a tail call in
+  // this block. If so, set CI to it.
+  CallInst *CI;
+  BasicBlock::iterator BBI = Ret;
+  while (1) {
+    CI = dyn_cast<CallInst>(BBI);
+    if (CI && CI->getCalledFunction() == F)
+      break;
+
+    if (BBI == BB->begin())
+      return false; // Didn't find a potential tail call.
+    --BBI;
+  }
+
+  // If this call is marked as a tail call, and if there are dynamic allocas in
+  // the function, we cannot perform this optimization.
+  if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+    return false;
+
+  // If we are introducing accumulator recursion to eliminate associative
+  // operations after the call instruction, this variable contains the initial
+  // value for the accumulator. If this value is set, we actually perform
+  // accumulator recursion elimination instead of simple tail recursion
+  // elimination.
+  Value *AccumulatorRecursionEliminationInitVal = 0;
+  Instruction *AccumulatorRecursionInstr = 0;
+
+  // Ok, we found a potential tail call. We can currently only transform the
+  // tail call if all of the instructions between the call and the return are
+  // movable to above the call itself, leaving the call next to the return.
+  // Check that this is the case now.
+  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
+    if (!CanMoveAboveCall(BBI, CI)) {
+      // If we can't move the instruction above the call, it might be because it
+      // is an associative operation that could be transformed using accumulator
+      // recursion elimination. Check to see if this is the case, and if so,
+      // remember the initial accumulator value for later.
+      if ((AccumulatorRecursionEliminationInitVal =
+             CanTransformAccumulatorRecursion(BBI, CI))) {
+        // Yes, this is accumulator recursion. Remember which instruction
+        // accumulates.
+        AccumulatorRecursionInstr = BBI;
+      } else {
+        return false;   // Otherwise, we cannot eliminate the tail recursion!
+      }
+    }
+
+  // We can only transform call/return pairs that either ignore the return value
+  // of the call and return void, ignore the value of the call and return a
+  // constant, return the value returned by the tail call, or that are being
+  // accumulator recursion variable eliminated.
+  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+      !isa<UndefValue>(Ret->getReturnValue()) &&
+      AccumulatorRecursionEliminationInitVal == 0 &&
+      !getCommonReturnValue(Ret, CI))
+    return false;
+
+  // OK! We can transform this tail call. If this is the first one found,
+  // create the new entry block, allowing us to branch back to the old entry.
+  if (OldEntry == 0) {
+    OldEntry = &F->getEntryBlock();
+    BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry);
+    NewEntry->takeName(OldEntry);
+    OldEntry->setName("tailrecurse");
+    BranchInst::Create(OldEntry, NewEntry);
+
+    // If this tail call is marked 'tail' and if there are any allocas in the
+    // entry block, move them up to the new entry block.
+    TailCallsAreMarkedTail = CI->isTailCall();
+    if (TailCallsAreMarkedTail)
+      // Move all fixed sized allocas from OldEntry to NewEntry.
+      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+             NEBI = NewEntry->begin(); OEBI != E; )
+        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+          if (isa<ConstantInt>(AI->getArraySize()))
+            AI->moveBefore(NEBI);
+
+    // Now that we have created a new block, which jumps to the entry
+    // block, insert a PHI node for each argument of the function.
+    // For now, we initialize each PHI to only have the real arguments
+    // which are passed in.
+    Instruction *InsertPos = OldEntry->begin();
+    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+         I != E; ++I) {
+      PHINode *PN = PHINode::Create(I->getType(),
+                                    I->getName() + ".tr", InsertPos);
+      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+      PN->addIncoming(I, NewEntry);
+      ArgumentPHIs.push_back(PN);
+    }
+  }
+
+  // If this function has self recursive calls in the tail position where some
+  // are marked tail and some are not, only transform one flavor or another. We
+  // have to choose whether we move allocas in the entry block to the new entry
+  // block or not, so we can't make a good choice for both. NOTE: We could do
+  // slightly better here in the case that the function has no entry block
+  // allocas.
+  if (TailCallsAreMarkedTail && !CI->isTailCall())
+    return false;
+
+  // Ok, now that we know we have a pseudo-entry block WITH all of the
+  // required PHI nodes, add entries into the PHI node for the actual
+  // parameters passed into the tail-recursive call.
+  for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
+    ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+
+  // If we are introducing an accumulator variable to eliminate the recursion,
+  // do so now. Note that we _know_ that no subsequent tail recursion
+  // eliminations will happen on this function because of the way the
+  // accumulator recursion predicate is set up.
+  //
+  if (AccumulatorRecursionEliminationInitVal) {
+    Instruction *AccRecInstr = AccumulatorRecursionInstr;
+    // Start by inserting a new PHI node for the accumulator.
+    PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
+                                     OldEntry->begin());
+
+    // Loop over all of the predecessors of the tail recursion block. For the
+    // real entry into the function we seed the PHI with the initial value,
+    // computed earlier. For any other existing branches to this block (due to
+    // other tail recursions eliminated) the accumulator is not modified.
+    // Because we haven't added the branch in the current block to OldEntry yet,
+    // it will not show up as a predecessor.
+    for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
+         PI != PE; ++PI) {
+      if (*PI == &F->getEntryBlock())
+        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
+      else
+        AccPN->addIncoming(AccPN, *PI);
+    }
+
+    // Add an incoming argument for the current block, which is computed by our
+    // associative accumulator instruction.
+    AccPN->addIncoming(AccRecInstr, BB);
+
+    // Next, rewrite the accumulator recursion instruction so that it does not
+    // use the result of the call anymore, instead, use the PHI node we just
+    // inserted.
+    AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+
+    // Finally, rewrite any return instructions in the program to return the PHI
+    // node instead of the "initval" that they do currently. This loop will
+    // actually rewrite the return value we are destroying, but that's ok.
+    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+        RI->setOperand(0, AccPN);
+    ++NumAccumAdded;
+  }
+
+  // Now that all of the PHI nodes are in place, remove the call and
+  // ret instructions, replacing them with an unconditional branch.
+  BranchInst::Create(OldEntry, Ret);
+  BB->getInstList().erase(Ret);  // Remove return.
+  BB->getInstList().erase(CI);   // Remove call.
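+  // (The return is erased before the call so that any use the ret had of the
+  // call's result is gone by the time the call itself is deleted.)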
+  ++NumEliminated;
+  return true;
+}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
new file mode 100644
index 000000000000..71049fa212d3
--- /dev/null
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -0,0 +1,594 @@
+//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target addressing mode matcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void ExtAddrMode::print(OStream &OS) const {
+  bool NeedPlus = false;
+  OS << "[";
+  if (BaseGV) {
+    OS << (NeedPlus ? " + " : "")
+       << "GV:";
+    WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false);
+    NeedPlus = true;
+  }
+
+  if (BaseOffs)
+    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+  if (BaseReg) {
+    OS << (NeedPlus ? " + " : "")
+       << "Base:";
+    WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false);
+    NeedPlus = true;
+  }
+  if (Scale) {
+    OS << (NeedPlus ? " + " : "")
+       << Scale << "*";
+    WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false);
+    NeedPlus = true;
+  }
+
+  OS << ']';
+}
+
+void ExtAddrMode::dump() const {
+  print(cerr);
+  cerr << '\n';
+}
+
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+                                             unsigned Depth) {
+  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+  // mode. Just process that directly.
+  if (Scale == 1)
+    return MatchAddr(ScaleReg, Depth);
+
+  // If the scale is 0, it takes nothing to add this.
+  if (Scale == 0)
+    return true;
+
+  // If we already have a scale of this value, we can add to it, otherwise, we
+  // need an available scale field.
+  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+    return false;
+
+  ExtAddrMode TestAddrMode = AddrMode;
+
+  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+  // [A+B + A*7] -> [B+A*8].
+  TestAddrMode.Scale += Scale;
+  TestAddrMode.ScaledReg = ScaleReg;
+
+  // If the new address isn't legal, bail out.
+  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+    return false;
+
+  // It was legal, so commit it.
+  AddrMode = TestAddrMode;
+
+  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+  // X*Scale + C*Scale to addr mode.
+  ConstantInt *CI = 0; Value *AddLHS = 0;
+  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
+      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.ScaledReg = AddLHS;
+    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+    // If this addressing mode is legal, commit it and remember that we folded
+    // this instruction.
+    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+      AddrMode = TestAddrMode;
+      return true;
+    }
+  }
+
+  // Otherwise, not (x+c)*scale, just return what we have.
+  return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::BitCast:
+    // Don't touch identity bitcasts.
+    if (I->getType() == I->getOperand(0)->getType())
+      return false;
+    return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return true;
+  case Instruction::IntToPtr:
+    // We know the input is intptr_t, so this is foldable.
+    return true;
+  case Instruction::Add:
+    return true;
+  case Instruction::Mul:
+  case Instruction::Shl:
+    // Can only handle X*C and X << C.
+    return isa<ConstantInt>(I->getOperand(1));
+  case Instruction::GetElementPtr:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+                                               unsigned Depth) {
+  // Avoid exponential behavior on extremely deep expression trees.
+  if (Depth >= 5) return false;
+
+  switch (Opcode) {
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return MatchAddr(AddrInst->getOperand(0), Depth);
+  case Instruction::IntToPtr:
+    // This inttoptr is a no-op if the integer type is pointer sized.
+    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+        TLI.getPointerTy())
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::BitCast:
+    // BitCast is always a noop, and we can handle it as long as it is
+    // int->int or pointer->pointer (we don't want int<->fp or something).
+    if ((isa<IntegerType>(AddrInst->getOperand(0)->getType()) ||
+         isa<PointerType>(AddrInst->getOperand(0)->getType())) &&
+        // Don't touch identity bitcasts. These were probably put here by LSR,
+        // and we don't want to mess around with them. Assume it knows what it
+        // is doing.
+        AddrInst->getOperand(0)->getType() != AddrInst->getType())
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::Add: {
+    // Check to see if we can merge in the RHS then the LHS. If so, we win.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(0), Depth+1))
+      return true;
+
+    // Restore the old addr mode info.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+
+    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(1), Depth+1))
+      return true;
+
+    // Otherwise we definitely can't merge the ADD in.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+    break;
+  }
+  //case Instruction::Or:
+  //  TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+  //break;
+  case Instruction::Mul:
+  case Instruction::Shl: {
+    // Can only handle X*C and X << C.
+    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+    if (!RHS) return false;
+    int64_t Scale = RHS->getSExtValue();
+    if (Opcode == Instruction::Shl)
+      Scale = 1 << Scale;
+
+    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+  }
+  case Instruction::GetElementPtr: {
+    // Scan the GEP. We check whether it contains constant offsets and at most
+    // one variable offset.
+    int VariableOperand = -1;
+    unsigned VariableScale = 0;
+
+    int64_t ConstantOffset = 0;
+    const TargetData *TD = TLI.getTargetData();
+    gep_type_iterator GTI = gep_type_begin(AddrInst);
+    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        const StructLayout *SL = TD->getStructLayout(STy);
+        unsigned Idx =
+          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+        ConstantOffset += SL->getElementOffset(Idx);
+      } else {
+        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+          ConstantOffset += CI->getSExtValue()*TypeSize;
+        } else if (TypeSize) {  // Scales of zero don't do anything.
+          // We only allow one variable index at the moment.
+          if (VariableOperand != -1)
+            return false;
+
+          // Remember the variable index.
+          VariableOperand = i;
+          VariableScale = TypeSize;
+        }
+      }
+    }
+
+    // A common case is for the GEP to only do a constant offset. In this case,
+    // just add it to the disp field and check validity.
+    if (VariableOperand == -1) {
+      AddrMode.BaseOffs += ConstantOffset;
+      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+        // Check to see if we can fold the base pointer in too.
+        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+          return true;
+      }
+      AddrMode.BaseOffs -= ConstantOffset;
+      return false;
+    }
+
+    // Save the valid addressing mode in case we can't match.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // See if the scale and offset amount is valid for this target.
+    AddrMode.BaseOffs += ConstantOffset;
+
+    // Match the base operand of the GEP.
+    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+      // If it couldn't be matched, just stuff the value in a register.
+      if (AddrMode.HasBaseReg) {
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+    }
+
+    // Match the remaining variable portion of the GEP.
+    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+                          Depth)) {
+      // If it couldn't be matched, try stuffing the base into a register
+      // instead of matching it, and retrying the match of the scale.
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      if (AddrMode.HasBaseReg)
+        return false;
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+      AddrMode.BaseOffs += ConstantOffset;
+      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+                            VariableScale, Depth)) {
+        // If even that didn't work, bail.
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+    }
+
+    return true;
+  }
+  }
+  return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
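+/// For example, on a target whose legal modes include
+/// [BaseGV + BaseOffs + BaseReg + Scale*ScaledReg], an address computed as
+/// GV + 12 + p + 8*i could be matched with BaseGV = GV, BaseOffs = 12,
+/// BaseReg = p, Scale = 8 and ScaledReg = i (an illustrative decomposition,
+/// target permitting).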
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+    // Fold in immediates if legal for the target.
+    AddrMode.BaseOffs += CI->getSExtValue();
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.BaseOffs -= CI->getSExtValue();
+  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+    // If this is a global variable, try to fold it into the addressing mode.
+    if (AddrMode.BaseGV == 0) {
+      AddrMode.BaseGV = GV;
+      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+        return true;
+      AddrMode.BaseGV = 0;
+    }
+  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // Check to see if it is possible to fold this operation.
+    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+      // Okay, it's possible to fold this. Check to see if it is actually
+      // *profitable* to do so. We use a simple cost model to avoid increasing
+      // register pressure too much.
+      if (I->hasOneUse() ||
+          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+        AddrModeInsts.push_back(I);
+        return true;
+      }
+
+      // It isn't profitable to do this, roll back.
+      //cerr << "NOT FOLDING: " << *I;
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+      return true;
+  } else if (isa<ConstantPointerNull>(Addr)) {
+    // Null pointer gets folded without affecting the addressing mode.
+    return true;
+  }
+
+  // Worst case, the target should support [reg] addressing modes. :)
+  if (!AddrMode.HasBaseReg) {
+    AddrMode.HasBaseReg = true;
+    AddrMode.BaseReg = Addr;
+    // Still check for legality in case the target supports [imm] but not [i+r].
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.HasBaseReg = false;
+    AddrMode.BaseReg = 0;
+  }
+
+  // If the base register is already taken, see if we can do [r+r].
+  if (AddrMode.Scale == 0) {
+    AddrMode.Scale = 1;
+    AddrMode.ScaledReg = Addr;
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.Scale = 0;
+    AddrMode.ScaledReg = 0;
+  }
+  // Couldn't match.
+  return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+                                    const TargetLowering &TLI) {
+  std::vector<InlineAsm::ConstraintInfo>
+    Constraints = IA->ParseConstraints();
+
+  unsigned ArgNo = 1;   // ArgNo - The operand of the CallInst.
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (OpInfo.isIndirect)
+        OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue(),
+                             OpInfo.ConstraintType == TargetLowering::C_Memory);
+
+    // If this asm operand is our Value*, and if it isn't an indirect memory
+    // operand, we can't fold it!
+    if (OpInfo.CallOperandVal == OpVal &&
+        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+         !OpInfo.isIndirect))
+      return false;
+  }
+
+  return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+                              const TargetLowering &TLI) {
+  // If we already considered this instruction, we're done.
+  if (!ConsideredInsts.insert(I))
+    return false;
+
+  // If this is an obviously unfoldable instruction, bail out.
+  if (!MightBeFoldableInst(I))
+    return true;
+
+  // Loop over all the uses, recursively processing them.
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+       UI != E; ++UI) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+      if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
+      MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
+      continue;
+    }
+
+    if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+      if (IA == 0) return true;
+
+      // If this is a memory operand, we're cool, otherwise bail out.
+      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+        return true;
+      continue;
+    }
+
+    if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
+                          TLI))
+      return true;
+  }
+
+  return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+                                                   Value *KnownLive2) {
+  // If Val is either of the known-live values, we know it is live!
+  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+    return true;
+
+  // All values other than instructions and arguments (e.g. constants) are live.
+  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+  // If Val is a constant sized alloca in the entry block, it is live, this is
+  // true because it is just a reference to the stack/frame pointer, which is
+  // live for the whole function.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+    if (AI->isStaticAlloca())
+      return true;
+
+  // Check to see if this value is already used in the memory instruction's
+  // block. If so, it's already live into the block at the very least, so we
+  // can reasonably fold it.
+  BasicBlock *MemBB = MemoryInst->getParent();
+  for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+       UI != E; ++UI)
+    // We know that uses of arguments and instructions have to be instructions.
+    if (cast<Instruction>(*UI)->getParent() == MemBB)
+      return true;
+
+  return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+///   X = ...
+///   Y = X+1
+///   use(Y)   -> nonload/store
+///   Z = Y+1
+///   load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+                                     ExtAddrMode &AMAfter) {
+  if (IgnoreProfitability) return true;
+
+  // AMBefore is the addressing mode before this instruction was folded into it,
+  // and AMAfter is the addressing mode after the instruction was folded. Get
+  // the set of registers referenced by AMAfter and subtract out those
+  // referenced by AMBefore: this is the set of values which folding in this
+  // address extends the lifetime of.
+  //
+  // Note that there are only two potential values being referenced here,
+  // BaseReg and ScaleReg (global addresses are always available, as are any
+  // folded immediates).
+  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+  // lifetime wasn't extended by adding this instruction.
+  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    BaseReg = 0;
+  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    ScaledReg = 0;
+
+  // If folding this instruction (and its subexprs) didn't extend any live
+  // ranges, we're ok with it.
+  if (BaseReg == 0 && ScaledReg == 0)
+    return true;
+
+  // If all uses of this instruction are ultimately load/store/inlineasm's,
+  // check to see if their addressing modes will include this instruction. If
+  // so, we can fold it into all uses, so it doesn't matter if it has multiple
+  // uses.
+  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+  SmallPtrSet<Instruction*, 16> ConsideredInsts;
+  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+    return false;  // Has a non-memory, non-foldable use!
+
+  // Now that we know that all uses of this instruction are part of a chain of
+  // computation involving only operations that could theoretically be folded
+  // into a memory use, loop over each of these uses and see if they could
+  // *actually* fold the instruction.
+  SmallVector<Instruction*, 16> MatchedAddrModeInsts;
+  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+    Instruction *User = MemoryUses[i].first;
+    unsigned OpNo = MemoryUses[i].second;
+
+    // Get the access type of this use. If the use isn't a pointer, we don't
+    // know what it accesses.
+    Value *Address = User->getOperand(OpNo);
+    if (!isa<PointerType>(Address->getType()))
+      return false;
+    const Type *AddressAccessTy =
+      cast<PointerType>(Address->getType())->getElementType();
+
+    // Do a match against the root of this address, ignoring profitability. This
+    // will tell us if the addressing mode for the memory operation will
+    // *actually* cover the shared instruction.
+ ExtAddrMode Result; + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + Success = Success; assert(Success && "Couldn't select *anything*?"); + + // If the match didn't cover I, then it won't be shared by it. + if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 000000000000..6d1180d0dd9a --- /dev/null +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,622 @@ +//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Constant.h" +#include "llvm/Type.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/ValueHandle.h" +#include +using namespace llvm; + +/// DeleteDeadBlock - Delete the specified block, which must have no +/// predecessors. +void llvm::DeleteDeadBlock(BasicBlock *BB) { + assert((pred_begin(BB) == pred_end(BB) || + // Can delete self loop. + BB->getSinglePredecessor() == BB) && "Block is not dead!"); + TerminatorInst *BBTerm = BB->getTerminator(); + + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) + BBTerm->getSuccessor(i)->removePredecessor(BB); + + // Zap all the instructions in the block. + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary value. + // Because control flow can't get here, we don't care what we replace the + // value with. Note that since this block is unreachable, and all values + // contained within it must dominate their uses, that all uses will + // eventually be removed (they are themselves dead). + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->getInstList().pop_back(); + } + + // Zap the block! + BB->eraseFromParent(); +} + +/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are +/// any single-entry PHI nodes in it, fold them away. This handles the case +/// when all entries to the PHI nodes in a block are guaranteed equal, such as +/// when the block has exactly one predecessor. 
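+///
+/// For example (hypothetical IR), in a block whose only predecessor is
+/// %pred, the node
+///   %p = phi i32 [ %v, %pred ]
+/// is folded by replacing all uses of %p with %v; a self-referential
+///   %p = phi i32 [ %p, %pred ]
+/// is replaced with undef instead.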
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+  if (!isa<PHINode>(BB->begin()))
+    return;
+
+  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+    if (PN->getIncomingValue(0) != PN)
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+    else
+      PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    PN->eraseFromParent();
+  }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+void llvm::DeleteDeadPHIs(BasicBlock *BB) {
+  // Recursively deleting a PHI may cause multiple PHIs to be deleted
+  // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+  SmallVector<WeakVH, 8> PHIs;
+  for (BasicBlock::iterator I = BB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PHIs.push_back(PN);
+
+  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+      RecursivelyDeleteDeadPHINode(PN);
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible.  The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock* BB, Pass* P) {
+  pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+  // Can't merge the entry block.
+  if (pred_begin(BB) == pred_end(BB)) return false;
+
+  BasicBlock *PredBB = *PI++;
+  for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
+    if (*PI != PredBB) {
+      PredBB = 0;       // There are multiple different predecessors...
+      break;
+    }
+
+  // Can't merge if there are multiple predecessors.
+  if (!PredBB) return false;
+  // Don't break self-loops.
+  if (PredBB == BB) return false;
+  // Don't break invokes.
+  if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+  succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+  BasicBlock* OnlySucc = BB;
+  for (; SI != SE; ++SI)
+    if (*SI != OnlySucc) {
+      OnlySucc = 0;     // There are multiple distinct successors!
+      break;
+    }
+
+  // Can't merge if there are multiple successors.
+  if (!OnlySucc) return false;
+
+  // Can't merge if there is a PHI loop.
+  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == PN)
+          return false;
+    } else
+      break;
+  }
+
+  // Begin by getting rid of unneeded PHIs.
+  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+    PN->replaceAllUsesWith(PN->getIncomingValue(0));
+    BB->getInstList().pop_front();  // Delete the phi node...
+  }
+
+  // Delete the unconditional branch from the predecessor...
+  PredBB->getInstList().pop_back();
+
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+  // Make all PHI nodes that referred to BB now refer to Pred as their
+  // source...
+  BB->replaceAllUsesWith(PredBB);
+
+  // If the predecessor has no name, inherit BB's name.
+  if (!PredBB->hasName())
+    PredBB->takeName(BB);
+
+  // Finally, erase the old block and update dominator info.
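+  // If a pass object was supplied, splice BB's children in the dominator
+  // tree onto PredBB and erase BB's node, keeping the tree consistent with
+  // the merged CFG.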
+  if (P) {
+    if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+      DomTreeNode* DTN = DT->getNode(BB);
+      DomTreeNode* PredDTN = DT->getNode(PredBB);
+
+      if (DTN) {
+        SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+        for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+             DE = Children.end(); DI != DE; ++DI)
+          DT->changeImmediateDominator(*DI, PredDTN);
+
+        DT->eraseNode(BB);
+      }
+    }
+  }
+
+  BB->eraseFromParent();
+
+  return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+                                BasicBlock::iterator &BI, Value *V) {
+  Instruction &I = *BI;
+  // Replaces all of the uses of the instruction with uses of the value
+  I.replaceAllUsesWith(V);
+
+  // Make sure to propagate a name if there is one already.
+  if (I.hasName() && !V->hasName())
+    V->takeName(&I);
+
+  // Delete the unnecessary instruction now...
+  BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I.  The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+                               BasicBlock::iterator &BI, Instruction *I) {
+  assert(I->getParent() == 0 &&
+         "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+  // Insert the new instruction into the basic block...
+  BasicBlock::iterator New = BIL.insert(BI, I);
+
+  // Replace all uses of the old instruction, and delete it.
+  ReplaceInstWithValue(BIL, BI, I);
+
+  // Move BI back to point to the newly inserted instruction
+  BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+  BasicBlock::iterator BI(From);
+  ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// RemoveSuccessor - Change the specified terminator instruction such that its
+/// successor SuccNum no longer exists.  Because this reduces the outgoing
+/// degree of the current basic block, the actual terminator instruction itself
+/// may have to be changed.  In the case where the last successor of the block
+/// is deleted, a return instruction is inserted in its place which can cause a
+/// surprising change in program behavior if it is not expected.
+///
+void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
+  assert(SuccNum < TI->getNumSuccessors() &&
+         "Trying to remove a nonexistent successor!");
+
+  // If our old successor block contains any PHI nodes, remove the entry in the
+  // PHI nodes that comes from this branch...
+  //
+  BasicBlock *BB = TI->getParent();
+  TI->getSuccessor(SuccNum)->removePredecessor(BB);
+
+  TerminatorInst *NewTI = 0;
+  switch (TI->getOpcode()) {
+  case Instruction::Br:
+    // If this is a conditional branch... convert to unconditional branch.
+    if (TI->getNumSuccessors() == 2) {
+      cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
+    } else {                    // Otherwise convert to a return instruction...
+      Value *RetVal = 0;
+
+      // Create a value to return... if the function doesn't return null...
+      if (BB->getParent()->getReturnType() != Type::VoidTy)
+        RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
+
+      // Create the return...
+      NewTI = ReturnInst::Create(RetVal);
+    }
+    break;
+
+  case Instruction::Invoke:    // Should convert to call
+  case Instruction::Switch:    // Should remove entry
+  default:
+  case Instruction::Ret:       // Cannot happen, has no successors!
+    assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
+    abort();
+  }
+
+  if (NewTI)   // If it's a different instruction, replace.
+    ReplaceInstWithInst(TI, NewTI);
+}
+
+/// SplitEdge - Split the edge connecting the specified blocks.  Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+  TerminatorInst *LatchTerm = BB->getTerminator();
+  unsigned SuccNum = 0;
+#ifndef NDEBUG
+  unsigned e = LatchTerm->getNumSuccessors();
+#endif
+  for (unsigned i = 0; ; ++i) {
+    assert(i != e && "Didn't find edge?");
+    if (LatchTerm->getSuccessor(i) == Succ) {
+      SuccNum = i;
+      break;
+    }
+  }
+
+  // If this is a critical edge, let SplitCriticalEdge do it.
+  if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P))
+    return LatchTerm->getSuccessor(SuccNum);
+
+  // If the edge isn't critical, then BB has a single successor or Succ has a
+  // single pred.  Split the block.
+  BasicBlock::iterator SplitPoint;
+  if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+    // If the successor only has a single pred, split the top of the successor
+    // block.
+    assert(SP == BB && "CFG broken");
+    SP = NULL;
+    return SplitBlock(Succ, Succ->begin(), P);
+  } else {
+    // Otherwise, if BB has a single successor, split it at the bottom of the
+    // block.
+    assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+           "Should have a single succ!");
+    return SplitBlock(BB, BB->getTerminator(), P);
+  }
+}
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with SplitPt
+/// moves to a new block.  The two blocks are joined by an unconditional branch
+/// and the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+  BasicBlock::iterator SplitIt = SplitPt;
+  while (isa<PHINode>(SplitIt))
+    ++SplitIt;
+  BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+  // The new block lives in whichever loop the old one did.
+  if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+    if (Loop *L = LI->getLoopFor(Old))
+      L->addBasicBlockToLoop(New, LI->getBase());
+
+  if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+    // Old dominates New.  The new node dominates all other nodes dominated
+    // by Old.
+    DomTreeNode *OldNode = DT->getNode(Old);
+    std::vector<DomTreeNode *> Children;
+    for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+         I != E; ++I)
+      Children.push_back(*I);
+
+    DomTreeNode *NewNode = DT->addNewBlock(New, Old);
+
+    for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+           E = Children.end(); I != E; ++I)
+      DT->changeImmediateDominator(*I, NewNode);
+  }
+
+  if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
+    DF->splitBlock(Old);
+
+  return New;
+}
+
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block.  The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it.  The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
+/// DominanceFrontier, but no other analyses.
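+///
+/// A minimal usage sketch (hypothetical caller; Pred0, Pred1 and the ".pre"
+/// suffix are assumptions, not part of this file):
+///   BasicBlock *Preds[] = { Pred0, Pred1 };
+///   BasicBlock *PreBB = SplitBlockPredecessors(BB, Preds, 2, ".pre", this);
+/// Afterwards Pred0 and Pred1 branch to PreBB, which unconditionally
+/// branches to BB.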
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + BasicBlock *const *Preds, + unsigned NumPreds, const char *Suffix, + Pass *P) { + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = + BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); + + // Move the edges from Preds to point to NewBB instead of BB. + for (unsigned i = 0; i != NumPreds; ++i) + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + + // Update dominator tree and dominator frontier if available. + DominatorTree *DT = P ? P->getAnalysisIfAvailable() : 0; + if (DT) + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable():0) + DF->splitBlock(NewBB); + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable() : 0; + + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (NumPreds == 0) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa(I); ++I) + cast(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + return NewBB; + } + + // Otherwise, create a new PHI node in NewBB for each PHI node in BB. + for (BasicBlock::iterator I = BB->begin(); isa(I); ) { + PHINode *PN = cast(I++); + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node. + Value *InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1; i != NumPreds; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + + if (InVal) { + // If all incoming values for the new PHI would be the same, just don't + // make a new PHI. Instead, just remove the incoming values from the old + // PHI. + for (unsigned i = 0; i != NumPreds; ++i) + PN->removeIncomingValue(Preds[i], false); + } else { + // If the values coming into the block are not the same, we need a PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), PN->getName()+".ph", BI); + if (AA) AA->copyValue(PN, NewPHI); + + // Move all of the PHI values for 'Preds' to the new PHI. + for (unsigned i = 0; i != NumPreds; ++i) { + Value *V = PN->removeIncomingValue(Preds[i], false); + NewPHI->addIncoming(V, Preds[i]); + } + InVal = NewPHI; + } + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + + // Check to see if we can eliminate this phi node. + if (Value *V = PN->hasConstantValue(DT != 0)) { + Instruction *I = dyn_cast(V); + if (!I || DT == 0 || DT->dominates(I, PN)) { + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + } + } + } + + return NewBB; +} + +/// FindFunctionBackedges - Analyze the specified function to find all of the +/// loop backedges in the function and return them. This is a relatively cheap +/// (compared to computing dominators and loop info) analysis. +/// +/// The output is added to Result, as pairs of edge info. 
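+/// For example, in a hypothetical CFG  entry -> header -> latch -> header,
+/// the only pair added to Result is (latch, header).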
+void llvm::FindFunctionBackedges(const Function &F, + SmallVectorImpl > &Result) { + const BasicBlock *BB = &F.getEntryBlock(); + if (succ_begin(BB) == succ_end(BB)) + return; + + SmallPtrSet Visited; + SmallVector, 8> VisitStack; + SmallPtrSet InStack; + + Visited.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + InStack.insert(BB); + do { + std::pair &Top = VisitStack.back(); + const BasicBlock *ParentBB = Top.first; + succ_const_iterator &I = Top.second; + + bool FoundNew = false; + while (I != succ_end(ParentBB)) { + BB = *I++; + if (Visited.insert(BB)) { + FoundNew = true; + break; + } + // Successor is in VisitStack, it's a back edge. + if (InStack.count(BB)) + Result.push_back(std::make_pair(ParentBB, BB)); + } + + if (FoundNew) { + // Go down one level if there is a unvisited successor. + InStack.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + } else { + // Go up one level. + InStack.erase(VisitStack.pop_back_val().first); + } + } while (!VisitStack.empty()); + + +} + + + +/// AreEquivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool AreEquivalentAddressValues(const Value *A, const Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come form identical arithmetic instructions. + if (isa(A) || isa(A) || + isa(A) || isa(A)) + if (const Instruction *BI = dyn_cast(B)) + if (cast(A)->isIdenticalTo(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the +/// instruction before ScanFrom) checking to see if we have the value at the +/// memory address *Ptr locally available within a small number of instructions. +/// If the value is available, return it. +/// +/// If not, return the iterator for the last validated instruction that the +/// value would be live through. If we scanned the entire block and didn't find +/// something that invalidates *Ptr or provides it, ScanFrom would be left at +/// begin() and this returns null. ScanFrom could also be left +/// +/// MaxInstsToScan specifies the maximum instructions to scan in the block. If +/// it is set to 0, it will scan the whole block. You can also optionally +/// specify an alias analysis implementation, which makes this more precise. +Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, + AliasAnalysis *AA) { + if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + + // If we're using alias analysis to disambiguate get the size of *Ptr. + unsigned AccessSize = 0; + if (AA) { + const Type *AccessTy = cast(Ptr->getType())->getElementType(); + AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy); + } + + while (ScanFrom != ScanBB->begin()) { + // We must ignore debug info directives when counting (otherwise they + // would affect codegen). + Instruction *Inst = --ScanFrom; + if (isa(Inst)) + continue; + // We skip pointer-to-pointer bitcasts, which are NOPs. + // It is necessary for correctness to skip those that feed into a + // llvm.dbg.declare, as these are not present when debugging is off. 
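+    // For example (hypothetical IR), scanning past
+    //   %q = bitcast i8* %p to i32*
+    // is safe: the cast defines no memory state of its own.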
+ if (isa(Inst) && isa(Inst->getType())) + continue; + + // Restore ScanFrom to expected value in case next test succeeds + ScanFrom++; + + // Don't scan huge blocks. + if (MaxInstsToScan-- == 0) return 0; + + --ScanFrom; + // If this is a load of Ptr, the loaded value is available. + if (LoadInst *LI = dyn_cast(Inst)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + return LI; + + if (StoreInst *SI = dyn_cast(Inst)) { + // If this is a store through Ptr, the value is available! + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + return SI->getOperand(0); + + // If Ptr is an alloca and this is a store to a different alloca, ignore + // the store. This is a trivial form of alias analysis that is important + // for reg2mem'd code. + if ((isa(Ptr) || isa(Ptr)) && + (isa(SI->getOperand(1)) || + isa(SI->getOperand(1)))) + continue; + + // If we have alias analysis and it says the store won't modify the loaded + // value, ignore the store. + if (AA && + (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // Otherwise the store that may or may not alias the pointer, bail out. + ++ScanFrom; + return 0; + } + + // If this is some other instruction that may clobber Ptr, bail out. + if (Inst->mayWriteToMemory()) { + // If alias analysis claims that it really won't modify the load, + // ignore it. + if (AA && + (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // May modify the pointer, bail out. + ++ScanFrom; + return 0; + } + } + + // Got to the start of the block, we didn't find it, but are done for this + // block. + return 0; +} + +/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint, +/// make a copy of the stoppoint before InsertPos (presumably before copying +/// or moving I). +void llvm::CopyPrecedingStopPoint(Instruction *I, + BasicBlock::iterator InsertPos) { + if (I != I->getParent()->begin()) { + BasicBlock::iterator BBI = I; --BBI; + if (DbgStopPointInst *DSPI = dyn_cast(BBI)) { + CallInst *newDSPI = DSPI->clone(); + newDSPI->insertBefore(InsertPos); + } + } +} diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp new file mode 100644 index 000000000000..1650cfa30653 --- /dev/null +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -0,0 +1,181 @@ +//===- BasicInliner.cpp - Basic function level inliner --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a simple function based inliner that does not use +// call graph information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "basicinliner" + +#include "llvm/Module.h" +#include "llvm/Function.h" +#include "llvm/Transforms/Utils/BasicInliner.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallPtrSet.h" +#include + +using namespace llvm; + +static cl::opt +BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200), + cl::desc("Control the amount of basic inlining to perform (default = 200)")); + +namespace llvm { + + /// BasicInlinerImpl - BasicInliner implemantation class. This hides + /// container info, used by basic inliner, from public interface. 
+  struct VISIBILITY_HIDDEN BasicInlinerImpl {
+
+    BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+    void operator=(const BasicInlinerImpl&);   // DO NOT IMPLEMENT
+  public:
+    BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+    /// addFunction - Add function into the list of functions to process.
+    /// All functions must be inserted using this interface before invoking
+    /// inlineFunctions().
+    void addFunction(Function *F) {
+      Functions.push_back(F);
+    }
+
+    /// neverInlineFunction - Sometimes a function is never to be inlined
+    /// for one reason or another.
+    void neverInlineFunction(Function *F) {
+      NeverInline.insert(F);
+    }
+
+    /// inlineFunctions - Walk all call sites in all functions supplied by
+    /// client. Inline as many call sites as possible. Delete completely
+    /// inlined functions.
+    void inlineFunctions();
+
+  private:
+    TargetData *TD;
+    std::vector<Function *> Functions;
+    SmallPtrSet<const Function *, 16> NeverInline;
+    SmallPtrSet<Function *, 8> DeadFunctions;
+    InlineCostAnalyzer CA;
+  };
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+  // Scan through and identify all call sites ahead of time so that we only
+  // inline call sites in the original functions, not call sites that result
+  // from inlining other functions.
+  std::vector<CallSite> CallSites;
+
+  for (std::vector<Function *>::iterator FI = Functions.begin(),
+         FE = Functions.end(); FI != FE; ++FI) {
+    Function *F = *FI;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+        CallSite CS = CallSite::get(I);
+        if (CS.getInstruction() && CS.getCalledFunction()
+            && !CS.getCalledFunction()->isDeclaration())
+          CallSites.push_back(CS);
+      }
+  }
+
+  DOUT << ": " << CallSites.size() << " call sites.\n";
+
+  // Inline call sites.
+  bool Changed = false;
+  do {
+    Changed = false;
+    for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+         ++index) {
+      CallSite CS = CallSites[index];
+      if (Function *Callee = CS.getCalledFunction()) {
+
+        // Eliminate calls that are never inlinable.
+        if (Callee->isDeclaration() ||
+            CS.getInstruction()->getParent()->getParent() == Callee) {
+          CallSites.erase(CallSites.begin() + index);
+          --index;
+          continue;
+        }
+        InlineCost IC = CA.getInlineCost(CS, NeverInline);
+        if (IC.isAlways()) {
+          DOUT << "  Inlining: cost=always"
+               << ", call: " << *CS.getInstruction();
+        } else if (IC.isNever()) {
+          DOUT << "  NOT Inlining: cost=never"
+               << ", call: " << *CS.getInstruction();
+          continue;
+        } else {
+          int Cost = IC.getValue();
+
+          if (Cost >= (int) BasicInlineThreshold) {
+            DOUT << "  NOT Inlining: cost = " << Cost
+                 << ", call: " << *CS.getInstruction();
+            continue;
+          } else {
+            DOUT << "  Inlining: cost = " << Cost
+                 << ", call: " << *CS.getInstruction();
+          }
+        }
+
+        // Inline
+        if (InlineFunction(CS, NULL, TD)) {
+          if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+                                      Callee->hasAvailableExternallyLinkage()))
+            DeadFunctions.insert(Callee);
+          Changed = true;
+          CallSites.erase(CallSites.begin() + index);
+          --index;
+        }
+      }
+    }
+  } while (Changed);
+
+  // Remove completely inlined functions from module.
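+  // By this point every inlining decision has been made, so functions whose
+  // last remaining uses were inlined away can be detached from the module.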
+ for(SmallPtrSet::iterator I = DeadFunctions.begin(), + E = DeadFunctions.end(); I != E; ++I) { + Function *D = *I; + Module *M = D->getParent(); + M->getFunctionList().remove(D); + } +} + +BasicInliner::BasicInliner(TargetData *TD) { + Impl = new BasicInlinerImpl(TD); +} + +BasicInliner::~BasicInliner() { + delete Impl; +} + +/// addFunction - Add function into the list of functions to process. +/// All functions must be inserted using this interface before invoking +/// inlineFunctions(). +void BasicInliner::addFunction(Function *F) { + Impl->addFunction(F); +} + +/// neverInlineFunction - Sometimes a function is never to be inlined because +/// of one or other reason. +void BasicInliner::neverInlineFunction(Function *F) { + Impl->neverInlineFunction(F); +} + +/// inlineFuctions - Walk all call sites in all functions supplied by +/// client. Inline as many call sites as possible. Delete completely +/// inlined functions. +void BasicInliner::inlineFunctions() { + Impl->inlineFunctions(); +} + +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 000000000000..c4fd1eae43cd --- /dev/null +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,282 @@ +//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// BreakCriticalEdges pass - Break all of the critical edges in the CFG by +// inserting a dummy basic block. This pass may be "required" by passes that +// cannot deal with critical edges. For this usage, the structure type is +// forward declared. This pass obviously invalidates the CFG, but can update +// forward dominator (set, immediate dominators, tree, and frontier) +// information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "break-crit-edges" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct VISIBILITY_HIDDEN BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; +} + +char BreakCriticalEdges::ID = 0; +static RegisterPass +X("break-crit-edges", "Break critical edges in CFG"); + +// Publically exposed interface to pass... +const PassInfo *const llvm::BreakCriticalEdgesID = &X; +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +// runOnFunction - Loop over all of the edges in the CFG, breaking critical +// edges as they are found. 
+// +bool BreakCriticalEdges::runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (TI->getNumSuccessors() > 1) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, this)) { + ++NumBroken; + Changed = true; + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +// isCriticalEdge - Return true if the specified edge is a critical edge. +// Critical edges are edges from a block with multiple successors to a block +// with multiple predecessors. +// +bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, + bool AllowIdenticalEdges) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *FirstPred = *I; + ++I; // Skip one edge due to the incoming arc from TI. + if (!AllowIdenticalEdges) + return I != E; + + // If AllowIdenticalEdges is true, then we allow this edge to be considered + // non-critical iff all preds come from TI's block. + while (I != E) { + if (*I != FirstPred) + return true; + // Note: leave this as is until no one ever compiles with either gcc 4.0.1 + // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 + E = pred_end(*I); + ++I; + } + return false; +} + +/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to +/// split the critical edge. This will update DominatorTree and +/// DominatorFrontier information if it is available, thus calling this pass +/// will not invalidate any of them. This returns true if the edge was split, +/// false otherwise. This ensures that all edges to that dest go to one block +/// instead of each going to a different block. +// +bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, + bool MergeIdenticalEdges) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false; + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." + + DestBB->getName() + "_crit_edge"); + // Create our unconditional branch... + BranchInst::Create(DestBB, NewBB); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // Insert the block into the function... right after the block TI lives in. + Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB; + F.getBasicBlockList().insert(++FBBI, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + // + for (BasicBlock::iterator I = DestBB->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + // We no longer enter through TIBB, now we come in through NewBB. Revector + // exactly one entry in the PHI node that used to come from TIBB to come + // from NewBB. 
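+    // getBasicBlockIndex finds one entry for TIBB; duplicate entries from
+    // identical edges are removed below when MergeIdenticalEdges is set.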
+ int BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + + + // If we don't have a pass object, we can't update anything... + if (P == 0) return true; + + // Now update analysis information. Since the only predecessor of NewBB is + // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate + // anything, as there are other successors of DestBB. However, if all other + // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a + // loop header) then NewBB dominates DestBB. + SmallVector OtherPreds; + + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + + bool NewBBDominatesDestBB = true; + + // Should we update DominatorTree information? + if (DominatorTree *DT = P->getAnalysisIfAvailable()) { + DomTreeNode *TINode = DT->getNode(TIBB); + + // The new block is not the immediate dominator for any other nodes, but + // TINode is the immediate dominator for the new node. + // + if (TINode) { // Don't break unreachable code! + DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); + DomTreeNode *DestBBNode = 0; + + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. + if (!OtherPreds.empty()) { + DestBBNode = DT->getNode(DestBB); + while (!OtherPreds.empty() && NewBBDominatesDestBB) { + if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) + NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); + OtherPreds.pop_back(); + } + OtherPreds.clear(); + } + + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it + // doesn't dominate anything. + if (NewBBDominatesDestBB) { + if (!DestBBNode) DestBBNode = DT->getNode(DestBB); + DT->changeImmediateDominator(DestBBNode, NewBBNode); + } + } + } + + // Should we update DominanceFrontier information? + if (DominanceFrontier *DF = P->getAnalysisIfAvailable()) { + // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. + if (!OtherPreds.empty()) { + // FIXME: IMPLEMENT THIS! + assert(0 && "Requiring domfrontiers but not idom/domtree/domset." + " not implemented yet!"); + } + + // Since the new block is dominated by its only predecessor TIBB, + // it cannot be in any block's dominance frontier. If NewBB dominates + // DestBB, its dominance frontier is the same as DestBB's, otherwise it is + // just {DestBB}. + DominanceFrontier::DomSetType NewDFSet; + if (NewBBDominatesDestBB) { + DominanceFrontier::iterator I = DF->find(DestBB); + if (I != DF->end()) { + DF->addBasicBlock(NewBB, I->second); + + if (I->second.count(DestBB)) { + // However NewBB's frontier does not include DestBB. + DominanceFrontier::iterator NF = DF->find(NewBB); + DF->removeFromFrontier(NF, DestBB); + } + } + else + DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); + } else { + DominanceFrontier::DomSetType NewDFSet; + NewDFSet.insert(DestBB); + DF->addBasicBlock(NewBB, NewDFSet); + } + } + + // Update LoopInfo if it is around. 
+ if (LoopInfo *LI = P->getAnalysisIfAvailable()) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *TIL = LI->getLoopFor(TIBB)) + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (TIL->contains(DestLoop->getHeader())) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (DestLoop->contains(TIL->getHeader())) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, LI->getBase()); + } + } + } + return true; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt new file mode 100644 index 000000000000..6628b4b1aa93 --- /dev/null +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -0,0 +1,27 @@ +add_llvm_library(LLVMTransformUtils + AddrModeMatcher.cpp + BasicBlockUtils.cpp + BasicInliner.cpp + BreakCriticalEdges.cpp + CloneFunction.cpp + CloneLoop.cpp + CloneModule.cpp + CloneTrace.cpp + CodeExtractor.cpp + DemoteRegToStack.cpp + InlineCost.cpp + InlineFunction.cpp + LCSSA.cpp + Local.cpp + LoopSimplify.cpp + LowerAllocations.cpp + LowerInvoke.cpp + LowerSwitch.cpp + Mem2Reg.cpp + PromoteMemoryToRegister.cpp + SimplifyCFG.cpp + UnifyFunctionExitNodes.cpp + UnrollLoop.cpp + ValueMapper.cpp + InstructionNamer.cpp + ) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 000000000000..d0fdefa3f689 --- /dev/null +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,533 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Function.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/SmallVector.h" +#include +using namespace llvm; + +// CloneBasicBlock - See comments in Cloning.h +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + DenseMap &ValueMap, + const char *NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = BasicBlock::Create("", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa(II) && !isa(II)); + if (const AllocaInst *AI = dyn_cast(II)) { + if (isa(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa(BB->getTerminator()); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// ArgMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + DenseMap &ValueMap, + std::vector &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + assert(ValueMap.count(I) && "No mapping from source argument specified!"); +#endif + + // Clone any attributes. + if (NewFunc->arg_size() == OldFunc->arg_size()) + NewFunc->copyAttributesFrom(OldFunc); + else { + //Some arguments were deleted with the ValueMap. Copy arguments one by one + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + if (Argument* Anew = dyn_cast(ValueMap[I])) + Anew->addAttr( OldFunc->getAttributes() + .getParamAttributes(I->getArgNo() + 1)); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(0, OldFunc->getAttributes() + .getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(~0, OldFunc->getAttributes() + .getFnAttributes())); + + } + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. + // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc, + CodeInfo); + ValueMap[&BB] = CBB; // Add basic block mapping. 
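+    // Returns are collected while cloning so callers (such as the inliner)
+    // can rewrite them later without rescanning the new function.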
+ + if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. + // + for (Function::iterator BB = cast(ValueMap[OldFunc->begin()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, ValueMap); +} + +/// CloneFunction - Return a copy of the specified function, but without +/// embedding the function into another module. Also, any references specified +/// in the ValueMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the ValueMap, +/// the arguments are deleted from the resultant function. The ValueMap is +/// updated to include mappings from all of the instructions and basicblocks in +/// the function from their old to new values. +/// +Function *llvm::CloneFunction(const Function *F, + DenseMap &ValueMap, + ClonedCodeInfo *CodeInfo) { + std::vector ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the ValueMap. If so, we need to not add the arguments to the arg ty vector + // + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I->getType()); + + // Create a new function type... + FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (ValueMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()); // Copy the name over... + ValueMap[I] = DestI++; // Add mapping to ValueMap + } + + std::vector Returns; // Ignore returns cloned... + CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); + return NewF; +} + + + +namespace { + /// PruningFunctionCloner - This class is a private class used to implement + /// the CloneAndPruneFunctionInto method. + struct VISIBILITY_HIDDEN PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + DenseMap &ValueMap; + std::vector &Returns; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + const TargetData *TD; + Value *DbgFnStart; + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + DenseMap &valueMap, + std::vector &returns, + const char *nameSuffix, + ClonedCodeInfo *codeInfo, + const TargetData *td) + : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td), DbgFnStart(NULL) { + } + + /// CloneBlock - The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + std::vector &ToClone); + + public: + /// ConstantFoldMappedInstruction - Constant fold the specified instruction, + /// mapping its operands through ValueMap if they are available. 
+ Constant *ConstantFoldMappedInstruction(const Instruction *I); + }; +} + +/// CloneBlock - The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + std::vector &ToClone){ + Value *&BBEntry = ValueMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + II != IE; ++II) { + // If this instruction constant folds, don't bother cloning the instruction, + // instead, just add the constant to the value map. + if (Constant *C = ConstantFoldMappedInstruction(II)) { + ValueMap[II] = C; + continue; + } + + // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner. + if (const DbgFuncStartInst *DFSI = dyn_cast(II)) { + if (DbgFnStart == NULL) { + DISubprogram SP(cast(DFSI->getSubprogram())); + if (SP.describes(BB->getParent())) + DbgFnStart = DFSI->getSubprogram(); + } + } + if (const DbgRegionEndInst *DREIS = dyn_cast(II)) { + if (DREIS->getContext() == DbgFnStart) + continue; + } + + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa(II) && !isa(II)); + if (const AllocaInst *AI = dyn_cast(II)) { + if (isa(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast(BI->getCondition()); + // Or is a known constant in the caller... + if (Cond == 0) + Cond = dyn_cast_or_null(ValueMap[BI->getCondition()]); + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast(SI->getCondition()); + if (Cond == 0) // Or known constant after constant prop in the callee... + Cond = dyn_cast_or_null(ValueMap[SI->getCondition()]); + if (Cond) { // Constant fold to uncond branch! + BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); + ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[OldTI] = NewInst; // Add instruction map to value. + + // Recursively clone any reachable successor blocks. 
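+    // Only successors of a terminator that actually got cloned are queued,
+    // so blocks the constant folding above proved unreachable stay unvisited.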
+ const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa(OldTI); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } + + if (ReturnInst *RI = dyn_cast(NewBB->getTerminator())) + Returns.push_back(RI); +} + +/// ConstantFoldMappedInstruction - Constant fold the specified instruction, +/// mapping its operands through ValueMap if they are available. +Constant *PruningFunctionCloner:: +ConstantFoldMappedInstruction(const Instruction *I) { + SmallVector Ops; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Constant *Op = dyn_cast_or_null(MapValue(I->getOperand(i), + ValueMap))) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + if (const CmpInst *CI = dyn_cast(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), + &Ops[0], Ops.size(), TD); + + if (const LoadInst *LI = dyn_cast(I)) + if (ConstantExpr *CE = dyn_cast(Ops[0])) + if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr) + if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), + CE); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], + Ops.size(), TD); +} + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + DenseMap &ValueMap, + std::vector &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const TargetData *TD) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(ValueMap.count(II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, + NameSuffix, CodeInfo, TD); + + // Clone the entry block, and anything recursively reachable from it. + std::vector CloneWorklist; + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + std::vector PHIToResolve; + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + BasicBlock *NewBB = cast_or_null(ValueMap[BI]); + if (NewBB == 0) continue; // Dead block. + + // Add the new block to the new function. 
+ NewFunc->getBasicBlockList().push_back(NewBB); + + // Loop over all of the instructions in the block, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. + // + BasicBlock::iterator I = NewBB->begin(); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + if (PHINode *PN = dyn_cast(I)) { + // Skip over all PHI nodes, remembering them for later. + BasicBlock::const_iterator OldI = BI->begin(); + for (; (PN = dyn_cast(I)); ++I, ++OldI) + PHIToResolve.push_back(cast(OldI)); + } + + // Otherwise, remap the rest of the instructions normally. + for (; I != NewBB->end(); ++I) + RemapInstruction(I, ValueMap); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast(ValueMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast(ValueMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + if (BasicBlock *MappedBlock = + cast_or_null(ValueMap[PN->getIncomingBlock(pred)])) { + Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred, --e; // Revisit the next entry. + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map PredCount; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); + PI != E; ++PI) + --PredCount[*PI]; + + // Figure out how many entries to remove from each PHI. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast(I)); ++I) { + for (std::map::iterator PCI =PredCount.begin(), + E = PredCount.end(); PCI != E; ++PCI) { + BasicBlock *Pred = PCI->first; + for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with undef or the input value. We must do this for + // correctness, because 0-operand phis are not valid. 
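+    // Only the zero-operand case is rewritten here; single-entry phi nodes
+    // are deliberately left alone (see the NOTE below).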
+    PN = cast<PHINode>(NewBB->begin());
+    if (PN->getNumIncomingValues() == 0) {
+      BasicBlock::iterator I = NewBB->begin();
+      BasicBlock::const_iterator OldI = OldBB->begin();
+      while ((PN = dyn_cast<PHINode>(I++))) {
+        Value *NV = UndefValue::get(PN->getType());
+        PN->replaceAllUsesWith(NV);
+        assert(ValueMap[OldI] == PN && "ValueMap mismatch");
+        ValueMap[OldI] = NV;
+        PN->eraseFromParent();
+        ++OldI;
+      }
+    }
+    // NOTE: We cannot eliminate single entry phi nodes here, because of
+    // ValueMap.  Single entry phi nodes can have multiple ValueMap entries
+    // pointing at them.  Thus, deleting one would require scanning the
+    // ValueMap to update any entries in it that would require that.  This
+    // would be really slow.
+  }
+
+  // Now that the inlined function body has been fully constructed, go through
+  // and zap unconditional fall-through branches.  This happens all the time
+  // when specializing code: code specialization turns conditional branches
+  // into uncond branches, and this code folds them.
+  Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+  while (I != NewFunc->end()) {
+    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+    if (!BI || BI->isConditional()) { ++I; continue; }
+
+    // Note that we can't eliminate uncond branches if the destination has
+    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
+    // require scanning the ValueMap to update any entries that point to the
+    // phi node.
+    BasicBlock *Dest = BI->getSuccessor(0);
+    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+      ++I; continue;
+    }
+
+    // We know all single-entry PHI nodes in the inlined function have been
+    // removed, so we just need to splice the blocks.
+    BI->eraseFromParent();
+
+    // Move all the instructions in the succ to the pred.
+    I->getInstList().splice(I->end(), Dest->getInstList());
+
+    // Make all PHI nodes that referred to Dest now refer to I as their source.
+    Dest->replaceAllUsesWith(I);
+
+    // Remove the dest block.
+    Dest->eraseFromParent();
+
+    // Do not increment I, iteratively merge all things this block branches to.
+  }
+}
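+
+// The PredCount bookkeeping in CloneAndPruneFunctionInto is plain multiset
+// arithmetic: one credit per real CFG edge, one debit per PHI entry, and
+// whatever stays positive is excess.  A minimal detached sketch (Block is a
+// hypothetical stand-in; assumes <algorithm>/<map>/<vector>):
+#if 0
+struct Block;
+static void PruneExcessPhiEntries(const std::vector<Block*> &Preds,
+                                  std::vector<Block*> &PhiBlocks) {
+  std::map<Block*, int> Count;
+  for (size_t i = 0; i != Preds.size(); ++i)
+    --Count[Preds[i]];                    // Credit each real edge.
+  for (size_t i = 0; i != PhiBlocks.size(); ++i)
+    ++Count[PhiBlocks[i]];                // Debit each PHI entry.
+  // Positive counts are excess entries; each is guaranteed to be present.
+  for (std::map<Block*, int>::iterator I = Count.begin(), E = Count.end();
+       I != E; ++I)
+    for (; I->second > 0; --I->second)
+      PhiBlocks.erase(std::find(PhiBlocks.begin(), PhiBlocks.end(), I->first));
+}
+#endif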
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
new file mode 100644
index 000000000000..7e000a1a75fe
--- /dev/null
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -0,0 +1,152 @@
+//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneLoop interface which makes a copy of a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/DenseMap.h"
+
+
+using namespace llvm;
+
+/// CloneDominatorInfo - Clone a basic block's dominator tree and, if
+/// available, its dominance frontier info.  The basic block is expected to
+/// have already been cloned.
+static void CloneDominatorInfo(BasicBlock *BB,
+                               DenseMap<const Value *, Value *> &ValueMap,
+                               DominatorTree *DT,
+                               DominanceFrontier *DF) {
+
+  assert (DT && "DominatorTree is not available");
+  DenseMap<const Value *, Value *>::iterator BI = ValueMap.find(BB);
+  assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+  BasicBlock *NewBB = cast<BasicBlock>(BI->second);
+
+  // NewBB already got dominator info.
+  if (DT->getNode(NewBB))
+    return;
+
+  assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
+  // Entry block is not expected here.  Infinite loops are not to be cloned.
+  assert (DT->getNode(BB)->getIDom()
+          && "BasicBlock does not have immediate dominator");
+  BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
+
+  // NewBB's dominator is either BB's dominator or BB's dominator's clone.
+  BasicBlock *NewBBDom = BBDom;
+  DenseMap<const Value *, Value *>::iterator BBDomI = ValueMap.find(BBDom);
+  if (BBDomI != ValueMap.end()) {
+    NewBBDom = cast<BasicBlock>(BBDomI->second);
+    if (!DT->getNode(NewBBDom))
+      CloneDominatorInfo(BBDom, ValueMap, DT, DF);
+  }
+  DT->addNewBlock(NewBB, NewBBDom);
+
+  // Copy the cloned block's dominance frontier set.
+  if (DF) {
+    DominanceFrontier::DomSetType NewDFSet;
+    DominanceFrontier::iterator DFI = DF->find(BB);
+    if ( DFI != DF->end()) {
+      DominanceFrontier::DomSetType S = DFI->second;
+      for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
+           I != E; ++I) {
+        BasicBlock *DB = *I;
+        DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB);
+        if (IDM != ValueMap.end())
+          NewDFSet.insert(cast<BasicBlock>(IDM->second));
+        else
+          NewDFSet.insert(DB);
+      }
+    }
+    DF->addBasicBlock(NewBB, NewDFSet);
+  }
+}
+
+/// CloneLoop - Clone Loop.  Clone dominator info.  Populate ValueMap
+/// using old blocks to new blocks mapping.
+Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
+                      DenseMap<const Value *, Value *> &ValueMap, Pass *P) {
+
+  DominatorTree *DT = NULL;
+  DominanceFrontier *DF = NULL;
+  if (P) {
+    DT = P->getAnalysisIfAvailable<DominatorTree>();
+    DF = P->getAnalysisIfAvailable<DominanceFrontier>();
+  }
+
+  SmallVector<BasicBlock *, 16> NewBlocks;
+
+  // Populate loop nest.
+  SmallVector<Loop *, 8> LoopNest;
+  LoopNest.push_back(OrigL);
+
+
+  Loop *NewParentLoop = NULL;
+  while (!LoopNest.empty()) {
+    Loop *L = LoopNest.pop_back_val();
+    Loop *NewLoop = new Loop();
+
+    if (!NewParentLoop)
+      NewParentLoop = NewLoop;
+
+    LPM->insertLoop(NewLoop, L->getParentLoop());
+
+    // Clone Basic Blocks.
+    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+         I != E; ++I) {
+      BasicBlock *BB = *I;
+      BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone");
+      ValueMap[BB] = NewBB;
+      if (P)
+        LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
+      NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+      NewBlocks.push_back(NewBB);
+    }
+
+    // Clone dominator info.
+    if (DT)
+      for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+           I != E; ++I) {
+        BasicBlock *BB = *I;
+        CloneDominatorInfo(BB, ValueMap, DT, DF);
+      }
+
+    // Process sub loops
+    for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+      LoopNest.push_back(*I);
+  }
+
+  // Remap instructions to reference operands from ValueMap.
+  for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
+      NBE = NewBlocks.end();  NBItr != NBE; ++NBItr) {
+    BasicBlock *NB = *NBItr;
+    for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
+        BI != BE; ++BI) {
+      Instruction *Insn = BI;
+      for (unsigned index = 0, num_ops = Insn->getNumOperands();
+           index != num_ops; ++index) {
+        Value *Op = Insn->getOperand(index);
+        DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op);
+        if (OpItr != ValueMap.end())
+          Insn->setOperand(index, OpItr->second);
+      }
+    }
+  }
+
+  BasicBlock *Latch = OrigL->getLoopLatch();
+  Function *F = Latch->getParent();
+  F->getBasicBlockList().insert(OrigL->getHeader(),
+                                NewBlocks.begin(), NewBlocks.end());
+
+
+  return NewParentLoop;
+}
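+
+// CloneLoop is a two-phase clone: copy every block first so the map is
+// total, then rewrite operands through the map, leaving references to
+// values outside the cloned region untouched.  The same shape over a toy
+// graph (Node is hypothetical, not an LLVM type; assumes <map>/<vector>):
+#if 0
+struct Node { std::vector<Node*> Edges; };
+
+static Node *CloneGraph(const std::vector<Node*> &Nodes) {
+  std::map<Node*, Node*> Map;
+  // Phase 1: allocate clones; their edges still point at the originals.
+  for (size_t i = 0; i != Nodes.size(); ++i)
+    Map[Nodes[i]] = new Node(*Nodes[i]);
+  // Phase 2: remap every edge that has a clone; external edges stay as-is.
+  for (size_t i = 0; i != Nodes.size(); ++i) {
+    Node *NN = Map[Nodes[i]];
+    for (size_t e = 0; e != NN->Edges.size(); ++e) {
+      std::map<Node*, Node*>::iterator It = Map.find(NN->Edges[e]);
+      if (It != Map.end())
+        NN->Edges[e] = It->second;
+    }
+  }
+  return Nodes.empty() ? 0 : Map[Nodes[0]];
+}
+#endif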
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..337fa8a44bbe
--- /dev/null
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,126 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Constant.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module.  This is not as
+/// easy as it might seem because we have to worry about making copies of
+/// global variables and functions, and making their (initializers and
+/// references, respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+  // Create the value map that maps things from the old module over to the new
+  // module.
+  DenseMap<const Value*, Value*> ValueMap;
+  return CloneModule(M, ValueMap);
+}
+
+Module *llvm::CloneModule(const Module *M,
+                          DenseMap<const Value*, Value*> &ValueMap) {
+  // First off, we need to create the new module...
+  Module *New = new Module(M->getModuleIdentifier());
+  New->setDataLayout(M->getDataLayout());
+  New->setTargetTriple(M->getTargetTriple());
+  New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+  // Copy all of the type symbol table entries over.
+  const TypeSymbolTable &TST = M->getTypeSymbolTable();
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+       TI != TE; ++TI)
+    New->addTypeName(TI->first, TI->second);
+
+  // Copy all of the dependent libraries over.
+  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+    New->addLibrary(*I);
+
+  // Loop over all of the global variables, making corresponding globals in the
+  // new module.  Here we add them to the ValueMap and to the new Module.  We
+  // don't worry about attributes or initializers, they will come later.
+  //
+  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I) {
+    GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(),
+                                            false,
+                                            GlobalValue::ExternalLinkage, 0,
+                                            I->getName(), New);
+    GV->setAlignment(I->getAlignment());
+    ValueMap[I] = GV;
+  }
+
+  // Loop over the functions in the module, making external functions as before
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    Function *NF =
+      Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+                       GlobalValue::ExternalLinkage, I->getName(), New);
+    NF->copyAttributesFrom(I);
+    ValueMap[I] = NF;
+  }
+
+  // Loop over the aliases in the module
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+                                  I->getName(), NULL, New);
+
+  // Now that all of the things that global variable initializer can refer to
+  // have been created, loop through and copy the global variable referrers
+  // over...  We also set the attributes on the global now.
+  //
+  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I) {
+    GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+    if (I->hasInitializer())
+      GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
+                                                 ValueMap)));
+    GV->setLinkage(I->getLinkage());
+    GV->setThreadLocal(I->isThreadLocal());
+    GV->setConstant(I->isConstant());
+  }
+
+  // Similarly, copy over function bodies now...
+  //
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    Function *F = cast<Function>(ValueMap[I]);
+    if (!I->isDeclaration()) {
+      Function::arg_iterator DestI = F->arg_begin();
+      for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+           ++J) {
+        DestI->setName(J->getName());
+        ValueMap[J] = DestI++;
+      }
+
+      std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+      CloneFunctionInto(F, I, ValueMap, Returns);
+    }
+
+    F->setLinkage(I->getLinkage());
+  }
+
+  // And aliases
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I) {
+    GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+    GA->setLinkage(I->getLinkage());
+    if (const Constant* C = I->getAliasee())
+      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+  }
+
+  return New;
+}
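+
+// A minimal usage sketch for the interface above; the module M and the
+// experiment run on the copy are hypothetical:
+#if 0
+#include "llvm/Transforms/Utils/Cloning.h"
+
+void SketchCloneAndDiscard(llvm::Module *M) {
+  // The copy shares nothing with the original, so destructive experiments
+  // on it are safe.
+  llvm::Module *Copy = llvm::CloneModule(M);
+  // ... run transforms on Copy ...
+  delete Copy;                  // M itself is untouched.
+}
+#endif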
diff --git a/lib/Transforms/Utils/CloneTrace.cpp b/lib/Transforms/Utils/CloneTrace.cpp
new file mode 100644
index 000000000000..07111393e275
--- /dev/null
+++ b/lib/Transforms/Utils/CloneTrace.cpp
@@ -0,0 +1,119 @@
+//===- CloneTrace.cpp - Clone a trace -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneTrace interface, which is used when writing
+// runtime optimizations.  It takes a vector of basic blocks, clones them,
+// removes internal phi nodes, adds the clones to the same function as the
+// original (although there is no jump to them) and returns the new vector of
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+//Clones the trace (a vector of basic blocks)
+std::vector<BasicBlock *>
+llvm::CloneTrace(const std::vector<BasicBlock*> &origTrace) {
+  std::vector<BasicBlock *> clonedTrace;
+  DenseMap<const Value*, Value*> ValueMap;
+
+  //First, loop over all the Basic Blocks in the trace and copy
+  //them using CloneBasicBlock. Also fix the phi nodes during
+  //this loop. To fix the phi nodes, we delete incoming branches
+  //that are not in the trace.
+  for (std::vector<BasicBlock*>::const_iterator T = origTrace.begin(),
+       End = origTrace.end(); T != End; ++T) {
+
+    //Clone Basic Block
+    BasicBlock *clonedBlock =
+      CloneBasicBlock(*T, ValueMap, ".tr", (*T)->getParent());
+
+    //Add it to our new trace
+    clonedTrace.push_back(clonedBlock);
+
+    //Add this new mapping to our Value Map
+    ValueMap[*T] = clonedBlock;
+
+    //Loop over the phi instructions and delete operands
+    //that are from blocks not in the trace
+    //only do this if we are NOT the first block
+    if (T != origTrace.begin()) {
+      for (BasicBlock::iterator I = clonedBlock->begin();
+           isa<PHINode>(I); ) {
+        //advance the iterator before erasing the phi node below
+        PHINode *PN = cast<PHINode>(I++);
+        //get incoming value for the previous BB
+        Value *V = PN->getIncomingValueForBlock(*(T-1));
+        assert(V && "No incoming value from a BasicBlock in our trace!");
+
+        //remap our phi node to point to incoming value
+        ValueMap[PN] = V;
+
+        //remove phi node
+        clonedBlock->getInstList().erase(PN);
+      }
+    }
+  }
+
+  //Second loop to do the remapping
+  for (std::vector<BasicBlock *>::const_iterator BB = clonedTrace.begin(),
+       BE = clonedTrace.end(); BB != BE; ++BB) {
+    for (BasicBlock::iterator I = (*BB)->begin(); I != (*BB)->end(); ++I) {
+      //Loop over all the operands of the instruction
+      for (unsigned op=0, E = I->getNumOperands(); op != E; ++op) {
+        const Value *Op = I->getOperand(op);
+
+        //Get it out of the value map
+        Value *V = ValueMap[Op];
+
+        //If not in the value map, then it's outside our trace so ignore it
+        if (V != 0)
+          I->setOperand(op,V);
+      }
+    }
+  }
+
+  //return new vector of basic blocks
+  return clonedTrace;
+}
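+
+// Why internal phis can be deleted at all: on a straight-line trace each
+// block has exactly one predecessor-on-trace, so a phi degenerates to the
+// single value flowing in from it.  Minimal sketch (a phi modeled as
+// (pred, value) pairs is a hypothetical stand-in; assumes <cassert> etc.):
+#if 0
+typedef std::vector<std::pair<int /*pred*/, int /*value*/> > Phi;
+
+static int CollapsePhiOnTrace(const Phi &P, int PrevBlockOnTrace) {
+  for (size_t i = 0; i != P.size(); ++i)
+    if (P[i].first == PrevBlockOnTrace)
+      return P[i].second;       // The only value that can flow in here.
+  assert(0 && "No incoming value from a BasicBlock in our trace!");
+  return 0;
+}
+#endif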
+/// CloneTraceInto - Clone T into NewFunc. Original<->clone mapping is
+/// saved in ValueMap.
+///
+void llvm::CloneTraceInto(Function *NewFunc, Trace &T,
+                          DenseMap<const Value*, Value*> &ValueMap,
+                          const char *NameSuffix) {
+  assert(NameSuffix && "NameSuffix cannot be null!");
+
+  // Loop over all of the basic blocks in the trace, cloning them as
+  // appropriate.
+  //
+  for (Trace::const_iterator BI = T.begin(), BE = T.end(); BI != BE; ++BI) {
+    const BasicBlock *BB = *BI;
+
+    // Create a new basic block and copy instructions into it!
+    BasicBlock *CBB = CloneBasicBlock(BB, ValueMap, NameSuffix, NewFunc);
+    ValueMap[BB] = CBB;                       // Add basic block mapping.
+  }
+
+  // Loop over all of the instructions in the new function, fixing up operand
+  // references as we go.  This uses ValueMap to do all the hard work.
+  //
+  for (Function::iterator BB =
+         cast<BasicBlock>(ValueMap[T.getEntryBasicBlock()]),
+       BE = NewFunc->end(); BB != BE; ++BB)
+    // Loop over all instructions, fixing each one as we find it...
+    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+      RemapInstruction(II, ValueMap);
+}
+
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..6d5904e30886
--- /dev/null
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,746 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it
+// with a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+                 cl::desc("Aggregate arguments to code-extracted functions"));
+
+namespace {
+  class VISIBILITY_HIDDEN CodeExtractor {
+    typedef std::vector<Value*> Values;
+    std::set<BasicBlock*> BlocksToExtract;
+    DominatorTree* DT;
+    bool AggregateArgs;
+    unsigned NumExitBlocks;
+    const Type *RetTy;
+  public:
+    CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+      : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+    Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+    bool isEligible(const std::vector<BasicBlock*> &code);
+
+  private:
+    /// definedInRegion - Return true if the specified value is defined in the
+    /// extracted region.
+    bool definedInRegion(Value *V) const {
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        if (BlocksToExtract.count(I->getParent()))
+          return true;
+      return false;
+    }
+
+    /// definedInCaller - Return true if the specified value is defined in the
+    /// function being code extracted, but not in the region being extracted.
+    /// These values must be passed in as live-ins to the function.
+    bool definedInCaller(Value *V) const {
+      if (isa<Argument>(V)) return true;
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        if (!BlocksToExtract.count(I->getParent()))
+          return true;
+      return false;
+    }
+
+    void severSplitPHINodes(BasicBlock *&Header);
+    void splitReturnBlocks();
+    void findInputsOutputs(Values &inputs, Values &outputs);
+
+    Function *constructFunction(const Values &inputs,
+                                const Values &outputs,
+                                BasicBlock *header,
+                                BasicBlock *newRootNode, BasicBlock *newHeader,
+                                Function *oldFunction, Module *M);
+
+    void moveCodeToFunction(Function *newFunction);
+
+    void emitCallAndSwitchStatement(Function *newFunction,
+                                    BasicBlock *newHeader,
+                                    Values &inputs,
+                                    Values &outputs);
+
+  };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+  bool HasPredsFromRegion = false;
+  unsigned NumPredsOutsideRegion = 0;
+
+  if (Header != &Header->getParent()->getEntryBlock()) {
+    PHINode *PN = dyn_cast<PHINode>(Header->begin());
+    if (!PN) return;  // No PHI nodes.
+
+    // If the header node contains any PHI nodes, check to see if there is more
+    // than one entry from outside the region.  If so, we need to sever the
+    // header block into two.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+        HasPredsFromRegion = true;
+      else
+        ++NumPredsOutsideRegion;
+
+    // If there is one (or fewer) predecessor from outside the region, we don't
+    // need to do anything special.
+    if (NumPredsOutsideRegion <= 1) return;
+  }
+
+  // Otherwise, we need to split the header block into two pieces: one
+  // containing PHI nodes merging values from outside of the region, and a
+  // second that contains all of the code for the block and merges back any
+  // incoming values from inside of the region.
+  BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+  BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+                                              Header->getName()+".ce");
+
+  // We only want to code extract the second block now, and it becomes the new
+  // header of the region.
+  BasicBlock *OldPred = Header;
+  BlocksToExtract.erase(OldPred);
+  BlocksToExtract.insert(NewBB);
+  Header = NewBB;
+
+  // Okay, update dominator sets. The blocks that dominate the new one are the
+  // blocks that dominate TIBB plus the new block itself.
+  if (DT)
+    DT->splitBlock(NewBB);
+
+  // Okay, now we need to adjust the PHI nodes and any branches from within the
+  // region to go to the new header block instead of the old header block.
+  if (HasPredsFromRegion) {
+    PHINode *PN = cast<PHINode>(OldPred->begin());
+    // Loop over all of the predecessors of OldPred that are in the region,
+    // changing them to branch to NewBB instead.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+        TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+        TI->replaceUsesOfWith(OldPred, NewBB);
+      }
+
+    // Okay, everything within the region is now branching to the right block,
+    // we just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+    for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+      PHINode *PN = cast<PHINode>(AfterPHIs);
+      // Create a new PHI node in the new region, which has an incoming value
+      // from OldPred of PN.
+      PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce",
+                                       NewBB->begin());
+      NewPN->addIncoming(PN, OldPred);
+
+      // Loop over all of the incoming values in PN, moving them to NewPN if
+      // they are from the extracted region.
+      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+        if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+          NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+          PN->removeIncomingValue(i);
+          --i;
+        }
+      }
+    }
+  }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+  for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+       E = BlocksToExtract.end(); I != E; ++I)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
+      (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+  std::set<BasicBlock*> ExitBlocks;
+  for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+       ce = BlocksToExtract.end(); ci != ce; ++ci) {
+    BasicBlock *BB = *ci;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // If a used value is defined outside the region, it's an input.  If an
+      // instruction is used outside the region, it's an output.
+      for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+        if (definedInCaller(*O))
+          inputs.push_back(*O);
+
+      // Consider uses of this instruction (outputs).
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+           UI != E; ++UI)
+        if (!definedInRegion(*UI)) {
+          outputs.push_back(I);
+          break;
+        }
+    } // for: insts
+
+    // Keep track of the exit blocks from the region.
+    TerminatorInst *TI = BB->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i)))
+        ExitBlocks.insert(TI->getSuccessor(i));
+  } // for: basic blocks
+
+  NumExitBlocks = ExitBlocks.size();
+
+  // Eliminate duplicates.
+  std::sort(inputs.begin(), inputs.end());
+  inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end());
+  std::sort(outputs.begin(), outputs.end());
+  outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end());
+}
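+
+// The classification above is a pure set computation: used inside but
+// defined outside => input; defined inside but used outside => output.  The
+// same shape over integer value IDs (the Defs/Uses tables are hypothetical;
+// assumes <map>/<set>):
+#if 0
+static void SketchFindInputsOutputs(
+    const std::set<int> &DefinedInRegion,        // value IDs defined inside
+    const std::multimap<int, int> &Uses,         // (def ID, user ID) pairs
+    std::set<int> &Inputs, std::set<int> &Outputs) {
+  for (std::multimap<int, int>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    bool DefInside = DefinedInRegion.count(I->first) != 0;
+    bool UseInside = DefinedInRegion.count(I->second) != 0;
+    if (!DefInside && UseInside)
+      Inputs.insert(I->first);     // Live-in: must become an argument.
+    if (DefInside && !UseInside)
+      Outputs.insert(I->first);    // Escapes: must be stored back out.
+  }
+}
+#endif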
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+                                           const Values &outputs,
+                                           BasicBlock *header,
+                                           BasicBlock *newRootNode,
+                                           BasicBlock *newHeader,
+                                           Function *oldFunction,
+                                           Module *M) {
+  DOUT << "inputs: " << inputs.size() << "\n";
+  DOUT << "outputs: " << outputs.size() << "\n";
+
+  // This function returns unsigned, outputs will go back by reference.
+  switch (NumExitBlocks) {
+  case 0:
+  case 1: RetTy = Type::VoidTy; break;
+  case 2: RetTy = Type::Int1Ty; break;
+  default: RetTy = Type::Int16Ty; break;
+  }
+
+  std::vector<const Type*> paramTy;
+
+  // Add the types of the input values to the function's argument list
+  for (Values::const_iterator i = inputs.begin(),
+       e = inputs.end(); i != e; ++i) {
+    const Value *value = *i;
+    DOUT << "value used in func: " << *value << "\n";
+    paramTy.push_back(value->getType());
+  }
+
+  // Add the types of the output values to the function's argument list.
+  for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+       I != E; ++I) {
+    DOUT << "instr used in func: " << **I << "\n";
+    if (AggregateArgs)
+      paramTy.push_back((*I)->getType());
+    else
+      paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+  }
+
+  DOUT << "Function type: " << *RetTy << " f(";
+  for (std::vector<const Type*>::iterator i = paramTy.begin(),
+       e = paramTy.end(); i != e; ++i)
+    DOUT << **i << ", ";
+  DOUT << ")\n";
+
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy));
+    paramTy.clear();
+    paramTy.push_back(StructPtr);
+  }
+  const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+
+  // Create the new function
+  Function *newFunction = Function::Create(funcType,
+                                           GlobalValue::InternalLinkage,
+                                           oldFunction->getName() + "_" +
+                                           header->getName(), M);
+  // If the old function is no-throw, so is the new one.
+  if (oldFunction->doesNotThrow())
+    newFunction->setDoesNotThrow(true);
+
+  newFunction->getBasicBlockList().push_back(newRootNode);
+
+  // Create an iterator to name all of the arguments we inserted.
+  Function::arg_iterator AI = newFunction->arg_begin();
+
+  // Rewrite all users of the inputs in the extracted region to use the
+  // arguments (or appropriate addressing into struct) instead.
+  for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+    Value *RewriteVal;
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::Int32Ty);
+      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+      std::string GEPname = "gep_" + inputs[i]->getName();
+      TerminatorInst *TI = newFunction->begin()->getTerminator();
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2,
+                                                         GEPname, TI);
+      RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+    } else
+      RewriteVal = AI++;
+
+    std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+    for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+         use != useE; ++use)
+      if (Instruction* inst = dyn_cast<Instruction>(*use))
+        if (BlocksToExtract.count(inst->getParent()))
+          inst->replaceUsesOfWith(inputs[i], RewriteVal);
+  }
+
+  // Set names for input and output arguments.
+  if (!AggregateArgs) {
+    AI = newFunction->arg_begin();
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+      AI->setName(inputs[i]->getName());
+    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+      AI->setName(outputs[i]->getName()+".out");
+  }
+
+  // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+  // within the new function. This must be done before we lose track of which
+  // blocks were originally in the code region.
+  std::vector<User*> Users(header->use_begin(), header->use_end());
+  for (unsigned i = 0, e = Users.size(); i != e; ++i)
+    // The BasicBlock which contains the branch is not in the region
+    // modify the branch target to a new block
+    if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+      if (!BlocksToExtract.count(TI->getParent()) &&
+          TI->getParent()->getParent() == oldFunction)
+        TI->replaceUsesOfWith(header, newHeader);
+
+  return newFunction;
+}
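+
+// The -aggregate-extracted-args mode mirrors the classic pthread idiom the
+// file comment mentions: pack all inputs and outputs into one struct and
+// pass a single pointer.  A self-contained sketch of that calling
+// convention (Args/worker/run are hypothetical):
+#if 0
+#include <pthread.h>
+
+struct Args { int in0; float in1; int out0; };   // f(in0, in1, &out0)
+
+static void *worker(void *p) {
+  Args *a = static_cast<Args*>(p);  // Unpack the one permitted argument.
+  a->out0 = a->in0 * 2;             // Outputs are written back by reference.
+  return 0;
+}
+
+static int run(int x, float y) {
+  Args a = { x, y, 0 };
+  pthread_t t;
+  pthread_create(&t, 0, worker, &a);  // Only a single void* can be passed.
+  pthread_join(t, 0);
+  return a.out0;
+}
+#endif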
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+                           Values &inputs, Values &outputs) {
+  // Emit a call to the new function, passing in: *pointer to struct (if
+  // aggregating parameters), or plain inputs and allocated memory for outputs
+  std::vector<Value*> params, StructValues, ReloadOutputs;
+
+  // Add inputs as params, or to be filled into the struct
+  for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+    if (AggregateArgs)
+      StructValues.push_back(*i);
+    else
+      params.push_back(*i);
+
+  // Create allocas for the outputs
+  for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+    if (AggregateArgs) {
+      StructValues.push_back(*i);
+    } else {
+      AllocaInst *alloca =
+        new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+                       codeReplacer->getParent()->begin()->begin());
+      ReloadOutputs.push_back(alloca);
+      params.push_back(alloca);
+    }
+  }
+
+  AllocaInst *Struct = 0;
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    std::vector<const Type*> ArgTypes;
+    for (Values::iterator v = StructValues.begin(),
+         ve = StructValues.end(); v != ve; ++v)
+      ArgTypes.push_back((*v)->getType());
+
+    // Allocate a struct at the beginning of this function
+    Type *StructArgTy = StructType::get(ArgTypes);
+    Struct =
+      new AllocaInst(StructArgTy, 0, "structArg",
+                     codeReplacer->getParent()->begin()->begin());
+    params.push_back(Struct);
+
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::Int32Ty);
+      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+      GetElementPtrInst *GEP =
+        GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+                                  "gep_" + StructValues[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      StoreInst *SI = new StoreInst(StructValues[i], GEP);
+      codeReplacer->getInstList().push_back(SI);
+    }
+  }
+
+  // Emit the call to the function
+  CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+                                    NumExitBlocks > 1 ? "targetBlock" : "");
+  codeReplacer->getInstList().push_back(call);
+
+  Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+  unsigned FirstOut = inputs.size();
+  if (!AggregateArgs)
+    std::advance(OutputArgBegin, inputs.size());
+
+  // Reload the outputs passed in by reference
+  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+    Value *Output = 0;
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::Int32Ty);
+      Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+      GetElementPtrInst *GEP
+        = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+                                    "gep_reload_" + outputs[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      Output = GEP;
+    } else {
+      Output = ReloadOutputs[i];
+    }
+    LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    codeReplacer->getInstList().push_back(load);
+    std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+    for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+      Instruction *inst = cast<Instruction>(Users[u]);
+      if (!BlocksToExtract.count(inst->getParent()))
+        inst->replaceUsesOfWith(outputs[i], load);
+    }
+  }
+
+  // Now we can emit a switch statement using the call as a value.
+  SwitchInst *TheSwitch =
+      SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty),
+                         codeReplacer, 0, codeReplacer);
+
+  // Since there may be multiple exits from the original region, make the new
+  // function return an unsigned, switch on that number.  This loop iterates
+  // over all of the blocks in the extracted region, updating any terminator
+  // instructions in the to-be-extracted region that branch to blocks that are
+  // not in the region to be extracted.
+  std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+  unsigned switchVal = 0;
+  for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+       e = BlocksToExtract.end(); i != e; ++i) {
+    TerminatorInst *TI = (*i)->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+        BasicBlock *OldTarget = TI->getSuccessor(i);
+        // add a new basic block which returns the appropriate value
+        BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+        if (!NewTarget) {
+          // If we don't already have an exit stub for this non-extracted
+          // destination, create one now!
+          NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub",
+                                         newFunction);
+          unsigned SuccNum = switchVal++;
+
+          Value *brVal = 0;
+          switch (NumExitBlocks) {
+          case 0:
+          case 1: break;  // No value needed.
+          case 2:         // Conditional branch, return a bool
+            brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+            break;
+          default:
+            brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+            break;
+          }
+
+          ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget);
+
+          // Update the switch instruction.
+          TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+                             OldTarget);
+
+          // Restore values just before we exit
+          Function::arg_iterator OAI = OutputArgBegin;
+          for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+            // For an invoke, the normal destination is the only one that is
+            // dominated by the result of the invocation
+            BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+            bool DominatesDef = true;
+
+            if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+              DefBlock = Invoke->getNormalDest();
+
+              // Make sure we are looking at the original successor block, not
+              // at a newly inserted exit block, which won't be in the dominator
+              // info.
+              for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+                     ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+                if (DefBlock == I->second) {
+                  DefBlock = I->first;
+                  break;
+                }
+
+              // In the extract block case, if the block we are extracting ends
+              // with an invoke instruction, make sure that we don't emit a
+              // store of the invoke value for the unwind block.
+              if (!DT && DefBlock != OldTarget)
+                DominatesDef = false;
+            }
+
+            if (DT)
+              DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+            if (DominatesDef) {
+              if (AggregateArgs) {
+                Value *Idx[2];
+                Idx[0] = Constant::getNullValue(Type::Int32Ty);
+                Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+                GetElementPtrInst *GEP =
+                  GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+                                            "gep_" + outputs[out]->getName(),
+                                            NTRet);
+                new StoreInst(outputs[out], GEP, NTRet);
+              } else {
+                new StoreInst(outputs[out], OAI, NTRet);
+              }
+            }
+            // Advance output iterator even if we don't emit a store
+            if (!AggregateArgs) ++OAI;
+          }
+        }
+
+        // rewrite the original branch instruction with this new target
+        TI->setSuccessor(i, NewTarget);
+      }
+  }
+
+  // Now that we've done the deed, simplify the switch instruction.
+  const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+  switch (NumExitBlocks) {
+  case 0:
+    // There are no successors (the block containing the switch itself), which
+    // means that previously this was the last part of the function, and hence
+    // this should be rewritten as a `ret'
+
+    // Check if the function should return a value
+    if (OldFnRetTy == Type::VoidTy) {
+      ReturnInst::Create(0, TheSwitch);  // Return void
+    } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+      // return what we have
+      ReturnInst::Create(TheSwitch->getCondition(), TheSwitch);
+    } else {
+      // Otherwise we must have code extracted an unwind or something, just
+      // return whatever we want.
+      ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+    }
+
+    TheSwitch->eraseFromParent();
+    break;
+  case 1:
+    // Only a single destination, change the switch into an unconditional
+    // branch.
+    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+    TheSwitch->eraseFromParent();
+    break;
+  case 2:
+    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+                       call, TheSwitch);
+    TheSwitch->eraseFromParent();
+    break;
+  default:
+    // Otherwise, make the default destination of the switch instruction be one
+    // of the other successors.
+    TheSwitch->setOperand(0, call);
+    TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+    TheSwitch->removeCase(NumExitBlocks);  // Remove redundant case
+    break;
+  }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+  Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+  Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+  Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+  for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+       e = BlocksToExtract.end(); i != e; ++i) {
+    // Delete the basic block from the old function, and the list of blocks
+    oldBlocks.remove(*i);
+
+    // Insert this basic block into the new function
+    newBlocks.push_back(*i);
+  }
+}
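+
+// The caller/callee protocol assembled above, reduced to its essence: the
+// extracted function returns which region exit it took as a small integer
+// and the caller-side stub switches on it.  Hypothetical sketch:
+#if 0
+static unsigned extracted(int x, int *out0) {  // two exits => bool-like code
+  *out0 = x + 1;
+  return x > 0 ? 0u : 1u;                      // 0 = first exit, 1 = second
+}
+
+static int caller(int x) {
+  int out0;
+  switch (extracted(x, &out0)) {               // stands in for "TheSwitch"
+  case 0:  return out0;                        // old first exit target
+  default: return -out0;                       // old second exit target
+  }
+}
+#endif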
+/// ExtractRegion - Removes a loop from a function, replaces it with a call to
+/// a new function. Returns a pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+///              add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+  if (!isEligible(code))
+    return 0;
+
+  // 1) Find inputs, outputs
+  // 2) Construct new function
+  //  * Add allocas for defs, pass as args by reference
+  //  * Pass in uses as args
+  // 3) Move code region, add call instr to func
+  //
+  BlocksToExtract.insert(code.begin(), code.end());
+
+  Values inputs, outputs;
+
+  // Assumption: this is a single-entry code region, and the header is the
+  // first block in the region.
+  BasicBlock *header = code[0];
+
+  for (unsigned i = 1, e = code.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+         PI != E; ++PI)
+      assert(BlocksToExtract.count(*PI) &&
+             "No blocks in this region may have entries from outside the region"
+             " except for the first block!");
+
+  // If we have to split PHI nodes or the entry block, do so now.
+  severSplitPHINodes(header);
+
+  // If we have any return instructions in the region, split those blocks so
+  // that the return is not in the region.
+  splitReturnBlocks();
+
+  Function *oldFunction = header->getParent();
+
+  // This takes the place of the original loop
+  BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+                                                header);
+
+  // The new function needs a root node because other nodes can branch to the
+  // head of the region, but the entry node of a function cannot have preds.
+  BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+  newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+  // Find inputs to, outputs from the code region.
+  findInputsOutputs(inputs, outputs);
+
+  // Construct new function based on inputs/outputs & add allocas for all defs.
+  Function *newFunction = constructFunction(inputs, outputs, header,
+                                            newFuncRoot,
+                                            codeReplacer, oldFunction,
+                                            oldFunction->getParent());
+
+  emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+  moveCodeToFunction(newFunction);
+
+  // Loop over all of the PHI nodes in the header block, and change any
+  // references to the old incoming edge to be the new incoming edge.
+  for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+        PN->setIncomingBlock(i, newFuncRoot);
+  }
+
+  // Look at all successors of the codeReplacer block.  If any of these blocks
+  // had PHI nodes in them, we need to update the "from" block to be the code
+  // replacer, not the original block in the extracted region.
+  std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+                                 succ_end(codeReplacer));
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+    for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+      PHINode *PN = cast<PHINode>(I);
+      std::set<BasicBlock*> ProcessedPreds;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+          if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+            PN->setIncomingBlock(i, codeReplacer);
+          else {
+            // There were multiple entries in the PHI for this block, now there
+            // is only one, so remove the duplicated entries.
+            PN->removeIncomingValue(i, false);
+            --i; --e;
+          }
+        }
+    }
+
+  //cerr << "NEW FUNCTION: " << *newFunction;
+  //  verifyFunction(*newFunction);
+
+  //  cerr << "OLD FUNCTION: " << *oldFunction;
+  //  verifyFunction(*oldFunction);
+
+  DEBUG(if (verifyFunction(*newFunction)) abort());
+  return newFunction;
+}
+
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+  // Deny a code region if it contains allocas or vastarts.
+  for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+       BB != e; ++BB)
+    for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+         I != Ie; ++I)
+      if (isa<AllocaInst>(*I))
+        return false;
+      else if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (const Function *F = CI->getCalledFunction())
+          if (F->getIntrinsicID() == Intrinsic::vastart)
+            return false;
+  return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+                                  const std::vector<BasicBlock*> &code,
+                                  bool AggregateArgs) {
+  return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+  return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+  std::vector<BasicBlock*> Blocks;
+  Blocks.push_back(BB);
+  return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
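+
+// A minimal usage sketch for the wrappers above (the pass that picked BB,
+// and the renaming, are hypothetical):
+#if 0
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+
+void SketchOutlineBlock(llvm::BasicBlock *BB) {
+  // Pull BB into its own function; the original function now calls it.
+  // Returns null if the block is ineligible (e.g. it contains an alloca).
+  if (llvm::Function *Outlined = llvm::ExtractBasicBlock(BB, false))
+    Outlined->setName(Outlined->getName() + ".outlined");
+}
+#endif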
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..b8dd75413342
--- /dev/null
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,144 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack().  This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca.  It returns the pointer to the
+// AllocaInst inserted.  After this function is called on an instruction, we
+// are guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca.  This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value.  It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+                                   Instruction *AllocaPoint) {
+  if (I.use_empty()) {
+    I.eraseFromParent();
+    return 0;
+  }
+
+  // Create a stack slot to hold the value.
+  AllocaInst *Slot;
+  if (AllocaPoint) {
+    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+  } else {
+    Function *F = I.getParent()->getParent();
+    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+                          F->getEntryBlock().begin());
+  }
+
+  // Change all of the users of the instruction to read from the stack slot
+  // instead.
+  while (!I.use_empty()) {
+    Instruction *U = cast<Instruction>(I.use_back());
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      // If this is a PHI node, we can't insert a load of the value before the
+      // use.  Instead, insert the load in the predecessor block corresponding
+      // to the incoming value.
+      //
+      // Note that if there are multiple edges from a basic block to this PHI
+      // node, we cannot insert multiple loads.  The problem is that the
+      // resultant PHI node would have multiple values (one from each load)
+      // coming in from the same block, which is illegal SSA form.  For this
+      // reason, we keep track of and reuse the loads we insert.
+      std::map<BasicBlock*, Value*> Loads;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == &I) {
+          Value *&V = Loads[PN->getIncomingBlock(i)];
+          if (V == 0) {
+            // Insert the load into the predecessor block
+            V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+                             PN->getIncomingBlock(i)->getTerminator());
+          }
+          PN->setIncomingValue(i, V);
+        }
+
+    } else {
+      // If this is a normal instruction, just insert a load.
+      Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+      U->replaceUsesOfWith(&I, V);
+    }
+  }
+
+
+  // Insert stores of the computed value into the stack slot.  We have to be
+  // careful if I is an invoke instruction, though, because we can't insert
+  // the store AFTER the terminator instruction.
+  BasicBlock::iterator InsertPt;
+  if (!isa<InvokeInst>(I)) {
+    InsertPt = &I;
+    ++InsertPt;
+  } else {
+    // We cannot demote invoke instructions to the stack if their normal edge
+    // is critical.
+    InvokeInst &II = cast<InvokeInst>(I);
+    assert(II.getNormalDest()->getSinglePredecessor() &&
+           "Cannot demote invoke with a critical successor!");
+    InsertPt = II.getNormalDest()->begin();
+  }
+
+  for (; isa<PHINode>(InsertPt); ++InsertPt)
+    /* empty */;   // Don't insert before any PHI nodes.
+  new StoreInst(&I, Slot, InsertPt);
+
+  return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+  if (P->use_empty()) {
+    P->eraseFromParent();
+    return 0;
+  }
+
+  // Create a stack slot to hold the value.
+  AllocaInst *Slot;
+  if (AllocaPoint) {
+    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+  } else {
+    Function *F = P->getParent()->getParent();
+    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+                          F->getEntryBlock().begin());
+  }
+
+  // Iterate over each operand, inserting a store in each predecessor.
+  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+      assert(II->getParent() != P->getIncomingBlock(i) &&
+             "Invoke edge not supported yet"); II=II;
+    }
+    new StoreInst(P->getIncomingValue(i), Slot,
+                  P->getIncomingBlock(i)->getTerminator());
+  }
+
+  // Insert a load in place of the phi and replace all uses.
+  Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+  P->replaceAllUsesWith(V);
+
+  // Delete the phi.
+  P->eraseFromParent();
+
+  return Slot;
+}
diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp
new file mode 100644
index 000000000000..87aff01a5857
--- /dev/null
+++ b/lib/Transforms/Utils/InlineCost.cpp
@@ -0,0 +1,315 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+         CountCodeReductionForConstant(Value *V) {
+  unsigned Reduction = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+    if (isa<BranchInst>(*UI))
+      Reduction += 40;          // Eliminating a conditional branch is a big win
+    else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
+      // Eliminating a switch is a big win, proportional to the number of edges
+      // deleted.
+      Reduction += (SI->getNumSuccessors()-1) * 40;
+    else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+      // Turning an indirect call into a direct call is a BIG win
+      Reduction += CI->getCalledValue() == V ? 500 : 0;
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+      // Turning an indirect call into a direct call is a BIG win
+      Reduction += II->getCalledValue() == V ? 500 : 0;
+    } else {
+      // Figure out if this instruction will be removed due to simple constant
+      // propagation.
+      Instruction &Inst = cast<Instruction>(**UI);
+      bool AllOperandsConstant = true;
+      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+          AllOperandsConstant = false;
+          break;
+        }
+
+      if (AllOperandsConstant) {
+        // We will get to remove this instruction...
+        Reduction += 7;
+
+        // And any other instructions that use it which become constants
+        // themselves.
+        Reduction += CountCodeReductionForConstant(&Inst);
+      }
+    }
+
+  return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+         CountCodeReductionForAlloca(Value *V) {
+  if (!isa<PointerType>(V->getType())) return 0;  // Not a pointer
+  unsigned Reduction = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    Instruction *I = cast<Instruction>(*UI);
+    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+      Reduction += 10;
+    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      // If the GEP has variable indices, we won't be able to do much with it.
+      if (!GEP->hasAllConstantIndices())
+        Reduction += CountCodeReductionForAlloca(GEP)+15;
+    } else {
+      // If there is some other strange instruction, we're not going to be able
+      // to do much if we inline this.
+      return 0;
+    }
+  }
+
+  return Reduction;
+}
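+
+// CountCodeReductionForConstant is a weighted walk of the use graph:
+// score each user that folds away and recurse into what it feeds.  A
+// simplified detached sketch (uniform weight 7, no branch/switch/call
+// special cases; integer value IDs and the Users table are hypothetical):
+#if 0
+static unsigned ScoreIfConstant(int V, const std::multimap<int, int> &Users,
+                                std::set<int> &Visited) {
+  unsigned Reduction = 0;
+  std::pair<std::multimap<int, int>::const_iterator,
+            std::multimap<int, int>::const_iterator> R = Users.equal_range(V);
+  for (; R.first != R.second; ++R.first) {
+    int User = R.first->second;
+    Reduction += 7;                        // This user folds away...
+    if (Visited.insert(User).second)       // ...and so may its own users.
+      Reduction += ScoreIfConstant(User, Users, Visited);
+  }
+  return Reduction;
+}
+#endif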
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+  unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
+
+  // Look at the size of the callee.  Each basic block counts as 20 units, and
+  // each instruction counts as 5.
+  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+         II != E; ++II) {
+      if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+
+      // Special handling for calls.
+      if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+        if (isa<DbgInfoIntrinsic>(II))
+          continue;  // Debug intrinsics don't count as size.
+
+        CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
+
+        // If this function contains a call to setjmp or _setjmp, never inline
+        // it.  This is a hack because we depend on the user marking their local
+        // variables as volatile if they are live across a setjmp call, and they
+        // probably won't do this in callers.
+        if (Function *F = CS.getCalledFunction())
+          if (F->isDeclaration() &&
+              (F->isName("setjmp") || F->isName("_setjmp"))) {
+            NeverInline = true;
+            return;
+          }
+
+        // Calls often compile into many machine instructions.  Bump up their
+        // cost to reflect this.
+        if (!isa<IntrinsicInst>(II))
+          NumInsts += 5;
+      }
+
+      if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+        if (!AI->isStaticAlloca())
+          this->usesDynamicAlloca = true;
+      }
+
+      if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+        ++NumVectorInsts;
+
+      // Noop casts, including ptr <-> int, don't count.
+      if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+        if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+            isa<PtrToIntInst>(CI))
+          continue;
+      } else if (const GetElementPtrInst *GEPI =
+                 dyn_cast<GetElementPtrInst>(II)) {
+        // If a GEP has all constant indices, it will probably be folded with
+        // a load/store.
+        if (GEPI->hasAllConstantIndices())
+          continue;
+      }
+
+      ++NumInsts;
+    }
+
+    ++NumBlocks;
+  }
+
+  this->NumBlocks      = NumBlocks;
+  this->NumInsts       = NumInsts;
+  this->NumVectorInsts = NumVectorInsts;
+
+  // Check out all of the arguments to the function, figuring out how much
+  // code can be eliminated if one of the arguments is a constant.
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+    ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
+                                      CountCodeReductionForAlloca(I)));
+}
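+
+// Worked example of the tally above: a callee with 2 blocks, 9 ordinary
+// instructions and one non-intrinsic call scores
+//   NumInsts = 9 + (1 + 5 /*call bump*/) = 15
+// (PHIs, noop casts and all-constant GEPs are skipped entirely), and
+// getInlineCost below then charges CalleeFI.NumInsts*5 = 75 size units
+// against the call site.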
+
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  Instruction *TheCall = CS.getInstruction();
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = TheCall->getParent()->getParent();
+
+  // Don't inline functions which can be redefined at link-time to mean
+  // something else.
+  if (Callee->mayBeOverridden() ||
+      // Don't inline functions marked noinline.
+      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee))
+    return llvm::InlineCost::getNever();
+
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline.  A lower inline cost makes it more likely for the call
+  // to be inlined.  This value may go negative.
+  //
+  int InlineCost = 0;
+
+  // If there is only one call of the function, and it has internal linkage,
+  // make it almost guaranteed to be inlined.
+  //
+  if ((Callee->hasLocalLinkage() || Callee->hasAvailableExternallyLinkage()) &&
+      Callee->hasOneUse())
+    InlineCost -= 15000;
+
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (Callee->getCallingConv() == CallingConv::Cold)
+    InlineCost += 2000;
+
+  // If the instruction after the call, or if the normal destination of the
+  // invoke is an unreachable instruction, the function is noreturn.  As such,
+  // there is little point in inlining this.
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+      InlineCost += 10000;
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+    InlineCost += 10000;
+
+  // Get information about the callee...
+  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI.NumBlocks == 0)
+    CalleeFI.analyzeFunction(Callee);
+
+  // If we should never inline this, return a huge cost.
+  if (CalleeFI.NeverInline)
+    return InlineCost::getNever();
+
+  // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
+  // could move this up and avoid computing the FunctionInfo for
+  // things we are going to just return always inline for. This
+  // requires handling setjmp somewhere else, however.
+  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+    return InlineCost::getAlways();
+
+  if (CalleeFI.usesDynamicAlloca) {
+    // Get information about the caller...
+    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+    // If we haven't calculated this information yet, do so now.
+    if (CallerFI.NumBlocks == 0)
+      CallerFI.analyzeFunction(Caller);
+
+    // Don't inline a callee with dynamic alloca into a caller without them.
+    // Functions containing dynamic alloca's are inefficient in various ways;
+    // don't create more inefficiency.
+    if (!CallerFI.usesDynamicAlloca)
+      return InlineCost::getNever();
+  }
+
+  // Add to the inline quality for properties that make the call valuable to
+  // inline.  This includes factors that indicate that the result of inlining
+  // the function will be optimizable.  Currently this just looks at arguments
+  // passed into the function.
+  //
+  unsigned ArgNo = 0;
+  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I, ++ArgNo) {
+    // Each argument passed in has a cost at both the caller and the callee
+    // sides.  This favors functions that take many arguments over functions
+    // that take few arguments.
+    InlineCost -= 20;
+
+    // If this is a function being passed in, it is very likely that we will be
+    // able to turn an indirect function call into a direct function call.
+    if (isa<Function>(I))
+      InlineCost -= 100;
+
+    // If an alloca is passed in, inlining this function is likely to allow
+    // significant future optimization possibilities (like scalar promotion,
+    // and scalarization), so encourage the inlining of the function.
+    //
+    else if (isa<AllocaInst>(I)) {
+      if (ArgNo < CalleeFI.ArgumentWeights.size())
+        InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
+
+      // If this is a constant being passed into the function, use the argument
+      // weights calculated for the callee to determine how much will be folded
+      // away with this information.
+    } else if (isa<Constant>(I)) {
+      if (ArgNo < CalleeFI.ArgumentWeights.size())
+        InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight;
+    }
+  }
+
+  // Now that we have considered all of the factors that make the call site
+  // more likely to be inlined, look at factors that make us not want to
+  // inline it.
+
+  // Don't inline into something too big, which would make it bigger.
+  //
+  InlineCost += Caller->size()/15;
+
+  // Look at the size of the callee.  Each instruction counts as 5.
+  InlineCost += CalleeFI.NumInsts*5;
+
+  return llvm::InlineCost::get(InlineCost);
+}
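+
+// How a client is expected to consume the two entry points in this file: a
+// sketch only; the 200-unit Threshold is a hypothetical client setting, and
+// the isAlways()/isNever()/getValue() accessors are assumed from the
+// InlineCost class declared in InlineCost.h.
+#if 0
+static bool ShouldInlineSketch(llvm::InlineCostAnalyzer &ICA,
+                               llvm::CallSite CS) {
+  llvm::SmallPtrSet<const llvm::Function*, 16> NeverInline;  // none known
+  llvm::InlineCost IC = ICA.getInlineCost(CS, NeverInline);
+  if (IC.isAlways()) return true;
+  if (IC.isNever())  return false;
+  const int Threshold = 200;
+  return IC.getValue() < (int)(Threshold * ICA.getInlineFudgeFactor(CS));
+}
+#endif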
+  if (CalleeFI.NumBlocks == 1)
+    Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk (if it makes up
+  // at least 10% of the instructions) of vector instructions.
+  if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/2)
+    Factor += 2.0f;
+  else if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
+    Factor += 1.5f;
+  return Factor;
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..4989c00ceb81
--- /dev/null
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,656 @@
+//===- InlineFunction.cpp - Code to perform function inlining ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
+  return InlineFunction(CallSite(CI), CG, TD);
+}
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
+  return InlineFunction(CallSite(II), CG, TD);
+}
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert
+/// calls in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined.  FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+                                ClonedCodeInfo &InlinedCodeInfo,
+                                CallGraph *CG) {
+  BasicBlock *InvokeDest = II->getUnwindDest();
+  std::vector<Value*> InvokeDestPHIValues;
+
+  // If there are PHI nodes in the unwind destination block, we need to
+  // keep track of which values came into them from this invoke, then remove
+  // the entry for this block.
+  BasicBlock *InvokeBlock = II->getParent();
+  for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    // Save the value to use for this edge.
+    InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock));
+  }
+
+  Function *Caller = FirstNewBlock->getParent();
+
+  // The inlined code is currently at the end of the function, scan from the
+  // start of the inlined code to its end, checking for stuff we need to
+  // rewrite.
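+  // (Sketch of the rewrite at the IR level, hypothetical names: a cloned
+  // call such as
+  //     %x = call i32 @f(i32 %a)
+  // in block %bb becomes, after splitting %bb at the call,
+  //     %x = invoke i32 @f(i32 %a)
+  //             to label %x.noexc unwind label %lpad
+  // where %lpad is the unwind destination of the invoke being inlined, and
+  // any 'unwind' terminator in the cloned code becomes 'br label %lpad'.)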
+ if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) { + if (InlinedCodeInfo.ContainsCalls) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){ + Instruction *I = BBI++; + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + if (!isa(I)) continue; + CallInst *CI = cast(I); + + // If this call cannot unwind, don't convert it to an invoke. + if (CI->doesNotThrow()) + continue; + + // Convert this function call into an invoke instruction. + // First, split the basic block. + BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Next, create the new invoke instruction, inserting it at the end + // of the old basic block. + SmallVector InvokeArgs(CI->op_begin()+1, CI->op_end()); + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, + InvokeArgs.begin(), InvokeArgs.end(), + CI->getName(), BB->getTerminator()); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! + CI->replaceAllUsesWith(II); + + // Update the callgraph. + if (CG) { + // We should be able to do this: + // (*CG)[Caller]->replaceCallSite(CI, II); + // but that fails if the old call site isn't in the call graph, + // which, because of LLVM bug 3601, it sometimes isn't. + CallGraphNode *CGN = (*CG)[Caller]; + for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end(); + NI != NE; ++NI) { + if (NI->first == CI) { + NI->first = II; + break; + } + } + } + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + Split->getInstList().pop_front(); // Delete the original call + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa(I); ++I, ++i) { + PHINode *PN = cast(I); + PN->addIncoming(InvokeDestPHIValues[i], BB); + } + + // This basic block is now complete, start scanning the next one. + break; + } + } + + if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { + // An UnwindInst requires special handling when it gets inlined into an + // invoke site. Once this happens, we know that the unwind would cause + // a control transfer to the invoke exception destination, so we can + // transform it into a direct branch to the exception destination. + BranchInst::Create(InvokeDest, UI); + + // Delete the unwind instruction! + UI->eraseFromParent(); + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa(I); ++I, ++i) { + PHINode *PN = cast(I); + PN->addIncoming(InvokeDestPHIValues[i], BB); + } + } + } + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee +/// into the caller, update the specified callgraph to reflect the changes we +/// made. Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. 
+static void UpdateCallGraphAfterInlining(CallSite CS, + Function::iterator FirstNewBlock, + DenseMap &ValueMap, + CallGraph &CG) { + const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Callee = CS.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + const Instruction *OrigCall = I->first.getInstruction(); + + DenseMap::iterator VMI = ValueMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI != ValueMap.end() && VMI->second) { + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + if (Instruction *NewCall = dyn_cast(VMI->second)) + CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + } + } + // Update the call graph by deleting the edge from Callee to Caller. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(CS); +} + +/// findFnRegionEndMarker - This is a utility routine that is used by +/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds +/// to the llvm.dbg.func.start of the function F. Otherwise return NULL. +static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { + + GlobalVariable *FnStart = NULL; + const DbgRegionEndInst *FnEnd = NULL; + for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) { + if (FnStart == NULL) { + if (const DbgFuncStartInst *FSI = dyn_cast(BI)) { + DISubprogram SP(cast(FSI->getSubprogram())); + assert (SP.isNull() == false && "Invalid llvm.dbg.func.start"); + if (SP.describes(F)) + FnStart = SP.getGV(); + } + } else { + if (const DbgRegionEndInst *REI = dyn_cast(BI)) + if (REI->getContext() == FnStart) + FnEnd = REI; + } + } + return FnEnd; +} + +// InlineFunction - This function inlines the called function into the basic +// block of the caller. This returns false if it is not possible to inline this +// call. The program is still in a well defined state if this occurs though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. Similiarly this will inline a recursive +// function by one level. +// +bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { + Instruction *TheCall = CS.getInstruction(); + assert(TheCall->getParent() && TheCall->getParent()->getParent() && + "Instruction not in function!"); + + const Function *CalledFunc = CS.getCalledFunction(); + if (CalledFunc == 0 || // Can't inline external function or indirect + CalledFunc->isDeclaration() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + + // If the call to the callee is not a tail call, we must clear the 'tail' + // flags on any calls that we inline. 
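+  // (Why, with a hypothetical example: if @callee contains
+  // 'tail call void @g()' and is inlined at a call site that is not itself a
+  // tail call, the inlined body may now refer to the caller's stack, e.g. an
+  // alloca passed as an argument, so the inlined call to @g must not keep
+  // the 'tail' marker.)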
+ bool MustClearTailCallFlags = + !(isa(TheCall) && cast(TheCall)->isTailCall()); + + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. + bool MarkNoUnwind = CS.doesNotThrow(); + + BasicBlock *OrigBB = TheCall->getParent(); + Function *Caller = OrigBB->getParent(); + + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else if (CalledFunc->getGC() != Caller->getGC()) + return false; + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + // + Function::iterator LastBlock = &Caller->back(); + + // Make sure to capture all of the return instructions from the cloned + // function. + std::vector Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy ValueMap after cloning. + DenseMap ValueMap; + + assert(CalledFunc->arg_size() == CS.arg_size() && + "No varargs calls can be inlined!"); + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + CallSite::arg_iterator AI = CS.arg_begin(); + unsigned ArgNo = 0; + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { + Value *ActualArg = *AI; + + // When byval arguments actually inlined, we need to make the copy implied + // by them explicit. However, we don't do this if the callee is readonly + // or readnone, because the copy would be unneeded: the callee doesn't + // modify the struct. + if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && + !CalledFunc->onlyReadsMemory()) { + const Type *AggTy = cast(I->getType())->getElementType(); + const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); + + // Create the alloca. If we have TargetData, use nice alignment. + unsigned Align = 1; + if (TD) Align = TD->getPrefTypeAlignment(AggTy); + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), + Caller->begin()->begin()); + // Emit a memcpy. + const Type *Tys[] = { Type::Int64Ty }; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys, 1); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1) + }; + CallInst *TheMemCpy = + CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); + + // If we have a call graph, update it. + if (CG) { + CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); + CallGraphNode *CallerNode = (*CG)[Caller]; + CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); + } + + // Uses of the argument in the function should use our new alloca + // instead. + ActualArg = NewAlloca; + } + + ValueMap[I] = ActualArg; + } + + // Adjust llvm.dbg.region.end. 
If the CalledFunc has region end + // marker then clone that marker after next stop point at the + // call site. The function body cloner does not clone original + // region end marker from the CalledFunc. This will ensure that + // inlined function's scope ends at the right place. + const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc); + if (DREI) { + for (BasicBlock::iterator BI = TheCall, + BE = TheCall->getParent()->end(); BI != BE; ++BI) { + if (DbgStopPointInst *DSPI = dyn_cast(BI)) { + if (DbgRegionEndInst *NewDREI = + dyn_cast(DREI->clone())) + NewDREI->insertAfter(DSPI); + break; + } + } + } + + // We want the inliner to prune the code as it copies. We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", + &InlinedFunctionInfo, TD); + + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Update the callgraph if requested. + if (CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG); + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + // + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) + if (AllocaInst *AI = dyn_cast(I++)) { + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (isa(AI->getArraySize())) { + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa(I) && + isa(cast(I)->getArraySize())) + ++I; + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().getInstList().splice( + InsertPoint, + FirstNewBlock->getInstList(), + AI, I); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Constant *StackSave, *StackRestore; + StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + + // If we are preserving the callgraph, add edges to the stacksave/restore + // functions for the calls we insert. + CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; + if (CG) { + // We know that StackSave/StackRestore are Function*'s, because they are + // intrinsics which must have the right types. + StackSaveCGN = CG->getOrInsertFunction(cast(StackSave)); + StackRestoreCGN = CG->getOrInsertFunction(cast(StackRestore)); + CallerNode = (*CG)[Caller]; + } + + // Insert the llvm.stacksave. 
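+    // (At the IR level the wrapping looks like this, names hypothetical:
+    //     %savedstack = call i8* @llvm.stacksave()
+    //     ... inlined body containing dynamic allocas ...
+    //     call void @llvm.stackrestore(i8* %savedstack)
+    // so the stack space used by the callee's dynamic allocas is reclaimed
+    // on every path out of the inlined code.)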
+ CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", + FirstNewBlock->begin()); + if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); + if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + } + + // Count the number of StackRestore calls we insert. + unsigned NumStackRestores = Returns.size(); + + // If we are inlining an invoke instruction, insert restores before each + // unwind. These unwinds will be rewritten into branches later. + if (InlinedFunctionInfo.ContainsUnwinds && isa(TheCall)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { + CallInst::Create(StackRestore, SavedPtr, "", UI); + ++NumStackRestores; + } + } + } + + // If we are inlining tail call instruction through a call site that isn't + // marked 'tail', we must remove the tail marker for any calls in the inlined + // code. Also, calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (InlinedFunctionInfo.ContainsCalls && + (MustClearTailCallFlags || MarkNoUnwind)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast(I)) { + if (MustClearTailCallFlags) + CI->setTailCall(false); + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + + // If we are inlining through a 'nounwind' call site then any inlined 'unwind' + // instructions are unreachable. + if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) { + TerminatorInst *Term = BB->getTerminator(); + if (isa(Term)) { + new UnreachableInst(Term); + BB->getInstList().erase(Term); + } + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any inlined 'unwind' instructions into branches to the invoke exception + // destination, and call instructions into invoke instructions. + if (InvokeInst *II = dyn_cast(TheCall)) + HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG); + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. + if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + FirstNewBlock->begin(), FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->getBasicBlockList().pop_back(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast(TheCall)) + BranchInst::Create(II->getNormalDest(), TheCall); + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!TheCall->use_empty()) { + ReturnInst *R = Returns[0]; + if (TheCall == R->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(R->getReturnValue()); + } + // Since we are now done with the Call/Invoke, we can delete it. 
+ TheCall->eraseFromParent(); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->eraseFromParent(); + + // We are now done with the inlining. + return true; + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + if (InvokeInst *II = dyn_cast(TheCall)) { + + // Add an unconditional branch to make this look like the CallInst case... + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. + AfterCallBB = OrigBB->splitBasicBlock(NewBr, + CalledFunc->getName()+".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(TheCall, + CalledFunc->getName()+".exit"); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, FirstNewBlock); + + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), + FirstNewBlock, Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + const Type *RTy = CalledFunc->getReturnType(); + + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + PHINode *PHI = 0; + if (!TheCall->use_empty()) { + PHI = PHINode::Create(RTy, TheCall->getName(), + AfterCallBB->begin()); + // Anything that used the result of the function call should now use the + // PHI node as their operand. + TheCall->replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions adding entries to the PHI node + // as appropriate. + if (PHI) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + } + + // Add a branch to the merge points and remove return instructions. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BranchInst::Create(AfterCallBB, RI); + RI->eraseFromParent(); + } + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. 
+ if (!TheCall->use_empty()) { + if (TheCall == Returns[0]->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + } + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + BasicBlock *ReturnBB = Returns[0]->getParent(); + AfterCallBB->getInstList().splice(AfterCallBB->begin(), + ReturnBB->getInstList()); + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!TheCall->use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... + assert(cast(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + + // Remove the unconditional branch. + OrigBB->getInstList().erase(Br); + + // Now we can remove the CalleeEntry block, which is now empty. + Caller->getBasicBlockList().erase(CalleeEntry); + + return true; +} diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp new file mode 100644 index 000000000000..4f8a1603948a --- /dev/null +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -0,0 +1,63 @@ +//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that gives instructions names, this is mostly +// useful when diffing the effect of an optimization because deleting an +// unnamed instruction can change all other instruction numbering, making the +// diff very noisy. 
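+// For example, the anonymous instructions in
+//     %0 = load i32* %p
+//     %1 = add i32 %0, 1
+// are renamed to
+//     %tmp = load i32* %p
+//     %tmp1 = add i32 %tmp, 1
+// so later renumbering no longer shows up as spurious diff churn.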
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +using namespace llvm; + +namespace { + struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(&ID) {} + + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->hasName() && AI->getType() != Type::VoidTy) + AI->setName("tmp"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BB->hasName()) + BB->setName("BB"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->hasName() && I->getType() != Type::VoidTy) + I->setName("tmp"); + } + return true; + } + }; + + char InstNamer::ID = 0; + static RegisterPass X("instnamer", + "Assign names to anonymous instructions"); +} + + +const PassInfo *const llvm::InstructionNamerID = &X; +//===----------------------------------------------------------------------===// +// +// InstructionNamer - Give any unnamed non-void instructions "tmp" names. +// +FunctionPass *llvm::createInstructionNamerPass() { + return new InstNamer(); +} diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp new file mode 100644 index 000000000000..7d4f3a343e62 --- /dev/null +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -0,0 +1,276 @@ +//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops by placing phi nodes at the end of the loops for +// all values that are live across the loop boundary. For example, it turns +// the left into the right code: +// +// for (...) for (...) +// if (c) if (c) +// X1 = ... X1 = ... +// else else +// X2 = ... X2 = ... +// X3 = phi(X1, X2) X3 = phi(X1, X2) +// ... = X3 + 4 X4 = phi(X3) +// ... = X4 + 4 +// +// This is still valid LLVM; the extra phi nodes are purely redundant, and will +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as +// LoopUnswitching, simpler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lcssa" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/PredIteratorCache.h" +#include +#include +using namespace llvm; + +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); + +namespace { + struct VISIBILITY_HIDDEN LCSSA : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LCSSA() : LoopPass(&ID) {} + + // Cached analysis information for the current function. 
+ LoopInfo *LI; + DominatorTree *DT; + std::vector LoopBlocks; + PredIteratorCache PredCache; + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + void ProcessInstruction(Instruction* Instr, + const SmallVector& exitBlocks); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + + // Request DominanceFrontier now, even though LCSSA does + // not use it. This allows Pass Manager to schedule Dominance + // Frontier early enough such that one LPPassManager can handle + // multiple loop transformation passes. + AU.addRequired(); + AU.addPreserved(); + } + private: + void getLoopValuesUsedOutsideLoop(Loop *L, + SetVector &AffectedValues, + const SmallVector& exitBlocks); + + Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, + DenseMap &Phis); + + /// inLoop - returns true if the given block is within the current loop + bool inLoop(BasicBlock* B) { + return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B); + } + }; +} + +char LCSSA::ID = 0; +static RegisterPass X("lcssa", "Loop-Closed SSA Form Pass"); + +Pass *llvm::createLCSSAPass() { return new LCSSA(); } +const PassInfo *const llvm::LCSSAID = &X; + +/// runOnFunction - Process all loops in the function, inner-most out. +bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) { + PredCache.clear(); + + LI = &LPM.getAnalysis(); + DT = &getAnalysis(); + + // Speed up queries by creating a sorted list of blocks + LoopBlocks.clear(); + LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); + std::sort(LoopBlocks.begin(), LoopBlocks.end()); + + SmallVector exitBlocks; + L->getExitBlocks(exitBlocks); + + SetVector AffectedValues; + getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks); + + // If no values are affected, we can save a lot of work, since we know that + // nothing will be changed. + if (AffectedValues.empty()) + return false; + + // Iterate over all affected values for this loop and insert Phi nodes + // for them in the appropriate exit blocks + + for (SetVector::iterator I = AffectedValues.begin(), + E = AffectedValues.end(); I != E; ++I) + ProcessInstruction(*I, exitBlocks); + + assert(L->isLCSSAForm()); + + return true; +} + +/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes, +/// eliminate all out-of-loop uses. +void LCSSA::ProcessInstruction(Instruction *Instr, + const SmallVector& exitBlocks) { + ++NumLCSSA; // We are applying the transformation + + // Keep track of the blocks that have the value available already. + DenseMap Phis; + + DomTreeNode *InstrNode = DT->getNode(Instr->getParent()); + + // Insert the LCSSA phi's into the exit blocks (dominated by the value), and + // add them to the Phi's map. 
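+  // (Concretely, for a value %v defined in the loop and used beyond it, each
+  // dominated exit block %exit gets, names hypothetical:
+  //     %v.lcssa = phi i32 [ %v, %latch ], [ %v, %other.pred ]
+  // with one incoming entry per predecessor of %exit inside the loop; the
+  // out-of-loop uses of %v are then rewritten to use %v.lcssa.)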
+ for (SmallVector::const_iterator BBI = exitBlocks.begin(), + BBE = exitBlocks.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = *BBI; + DomTreeNode *ExitBBNode = DT->getNode(BB); + Value *&Phi = Phis[ExitBBNode]; + if (!Phi && DT->dominates(InstrNode, ExitBBNode)) { + PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa", + BB->begin()); + PN->reserveOperandSpace(PredCache.GetNumPreds(BB)); + + // Remember that this phi makes the value alive in this block. + Phi = PN; + + // Add inputs from inside the loop for this PHI. + for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI) + PN->addIncoming(Instr, *PI); + } + } + + + // Record all uses of Instr outside the loop. We need to rewrite these. The + // LCSSA phis won't be included because they use the value in the loop. + for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end(); + UI != E;) { + BasicBlock *UserBB = cast(*UI)->getParent(); + if (PHINode *P = dyn_cast(*UI)) { + UserBB = P->getIncomingBlock(UI); + } + + // If the user is in the loop, don't rewrite it! + if (UserBB == Instr->getParent() || inLoop(UserBB)) { + ++UI; + continue; + } + + // Otherwise, patch up uses of the value with the appropriate LCSSA Phi, + // inserting PHI nodes into join points where needed. + Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis); + + // Preincrement the iterator to avoid invalidating it when we change the + // value. + Use &U = UI.getUse(); + ++UI; + U.set(Val); + } +} + +/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that +/// are used by instructions outside of it. +void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L, + SetVector &AffectedValues, + const SmallVector& exitBlocks) { + // FIXME: For large loops, we may be able to avoid a lot of use-scanning + // by using dominance information. In particular, if a block does not + // dominate any of the loop exits, then none of the values defined in the + // block could be used outside the loop. + for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); + BB != BE; ++BB) { + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I) + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) { + BasicBlock *UserBB = cast(*UI)->getParent(); + if (PHINode* p = dyn_cast(*UI)) { + UserBB = p->getIncomingBlock(UI); + } + + if (*BB != UserBB && !inLoop(UserBB)) { + AffectedValues.insert(I); + break; + } + } + } +} + +/// GetValueForBlock - Get the value to use within the specified basic block. +/// available values are in Phis. +Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, + DenseMap &Phis) { + // If there is no dominator info for this BB, it is unreachable. + if (BB == 0) + return UndefValue::get(OrigInst->getType()); + + // If we have already computed this value, return the previously computed val. + if (Phis.count(BB)) return Phis[BB]; + + DomTreeNode *IDom = BB->getIDom(); + + // Otherwise, there are two cases: we either have to insert a PHI node or we + // don't. We need to insert a PHI node if this block is not dominated by one + // of the exit nodes from the loop (the loop could have multiple exits, and + // though the value defined *inside* the loop dominated all its uses, each + // exit by itself may not dominate all the uses). + // + // The simplest way to check for this condition is by checking to see if the + // idom is in the loop. If so, we *know* that none of the exit blocks + // dominate this block. 
+  // Note that we *know* that the block defining the original instruction is
+  // in the idom chain, because if it weren't, then the original value didn't
+  // dominate this use.
+  if (!inLoop(IDom->getBlock())) {
+    // Idom is not in the loop, we must still be "below" the exit block and
+    // must be fully dominated by the value live in the idom.
+    Value* val = GetValueForBlock(IDom, OrigInst, Phis);
+    Phis.insert(std::make_pair(BB, val));
+    return val;
+  }
+
+  BasicBlock *BBN = BB->getBlock();
+
+  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
+  // now, then get values to fill in the incoming values for the PHI.
+  PHINode *PN = PHINode::Create(OrigInst->getType(),
+                                OrigInst->getName() + ".lcssa", BBN->begin());
+  PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
+  Phis.insert(std::make_pair(BB, PN));
+
+  // Fill in the incoming values for the block.
+  for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
+    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
+  return PN;
+}
+
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..94483b816e3b
--- /dev/null
+++ b/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,338 @@
+//===-- Local.cpp - Functions to perform local transformations -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Local constant propagation.
+//
+
+// ConstantFoldTerminator - If a terminator instruction is predicated on a
+// constant value, convert it into an unconditional branch to the constant
+// destination.
+//
+bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
+  TerminatorInst *T = BB->getTerminator();
+
+  // Branch - See if we are conditional jumping on constant
+  if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+    if (BI->isUnconditional()) return false;  // Can't optimize uncond branch
+    BasicBlock *Dest1 = BI->getSuccessor(0);
+    BasicBlock *Dest2 = BI->getSuccessor(1);
+
+    if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+      // Are we branching on constant?
+      // YES.  Change to unconditional branch...
+      BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+      BasicBlock *OldDest     = Cond->getZExtValue() ? Dest2 : Dest1;
+
+      //cerr << "Function: " << T->getParent()->getParent()
+      //     << "\nRemoving branch from " << T->getParent()
+      //     << "\n\nTo: " << OldDest << endl;
+
+      // Let the basic block know that we are letting go of it.  Based on this,
+      // it will adjust its PHI nodes.
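+      // (Net effect, sketched on IR with hypothetical labels:
+      //     br i1 true, label %live, label %dead
+      // becomes
+      //     br label %live
+      // after %dead has dropped this block from its PHI nodes.)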
+      assert(BI->getParent() && "Terminator not inserted in block!");
+      OldDest->removePredecessor(BI->getParent());
+
+      // Set the unconditional destination, and change the insn to be an
+      // unconditional branch.
+      BI->setUnconditionalDest(Destination);
+      return true;
+    } else if (Dest2 == Dest1) {       // Conditional branch to same location?
+      // This branch matches something like this:
+      //     br bool %cond, label %Dest, label %Dest
+      // and changes it into:  br label %Dest
+
+      // Let the basic block know that we are letting go of one copy of it.
+      assert(BI->getParent() && "Terminator not inserted in block!");
+      Dest1->removePredecessor(BI->getParent());
+
+      // Change a conditional branch to unconditional.
+      BI->setUnconditionalDest(Dest1);
+      return true;
+    }
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+    // If we are switching on a constant, we can convert the switch into a
+    // single branch instruction!
+    ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+    BasicBlock *TheOnlyDest = SI->getSuccessor(0);  // The default dest
+    BasicBlock *DefaultDest = TheOnlyDest;
+    assert(TheOnlyDest == SI->getDefaultDest() &&
+           "Default destination is not successor #0?");
+
+    // Figure out which case it goes to...
+    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+      // Found case matching a constant operand?
+      if (SI->getSuccessorValue(i) == CI) {
+        TheOnlyDest = SI->getSuccessor(i);
+        break;
+      }
+
+      // Check to see if this branch is going to the same place as the default
+      // dest.  If so, eliminate it as an explicit compare.
+      if (SI->getSuccessor(i) == DefaultDest) {
+        // Remove this entry...
+        DefaultDest->removePredecessor(SI->getParent());
+        SI->removeCase(i);
+        --i; --e;  // Don't skip an entry...
+        continue;
+      }
+
+      // Otherwise, check to see if the switch only branches to one
+      // destination.  We do this by resetting "TheOnlyDest" to null when we
+      // find two non-equal destinations.
+      if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+    }
+
+    if (CI && !TheOnlyDest) {
+      // Branching on a constant, but not any of the cases, go to the default
+      // successor.
+      TheOnlyDest = SI->getDefaultDest();
+    }
+
+    // If we found a single destination that we can fold the switch into, do
+    // so now.
+    if (TheOnlyDest) {
+      // Insert the new branch..
+      BranchInst::Create(TheOnlyDest, SI);
+      BasicBlock *BB = SI->getParent();
+
+      // Remove entries from PHI nodes which we no longer branch to...
+      for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+        // Found case matching a constant operand?
+        BasicBlock *Succ = SI->getSuccessor(i);
+        if (Succ == TheOnlyDest)
+          TheOnlyDest = 0;  // Don't modify the first branch to TheOnlyDest
+        else
+          Succ->removePredecessor(BB);
+      }
+
+      // Delete the old switch...
+      BB->getInstList().erase(SI);
+      return true;
+    } else if (SI->getNumSuccessors() == 2) {
+      // Otherwise, we can fold this switch into a conditional branch
+      // instruction if it has only one non-default destination.
+      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
+                                 SI->getSuccessorValue(1), "cond", SI);
+      // Insert the new branch...
+      BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
+
+      // Delete the old switch...
+      SI->eraseFromParent();
+      return true;
+    }
+  }
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Local dead code elimination...
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+/// +bool llvm::isInstructionTriviallyDead(Instruction *I) { + if (!I->use_empty() || isa(I)) return false; + + // We don't want debug info removed by anything this general. + if (isa(I)) return false; + + if (!I->mayHaveSideEffects()) return true; + + // Special case intrinsics that "may have side effects" but can be deleted + // when dead. + if (IntrinsicInst *II = dyn_cast(I)) + // Safe to delete llvm.stacksave if dead. + if (II->getIntrinsicID() == Intrinsic::stacksave) + return true; + return false; +} + +/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a +/// trivially dead instruction, delete it. If that makes any of its operands +/// trivially dead, delete them too, recursively. +void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { + Instruction *I = dyn_cast(V); + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + return; + + SmallVector DeadInsts; + DeadInsts.push_back(I); + + while (!DeadInsts.empty()) { + I = DeadInsts.pop_back_val(); + + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, 0); + + if (!OpV->use_empty()) continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast(OpV)) + if (isInstructionTriviallyDead(OpI)) + DeadInsts.push_back(OpI); + } + + I->eraseFromParent(); + } +} + +/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively +/// dead PHI node, due to being a def-use chain of single-use nodes that +/// either forms a cycle or is terminated by a trivially dead instruction, +/// delete it. If that makes any of its operands trivially dead, delete them +/// too, recursively. +void +llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { + + // We can remove a PHI if it is on a cycle in the def-use graph + // where each node in the cycle has degree one, i.e. only one use, + // and is an instruction with no side effects. + if (!PN->hasOneUse()) + return; + + SmallPtrSet PHIs; + PHIs.insert(PN); + for (Instruction *J = cast(*PN->use_begin()); + J->hasOneUse() && !J->mayHaveSideEffects(); + J = cast(*J->use_begin())) + // If we find a PHI more than once, we're on a cycle that + // won't prove fruitful. + if (PHINode *JP = dyn_cast(J)) + if (!PHIs.insert(cast(JP))) { + // Break the cycle and delete the PHI and its operands. + JP->replaceAllUsesWith(UndefValue::get(JP->getType())); + RecursivelyDeleteTriviallyDeadInstructions(JP); + break; + } +} + +//===----------------------------------------------------------------------===// +// Control Flow Graph Restructuring... +// + +/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its +/// predecessor is known to have one successor (DestBB!). Eliminate the edge +/// between them, moving the instructions in the predecessor into DestBB and +/// deleting the predecessor block. +/// +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) { + // If BB has single-entry PHI nodes, fold them. + while (PHINode *PN = dyn_cast(DestBB->begin())) { + Value *NewVal = PN->getIncomingValue(0); + // Replace self referencing PHI with undef, it must be dead. 
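+    // (Example: a single-entry PHI '%p = phi i32 [ %v, %pred ]' folds to %v,
+    // while the self-referencing '%p = phi i32 [ %p, %pred ]' can only feed
+    // itself, so it is replaced with undef before being erased.)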
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NewVal); + PN->eraseFromParent(); + } + + BasicBlock *PredBB = DestBB->getSinglePredecessor(); + assert(PredBB && "Block doesn't have a single predecessor!"); + + // Splice all the instructions from PredBB to DestBB. + PredBB->getTerminator()->eraseFromParent(); + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // Anything that branched to PredBB now branches to DestBB. + PredBB->replaceAllUsesWith(DestBB); + + // Nuke BB. + PredBB->eraseFromParent(); +} + +/// OnlyUsedByDbgIntrinsics - Return true if the instruction I is only used +/// by DbgIntrinsics. If DbgInUses is specified then the vector is filled +/// with the DbgInfoIntrinsic that use the instruction I. +bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I, + SmallVectorImpl *DbgInUses) { + if (DbgInUses) + DbgInUses->clear(); + + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) { + if (DbgInfoIntrinsic *DI = dyn_cast(*UI)) { + if (DbgInUses) + DbgInUses->push_back(DI); + } else { + if (DbgInUses) + DbgInUses->clear(); + return false; + } + } + return true; +} + +/// UserIsDebugInfo - Return true if U is a constant expr used by +/// llvm.dbg.variable or llvm.dbg.global_variable +bool llvm::UserIsDebugInfo(User *U) { + ConstantExpr *CE = dyn_cast(U); + + if (!CE || CE->getNumUses() != 1) + return false; + + Constant *Init = dyn_cast(CE->use_back()); + if (!Init || Init->getNumUses() != 1) + return false; + + GlobalVariable *GV = dyn_cast(Init->use_back()); + if (!GV || !GV->hasInitializer() || GV->getInitializer() != Init) + return false; + + DIVariable DV(GV); + if (!DV.isNull()) + return true; // User is llvm.dbg.variable + + DIGlobalVariable DGV(GV); + if (!DGV.isNull()) + return true; // User is llvm.dbg.global_variable + + return false; +} + +/// RemoveDbgInfoUser - Remove an User which is representing debug info. +void llvm::RemoveDbgInfoUser(User *U) { + assert (UserIsDebugInfo(U) && "Unexpected User!"); + ConstantExpr *CE = cast(U); + while (!CE->use_empty()) { + Constant *C = cast(CE->use_back()); + while (!C->use_empty()) { + GlobalVariable *GV = cast(C->use_back()); + GV->eraseFromParent(); + } + C->destroyConstant(); + } + CE->destroyConstant(); +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 000000000000..03d273d25d79 --- /dev/null +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,600 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). 
This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loopsimplify" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DepthFirstIterator.h" +using namespace llvm; + +STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); +STATISTIC(NumNested , "Number of nested loops split out"); + +namespace { + struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(&ID) {} + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + LoopInfo *LI; + DominatorTree *DT; + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // We need loop information to identify the loops... + AU.addRequired(); + AU.addRequired(); + + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify loop nest. + void verifyAnalysis() const { +#ifndef NDEBUG + LoopInfo *NLI = &getAnalysis(); + for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I) + (*I)->verifyLoop(); +#endif + } + + private: + bool ProcessLoop(Loop *L); + BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); + void InsertPreheaderForLoop(Loop *L); + Loop *SeparateNestedLoop(Loop *L); + void InsertUniqueBackedgeBlock(Loop *L); + void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl &SplitPreds, + Loop *L); + }; +} + +char LoopSimplify::ID = 0; +static RegisterPass +X("loopsimplify", "Canonicalize natural loops", true); + +// Publically exposed interface to pass... +const PassInfo *const llvm::LoopSimplifyID = &X; +FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + LI = &getAnalysis(); + AA = getAnalysisIfAvailable(); + DT = &getAnalysis(); + + // Check to see that no blocks (other than the header) in loops have + // predecessors that are not in loops. This is not valid for natural loops, + // but can occur if the blocks are unreachable. Since they are unreachable we + // can just shamelessly destroy their terminators to make them not branch into + // the loop! + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + // This case can only occur for unreachable blocks. 
Blocks that are + // unreachable can't be in loops, so filter those blocks out. + if (LI->getLoopFor(BB)) continue; + + bool BlockUnreachable = false; + TerminatorInst *TI = BB->getTerminator(); + + // Check to see if any successors of this block are non-loop-header loops + // that are not the header. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + // If this successor is not in a loop, BB is clearly ok. + Loop *L = LI->getLoopFor(TI->getSuccessor(i)); + if (!L) continue; + + // If the succ is the loop header, and if L is a top-level loop, then this + // is an entrance into a loop through the header, which is also ok. + if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0) + continue; + + // Otherwise, this is an entrance into a loop from some place invalid. + // Either the loop structure is invalid and this is not a natural loop (in + // which case the compiler is buggy somewhere else) or BB is unreachable. + BlockUnreachable = true; + break; + } + + // If this block is ok, check the next one. + if (!BlockUnreachable) continue; + + // Otherwise, this block is dead. To clean up the CFG and to allow later + // loop transformations to ignore this case, we delete the edges into the + // loop by replacing the terminator. + + // Remove PHI entries from the successors. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + TI->getSuccessor(i)->removePredecessor(BB); + + // Add a new unreachable instruction before the old terminator. + new UnreachableInst(TI); + + // Delete the dead terminator. + if (AA) AA->deleteValue(TI); + if (!TI->use_empty()) + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->eraseFromParent(); + Changed |= true; + } + + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= ProcessLoop(*I); + + return Changed; +} + +/// ProcessLoop - Walk the loop structure in depth first order, ensuring that +/// all loops have preheaders. +/// +bool LoopSimplify::ProcessLoop(Loop *L) { + bool Changed = false; +ReprocessLoop: + + // Canonicalize inner loops before outer loops. Inner loop canonicalization + // can provide work for the outer loop to canonicalize. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= ProcessLoop(*I); + + assert(L->getBlocks()[0] == L->getHeader() && + "Header isn't first block in loop?"); + + // Does the loop already have a preheader? If so, don't insert one. + if (L->getLoopPreheader() == 0) { + InsertPreheaderForLoop(L); + NumInserted++; + Changed = true; + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SetVector ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); + for (SetVector::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + RewriteLoopExitBlock(L, ExitBlock); + NumInserted++; + Changed = true; + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. 
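+  // (Two fix-ups are possible here, sketched with hypothetical blocks: for a
+  // header %H with backedges from %B1 and %B2, either one backedge is split
+  // out into a separate nested loop, or a fresh block such as %H.backedge is
+  // created so that %B1 and %B2 branch to it and it alone branches to %H,
+  // restoring the single-backedge invariant.)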
+ unsigned NumBackedges = L->getNumBackEdges(); + if (NumBackedges != 1) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (NumBackedges < 8) { + if (Loop *NL = SeparateNestedLoop(L)) { + ++NumNested; + // This is a big restructuring change, reprocess the whole loop. + ProcessLoop(NL); + Changed = true; + // GCC doesn't tail recursion eliminate this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + InsertUniqueBackedgeBlock(L); + NumInserted++; + Changed = true; + } + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast(I++)); ) + if (Value *V = PN->hasConstantValue()) { + if (AA) AA->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + return Changed; +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +void LoopSimplify::InsertPreheaderForLoop(Loop *L) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) + if (!L->contains(*PI)) // Coming in from outside the loop? + OutsideBlocks.push_back(*PI); // Keep track of it... + + // Split out the loop pre-header. + BasicBlock *NewBB = + SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), + ".preheader", this); + + + //===--------------------------------------------------------------------===// + // Update analysis results now that we have performed the transformation + // + + // We know that we have loop information to update... update it now. + if (Loop *Parent = L->getParentLoop()) + Parent->addBasicBlockToLoop(NewBB, LI->getBase()); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); +} + +/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit +/// blocks. This method is used to split exit blocks that have predecessors +/// outside of the loop. +BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { + SmallVector LoopBlocks; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) + if (L->contains(*I)) + LoopBlocks.push_back(*I); + + assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); + BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], + LoopBlocks.size(), ".loopexit", + this); + + // Update Loop Information - we know that the new block will be in whichever + // loop the Exit block is in. Note that it may not be in that immediate loop, + // if the successor is some other loop header. In that case, we continue + // walking up the loop tree to find a loop that contains both the successor + // block and the predecessor block. 
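+  // Hypothetical example (for illustration only): if Exit is the header of a
+  // sibling loop S, then LI->getLoopFor(Exit) returns S, which does not
+  // contain our header; the walk below climbs from S to the parent loop that
+  // contains both blocks, and the new exit block is registered there.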
+ Loop *SuccLoop = LI->getLoopFor(Exit); + while (SuccLoop && !SuccLoop->contains(L->getHeader())) + SuccLoop = SuccLoop->getParentLoop(); + if (SuccLoop) + SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + + return NewBB; +} + +/// AddBlockAndPredsToSet - Add the specified block, and all of its +/// predecessors, to the specified set, if it's not already in there. Stop +/// predecessor traversal when we reach StopBlock. +static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + std::set &Blocks) { + std::vector WorkList; + WorkList.push_back(InputBB); + do { + BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *WBB = *I; + WorkList.push_back(WBB); + } + } while(!WorkList.empty()); +} + +/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a +/// PHI node that tells us how to partition the loops. +static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AliasAnalysis *AA) { + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { + PHINode *PN = cast(I); + ++I; + if (Value *V = PN->hasConstantValue()) + if (!isa(V) || DT->dominates(cast(V), PN)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return 0; +} + +// PlaceSplitBlockCarefully - If the block isn't already, move the new block to +// right after some 'outside block' block. This prevents the preheader from +// being placed inside the loop body, e.g. when the loop hasn't been rotated. +void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = 0; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + + +/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of +/// them out into a nested loop. This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. 
PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); + if (PN == 0) return 0; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + + BasicBlock *Header = L->getHeader(); + BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], + OuterLoopPreds.size(), + ".outer", this); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = new Loop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // This block is going to be our new header block: add it to this loop and all + // parent loops. + NewOuter->addBasicBlockToLoop(NewBB, LI->getBase()); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + NewOuter->addBlockEntry(*I); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set BlocksInL; + for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) + if (DT->dominates(Header, *PI)) + AddBlockAndPredsToSet(*PI, Header, BlocksInL); + + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) + LI->changeLoopFor(BB, NewOuter); + --i; + } + } + + return NewOuter; +} + + + +/// InsertUniqueBackedgeBlock - This method is called when the specified loop +/// has more than one backedge in it. If this occurs, revector all of these +/// backedges to target a new basic block and have that block branch to the loop +/// header. This ensures that loops have exactly one backedge. +/// +void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { + assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); + + // Get information about the loop + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + Function *F = Header->getParent(); + + // Figure out which basic blocks contain back-edges to the loop header. 
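+  // Sketch of the rewrite this method performs (illustrative, not from the
+  // original source), for a loop whose header happens to be named %header and
+  // which has two latch blocks:
+  //
+  //   latch1: br label %header          latch1: br label %header.backedge
+  //   latch2: br label %header    ==>   latch2: br label %header.backedge
+  //                                     header.backedge:
+  //                                       br label %header
+  //
+  // Header PHI nodes are split correspondingly, as the code below shows.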
+ std::vector BackedgeBlocks; + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) + if (*I != Preheader) BackedgeBlocks.push_back(*I); + + // Create and insert the new backedge block... + BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F); + BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + + // Move the new backedge block to right after the last backedge block. + Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); + + // Now that the block has been inserted into the function, create PHI nodes in + // the backedge block which correspond to any PHI nodes in the header block. + for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", + BETerminator); + NewPN->reserveOperandSpace(BackedgeBlocks.size()); + if (AA) AA->copyValue(PN, NewPN); + + // Loop over the PHI node, moving all entries except the one for the + // preheader over to the new PHI node. + unsigned PreheaderIdx = ~0U; + bool HasUniqueIncomingValue = true; + Value *UniqueValue = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *IBB = PN->getIncomingBlock(i); + Value *IV = PN->getIncomingValue(i); + if (IBB == Preheader) { + PreheaderIdx = i; + } else { + NewPN->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (UniqueValue == 0) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Delete all of the incoming values from the old PN except the preheader's + assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); + if (PreheaderIdx != 0) { + PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); + PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); + } + // Nuke all entries except the zero'th. + for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) + PN->removeIncomingValue(e-i, false); + + // Finally, add the newly constructed PHI node as the entry for the BEBlock. + PN->addIncoming(NewPN, BEBlock); + + // As an optimization, if all incoming values in the new PhiNode (which is a + // subset of the incoming values of the old PHI node) have the same value, + // eliminate the PHI Node. + if (HasUniqueIncomingValue) { + NewPN->replaceAllUsesWith(UniqueValue); + if (AA) AA->deleteValue(NewPN); + BEBlock->getInstList().erase(NewPN); + } + } + + // Now that all of the PHI nodes have been inserted and adjusted, modify the + // backedge blocks to just to the BEBlock instead of the header. + for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { + TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); + for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) + if (TI->getSuccessor(Op) == Header) + TI->setSuccessor(Op, BEBlock); + } + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. 
+  L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+  // Update dominator information
+  DT->splitBlock(BEBlock);
+  if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
+    DF->splitBlock(BEBlock);
+}
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
new file mode 100644
index 000000000000..32498958e34f
--- /dev/null
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -0,0 +1,177 @@
+//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerAllocations transformation is a target-dependent transformation
+// because it depends on the size of data types and alignment constraints.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerallocs"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumLowered, "Number of allocations lowered");
+
+namespace {
+  /// LowerAllocations - Turn malloc and free instructions into %malloc and
+  /// %free calls.
+  ///
+  class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
+    Constant *MallocFunc;   // Functions in the module we are processing
+    Constant *FreeFunc;     // Initialized by doInitialization
+    bool LowerMallocArgToInteger;
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    explicit LowerAllocations(bool LowerToInt = false)
+      : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0),
+        LowerMallocArgToInteger(LowerToInt) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+      AU.setPreservesCFG();
+
+      // This is a cluster of orthogonal Transforms:
+      AU.addPreserved<UnifyFunctionExitNodes>();
+      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreservedID(LowerSwitchID);
+      AU.addPreservedID(LowerInvokePassID);
+    }
+
+    /// doPassInitialization - For the lower allocations pass, this ensures that
+    /// a module contains a declaration for a malloc and a free function.
+    ///
+    bool doInitialization(Module &M);
+
+    virtual bool doInitialization(Function &F) {
+      return doInitialization(*F.getParent());
+    }
+
+    /// runOnBasicBlock - This method does the actual work of converting
+    /// instructions over, assuming that the pass has already been initialized.
+    ///
+    bool runOnBasicBlock(BasicBlock &BB);
+  };
+}
+
+char LowerAllocations::ID = 0;
+static RegisterPass<LowerAllocations>
+X("lowerallocs", "Lower allocations from instructions to calls");
+
+// Publicly exposed interface to pass...
+const PassInfo *const llvm::LowerAllocationsID = &X;
+// createLowerAllocationsPass - Interface to this file...
+Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
+  return new LowerAllocations(LowerMallocArgToInteger);
+}
+
+
+// doInitialization - For the lower allocations pass, this ensures that a
+// module contains a declaration for a malloc and a free function.
+//
+// This function is always successful.
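+//
+// For illustration (a sketch of typical output, not taken from this file),
+// the declarations it inserts are equivalent to:
+//
+//   declare i8* @malloc(...)
+//   declare void @free(i8*)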
+// +bool LowerAllocations::doInitialization(Module &M) { + const Type *BPTy = PointerType::getUnqual(Type::Int8Ty); + // Prototype malloc as "char* malloc(...)", because we don't know in + // doInitialization whether size_t is int or long. + FunctionType *FT = FunctionType::get(BPTy, std::vector(), true); + MallocFunc = M.getOrInsertFunction("malloc", FT); + FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0); + return true; +} + +// runOnBasicBlock - This method does the actual work of converting +// instructions over, assuming that the pass has already been initialized. +// +bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + assert(MallocFunc && FreeFunc && "Pass not initialized!"); + + BasicBlock::InstListType &BBIL = BB.getInstList(); + + const TargetData &TD = getAnalysis(); + const Type *IntPtrTy = TD.getIntPtrType(); + + // Loop over all of the instructions, looking for malloc or free instructions + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (MallocInst *MI = dyn_cast(I)) { + const Type *AllocTy = MI->getType()->getElementType(); + + // malloc(type) becomes sbyte *malloc(size) + Value *MallocArg; + if (LowerMallocArgToInteger) + MallocArg = ConstantInt::get(Type::Int64Ty, + TD.getTypeAllocSize(AllocTy)); + else + MallocArg = ConstantExpr::getSizeOf(AllocTy); + MallocArg = ConstantExpr::getTruncOrBitCast(cast(MallocArg), + IntPtrTy); + + if (MI->isArrayAllocation()) { + if (isa(MallocArg) && + cast(MallocArg)->isOne()) { + MallocArg = MI->getOperand(0); // Operand * 1 = Operand + } else if (Constant *CO = dyn_cast(MI->getOperand(0))) { + CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/); + MallocArg = ConstantExpr::getMul(CO, cast(MallocArg)); + } else { + Value *Scale = MI->getOperand(0); + if (Scale->getType() != IntPtrTy) + Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/, + "", I); + + // Multiply it by the array size if necessary... + MallocArg = BinaryOperator::Create(Instruction::Mul, Scale, + MallocArg, "", I); + } + } + + // Create the call to Malloc. + CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I); + MCall->setTailCall(); + + // Create a cast instruction to convert to the right type... + Value *MCast; + if (MCall->getType() != Type::VoidTy) + MCast = new BitCastInst(MCall, MI->getType(), "", I); + else + MCast = Constant::getNullValue(MI->getType()); + + // Replace all uses of the old malloc inst with the cast inst + MI->replaceAllUsesWith(MCast); + I = --BBIL.erase(I); // remove and delete the malloc instr... + Changed = true; + ++NumLowered; + } else if (FreeInst *FI = dyn_cast(I)) { + Value *PtrCast = + new BitCastInst(FI->getOperand(0), + PointerType::getUnqual(Type::Int8Ty), "", I); + + // Insert a call to the free function... + CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall(); + + // Delete the old free instruction + I = --BBIL.erase(I); + Changed = true; + ++NumLowered; + } + } + + return Changed; +} + diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 000000000000..1f6b1a2a6846 --- /dev/null +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -0,0 +1,614 @@ +//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which do not yet +// support stack unwinding. This pass supports two models of exception handling +// lowering, the 'cheap' support and the 'expensive' support. +// +// 'Cheap' exception handling support gives the program the ability to execute +// any program which does not "throw an exception", by turning 'invoke' +// instructions into calls and by turning 'unwind' instructions into calls to +// abort(). If the program does dynamically use the unwind instruction, the +// program will print a message then abort. +// +// 'Expensive' exception handling support gives the full exception handling +// support to the program at the cost of making the 'invoke' instruction +// really expensive. It basically inserts setjmp/longjmp calls to emulate the +// exception handling as necessary. +// +// Because the 'expensive' support slows down programs a lot, and EH is only +// used for a subset of the programs, it must be specifically enabled by an +// option. +// +// Note that after this pass runs the CFG is not entirely accurate (exceptional +// control flow edges are not correct anymore) so only very simple things should +// be done after the lowerinvoke pass has run (like generation of native code). +// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't +// support the invoke instruction yet" lowering pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowerinvoke" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetLowering.h" +#include +#include +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumUnwinds, "Number of unwinds replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +static cl::opt ExpensiveEHSupport("enable-correct-eh-support", + cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code")); + +namespace { + class VISIBILITY_HIDDEN LowerInvoke : public FunctionPass { + // Used for both models. + Constant *WriteFn; + Constant *AbortFn; + Value *AbortMessage; + unsigned AbortMessageLength; + + // Used for expensive EH support. 
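+    // Illustrative sketch (assuming the default 200-slot jmp_buf used when no
+    // TargetLowering is supplied): JBLinkTy is the recursive node type
+    //   %llvm.sjljeh.jmpbufty = type { [200 x i8*], %llvm.sjljeh.jmpbufty* }
+    // and JBListHead is the head of that chain of active jump buffers.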
+ const Type *JBLinkTy; + GlobalVariable *JBListHead; + Constant *SetJmpFn, *LongJmpFn; + + // We peek in TLI to grab the target's jmp_buf size and alignment + const TargetLowering *TLI; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerInvoke(const TargetLowering *tli = NULL) + : FunctionPass(&ID), TLI(tli) { } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerAllocationsID); + } + + private: + void createAbortMessage(Module *M); + void writeAbortMessage(Instruction *IB); + bool insertCheapEHSupport(Function &F); + void splitLiveRangesLiveAcrossInvokes(std::vector &Invokes); + void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, SwitchInst *CatchSwitch); + bool insertExpensiveEHSupport(Function &F); + }; +} + +char LowerInvoke::ID = 0; +static RegisterPass +X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators"); + +const PassInfo *const llvm::LowerInvokePassID = &X; + +// Public Interface To the LowerInvoke pass. +FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { + return new LowerInvoke(TLI); +} + +// doInitialization - Make sure that there is a prototype for abort in the +// current module. +bool LowerInvoke::doInitialization(Module &M) { + const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); + AbortMessage = 0; + if (ExpensiveEHSupport) { + // Insert a type for the linked list of jump buffers. + unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; + JBSize = JBSize ? JBSize : 200; + const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); + + { // The type is recursive, so use a type holder. + std::vector Elements; + Elements.push_back(JmpBufTy); + OpaqueType *OT = OpaqueType::get(); + Elements.push_back(PointerType::getUnqual(OT)); + PATypeHolder JBLType(StructType::get(Elements)); + OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle. + JBLinkTy = JBLType.get(); + M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); + } + + const Type *PtrJBList = PointerType::getUnqual(JBLinkTy); + + // Now that we've done that, insert the jmpbuf list head global, unless it + // already exists. + if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { + JBListHead = new GlobalVariable(PtrJBList, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(PtrJBList), + "llvm.sjljeh.jblist", &M); + } + +// VisualStudio defines setjmp as _setjmp via #include / , +// so it looks like Intrinsic::_setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + + SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp); + +#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio) +// let's return it to _setjmp state in case anyone ever needs it after this +// point under VisualStudio +#define setjmp _setjmp +#endif + + LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); + } + + // We need the 'write' and 'abort' functions for both models. + AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0); +#if 0 // "write" is Unix-specific.. code is going away soon anyway. 
+ WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, + VoidPtrTy, Type::Int32Ty, (Type *)0); +#else + WriteFn = 0; +#endif + return true; +} + +void LowerInvoke::createAbortMessage(Module *M) { + if (ExpensiveEHSupport) { + // The abort message for expensive EH support tells the user that the + // program 'unwound' without an 'invoke' instruction. + Constant *Msg = + ConstantArray::get("ERROR: Exception thrown, but not caught!\n"); + AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 + + GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalValue::InternalLinkage, + Msg, "abortmsg", M); + std::vector GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); + } else { + // The abort message for cheap EH support tells the user that EH is not + // enabled. + Constant *Msg = + ConstantArray::get("Exception handler needed, but not enabled. Recompile" + " program with -enable-correct-eh-support.\n"); + AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 + + GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalValue::InternalLinkage, + Msg, "abortmsg", M); + std::vector GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); + } +} + + +void LowerInvoke::writeAbortMessage(Instruction *IB) { +#if 0 + if (AbortMessage == 0) + createAbortMessage(IB->getParent()->getParent()->getParent()); + + // These are the arguments we WANT... + Value* Args[3]; + Args[0] = ConstantInt::get(Type::Int32Ty, 2); + Args[1] = AbortMessage; + Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength); + (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall(); +#endif +} + +bool LowerInvoke::insertCheapEHSupport(Function &F) { + bool Changed = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + std::vector CallArgs(II->op_begin()+3, II->op_end()); + // Insert a normal call instruction... + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs.begin(), CallArgs.end(), "",II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(BB); + + // Remove the invoke instruction now. + BB->getInstList().erase(II); + + ++NumInvokes; Changed = true; + } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { + // Insert a new call to write(2, AbortMessage, AbortMessageLength); + writeAbortMessage(UI); + + // Insert a call to abort() + CallInst::Create(AbortFn, "", UI)->setTailCall(); + + // Insert a return instruction. This really should be a "barrier", as it + // is unreachable. + ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 : + Constant::getNullValue(F.getReturnType()), UI); + + // Remove the unwind instruction now. + BB->getInstList().erase(UI); + + ++NumUnwinds; Changed = true; + } + return Changed; +} + +/// rewriteExpensiveInvoke - Insert code and hack the function to replace the +/// specified invoke instruction with a call. 
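+/// A rough before/after sketch (illustrative, not from the original source),
+/// for invoke number N:
+///
+///   invoke void @f() to label %ok unwind label %lpad
+///
+/// becomes
+///
+///   store volatile i32 N, i32* %invokenum
+///   call void @f()
+///   br label %ok        ; %ok then stores 0 back into %invokenum
+///
+/// and the catch block's switch gains a case sending N to %lpad.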
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+                                         AllocaInst *InvokeNum,
+                                         SwitchInst *CatchSwitch) {
+  ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+
+  // If the unwind edge has phi nodes, split the edge.
+  if (isa<PHINode>(II->getUnwindDest()->begin())) {
+    SplitCriticalEdge(II, 1, this);
+
+    // If there are any phi nodes left, they must have a single predecessor.
+    while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      PN->eraseFromParent();
+    }
+  }
+
+  // Insert a store of the invoke num before the invoke and store zero into the
+  // location afterward.
+  new StoreInst(InvokeNoC, InvokeNum, true, II);  // volatile
+
+  BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+  // nonvolatile.
+  new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+
+  // Add a switch case to our unwind block.
+  CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+  // Insert a normal call instruction.
+  std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+                                       CallArgs.begin(), CallArgs.end(), "",
+                                       II);
+  NewCall->takeName(II);
+  NewCall->setCallingConv(II->getCallingConv());
+  NewCall->setAttributes(II->getAttributes());
+  II->replaceAllUsesWith(NewCall);
+
+  // Replace the invoke with an uncond branch.
+  BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+  II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+  if (!LiveBBs.insert(BB).second) return; // already been here.
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges.  Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge.  This process also splits all critical edges
+// coming out of invokes.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
+  // First step, split all critical edges from invoke instructions.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    InvokeInst *II = Invokes[i];
+    SplitCriticalEdge(II, 0, this);
+    SplitCriticalEdge(II, 1, this);
+    assert(!isa<PHINode>(II->getNormalDest()->begin()) &&
+           !isa<PHINode>(II->getUnwindDest()->begin()) &&
+           "critical edge splitting left single entry phi nodes?");
+  }
+
+  Function *F = Invokes.back()->getParent()->getParent();
+
+  // To avoid having to handle incoming arguments specially, we lower each arg
+  // to a copy instruction in the entry block.  This ensures that the argument
+  // value itself cannot be live across the entry block.
+  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+         isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+    ++AfterAllocaInsertPt;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI) {
+    // This is always a no-op cast because we're casting AI to AI->getType() so
+    // src and destination types are identical. BitCast is the only possibility.
+ CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Normally its is forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with to simply + // make NC its user. + NC->setOperand(0, AI); + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast(Inst->use_back())->getParent() == BB && + !isa(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast(Inst)) + if (isa(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + std::vector Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (User->getParent() != BB || isa(User)) + Users.push_back(User); + } + + // Scan all of the uses and see if the live range is live across an unwind + // edge. If we find a use live across an invoke edge, create an alloca + // and spill the value. + std::set InvokesWithStoreInserted; + + // Find all of the blocks that this value is live in. + std::set LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool LowerInvoke::insertExpensiveEHSupport(Function &F) { + std::vector Returns; + std::vector Unwinds; + std::vector Invokes; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { + Unwinds.push_back(UI); + } + + if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // TODO: This is not an optimal way to do this. 
In particular, this always + // inserts setjmp calls into the entries of functions with invoke instructions + // even though there are possibly paths through the function that do not + // execute any invokes. In particular, for functions with early exits, e.g. + // the 'addMove' method in hexxagon, it would be nice to not have to do the + // setjmp stuff on the early exit path. This requires a bit of dataflow, but + // would not be too hard to do. + + // If we have an invoke instruction, insert a setjmp that dominates all + // invokes. After the setjmp, use a cond branch that goes to the original + // code path on zero, and to a designated 'catch' block of nonzero. + Value *OldJmpBufPtr = 0; + if (!Invokes.empty()) { + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be live across invokes. + unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; + AllocaInst *JmpBuf = + new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); + + std::vector Idx; + Idx.push_back(Constant::getNullValue(Type::Int32Ty)); + Idx.push_back(ConstantInt::get(Type::Int32Ty, 1)); + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + "OldBuf", EntryBB->getTerminator()); + + // Copy the JBListHead to the alloca. + Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, + EntryBB->getTerminator()); + new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); + + // Add the new jumpbuf to the list. + new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); + + // Create the catch block. The catch block is basically a big switch + // statement that goes to all of the invoke catch blocks. + BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F); + + // Create an alloca which keeps track of which invoke is currently + // executing. For normal calls it contains zero. + AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum", + EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true, + EntryBB->getTerminator()); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). + BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F); + Unwinds.push_back(new UnwindInst(UnwindBB)); + + Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); + SwitchInst *CatchSwitch = + SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); + + // Now that things are set up, insert the setjmp call itself. + + // Split the entry block to insert the conditional branch for the setjmp. 
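+    // The entry block then ends up looking roughly like this (an illustrative
+    // sketch, not from the original source; %tmp is the jmp_buf cast to i8*):
+    //
+    //   entry:
+    //     ... jmp_buf alloca and jump-buffer list setup ...
+    //     %sjret = call i32 @llvm.setjmp(i8* %tmp)
+    //     %notunwind = icmp eq i32 %sjret, 0
+    //     br i1 %notunwind, label %setjmp.cont, label %setjmp.catch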
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "setjmp.cont"); + + Idx[1] = ConstantInt::get(Type::Int32Ty, 0); + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + "TheJmpBuf", + EntryBB->getTerminator()); + JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty), + "tmp", EntryBB->getTerminator()); + Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", + EntryBB->getTerminator()); + + // Compare the return value to zero. + Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, + Constant::getNullValue(SJRet->getType()), + "notunwind", EntryBB->getTerminator()); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); + + // At this point, we are all set up, rewrite each invoke instruction. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch); + } + + // We know that there is at least one unwind. + + // Create three new blocks, the block to load the jmpbuf ptr and compare + // against null, the block to do the longjmp, and the error block for if it + // is null. Add them at the end of the function because they are not hot. + BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F); + + // If this function contains an invoke, restore the old jumpbuf ptr. + Value *BufPtr; + if (OldJmpBufPtr) { + // Before the return, insert a copy from the saved value to the new value. + BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); + new StoreInst(BufPtr, JBListHead, UnwindHandler); + } else { + BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); + } + + // Load the JBList, if it's null, then there was no catch! + Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, + Constant::getNullValue(BufPtr->getType()), + "notnull", UnwindHandler); + BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); + + // Create the block to do the longjmp. + // Get a pointer to the jmpbuf and longjmp. + std::vector Idx; + Idx.push_back(Constant::getNullValue(Type::Int32Ty)); + Idx.push_back(ConstantInt::get(Type::Int32Ty, 0)); + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", + UnwindBlock); + Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty), + "tmp", UnwindBlock); + Idx[1] = ConstantInt::get(Type::Int32Ty, 1); + CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); + new UnreachableInst(UnwindBlock); + + // Set up the term block ("throw without a catch"). + new UnreachableInst(TermBlock); + + // Insert a new call to write(2, AbortMessage, AbortMessageLength); + writeAbortMessage(TermBlock->getTerminator()); + + // Insert a call to abort() + CallInst::Create(AbortFn, "", + TermBlock->getTerminator())->setTailCall(); + + + // Replace all unwinds with a branch to the unwind handler. + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindHandler, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, restore the old jmpbuf pointer to its input value. 
+ if (OldJmpBufPtr) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *R = Returns[i]; + + // Before the return, insert a copy from the saved value to the new value. + Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); + new StoreInst(OldBuf, JBListHead, true, R); + } + } + + return true; +} + +bool LowerInvoke::runOnFunction(Function &F) { + if (ExpensiveEHSupport) + return insertExpensiveEHSupport(F); + else + return insertCheapEHSupport(F); +} diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 000000000000..1da59360fc2b --- /dev/null +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,323 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +namespace { + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch + /// instructions. Note that this cannot be a BasicBlock pass because it + /// modifies the CFG! + class VISIBILITY_HIDDEN LowerSwitch : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + LowerSwitch() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreserved(); + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerAllocationsID); + } + + struct CaseRange { + Constant* Low; + Constant* High; + BasicBlock* BB; + + CaseRange() : Low(0), High(0), BB(0) { } + CaseRange(Constant* low, Constant* high, BasicBlock* bb) : + Low(low), High(high), BB(bb) { } + }; + + typedef std::vector CaseVector; + typedef std::vector::iterator CaseItr; + private: + void processSwitchInst(SwitchInst *SI); + + BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! 
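+  /// Because the ranges are disjoint, comparing C1.Low against C2.High orders
+  /// whole ranges correctly; e.g. [1,3] sorts before [5,5] since 1 < 5.
+  /// (Illustrative note, not from the original source.)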
+ struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast(C1.Low); + const ConstantInt* CI2 = cast(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; +} + +char LowerSwitch::ID = 0; +static RegisterPass +X("lowerswitch", "Lower SwitchInst's to branches"); + +// Publically exposed interface to pass... +const PassInfo *const llvm::LowerSwitchID = &X; +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitch(); +} + +bool LowerSwitch::runOnFunction(Function &F) { + bool Changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast(Cur->getTerminator())) { + Changed = true; + processSwitchInst(SI); + } + } + + return Changed; +} + +// operator<< - Used for debugging purposes. +// +static std::ostream& operator<<(std::ostream &O, + const LowerSwitch::CaseVector &C) { + O << "["; + + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), + E = C.end(); B != E; ) { + O << *B->Low << " -" << *B->High; + if (++B != E) O << ", "; + } + + return O << "]"; +} + +static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) { + if (O.stream()) *O.stream() << C; + return O; +} + +// switchConvert - Convert the switch statement into a binary lookup of +// the case values. The function recursively builds this tree. +// +BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + Value* Val, BasicBlock* OrigBlock, + BasicBlock* Default) +{ + unsigned Size = End - Begin; + + if (Size == 1) + return newLeafBlock(*Begin, Val, OrigBlock, Default); + + unsigned Mid = Size / 2; + std::vector LHS(Begin, Begin + Mid); + DOUT << "LHS: " << LHS << "\n"; + std::vector RHS(Begin + Mid, End); + DOUT << "RHS: " << RHS << "\n"; + + CaseRange& Pivot = *(Begin + Mid); + DEBUG(errs() << "Pivot ==> " + << cast(Pivot.Low)->getValue() << " -" + << cast(Pivot.High)->getValue() << "\n"); + + BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, + OrigBlock, Default); + BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, + OrigBlock, Default); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. + Function* F = OrigBlock->getParent(); + BasicBlock* NewNode = BasicBlock::Create("NodeBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); + + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +// newLeafBlock - Create a new leaf block for the binary lookup tree. It +// checks if the switch's value == the case's value. If not, then it +// jumps to the default branch. At this point in the tree, the value +// can't be another valid case value, so the jump to the "default" branch +// is warranted. +// +BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, + BasicBlock* Default) +{ + Function* F = OrigBlock->getParent(); + BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewLeaf); + + // Emit comparison + ICmpInst* Comp = NULL; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... 
+ Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low, + "SwitchLeaf", NewLeaf); + } else { + // Make range comparison + if (cast(Leaf.Low)->isMinValue(true /*isSigned*/)) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf", NewLeaf); + } else if (cast(Leaf.Low)->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf", NewLeaf); + } else { + // Emit V-Lo <=u Hi-Lo + Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, + Val->getName()+".off", + NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf", NewLeaf); + } + } + + // Make the conditional branch... + BasicBlock* Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. + for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { + PHINode* PN = cast(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = cast(Leaf.High)->getSExtValue() - + cast(Leaf.Low)->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + +// Clusterify - Transform simple list of Cases into list of CaseRange's +unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { + unsigned numCmps = 0; + + // Start with "simple" cases + for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) + Cases.push_back(CaseRange(SI->getSuccessorValue(i), + SI->getSuccessorValue(i), + SI->getSuccessor(i))); + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) { + int64_t nextValue = cast(J->Low)->getSExtValue(); + int64_t currentValue = cast(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +// processSwitchInst - Replace the specified switch instruction with a sequence +// of chained if-then insts in a balanced binary search. +// +void LowerSwitch::processSwitchInst(SwitchInst *SI) { + BasicBlock *CurBlock = SI->getParent(); + BasicBlock *OrigBlock = CurBlock; + Function *F = CurBlock->getParent(); + Value *Val = SI->getOperand(0); // The value we are switching on... + BasicBlock* Default = SI->getDefaultDest(); + + // If there is only the default destination, don't bother with the code below. + if (SI->getNumOperands() == 2) { + BranchInst::Create(SI->getDefaultDest(), CurBlock); + CurBlock->getInstList().erase(SI); + return; + } + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. 
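+  // Illustrative shape of the final lowering (not from the original source):
+  // interior NodeBlocks test "icmp slt %V, pivot" and branch left or right,
+  // and each LeafBlock tests one case value or range, e.g.
+  //
+  //   LeafBlock:  %SwitchLeaf = icmp eq i32 %V, 4
+  //               br i1 %SwitchLeaf, label %case4, label %NewDefault
+  //
+  // where %case4 stands for whatever successor that case originally had.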
+ BasicBlock* NewDefault = BasicBlock::Create("NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + + BranchInst::Create(Default, NewDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = Default->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); + } + + // Prepare cases vector. + CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + + DOUT << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"; + DOUT << "Cases: " << Cases << "\n"; + + BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, + OrigBlock, NewDefault); + + // Branch to our shiny new if-then stuff... + BranchInst::Create(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + CurBlock->getInstList().erase(SI); +} diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile new file mode 100644 index 000000000000..d1e9336d67f0 --- /dev/null +++ b/lib/Transforms/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMTransformUtils +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 000000000000..2b06d778e145 --- /dev/null +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,92 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +namespace { + struct VISIBILITY_HIDDEN PromotePass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass(&ID) {} + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. 
+ // + virtual bool runOnFunction(Function &F); + + // getAnalysisUsage - We need dominance frontiers + // + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); + // This is a cluster of orthogonal Transforms + AU.addPreserved(); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerAllocationsID); + } + }; +} // end of anonymous namespace + +char PromotePass::ID = 0; +static RegisterPass X("mem2reg", "Promote Memory to Register"); + +bool PromotePass::runOnFunction(Function &F) { + std::vector Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + bool Changed = false; + + DominatorTree &DT = getAnalysis(); + DominanceFrontier &DF = getAnalysis(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT, DF); + NumPromoted += Allocas.size(); + Changed = true; + } + + return Changed; +} + +// Publically exposed interface to pass... +const PassInfo *const llvm::PromoteMemoryToRegisterID = &X; +// createPromoteMemoryToRegister - Provide an entry point to create this pass. +// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromotePass(); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 000000000000..b717699b7e05 --- /dev/null +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,1003 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file promotes memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using dominator frontiers to place PHI nodes, then traversing +// the function in depth-first order to rewrite loads and stores as appropriate. +// This is just the standard SSA construction algorithm to construct "pruned" +// SSA form. 
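+//
+// A minimal illustration (not from the original source): an entry block
+//
+//   %x = alloca i32
+//   store i32 1, i32* %x
+//   %v = load i32* %x
+//
+// promotes to simply using the value 1 in place of %v, with no PHI needed;
+// PHI nodes are placed only where distinct stores reach a common join block.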
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
+
+// Provide DenseMapInfo for all pointers.
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+  typedef std::pair<BasicBlock*, unsigned> EltTy;
+  static inline EltTy getEmptyKey() {
+    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+  }
+  static inline EltTy getTombstoneKey() {
+    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+  }
+  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+  }
+  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+    return LHS == RHS;
+  }
+  static bool isPod() { return true; }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+  // FIXME: If the memory unit is of pointer or integer type, we can permit
+  // assignments to subsections of the memory unit.
+
+  // Only allow direct and non-volatile loads and stores...
+  for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE; ++UI)   // Loop over all of the uses of the alloca
+    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      if (LI->isVolatile())
+        return false;
+    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+      if (SI->getOperand(0) == AI)
+        return false;   // Don't allow a store OF the AI, only INTO the AI.
+      if (SI->isVolatile())
+        return false;
+    } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) {
+      // A bitcast that does not feed into debug info inhibits promotion.
+      if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin()))
+        return false;
+      // If the only use is by debug info, this alloca will not exist in
+      // non-debug code, so don't try to promote; this ensures the same
+      // codegen with debug info.  Otherwise, debug info should not
+      // inhibit promotion (but we must examine other uses).
+      if (AI->hasOneUse())
+        return false;
+    } else {
+      return false;
+    }
+
+  return true;
+}
+
+namespace {
+  struct AllocaInfo;
+
+  // Data package used by RenamePass()
+  class VISIBILITY_HIDDEN RenamePassData {
+  public:
+    typedef std::vector<Value *> ValVector;
+
+    RenamePassData() {}
+    RenamePassData(BasicBlock *B, BasicBlock *P,
+                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
+    BasicBlock *BB;
+    BasicBlock *Pred;
+    ValVector Values;
+
+    void swap(RenamePassData &RHS) {
+      std::swap(BB, RHS.BB);
+      std::swap(Pred, RHS.Pred);
+      Values.swap(RHS.Values);
+    }
+  };
+
+  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+  /// load/store instructions in the block that directly load or store an
+  /// alloca.
+  ///
+  /// This functionality is important because it avoids scanning large basic
+  /// blocks multiple times when promoting many allocas in the same block.
+  class VISIBILITY_HIDDEN LargeBlockInfo {
+    /// InstNumbers - For each instruction that we track, keep the index of the
+    /// instruction.  The index starts out as the number of the instruction from
+    /// the start of the block.
+    DenseMap<const Instruction*, unsigned> InstNumbers;
+  public:
+
+    /// isInterestingInstruction - This code only looks at accesses to allocas.
+    static bool isInterestingInstruction(const Instruction *I) {
+      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+    }
+
+    /// getInstructionIndex - Get or calculate the index of the specified
+    /// instruction.
+    unsigned getInstructionIndex(const Instruction *I) {
+      assert(isInterestingInstruction(I) &&
+             "Not a load/store to/from an alloca?");
+
+      // If we already have this instruction number, return it.
+      DenseMap<const Instruction*, unsigned>::iterator It = InstNumbers.find(I);
+      if (It != InstNumbers.end()) return It->second;
+
+      // Scan the whole block to get the instruction.  This accumulates
+      // information for every interesting instruction in the block, in order to
+      // avoid gratuitous rescans.
+      const BasicBlock *BB = I->getParent();
+      unsigned InstNo = 0;
+      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+           BBI != E; ++BBI)
+        if (isInterestingInstruction(BBI))
+          InstNumbers[BBI] = InstNo++;
+      It = InstNumbers.find(I);
+
+      assert(It != InstNumbers.end() && "Didn't insert instruction?");
+      return It->second;
+    }
+
+    void deleteValue(const Instruction *I) {
+      InstNumbers.erase(I);
+    }
+
+    void clear() {
+      InstNumbers.clear();
+    }
+  };
+
+  struct VISIBILITY_HIDDEN PromoteMem2Reg {
+    /// Allocas - The alloca instructions being promoted.
+    ///
+    std::vector<AllocaInst*> Allocas;
+    DominatorTree &DT;
+    DominanceFrontier &DF;
+
+    /// AST - An AliasSetTracker object to update.  If null, don't update it.
+    ///
+    AliasSetTracker *AST;
+
+    /// AllocaLookup - Reverse mapping of Allocas.
+    ///
+    std::map<AllocaInst*, unsigned> AllocaLookup;
+
+    /// NewPhiNodes - The PhiNodes we're adding.
+    ///
+    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+    /// it corresponds to.
+    DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+    /// each alloca that is of pointer type, we keep track of what to copyValue
+    /// to the inserted PHI nodes here.
+    ///
+    std::vector<Value*> PointerAllocaValues;
+
+    /// Visited - The set of basic blocks the renamer has already visited.
+    ///
+    SmallPtrSet<BasicBlock*, 16> Visited;
+
+    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+    /// non-deterministic behavior.
+    DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+    /// BBNumPreds - Lazily compute the number of predecessors a block has.
+    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+  public:
+    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+                   DominanceFrontier &df, AliasSetTracker *ast)
+      : Allocas(A), DT(dt), DF(df), AST(ast) {}
+
+    void run();
+
+    /// properlyDominates - Return true if I1 properly dominates I2.
+    ///
+    bool properlyDominates(Instruction *I1, Instruction *I2) const {
+      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
+        I1 = II->getNormalDest()->begin();
+      return DT.properlyDominates(I1->getParent(), I2->getParent());
+    }
+
+    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ /// + bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { + return DT.dominates(BB1, BB2); + } + + private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB))+1; + return NP-1; + } + + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet &DefBlocks, + SmallPtrSet &LiveInBlocks); + + void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + + + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, + SmallPtrSet &InsertedPHINodes); + }; + + struct AllocaInfo { + std::vector DefiningBlocks; + std::vector UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = 0; + OnlyBlock = 0; + OnlyUsedInOneBlock = true; + AllocaPointerVal = 0; + } + + /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our + /// ivars. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); + U != E;) { + Instruction *User = cast(*U); + ++U; + if (BitCastInst *BC = dyn_cast(User)) { + // Remove any uses of this alloca in DbgInfoInstrinsics. + assert(BC->hasOneUse() && "Unexpected alloca uses!"); + DbgInfoIntrinsic *DI = cast(*BC->use_begin()); + DI->eraseFromParent(); + BC->eraseFromParent(); + continue; + } + else if (StoreInst *SI = dyn_cast(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + } + }; +} // end of anonymous namespace + + +void PromoteMem2Reg::run() { + Function &F = *DF.getRoot()->getParent(); + + if (AST) PointerAllocaValues.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && + "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. 
+ if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + RewriteSingleStoreAlloca(AI, Info, LBI); + + // Finally, after the scan, check to see if the store is all that is left. + if (Info.UsingBlocks.empty()) { + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock) { + PromoteSingleBlockAlloca(AI, Info, LBI); + + // Finally, after the scan, check to see if the stores are all that is + // left. + if (Info.UsingBlocks.empty()) { + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast(AI->use_back()); + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + ++NumLocalPromoted; + continue; + } + } + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + BBNumbers[I] = ID++; + } + + // If we have an AST to keep updated, remember some pointer value that is + // stored into the alloca. + if (AST) + PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Keep the reverse mapping of the 'Allocas' array for the rename pass. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need PHI + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + DetermineInsertionPoint(AI, AllocaNum, Info); + } + + if (Allocas.empty()) + return; // All of the allocas must have been trivial! + + LBI.clear(); + + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. + // + RenamePassData::ValVector Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + // + std::vector RenamePassWorkList; + RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + while (!RenamePassWorkList.empty()) { + RenamePassData RPD; + RPD.swap(RenamePassWorkList.back()); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. 
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); + } + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + Instruction *A = Allocas[i]; + + // If there are any uses of the alloca instructions left, they must be in + // sections of dead code that were not processed on the dominance frontier. + // Just delete the users now. + // + if (!A->use_empty()) + A->replaceAllUsesWith(UndefValue::get(A->getType())); + if (AST) AST->deleteValue(A); + A->eraseFromParent(); + } + + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + for (DenseMap, PHINode*>::iterator I = + NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = PN->hasConstantValue(true)) { + if (!isa(V) || + properlyDominates(cast(V), PN)) { + if (AST && isa(PN->getType())) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + // + for (DenseMap, PHINode*>::iterator I = + NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) + continue; + + // Get the preds for BB. + SmallVector Preds(pred_begin(BB), pred_end(BB)); + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + std::sort(Preds.begin(), Preds.end()); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. + SmallVector::iterator EntIt = + std::lower_bound(Preds.begin(), Preds.end(), + SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. 
Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). + unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) + SomePHI->addIncoming(UndefVal, Preds[pred]); + } + } + + NewPhiNodes.clear(); +} + + +/// ComputeLiveInBlocks - Determine which blocks the value is live in. These +/// are blocks which lead to uses. Knowing this allows us to avoid inserting +/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes +/// would be dead). +void PromoteMem2Reg:: +ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet &DefBlocks, + SmallPtrSet &LiveInBlocks) { + + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector LiveInBlockWorklist; + LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), + Info.UsingBlocks.begin(), Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin(); ; ++I) { + if (StoreInst *SI = dyn_cast(I)) { + if (SI->getOperand(1) != AI) continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i, --e; + break; + } else if (LoadInst *LI = dyn_cast(I)) { + if (LI->getOperand(0) != AI) continue; + + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + break; + } + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB)) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// DetermineInsertionPoint - At this point, we're committed to promoting the +/// alloca using IDF's, and the standard SSA construction algorithm. Determine +/// which blocks need phi nodes and see if we can optimize out some work by +/// avoiding insertion of dead phi nodes. 
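+///
+/// As a small worked example (a sketch, not taken from the code below): in a
+/// diamond CFG where block A branches to B and C, both of which branch to D,
+/// a store to the alloca in B alone puts D (the dominance frontier of B) into
+/// the PHI-insertion set; if no use of the alloca is live into D, the
+/// live-in computation prunes that PHI as dead before it is ever created.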
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+                                             AllocaInfo &Info) {
+
+  // Unique the set of defining blocks for efficient lookup.
+  SmallPtrSet<BasicBlock*, 32> DefBlocks;
+  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+  // Determine which blocks the value is live in.  These are blocks which lead
+  // to uses.
+  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+  // Compute the locations where PhiNodes need to be inserted.  Look at the
+  // dominance frontier of EACH basic-block we have a write in.
+  unsigned CurrentVersion = 0;
+  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
+  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
+  while (!Info.DefiningBlocks.empty()) {
+    BasicBlock *BB = Info.DefiningBlocks.back();
+    Info.DefiningBlocks.pop_back();
+
+    // Look up the DF for this write, add it to defining blocks.
+    DominanceFrontier::const_iterator it = DF.find(BB);
+    if (it == DF.end()) continue;
+
+    const DominanceFrontier::DomSetType &S = it->second;
+
+    // In theory we don't need the indirection through the DFBlocks vector.
+    // In practice, the order of calling QueuePhiNode would depend on the
+    // (unspecified) ordering of basic blocks in the dominance frontier,
+    // which would give PHI nodes non-deterministic subscripts.  Fix this by
+    // processing blocks in order of their occurrence in the function.
+    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
+         PE = S.end(); P != PE; ++P) {
+      // If the frontier block is not in the live-in set for the alloca, don't
+      // bother processing it.
+      if (!LiveInBlocks.count(*P))
+        continue;
+
+      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
+    }
+
+    // Sort by the block ordering in the function.
+    if (DFBlocks.size() > 1)
+      std::sort(DFBlocks.begin(), DFBlocks.end());
+
+    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
+      BasicBlock *BB = DFBlocks[i].second;
+      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
+        Info.DefiningBlocks.push_back(BB);
+    }
+    DFBlocks.clear();
+  }
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+                                              AllocaInfo &Info,
+                                              LargeBlockInfo &LBI) {
+  StoreInst *OnlyStore = Info.OnlyStore;
+  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+  BasicBlock *StoreBB = OnlyStore->getParent();
+  int StoreIndex = -1;
+
+  // Clear out UsingBlocks.  We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
+
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+    Instruction *UserInst = cast<Instruction>(*UI++);
+    if (!isa<LoadInst>(UserInst)) {
+      assert(UserInst == OnlyStore && "Should only have load/stores");
+      continue;
+    }
+    LoadInst *LI = cast<LoadInst>(UserInst);
+
+    // Okay, if we have a load from the alloca, we want to replace it with the
+    // only value stored to the alloca.  We can do this if the value is
+    // dominated by the store.  If not, we use the rest of the mem2reg machinery
+    // to insert the phi nodes as needed.
+    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
+      if (LI->getParent() == StoreBB) {
+        // If we have a use that is in the same block as the store, compare the
+        // indices of the two instructions to see which one came first.  If the
+        // load came before the store, we can't handle it.
+        if (StoreIndex == -1)
+          StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+          // Can't handle this load, bail out.
+          Info.UsingBlocks.push_back(StoreBB);
+          continue;
+        }
+
+      } else if (LI->getParent() != StoreBB &&
+                 !dominates(StoreBB, LI->getParent())) {
+        // If the load and store are in different blocks, use BB dominance to
+        // check their relationships.  If the store doesn't dom the use, bail
+        // out.
+        Info.UsingBlocks.push_back(LI->getParent());
+        continue;
+      }
+    }
+
+    // Otherwise, we *can* safely rewrite this load.
+    LI->replaceAllUsesWith(OnlyStore->getOperand(0));
+    if (AST && isa<PointerType>(LI->getType()))
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+}
+
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+                  const std::pair<unsigned, StoreInst*> &RHS) {
+    return LHS.first < RHS.first;
+  }
+};
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// the blocks where it is still live are recorded in Info.UsingBlocks.  This is
+/// necessary in cases where, due to control flow, the alloca is potentially
+/// undefined on some control flow paths.  e.g. code like this is potentially
+/// correct:
+///
+///   for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+                                              LargeBlockInfo &LBI) {
+  // The trickiest case to handle is when we have large blocks.  Because of
+  // this, this code is optimized assuming that large blocks happen.  This does
+  // not significantly pessimize the small block case.  This uses LargeBlockInfo
+  // to make it efficient to get the index of various operations in the block.
+
+  // Clear out UsingBlocks.  We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
+
+  // Walk the use-def list of the alloca, getting the locations of all stores.
+  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+  StoresByIndexTy StoresByIndex;
+
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+       UI != E; ++UI)
+    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+  // If there are no stores to the alloca, just replace any loads with undef.
+  if (StoresByIndex.empty()) {
+    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+        if (AST && isa<PointerType>(LI->getType()))
+          AST->deleteValue(LI);
+        LBI.deleteValue(LI);
+        LI->eraseFromParent();
+      }
+    return;
+  }
+
+  // Sort the stores by their index, making it efficient to do a lookup with a
+  // binary search.
+  std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+  // Walk all of the loads from this alloca, replacing them with the nearest
+  // store above them, if any.
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+    if (!LI) continue;
+
+    unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+    // Find the nearest store that has a lower index than this load.
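+    // For example (hypothetical indices): with stores at indices {3, 9} and a
+    // load at index 7, std::lower_bound lands on the entry for index 9, and
+    // the --I below backs up to the store at index 3, whose stored operand
+    // supplies the load's replacement value.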
+ StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::pair(LoadIdx, 0), + StoreIndexSearchPredicate()); + + // If there is no store before this load, then we can't promote this load. + if (I == StoresByIndex.begin()) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + + // Otherwise, there was a store before this load, the load takes its value. + --I; + LI->replaceAllUsesWith(I->second->getOperand(0)); + if (AST && isa(LI->getType())) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } +} + + +// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific +// Alloca returns true if there wasn't already a phi-node for that variable +// +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version, + SmallPtrSet &InsertedPHINodes) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) return false; + + // Create a PhiNode using the dereferenced type... and add the phi-node to the + // BasicBlock. + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + Allocas[AllocaNo]->getName() + "." + + utostr(Version++), BB->begin()); + ++NumPHIInsert; + PhiToAllocaMap[PN] = AllocaNo; + PN->reserveOperandSpace(getNumPreds(BB)); + + InsertedPHINodes.insert(PN); + + if (AST && isa(PN->getType())) + AST->copyValue(PointerAllocaValues[AllocaNo], PN); + + return true; +} + +// RenamePass - Recursively traverse the CFG of the function, renaming loads and +// stores to the allocas which we are promoting. IncomingVals indicates what +// value each Alloca contains on exit from the predecessor block Pred. +// +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncomingVals, + std::vector &Worklist) { +NextIteration: + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast(BB->begin())) { + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + + unsigned NumEdges = 0; + for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) + if (*I == BB) + ++NumEdges; + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + + // Get the next phi node. + ++PNI; + APN = dyn_cast(PNI); + if (APN == 0) break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. 
+ } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. + if (!Visited.insert(BB)) return; + + for (BasicBlock::iterator II = BB->begin(); !isa(II); ) { + Instruction *I = II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast(I)) { + AllocaInst *Src = dyn_cast(LI->getPointerOperand()); + if (!Src) continue; + + std::map::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) continue; + + Value *V = IncomingVals[AI->second]; + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && isa(LI->getType())) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } else if (StoreInst *SI = dyn_cast(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast(SI->getPointerOperand()); + if (!Dest) continue; + + std::map::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + BB->getInstList().erase(SI); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I)) + Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); + + goto NextIteration; +} + +/// PromoteMemToReg - Promote the specified list of alloca instructions into +/// scalar registers, inserting PHI nodes as appropriate. This function makes +/// use of DominanceFrontier information. This function does not modify the CFG +/// of the function at all. All allocas must be from the same function. +/// +/// If AST is specified, the specified tracker is updated to reflect changes +/// made to the IR. +/// +void llvm::PromoteMemToReg(const std::vector &Allocas, + DominatorTree &DT, DominanceFrontier &DF, + AliasSetTracker *AST) { + // If there is nothing to do, bail out... + if (Allocas.empty()) return; + + PromoteMem2Reg(Allocas, DT, DF, AST).run(); +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp new file mode 100644 index 000000000000..2cde765560b8 --- /dev/null +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -0,0 +1,2213 @@ +//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Peephole optimize the CFG. 
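+//
+// One representative simplification (an illustrative sketch): a block that
+// contains only an unconditional branch, e.g.
+//
+//     bb:                                 ; preds = %entry
+//       br label %exit
+//
+// is folded away by retargeting its predecessors straight to %exit, provided
+// the PHI-node checks below find no conflicting incoming values.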
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+  if (SI1 == SI2) return false;  // Can't merge with self!
+
+  // It is not safe to merge these two switch instructions if they have a common
+  // successor, and if that successor has a PHI node, and if *that* PHI node has
+  // conflicting incoming values from the two switch blocks.
+  BasicBlock *SI1BB = SI1->getParent();
+  BasicBlock *SI2BB = SI2->getParent();
+  SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+  for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+    if (SI1Succs.count(*I))
+      for (BasicBlock::iterator BBI = (*I)->begin();
+           isa<PHINode>(BBI); ++BBI) {
+        PHINode *PN = cast<PHINode>(BBI);
+        if (PN->getIncomingValueForBlock(SI1BB) !=
+            PN->getIncomingValueForBlock(SI2BB))
+          return false;
+      }
+
+  return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block.  The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+                                  BasicBlock *ExistPred) {
+  assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
+         succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
+  if (!isa<PHINode>(Succ->begin())) return;  // Quick exit if nothing to do
+
+  PHINode *PN;
+  for (BasicBlock::iterator I = Succ->begin();
+       (PN = dyn_cast<PHINode>(I)); ++I)
+    PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+///
+/// Assumption: Succ is the single successor for BB.
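+///
+/// A conflict example (sketch): if P is a predecessor of both BB and Succ,
+/// and a PHI in Succ already takes %a from P while the value flowing through
+/// BB for that PHI is %b, folding BB into Succ would require the PHI to take
+/// two different values from P, so the fold must be rejected.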
+/// +static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + DOUT << "Looking to fold " << BB->getNameStart() << " into " + << Succ->getNameStart() << "\n"; + // Shortcut, if there is only a single predecessor it must be BB and merging + // is always safe + if (Succ->getSinglePredecessor()) return true; + + typedef SmallPtrSet InstrSet; + InstrSet BBPHIs; + + // Make a list of all phi nodes in BB + BasicBlock::iterator BBI = BB->begin(); + while (isa(*BBI)) BBPHIs.insert(BBI++); + + // Make a list of the predecessors of BB + typedef SmallPtrSet BlockSet; + BlockSet BBPreds(pred_begin(BB), pred_end(BB)); + + // Use that list to make another list of common predecessors of BB and Succ + BlockSet CommonPreds; + for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); + PI != PE; ++PI) + if (BBPreds.count(*PI)) + CommonPreds.insert(*PI); + + // Shortcut, if there are no common predecessors, merging is always safe + if (CommonPreds.empty()) + return true; + + // Look at all the phi nodes in Succ, to see if they present a conflict when + // merging these blocks + for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + + // If the incoming value from BB is again a PHINode in + // BB which has the same incoming value for *PI as PN does, we can + // merge the phi nodes and then the blocks can still be merged + PHINode *BBPN = dyn_cast(PN->getIncomingValueForBlock(BB)); + if (BBPN && BBPN->getParent() == BB) { + for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); + PI != PE; PI++) { + if (BBPN->getIncomingValueForBlock(*PI) + != PN->getIncomingValueForBlock(*PI)) { + DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " + << Succ->getNameStart() << " is conflicting with " + << BBPN->getNameStart() << " with regard to common predecessor " + << (*PI)->getNameStart() << "\n"; + return false; + } + } + // Remove this phinode from the list of phis in BB, since it has been + // handled. + BBPHIs.erase(BBPN); + } else { + Value* Val = PN->getIncomingValueForBlock(BB); + for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); + PI != PE; PI++) { + // See if the incoming value for the common predecessor is equal to the + // one for BB, in which case this phi node will not prevent the merging + // of the block. + if (Val != PN->getIncomingValueForBlock(*PI)) { + DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " + << Succ->getNameStart() << " is conflicting with regard to common " + << "predecessor " << (*PI)->getNameStart() << "\n"; + return false; + } + } + } + } + + // If there are any other phi nodes in BB that don't have a phi node in Succ + // to merge with, they must be moved to Succ completely. However, for any + // predecessors of Succ, branches will be added to the phi node that just + // point to itself. So, for any common predecessors, this must not cause + // conflicts. 
+ for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end(); + I != E; I++) { + PHINode *PN = cast(*I); + for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); + PI != PE; PI++) + if (PN->getIncomingValueForBlock(*PI) != PN) { + DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " + << BB->getNameStart() << " is conflicting with regard to common " + << "predecessor " << (*PI)->getNameStart() << "\n"; + return false; + } + } + + return true; +} + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional +/// branch to Succ, and contains no instructions other than PHI nodes and the +/// branch. If possible, eliminate BB. +static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + BasicBlock *Succ) { + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + DOUT << "Killing Trivial BB: \n" << *BB; + + if (isa(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const SmallVector BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa(OldVal) && cast(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + PN->addIncoming(OldValPN->getIncomingValue(i), + OldValPN->getIncomingBlock(i)); + } else { + // Add an incoming value for each of the new incoming values. + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) + PN->addIncoming(OldVal, BBPreds[i]); + } + } + } + + if (isa(&BB->front())) { + SmallVector + OldSuccPreds(pred_begin(Succ), pred_end(Succ)); + + // Move all PHI nodes in BB to Succ if they are alive, otherwise + // delete them. + while (PHINode *PN = dyn_cast(&BB->front())) { + if (PN->use_empty()) { + // Just remove the dead phi. This happens if Succ's PHIs were the only + // users of the PHI nodes. + PN->eraseFromParent(); + continue; + } + + // The instruction is alive, so this means that BB must dominate all + // predecessors of Succ (Since all uses of the PN are after its + // definition, so in Succ or a block dominated by Succ. If a predecessor + // of Succ would not be dominated by BB, PN would violate the def before + // use SSA demand). Therefore, we can simply move the phi node to the + // next block. + Succ->getInstList().splice(Succ->begin(), + BB->getInstList(), BB->begin()); + + // We need to add new entries for the PHI node to account for + // predecessors of Succ that the PHI node does not take into + // account. 
At this point, since we know that BB dominated succ and all + // of its predecessors, this means that we should any newly added + // incoming edges should use the PHI node itself as the value for these + // edges, because they are loop back edges. + for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i) + if (OldSuccPreds[i] != BB) + PN->addIncoming(PN, OldSuccPreds[i]); + } + } + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + +/// GetIfCondition - Given a basic block (BB) with two predecessors (and +/// presumably PHI nodes in it), check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// +static Value *GetIfCondition(BasicBlock *BB, + BasicBlock *&IfTrue, BasicBlock *&IfFalse) { + assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 && + "Function can only handle blocks with 2 predecessors!"); + BasicBlock *Pred1 = *pred_begin(BB); + BasicBlock *Pred2 = *++pred_begin(BB); + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + if (!isa(Pred1->getTerminator()) || + !isa(Pred2->getTerminator())) + return 0; + BranchInst *Pred1Br = cast(Pred1->getTerminator()); + BranchInst *Pred2Br = cast(Pred2->getTerminator()); + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (++pred_begin(Pred2) != pred_end(Pred2)) + return 0; + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + if (pred_begin(Pred1) == pred_end(Pred1) || + ++pred_begin(Pred1) != pred_end(Pred1) || + pred_begin(Pred2) == pred_end(Pred2) || + ++pred_begin(Pred2) != pred_end(Pred2) || + *pred_begin(Pred1) != *pred_begin(Pred2)) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! 
+ BasicBlock *CommonPred = *pred_begin(Pred1); + if (BranchInst *BI = dyn_cast(CommonPred->getTerminator())) { + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); + } + return 0; +} + +/// DominatesMergePoint - If we have a merge point of an "if condition" as +/// accepted above, return true if the specified value dominates the block. We +/// don't handle the true generality of domination here, just a special case +/// which works well enough for us. +/// +/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +/// see if V (which must be an instruction) is cheap to compute and is +/// non-trapping. If both are true, the instruction is inserted into the set +/// and true is returned. +static bool DominatesMergePoint(Value *V, BasicBlock *BB, + std::set *AggressiveInsts) { + Instruction *I = dyn_cast(V); + if (!I) { + // Non-instructions all dominate instructions, but not all constantexprs + // can be executed unconditionally. + if (ConstantExpr *C = dyn_cast(V)) + if (C->canTrap()) + return false; + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". + if (BranchInst *BI = dyn_cast(PBB->getTerminator())) + if (BI->isUnconditional() && BI->getSuccessor(0) == BB) { + if (!AggressiveInsts) return false; + // Okay, it looks like the instruction IS in the "condition". Check to + // see if its a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + switch (I->getOpcode()) { + default: return false; // Cannot hoist this out safely. + case Instruction::Load: { + // We can hoist loads that are non-volatile and obviously cannot trap. + if (cast(I)->isVolatile()) + return false; + // FIXME: A computation of a constant can trap! + if (!isa(I->getOperand(0)) && + !isa(I->getOperand(0))) + return false; + // External weak globals may have address 0, so we can't load them. + Value *V2 = I->getOperand(0)->getUnderlyingObject(); + if (V2) { + GlobalVariable* GV = dyn_cast(V2); + if (GV && GV->hasExternalWeakLinkage()) + return false; + } + // Finally, we have to check to make sure there are no instructions + // before the load in its basic block, as we are going to hoist the loop + // out to its predecessor. + BasicBlock::iterator IP = PBB->begin(); + while (isa(IP)) + IP++; + if (IP != BasicBlock::iterator(I)) + return false; + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::FCmp: + if (I->getOperand(0)->getType()->isFPOrFPVector()) + return false; // FP arithmetic might trap. + break; // These are all cheap and non-trapping instructions. + } + + // Okay, we can only really hoist these out if their operands are not + // defined in the conditional region. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, 0)) + return false; + // Okay, it's safe to do this! 
Remember this instruction. + AggressiveInsts->insert(I); + } + + return true; +} + +/// GatherConstantSetEQs - Given a potentially 'or'd together collection of +/// icmp_eq instructions that compare a value against a constant, return the +/// value being compared, and stick the constant into the Values vector. +static Value *GatherConstantSetEQs(Value *V, std::vector &Values){ + if (Instruction *Inst = dyn_cast(V)) { + if (Inst->getOpcode() == Instruction::ICmp && + cast(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { + if (ConstantInt *C = dyn_cast(Inst->getOperand(1))) { + Values.push_back(C); + return Inst->getOperand(0); + } else if (ConstantInt *C = dyn_cast(Inst->getOperand(0))) { + Values.push_back(C); + return Inst->getOperand(1); + } + } else if (Inst->getOpcode() == Instruction::Or) { + if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values)) + if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values)) + if (LHS == RHS) + return LHS; + } + } + return 0; +} + +/// GatherConstantSetNEs - Given a potentially 'and'd together collection of +/// setne instructions that compare a value against a constant, return the value +/// being compared, and stick the constant into the Values vector. +static Value *GatherConstantSetNEs(Value *V, std::vector &Values){ + if (Instruction *Inst = dyn_cast(V)) { + if (Inst->getOpcode() == Instruction::ICmp && + cast(Inst)->getPredicate() == ICmpInst::ICMP_NE) { + if (ConstantInt *C = dyn_cast(Inst->getOperand(1))) { + Values.push_back(C); + return Inst->getOperand(0); + } else if (ConstantInt *C = dyn_cast(Inst->getOperand(0))) { + Values.push_back(C); + return Inst->getOperand(1); + } + } else if (Inst->getOpcode() == Instruction::And) { + if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values)) + if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values)) + if (LHS == RHS) + return LHS; + } + } + return 0; +} + +/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a +/// bunch of comparisons of one value against constants, return the value and +/// the constants being compared. +static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector &Values) { + if (Cond->getOpcode() == Instruction::Or) { + CompVal = GatherConstantSetEQs(Cond, Values); + + // Return true to indicate that the condition is true if the CompVal is + // equal to one of the constants. + return true; + } else if (Cond->getOpcode() == Instruction::And) { + CompVal = GatherConstantSetNEs(Cond, Values); + + // Return false to indicate that the condition is false if the CompVal is + // equal to one of the constants. + return false; + } + return false; +} + +static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { + Instruction* Cond = 0; + if (SwitchInst *SI = dyn_cast(TI)) { + Cond = dyn_cast(SI->getCondition()); + } else if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) + Cond = dyn_cast(BI->getCondition()); + } + + TI->eraseFromParent(); + if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond); +} + +/// isValueEqualityComparison - Return true if the specified terminator checks +/// to see if a value is equal to constant integer value. +static Value *isValueEqualityComparison(TerminatorInst *TI) { + if (SwitchInst *SI = dyn_cast(TI)) { + // Do not permit merging of large switch instructions into their + // predecessors unless there is only one predecessor. 
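+    // For instance (hypothetical counts): a switch with 10 successors in a
+    // block with 20 predecessors scores 10 * 20 = 200 > 128, so it reports no
+    // comparison value here and the merge is skipped.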
+ if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) > 128) + return 0; + + return SI->getCondition(); + } + if (BranchInst *BI = dyn_cast(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast(BI->getCondition())) + if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || + ICI->getPredicate() == ICmpInst::ICMP_NE) && + isa(ICI->getOperand(1))) + return ICI->getOperand(0); + return 0; +} + +/// GetValueEqualityComparisonCases - Given a value comparison instruction, +/// decode all of the 'cases' that it represents and return the 'default' block. +static BasicBlock * +GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector > &Cases) { + if (SwitchInst *SI = dyn_cast(TI)) { + Cases.reserve(SI->getNumCases()); + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i))); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast(TI); + ICmpInst *ICI = cast(BI->getCondition()); + Cases.push_back(std::make_pair(cast(ICI->getOperand(1)), + BI->getSuccessor(ICI->getPredicate() == + ICmpInst::ICMP_NE))); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + + +/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries +/// in the list that match the specified block. +static void EliminateBlockCases(BasicBlock *BB, + std::vector > &Cases) { + for (unsigned i = 0, e = Cases.size(); i != e; ++i) + if (Cases[i].second == BB) { + Cases.erase(Cases.begin()+i); + --i; --e; + } +} + +/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as +/// well. +static bool +ValuesOverlap(std::vector > &C1, + std::vector > &C2) { + std::vector > *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->size() == 0) return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].first; + for (unsigned i = 0, e = V2->size(); i != e; ++i) + if (TheVal == (*V2)[i].first) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + std::sort(V1->begin(), V1->end()); + std::sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].first == (*V2)[i2].first) + return true; + if ((*V1)[i1].first < (*V2)[i2].first) + ++i1; + else + ++i2; + } + return false; +} + +/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a +/// terminator instruction and its block is known to only have a single +/// predecessor block, check to see if that predecessor is also a value +/// comparison with the same value, and if that comparison determines the +/// outcome of this comparison. If so, simplify TI. This does a very limited +/// form of jump threading. +static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) return false; // Different predicates. + + // Find out information about when control will move from Pred to TI's block. 
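+  // A small example of the threading below (a sketch): if Pred ends in
+  //   switch i32 %X [ 1 -> A, 2 -> B ], default -> TI's block
+  // and TI branches on %X == 1, then %X cannot be 1 on this path, so TI
+  // collapses to an unconditional branch to its false successor.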
+ std::vector > PredCases; + BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), + PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector > ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (ValuesOverlap(PredCases, ThisCases)) { + if (isa(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = BranchInst::Create(ThisDef, TI); + + // Remove PHI node entries for the dead edge. + ThisCases[0].second->removePredecessor(TI->getParent()); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + + EraseTerminatorInstAndDCECond(TI); + return true; + + } else { + SwitchInst *SI = cast(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].first); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI; + + for (unsigned i = SI->getNumCases()-1; i != 0; --i) + if (DeadCases.count(SI->getCaseValue(i))) { + SI->getSuccessor(i)->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + + DOUT << "Leaving: " << *TI << "\n"; + return true; + } + } + + } else { + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = 0; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == TIBB) { + if (TIV == 0) + TIV = PredCases[i].first; + else + return false; // Cannot handle multiple values coming to this block. + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = 0; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].first == TIV) { + TheRealDest = ThisCases[i].second; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (TheRealDest == 0) TheRealDest = ThisDef; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) + if (*SI != CheckEdge) + (*SI)->removePredecessor(TIBB); + else + CheckEdge = 0; + + // Insert the new branch. + Instruction *NI = BranchInst::Create(TheRealDest, TI); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + + EraseTerminatorInstAndDCECond(TI); + return true; + } + return false; +} + +namespace { + /// ConstantIntOrdering - This class implements a stable ordering of constant + /// integers that does not depend on their address. 
This is important for + /// applications that sort ConstantInt's to ensure uniqueness. + struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } + }; +} + +/// FoldValueComparisonIntoPredecessors - The specified terminator is a value +/// equality comparison instruction (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + bool Changed = false; + + SmallVector Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + + // See if the predecessor is a comparison with the same value. + TerminatorInst *PTI = Pred->getTerminator(); + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + + if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { + // Figure out which 'cases' to copy from SI to PSI. + std::vector > BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector > PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallVector NewSuccessors; + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second != BB) + PTIHandled.insert(PredCases[i].first); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + PredDefault = BBDefault; + NewSuccessors.push_back(BBDefault); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].first) && + BBCases[i].second != BBDefault) { + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].second); + } + + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == BB) { + PTIHandled.insert(PredCases[i].first); + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; --e; + } + + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].first)) { + // If this is one we are capable of getting... + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].second); + PTIHandled.erase(BBCases[i].first);// This constant is taken care of + } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. 
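// ConstantIntOrdering above matters because ordering pointers by address would
// make std::set iteration order (and therefore the emitted IR) vary from run
// to run. The same idea for any heap-allocated value type, as a sketch --
// Boxed and BoxedOrdering are illustrative stand-ins:

#include <set>

struct Boxed { int Value; };

struct BoxedOrdering {
  bool operator()(const Boxed *L, const Boxed *R) const {
    return L->Value < R->Value;  // compare payloads, never addresses
  }
};

// A set with deterministic, address-independent iteration order.
typedef std::set<const Boxed*, BoxedOrdering> StableBoxedSet;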
+ for (std::set::iterator I = + PTIHandled.begin(), + E = PTIHandled.end(); I != E; ++I) { + PredCases.push_back(std::make_pair(*I, BBDefault)); + NewSuccessors.push_back(BBDefault); + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) + AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault, + PredCases.size(), PTI); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + NewSI->addCase(PredCases[i].first, PredCases[i].second); + + EraseTerminatorInstAndDCECond(PTI); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = 0; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (InfLoopBlock == 0) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + Changed = true; + } + } + return Changed; +} + +/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and +/// BB2, hoist any common code in the two blocks up into the branch block. The +/// caller of this function guarantees that BI's block dominates BB1 and BB2. +static bool HoistThenElseCodeToIf(BranchInst *BI) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + BasicBlock::iterator BB1_Itr = BB1->begin(); + BasicBlock::iterator BB2_Itr = BB2->begin(); + + Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + while (isa(I1)) + I1 = BB1_Itr++; + while (isa(I2)) + I2 = BB2_Itr++; + if (I1->getOpcode() != I2->getOpcode() || isa(I1) || + isa(I1) || !I1->isIdenticalTo(I2)) + return false; + + // If we get here, we can hoist at least one instruction. + BasicBlock *BIParent = BI->getParent(); + + do { + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa(I1)) + goto HoistTerminator; + + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. + BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + BB2->getInstList().erase(I2); + + I1 = BB1_Itr++; + while (isa(I1)) + I1 = BB1_Itr++; + I2 = BB2_Itr++; + while (isa(I2)) + I2 = BB2_Itr++; + } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2)); + + return true; + +HoistTerminator: + // Okay, it is safe to hoist the terminator. 
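// HoistThenElseCodeToIf deliberately hoists only while the two blocks stay
// pairwise identical from the top, which keeps the matching linear rather
// than O(M*N). A standalone sketch of that common-prefix scan on a toy
// instruction type -- Inst and commonPrefixLen are illustrative stand-ins:

#include <cstddef>
#include <vector>

struct Inst { int opcode; int operands[2]; };

static bool identical(const Inst &A, const Inst &B) {
  return A.opcode == B.opcode &&
         A.operands[0] == B.operands[0] && A.operands[1] == B.operands[1];
}

// Number of leading instructions eligible to be hoisted into the branch block.
static size_t commonPrefixLen(const std::vector<Inst> &BB1,
                              const std::vector<Inst> &BB2) {
  size_t N = 0, E = BB1.size() < BB2.size() ? BB1.size() : BB2.size();
  while (N != E && identical(BB1[N], BB2[N]))
    ++N;
  return N;
}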
+ Instruction *NT = I1->clone(); + BIParent->getInstList().insert(BI, NT); + if (NT->getType() != Type::VoidTy) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + + // Hoisting one of the terminators from our successor is a great thing. + // Unfortunately, the successors of the if/else blocks may have PHI nodes in + // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI + // nodes, so we insert select instruction to compute the final result. + std::map, SelectInst*> InsertedSelects; + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V) { + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (SI == 0) + SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName(), NT); + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); + } + } + } + + // Update any PHI nodes in our new successors. + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) + AddPredecessorToBlock(*SI, BIParent, BB1); + + EraseTerminatorInstAndDCECond(BI); + return true; +} + +/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 +/// and an BB2 and the only successor of BB1 is BB2, hoist simple code +/// (for now, restricted to a single instruction that's side effect free) from +/// the BB1 into the branch block to speculatively execute it. +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { + // Only speculatively execution a single instruction (not counting the + // terminator) for now. + Instruction *HInst = NULL; + Instruction *Term = BB1->getTerminator(); + for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); + BBI != BBE; ++BBI) { + Instruction *I = BBI; + // Skip debug info. + if (isa(I)) continue; + if (I == Term) break; + + if (!HInst) + HInst = I; + else + return false; + } + if (!HInst) + return false; + + // Be conservative for now. FP select instruction can often be expensive. + Value *BrCond = BI->getCondition(); + if (isa(BrCond) && + cast(BrCond)->getOpcode() == Instruction::FCmp) + return false; + + // If BB1 is actually on the false edge of the conditional branch, remember + // to swap the select operands later. + bool Invert = false; + if (BB1 != BI->getSuccessor(0)) { + assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + + // Turn + // BB: + // %t1 = icmp + // br i1 %t1, label %BB1, label %BB2 + // BB1: + // %t3 = add %t2, c + // br label BB2 + // BB2: + // => + // BB: + // %t1 = icmp + // %t4 = add %t2, c + // %t3 = select i1 %t1, %t2, %t3 + switch (HInst->getOpcode()) { + default: return false; // Not safe / profitable to hoist. + case Instruction::Add: + case Instruction::Sub: + // FP arithmetic might trap. Not worth doing for vector ops. 
+ if (HInst->getType()->isFloatingPoint() + || isa(HInst->getType())) + return false; + break; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + // Don't mess with vector operations. + if (isa(HInst->getType())) + return false; + break; // These are all cheap and non-trapping instructions. + } + + // If the instruction is obviously dead, don't try to predicate it. + if (HInst->use_empty()) { + HInst->eraseFromParent(); + return true; + } + + // Can we speculatively execute the instruction? And what is the value + // if the condition is false? Consider the phi uses, if the incoming value + // from the "if" block are all the same V, then V is the value of the + // select if the condition is false. + BasicBlock *BIParent = BI->getParent(); + SmallVector PHIUses; + Value *FalseV = NULL; + + BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); + for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end(); + UI != E; ++UI) { + // Ignore any user that is not a PHI node in BB2. These can only occur in + // unreachable blocks, because they would not be dominated by the instr. + PHINode *PN = dyn_cast(UI); + if (!PN || PN->getParent() != BB2) + return false; + PHIUses.push_back(PN); + + Value *PHIV = PN->getIncomingValueForBlock(BIParent); + if (!FalseV) + FalseV = PHIV; + else if (FalseV != PHIV) + return false; // Inconsistent value when condition is false. + } + + assert(FalseV && "Must have at least one user, and it must be a PHI"); + + // Do not hoist the instruction if any of its operands are defined but not + // used in this BB. The transformation will prevent the operand from + // being sunk into the use block. + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + i != e; ++i) { + Instruction *OpI = dyn_cast(*i); + if (OpI && OpI->getParent() == BIParent && + !OpI->isUsedInBasicBlock(BIParent)) + return false; + } + + // If we get here, we can hoist the instruction. Try to place it + // before the icmp instruction preceding the conditional branch. + BasicBlock::iterator InsertPos = BI; + if (InsertPos != BIParent->begin()) + --InsertPos; + // Skip debug info between condition and branch. + while (InsertPos != BIParent->begin() && isa(InsertPos)) + --InsertPos; + if (InsertPos == BrCond && !isa(BrCond)) { + SmallPtrSet BB1Insns; + for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end(); + BB1I != BB1E; ++BB1I) + BB1Insns.insert(BB1I); + for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end(); + UI != UE; ++UI) { + Instruction *Use = cast(*UI); + if (BB1Insns.count(Use)) { + // If BrCond uses the instruction that place it just before + // branch instruction. + InsertPos = BI; + break; + } + } + } else + InsertPos = BI; + BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst); + + // Create a select whose true value is the speculatively executed value and + // false value is the previously determined FalseV. + SelectInst *SI; + if (Invert) + SI = SelectInst::Create(BrCond, FalseV, HInst, + FalseV->getName() + "." + HInst->getName(), BI); + else + SI = SelectInst::Create(BrCond, HInst, FalseV, + HInst->getName() + "." + FalseV->getName(), BI); + + // Make the PHI node use the select for all incoming values for "then" and + // "if" blocks. 
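// The opcode switch above is a profitability/safety whitelist: speculation is
// only done for ops that are cheap and can never trap. The same gate as a
// standalone classifier -- Opcode and isSafeToSpeculate are illustrative
// names, not LLVM API:

enum Opcode { Add, Sub, And, Or, Xor, Shl, LShr, AShr, UDiv, Load, Call };

static bool isSafeToSpeculate(Opcode Op, bool IsFloat, bool IsVector) {
  switch (Op) {
  case Add: case Sub:
    return !IsFloat && !IsVector;  // FP arithmetic might trap; vectors not worth it
  case And: case Or: case Xor:
  case Shl: case LShr: case AShr:
    return !IsVector;              // cheap, non-trapping integer bit operations
  default:
    return false;                  // e.g. UDiv can trap; Load/Call have effects
  }
}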
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) { + PHINode *PN = PHIUses[i]; + for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j) + if (PN->getIncomingBlock(j) == BB1 || + PN->getIncomingBlock(j) == BIParent) + PN->setIncomingValue(j, SI); + } + + ++NumSpeculations; + return true; +} + +/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch +/// across this block. +static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + BranchInst *BI = cast(BB->getTerminator()); + unsigned Size = 0; + + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (isa(BBI)) + continue; + if (Size > 10) return false; // Don't clone large BB's. + ++Size; + + // We can only support instructions that do not define values that are + // live outside of the current basic block. + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + Instruction *U = cast(*UI); + if (U->getParent() != BB || isa(U)) return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value +/// that is defined in the same block as the branch and if any PHI entries are +/// constants, thread edges corresponding to that entry to be branches to their +/// ultimate destination. +static bool FoldCondBranchOnPHI(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + PHINode *PN = dyn_cast(BI->getCondition()); + // NOTE: we currently cannot transform this case if the PHI node is used + // outside of the block. + if (!PN || PN->getParent() != BB || !PN->hasOneUse()) + return false; + + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } + + // Now we know that this block has multiple preds and two succs. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + + // Okay, this is a simple enough basic block. See if any phi values are + // constants. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantInt *CB; + if ((CB = dyn_cast(PN->getIncomingValue(i))) && + CB->getType() == Type::Int1Ty) { + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + PHINode *PN; + for (BasicBlock::iterator BBI = RealDest->begin(); + (PN = dyn_cast(BBI)); ++BBI) { + Value *V = PN->getIncomingValueForBlock(BB); + PN->addIncoming(V, EdgeBB); + } + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + std::map TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + } else { + // Clone the instruction. 
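// BlockIsSimpleEnoughToThreadThrough gates the cloning that threading implies:
// a hard size cap, and no value defined in the block may escape it (the real
// check also rejects uses by PHI nodes). A standalone sketch of that gate --
// ToyInst, ToyBlock, and isCheapToClone are illustrative stand-ins:

#include <vector>

struct ToyInst {
  bool isPhi;    // PHIs are rewritten during threading, not cloned
  bool escapes;  // defined here but used outside the block (or by a PHI)
};
struct ToyBlock { std::vector<ToyInst> insts; };

static bool isCheapToClone(const ToyBlock &BB, unsigned MaxSize = 10) {
  unsigned Size = 0;
  for (size_t i = 0; i != BB.insts.size(); ++i) {
    if (BB.insts[i].isPhi) continue;
    if (++Size > MaxSize) return false;  // don't clone large blocks
    if (BB.insts[i].escapes) return false;  // cloning would break this use
  }
  return true;
}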
+ Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); + + // Update operands due to translation. + for (User::op_iterator i = N->op_begin(), e = N->op_end(); + i != e; ++i) { + std::map::iterator PI = + TranslateMap.find(*i); + if (PI != TranslateMap.end()) + *i = PI->second; + } + + // Check for trivial simplification. + if (Constant *C = ConstantFoldInstruction(N)) { + TranslateMap[BBI] = C; + delete N; // Constant folded away, don't need actual inst + } else { + // Insert the new instruction into its new home. + EdgeBB->getInstList().insert(InsertPt, N); + if (!BBI->use_empty()) + TranslateMap[BBI] = N; + } + } + } + + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. + TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); + } + + // Recurse, simplifying any other constants. + return FoldCondBranchOnPHI(BI) | true; + } + } + + return false; +} + +/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry +/// PHI node, see if we can eliminate it. +static bool FoldTwoEntryPHINode(PHINode *PN) { + // Ok, this is a two entry PHI node. Check to see if this is a simple "if + // statement", which has a very simple dominance structure. Basically, we + // are trying to find the condition that is being branched on, which + // subsequently causes this merge to happen. We really want control + // dependence information for this check, but simplifycfg can't keep it up + // to date, and this catches most of the cases we care about anyway. + // + BasicBlock *BB = PN->getParent(); + BasicBlock *IfTrue, *IfFalse; + Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); + if (!IfCond) return false; + + // Okay, we found that we can merge this two-entry phi node into a select. + // Doing so would require us to fold *all* two entry phi nodes in this block. + // At some point this becomes non-profitable (particularly if the target + // doesn't support cmov's). Only do this transformation if there are two or + // fewer PHI nodes in this block. + unsigned NumPhis = 0; + for (BasicBlock::iterator I = BB->begin(); isa(I); ++NumPhis, ++I) + if (NumPhis > 2) + return false; + + DOUT << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"; + + // Loop over the PHI's seeing if we can promote them all to select + // instructions. While we are at it, keep track of the instructions + // that need to be moved to the dominating block. + std::set AggressiveInsts; + + BasicBlock::iterator AfterPHIIt = BB->begin(); + while (isa(AfterPHIIt)) { + PHINode *PN = cast(AfterPHIIt++); + if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB, + &AggressiveInsts) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, + &AggressiveInsts)) { + return false; + } + } + + // If we all PHI nodes are promotable, check to make sure that all + // instructions in the predecessor blocks can be promoted as well. If + // not, we won't be able to get rid of the control flow, so it's not + // worth promoting to select instructions. 
+ BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0; + PN = cast(BB->begin()); + BasicBlock *Pred = PN->getIncomingBlock(0); + if (cast(Pred->getTerminator())->isUnconditional()) { + IfBlock1 = Pred; + DomBlock = *pred_begin(Pred); + for (BasicBlock::iterator I = Pred->begin(); + !isa(I); ++I) + if (!AggressiveInsts.count(I) && !isa(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + Pred = PN->getIncomingBlock(1); + if (cast(Pred->getTerminator())->isUnconditional()) { + IfBlock2 = Pred; + DomBlock = *pred_begin(Pred); + for (BasicBlock::iterator I = Pred->begin(); + !isa(I); ++I) + if (!AggressiveInsts.count(I) && !isa(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + // If we can still promote the PHI nodes after this gauntlet of tests, + // do all of the PHI's now. + + // Move all 'aggressive' instructions, which are defined in the + // conditional parts of the if's up to the dominating block. + if (IfBlock1) { + DomBlock->getInstList().splice(DomBlock->getTerminator(), + IfBlock1->getInstList(), + IfBlock1->begin(), + IfBlock1->getTerminator()); + } + if (IfBlock2) { + DomBlock->getInstList().splice(DomBlock->getTerminator(), + IfBlock2->getInstList(), + IfBlock2->begin(), + IfBlock2->getTerminator()); + } + + while (PHINode *PN = dyn_cast(BB->begin())) { + // Change the PHI node into a select instruction. + Value *TrueVal = + PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = + PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + + Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt); + PN->replaceAllUsesWith(NV); + NV->takeName(PN); + + BB->getInstList().erase(PN); + } + return true; +} + +/// isTerminatorFirstRelevantInsn - Return true if Term is very first +/// instruction ignoring Phi nodes and dbg intrinsics. +static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) { + BasicBlock::iterator BBI = Term; + while (BBI != BB->begin()) { + --BBI; + if (!isa(BBI)) + break; + } + + if (isa(BBI) || &*BBI == Term || isa(BBI)) + return true; + return false; +} + +/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes +/// to two returning blocks, try to merge them together into one return, +/// introducing a select if the return values disagree. +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { + assert(BI->isConditional() && "Must be a conditional branch"); + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + ReturnInst *TrueRet = cast(TrueSucc->getTerminator()); + ReturnInst *FalseRet = cast(FalseSucc->getTerminator()); + + // Check to ensure both blocks are empty (just a return) or optionally empty + // with PHI nodes. If there are other instructions, merging would cause extra + // computation on one path or the other. + if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet)) + return false; + if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet)) + return false; + + // Okay, we found a branch that is going to two return nodes. If + // there is no return value for this function, just change the + // branch into a return. 
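// The net effect of FoldTwoEntryPHINode is classic if-conversion: once both
// arms are proven side-effect free and cheap, evaluate them unconditionally in
// the dominating block and replace the two-entry phi with one select. A
// minimal scalar analogue of the before/after shape:

static int ifConvert(bool Cond, int A, int B) {
  int T = A + 1;        // "then" arm, hoisted into the dominating block
  int F = B - 1;        // "else" arm, hoisted into the dominating block
  return Cond ? T : F;  // the select that replaces the two-entry phi
}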
+ if (FalseRet->getNumOperands() == 0) { + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + ReturnInst::Create(0, BI); + EraseTerminatorInstAndDCECond(BI); + return true; + } + + // Otherwise, figure out what the true and false return values are + // so we can insert a new select instruction. + Value *TrueValue = TrueRet->getReturnValue(); + Value *FalseValue = FalseRet->getReturnValue(); + + // Unwrap any PHI nodes in the return blocks. + if (PHINode *TVPN = dyn_cast_or_null(TrueValue)) + if (TVPN->getParent() == TrueSucc) + TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + if (PHINode *FVPN = dyn_cast_or_null(FalseValue)) + if (FVPN->getParent() == FalseSucc) + FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + + // In order for this transformation to be safe, we must be able to + // unconditionally execute both operands to the return. This is + // normally the case, but we could have a potentially-trapping + // constant expression that prevents this transformation from being + // safe. + if (ConstantExpr *TCV = dyn_cast_or_null(TrueValue)) + if (TCV->canTrap()) + return false; + if (ConstantExpr *FCV = dyn_cast_or_null(FalseValue)) + if (FCV->canTrap()) + return false; + + // Okay, we collected all the mapped values and checked them for sanity, and + // defined to really do this transformation. First, update the CFG. + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + + // Insert select instructions where needed. + Value *BrCond = BI->getCondition(); + if (TrueValue) { + // Insert a select if the results differ. + if (TrueValue == FalseValue || isa(FalseValue)) { + } else if (isa(TrueValue)) { + TrueValue = FalseValue; + } else { + TrueValue = SelectInst::Create(BrCond, TrueValue, + FalseValue, "retval", BI); + } + } + + Value *RI = !TrueValue ? + ReturnInst::Create(BI) : + ReturnInst::Create(TrueValue, BI); + + DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc; + + EraseTerminatorInstAndDCECond(BI); + + return true; +} + +/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, +/// and if a predecessor branches to us and one of our successors, fold the +/// setcc into the predecessor and use logical operations to pick the right +/// destination. +static bool FoldBranchToCommonDest(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + Instruction *Cond = dyn_cast(BI->getCondition()); + if (Cond == 0) return false; + + + // Only allow this if the condition is a simple instruction that can be + // executed unconditionally. It must be in the same block as the branch, and + // must be at the front of the block. + BasicBlock::iterator FrontIt = BB->front(); + // Ignore dbg intrinsics. + while(isa(FrontIt)) + ++FrontIt; + if ((!isa(Cond) && !isa(Cond)) || + Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) { + return false; + } + + // Make sure the instruction after the condition is the cond branch. + BasicBlock::iterator CondIt = Cond; ++CondIt; + // Ingore dbg intrinsics. + while(isa(CondIt)) + ++CondIt; + if (&*CondIt != BI) { + assert (!isa(CondIt) && "Hey do not forget debug info!"); + return false; + } + + // Cond is known to be a compare or binary operator. Check to make sure that + // neither operand is a potentially-trapping constant expression. 
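// SimplifyCondBranchToTwoReturns in source-level terms: both return blocks are
// empty apart from the return, so the branch collapses into one select feeding
// a single return (a scalar analogue, not the LLVM API):

static int beforeXform(bool C, int X, int Y) {
  if (C) return X;   // TrueSucc: nothing but a return
  else   return Y;   // FalseSucc: nothing but a return
}

static int afterXform(bool C, int X, int Y) {
  return C ? X : Y;  // the "retval" select feeding one return
}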
+ if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) + if (CE->canTrap()) + return false; + if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) + if (CE->canTrap()) + return false; + + + // Finally, don't infinitely unroll conditional loops. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + if (TrueDest == BB || FalseDest == BB) + return false; + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBlock = *PI; + BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); + + // Check that we have two conditional branches. If there is a PHI node in + // the common successor, verify that the same value flows in from both + // blocks. + if (PBI == 0 || PBI->isUnconditional() || + !SafeToMergeTerminators(BI, PBI)) + continue; + + Instruction::BinaryOps Opc; + bool InvertPredCond = false; + + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + + DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB; + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = + BinaryOperator::CreateNot(PBI->getCondition(), + PBI->getCondition()->getName()+".not", PBI); + PBI->setCondition(NewCond); + BasicBlock *OldTrue = PBI->getSuccessor(0); + BasicBlock *OldFalse = PBI->getSuccessor(1); + PBI->setSuccessor(0, OldFalse); + PBI->setSuccessor(1, OldTrue); + } + + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *New = Cond->clone(); + PredBlock->getInstList().insert(PBI, New); + New->takeName(Cond); + Cond->setName(New->getName()+".old"); + + Value *NewCond = BinaryOperator::Create(Opc, PBI->getCondition(), + New, "or.cond", PBI); + PBI->setCondition(NewCond); + if (PBI->getSuccessor(0) == BB) { + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + return true; + } + return false; +} + +/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a +/// predecessor of another block, this function tries to simplify it. We know +/// that PBI and BI are both conditional branches, and BI is in one of the +/// successor blocks of PBI - PBI branches to BI. +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { + assert(PBI->isConditional() && BI->isConditional()); + BasicBlock *BB = BI->getParent(); + + // If this block ends with a branch instruction, and if there is a + // predecessor that ends on a branch of the same condition, make + // this conditional branch redundant. + if (PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + // Okay, the outcome of this conditional branch is statically + // knowable. If this block had a single pred, handle specially. + if (BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + bool CondIsTrue = PBI->getSuccessor(0) == BB; + BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue)); + return true; // Nuke the branch on constant. 
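// The opcode/inversion table in FoldBranchToCommonDest encodes how control
// reaches the true destination once the predecessor's branch and this branch
// are merged; the same four cases in source-level terms, with short-circuit
// ||/&& standing in for the merged branch (illustrative helpers, not LLVM
// API; PC is the predecessor's condition, C is this block's condition):

static bool foldOr(bool PC, bool C)     { return PC || C;  }  // pred true edge == TrueDest
static bool foldAnd(bool PC, bool C)    { return PC && C;  }  // pred false edge == FalseDest
static bool foldAndInv(bool PC, bool C) { return !PC && C; }  // pred true edge == FalseDest
static bool foldOrInv(bool PC, bool C)  { return !PC || C; }  // pred false edge == TrueDest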
+ } + + // Otherwise, if there are multiple predecessors, insert a PHI that merges + // in the constant and simplify the block result. Subsequent passes of + // simplifycfg will thread the block. + if (BlockIsSimpleEnoughToThreadThrough(BB)) { + PHINode *NewPN = PHINode::Create(Type::Int1Ty, + BI->getCondition()->getName() + ".pr", + BB->begin()); + // Okay, we're going to insert the PHI node. Since PBI is not the only + // predecessor, compute the PHI'd conditional value for all of the preds. + // Any predecessor where the condition is not computable we keep symbolic. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if ((PBI = dyn_cast((*PI)->getTerminator())) && + PBI != BI && PBI->isConditional() && + PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + bool CondIsTrue = PBI->getSuccessor(0) == BB; + NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, + CondIsTrue), *PI); + } else { + NewPN->addIncoming(BI->getCondition(), *PI); + } + + BI->setCondition(NewPN); + return true; + } + } + + // If this is a conditional branch in an empty block, and if any + // predecessors is a conditional branch to one of our destinations, + // fold the conditions into logical ops and one cond br. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa(BBI)) + ++BBI; + if (&*BBI != BI) + return false; + + + if (ConstantExpr *CE = dyn_cast(BI->getCondition())) + if (CE->canTrap()) + return false; + + int PBIOp, BIOp; + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) + PBIOp = BIOp = 0; + else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) + PBIOp = 0, BIOp = 1; + else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) + PBIOp = 1, BIOp = 0; + else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) + PBIOp = BIOp = 1; + else + return false; + + // Check to make sure that the other destination of this branch + // isn't BB itself. If so, this is an infinite loop that will + // keep getting unwound. + if (PBI->getSuccessor(PBIOp) == BB) + return false; + + // Do not perform this transformation if it would require + // insertion of a large number of select instructions. For targets + // without predication/cmovs, this is a big pessimization. + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + + unsigned NumPhis = 0; + for (BasicBlock::iterator II = CommonDest->begin(); + isa(II); ++II, ++NumPhis) + if (NumPhis > 2) // Disable this xform. + return false; + + // Finally, if everything is ok, fold the branches to logical ops. + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + + DOUT << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent(); + + + // If OtherDest *is* BB, then BB is a basic block with a single conditional + // branch in it, where one edge (OtherDest) goes back to itself but the other + // exits. We don't *know* that the program avoids the infinite loop + // (even though that seems likely). If we do this xform naively, we'll end up + // recursively unpeeling the loop. Since we know that (after the xform is + // done) that the block *is* infinite if reached, we just make it an obviously + // infinite loop with no cond branch. + if (OtherDest == BB) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. 
:) + BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + OtherDest = InfLoopBlock; + } + + DOUT << *PBI->getParent()->getParent(); + + // BI may have other predecessors. Because of this, we leave + // it alone, but modify PBI. + + // Make sure we get to CommonDest on True&True directions. + Value *PBICond = PBI->getCondition(); + if (PBIOp) + PBICond = BinaryOperator::CreateNot(PBICond, + PBICond->getName()+".not", + PBI); + Value *BICond = BI->getCondition(); + if (BIOp) + BICond = BinaryOperator::CreateNot(BICond, + BICond->getName()+".not", + PBI); + // Merge the conditions. + Value *Cond = BinaryOperator::CreateOr(PBICond, BICond, "brmerge", PBI); + + // Modify PBI to branch on the new condition to the new dests. + PBI->setCondition(Cond); + PBI->setSuccessor(0, CommonDest); + PBI->setSuccessor(1, OtherDest); + + // OtherDest may have phi nodes. If so, add an entry from PBI's + // block that are identical to the entries for BI's block. + PHINode *PN; + for (BasicBlock::iterator II = OtherDest->begin(); + (PN = dyn_cast(II)); ++II) { + Value *V = PN->getIncomingValueForBlock(BB); + PN->addIncoming(V, PBI->getParent()); + } + + // We know that the CommonDest already had an edge from PBI to + // it. If it has PHIs though, the PHIs may have different + // entries for BB and PBI's BB. If so, insert a select to make + // them agree. + for (BasicBlock::iterator II = CommonDest->begin(); + (PN = dyn_cast(II)); ++II) { + Value *BIV = PN->getIncomingValueForBlock(BB); + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (BIV != PBIV) { + // Insert a select in PBI to pick the right value. + Value *NV = SelectInst::Create(PBICond, PBIV, BIV, + PBIV->getName()+".mux", PBI); + PN->setIncomingValue(PBBIdx, NV); + } + } + + DOUT << "INTO: " << *PBI->getParent(); + + DOUT << *PBI->getParent()->getParent(); + + // This basic block is probably dead. We know it has at least + // one fewer predecessor. + return true; +} + + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. +/// +/// WARNING: The entry node of a function may not be simplified. +/// +bool llvm::SimplifyCFG(BasicBlock *BB) { + bool Changed = false; + Function *M = BB->getParent(); + + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + assert(&BB->getParent()->getEntryBlock() != BB && + "Can't Simplify entry block!"); + + // Remove basic blocks that have no predecessors... or that just have themself + // as a predecessor. These are unreachable. + if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { + DOUT << "Removing BB: \n" << *BB; + DeleteDeadBlock(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB); + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. 
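// After orienting both branches toward the common destination, the merge in
// SimplifyCondBranchToCondBranch is just "(maybe-negated PBICond) |
// (maybe-negated BICond)". A scalar sketch of building that merged condition
// -- mergeConditions is an illustrative name:

static bool mergeConditions(bool PBICond, bool PBIOp,
                            bool BICond, bool BIOp) {
  bool P = PBIOp ? !PBICond : PBICond;  // CreateNot when CommonDest is edge 1
  bool B = BIOp  ? !BICond  : BICond;
  return P || B;                        // the "brmerge" or
}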
+ if (PHINode *PN = dyn_cast(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN); + + // If this is a returning block with only PHI nodes in it, fold the return + // instruction into any unconditional branch predecessors. + // + // If any predecessor is a conditional branch that just selects among + // different return values, fold the replace the branch/return with a select + // and return. + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { + if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) { + // Find predecessors that end with branches. + SmallVector UncondBranchPreds; + SmallVector CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + TerminatorInst *PTI = (*PI)->getTerminator(); + if (BranchInst *BI = dyn_cast(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(*PI); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty()) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DOUT << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred; + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + BasicBlock::iterator BBI = RI; + if (BBI != BB->begin()) { + // Move region end info into the predecessor. + if (DbgRegionEndInst *DREI = dyn_cast(--BBI)) + DREI->moveBefore(NewRet); + } + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) + if (PHINode *PN = dyn_cast(*i)) + if (PN->getParent() == BB) + *i = PN->getIncomingValueForBlock(Pred); + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + Pred->getInstList().erase(UncondBranch); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa(BI->getSuccessor(0)->getTerminator()) && + isa(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI)) + return true; + } + } + } else if (isa(BB->begin())) { + // Check to see if the first instruction in this block is just an unwind. + // If so, replace any invoke instructions which use this as an exception + // destination with call instructions, and any unconditional branch + // predecessor with an unwind. + // + SmallVector Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + if (BranchInst *BI = dyn_cast(Pred->getTerminator())) { + if (BI->isUnconditional()) { + Pred->getInstList().pop_back(); // nuke uncond branch + new UnwindInst(Pred); // Use unwind. 
+ Changed = true; + } + } else if (InvokeInst *II = dyn_cast(Pred->getTerminator())) + if (II->getUnwindDest() == BB) { + // Insert a new branch instruction before the invoke, because this + // is now a fall through... + BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); + Pred->getInstList().remove(II); // Take out of symbol table + + // Insert the call now... + SmallVector Args(II->op_begin()+3, II->op_end()); + CallInst *CI = CallInst::Create(II->getCalledValue(), + Args.begin(), Args.end(), + II->getName(), BI); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the Call now does instead + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + + Preds.pop_back(); + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB)) { + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + return true; + } + + } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) + return SimplifyCFG(BB) || 1; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI)) + return SimplifyCFG(BB) || 1; + } + } else if (BranchInst *BI = dyn_cast(BB->getTerminator())) { + if (BI->isUnconditional()) { + BasicBlock::iterator BBI = BB->getFirstNonPHI(); + + BasicBlock *Succ = BI->getSuccessor(0); + // Ignore dbg intrinsics. + while (isa(BBI)) + ++BBI; + if (BBI->isTerminator() && // Terminator is the only non-phi instruction! + Succ != BB) // Don't hurt infinite loops! + if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ)) + return true; + + } else { // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred)) + return SimplifyCFG(BB) || 1; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. + while (isa(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI)) + return SimplifyCFG(BB) | true; + } else if (&*I == cast(BI->getCondition())){ + ++I; + // Ignore dbg intrinsics. + while (isa(I)) + ++I; + if(&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI)) + return SimplifyCFG(BB) | true; + } + } + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. + if (PHINode *PN = dyn_cast(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI)) + return SimplifyCFG(BB) | true; + + // If this basic block is ONLY a setcc and a branch, and if a predecessor + // branches to us and one of our successors, fold the setcc into the + // predecessor and use logical operations to pick the right destination. 
+ if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB) | 1; + + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI)) + return SimplifyCFG(BB) | true; + } + } else if (isa(BB->getTerminator())) { + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + Instruction *Unreachable = BB->getTerminator(); + while (Unreachable != BB->begin()) { + BasicBlock::iterator BBI = Unreachable; + --BBI; + // Do not delete instructions that can have side effects, like calls + // (which may never return) and volatile loads and stores. + if (isa(BBI) && !isa(BBI)) break; + + if (StoreInst *SI = dyn_cast(BBI)) + if (SI->isVolatile()) + break; + + if (LoadInst *LI = dyn_cast(BBI)) + if (LI->isVolatile()) + break; + + // Delete this instruction + BB->getInstList().erase(BBI); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() == Unreachable) { + SmallVector Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + BranchInst::Create(BI->getSuccessor(1), BI); + EraseTerminatorInstAndDCECond(BI); + } else if (BI->getSuccessor(1) == BB) { + BranchInst::Create(BI->getSuccessor(0), BI); + EraseTerminatorInstAndDCECond(BI); + Changed = true; + } + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + Changed = true; + } + // If the default value is unreachable, figure out the most popular + // destination and make it the default. + if (SI->getSuccessor(0) == BB) { + std::map Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Popularity[SI->getSuccessor(i)]++; + + // Find the most popular block. + unsigned MaxPop = 0; + BasicBlock *MaxBlock = 0; + for (std::map::iterator + I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { + if (I->second > MaxPop) { + MaxPop = I->second; + MaxBlock = I->first; + } + } + if (MaxBlock) { + // Make this the new default, allowing us to delete any explicit + // edges to it. + SI->setSuccessor(0, MaxBlock); + Changed = true; + + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from + // it. + if (isa(MaxBlock->begin())) + for (unsigned i = 0; i != MaxPop-1; ++i) + MaxBlock->removePredecessor(SI->getParent()); + + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == MaxBlock) { + SI->removeCase(i); + --i; --e; + } + } + } + } else if (InvokeInst *II = dyn_cast(TI)) { + if (II->getUnwindDest() == BB) { + // Convert the invoke to a call instruction. This would be a good + // place to note that the call does not throw though. + BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); + II->removeFromParent(); // Take out of symbol table + + // Insert the call now... 
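// When a switch's default destination is unreachable, the code above retargets
// the default edge to the most frequently used successor so its explicit cases
// can be deleted. A standalone sketch of the popularity count on labels --
// mostPopular is an illustrative helper:

#include <map>
#include <string>
#include <vector>

static std::string mostPopular(const std::vector<std::string> &Succs) {
  std::map<std::string, unsigned> Popularity;
  for (size_t i = 0; i != Succs.size(); ++i)
    ++Popularity[Succs[i]];  // count each case's destination

  unsigned MaxPop = 0;
  std::string MaxBlock;
  for (std::map<std::string, unsigned>::const_iterator I = Popularity.begin(),
         E = Popularity.end(); I != E; ++I)
    if (I->second > MaxPop) { MaxPop = I->second; MaxBlock = I->first; }
  return MaxBlock;  // becomes the new default destination
}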
+ SmallVector Args(II->op_begin()+3, II->op_end()); + CallInst *CI = CallInst::Create(II->getCalledValue(), + Args.begin(), Args.end(), + II->getName(), BI); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the Call does now instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + } + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB)) { + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + return true; + } + } + } + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + if (MergeBlockIntoPredecessor(BB)) + return true; + + // Otherwise, if this block only has a single predecessor, and if that block + // is a conditional branch, see if we can hoist any code from this block up + // into our predecessor. + pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); + BasicBlock *OnlyPred = *PI++; + for (; PI != PE; ++PI) // Search all predecessors, see if they are all same + if (*PI != OnlyPred) { + OnlyPred = 0; // There are multiple different predecessors... + break; + } + + if (OnlyPred) + if (BranchInst *BI = dyn_cast(OnlyPred->getTerminator())) + if (BI->isConditional()) { + // Get the other block. + BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB); + PI = pred_begin(OtherBB); + ++PI; + + if (PI == pred_end(OtherBB)) { + // We have a conditional branch to two blocks that are only reachable + // from the condbr. We know that the condbr dominates the two blocks, + // so see if there is any identical code in the "then" and "else" + // blocks. If so, we can hoist it up to the branching block. + Changed |= HoistThenElseCodeToIf(BI); + } else { + BasicBlock* OnlySucc = NULL; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (!OnlySucc) + OnlySucc = *SI; + else if (*SI != OnlySucc) { + OnlySucc = 0; // There are multiple distinct successors! + break; + } + } + + if (OnlySucc == OtherBB) { + // If BB's only successor is the other successor of the predecessor, + // i.e. a triangle, see if we can hoist any code from this block up + // to the "if" block. + Changed |= SpeculativelyExecuteBB(BI, BB); + } + } + } + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *BI = dyn_cast((*PI)->getTerminator())) + // Change br (X == 0 | X == 1), T, F into a switch instruction. + if (BI->isConditional() && isa(BI->getCondition())) { + Instruction *Cond = cast(BI->getCondition()); + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. + Value *CompVal = 0; + std::vector Values; + bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); + if (CompVal && CompVal->getType()->isInteger()) { + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + + // Create the new switch instruction now. 
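// Building the switch requires a duplicate-free, deterministically ordered
// case list, hence the sort with ConstantIntOrdering followed by unique above.
// The same idiom on plain ints, where natural integer comparison stands in for
// the comparator:

#include <algorithm>
#include <vector>

static void dedupCases(std::vector<int> &Values) {
  std::sort(Values.begin(), Values.end());  // stable, address-independent order
  Values.erase(std::unique(Values.begin(), Values.end()),
               Values.end());               // drop adjacent duplicates
}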
+ SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, + Values.size(), BI); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); + isa(BBI); ++BBI) { + PHINode *PN = cast(BBI); + Value *InVal = PN->getIncomingValueForBlock(*PI); + for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + PN->addIncoming(InVal, *PI); + } + + // Erase the old branch instruction. + EraseTerminatorInstAndDCECond(BI); + return true; + } + } + + return Changed; +} diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 000000000000..848f2b87c4ee --- /dev/null +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,139 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return +// instruction in them. Additionally, it keeps track of which node is the new +// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode +// method will return a null pointer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +char UnifyFunctionExitNodes::ID = 0; +static RegisterPass +X("mergereturn", "Unify function exit nodes"); + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodes(); +} + +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSwitchID); +} + +// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new +// BasicBlock, and converting all returns to unconditional branches to this +// new basic block. The singular exit node is returned. +// +// If there are no return stmts in the Function, a null pointer is returned. +// +bool UnifyFunctionExitNodes::runOnFunction(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + std::vector ReturningBlocks; + std::vector UnwindingBlocks; + std::vector UnreachableBlocks; + for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (isa(I->getTerminator())) + ReturningBlocks.push_back(I); + else if (isa(I->getTerminator())) + UnwindingBlocks.push_back(I); + else if (isa(I->getTerminator())) + UnreachableBlocks.push_back(I); + + // Handle unwinding blocks first. 
+
+  // Handle unwinding blocks first.
+  if (UnwindingBlocks.empty()) {
+    UnwindBlock = 0;
+  } else if (UnwindingBlocks.size() == 1) {
+    UnwindBlock = UnwindingBlocks.front();
+  } else {
+    UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F);
+    new UnwindInst(UnwindBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+           E = UnwindingBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unwind insn
+      BranchInst::Create(UnwindBlock, BB);
+    }
+  }
+
+  // Then unreachable blocks.
+  if (UnreachableBlocks.empty()) {
+    UnreachableBlock = 0;
+  } else if (UnreachableBlocks.size() == 1) {
+    UnreachableBlock = UnreachableBlocks.front();
+  } else {
+    UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F);
+    new UnreachableInst(UnreachableBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+           E = UnreachableBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unreachable inst.
+      BranchInst::Create(UnreachableBlock, BB);
+    }
+  }
+
+  // Now handle return blocks.
+  if (ReturningBlocks.empty()) {
+    ReturnBlock = 0;
+    return false;                          // No blocks return
+  } else if (ReturningBlocks.size() == 1) {
+    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+    return false;
+  }
+
+  // Otherwise, we need to insert a new basic block into the function, add a PHI
+  // node (if the function returns values), and convert all of the return
+  // instructions into unconditional branches.
+  //
+  BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F);
+
+  PHINode *PN = 0;
+  if (F.getReturnType() == Type::VoidTy) {
+    ReturnInst::Create(NULL, NewRetBlock);
+  } else {
+    // If the function doesn't return void... add a PHI node to the block...
+    PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+    NewRetBlock->getInstList().push_back(PN);
+    ReturnInst::Create(PN, NewRetBlock);
+  }
+
+  // Loop over all of the blocks, replacing the return instruction with an
+  // unconditional branch.
+  //
+  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+         E = ReturningBlocks.end(); I != E; ++I) {
+    BasicBlock *BB = *I;
+
+    // Add an incoming element to the PHI node for every return instruction that
+    // is merging into this new block...
+    if (PN)
+      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+    BB->getInstList().pop_back();  // Remove the return insn
+    BranchInst::Create(NewRetBlock, BB);
+  }
+  ReturnBlock = NewRetBlock;
+  return true;
+}
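For orientation, a minimal sketch of driving this pass programmatically. The
helper name is illustrative; createUnifyFunctionExitNodesPass is the factory
defined above, and PassManager comes from llvm/PassManager.h:

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"

    // Run -mergereturn over an already-loaded module (illustrative sketch).
    void runMergeReturn(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createUnifyFunctionExitNodesPass());
      PM.run(M);  // afterwards each function has at most one returning block
    }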
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp
new file mode 100644
index 000000000000..caef7ec5c45f
--- /dev/null
+++ b/lib/Transforms/Utils/UnrollLoop.cpp
@@ -0,0 +1,369 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// It works best when loops have been canonicalized by the -indvars pass,
+// allowing it to determine the trip counts of loops easily.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches.  This will be corrected in the future.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cstdio>
+
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by ValueMap.
+static inline void RemapInstruction(Instruction *I,
+                                    DenseMap<const Value*, Value*> &ValueMap) {
+  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+    Value *Op = I->getOperand(op);
+    DenseMap<const Value*, Value*>::iterator It = ValueMap.find(Op);
+    if (It != ValueMap.end()) Op = It->second;
+    I->setOperand(op, Op);
+  }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  BasicBlock *OnlyPred = BB->getSinglePredecessor();
+  if (!OnlyPred) return 0;
+
+  if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+    return 0;
+
+  DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+
+  // Resolve any PHI nodes at the start of the block.  They are all
+  // guaranteed to have exactly one entry if they exist, unless there are
+  // multiple duplicate (but guaranteed to be equal) entries for the
+  // incoming edges.  This occurs when there are multiple edges from
+  // OnlyPred to OnlySucc.
+  FoldSingleEntryPHINodes(BB);
+
+  // Delete the unconditional branch from the predecessor...
+  OnlyPred->getInstList().pop_back();
+
+  // Move all definitions in the successor to the predecessor...
+  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+  // Make all PHI nodes that referred to BB now refer to Pred as their
+  // source...
+  BB->replaceAllUsesWith(OnlyPred);
+
+  std::string OldName = BB->getName();
+
+  // Erase basic block from the function...
+  LI->removeBlock(BB);
+  BB->eraseFromParent();
+
+  // Inherit predecessor's name if it exists...
+  if (!OldName.empty() && !OnlyPred->hasName())
+    OnlyPred->setName(OldName);
+
+  return OnlyPred;
+}
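A hedged sketch of the clone-and-remap idiom this helper supports; F and BB
are assumed inputs, and the remap step mirrors the static RemapInstruction
defined above:

    // Clone BB into F, then rewrite the clone's operands to refer to the
    // cloned values recorded by CloneBasicBlock (illustrative fragment).
    void cloneAndRemap(llvm::BasicBlock *BB, llvm::Function *F) {
      llvm::DenseMap<const llvm::Value*, llvm::Value*> Map;
      llvm::BasicBlock *Copy = llvm::CloneBasicBlock(BB, Map, ".copy", F);
      for (llvm::BasicBlock::iterator I = Copy->begin(), E = Copy->end();
           I != E; ++I)
        RemapInstruction(I, Map);  // the static helper defined above
    }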
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
+  assert(L->isLCSSAForm());
+
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  if (!BI || BI->isUnconditional()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
+    return false;
+  }
+
+  // Find trip count
+  unsigned TripCount = L->getSmallConstantTripCount();
+  // Find trip multiple if count is not available
+  unsigned TripMultiple = 1;
+  if (TripCount == 0)
+    TripMultiple = L->getSmallConstantTripMultiple();
+
+  if (TripCount != 0)
+    DOUT << "  Trip Count = " << TripCount << "\n";
+  if (TripMultiple != 1)
+    DOUT << "  Trip Multiple = " << TripMultiple << "\n";
+
+  // Effectively "DCE" unrolled iterations that are beyond the tripcount
+  // and will never be executed.
+  if (TripCount != 0 && Count > TripCount)
+    Count = TripCount;
+
+  assert(Count > 0);
+  assert(TripMultiple > 0);
+  assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+  // Are we eliminating the loop control altogether?
+  bool CompletelyUnroll = Count == TripCount;
+
+  // If we know the trip count, we know the multiple...
+  unsigned BreakoutTrip = 0;
+  if (TripCount != 0) {
+    BreakoutTrip = TripCount % Count;
+    TripMultiple = 0;
+  } else {
+    // Figure out what multiple to use.
+    BreakoutTrip = TripMultiple =
+      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+  }
+
+  if (CompletelyUnroll) {
+    DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
+         << " with trip count " << TripCount << "!\n";
+  } else {
+    DOUT << "UNROLLING loop %" << Header->getName()
+         << " by " << Count;
+    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+      DOUT << " with a breakout at trip " << BreakoutTrip;
+    } else if (TripMultiple != 1) {
+      DOUT << " with " << TripMultiple << " trips per branch";
+    }
+    DOUT << "!\n";
+  }
+
+  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+  // For the first iteration of the loop, we should use the precloned values for
+  // PHI nodes.  Insert associations now.
+  typedef DenseMap<const Value*, Value*> ValueMapTy;
+  ValueMapTy LastValueMap;
+  std::vector<PHINode*> OrigPHINode;
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    OrigPHINode.push_back(PN);
+    if (Instruction *I =
+        dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
+      if (L->contains(I->getParent()))
+        LastValueMap[I] = I;
+  }
+
+  std::vector<BasicBlock*> Headers;
+  std::vector<BasicBlock*> Latches;
+  Headers.push_back(Header);
+  Latches.push_back(LatchBlock);
+
+  for (unsigned It = 1; It != Count; ++It) {
+    char SuffixBuffer[100];
+    sprintf(SuffixBuffer, ".%d", It);
+
+    std::vector<BasicBlock*> NewBlocks;
+
+    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
+         E = LoopBlocks.end(); BB != E; ++BB) {
+      ValueMapTy ValueMap;
+      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
+      Header->getParent()->getBasicBlockList().push_back(New);
+
+      // Loop over all of the PHI nodes in the block, changing them to use the
+      // incoming values from the previous block.
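      // (Reading note, editorial: in the cloned header each PHI collapses to
      // the value that flowed around the back edge, so copy N+1 starts where
      // copy N left off; the now-redundant cloned PHI is then erased.)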
+      if (*BB == Header)
+        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
+          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+            if (It > 1 && L->contains(InValI->getParent()))
+              InVal = LastValueMap[InValI];
+          ValueMap[OrigPHINode[i]] = InVal;
+          New->getInstList().erase(NewPHI);
+        }
+
+      // Update our running map of newest clones
+      LastValueMap[*BB] = New;
+      for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+           VI != VE; ++VI)
+        LastValueMap[VI->first] = VI->second;
+
+      L->addBasicBlockToLoop(New, LI->getBase());
+
+      // Add phi entries for newly created values to all exit blocks except
+      // the successor of the latch block.  The successor of the exit block will
+      // be updated specially after unrolling all the way.
+      if (*BB != LatchBlock)
+        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
+             UI != UE;) {
+          Instruction *UseInst = cast<Instruction>(*UI);
+          ++UI;
+          if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
+            PHINode *phi = cast<PHINode>(UseInst);
+            Value *Incoming = phi->getIncomingValueForBlock(*BB);
+            phi->addIncoming(Incoming, New);
+          }
+        }
+
+      // Keep track of new headers and latches as we create them, so that
+      // we can insert the proper branches later.
+      if (*BB == Header)
+        Headers.push_back(New);
+      if (*BB == LatchBlock) {
+        Latches.push_back(New);
+
+        // Also, clear out the new latch's back edge so that it doesn't look
+        // like a new loop, so that it's amenable to being merged with adjacent
+        // blocks later on.
+        TerminatorInst *Term = New->getTerminator();
+        assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
+        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
+        Term->setSuccessor(!ContinueOnTrue, NULL);
+      }
+
+      NewBlocks.push_back(New);
+    }
+
+    // Remap all instructions in the most recent iteration
+    for (unsigned i = 0; i < NewBlocks.size(); ++i)
+      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+           E = NewBlocks[i]->end(); I != E; ++I)
+        RemapInstruction(I, LastValueMap);
+  }
+
+  // The latch block exits the loop.  If there are any PHI nodes in the
+  // successor blocks, update them to use the appropriate values computed as the
+  // last iteration of the loop.
+  if (Count != 1) {
+    SmallPtrSet<PHINode*, 8> Users;
+    for (Value::use_iterator UI = LatchBlock->use_begin(),
+         UE = LatchBlock->use_end(); UI != UE; ++UI)
+      if (PHINode *phi = dyn_cast<PHINode>(*UI))
+        Users.insert(phi);
+
+    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
+    for (SmallPtrSet<PHINode*, 8>::iterator SI = Users.begin(), SE = Users.end();
+         SI != SE; ++SI) {
+      PHINode *PN = *SI;
+      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+      // If this value was defined in the loop, take the value defined by the
+      // last iteration of the loop.
+      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+        if (L->contains(InValI->getParent()))
+          InVal = LastValueMap[InVal];
+      }
+      PN->addIncoming(InVal, LastIterationBB);
+    }
+  }
+
+  // Now, if we're doing complete unrolling, loop over the PHI nodes in the
+  // original block, setting them to their incoming values.
+  if (CompletelyUnroll) {
+    BasicBlock *Preheader = L->getLoopPreheader();
+    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+      PHINode *PN = OrigPHINode[i];
+      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+      Header->getInstList().erase(PN);
+    }
+  }
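  // (Worked example, editorial: with TripCount == 10 and Count == 4 we get
  // BreakoutTrip == 10 % 4 == 2 and TripMultiple cleared to 0, so the loop
  // below keeps a conditional exit only on the copy with j == 2, the only
  // position where trip 10 can leave the loop; every other latch branches
  // unconditionally to the next copy.)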
+
+  // Now that all the basic blocks for the unrolled iterations are in place,
+  // set up the branches to connect them.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    // The original branch was replicated in each unrolled iteration.
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+    // The branch destination.
+    unsigned j = (i + 1) % e;
+    BasicBlock *Dest = Headers[j];
+    bool NeedConditional = true;
+
+    // For a complete unroll, make the last iteration end with a branch
+    // to the exit block.
+    if (CompletelyUnroll && j == 0) {
+      Dest = LoopExit;
+      NeedConditional = false;
+    }
+
+    // If we know the trip count or a multiple of it, we can safely use an
+    // unconditional branch for some iterations.
+    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+      NeedConditional = false;
+    }
+
+    if (NeedConditional) {
+      // Update the conditional branch's successor for the following
+      // iteration.
+      Term->setSuccessor(!ContinueOnTrue, Dest);
+    } else {
+      Term->setUnconditionalDest(Dest);
+      // Merge adjacent basic blocks, if possible.
+      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
+      }
+    }
+  }
+
+  // At this point, the code is well formed.  We now do a quick sweep over the
+  // inserted code, doing constant propagation and dead code elimination as we
+  // go.
+  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+      Instruction *Inst = I++;
+
+      if (isInstructionTriviallyDead(Inst))
+        (*BB)->getInstList().erase(Inst);
+      else if (Constant *C = ConstantFoldInstruction(Inst)) {
+        Inst->replaceAllUsesWith(C);
+        (*BB)->getInstList().erase(Inst);
+      }
+    }
+
+  NumCompletelyUnrolled += CompletelyUnroll;
+  ++NumUnrolled;
+  // Remove the loop from the LoopPassManager if it's completely removed.
+  if (CompletelyUnroll && LPM != NULL)
+    LPM->deleteLoopFromQueue(L);
+
+  // If we didn't completely unroll the loop, it should still be in LCSSA form.
+  if (!CompletelyUnroll)
+    assert(L->isLCSSAForm());
+
+  return true;
+}
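A caller-side sketch with illustrative names; a real client would be a
LoopPass obtaining LoopInfo from its pass manager:

    // Try to unroll L by a factor of 4 (sketch).  UnrollLoop requires LCSSA
    // form, keeps *LI consistent, and drops L from *LPM if the loop vanishes.
    static bool unrollByFour(llvm::Loop *L, llvm::LoopInfo *LI,
                             llvm::LPPassManager *LPM) {
      return llvm::UnrollLoop(L, 4, LI, LPM);
    }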
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..20b676d0fb8d
--- /dev/null
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,143 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/MDNode.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+  Value *&VMSlot = VM[V];
+  if (VMSlot) return VMSlot;      // Does it exist in the map yet?
+
+  // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
+  // DenseMap.  This includes any recursive calls to MapValue.
+
+  // Global values do not need to be seeded into the ValueMap if they are using
+  // the identity mapping.
+  if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+    return VMSlot = const_cast<Value*>(V);
+
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
+        isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
+        isa<UndefValue>(C) || isa<MDString>(C))
+      return VMSlot = C;           // Primitive constants map directly
+    else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+      for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
+           i != e; ++i) {
+        Value *MV = MapValue(*i, VM);
+        if (MV != *i) {
+          // This array must contain a reference to a global, make a new array
+          // and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CA->getNumOperands());
+          for (User::op_iterator j = b; j != i; ++j)
+            Values.push_back(cast<Constant>(*j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          return VM[V] = ConstantArray::get(CA->getType(), Values);
+        }
+      }
+      return VM[V] = C;
+
+    } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+      for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
+           i != e; ++i) {
+        Value *MV = MapValue(*i, VM);
+        if (MV != *i) {
+          // This struct must contain a reference to a global, make a new struct
+          // and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CS->getNumOperands());
+          for (User::op_iterator j = b; j != i; ++j)
+            Values.push_back(cast<Constant>(*j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          return VM[V] = ConstantStruct::get(CS->getType(), Values);
+        }
+      }
+      return VM[V] = C;
+
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      std::vector<Constant*> Ops;
+      for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+        Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+      return VM[V] = CE->getWithOperands(Ops);
+    } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+      for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
+           i != e; ++i) {
+        Value *MV = MapValue(*i, VM);
+        if (MV != *i) {
+          // This vector value must contain a reference to a global, make a new
+          // vector constant and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CP->getNumOperands());
+          for (User::op_iterator j = b; j != i; ++j)
+            Values.push_back(cast<Constant>(*j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          return VM[V] = ConstantVector::get(Values);
+        }
+      }
+      return VM[V] = C;
+
+    } else if (MDNode *N = dyn_cast<MDNode>(C)) {
+      for (MDNode::const_elem_iterator b = N->elem_begin(), i = b,
+           e = N->elem_end(); i != e; ++i) {
+        if (!*i) continue;
+
+        Value *MV = MapValue(*i, VM);
+        if (MV != *i) {
+          // This MDNode must contain a reference to a global, make a new MDNode
+          // and return it.
+          SmallVector<Value*, 4> Values;
+          Values.reserve(N->getNumElements());
+          for (MDNode::const_elem_iterator j = b; j != i; ++j)
+            Values.push_back(*j);
+          Values.push_back(MV);
+          for (++i; i != e; ++i)
+            Values.push_back(MapValue(*i, VM));
+          return VM[V] = MDNode::get(Values.data(), Values.size());
+        }
+      }
+      return VM[V] = C;
+
+    } else {
+      assert(0 && "Unknown type of constant!");
+    }
+  }
+
+  return 0;
+}
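An illustrative use of MapValue; OldG and NewG are assumed GlobalVariable
pointers in one module, and ValueMapTy is the typedef from
llvm/Transforms/Utils/ValueMapper.h:

    // Rebuild a constant expression so it refers to NewG instead of OldG
    // (sketch; globals otherwise map to themselves, per the note above).
    llvm::Value *retargetBitCast(llvm::GlobalVariable *OldG,
                                 llvm::GlobalVariable *NewG) {
      llvm::ValueMapTy VM;
      VM[OldG] = NewG;  // seed an explicit replacement
      llvm::Constant *CE = llvm::ConstantExpr::getBitCast(
          OldG, llvm::PointerType::getUnqual(llvm::Type::Int8Ty));
      return llvm::MapValue(CE, VM);  // an equivalent bitcast over NewG
    }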
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by ValueMap.
+///
+void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
+  for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+    Value *V = MapValue(*op, ValueMap);
+    assert(V && "Referenced value not in value map!");
+    *op = V;
+  }
+}
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
new file mode 100644
index 000000000000..6b369b680d72
--- /dev/null
+++ b/lib/VMCore/AsmWriter.cpp
@@ -0,0 +1,1880 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because it can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+  if (const Argument *MA = dyn_cast<Argument>(V))
+    return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+  if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+    return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+    return M ? M->getParent() : 0;
+  }
+
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+    return GV->getParent();
+  return 0;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const char *Str, unsigned Length,
+                               raw_ostream &Out) {
+  for (unsigned i = 0; i != Length; ++i) {
+    unsigned char C = Str[i];
+    if (isprint(C) && C != '\\' && C != '"')
+      Out << C;
+    else
+      Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+  }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { + PrintEscapedString(Str.c_str(), Str.size(), Out); +} + +enum PrefixType { + GlobalPrefix, + LabelPrefix, + LocalPrefix, + NoPrefix +}; + +/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either +/// prefixed with % (if the string only contains simple characters) or is +/// surrounded with ""'s (if it has special chars in it). Print it out. +static void PrintLLVMName(raw_ostream &OS, const char *NameStr, + unsigned NameLen, PrefixType Prefix) { + assert(NameStr && "Cannot get empty name!"); + switch (Prefix) { + default: assert(0 && "Bad prefix!"); + case NoPrefix: break; + case GlobalPrefix: OS << '@'; break; + case LabelPrefix: break; + case LocalPrefix: OS << '%'; break; + } + + // Scan the name to see if it needs quotes first. + bool NeedsQuotes = isdigit(NameStr[0]); + if (!NeedsQuotes) { + for (unsigned i = 0; i != NameLen; ++i) { + char C = NameStr[i]; + if (!isalnum(C) && C != '-' && C != '.' && C != '_') { + NeedsQuotes = true; + break; + } + } + } + + // If we didn't need any quotes, just write out the name in one blast. + if (!NeedsQuotes) { + OS.write(NameStr, NameLen); + return; + } + + // Okay, we need quotes. Output the quotes and escape any scary characters as + // needed. + OS << '"'; + PrintEscapedString(NameStr, NameLen, OS); + OS << '"'; +} + +/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either +/// prefixed with % (if the string only contains simple characters) or is +/// surrounded with ""'s (if it has special chars in it). Print it out. +static void PrintLLVMName(raw_ostream &OS, const Value *V) { + PrintLLVMName(OS, V->getNameStart(), V->getNameLen(), + isa(V) ? GlobalPrefix : LocalPrefix); +} + +//===----------------------------------------------------------------------===// +// TypePrinting Class: Type printing machinery +//===----------------------------------------------------------------------===// + +static DenseMap &getTypeNamesMap(void *M) { + return *static_cast*>(M); +} + +void TypePrinting::clear() { + getTypeNamesMap(TypeNames).clear(); +} + +bool TypePrinting::hasTypeName(const Type *Ty) const { + return getTypeNamesMap(TypeNames).count(Ty); +} + +void TypePrinting::addTypeName(const Type *Ty, const std::string &N) { + getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N)); +} + + +TypePrinting::TypePrinting() { + TypeNames = new DenseMap(); +} + +TypePrinting::~TypePrinting() { + delete &getTypeNamesMap(TypeNames); +} + +/// CalcTypeName - Write the specified type to the specified raw_ostream, making +/// use of type names or up references to shorten the type name where possible. +void TypePrinting::CalcTypeName(const Type *Ty, + SmallVectorImpl &TypeStack, + raw_ostream &OS, bool IgnoreTopLevelName) { + // Check to see if the type is named. + if (!IgnoreTopLevelName) { + DenseMap &TM = getTypeNamesMap(TypeNames); + DenseMap::iterator I = TM.find(Ty); + if (I != TM.end()) { + OS << I->second; + return; + } + } + + // Check to see if the Type is already on the stack... + unsigned Slot = 0, CurSize = TypeStack.size(); + while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type + + // This is another base case for the recursion. In this case, we know + // that we have looped back to a type that we have previously visited. + // Generate the appropriate upreference to handle this. 
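  // (Illustration, editorial: for a self-referential struct, written in
  // source form as "%T = type { %T* }", printing the unnamed type yields
  // "{ \2* }"; the "\2" climbs two levels of the TypeStack, back to the
  // struct itself.)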
+ if (Slot < CurSize) { + OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference + return; + } + + TypeStack.push_back(Ty); // Recursive case: Add us to the stack.. + + switch (Ty->getTypeID()) { + case Type::VoidTyID: OS << "void"; break; + case Type::FloatTyID: OS << "float"; break; + case Type::DoubleTyID: OS << "double"; break; + case Type::X86_FP80TyID: OS << "x86_fp80"; break; + case Type::FP128TyID: OS << "fp128"; break; + case Type::PPC_FP128TyID: OS << "ppc_fp128"; break; + case Type::LabelTyID: OS << "label"; break; + case Type::MetadataTyID: OS << "metadata"; break; + case Type::IntegerTyID: + OS << 'i' << cast(Ty)->getBitWidth(); + break; + + case Type::FunctionTyID: { + const FunctionType *FTy = cast(Ty); + CalcTypeName(FTy->getReturnType(), TypeStack, OS); + OS << " ("; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + if (I != FTy->param_begin()) + OS << ", "; + CalcTypeName(*I, TypeStack, OS); + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) OS << ", "; + OS << "..."; + } + OS << ')'; + break; + } + case Type::StructTyID: { + const StructType *STy = cast(Ty); + if (STy->isPacked()) + OS << '<'; + OS << "{ "; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + CalcTypeName(*I, TypeStack, OS); + if (next(I) != STy->element_end()) + OS << ','; + OS << ' '; + } + OS << '}'; + if (STy->isPacked()) + OS << '>'; + break; + } + case Type::PointerTyID: { + const PointerType *PTy = cast(Ty); + CalcTypeName(PTy->getElementType(), TypeStack, OS); + if (unsigned AddressSpace = PTy->getAddressSpace()) + OS << " addrspace(" << AddressSpace << ')'; + OS << '*'; + break; + } + case Type::ArrayTyID: { + const ArrayType *ATy = cast(Ty); + OS << '[' << ATy->getNumElements() << " x "; + CalcTypeName(ATy->getElementType(), TypeStack, OS); + OS << ']'; + break; + } + case Type::VectorTyID: { + const VectorType *PTy = cast(Ty); + OS << "<" << PTy->getNumElements() << " x "; + CalcTypeName(PTy->getElementType(), TypeStack, OS); + OS << '>'; + break; + } + case Type::OpaqueTyID: + OS << "opaque"; + break; + default: + OS << ""; + break; + } + + TypeStack.pop_back(); // Remove self from stack. +} + +/// printTypeInt - The internal guts of printing out a type that has a +/// potentially named portion. +/// +void TypePrinting::print(const Type *Ty, raw_ostream &OS, + bool IgnoreTopLevelName) { + // Check to see if the type is named. + DenseMap &TM = getTypeNamesMap(TypeNames); + if (!IgnoreTopLevelName) { + DenseMap::iterator I = TM.find(Ty); + if (I != TM.end()) { + OS << I->second; + return; + } + } + + // Otherwise we have a type that has not been named but is a derived type. + // Carefully recurse the type hierarchy to print out any contained symbolic + // names. + SmallVector TypeStack; + std::string TypeName; + + raw_string_ostream TypeOS(TypeName); + CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName); + OS << TypeOS.str(); + + // Cache type name for later use. + if (!IgnoreTopLevelName) + TM.insert(std::make_pair(Ty, TypeOS.str())); +} + +namespace { + class TypeFinder { + // To avoid walking constant expressions multiple times and other IR + // objects, we keep several helper maps. + DenseSet VisitedConstants; + DenseSet VisitedTypes; + + TypePrinting &TP; + std::vector &NumberedTypes; + public: + TypeFinder(TypePrinting &tp, std::vector &numberedTypes) + : TP(tp), NumberedTypes(numberedTypes) {} + + void Run(const Module &M) { + // Get types from the type symbol table. 
This gets opaque types referenced
+      // only through derived named types.
+      const TypeSymbolTable &ST = M.getTypeSymbolTable();
+      for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
+           TI != E; ++TI)
+        IncorporateType(TI->second);
+
+      // Get types from global variables.
+      for (Module::const_global_iterator I = M.global_begin(),
+           E = M.global_end(); I != E; ++I) {
+        IncorporateType(I->getType());
+        if (I->hasInitializer())
+          IncorporateValue(I->getInitializer());
+      }
+
+      // Get types from aliases.
+      for (Module::const_alias_iterator I = M.alias_begin(),
+           E = M.alias_end(); I != E; ++I) {
+        IncorporateType(I->getType());
+        IncorporateValue(I->getAliasee());
+      }
+
+      // Get types from functions.
+      for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+        IncorporateType(FI->getType());
+
+        for (Function::const_iterator BB = FI->begin(), E = FI->end();
+             BB != E;++BB)
+          for (BasicBlock::const_iterator II = BB->begin(),
+               E = BB->end(); II != E; ++II) {
+            const Instruction &I = *II;
+            // Incorporate the type of the instruction and all its operands.
+            IncorporateType(I.getType());
+            for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+                 OI != OE; ++OI)
+              IncorporateValue(*OI);
+          }
+      }
+    }
+
+  private:
+    void IncorporateType(const Type *Ty) {
+      // Check to see if we've already visited this type.
+      if (!VisitedTypes.insert(Ty).second)
+        return;
+
+      // If this is a structure or opaque type, add a name for the type.
+      if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements())
+            || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) {
+        TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
+        NumberedTypes.push_back(Ty);
+      }
+
+      // Recursively walk all contained types.
+      for (Type::subtype_iterator I = Ty->subtype_begin(),
+           E = Ty->subtype_end(); I != E; ++I)
+        IncorporateType(*I);
+    }
+
+    /// IncorporateValue - This method is used to walk operand lists finding
+    /// types hiding in constant expressions and other operands that won't be
+    /// walked in other ways.  GlobalValues, basic blocks, instructions, and
+    /// inst operands are all explicitly enumerated.
+    void IncorporateValue(const Value *V) {
+      if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+      // Already visited?
+      if (!VisitedConstants.insert(V).second)
+        return;
+
+      // Check this type.
+      IncorporateType(V->getType());
+
+      // Look in operands for types.
+      const Constant *C = cast<Constant>(V);
+      for (Constant::const_op_iterator I = C->op_begin(),
+           E = C->op_end(); I != E;++I)
+        IncorporateValue(*I);
+    }
+  };
+} // end anonymous namespace
+
+
+/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in
+/// the specified module to the TypePrinter and all numbered types to it and the
+/// NumberedTypes table.
+static void AddModuleTypesToPrinter(TypePrinting &TP,
+                                    std::vector<const Type*> &NumberedTypes,
+                                    const Module *M) {
+  if (M == 0) return;
+
+  // If the module has a symbol table, take all global types and stuff their
+  // names into the TypeNames map.
+  const TypeSymbolTable &ST = M->getTypeSymbolTable();
+  for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
+       TI != E; ++TI) {
+    const Type *Ty = cast<Type>(TI->second);
+
+    // As a heuristic, don't insert pointer to primitive types, because
+    // they are used too often to have a single useful name.
+    if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+      const Type *PETy = PTy->getElementType();
+      if ((PETy->isPrimitiveType() || PETy->isInteger()) &&
+          !isa<OpaqueType>(PETy))
+        continue;
+    }
+
+    // Likewise don't insert primitives either.
+ if (Ty->isInteger() || Ty->isPrimitiveType()) + continue; + + // Get the name as a string and insert it into TypeNames. + std::string NameStr; + raw_string_ostream NameOS(NameStr); + PrintLLVMName(NameOS, TI->first.c_str(), TI->first.length(), LocalPrefix); + TP.addTypeName(Ty, NameOS.str()); + } + + // Walk the entire module to find references to unnamed structure and opaque + // types. This is required for correctness by opaque types (because multiple + // uses of an unnamed opaque type needs to be referred to by the same ID) and + // it shrinks complex recursive structure types substantially in some cases. + TypeFinder(TP, NumberedTypes).Run(*M); +} + + +/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic +/// type, iff there is an entry in the modules symbol table for the specified +/// type or one of it's component types. +/// +void llvm::WriteTypeSymbolic(raw_ostream &OS, const Type *Ty, const Module *M) { + TypePrinting Printer; + std::vector NumberedTypes; + AddModuleTypesToPrinter(Printer, NumberedTypes, M); + Printer.print(Ty, OS); +} + +//===----------------------------------------------------------------------===// +// SlotTracker Class: Enumerate slot numbers for unnamed values +//===----------------------------------------------------------------------===// + +namespace { + +/// This class provides computation of slot numbers for LLVM Assembly writing. +/// +class SlotTracker { +public: + /// ValueMap - A mapping of Values to slot numbers + typedef DenseMap ValueMap; + +private: + /// TheModule - The module for which we are holding slot numbers + const Module* TheModule; + + /// TheFunction - The function for which we are holding slot numbers + const Function* TheFunction; + bool FunctionProcessed; + + /// mMap - The TypePlanes map for the module level data + ValueMap mMap; + unsigned mNext; + + /// fMap - The TypePlanes map for the function level data + ValueMap fMap; + unsigned fNext; + +public: + /// Construct from a module + explicit SlotTracker(const Module *M); + /// Construct from a function, starting out in incorp state. + explicit SlotTracker(const Function *F); + + /// Return the slot number of the specified value in it's type + /// plane. If something is not in the SlotTracker, return -1. + int getLocalSlot(const Value *V); + int getGlobalSlot(const GlobalValue *V); + + /// If you'd like to deal with a function instead of just a module, use + /// this method to get its data into the SlotTracker. + void incorporateFunction(const Function *F) { + TheFunction = F; + FunctionProcessed = false; + } + + /// After calling incorporateFunction, use this method to remove the + /// most recently incorporated function from the SlotTracker. This + /// will reset the state of the machine back to just the module contents. + void purgeFunction(); + + // Implementation Details +private: + /// This function does the actual initialization. + inline void initialize(); + + /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. + void CreateModuleSlot(const GlobalValue *V); + + /// CreateFunctionSlot - Insert the specified Value* into the slot table. + void CreateFunctionSlot(const Value *V); + + /// Add all of the module level global variables (and their initializers) + /// and function declarations, but not the contents of those functions. 
+ void processModule(); + + /// Add all of the functions arguments, basic blocks, and instructions + void processFunction(); + + SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT + void operator=(const SlotTracker &); // DO NOT IMPLEMENT +}; + +} // end anonymous namespace + + +static SlotTracker *createSlotTracker(const Value *V) { + if (const Argument *FA = dyn_cast(V)) + return new SlotTracker(FA->getParent()); + + if (const Instruction *I = dyn_cast(V)) + return new SlotTracker(I->getParent()->getParent()); + + if (const BasicBlock *BB = dyn_cast(V)) + return new SlotTracker(BB->getParent()); + + if (const GlobalVariable *GV = dyn_cast(V)) + return new SlotTracker(GV->getParent()); + + if (const GlobalAlias *GA = dyn_cast(V)) + return new SlotTracker(GA->getParent()); + + if (const Function *Func = dyn_cast(V)) + return new SlotTracker(Func); + + return 0; +} + +#if 0 +#define ST_DEBUG(X) cerr << X +#else +#define ST_DEBUG(X) +#endif + +// Module level constructor. Causes the contents of the Module (sans functions) +// to be added to the slot table. +SlotTracker::SlotTracker(const Module *M) + : TheModule(M), TheFunction(0), FunctionProcessed(false), mNext(0), fNext(0) { +} + +// Function level constructor. Causes the contents of the Module and the one +// function provided to be added to the slot table. +SlotTracker::SlotTracker(const Function *F) + : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false), + mNext(0), fNext(0) { +} + +inline void SlotTracker::initialize() { + if (TheModule) { + processModule(); + TheModule = 0; ///< Prevent re-processing next time we're called. + } + + if (TheFunction && !FunctionProcessed) + processFunction(); +} + +// Iterate through all the global variables, functions, and global +// variable initializers and create slots for them. +void SlotTracker::processModule() { + ST_DEBUG("begin processModule!\n"); + + // Add all of the unnamed global variables to the value table. + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (!I->hasName()) + CreateModuleSlot(I); + + // Add all the unnamed functions to the table. + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) + if (!I->hasName()) + CreateModuleSlot(I); + + ST_DEBUG("end processModule!\n"); +} + + +// Process the arguments, basic blocks, and instructions of a function. +void SlotTracker::processFunction() { + ST_DEBUG("begin processFunction!\n"); + fNext = 0; + + // Add all the function arguments with no names. + for(Function::const_arg_iterator AI = TheFunction->arg_begin(), + AE = TheFunction->arg_end(); AI != AE; ++AI) + if (!AI->hasName()) + CreateFunctionSlot(AI); + + ST_DEBUG("Inserting Instructions:\n"); + + // Add all of the basic blocks and instructions with no names. + for (Function::const_iterator BB = TheFunction->begin(), + E = TheFunction->end(); BB != E; ++BB) { + if (!BB->hasName()) + CreateFunctionSlot(BB); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (I->getType() != Type::VoidTy && !I->hasName()) + CreateFunctionSlot(I); + } + + FunctionProcessed = true; + + ST_DEBUG("end processFunction!\n"); +} + +/// Clean up after incorporating a function. This is the only way to get out of +/// the function incorporation state that affects get*Slot/Create*Slot. Function +/// incorporation state is indicated by TheFunction != 0. 
+void SlotTracker::purgeFunction() { + ST_DEBUG("begin purgeFunction!\n"); + fMap.clear(); // Simply discard the function level map + TheFunction = 0; + FunctionProcessed = false; + ST_DEBUG("end purgeFunction!\n"); +} + +/// getGlobalSlot - Get the slot number of a global value. +int SlotTracker::getGlobalSlot(const GlobalValue *V) { + // Check for uninitialized state and do lazy initialization. + initialize(); + + // Find the type plane in the module map + ValueMap::iterator MI = mMap.find(V); + return MI == mMap.end() ? -1 : (int)MI->second; +} + + +/// getLocalSlot - Get the slot number for a value that is local to a function. +int SlotTracker::getLocalSlot(const Value *V) { + assert(!isa(V) && "Can't get a constant or global slot with this!"); + + // Check for uninitialized state and do lazy initialization. + initialize(); + + ValueMap::iterator FI = fMap.find(V); + return FI == fMap.end() ? -1 : (int)FI->second; +} + + +/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. +void SlotTracker::CreateModuleSlot(const GlobalValue *V) { + assert(V && "Can't insert a null Value into SlotTracker!"); + assert(V->getType() != Type::VoidTy && "Doesn't need a slot!"); + assert(!V->hasName() && "Doesn't need a slot!"); + + unsigned DestSlot = mNext++; + mMap[V] = DestSlot; + + ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << + DestSlot << " ["); + // G = Global, F = Function, A = Alias, o = other + ST_DEBUG((isa(V) ? 'G' : + (isa(V) ? 'F' : + (isa(V) ? 'A' : 'o'))) << "]\n"); +} + + +/// CreateSlot - Create a new slot for the specified value if it has no name. +void SlotTracker::CreateFunctionSlot(const Value *V) { + assert(V->getType() != Type::VoidTy && !V->hasName() && + "Doesn't need a slot!"); + + unsigned DestSlot = fNext++; + fMap[V] = DestSlot; + + // G = Global, F = Function, o = other + ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << + DestSlot << " [o]\n"); +} + + + +//===----------------------------------------------------------------------===// +// AsmWriter Implementation +//===----------------------------------------------------------------------===// + +static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, + TypePrinting &TypePrinter, + SlotTracker *Machine); + + + +static const char *getPredicateText(unsigned predicate) { + const char * pred = "unknown"; + switch (predicate) { + case FCmpInst::FCMP_FALSE: pred = "false"; break; + case FCmpInst::FCMP_OEQ: pred = "oeq"; break; + case FCmpInst::FCMP_OGT: pred = "ogt"; break; + case FCmpInst::FCMP_OGE: pred = "oge"; break; + case FCmpInst::FCMP_OLT: pred = "olt"; break; + case FCmpInst::FCMP_OLE: pred = "ole"; break; + case FCmpInst::FCMP_ONE: pred = "one"; break; + case FCmpInst::FCMP_ORD: pred = "ord"; break; + case FCmpInst::FCMP_UNO: pred = "uno"; break; + case FCmpInst::FCMP_UEQ: pred = "ueq"; break; + case FCmpInst::FCMP_UGT: pred = "ugt"; break; + case FCmpInst::FCMP_UGE: pred = "uge"; break; + case FCmpInst::FCMP_ULT: pred = "ult"; break; + case FCmpInst::FCMP_ULE: pred = "ule"; break; + case FCmpInst::FCMP_UNE: pred = "une"; break; + case FCmpInst::FCMP_TRUE: pred = "true"; break; + case ICmpInst::ICMP_EQ: pred = "eq"; break; + case ICmpInst::ICMP_NE: pred = "ne"; break; + case ICmpInst::ICMP_SGT: pred = "sgt"; break; + case ICmpInst::ICMP_SGE: pred = "sge"; break; + case ICmpInst::ICMP_SLT: pred = "slt"; break; + case ICmpInst::ICMP_SLE: pred = "sle"; break; + case ICmpInst::ICMP_UGT: pred = "ugt"; break; + case 
ICmpInst::ICMP_UGE: pred = "uge"; break; + case ICmpInst::ICMP_ULT: pred = "ult"; break; + case ICmpInst::ICMP_ULE: pred = "ule"; break; + } + return pred; +} + +static void WriteConstantInt(raw_ostream &Out, const Constant *CV, + TypePrinting &TypePrinter, SlotTracker *Machine) { + if (const ConstantInt *CI = dyn_cast(CV)) { + if (CI->getType() == Type::Int1Ty) { + Out << (CI->getZExtValue() ? "true" : "false"); + return; + } + Out << CI->getValue(); + return; + } + + if (const ConstantFP *CFP = dyn_cast(CV)) { + if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble || + &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) { + // We would like to output the FP constant value in exponential notation, + // but we cannot do this if doing so will lose precision. Check here to + // make sure that we only output it in exponential format if we can parse + // the value back and get the same value. + // + bool ignored; + bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; + double Val = isDouble ? CFP->getValueAPF().convertToDouble() : + CFP->getValueAPF().convertToFloat(); + std::string StrVal = ftostr(CFP->getValueAPF()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN, that atof will accept, but the lexer will not. Check + // that the string matches the "[-+]?[0-9]" regex. + // + if ((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) { + // Reparse stringized version! + if (atof(StrVal.c_str()) == Val) { + Out << StrVal; + return; + } + } + // Otherwise we could not reparse it to exactly the same value, so we must + // output the string in hexadecimal format! Note that loading and storing + // floating point types changes the bits of NaNs on some hosts, notably + // x86, so we must not use these types. + assert(sizeof(double) == sizeof(uint64_t) && + "assuming that double is 64 bits!"); + char Buffer[40]; + APFloat apf = CFP->getValueAPF(); + // Floats are represented in ASCII IR as double, convert. + if (!isDouble) + apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + Out << "0x" << + utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), + Buffer+40); + return; + } + + // Some form of long double. These appear as a magic letter identifying + // the type, then a fixed number of hex digits. 
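    // (Illustration, editorial: the x86_fp80 constant 1.0 comes out as
    // 0xK3FFF8000000000000000, the 'K' tag followed by 20 hex digits of the
    // 80-bit pattern; 'L' and 'M' below play the same role for fp128 and
    // ppc_fp128.)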
+ Out << "0x"; + if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) { + Out << 'K'; + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t* p = api.getRawData(); + uint64_t word = p[1]; + int shiftcount=12; + int width = api.getBitWidth(); + for (int j=0; j>shiftcount) & 15; + if (nibble < 10) + Out << (unsigned char)(nibble + '0'); + else + Out << (unsigned char)(nibble - 10 + 'A'); + if (shiftcount == 0 && j+4 < width) { + word = *p; + shiftcount = 64; + if (width-j-4 < 64) + shiftcount = width-j-4; + } + } + return; + } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) + Out << 'L'; + else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) + Out << 'M'; + else + assert(0 && "Unsupported floating point type"); + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t* p = api.getRawData(); + uint64_t word = *p; + int shiftcount=60; + int width = api.getBitWidth(); + for (int j=0; j>shiftcount) & 15; + if (nibble < 10) + Out << (unsigned char)(nibble + '0'); + else + Out << (unsigned char)(nibble - 10 + 'A'); + if (shiftcount == 0 && j+4 < width) { + word = *(++p); + shiftcount = 64; + if (width-j-4 < 64) + shiftcount = width-j-4; + } + } + return; + } + + if (isa(CV)) { + Out << "zeroinitializer"; + return; + } + + if (const ConstantArray *CA = dyn_cast(CV)) { + // As a special case, print the array as a string if it is an array of + // i8 with ConstantInt values. + // + const Type *ETy = CA->getType()->getElementType(); + if (CA->isString()) { + Out << "c\""; + PrintEscapedString(CA->getAsString(), Out); + Out << '"'; + } else { // Cannot output in string format... + Out << '['; + if (CA->getNumOperands()) { + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CA->getOperand(0), + TypePrinter, Machine); + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { + Out << ", "; + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CA->getOperand(i), TypePrinter, Machine); + } + } + Out << ']'; + } + return; + } + + if (const ConstantStruct *CS = dyn_cast(CV)) { + if (CS->getType()->isPacked()) + Out << '<'; + Out << '{'; + unsigned N = CS->getNumOperands(); + if (N) { + Out << ' '; + TypePrinter.print(CS->getOperand(0)->getType(), Out); + Out << ' '; + + WriteAsOperandInternal(Out, CS->getOperand(0), TypePrinter, Machine); + + for (unsigned i = 1; i < N; i++) { + Out << ", "; + TypePrinter.print(CS->getOperand(i)->getType(), Out); + Out << ' '; + + WriteAsOperandInternal(Out, CS->getOperand(i), TypePrinter, Machine); + } + Out << ' '; + } + + Out << '}'; + if (CS->getType()->isPacked()) + Out << '>'; + return; + } + + if (const ConstantVector *CP = dyn_cast(CV)) { + const Type *ETy = CP->getType()->getElementType(); + assert(CP->getNumOperands() > 0 && + "Number of operands for a PackedConst must be > 0"); + Out << '<'; + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CP->getOperand(0), TypePrinter, Machine); + for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { + Out << ", "; + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CP->getOperand(i), TypePrinter, Machine); + } + Out << '>'; + return; + } + + if (isa(CV)) { + Out << "null"; + return; + } + + if (isa(CV)) { + Out << "undef"; + return; + } + + if (const MDString *S = dyn_cast(CV)) { + Out << "!\""; + PrintEscapedString(S->begin(), S->size(), Out); + 
Out << '"'; + return; + } + + if (const MDNode *N = dyn_cast(CV)) { + Out << "!{"; + for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); + I != E;) { + if (!*I) { + Out << "null"; + } else { + TypePrinter.print((*I)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, *I, TypePrinter, Machine); + } + + if (++I != E) + Out << ", "; + } + Out << "}"; + return; + } + + if (const ConstantExpr *CE = dyn_cast(CV)) { + Out << CE->getOpcodeName(); + if (CE->isCompare()) + Out << ' ' << getPredicateText(CE->getPredicate()); + Out << " ("; + + for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { + TypePrinter.print((*OI)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, *OI, TypePrinter, Machine); + if (OI+1 != CE->op_end()) + Out << ", "; + } + + if (CE->hasIndices()) { + const SmallVector &Indices = CE->getIndices(); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + Out << ", " << Indices[i]; + } + + if (CE->isCast()) { + Out << " to "; + TypePrinter.print(CE->getType(), Out); + } + + Out << ')'; + return; + } + + Out << ""; +} + + +/// WriteAsOperand - Write the name of the specified value out to the specified +/// ostream. This can be useful when you just want to print int %reg126, not +/// the whole instruction that generated it. +/// +static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, + TypePrinting &TypePrinter, + SlotTracker *Machine) { + if (V->hasName()) { + PrintLLVMName(Out, V); + return; + } + + const Constant *CV = dyn_cast(V); + if (CV && !isa(CV)) { + WriteConstantInt(Out, CV, TypePrinter, Machine); + return; + } + + if (const InlineAsm *IA = dyn_cast(V)) { + Out << "asm "; + if (IA->hasSideEffects()) + Out << "sideeffect "; + Out << '"'; + PrintEscapedString(IA->getAsmString(), Out); + Out << "\", \""; + PrintEscapedString(IA->getConstraintString(), Out); + Out << '"'; + return; + } + + char Prefix = '%'; + int Slot; + if (Machine) { + if (const GlobalValue *GV = dyn_cast(V)) { + Slot = Machine->getGlobalSlot(GV); + Prefix = '@'; + } else { + Slot = Machine->getLocalSlot(V); + } + } else { + Machine = createSlotTracker(V); + if (Machine) { + if (const GlobalValue *GV = dyn_cast(V)) { + Slot = Machine->getGlobalSlot(GV); + Prefix = '@'; + } else { + Slot = Machine->getLocalSlot(V); + } + } else { + Slot = -1; + } + delete Machine; + } + + if (Slot != -1) + Out << Prefix << Slot; + else + Out << ""; +} + +/// WriteAsOperand - Write the name of the specified value out to the specified +/// ostream. This can be useful when you just want to print int %reg126, not +/// the whole instruction that generated it. 
+/// +void llvm::WriteAsOperand(std::ostream &Out, const Value *V, bool PrintType, + const Module *Context) { + raw_os_ostream OS(Out); + WriteAsOperand(OS, V, PrintType, Context); +} + +void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType, + const Module *Context) { + if (Context == 0) Context = getModuleFromVal(V); + + TypePrinting TypePrinter; + std::vector NumberedTypes; + AddModuleTypesToPrinter(TypePrinter, NumberedTypes, Context); + if (PrintType) { + TypePrinter.print(V->getType(), Out); + Out << ' '; + } + + WriteAsOperandInternal(Out, V, TypePrinter, 0); +} + + +namespace { + +class AssemblyWriter { + raw_ostream &Out; + SlotTracker &Machine; + const Module *TheModule; + TypePrinting TypePrinter; + AssemblyAnnotationWriter *AnnotationWriter; + std::vector NumberedTypes; +public: + inline AssemblyWriter(raw_ostream &o, SlotTracker &Mac, const Module *M, + AssemblyAnnotationWriter *AAW) + : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { + AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); + } + + void write(const Module *M) { printModule(M); } + + void write(const GlobalValue *G) { + if (const GlobalVariable *GV = dyn_cast(G)) + printGlobal(GV); + else if (const GlobalAlias *GA = dyn_cast(G)) + printAlias(GA); + else if (const Function *F = dyn_cast(G)) + printFunction(F); + else + assert(0 && "Unknown global"); + } + + void write(const BasicBlock *BB) { printBasicBlock(BB); } + void write(const Instruction *I) { printInstruction(*I); } + + void writeOperand(const Value *Op, bool PrintType); + void writeParamOperand(const Value *Operand, Attributes Attrs); + + const Module* getModule() { return TheModule; } + +private: + void printModule(const Module *M); + void printTypeSymbolTable(const TypeSymbolTable &ST); + void printGlobal(const GlobalVariable *GV); + void printAlias(const GlobalAlias *GV); + void printFunction(const Function *F); + void printArgument(const Argument *FA, Attributes Attrs); + void printBasicBlock(const BasicBlock *BB); + void printInstruction(const Instruction &I); + + // printInfoComment - Print a little comment after the instruction indicating + // which slot it occupies. + void printInfoComment(const Value &V); +}; +} // end of anonymous namespace + + +void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { + if (Operand == 0) { + Out << ""; + } else { + if (PrintType) { + TypePrinter.print(Operand->getType(), Out); + Out << ' '; + } + WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine); + } +} + +void AssemblyWriter::writeParamOperand(const Value *Operand, + Attributes Attrs) { + if (Operand == 0) { + Out << ""; + } else { + // Print the type + TypePrinter.print(Operand->getType(), Out); + // Print parameter attributes list + if (Attrs != Attribute::None) + Out << ' ' << Attribute::getAsString(Attrs); + Out << ' '; + // Print the operand + WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine); + } +} + +void AssemblyWriter::printModule(const Module *M) { + if (!M->getModuleIdentifier().empty() && + // Don't print the ID if it will start a new line (which would + // require a comment char before it). 
+ M->getModuleIdentifier().find('\n') == std::string::npos) + Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; + + if (!M->getDataLayout().empty()) + Out << "target datalayout = \"" << M->getDataLayout() << "\"\n"; + if (!M->getTargetTriple().empty()) + Out << "target triple = \"" << M->getTargetTriple() << "\"\n"; + + if (!M->getModuleInlineAsm().empty()) { + // Split the string into lines, to make it easier to read the .ll file. + std::string Asm = M->getModuleInlineAsm(); + size_t CurPos = 0; + size_t NewLine = Asm.find_first_of('\n', CurPos); + while (NewLine != std::string::npos) { + // We found a newline, print the portion of the asm string from the + // last newline up to this newline. + Out << "module asm \""; + PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), + Out); + Out << "\"\n"; + CurPos = NewLine+1; + NewLine = Asm.find_first_of('\n', CurPos); + } + Out << "module asm \""; + PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); + Out << "\"\n"; + } + + // Loop over the dependent libraries and emit them. + Module::lib_iterator LI = M->lib_begin(); + Module::lib_iterator LE = M->lib_end(); + if (LI != LE) { + Out << "deplibs = [ "; + while (LI != LE) { + Out << '"' << *LI << '"'; + ++LI; + if (LI != LE) + Out << ", "; + } + Out << " ]\n"; + } + + // Loop over the symbol table, emitting all id'd types. + printTypeSymbolTable(M->getTypeSymbolTable()); + + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + printGlobal(I); + + // Output all aliases. + if (!M->alias_empty()) Out << "\n"; + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + printAlias(I); + + // Output all of the functions. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + printFunction(I); +} + +static void PrintLinkage(GlobalValue::LinkageTypes LT, raw_ostream &Out) { + switch (LT) { + case GlobalValue::PrivateLinkage: Out << "private "; break; + case GlobalValue::InternalLinkage: Out << "internal "; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "available_externally "; + break; + case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; + case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; + case GlobalValue::WeakAnyLinkage: Out << "weak "; break; + case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; + case GlobalValue::CommonLinkage: Out << "common "; break; + case GlobalValue::AppendingLinkage: Out << "appending "; break; + case GlobalValue::DLLImportLinkage: Out << "dllimport "; break; + case GlobalValue::DLLExportLinkage: Out << "dllexport "; break; + case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break; + case GlobalValue::ExternalLinkage: break; + case GlobalValue::GhostLinkage: + Out << "GhostLinkage not allowed in AsmWriter!\n"; + abort(); + } +} + + +static void PrintVisibility(GlobalValue::VisibilityTypes Vis, + raw_ostream &Out) { + switch (Vis) { + default: assert(0 && "Invalid visibility style!"); + case GlobalValue::DefaultVisibility: break; + case GlobalValue::HiddenVisibility: Out << "hidden "; break; + case GlobalValue::ProtectedVisibility: Out << "protected "; break; + } +} + +void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->hasName()) { + PrintLLVMName(Out, GV); + Out << " = "; + } + + if (!GV->hasInitializer() && GV->hasExternalLinkage()) + Out << "external "; + + PrintLinkage(GV->getLinkage(), Out); + PrintVisibility(GV->getVisibility(), Out); + 
+  if (GV->isThreadLocal()) Out << "thread_local ";
+  if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+    Out << "addrspace(" << AddressSpace << ") ";
+  Out << (GV->isConstant() ? "constant " : "global ");
+  TypePrinter.print(GV->getType()->getElementType(), Out);
+
+  if (GV->hasInitializer()) {
+    Out << ' ';
+    writeOperand(GV->getInitializer(), false);
+  }
+
+  if (GV->hasSection())
+    Out << ", section \"" << GV->getSection() << '"';
+  if (GV->getAlignment())
+    Out << ", align " << GV->getAlignment();
+
+  printInfoComment(*GV);
+  Out << '\n';
+}
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+  // Don't crash when dumping partially built GA
+  if (!GA->hasName())
+    Out << "<<nameless>> = ";
+  else {
+    PrintLLVMName(Out, GA);
+    Out << " = ";
+  }
+  PrintVisibility(GA->getVisibility(), Out);
+
+  Out << "alias ";
+
+  PrintLinkage(GA->getLinkage(), Out);
+
+  const Constant *Aliasee = GA->getAliasee();
+
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+    TypePrinter.print(GV->getType(), Out);
+    Out << ' ';
+    PrintLLVMName(Out, GV);
+  } else if (const Function *F = dyn_cast<Function>(Aliasee)) {
+    TypePrinter.print(F->getFunctionType(), Out);
+    Out << "* ";
+
+    WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+  } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
+    TypePrinter.print(GA->getType(), Out);
+    Out << ' ';
+    PrintLLVMName(Out, GA);
+  } else {
+    const ConstantExpr *CE = cast<ConstantExpr>(Aliasee);
+    // The only valid GEP is an all zero GEP.
+    assert((CE->getOpcode() == Instruction::BitCast ||
+            CE->getOpcode() == Instruction::GetElementPtr) &&
+           "Unsupported aliasee");
+    writeOperand(CE, false);
+  }
+
+  printInfoComment(*GA);
+  Out << '\n';
+}
+
+void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) {
+  // Emit all numbered types.
+  for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+    Out << "\ttype ";
+
+    // Make sure we print out at least one level of the type structure, so
+    // that we do not get %2 = type %2
+    TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out);
+    Out << "\t\t; type %" << i << '\n';
+  }
+
+  // Print the named types.
+  for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end();
+       TI != TE; ++TI) {
+    Out << '\t';
+    PrintLLVMName(Out, &TI->first[0], TI->first.size(), LocalPrefix);
+    Out << " = type ";
+
+    // Make sure we print out at least one level of the type structure, so
+    // that we do not get %FILE = type %FILE
+    TypePrinter.printAtLeastOneLevel(TI->second, Out);
+    Out << '\n';
+  }
+}
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+  // Print out the return type and name.
+  Out << '\n';
+
+  if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+  if (F->isDeclaration())
+    Out << "declare ";
+  else
+    Out << "define ";
+
+  PrintLinkage(F->getLinkage(), Out);
+  PrintVisibility(F->getVisibility(), Out);
+
+  // Print the calling convention.
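+  // For example, CallingConv::Fast prints as "fastcc", so a hypothetical
+  // declaration would read "declare fastcc i32 @f()"; the default C
+  // convention emits no token at all.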
+  switch (F->getCallingConv()) {
+  case CallingConv::C: break;   // default
+  case CallingConv::Fast:         Out << "fastcc "; break;
+  case CallingConv::Cold:         Out << "coldcc "; break;
+  case CallingConv::X86_StdCall:  Out << "x86_stdcallcc "; break;
+  case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+  default: Out << "cc" << F->getCallingConv() << " "; break;
+  }
+
+  const FunctionType *FT = F->getFunctionType();
+  const AttrListPtr &Attrs = F->getAttributes();
+  Attributes RetAttrs = Attrs.getRetAttributes();
+  if (RetAttrs != Attribute::None)
+    Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
+  TypePrinter.print(F->getReturnType(), Out);
+  Out << ' ';
+  WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+  Out << '(';
+  Machine.incorporateFunction(F);
+
+  // Loop over the arguments, printing them...
+
+  unsigned Idx = 1;
+  if (!F->isDeclaration()) {
+    // If this isn't a declaration, print the argument names as well.
+    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+         I != E; ++I) {
+      // Insert commas as we go... the first arg doesn't get a comma
+      if (I != F->arg_begin()) Out << ", ";
+      printArgument(I, Attrs.getParamAttributes(Idx));
+      Idx++;
+    }
+  } else {
+    // Otherwise, print the types from the function type.
+    for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+      // Insert commas as we go... the first arg doesn't get a comma
+      if (i) Out << ", ";
+
+      // Output type...
+      TypePrinter.print(FT->getParamType(i), Out);
+
+      Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
+      if (ArgAttrs != Attribute::None)
+        Out << ' ' << Attribute::getAsString(ArgAttrs);
+    }
+  }
+
+  // Finish printing arguments...
+  if (FT->isVarArg()) {
+    if (FT->getNumParams()) Out << ", ";
+    Out << "...";  // Output varargs portion of signature!
+  }
+  Out << ')';
+  Attributes FnAttrs = Attrs.getFnAttributes();
+  if (FnAttrs != Attribute::None)
+    Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
+  if (F->hasSection())
+    Out << " section \"" << F->getSection() << '"';
+  if (F->getAlignment())
+    Out << " align " << F->getAlignment();
+  if (F->hasGC())
+    Out << " gc \"" << F->getGC() << '"';
+  if (F->isDeclaration()) {
+    Out << "\n";
+  } else {
+    Out << " {";
+
+    // Output all of its basic blocks... for the function
+    for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+      printBasicBlock(I);
+
+    Out << "}\n";
+  }
+
+  Machine.purgeFunction();
+}
+
+/// printArgument - This member is called for every argument that is passed
+/// into the function.  Simply print it out.
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+                                   Attributes Attrs) {
+  // Output type...
+  TypePrinter.print(Arg->getType(), Out);
+
+  // Output parameter attributes list
+  if (Attrs != Attribute::None)
+    Out << ' ' << Attribute::getAsString(Attrs);
+
+  // Output name, if available...
+  if (Arg->hasName()) {
+    Out << ' ';
+    PrintLLVMName(Out, Arg);
+  }
+}
+
+/// printBasicBlock - This member is called for each basic block in a method.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+  if (BB->hasName()) {              // Print out the label if it exists...
+    Out << "\n";
+    PrintLLVMName(Out, BB->getNameStart(), BB->getNameLen(), LabelPrefix);
+    Out << ':';
+  } else if (!BB->use_empty()) {    // Don't print block # of no uses...
+    Out << "\n;